From 1e0ab97e561970c41326618b90535433007ba056 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 26 May 2006 21:46:47 +0000 Subject: [PATCH 01/73] Doxygenify the header file. Add basic reader fields to the parser structure. Start implementing basic parser functions. --- doc/doxygen.cfg | 8 +- include/Makefile.am | 2 +- include/yaml/yaml.h | 155 ++++++++++++++++++++++++++++++++++-- include/yaml/yaml_version.h | 6 +- src/Makefile.am | 2 +- src/api.c | 34 ++++++++ 6 files changed, 193 insertions(+), 14 deletions(-) create mode 100644 src/api.c diff --git a/doc/doxygen.cfg b/doc/doxygen.cfg index c3f6d850..f7a9aef4 100644 --- a/doc/doxygen.cfg +++ b/doc/doxygen.cfg @@ -10,7 +10,7 @@ CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English USE_WINDOWS_ENCODING = NO BRIEF_MEMBER_DESC = YES -REPEAT_BRIEF = NO +REPEAT_BRIEF = YES ABBREVIATE_BRIEF = ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO @@ -18,7 +18,7 @@ FULL_PATH_NAMES = YES STRIP_FROM_PATH = STRIP_FROM_INC_PATH = SHORT_NAMES = NO -JAVADOC_AUTOBRIEF = NO +JAVADOC_AUTOBRIEF = YES MULTILINE_CPP_IS_BRIEF = NO DETAILS_AT_TOP = NO INHERIT_DOCS = YES @@ -46,7 +46,7 @@ CASE_SENSE_NAMES = YES HIDE_SCOPE_NAMES = NO SHOW_INCLUDE_FILES = YES INLINE_INFO = YES -SORT_MEMBER_DOCS = YES +SORT_MEMBER_DOCS = NO SORT_BRIEF_DOCS = NO SORT_BY_SCOPE_NAME = NO GENERATE_TODOLIST = YES @@ -117,7 +117,7 @@ GENERATE_CHI = NO BINARY_TOC = NO TOC_EXPAND = NO DISABLE_INDEX = NO -ENUM_VALUES_PER_LINE = 4 +ENUM_VALUES_PER_LINE = 1 GENERATE_TREEVIEW = NO TREEVIEW_WIDTH = 250 #--------------------------------------------------------------------------- diff --git a/include/Makefile.am b/include/Makefile.am index 5db6705b..5c5f7dd6 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,4 +1,4 @@ -INCLUDES = yaml/yaml.h yaml/yaml_version.h yaml/yaml_error.h +INCLUDES = yaml/yaml.h #yaml/yaml_version.h yaml/yaml_error.h DOXYGEN_CFG = $(top_srcdir)/doc/doxygen.cfg nobase_include_HEADERS = $(INCLUDES) diff --git a/include/yaml/yaml.h 
b/include/yaml/yaml.h index 64412c7b..40cd9aff 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -2,7 +2,7 @@ * @file yaml.h * @brief Public interface for libyaml. * - * Include the header file with + * Include the header file with the code: * @code * #include * @endcode @@ -17,16 +17,68 @@ extern "C" { #include -#include "yaml_version.h" -#include "yaml_error.h" +/** + * @defgroup version Version Information + * @{ + */ + +/** + * Get the library version as a string. + * + * @returns The function returns the pointer to a static string of the form + * @c "X.Y.Z", where @c X is the major version number, @c Y is a minor version + * number, and @c Z is the patch version number. + */ + +const char * +yaml_get_version_string(void); + +/** + * Get the library version numbers. + * + * @param[out] major Major version number. + * @param[out] minor Minor version number. + * @param[out] patch Patch version number. + */ + +void +yaml_get_version(int *major, int *minor, int *patch); + +/** @} */ + +/** + * @defgroup basic Basic Types + * @{ + */ + +/** The character type. */ +typedef unsigned char yaml_char_t; +/** The stream encoding. */ typedef enum { - YAML_DETECT_ENCODING, + YAML_ANY_ENCODING, YAML_UTF8_ENCODING, YAML_UTF16LE_ENCODING, YAML_UTF16BE_ENCODING } yaml_encoding_t; +/** @} */ + +/* + +typedef enum { + YAML_NO_ERROR, + + YAML_MEMORY_ERROR, + + YAML_READER_ERROR, + YAML_SCANNER_ERROR, + YAML_PARSER_ERROR, + + YAML_WRITER_ERROR, + YAML_EMITTER_ERROR +} yaml_error_type_t; + typedef enum { YAML_ANY_SCALAR_STYLE, YAML_PLAIN_SCALAR_STYLE, @@ -182,10 +234,103 @@ typedef struct { yaml_mark_t end_mark; } yaml_event_t; -/* +*/ + + +/** + * @defgroup parser Parser Definitions + * @{ + */ + +/** + * The prototype of a read handler. + * + * The read handler is called when the parser needs to read more bytes from the + * source. The handler should write not more than @a size bytes to the @a + * buffer. 
The number of written bytes should be set to the @a length variable. + * + * @param[in] ext A pointer to an application data specified by + * @c yaml_parser_set_read_handler. + * @param[out] buffer The buffer to write the data from the source. + * @param[in] size The size of the buffer. + * @param[out] length The actual number of bytes read from the source. + * + * @returns On success, the handler should return @c 1. If the handler failed, + * the returned value should be @c 0. On EOF, the handler should set the + * @a length to @c 0 and return @c 1. + */ +typedef int yaml_read_handler_t(void *ext, yaml_char_t *buffer, size_t size, + size_t *length); + + +/** + * The parser structure. + * + * All members are internal. Manage the structure using the @c yaml_parser_ + * family of functions. + */ + typedef struct { + + /** + * @name Reader stuff + * @{ + */ + + /** Read handler */ + yaml_read_handler_t *reader; + + /** A pointer for passing to the read handler. */ + void *reader_ext; + + /** EOF flag */ + int eof; + + /** The pointer to the beginning of the working buffer. */ + yaml_char_t *buffer; + + /** The pointer to the current character in the working buffer. */ + yaml_char_t *pointer; + + /** The remaining undecoded characters. */ + unsigned char *raw_buffer; + + /** The size of the raw buffer. */ + size_t raw_buffer_size; + + /** The input encoding. */ + yaml_encoding_t encoding; + + /** + * @} + */ + } yaml_parser_t; +/** + * Create a new parser. + * + * This function creates a new parser object. An application is responsible + * for destroying the object using the @c yaml_parser_delete function. + * + * @returns A new parser object; @c NULL on error. + */ + +yaml_parser_t * +yaml_parser_new(void); + +/** + * Destroy a parser. + * + * @param[in] parser A parser object. 
+ */ + +void +yaml_parser_delete(yaml_parser_t *parser); + +/** @} */ + +/* typedef struct { } yaml_emitter_t; */ diff --git a/include/yaml/yaml_version.h b/include/yaml/yaml_version.h index 2ff74a51..9718db24 100644 --- a/include/yaml/yaml_version.h +++ b/include/yaml/yaml_version.h @@ -1,4 +1,4 @@ -/** +/* * @file yaml_version.h * @brief Version information. * @@ -12,14 +12,14 @@ extern "C" { #endif -/** +/* * @brief Get the library version. */ const char * yaml_get_version_string(void); -/** +/* * @brief Get the library version numbers. */ diff --git a/src/Makefile.am b/src/Makefile.am index 963962ed..fac4a558 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = version.c +libyaml_la_SOURCES = version.c api.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c new file mode 100644 index 00000000..cffa8e96 --- /dev/null +++ b/src/api.c @@ -0,0 +1,34 @@ + +#if HAVE_CONFIG_H +#include +#endif + +#include + +/* + * Create a new parser. + */ + +yaml_parser_t * +yaml_parser_new(void) +{ + yaml_parser_t *parser; + + parser = malloc(sizeof(yaml_parser_t)); + if (!parser) return NULL; + + memset(parser, 0, sizeof(yaml_parser_t)); + + return parser; +} + +/* + * Destroy a parser object. + */ + +void +yaml_parser_delete(yaml_parser_t *parser) +{ + free(parser); +} + From 72b49b094bb935887070fe8e78ab1e8cbee68eb1 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sat, 27 May 2006 17:19:07 +0000 Subject: [PATCH 02/73] Implementing Reader: first tries. 
--- include/yaml/yaml.h | 161 +++++++++++++++++++++++++++++++++++++++----- src/Makefile.am | 2 +- src/api.c | 135 ++++++++++++++++++++++++++++++++++++- src/reader.c | 53 +++++++++++++++ 4 files changed, 331 insertions(+), 20 deletions(-) create mode 100644 src/reader.c diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 40cd9aff..c4bd0ba8 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -16,6 +16,8 @@ extern "C" { #endif #include +#include +#include /** * @defgroup version Version Information @@ -62,10 +64,7 @@ typedef enum { YAML_UTF16BE_ENCODING } yaml_encoding_t; -/** @} */ - -/* - +/** Many bad things could happen with the parser and emitter. */ typedef enum { YAML_NO_ERROR, @@ -79,6 +78,10 @@ typedef enum { YAML_EMITTER_ERROR } yaml_error_type_t; +/** @} */ + +/* + typedef enum { YAML_ANY_SCALAR_STYLE, YAML_PLAIN_SCALAR_STYLE, @@ -249,19 +252,18 @@ typedef struct { * source. The handler should write not more than @a size bytes to the @a * buffer. The number of written bytes should be set to the @a length variable. * - * @param[in] ext A pointer to an application data specified by - * @c yaml_parser_set_read_handler. - * @param[out] buffer The buffer to write the data from the source. - * @param[in] size The size of the buffer. - * @param[out] length The actual number of bytes read from the source. + * @param[in] ext A pointer to an application data specified by + * @c yaml_parser_set_read_handler. + * @param[out] buffer The buffer to write the data from the source. + * @param[in] size The size of the buffer. + * @param[out] size_read The actual number of bytes read from the source. * * @returns On success, the handler should return @c 1. If the handler failed, * the returned value should be @c 0. On EOF, the handler should set the * @a length to @c 0 and return @c 1. 
*/ -typedef int yaml_read_handler_t(void *ext, yaml_char_t *buffer, size_t size, - size_t *length); - +typedef int yaml_read_handler_t(void *ext, unsigned char *buffer, size_t size, + size_t *size_read); /** * The parser structure. @@ -272,16 +274,27 @@ typedef int yaml_read_handler_t(void *ext, yaml_char_t *buffer, size_t size, typedef struct { + /** + * @name Error handling + * @{ + */ + + error_type_t error; + + /** + * @} + */ + /** * @name Reader stuff * @{ */ /** Read handler */ - yaml_read_handler_t *reader; + yaml_read_handler_t *read_handler; /** A pointer for passing to the read handler. */ - void *reader_ext; + void *read_handler_data; /** EOF flag */ int eof; @@ -289,18 +302,39 @@ typedef struct { /** The pointer to the beginning of the working buffer. */ yaml_char_t *buffer; + /** The size of the buffer (in bytes). */ + size_t buffer_size; + /** The pointer to the current character in the working buffer. */ - yaml_char_t *pointer; + yaml_char_t *buffer_pointer; + + /** The number of unread characters in the buffer (in characters). */ + size_t buffer_length; /** The remaining undecoded characters. */ unsigned char *raw_buffer; - /** The size of the raw buffer. */ + /** The size of the raw buffer (in bytes). */ size_t raw_buffer_size; + /** Is the application responsible for freeing the raw buffer? */ + int raw_buffer_foreign; + /** The input encoding. */ yaml_encoding_t encoding; + /** The offset of the current position (in bytes). */ + size_t offset; + + /** The index of the current position (in characters). */ + size_t index; + + /** The line of the current position (starting from @c 0). */ + size_t line; + + /** The column of the current position (starting from @c 0). */ + size_t column; + /** * @} */ @@ -328,6 +362,57 @@ yaml_parser_new(void); void yaml_parser_delete(yaml_parser_t *parser); +/** + * Set a string input. + * + * Note that the @a input pointer must be valid while the @a parser object + * exists. 
The application is responsible for destroing @a input after + * destroying the @a parser. + * + * @param[in] parser A parser object. + * @param[in] input A source data. + * @param[in] length The length of the source data in bytes. + */ + +void +yaml_parser_set_input_string(yaml_parser_t *parser, + unsigned char *input, size_t size); + + +/** + * Set a file input. + * + * @a file should be a file object open for reading. The application is + * responsible for closing the @a file. + * + * @param[in] parser A parser object. + * @param[in] file An open file. + */ + +void +yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file); + +/** + * Set a generic input handler. + * + * @param[in] parser A parser object. + * @param[in] handler A read handler. + * @param[in] data Any application data for passing to the read handler. + */ + +void +yaml_parser_set_input(yaml_parser_t *parser, + yaml_read_handler_t *handler, void *data); + +/** + * Set the source encoding. + * + * @param[in] encoding The source encoding. + */ + +void +yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); + /** @} */ /* @@ -335,6 +420,50 @@ typedef struct { } yaml_emitter_t; */ +/** + * @defgroup internal Internal Definitions + * @{ + */ + +/** + * Allocate a dynamic memory block. + * + * @param[in] size Size of a memory block, \c 0 is valid. + * + * @returns @c yaml_malloc returns a pointer to a newly allocated memory block, + * or @c NULL if it failed. + */ + +void * +yaml_malloc(size_t size); + +/** + * Reallocate a dynamic memory block. + * + * @param[in] ptr A pointer to an existing memory block, \c NULL is + * valid. + * @param[in] size A size of a new block, \c 0 is valid. + * + * @returns @c yaml_realloc returns a pointer to a reallocated memory block, + * or @c NULL if it failed. + */ + +void * +yaml_realloc(void *ptr, size_t size); + +/** + * Free a dynamic memory block. + * + * @param[in] ptr A pointer to an existing memory block, \c NULL is + * valid. 
+ */ + +void +yaml_free(void *ptr); + +/** @} */ + + #ifdef __cplusplus } #endif diff --git a/src/Makefile.am b/src/Makefile.am index fac4a558..6816d814 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = version.c api.c +libyaml_la_SOURCES = version.c api.c reader.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index cffa8e96..0594727e 100644 --- a/src/api.c +++ b/src/api.c @@ -5,8 +5,40 @@ #include +#include + +/* + * Allocate a dynamic memory block. + */ + +void * +yaml_malloc(size_t size) +{ + return malloc(size ? size : 1); +} + +/* + * Reallocate a dynamic memory block. + */ + +void * +yaml_realloc(void *ptr, size_t size) +{ + return ptr ? realloc(ptr, size ? size : 1) : malloc(size ? size : 1); +} + /* - * Create a new parser. + * Free a dynamic memory block. + */ + +void +yaml_free(void *ptr) +{ + if (ptr) free(ptr); +} + +/* + * Create a new parser object. */ yaml_parser_t * @@ -14,7 +46,7 @@ yaml_parser_new(void) { yaml_parser_t *parser; - parser = malloc(sizeof(yaml_parser_t)); + parser = yaml_malloc(sizeof(yaml_parser_t)); if (!parser) return NULL; memset(parser, 0, sizeof(yaml_parser_t)); @@ -29,6 +61,103 @@ yaml_parser_new(void) void yaml_parser_delete(yaml_parser_t *parser) { - free(parser); + assert(parser); /* Non-NULL parser object expected. */ + + yaml_free(parser->buffer); + if (!parser->raw_buffer_foreign) + yaml_free(parser->raw_buffer); + + memset(parser, 0, sizeof(yaml_parser_t)); + + yaml_free(parser); +} + +/* + * String read handler (always returns error). + */ + +static int +yaml_string_read_handler(void *data, unsigned char *buffer, size_t size, + size_t *size_read) +{ + *size_read = 0; + return 1; +} + +/* + * File read handler. 
+ */ + +static int +yaml_file_read_handler(void *data, unsigned char *buffer, size_t size, + size_t *size_read) +{ + *size_read = fread(buffer, 1, size, (FILE *)ext); + return !ferror((FILE *)ext); +} + +/* + * Set a string input. + */ + +void +yaml_parser_set_input_string(yaml_parser_t *parser, + unsigned char *input, size_t size) +{ + assert(parser); /* Non-NULL parser object expected. */ + assert(!parser->reader); /* You can set the source only once. */ + assert(input); /* Non-NULL input string expected. */ + + parser->read_handler = yaml_string_read_handler; + parser->read_handler_data = NULL; + + /* We use the input string as a raw (undecoded) buffer. */ + parser->raw_buffer = input; + parser->raw_buffer_size = size; + parser->raw_buffer_foreign = 1; +} + +/* + * Set a file input. + */ + +void +yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file) +{ + assert(parser); /* Non-NULL parser object expected. */ + assert(!parser->reader); /* You can set the source only once. */ + assert(file); /* Non-NULL file object expected. */ + + parser->read_handler = yaml_file_read_handler; + parser->read_handler_data = file; +} + +/* + * Set a generic input. + */ + +void +yaml_parser_set_input(yaml_parser_t *parser, + yaml_read_handler_t *handler, void *data) +{ + assert(parser); /* Non-NULL parser object expected. */ + assert(!parser->reader); /* You can set the source only once. */ + assert(handler); /* Non-NULL read handler expected. */ + + parser->read_handler = handler; + parser->read_handler_data = data +} + +/* + * Set the source encoding. + */ + +void +yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding) +{ + assert(parser); /* Non-NULL parser object expected. */ + assert(!parser->encoding); /* Encoding is already set or detected. 
*/ + + parser->encoding = encoding; } diff --git a/src/reader.c b/src/reader.c new file mode 100644 index 00000000..787f785d --- /dev/null +++ b/src/reader.c @@ -0,0 +1,53 @@ + +#define RAW_BUFFER_SIZE 16384 +#define BUFFER_SIZE (RAW_BUFFER_SIZE*2) /* Should be enough for decoding + the whole raw buffer. */ + +/* + * Ensure that the buffer contains at least length characters. + * Return 1 on success, 0 on failure. + */ + +int +yaml_parser_update_reader(yaml_parser_t *parser, size_t length) +{ + /* If the EOF flag is set, do nothing. */ + + if (parser->eof) + return 1; + + /* First, let us check that the buffers are allocated. */ + + if (!parser->buffer) { + parser->buffer = yaml_malloc(BUFFER_SIZE); + if (!parser->buffer) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + parser->buffer_size = BUFFER_SIZE; + parser->buffer_pointer = parser->buffer; + parser->buffer_length = 0; + } + + if (!parser->raw_buffer) { + parser->raw_buffer = yaml_malloc(RAW_BUFFER_SIZE); + if (!parser->raw_buffer) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + parser->raw_buffer_size = RAW_BUFFER_SIZE; + } + + /* Next, determine the input encoding. */ + + if (!parser->encoding) { + if (!yaml_parser_determine_encoding(parser)) + return 0; + } + + /* more... */ + +} + + + From 8ae37c3f0752351477b63273652f8f8fc3a08879 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 29 May 2006 20:08:09 +0000 Subject: [PATCH 03/73] Working on the decoding code. --- include/yaml/yaml.h | 53 ++++++-- src/api.c | 67 +++++++--- src/reader.c | 297 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 365 insertions(+), 52 deletions(-) diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index c4bd0ba8..9ec2c7a7 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -252,7 +252,7 @@ typedef struct { * source. The handler should write not more than @a size bytes to the @a * buffer. The number of written bytes should be set to the @a length variable. 
* - * @param[in] ext A pointer to an application data specified by + * @param[in] data A pointer to an application data specified by * @c yaml_parser_set_read_handler. * @param[out] buffer The buffer to write the data from the source. * @param[in] size The size of the buffer. @@ -262,9 +262,21 @@ typedef struct { * the returned value should be @c 0. On EOF, the handler should set the * @a length to @c 0 and return @c 1. */ -typedef int yaml_read_handler_t(void *ext, unsigned char *buffer, size_t size, + +typedef int yaml_read_handler_t(void *data, unsigned char *buffer, size_t size, size_t *size_read); +/** + * This structure holds a string input specified by + * @c yaml_parser_set_input_string. + */ + +typedef struct { + unsigned char *start; + unsigned char *end; + unsigned char *current; +} yaml_string_input_t; + /** * The parser structure. * @@ -279,7 +291,7 @@ typedef struct { * @{ */ - error_type_t error; + yaml_error_type_t error; /** * @} @@ -302,23 +314,23 @@ typedef struct { /** The pointer to the beginning of the working buffer. */ yaml_char_t *buffer; - /** The size of the buffer (in bytes). */ - size_t buffer_size; + /** The pointer to the end of the working buffer. */ + yaml_char_t *buffer_end; /** The pointer to the current character in the working buffer. */ - yaml_char_t *buffer_pointer; + yaml_char_t *pointer; - /** The number of unread characters in the buffer (in characters). */ - size_t buffer_length; + /** The number of unread characters in the working buffer. */ + size_t unread; - /** The remaining undecoded characters. */ + /** The pointer to the beginning of the raw buffer. */ unsigned char *raw_buffer; - /** The size of the raw buffer (in bytes). */ - size_t raw_buffer_size; + /** The pointer to the current character in the raw buffer. */ + unsigned char *raw_pointer; - /** Is the application responsible for freeing the raw buffer? */ - int raw_buffer_foreign; + /** The number of unread bytes in the raw buffer. 
*/ + size_t raw_unread; /** The input encoding. */ yaml_encoding_t encoding; @@ -335,6 +347,9 @@ typedef struct { /** The column of the current position (starting from @c 0). */ size_t column; + /* String input structure. */ + yaml_string_input_t string_input; + /** * @} */ @@ -461,6 +476,18 @@ yaml_realloc(void *ptr, size_t size); void yaml_free(void *ptr); +/** The size of the raw buffer. */ + +#define YAML_RAW_BUFFER_SIZE 16384 + +/** + * The size of the buffer. + * + * We allocate enough space for decoding the whole raw buffer. + */ + +#define YAML_BUFFER_SIZE (YAML_RAW_BUFFER_SIZE*3) + /** @} */ diff --git a/src/api.c b/src/api.c index 0594727e..aa183afb 100644 --- a/src/api.c +++ b/src/api.c @@ -46,11 +46,35 @@ yaml_parser_new(void) { yaml_parser_t *parser; + /* Allocate the parser structure. */ + parser = yaml_malloc(sizeof(yaml_parser_t)); if (!parser) return NULL; memset(parser, 0, sizeof(yaml_parser_t)); + /* Allocate the raw buffer. */ + + parser->raw_buffer = yaml_malloc(YAML_RAW_BUFFER_SIZE); + if (!parser->raw_buffer) { + yaml_free(parser); + return NULL; + } + parser->raw_pointer = parser->raw_buffer; + parser->raw_unread = 0; + + /* Allocate the character buffer. */ + + parser->buffer = yaml_malloc(YAML_BUFFER_SIZE); + if (!parser->buffer) { + yaml_free(parser->raw_buffer); + yaml_free(parser); + return NULL; + } + parser->buffer_end = parser->buffer; + parser->pointer = parser->buffer; + parser->unread = 0; + return parser; } @@ -64,8 +88,7 @@ yaml_parser_delete(yaml_parser_t *parser) assert(parser); /* Non-NULL parser object expected. */ yaml_free(parser->buffer); - if (!parser->raw_buffer_foreign) - yaml_free(parser->raw_buffer); + yaml_free(parser->raw_buffer); memset(parser, 0, sizeof(yaml_parser_t)); @@ -73,14 +96,27 @@ yaml_parser_delete(yaml_parser_t *parser) } /* - * String read handler (always returns error). + * String read handler. 
*/ static int yaml_string_read_handler(void *data, unsigned char *buffer, size_t size, size_t *size_read) { - *size_read = 0; + yaml_string_input_t *input = data; + + if (input->current == input->end) { + *size_read = 0; + return 1; + } + + if (size > (input->end - input->current)) { + size = input->end - input->current; + } + + memcpy(buffer, input->current, size); + input->current += size; + *size_read = size; return 1; } @@ -92,8 +128,8 @@ static int yaml_file_read_handler(void *data, unsigned char *buffer, size_t size, size_t *size_read) { - *size_read = fread(buffer, 1, size, (FILE *)ext); - return !ferror((FILE *)ext); + *size_read = fread(buffer, 1, size, (FILE *)data); + return !ferror((FILE *)data); } /* @@ -105,16 +141,15 @@ yaml_parser_set_input_string(yaml_parser_t *parser, unsigned char *input, size_t size) { assert(parser); /* Non-NULL parser object expected. */ - assert(!parser->reader); /* You can set the source only once. */ + assert(!parser->read_handler); /* You can set the source only once. */ assert(input); /* Non-NULL input string expected. */ - parser->read_handler = yaml_string_read_handler; - parser->read_handler_data = NULL; + parser->string_input.start = input; + parser->string_input.current = input; + parser->string_input.end = input+size; - /* We use the input string as a raw (undecoded) buffer. */ - parser->raw_buffer = input; - parser->raw_buffer_size = size; - parser->raw_buffer_foreign = 1; + parser->read_handler = yaml_string_read_handler; + parser->read_handler_data = &parser->string_input; } /* @@ -125,7 +160,7 @@ void yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file) { assert(parser); /* Non-NULL parser object expected. */ - assert(!parser->reader); /* You can set the source only once. */ + assert(!parser->read_handler); /* You can set the source only once. */ assert(file); /* Non-NULL file object expected. 
*/ parser->read_handler = yaml_file_read_handler; @@ -141,11 +176,11 @@ yaml_parser_set_input(yaml_parser_t *parser, yaml_read_handler_t *handler, void *data) { assert(parser); /* Non-NULL parser object expected. */ - assert(!parser->reader); /* You can set the source only once. */ + assert(!parser->read_handler); /* You can set the source only once. */ assert(handler); /* Non-NULL read handler expected. */ parser->read_handler = handler; - parser->read_handler_data = data + parser->read_handler_data = data; } /* diff --git a/src/reader.c b/src/reader.c index 787f785d..440a88f9 100644 --- a/src/reader.c +++ b/src/reader.c @@ -1,53 +1,304 @@ -#define RAW_BUFFER_SIZE 16384 -#define BUFFER_SIZE (RAW_BUFFER_SIZE*2) /* Should be enough for decoding - the whole raw buffer. */ +#if HAVE_CONFIG_H +#include +#endif + +#include + +#include + +/* Check for the UTF-16-BE BOM. */ +#define IS_UTF16BE_BOM(pointer) ((pointer)[0] == 0xFE && (pointer)[1] == 0xFF) + +/* Check for the UTF-16-LE BOM. */ +#define IS_UTF16LE_BOM(pointer) ((pointer)[0] == 0xFF && (pointer)[1] == 0xFE) + +/* Get a UTF-16-BE character. */ +#define UTF16BE_CHAR(pointer) ((pointer)[0] << 8 + (pointer)[1]) + +/* Get a UTF-16-LE character. */ +#define UTF16LE_CHAR(pointer) ((pointer)[0] + (pointer)[1] << 8) + +/* + * From http://www.ietf.org/rfc/rfc3629.txt: + * + * Char. number range | UTF-8 octet sequence + * (hexadecimal) | (binary) + * --------------------+--------------------------------------------- + * 0000 0000-0000 007F | 0xxxxxxx + * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + */ + +/* Get the length of a UTF-8 character (0 on error). */ +#define UTF8_LENGTH(pointer) \ + ((pointer)[0] < 0x80 ? 1 : \ + (pointer)[0] < 0xC0 ? 0 : \ + (pointer)[0] < 0xE0 ? 2 : \ + (pointer)[0] < 0xF0 ? 3 : \ + (pointer)[0] < 0xF8 ? 
4 : 0) + +/* Get the value of the first byte of a UTF-8 sequence (0xFF on error). */ +#define UTF8_FIRST_CHUNK(pointer) \ + ((pointer)[0] < 0x80 ? (pointer)[0] & 0x7F : \ + (pointer)[0] < 0xC0 ? 0xFF : \ + (pointer)[0] < 0xE0 ? (pointer)[0] & 0x1F : \ + (pointer)[0] < 0xF0 ? (pointer)[0] & 0x0F : \ + (pointer)[0] < 0xF8 ? (pointer)[0] & 0x07 : 0xFF) + +/* Get the value of a non-first byte of a UTF-8 sequence (0xFF on error). */ +#define UTF8_NEXT_CHUNK(pointer) \ + ((pointer)[0] >= 0x80 && (pointer)[0] < 0xC0 ? (pointer)[0] & 0x3F : 0xFF) + +/* Determine the length of a UTF-8 character. */ /* * Ensure that the buffer contains at least length characters. * Return 1 on success, 0 on failure. + * + * The length is supposed to be significantly less that the buffer size. */ int -yaml_parser_update_reader(yaml_parser_t *parser, size_t length) +yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) { /* If the EOF flag is set, do nothing. */ if (parser->eof) return 1; - /* First, let us check that the buffers are allocated. */ + /* Return if the buffer contains enough characters. */ + + if (parser->unread >= length) + return 1; + + /* Determine the input encoding if it is not known yet. */ - if (!parser->buffer) { - parser->buffer = yaml_malloc(BUFFER_SIZE); - if (!parser->buffer) { - parser->error = YAML_MEMORY_ERROR; + if (!parser->encoding) { + if (!yaml_parser_determine_encoding(parser)) return 0; - } - parser->buffer_size = BUFFER_SIZE; - parser->buffer_pointer = parser->buffer; - parser->buffer_length = 0; } - if (!parser->raw_buffer) { - parser->raw_buffer = yaml_malloc(RAW_BUFFER_SIZE); - if (!parser->raw_buffer) { - parser->error = YAML_MEMORY_ERROR; - return 0; + /* Move the unread characters to the beginning of the buffer. 
*/ + + if (parser->buffer < parser->pointer + && parser->pointer < parser->buffer_end) { + size_t size = parser->buffer_end - parser->pointer; + memmove(parser->buffer, parser->pointer, size); + parser->pointer = parser->buffer; + parser->buffer_end -= size; + } + else if (parser->pointer == parser->buffer_end) { + parser->pointer = parser->buffer; + parser->buffer_end = parser->buffer; + } + + /* Fill the buffer until it has enough characters. */ + + while (parser->unread < length) + { + /* Fill the raw buffer. */ + + if (!yaml_parser_update_raw_buffer(parser)) return 0; + + /* Decode the raw buffer. */ + + while (parser->raw_unread) + { + unsigned int ch; + int incomplete = 0; + + /* Decode the next character. */ + + switch (parser->encoding) + { + case YAML_UTF8_ENCODING: + + unsigned int utf8_length = UTF8_LENGTH(parser->raw_pointer); + unsigned int utf8_chunk; + + /* Check if the raw buffer contains an incomplete character. */ + + if (utf8_length > parser->raw_unread) { + if (parser->eof) { + parser->error = YAML_READER_ERROR; + return 0; + } + incomplete = 1; + } + + /* Get the character checking it for validity. */ + + utf8_chunk = UTF8_FIRST_CHUNK(parser->raw_pointer ++); + if (utf8_chunk == 0xFF) { + parser->error = YAML_READER_ERROR; + return 0; + } + ch = utf8_chunk; + parser->raw_unread --; + while (-- utf8_length) { + utf8_chunk = UTF8_NEXT_CHUNK(parser->raw_pointer ++); + if (utf8_chunk == 0xFF) { + parser->error = YAML_READER_ERROR; + return 0; + } + ch = ch << 6 + utf8_chunk; + parser->raw_unread --; + } + + break; + + case YAML_UTF16LE_ENCODING: + + /* Check if the raw buffer contains an incomplete character. */ + + if (parser->raw_unread < 2) { + if (parser->eof) { + parser->error = YAML_READER_ERROR; + return 0; + } + incomplete = 1; + } + + /* Get the current character. 
*/ + + ch = UTF16LE_CHAR(parser->raw_pointer); + parser->raw_pointer += 2; + parser->raw_unread -= 2; + + break; + + case YAML_UTF16BE_ENCODING: + + /* Check if the raw buffer contains an incomplete character. */ + + if (parser->raw_unread < 2) { + if (parser->eof) { + parser->error = YAML_READER_ERROR; + return 0; + } + incomplete = 1; + } + + /* Get the current character. */ + + ch = UTF16BE_CHAR(parser->raw_pointer); + parser->raw_pointer += 2; + parser->raw_unread -= 2; + + break; + } + + /* + * Check if the character is in the allowed range: + * #x9 | #xA | #xD | [#x20-#x7E] (8 bit) + * | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit) + * | [#x10000-#x10FFFF] (32 bit) + */ + + if (! (ch == 0x09 || ch == 0x0A || ch == 0x0D + || (ch >= 0x20 && ch <= 0x7E) + || (ch == 0x85) || (ch >= 0xA0 && ch <= 0xD7FF) + || (ch >= 0xE000 && ch <= 0xFFFD) + || (ch >= 0x10000 && ch <= 0x10FFFF))) { + parser->error = YAML_READER_ERROR; + return 0; + } + + /* Finally put the character into the buffer. */ + + /* 0000 0000-0000 007F -> 0xxxxxxx */ + if (ch <= 0x7F) { + *(parser->buffer_end++) = ch; + } + /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ + else if (ch <= 0x7FF) { + *(parser->buffer_end++) = 0xC0 + (ch >> 6) & 0x1F; + *(parser->buffer_end++) = 0x80 + ch & 0x3F; + } + /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ + else if (ch <= 0xFFFF) { + *(parser->buffer_end++) = 0x80 + ch & 0x3F; + *(parser->buffer_end++) = 0xC0 + (ch >> 6) & 0x1F; + + } + /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ + else { + } } - parser->raw_buffer_size = RAW_BUFFER_SIZE; + } - /* Next, determine the input encoding. */ +} - if (!parser->encoding) { - if (!yaml_parser_determine_encoding(parser)) +/* + * Determine the input stream encoding by checking the BOM symbol. If no BOM is + * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. 
+ */ + +int +yaml_parser_determine_encoding(yaml_parser_t *parser) +{ + /* Ensure that we had enough bytes in the raw buffer. */ + + while (!parser->eof && parser->raw_unread < 2) { + if (!yaml_parser_update_raw_buffer(parser)) { return 0; + } } - /* more... */ + /* Determine the encoding. */ + if (parser->raw_unread >= 2 && IS_UTF16BE_BOM(parser->raw_pointer)) { + parser->encoding = YAML_UTF16BE_ENCODING; + } + else if (parser->raw_unread >= 2 && IS_UTF16LE_BOM(parser->raw_pointer)) { + parser->encoding = YAML_UTF16LE_ENCODING; + } + else { + parser->encoding = YAML_UTF8_ENCODING; + } } +/* + * Update the raw buffer. + */ + +int +yaml_parser_update_raw_buffer(yaml_parser_t *parser) +{ + size_t size_read = 0; + + /* Return if the raw buffer is full. */ + + if (parser->raw_unread == YAML_RAW_BUFFER_SIZE) return 1; + /* Return on EOF. */ + + if (parser->eof) return 1; + + /* Move the remaining bytes in the raw buffer to the beginning. */ + + if (parser->raw_unread && parser->raw_buffer < parser->raw_pointer) { + memmove(parser->raw_buffer, parser->raw_pointer, parser->raw_unread); + } + parser->raw_pointer = parser->raw_buffer; + + /* Call the read handler to fill the buffer. */ + + if (!parser->read_handler(parser->read_handler_data, + parser->raw_buffer + parser->raw_unread, + YAML_RAW_BUFFER_SIZE - parser->raw_unread, + &size_read)) { + parser->error = YAML_READER_ERROR; + return 0; + } + parser->raw_unread += size_read; + if (!size_read) { + parser->eof = 1; + } + + return 1; +} From 29358a3f70af2492a496cb77af9f905f0982a1b5 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Tue, 30 May 2006 17:49:18 +0000 Subject: [PATCH 04/73] Complete UTF-8 and UTF-16 decoders. Reader is mostly done (though untested). 
--- include/yaml/yaml.h | 21 ++- src/reader.c | 325 ++++++++++++++++++++++++++++++-------------- tests/Makefile.am | 4 +- tests/test-reader.c | 144 ++++++++++++++++++++ 4 files changed, 386 insertions(+), 108 deletions(-) create mode 100644 tests/test-reader.c diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 9ec2c7a7..2f32e521 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -291,8 +291,15 @@ typedef struct { * @{ */ + /** Error type. */ yaml_error_type_t error; + /** Error description. */ + const char *problem; + + /** The byte about which the problem occured. */ + size_t problem_offset; + /** * @} */ @@ -302,7 +309,7 @@ typedef struct { * @{ */ - /** Read handler */ + /** Read handler. */ yaml_read_handler_t *read_handler; /** A pointer for passing to the read handler. */ @@ -488,6 +495,18 @@ yaml_free(void *ptr); #define YAML_BUFFER_SIZE (YAML_RAW_BUFFER_SIZE*3) +/** + * Ensure that the buffer contains at least @a length characters. + * + * @param[in] parser A parser object. + * @param[in] length The number of characters in the buffer. + * + * @returns @c 1 on success, @c 0 on error. + */ + +int +yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); + /** @} */ diff --git a/src/reader.c b/src/reader.c index 440a88f9..5ee8c434 100644 --- a/src/reader.c +++ b/src/reader.c @@ -7,51 +7,20 @@ #include -/* Check for the UTF-16-BE BOM. */ -#define IS_UTF16BE_BOM(pointer) ((pointer)[0] == 0xFE && (pointer)[1] == 0xFF) - -/* Check for the UTF-16-LE BOM. */ -#define IS_UTF16LE_BOM(pointer) ((pointer)[0] == 0xFF && (pointer)[1] == 0xFE) - -/* Get a UTF-16-BE character. */ -#define UTF16BE_CHAR(pointer) ((pointer)[0] << 8 + (pointer)[1]) - -/* Get a UTF-16-LE character. */ -#define UTF16LE_CHAR(pointer) ((pointer)[0] + (pointer)[1] << 8) - /* - * From http://www.ietf.org/rfc/rfc3629.txt: - * - * Char. 
number range | UTF-8 octet sequence - * (hexadecimal) | (binary) - * --------------------+--------------------------------------------- - * 0000 0000-0000 007F | 0xxxxxxx - * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx - * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * Set the reader error and return 0. */ -/* Get the length of a UTF-8 character (0 on error). */ -#define UTF8_LENGTH(pointer) \ - ((pointer)[0] < 0x80 ? 1 : \ - (pointer)[0] < 0xC0 ? 0 : \ - (pointer)[0] < 0xE0 ? 2 : \ - (pointer)[0] < 0xF0 ? 3 : \ - (pointer)[0] < 0xF8 ? 4 : 0) - -/* Get the value of the first byte of a UTF-8 sequence (0xFF on error). */ -#define UTF8_FIRST_CHUNK(pointer) \ - ((pointer)[0] < 0x80 ? (pointer)[0] & 0x7F : \ - (pointer)[0] < 0xC0 ? 0xFF : \ - (pointer)[0] < 0xE0 ? (pointer)[0] & 0x1F : \ - (pointer)[0] < 0xF0 ? (pointer)[0] & 0x0F : \ - (pointer)[0] < 0xF8 ? (pointer)[0] & 0x07 : 0xFF) +int +yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem) +{ + parser->error = YAML_READER_ERROR; + parser->problem = problem; + parser->problem_offset = parser->offset; -/* Get the value of a non-first byte of a UTF-8 sequence (0xFF on error). */ -#define UTF8_NEXT_CHUNK(pointer) \ - ((pointer)[0] >= 0x80 && (pointer)[0] < 0xC0 ? (pointer)[0] & 0x3F : 0xFF) + return 0; +} -/* Determine the length of a UTF-8 character. */ /* * Ensure that the buffer contains at least length characters. @@ -102,12 +71,19 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) if (!yaml_parser_update_raw_buffer(parser)) return 0; + /* If the raw buffer is empty, it is EOF. */ + + if (!parser->raw_unread) return 1; + /* Decode the raw buffer. */ while (parser->raw_unread) { - unsigned int ch; + unsigned int value, value2; int incomplete = 0; + unsigned char utf8_octet; + unsigned int utf8_length; + int k, low, high; /* Decode the next character. 
*/ @@ -115,81 +91,200 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) { case YAML_UTF8_ENCODING: - unsigned int utf8_length = UTF8_LENGTH(parser->raw_pointer); - unsigned int utf8_chunk; + /* + * Decode a UTF-8 character. Check RFC 3629 + * (http://www.ietf.org/rfc/rfc3629.txt) for more details. + * + * The following table (taken from the RFC) is used for + * decoding. + * + * Char. number range | UTF-8 octet sequence + * (hexadecimal) | (binary) + * --------------------+------------------------------------ + * 0000 0000-0000 007F | 0xxxxxxx + * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx + * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * + * Additionally, the characters in the range 0xD800-0xDFFF + * are prohibited as they are reserved for use with UTF-16 + * surrogate pairs. + */ + + /* Determine the length of the UTF-8 sequence. */ + + utf8_octet = parser->raw_pointer[0]; + utf8_length = ( + (utf8_octet & 0x80) == 0x00 ? 1 : + (utf8_octet & 0xE0) == 0xC0 ? 2 : + (utf8_octet & 0xF0) == 0xE0 ? 3 : + (utf8_octet & 0xF8) == 0xF0 ? 4 : 0); + + /* Check if the leading octet is valid. */ + + if (!utf8_length) + return yaml_parser_set_reader_error(parser, + "Invalid leading UTF-8 octet"); /* Check if the raw buffer contains an incomplete character. */ if (utf8_length > parser->raw_unread) { if (parser->eof) { - parser->error = YAML_READER_ERROR; - return 0; + return yaml_parser_set_reader_error(parser, + "Incomplete UTF-8 octet sequence"); } incomplete = 1; + break; } - /* Get the character checking it for validity. */ + /* Decode the leading octet. 
*/ - utf8_chunk = UTF8_FIRST_CHUNK(parser->raw_pointer ++); - if (utf8_chunk == 0xFF) { - parser->error = YAML_READER_ERROR; - return 0; - } - ch = utf8_chunk; - parser->raw_unread --; - while (-- utf8_length) { - utf8_chunk = UTF8_NEXT_CHUNK(parser->raw_pointer ++); - if (utf8_chunk == 0xFF) { - parser->error = YAML_READER_ERROR; - return 0; - } - ch = ch << 6 + utf8_chunk; - parser->raw_unread --; + value = ( + (utf8_octet & 0x80) == 0x00 ? utf8_octet & 0x7F : + (utf8_octet & 0xE0) == 0xC0 ? utf8_octet & 0x1F : + (utf8_octet & 0xF0) == 0xE0 ? utf8_octet & 0x0F : + (utf8_octet & 0xF8) == 0xF0 ? utf8_octet & 0x07 : 0); + + /* Check and decode the trailing octets. */ + + for (k = 1; k < utf8_length; k ++) + { + utf8_octet = parser->raw_pointer[k]; + + /* Check if the octet is valid. */ + + if ((utf8_octet & 0xC0) != 0x80) + return yaml_parser_set_reader_error(parser, + "Invalid trailing UTF-8 octet"); + + /* Decode the octet. */ + + value = (value << 6) + (utf8_octet & 0x3F); } + /* Check the length of the sequence against the value. */ + + if (!((utf8_length == 1) || + (utf8_length == 2 && value >= 0x80) || + (utf8_length == 3 && value >= 0x800) || + (utf8_length == 4 && value >= 0x10000))) + return yaml_parser_set_reader_error(parser, + "Invalid length of a UTF-8 sequence"); + + /* Check the range of the value. */ + + if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) + return yaml_parser_set_reader_error(parser, + "Invalid Unicode character"); + + parser->raw_pointer += utf8_length; + parser->raw_unread -= utf8_length; + parser->offset += utf8_length; + break; case YAML_UTF16LE_ENCODING: + case YAML_UTF16BE_ENCODING: - /* Check if the raw buffer contains an incomplete character. */ + low = (parser->encoding == YAML_UTF16LE_ENCODING ? 0 : 1); + high = (parser->encoding == YAML_UTF16LE_ENCODING ? 1 : 0); + + /* + * The UTF-16 encoding is not as simple as one might + * naively think. Check RFC 2781 + * (http://www.ietf.org/rfc/rfc2781.txt). 
+ * + * Normally, two subsequent bytes describe a Unicode + * character. However a special technique (called a + * surrogate pair) is used for specifying character + * values larger than 0xFFFF. + * + * A surrogate pair consists of two pseudo-characters: + * high surrogate area (0xD800-0xDBFF) + * low surrogate area (0xDC00-0xDFFF) + * + * The following formulas are used for decoding + * and encoding characters using surrogate pairs: + * + * U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF) + * U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF) + * W1 = 110110yyyyyyyyyy + * W2 = 110111xxxxxxxxxx + * + * where U is the character value, W1 is the high surrogate + * area, W2 is the low surrogate area. + */ + + /* Check for incomplete UTF-16 character. */ if (parser->raw_unread < 2) { if (parser->eof) { - parser->error = YAML_READER_ERROR; - return 0; + return yaml_parser_set_reader_error(parser, + "Incomplete UTF-16 character"); } incomplete = 1; + break; } - /* Get the current character. */ + /* Get the character. */ - ch = UTF16LE_CHAR(parser->raw_pointer); - parser->raw_pointer += 2; - parser->raw_unread -= 2; + value = parser->raw_pointer[low] + + (parser->raw_pointer[high] << 8); - break; + /* Check for unexpected low surrogate area. */ - case YAML_UTF16BE_ENCODING: + if ((value & 0xFC00) == 0xDC00) + return yaml_parser_set_reader_error(parser, + "Unexpected low surrogate area"); - /* Check if the raw buffer contains an incomplete character. */ + /* Check for a high surrogate area. */ - if (parser->raw_unread < 2) { - if (parser->eof) { - parser->error = YAML_READER_ERROR; - return 0; + if ((value & 0xFC00) == 0xD800) { + + /* Check for incomplete surrogate pair. */ + + if (parser->raw_unread < 4) { + if (parser->eof) { + return yaml_parser_set_reader_error(parser, + "Incomplete UTF-16 surrogate pair"); + } + incomplete = 1; + break; } - incomplete = 1; - } - /* Get the current character. */ + /* Get the next character. 
*/ - ch = UTF16BE_CHAR(parser->raw_pointer); - parser->raw_pointer += 2; - parser->raw_unread -= 2; + unsigned int value2 = parser->raw_pointer[low+2] + + (parser->raw_pointer[high+2] << 8); + + /* Check for a low surrogate area. */ + + if ((value2 & 0xFC00) != 0xDC00) + return yaml_parser_set_reader_error(parser, + "Expected low surrogate area"); + + /* Generate the value of the surrogate pair. */ + + value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF); + + parser->raw_pointer += 4; + parser->raw_unread -= 4; + parser->offset += 4; + } + + else { + parser->raw_pointer += 2; + parser->raw_unread -= 2; + parser->offset += 4; + } break; } + /* Check if the raw buffer contains enough bytes to form a character. */ + + if (incomplete) break; + /* * Check if the character is in the allowed range: * #x9 | #xA | #xD | [#x20-#x7E] (8 bit) @@ -197,39 +292,42 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) * | [#x10000-#x10FFFF] (32 bit) */ - if (! (ch == 0x09 || ch == 0x0A || ch == 0x0D - || (ch >= 0x20 && ch <= 0x7E) - || (ch == 0x85) || (ch >= 0xA0 && ch <= 0xD7FF) - || (ch >= 0xE000 && ch <= 0xFFFD) - || (ch >= 0x10000 && ch <= 0x10FFFF))) { - parser->error = YAML_READER_ERROR; - return 0; - } + if (! (value == 0x09 || value == 0x0A || value == 0x0D + || (value >= 0x20 && value <= 0x7E) + || (value == 0x85) || (value >= 0xA0 && value <= 0xD7FF) + || (value >= 0xE000 && value <= 0xFFFD) + || (value >= 0x10000 && value <= 0x10FFFF))) + return yaml_parser_set_reader_error(parser, + "Control characters are not allowed"); /* Finally put the character into the buffer. 
*/ /* 0000 0000-0000 007F -> 0xxxxxxx */ - if (ch <= 0x7F) { - *(parser->buffer_end++) = ch; + if (value <= 0x7F) { + *(parser->buffer_end++) = value; } /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ - else if (ch <= 0x7FF) { - *(parser->buffer_end++) = 0xC0 + (ch >> 6) & 0x1F; - *(parser->buffer_end++) = 0x80 + ch & 0x3F; + else if (value <= 0x7FF) { + *(parser->buffer_end++) = 0xC0 + (value >> 6); + *(parser->buffer_end++) = 0x80 + value & 0x3F; } /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ - else if (ch <= 0xFFFF) { - *(parser->buffer_end++) = 0x80 + ch & 0x3F; - *(parser->buffer_end++) = 0xC0 + (ch >> 6) & 0x1F; - + else if (value <= 0xFFFF) { + *(parser->buffer_end++) = 0xE0 + (value >> 12); + *(parser->buffer_end++) = 0x80 + (value >> 6) & 0x3F; + *(parser->buffer_end++) = 0x80 + value & 0x3F; } /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ else { + *(parser->buffer_end++) = 0xF0 + (value >> 18); + *(parser->buffer_end++) = 0x80 + (value >> 12) & 0x3F; + *(parser->buffer_end++) = 0x80 + (value >> 6) & 0x3F; + *(parser->buffer_end++) = 0x80 + value & 0x3F; } } - } + return 1; } /* @@ -237,12 +335,16 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. */ +#define BOM_UTF8 "\xef\xbb\xbf" +#define BOM_UTF16LE "\xff\xfe" +#define BOM_UTF16BE "\xfe\xff" + int yaml_parser_determine_encoding(yaml_parser_t *parser) { /* Ensure that we had enough bytes in the raw buffer. */ - while (!parser->eof && parser->raw_unread < 2) { + while (!parser->eof && parser->raw_unread < 3) { if (!yaml_parser_update_raw_buffer(parser)) { return 0; } @@ -250,15 +352,29 @@ yaml_parser_determine_encoding(yaml_parser_t *parser) /* Determine the encoding. 
*/ - if (parser->raw_unread >= 2 && IS_UTF16BE_BOM(parser->raw_pointer)) { + if (parser->raw_unread >= 2 + && !memcmp(parser->raw_pointer, BOM_UTF16LE, 2)) { + parser->encoding = YAML_UTF16LE_ENCODING; + parser->raw_pointer += 2; + parser->raw_unread -= 2; + } + else if (parser->raw_unread >= 2 + && !memcmp(parser->raw_pointer, BOM_UTF16BE, 2)) { parser->encoding = YAML_UTF16BE_ENCODING; + parser->raw_pointer += 2; + parser->raw_unread -= 2; } - else if (parser->raw_unread >= 2 && IS_UTF16LE_BOM(parser->raw_pointer)) { - parser->encoding = YAML_UTF16LE_ENCODING; + else if (parser->raw_unread >= 3 + && !memcmp(parser->raw_pointer, BOM_UTF8, 3)) { + parser->encoding = YAML_UTF8_ENCODING; + parser->raw_pointer += 3; + parser->raw_unread -= 3; } else { parser->encoding = YAML_UTF8_ENCODING; } + + return 1; } /* @@ -291,8 +407,7 @@ yaml_parser_update_raw_buffer(yaml_parser_t *parser) parser->raw_buffer + parser->raw_unread, YAML_RAW_BUFFER_SIZE - parser->raw_unread, &size_read)) { - parser->error = YAML_READER_ERROR; - return 0; + return yaml_parser_set_reader_error(parser, "Input error"); } parser->raw_unread += size_read; if (!size_read) { diff --git a/tests/Makefile.am b/tests/Makefile.am index ea70ba5a..8f699070 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include LDADD = $(top_builddir)/src/libyaml.la -TESTS = test-version -check_PROGRAMS = test-version +TESTS = test-version test-reader +check_PROGRAMS = test-version test-reader diff --git a/tests/test-reader.c b/tests/test-reader.c new file mode 100644 index 00000000..b1a7426c --- /dev/null +++ b/tests/test-reader.c @@ -0,0 +1,144 @@ +#include + +#include +#include +#include + +/* + * Test cases are stolen from + * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt + */ + +typedef struct { + char *title; + char *test; + int result; +} test_case; + +test_case utf8_sequences[] = { + /* {"title", "test 1|test 2|...|test N!", (0 or 1)}, */ + + {"a simple 
test", "'test' is '\xd0\xbf\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb5\xd1\x80\xd0\xba\xd0\xb0' in Russian!", 1}, + {"an empty line", "!", 1}, + + {"u-0 is a control character", "\x00!", 0}, + {"u-80 is a control character", "\xc2\x80!", 0}, + {"u-800 is valid", "\xe0\xa0\x80!", 1}, + {"u-10000 is valid", "\xf0\x90\x80\x80!", 1}, + {"5 bytes sequences are not allowed", "\xf8\x88\x80\x80\x80!", 0}, + {"6 bytes sequences are not allowed", "\xfc\x84\x80\x80\x80\x80!", 0}, + + {"u-7f is a control character", "\x7f!", 0}, + {"u-7FF is valid", "\xdf\xbf!", 1}, + {"u-FFFF is a control character", "\xef\xbf\xbf!", 0}, + {"u-1FFFFF is too large", "\xf7\xbf\xbf\xbf!", 0}, + {"u-3FFFFFF is 5 bytes", "\xfb\xbf\xbf\xbf\xbf!", 0}, + {"u-7FFFFFFF is 6 bytes", "\xfd\xbf\xbf\xbf\xbf\xbf!", 0}, + + {"u-D7FF", "\xed\x9f\xbf!", 1}, + {"u-E000", "\xee\x80\x80!", 1}, + {"u-FFFD", "\xef\xbf\xbd!", 1}, + {"u-10FFFF", "\xf4\x8f\xbf\xbf!", 1}, + {"u-110000", "\xf4\x90\x80\x80!", 0}, + + {"first continuation byte", "\x80!", 0}, + {"last continuation byte", "\xbf!", 0}, + + {"2 continuation bytes", "\x80\xbf!", 0}, + {"3 continuation bytes", "\x80\xbf\x80!", 0}, + {"4 continuation bytes", "\x80\xbf\x80\xbf!", 0}, + {"5 continuation bytes", "\x80\xbf\x80\xbf\x80!", 0}, + {"6 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf!", 0}, + {"7 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf\x80!", 0}, + + {"sequence of all 64 possible continuation bytes", + "\x80|\x81|\x82|\x83|\x84|\x85|\x86|\x87|\x88|\x89|\x8a|\x8b|\x8c|\x8d|\x8e|\x8f|" + "\x90|\x91|\x92|\x93|\x94|\x95|\x96|\x97|\x98|\x99|\x9a|\x9b|\x9c|\x9d|\x9e|\x9f|" + "\xa0|\xa1|\xa2|\xa3|\xa4|\xa5|\xa6|\xa7|\xa8|\xa9|\xaa|\xab|\xac|\xad|\xae|\xaf|" + "\xb0|\xb1|\xb2|\xb3|\xb4|\xb5|\xb6|\xb7|\xb8|\xb9|\xba|\xbb|\xbc|\xbd|\xbe|\xbf!", 0}, + {"32 first bytes of 2-byte sequences {0xc0-0xdf}", + "\xc0 |\xc1 |\xc2 |\xc3 |\xc4 |\xc5 |\xc6 |\xc7 |\xc8 |\xc9 |\xca |\xcb |\xcc |\xcd |\xce |\xcf |" + "\xd0 |\xd1 |\xd2 |\xd3 |\xd4 |\xd5 |\xd6 |\xd7 |\xd8 |\xd9 |\xda 
|\xdb |\xdc |\xdd |\xde |\xdf !", 0}, + {"16 first bytes of 3-byte sequences {0xe0-0xef}", + "\xe0 |\xe1 |\xe2 |\xe3 |\xe4 |\xe5 |\xe6 |\xe7 |\xe8 |\xe9 |\xea |\xeb |\xec |\xed |\xee |\xef !", 0}, + {"8 first bytes of 4-byte sequences {0xf0-0xf7}", "\xf0 |\xf1 |\xf2 |\xf3 |\xf4 |\xf5 |\xf6 |\xf7 !", 0}, + {"4 first bytes of 5-byte sequences {0xf8-0xfb}", "\xf8 |\xf9 |\xfa |\xfb !", 0}, + {"2 first bytes of 6-byte sequences {0xfc-0xfd}", "\xfc |\xfd !", 0}, + + {"sequences with last byte missing {u-0}", + "\xc0|\xe0\x80|\xf0\x80\x80|\xf8\x80\x80\x80|\xfc\x80\x80\x80\x80!", 0}, + {"sequences with last byte missing {u-...FF}", + "\xdf|\xef\xbf|\xf7\xbf\xbf|\xfb\xbf\xbf\xbf|\xfd\xbf\xbf\xbf\xbf!", 0}, + + {"impossible bytes", "\xfe|\xff|\xfe\xfe\xff\xff!", 0}, + + {"overlong sequences {u-2f}", + "\xc0\xaf|\xe0\x80\xaf|\xf0\x80\x80\xaf|\xf8\x80\x80\x80\xaf|\xfc\x80\x80\x80\x80\xaf!", 0}, + + {"maximum overlong sequences", + "\xc1\xbf|\xe0\x9f\xbf|\xf0\x8f\xbf\xbf|\xf8\x87\xbf\xbf\xbf|\xfc\x83\xbf\xbf\xbf\xbf!", 0}, + + {"overlong representation of the NUL character", + "\xc0\x80|\xe0\x80\x80|\xf0\x80\x80\x80|\xf8\x80\x80\x80\x80|\xfc\x80\x80\x80\x80\x80!", 0}, + + {"single UTF-16 surrogates", + "\xed\xa0\x80|\xed\xad\xbf|\xed\xae\x80|\xed\xaf\xbf|\xed\xb0\x80|\xed\xbe\x80|\xed\xbf\xbf!", 0}, + + {"paired UTF-16 surrogates", + "\xed\xa0\x80\xed\xb0\x80|\xed\xa0\x80\xed\xbf\xbf|\xed\xad\xbf\xed\xb0\x80|" + "\xed\xad\xbf\xed\xbf\xbf|\xed\xae\x80\xed\xb0\x80|\xed\xae\x80\xed\xbf\xbf|" + "\xed\xaf\xbf\xed\xb0\x80|\xed\xaf\xbf\xed\xbf\xbf!", 0}, + + {"other illegal code positions", "\xef\xbf\xbe|\xef\xbf\xbf!", 0}, + + {NULL, NULL, 0} +}; + +int check_utf8_sequences(void) +{ + yaml_parser_t *parser; + int failed = 0; + int k; + printf("checking utf-8 sequences...\n"); + for (k = 0; utf8_sequences[k].test; k++) { + char *title = utf8_sequences[k].title; + int check = utf8_sequences[k].result; + int result; + char *start = utf8_sequences[k].test; + char *end = start; + 
printf("\t%s:\n", title); + while(1) { + while (*end != '|' && *end != '!') end++; + parser = yaml_parser_new(); + assert(parser); + yaml_parser_set_input_string(parser, (unsigned char *)start, end-start); + result = yaml_parser_update_buffer(parser, end-start); + if (result != check) { + printf("\t\t- "); + failed ++; + } + else { + printf("\t\t+ "); + } + if (!parser->error) { + printf("(no error)\n"); + } + else if (parser->error == YAML_READER_ERROR) { + printf("(reader error: %s at %d)\n", parser->problem, parser->problem_offset); + } + if (*end == '!') break; + start = ++end; + yaml_parser_delete(parser); + }; + printf("\n"); + } + printf("checking utf-8 sequences: %d fail(s)\n", failed); + return failed; +} + + +int +main(void) +{ + return check_utf8_sequences(); +} From 7adcbc3d49b6e82532a411783e6f90542996ec70 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Thu, 1 Jun 2006 20:19:43 +0000 Subject: [PATCH 05/73] Complete buffering and encoding code. --- include/yaml/yaml.h | 3 + src/reader.c | 137 +++++++++-------- tests/test-reader.c | 362 ++++++++++++++++++++++++++++++++++---------- 3 files changed, 365 insertions(+), 137 deletions(-) diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 2f32e521..cfdeacf5 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -300,6 +300,9 @@ typedef struct { /** The byte about which the problem occured. */ size_t problem_offset; + /** The problematic value (@c -1 is none). 
*/ + int problem_value; + /** * @} */ diff --git a/src/reader.c b/src/reader.c index 5ee8c434..ac11323c 100644 --- a/src/reader.c +++ b/src/reader.c @@ -12,11 +12,13 @@ */ int -yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem) +yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, + size_t offset, int value) { parser->error = YAML_READER_ERROR; parser->problem = problem; - parser->problem_offset = parser->offset; + parser->problem_offset = offset; + parser->problem_value = value; return 0; } @@ -32,9 +34,9 @@ yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem) int yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) { - /* If the EOF flag is set, do nothing. */ + /* If the EOF flag is set and the raw buffer is empty, do nothing. */ - if (parser->eof) + if (parser->eof && !parser->raw_unread) return 1; /* Return if the buffer contains enough characters. */ @@ -71,18 +73,14 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) if (!yaml_parser_update_raw_buffer(parser)) return 0; - /* If the raw buffer is empty, it is EOF. */ - - if (!parser->raw_unread) return 1; - /* Decode the raw buffer. */ while (parser->raw_unread) { unsigned int value, value2; int incomplete = 0; - unsigned char utf8_octet; - unsigned int utf8_length; + unsigned char octet; + unsigned int width; int k, low, high; /* Decode the next character. */ @@ -113,25 +111,26 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Determine the length of the UTF-8 sequence. */ - utf8_octet = parser->raw_pointer[0]; - utf8_length = ( - (utf8_octet & 0x80) == 0x00 ? 1 : - (utf8_octet & 0xE0) == 0xC0 ? 2 : - (utf8_octet & 0xF0) == 0xE0 ? 3 : - (utf8_octet & 0xF8) == 0xF0 ? 4 : 0); + octet = parser->raw_pointer[0]; + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; /* Check if the leading octet is valid. 
*/ - if (!utf8_length) + if (!width) return yaml_parser_set_reader_error(parser, - "Invalid leading UTF-8 octet"); + "Invalid leading UTF-8 octet", + parser->offset, octet); /* Check if the raw buffer contains an incomplete character. */ - if (utf8_length > parser->raw_unread) { + if (width > parser->raw_unread) { if (parser->eof) { return yaml_parser_set_reader_error(parser, - "Incomplete UTF-8 octet sequence"); + "Incomplete UTF-8 octet sequence", + parser->offset, -1); } incomplete = 1; break; @@ -139,47 +138,45 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Decode the leading octet. */ - value = ( - (utf8_octet & 0x80) == 0x00 ? utf8_octet & 0x7F : - (utf8_octet & 0xE0) == 0xC0 ? utf8_octet & 0x1F : - (utf8_octet & 0xF0) == 0xE0 ? utf8_octet & 0x0F : - (utf8_octet & 0xF8) == 0xF0 ? utf8_octet & 0x07 : 0); + value = (octet & 0x80) == 0x00 ? octet & 0x7F : + (octet & 0xE0) == 0xC0 ? octet & 0x1F : + (octet & 0xF0) == 0xE0 ? octet & 0x0F : + (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; /* Check and decode the trailing octets. */ - for (k = 1; k < utf8_length; k ++) + for (k = 1; k < width; k ++) { - utf8_octet = parser->raw_pointer[k]; + octet = parser->raw_pointer[k]; /* Check if the octet is valid. */ - if ((utf8_octet & 0xC0) != 0x80) + if ((octet & 0xC0) != 0x80) return yaml_parser_set_reader_error(parser, - "Invalid trailing UTF-8 octet"); + "Invalid trailing UTF-8 octet", + parser->offset+k, octet); /* Decode the octet. */ - value = (value << 6) + (utf8_octet & 0x3F); + value = (value << 6) + (octet & 0x3F); } /* Check the length of the sequence against the value. 
*/ - if (!((utf8_length == 1) || - (utf8_length == 2 && value >= 0x80) || - (utf8_length == 3 && value >= 0x800) || - (utf8_length == 4 && value >= 0x10000))) + if (!((width == 1) || + (width == 2 && value >= 0x80) || + (width == 3 && value >= 0x800) || + (width == 4 && value >= 0x10000))) return yaml_parser_set_reader_error(parser, - "Invalid length of a UTF-8 sequence"); + "Invalid length of a UTF-8 sequence", + parser->offset, -1); /* Check the range of the value. */ if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) return yaml_parser_set_reader_error(parser, - "Invalid Unicode character"); - - parser->raw_pointer += utf8_length; - parser->raw_unread -= utf8_length; - parser->offset += utf8_length; + "Invalid Unicode character", + parser->offset, value); break; @@ -220,7 +217,8 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) if (parser->raw_unread < 2) { if (parser->eof) { return yaml_parser_set_reader_error(parser, - "Incomplete UTF-16 character"); + "Incomplete UTF-16 character", + parser->offset, -1); } incomplete = 1; break; @@ -235,18 +233,22 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) if ((value & 0xFC00) == 0xDC00) return yaml_parser_set_reader_error(parser, - "Unexpected low surrogate area"); + "Unexpected low surrogate area", + parser->offset, value); /* Check for a high surrogate area. */ if ((value & 0xFC00) == 0xD800) { + width = 4; + /* Check for incomplete surrogate pair. */ if (parser->raw_unread < 4) { if (parser->eof) { return yaml_parser_set_reader_error(parser, - "Incomplete UTF-16 surrogate pair"); + "Incomplete UTF-16 surrogate pair", + parser->offset, -1); } incomplete = 1; break; @@ -261,21 +263,16 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) if ((value2 & 0xFC00) != 0xDC00) return yaml_parser_set_reader_error(parser, - "Expected low surrogate area"); + "Expected low surrogate area", + parser->offset+2, value2); /* Generate the value of the surrogate pair. 
*/ value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF); - - parser->raw_pointer += 4; - parser->raw_unread -= 4; - parser->offset += 4; } else { - parser->raw_pointer += 2; - parser->raw_unread -= 2; - parser->offset += 4; + width = 2; } break; @@ -298,7 +295,14 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) || (value >= 0xE000 && value <= 0xFFFD) || (value >= 0x10000 && value <= 0x10FFFF))) return yaml_parser_set_reader_error(parser, - "Control characters are not allowed"); + "Control characters are not allowed", + parser->offset, value); + + /* Move the raw pointers. */ + + parser->raw_pointer += width; + parser->raw_unread -= width; + parser->offset += width; /* Finally put the character into the buffer. */ @@ -309,22 +313,33 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ else if (value <= 0x7FF) { *(parser->buffer_end++) = 0xC0 + (value >> 6); - *(parser->buffer_end++) = 0x80 + value & 0x3F; + *(parser->buffer_end++) = 0x80 + (value & 0x3F); } /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ else if (value <= 0xFFFF) { *(parser->buffer_end++) = 0xE0 + (value >> 12); - *(parser->buffer_end++) = 0x80 + (value >> 6) & 0x3F; - *(parser->buffer_end++) = 0x80 + value & 0x3F; + *(parser->buffer_end++) = 0x80 + ((value >> 6) & 0x3F); + *(parser->buffer_end++) = 0x80 + (value & 0x3F); } /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ else { *(parser->buffer_end++) = 0xF0 + (value >> 18); - *(parser->buffer_end++) = 0x80 + (value >> 12) & 0x3F; - *(parser->buffer_end++) = 0x80 + (value >> 6) & 0x3F; - *(parser->buffer_end++) = 0x80 + value & 0x3F; + *(parser->buffer_end++) = 0x80 + ((value >> 12) & 0x3F); + *(parser->buffer_end++) = 0x80 + ((value >> 6) & 0x3F); + *(parser->buffer_end++) = 0x80 + (value & 0x3F); } + + parser->unread ++; + } + + /* On EOF, put NUL into the buffer and return. 
*/ + + if (parser->eof) { + *(parser->buffer_end++) = '\0'; + parser->unread ++; + return 1; } + } return 1; @@ -357,18 +372,21 @@ yaml_parser_determine_encoding(yaml_parser_t *parser) parser->encoding = YAML_UTF16LE_ENCODING; parser->raw_pointer += 2; parser->raw_unread -= 2; + parser->offset += 2; } else if (parser->raw_unread >= 2 && !memcmp(parser->raw_pointer, BOM_UTF16BE, 2)) { parser->encoding = YAML_UTF16BE_ENCODING; parser->raw_pointer += 2; parser->raw_unread -= 2; + parser->offset += 2; } else if (parser->raw_unread >= 3 && !memcmp(parser->raw_pointer, BOM_UTF8, 3)) { parser->encoding = YAML_UTF8_ENCODING; parser->raw_pointer += 3; parser->raw_unread -= 3; + parser->offset += 3; } else { parser->encoding = YAML_UTF8_ENCODING; @@ -407,7 +425,8 @@ yaml_parser_update_raw_buffer(yaml_parser_t *parser) parser->raw_buffer + parser->raw_unread, YAML_RAW_BUFFER_SIZE - parser->raw_unread, &size_read)) { - return yaml_parser_set_reader_error(parser, "Input error"); + return yaml_parser_set_reader_error(parser, "Input error", + parser->offset, -1); } parser->raw_unread += size_read; if (!size_read) { diff --git a/tests/test-reader.c b/tests/test-reader.c index b1a7426c..53e4e7a0 100644 --- a/tests/test-reader.c +++ b/tests/test-reader.c @@ -16,84 +16,96 @@ typedef struct { } test_case; test_case utf8_sequences[] = { - /* {"title", "test 1|test 2|...|test N!", (0 or 1)}, */ - - {"a simple test", "'test' is '\xd0\xbf\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb5\xd1\x80\xd0\xba\xd0\xb0' in Russian!", 1}, - {"an empty line", "!", 1}, - - {"u-0 is a control character", "\x00!", 0}, - {"u-80 is a control character", "\xc2\x80!", 0}, - {"u-800 is valid", "\xe0\xa0\x80!", 1}, - {"u-10000 is valid", "\xf0\x90\x80\x80!", 1}, - {"5 bytes sequences are not allowed", "\xf8\x88\x80\x80\x80!", 0}, - {"6 bytes sequences are not allowed", "\xfc\x84\x80\x80\x80\x80!", 0}, - - {"u-7f is a control character", "\x7f!", 0}, - {"u-7FF is valid", "\xdf\xbf!", 1}, - {"u-FFFF is a control character", 
"\xef\xbf\xbf!", 0}, - {"u-1FFFFF is too large", "\xf7\xbf\xbf\xbf!", 0}, - {"u-3FFFFFF is 5 bytes", "\xfb\xbf\xbf\xbf\xbf!", 0}, - {"u-7FFFFFFF is 6 bytes", "\xfd\xbf\xbf\xbf\xbf\xbf!", 0}, - - {"u-D7FF", "\xed\x9f\xbf!", 1}, - {"u-E000", "\xee\x80\x80!", 1}, - {"u-FFFD", "\xef\xbf\xbd!", 1}, - {"u-10FFFF", "\xf4\x8f\xbf\xbf!", 1}, - {"u-110000", "\xf4\x90\x80\x80!", 0}, - - {"first continuation byte", "\x80!", 0}, - {"last continuation byte", "\xbf!", 0}, - - {"2 continuation bytes", "\x80\xbf!", 0}, - {"3 continuation bytes", "\x80\xbf\x80!", 0}, - {"4 continuation bytes", "\x80\xbf\x80\xbf!", 0}, - {"5 continuation bytes", "\x80\xbf\x80\xbf\x80!", 0}, - {"6 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf!", 0}, - {"7 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf\x80!", 0}, - - {"sequence of all 64 possible continuation bytes", - "\x80|\x81|\x82|\x83|\x84|\x85|\x86|\x87|\x88|\x89|\x8a|\x8b|\x8c|\x8d|\x8e|\x8f|" - "\x90|\x91|\x92|\x93|\x94|\x95|\x96|\x97|\x98|\x99|\x9a|\x9b|\x9c|\x9d|\x9e|\x9f|" - "\xa0|\xa1|\xa2|\xa3|\xa4|\xa5|\xa6|\xa7|\xa8|\xa9|\xaa|\xab|\xac|\xad|\xae|\xaf|" - "\xb0|\xb1|\xb2|\xb3|\xb4|\xb5|\xb6|\xb7|\xb8|\xb9|\xba|\xbb|\xbc|\xbd|\xbe|\xbf!", 0}, - {"32 first bytes of 2-byte sequences {0xc0-0xdf}", - "\xc0 |\xc1 |\xc2 |\xc3 |\xc4 |\xc5 |\xc6 |\xc7 |\xc8 |\xc9 |\xca |\xcb |\xcc |\xcd |\xce |\xcf |" - "\xd0 |\xd1 |\xd2 |\xd3 |\xd4 |\xd5 |\xd6 |\xd7 |\xd8 |\xd9 |\xda |\xdb |\xdc |\xdd |\xde |\xdf !", 0}, - {"16 first bytes of 3-byte sequences {0xe0-0xef}", - "\xe0 |\xe1 |\xe2 |\xe3 |\xe4 |\xe5 |\xe6 |\xe7 |\xe8 |\xe9 |\xea |\xeb |\xec |\xed |\xee |\xef !", 0}, - {"8 first bytes of 4-byte sequences {0xf0-0xf7}", "\xf0 |\xf1 |\xf2 |\xf3 |\xf4 |\xf5 |\xf6 |\xf7 !", 0}, - {"4 first bytes of 5-byte sequences {0xf8-0xfb}", "\xf8 |\xf9 |\xfa |\xfb !", 0}, - {"2 first bytes of 6-byte sequences {0xfc-0xfd}", "\xfc |\xfd !", 0}, - - {"sequences with last byte missing {u-0}", - "\xc0|\xe0\x80|\xf0\x80\x80|\xf8\x80\x80\x80|\xfc\x80\x80\x80\x80!", 0}, - 
{"sequences with last byte missing {u-...FF}", - "\xdf|\xef\xbf|\xf7\xbf\xbf|\xfb\xbf\xbf\xbf|\xfd\xbf\xbf\xbf\xbf!", 0}, - - {"impossible bytes", "\xfe|\xff|\xfe\xfe\xff\xff!", 0}, - - {"overlong sequences {u-2f}", - "\xc0\xaf|\xe0\x80\xaf|\xf0\x80\x80\xaf|\xf8\x80\x80\x80\xaf|\xfc\x80\x80\x80\x80\xaf!", 0}, - - {"maximum overlong sequences", - "\xc1\xbf|\xe0\x9f\xbf|\xf0\x8f\xbf\xbf|\xf8\x87\xbf\xbf\xbf|\xfc\x83\xbf\xbf\xbf\xbf!", 0}, - - {"overlong representation of the NUL character", - "\xc0\x80|\xe0\x80\x80|\xf0\x80\x80\x80|\xf8\x80\x80\x80\x80|\xfc\x80\x80\x80\x80\x80!", 0}, - - {"single UTF-16 surrogates", - "\xed\xa0\x80|\xed\xad\xbf|\xed\xae\x80|\xed\xaf\xbf|\xed\xb0\x80|\xed\xbe\x80|\xed\xbf\xbf!", 0}, - - {"paired UTF-16 surrogates", - "\xed\xa0\x80\xed\xb0\x80|\xed\xa0\x80\xed\xbf\xbf|\xed\xad\xbf\xed\xb0\x80|" - "\xed\xad\xbf\xed\xbf\xbf|\xed\xae\x80\xed\xb0\x80|\xed\xae\x80\xed\xbf\xbf|" - "\xed\xaf\xbf\xed\xb0\x80|\xed\xaf\xbf\xed\xbf\xbf!", 0}, - - {"other illegal code positions", "\xef\xbf\xbe|\xef\xbf\xbf!", 0}, - - {NULL, NULL, 0} + /* {"title", "test 1|test 2|...|test N!", (0 or 1)}, */ + + {"a simple test", "'test' is '\xd0\xbf\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb5\xd1\x80\xd0\xba\xd0\xb0' in Russian!", 1}, + {"an empty line", "!", 1}, + + {"u-0 is a control character", "\x00!", 0}, + {"u-80 is a control character", "\xc2\x80!", 0}, + {"u-800 is valid", "\xe0\xa0\x80!", 1}, + {"u-10000 is valid", "\xf0\x90\x80\x80!", 1}, + {"5 bytes sequences are not allowed", "\xf8\x88\x80\x80\x80!", 0}, + {"6 bytes sequences are not allowed", "\xfc\x84\x80\x80\x80\x80!", 0}, + + {"u-7f is a control character", "\x7f!", 0}, + {"u-7FF is valid", "\xdf\xbf!", 1}, + {"u-FFFF is a control character", "\xef\xbf\xbf!", 0}, + {"u-1FFFFF is too large", "\xf7\xbf\xbf\xbf!", 0}, + {"u-3FFFFFF is 5 bytes", "\xfb\xbf\xbf\xbf\xbf!", 0}, + {"u-7FFFFFFF is 6 bytes", "\xfd\xbf\xbf\xbf\xbf\xbf!", 0}, + + {"u-D7FF", "\xed\x9f\xbf!", 1}, + {"u-E000", "\xee\x80\x80!", 1}, + {"u-FFFD", 
"\xef\xbf\xbd!", 1}, + {"u-10FFFF", "\xf4\x8f\xbf\xbf!", 1}, + {"u-110000", "\xf4\x90\x80\x80!", 0}, + + {"first continuation byte", "\x80!", 0}, + {"last continuation byte", "\xbf!", 0}, + + {"2 continuation bytes", "\x80\xbf!", 0}, + {"3 continuation bytes", "\x80\xbf\x80!", 0}, + {"4 continuation bytes", "\x80\xbf\x80\xbf!", 0}, + {"5 continuation bytes", "\x80\xbf\x80\xbf\x80!", 0}, + {"6 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf!", 0}, + {"7 continuation bytes", "\x80\xbf\x80\xbf\x80\xbf\x80!", 0}, + + {"sequence of all 64 possible continuation bytes", + "\x80|\x81|\x82|\x83|\x84|\x85|\x86|\x87|\x88|\x89|\x8a|\x8b|\x8c|\x8d|\x8e|\x8f|" + "\x90|\x91|\x92|\x93|\x94|\x95|\x96|\x97|\x98|\x99|\x9a|\x9b|\x9c|\x9d|\x9e|\x9f|" + "\xa0|\xa1|\xa2|\xa3|\xa4|\xa5|\xa6|\xa7|\xa8|\xa9|\xaa|\xab|\xac|\xad|\xae|\xaf|" + "\xb0|\xb1|\xb2|\xb3|\xb4|\xb5|\xb6|\xb7|\xb8|\xb9|\xba|\xbb|\xbc|\xbd|\xbe|\xbf!", 0}, + {"32 first bytes of 2-byte sequences {0xc0-0xdf}", + "\xc0 |\xc1 |\xc2 |\xc3 |\xc4 |\xc5 |\xc6 |\xc7 |\xc8 |\xc9 |\xca |\xcb |\xcc |\xcd |\xce |\xcf |" + "\xd0 |\xd1 |\xd2 |\xd3 |\xd4 |\xd5 |\xd6 |\xd7 |\xd8 |\xd9 |\xda |\xdb |\xdc |\xdd |\xde |\xdf !", 0}, + {"16 first bytes of 3-byte sequences {0xe0-0xef}", + "\xe0 |\xe1 |\xe2 |\xe3 |\xe4 |\xe5 |\xe6 |\xe7 |\xe8 |\xe9 |\xea |\xeb |\xec |\xed |\xee |\xef !", 0}, + {"8 first bytes of 4-byte sequences {0xf0-0xf7}", "\xf0 |\xf1 |\xf2 |\xf3 |\xf4 |\xf5 |\xf6 |\xf7 !", 0}, + {"4 first bytes of 5-byte sequences {0xf8-0xfb}", "\xf8 |\xf9 |\xfa |\xfb !", 0}, + {"2 first bytes of 6-byte sequences {0xfc-0xfd}", "\xfc |\xfd !", 0}, + + {"sequences with last byte missing {u-0}", + "\xc0|\xe0\x80|\xf0\x80\x80|\xf8\x80\x80\x80|\xfc\x80\x80\x80\x80!", 0}, + {"sequences with last byte missing {u-...FF}", + "\xdf|\xef\xbf|\xf7\xbf\xbf|\xfb\xbf\xbf\xbf|\xfd\xbf\xbf\xbf\xbf!", 0}, + + {"impossible bytes", "\xfe|\xff|\xfe\xfe\xff\xff!", 0}, + + {"overlong sequences {u-2f}", + 
"\xc0\xaf|\xe0\x80\xaf|\xf0\x80\x80\xaf|\xf8\x80\x80\x80\xaf|\xfc\x80\x80\x80\x80\xaf!", 0}, + + {"maximum overlong sequences", + "\xc1\xbf|\xe0\x9f\xbf|\xf0\x8f\xbf\xbf|\xf8\x87\xbf\xbf\xbf|\xfc\x83\xbf\xbf\xbf\xbf!", 0}, + + {"overlong representation of the NUL character", + "\xc0\x80|\xe0\x80\x80|\xf0\x80\x80\x80|\xf8\x80\x80\x80\x80|\xfc\x80\x80\x80\x80\x80!", 0}, + + {"single UTF-16 surrogates", + "\xed\xa0\x80|\xed\xad\xbf|\xed\xae\x80|\xed\xaf\xbf|\xed\xb0\x80|\xed\xbe\x80|\xed\xbf\xbf!", 0}, + + {"paired UTF-16 surrogates", + "\xed\xa0\x80\xed\xb0\x80|\xed\xa0\x80\xed\xbf\xbf|\xed\xad\xbf\xed\xb0\x80|" + "\xed\xad\xbf\xed\xbf\xbf|\xed\xae\x80\xed\xb0\x80|\xed\xae\x80\xed\xbf\xbf|" + "\xed\xaf\xbf\xed\xb0\x80|\xed\xaf\xbf\xed\xbf\xbf!", 0}, + + {"other illegal code positions", "\xef\xbf\xbe|\xef\xbf\xbf!", 0}, + + {NULL, NULL, 0} +}; + +test_case boms[] = { + + /* {"title", "test!", lenth}, */ + + {"no bom (utf-8)", "Hi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82!", 13}, + {"bom (utf-8)", "\xef\xbb\xbfHi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82!", 13}, + {"bom (utf-16-le)", "\xff\xfeH\x00i\x00 \x00i\x00s\x00 \x00\x1f\x04@\x04""8\x04""2\x04""5\x04""B\x04!", 13}, + {"bom (utf-16-be)", "\xfe\xff\x00H\x00i\x00 \x00i\x00s\x00 \x04\x1f\x04@\x04""8\x04""2\x04""5\x04""B!", 13} }; +char *bom_original = "Hi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82"; + int check_utf8_sequences(void) { yaml_parser_t *parser; @@ -124,7 +136,14 @@ int check_utf8_sequences(void) printf("(no error)\n"); } else if (parser->error == YAML_READER_ERROR) { - printf("(reader error: %s at %d)\n", parser->problem, parser->problem_offset); + if (parser->problem_value != -1) { + printf("(reader error: %s: #%X at %d)\n", + parser->problem, parser->problem_value, parser->problem_offset); + } + else { + printf("(reader error: %s at %d)\n", + parser->problem, parser->problem_offset); + } } if (*end == '!') break; start = ++end; @@ -136,9 +155,196 @@ int 
check_utf8_sequences(void) return failed; } +int check_boms(void) +{ + yaml_parser_t *parser; + int failed = 0; + int k; + printf("checking boms...\n"); + for (k = 0; boms[k].test; k++) { + char *title = boms[k].title; + int check = boms[k].result; + int result; + char *start = boms[k].test; + char *end = start; + while (*end != '!') end++; + printf("\t%s: ", title); + parser = yaml_parser_new(); + assert(parser); + yaml_parser_set_input_string(parser, (unsigned char *)start, end-start); + result = yaml_parser_update_buffer(parser, end-start); + if (!result) { + printf("- (reader error: %s at %d)\n", parser->problem, parser->problem_offset); + failed++; + } + else { + if (parser->unread != check) { + printf("- (length=%d while expected length=%d)\n", parser->unread, check); + failed++; + } + else if (memcmp(parser->buffer, bom_original, check) != 0) { + printf("- (value '%s' does not equal to the original value '%s')\n", parser->buffer, bom_original); + failed++; + } + else { + printf("+\n"); + } + } + yaml_parser_delete(parser); + } + printf("checking boms: %d fail(s)\n", failed); + return failed; +} + +#define LONG 100000 + +int check_long_utf8(void) +{ + yaml_parser_t *parser; + int k = 0; + int j; + int failed = 0; + unsigned char ch0, ch1; + unsigned char *buffer = malloc(3+LONG*2); + assert(buffer); + printf("checking a long utf8 sequence...\n"); + buffer[k++] = '\xef'; + buffer[k++] = '\xbb'; + buffer[k++] = '\xbf'; + for (j = 0; j < LONG; j ++) { + if (j % 2) { + buffer[k++] = '\xd0'; + buffer[k++] = '\x90'; + } + else { + buffer[k++] = '\xd0'; + buffer[k++] = '\xaf'; + } + } + parser = yaml_parser_new(); + assert(parser); + yaml_parser_set_input_string(parser, buffer, 3+LONG*2); + for (k = 0; k < LONG; k++) { + if (!parser->unread) { + if (!yaml_parser_update_buffer(parser, 1)) { + printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + failed = 1; + break; + } + } + if (!parser->unread) { + printf("\tnot enough characters at 
%d\n", k); + failed = 1; + break; + } + if (k % 2) { + ch0 = '\xd0'; + ch1 = '\x90'; + } + else { + ch0 = '\xd0'; + ch1 = '\xaf'; + } + if (parser->pointer[0] != ch0 || parser->pointer[1] != ch1) { + printf("\tincorrect UTF-8 sequence: %X %X instead of %X %X\n", + (int)parser->pointer[0], (int)parser->pointer[1], + (int)ch0, (int)ch1); + failed = 1; + break; + } + parser->pointer += 2; + parser->unread -= 1; + } + if (!failed) { + if (!yaml_parser_update_buffer(parser, 1)) { + printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + failed = 1; + } + else if (parser->pointer[0] != '\0') { + printf("\texpected NUL, found %X (eof=%d, unread=%d)\n", (int)parser->pointer[0], parser->eof, parser->unread); + failed = 1; + } + } + yaml_parser_delete(parser); + free(buffer); + printf("checking a long utf8 sequence: %d fail(s)\n", failed); + return failed; +} + +int check_long_utf16(void) +{ + yaml_parser_t *parser; + int k = 0; + int j; + int failed = 0; + unsigned char ch0, ch1; + unsigned char *buffer = malloc(2+LONG*2); + assert(buffer); + printf("checking a long utf16 sequence...\n"); + buffer[k++] = '\xff'; + buffer[k++] = '\xfe'; + for (j = 0; j < LONG; j ++) { + if (j % 2) { + buffer[k++] = '\x10'; + buffer[k++] = '\x04'; + } + else { + buffer[k++] = '/'; + buffer[k++] = '\x04'; + } + } + parser = yaml_parser_new(); + assert(parser); + yaml_parser_set_input_string(parser, buffer, 2+LONG*2); + for (k = 0; k < LONG; k++) { + if (!parser->unread) { + if (!yaml_parser_update_buffer(parser, 1)) { + printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + failed = 1; + break; + } + } + if (!parser->unread) { + printf("\tnot enough characters at %d\n", k); + failed = 1; + break; + } + if (k % 2) { + ch0 = '\xd0'; + ch1 = '\x90'; + } + else { + ch0 = '\xd0'; + ch1 = '\xaf'; + } + if (parser->pointer[0] != ch0 || parser->pointer[1] != ch1) { + printf("\tincorrect UTF-8 sequence: %X %X instead of %X %X\n", + 
(int)parser->pointer[0], (int)parser->pointer[1], + (int)ch0, (int)ch1); + failed = 1; + break; + } + parser->pointer += 2; + parser->unread -= 1; + } + if (!failed) { + if (!yaml_parser_update_buffer(parser, 1)) { + printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + failed = 1; + } + else if (parser->pointer[0] != '\0') { + printf("\texpected NUL, found %X (eof=%d, unread=%d)\n", (int)parser->pointer[0], parser->eof, parser->unread); + failed = 1; + } + } + yaml_parser_delete(parser); + free(buffer); + printf("checking a long utf16 sequence: %d fail(s)\n", failed); + return failed; +} int main(void) { - return check_utf8_sequences(); + return check_utf8_sequences() + check_boms() + check_long_utf8() + check_long_utf16(); } From ec7c4d2b0a9e5d22d08086e2a158923e3d5317ed Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 2 Jun 2006 13:03:14 +0000 Subject: [PATCH 06/73] Add token constructors and destructors. Add YAML_DECLARE to the API declarations (Thanks to Peter Murphy for suggestion). 
--- doc/doxygen.cfg | 6 +- include/yaml/yaml.h | 358 +++++++++++++++++++++++++++++++++++++------- src/api.c | 222 +++++++++++++++++++++++++-- src/reader.c | 185 ++++++++++++----------- 4 files changed, 615 insertions(+), 156 deletions(-) diff --git a/doc/doxygen.cfg b/doc/doxygen.cfg index f7a9aef4..a58bb177 100644 --- a/doc/doxygen.cfg +++ b/doc/doxygen.cfg @@ -174,12 +174,12 @@ PERLMOD_MAKEVAR_PREFIX = # Configuration options related to the preprocessor #--------------------------------------------------------------------------- ENABLE_PREPROCESSING = YES -MACRO_EXPANSION = NO -EXPAND_ONLY_PREDEF = NO +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = YES SEARCH_INCLUDES = YES INCLUDE_PATH = INCLUDE_FILE_PATTERNS = -PREDEFINED = +PREDEFINED = "YAML_DECLARE(type)=type" EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index cfdeacf5..6faf7df8 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -19,6 +19,27 @@ extern "C" { #include #include +/** + * @defgroup Export Definitions + * @{ + */ + +/** The public API declaration. */ + +#ifdef WIN32 +# if defined(YAML_DECLARE_STATIC) +# define YAML_DECLARE(type) type +# elif defined(YAML_DECLARE_EXPORT) +# define YAML_DECLARE(type) __declspec(dllexport) type +# else +# define YAML_DECLARE(type) __declspec(dllimport) type +# endif +#else +# define YAML_DECLARE(type) type +#endif + +/** @} */ + /** * @defgroup version Version Information * @{ @@ -32,7 +53,7 @@ extern "C" { * number, and @c Z is the patch version number. */ -const char * +YAML_DECLARE(const char *) yaml_get_version_string(void); /** @@ -43,7 +64,7 @@ yaml_get_version_string(void); * @param[out] patch Patch version number. */ -void +YAML_DECLARE(void) yaml_get_version(int *major, int *minor, int *patch); /** @} */ @@ -53,7 +74,7 @@ yaml_get_version(int *major, int *minor, int *patch); * @{ */ -/** The character type. 
*/ +/** The character type (UTF-8 octet). */ typedef unsigned char yaml_char_t; /** The stream encoding. */ @@ -78,31 +99,63 @@ typedef enum { YAML_EMITTER_ERROR } yaml_error_type_t; +/** The pointer position. */ +typedef struct { + /** The position index. */ + size_t index; + + /** The position line. */ + size_t line; + + /** The position column. */ + size_t column; +} yaml_mark_t; + /** @} */ -/* +/** + * @defgroup Node Styles + * @{ + */ +/** Scalar styles. */ typedef enum { YAML_ANY_SCALAR_STYLE, + YAML_PLAIN_SCALAR_STYLE, + YAML_SINGLE_QUOTED_SCALAR_STYLE, YAML_DOUBLE_QUOTED_SCALAR_STYLE, + YAML_LITERAL_SCALAR_STYLE, YAML_FOLDED_SCALAR_STYLE } yaml_scalar_style_t; + +/** Sequence styles. */ typedef enum { YAML_ANY_SEQUENCE_STYLE, + YAML_BLOCK_SEQUENCE_STYLE, YAML_FLOW_SEQUENCE_STYLE } yaml_sequence_style_t; +/** Mapping styles. */ typedef enum { YAML_ANY_MAPPING_STYLE, + YAML_BLOCK_MAPPING_STYLE, YAML_FLOW_MAPPING_STYLE } yaml_mapping_style_t; +/** @} */ + +/** + * @defgroup Tokens + * @{ + */ + +/** Token types. */ typedef enum { YAML_STREAM_START_TOKEN, YAML_STREAM_END_TOKEN, @@ -132,62 +185,259 @@ typedef enum { YAML_SCALAR_TOKEN } yaml_token_type_t; -typedef enum { - YAML_STREAM_START_EVENT, - YAML_STREAM_END_EVENT, +/** The token structure. */ +typedef struct { - YAML_DOCUMENT_START_EVENT, - YAML_DOCUMENT_END_EVENT, + /** The token type. */ + yaml_token_type_t type; - YAML_ALIAS_EVENT, - YAML_SCALAR_EVENT, + /** The token data. */ + union { - YAML_SEQUENCE_START_EVENT, - YAML_SEQUENCE_END_EVENT, + /** The stream encoding (for @c YAML_STREAM_START_TOKEN). */ + yaml_encoding_t encoding; - YAML_MAPPING_START_EVENT, - YAML_MAPPING_END_EVENT -} yaml_event_type_t; + /** The anchor (for @c YAML_ALIAS_TOKEN and @c YAML_ANCHOR_TOKEN). */ + yaml_char_t *anchor; -typedef struct { - size_t offset; - size_t index; - size_t line; - size_t column; -} yaml_mark_t; + /** The tag (for @c YAML_TAG_TOKEN). */ + struct { + /** The tag handle. 
*/ + yaml_char_t *handle; -typedef struct { - yaml_error_type_t type; - char *context; - yaml_mark_t context_mark; - char *problem; - yaml_mark_t problem_mark; -} yaml_error_t; + /** The tag suffix. */ + yaml_char_t *suffix; + } tag; -typedef struct { - yaml_token_type_t type; - union { - yaml_encoding_t encoding; - char *anchor; - char *tag; + /** The scalar value (for @c YAML_SCALAR_TOKEN). */ struct { - char *value; + + /** The scalar value. */ + yaml_char_t *value; + + /** The length of the scalar value. */ size_t length; + + /** The scalar style. */ yaml_scalar_style_t style; } scalar; + + /** The version directive (for @c YAML_VERSION_DIRECTIVE_TOKEN). */ struct { + /** The major version number. */ int major; + + /** The minor version number. */ int minor; - } version; + } version_directive; + + /** The tag directive (for @c YAML_TAG_DIRECTIVE_TOKEN). */ struct { - char *handle; - char *prefix; - } tag_pair; + /** The tag handle. */ + yaml_char_t *handle; + + /** The tag prefix. */ + yaml_char_t *prefix; + } tag_directive; } data; + + /** The beginning of the token. */ yaml_mark_t start_mark; + + /** The end of the token. */ yaml_mark_t end_mark; + } yaml_token_t; +/** + * Create a new token without assigning any data. + * + * This function can be used for constructing indicator tokens: + * @c YAML_DOCUMENT_START, @c YAML_DOCUMENT_END, + * @c YAML_BLOCK_SEQUENCE_START_TOKEN, @c YAML_BLOCK_MAPPING_START_TOKEN, + * @c YAML_BLOCK_END_TOKEN, + * @c YAML_FLOW_SEQUENCE_START_TOKEN, @c YAML_FLOW_SEQUENCE_END_TOKEN, + * @c YAML_FLOW_MAPPING_START_TOKEN, @c YAML_FLOW_MAPPING_END_TOKEN, + * @c YAML_BLOCK_ENTRY_TOKEN, @c YAML_FLOW_ENTRY_TOKEN, + * @c YAML_KEY_TOKEN, @c YAML_VALUE_TOKEN. + * + * @param[in] type The token type. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. 
+ */ + +YAML_DECLARE(yaml_token_t *) +yaml_token_new(yaml_token_type_t type, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_STREAM_START_TOKEN token with the specified encoding. + * + * @param[in] encoding The stream encoding. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_stream_start_token_new(yaml_encoding_t encoding, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_STREAM_END_TOKEN token. + * + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_stream_end_token_new(yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_VERSION_DIRECTIVE_TOKEN token with the specified + * version numbers. + * + * @param[in] major The major version number. + * @param[in] minor The minor version number. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_version_directive_token_new(int major, int minor, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_TAG_DIRECTIVE_TOKEN token with the specified tag + * handle and prefix. + * + * Note that the @a handle and the @a prefix pointers will be freed by + * the token destructor. + * + * @param[in] handle The tag handle. + * @param[in] prefix The tag prefix. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. 
+ */ + +YAML_DECLARE(yaml_token_t *) +yaml_tag_directive_token_new(yaml_char_t *handle, yaml_char_t *prefix, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_ALIAS_TOKEN token with the specified anchor. + * + * Note that the @a anchor pointer will be freed by the token destructor. + * + * @param[in] anchor The anchor. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_alias_token_new(yaml_char_t *anchor, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_ANCHOR_TOKEN token with the specified anchor. + * + * Note that the @a anchor pointer will be freed by the token destructor. + * + * @param[in] anchor The anchor. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_anchor_token_new(yaml_char_t *anchor, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_TAG_TOKEN token with the specified tag handle and + * suffix. + * + * Note that the @a handle and the @a suffix pointers will be freed by + * the token destructor. + * + * @param[in] handle The tag handle. + * @param[in] suffix The tag suffix. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_tag_token_new(yaml_char_t *handle, yaml_char_t *suffix, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_SCALAR_TOKEN token with the specified scalar value, + * length, and style. + * + * Note that the scalar value may contain the @c NUL character, therefore + * the value length is also required. The scalar value always ends with + * @c NUL. 
+ * + * Note that the @a value pointer will be freed by the token destructor. + * + * @param[in] value The scalar value. + * @param[in] length The value length. + * @param[in] style The scalar style. + * @param[in] start_mark The beginning of the token. + * @param[in] end_mark The end of the token. + * + * @returns A new token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_scalar_token_new(yaml_char_t *value, size_t length, + yaml_scalar_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Destroy a token object. + * + * @param[in] token A token object. + */ + +YAML_DECLARE(void) +yaml_token_delete(yaml_token_t *token); + +/** @} */ + +/* + +typedef enum { + YAML_STREAM_START_EVENT, + YAML_STREAM_END_EVENT, + + YAML_DOCUMENT_START_EVENT, + YAML_DOCUMENT_END_EVENT, + + YAML_ALIAS_EVENT, + YAML_SCALAR_EVENT, + + YAML_SEQUENCE_START_EVENT, + YAML_SEQUENCE_END_EVENT, + + YAML_MAPPING_START_EVENT, + YAML_MAPPING_END_EVENT +} yaml_event_type_t; + typedef struct { yaml_event_type_t type; union { @@ -272,8 +522,13 @@ typedef int yaml_read_handler_t(void *data, unsigned char *buffer, size_t size, */ typedef struct { + /** The string start pointer. */ unsigned char *start; + + /** The string end pointer. */ unsigned char *end; + + /** The string current position. */ unsigned char *current; } yaml_string_input_t; @@ -375,7 +630,7 @@ typedef struct { * @returns A new parser object; @c NULL on error. */ -yaml_parser_t * +YAML_DECLARE(yaml_parser_t *) yaml_parser_new(void); /** @@ -384,7 +639,7 @@ yaml_parser_new(void); * @param[in] parser A parser object. */ -void +YAML_DECLARE(void) yaml_parser_delete(yaml_parser_t *parser); /** @@ -396,10 +651,10 @@ yaml_parser_delete(yaml_parser_t *parser); * * @param[in] parser A parser object. * @param[in] input A source data. - * @param[in] length The length of the source data in bytes. + * @param[in] size The length of the source data in bytes. 
*/ -void +YAML_DECLARE(void) yaml_parser_set_input_string(yaml_parser_t *parser, unsigned char *input, size_t size); @@ -414,7 +669,7 @@ yaml_parser_set_input_string(yaml_parser_t *parser, * @param[in] file An open file. */ -void +YAML_DECLARE(void) yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file); /** @@ -425,17 +680,18 @@ yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file); * @param[in] data Any application data for passing to the read handler. */ -void +YAML_DECLARE(void) yaml_parser_set_input(yaml_parser_t *parser, yaml_read_handler_t *handler, void *data); /** * Set the source encoding. * + * @param[in] parser A parser object. * @param[in] encoding The source encoding. */ -void +YAML_DECLARE(void) yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); /** @} */ @@ -459,7 +715,7 @@ typedef struct { * or @c NULL if it failed. */ -void * +YAML_DECLARE(void *) yaml_malloc(size_t size); /** @@ -473,7 +729,7 @@ yaml_malloc(size_t size); * or @c NULL if it failed. */ -void * +YAML_DECLARE(void *) yaml_realloc(void *ptr, size_t size); /** @@ -483,7 +739,7 @@ yaml_realloc(void *ptr, size_t size); * valid. */ -void +YAML_DECLARE(void) yaml_free(void *ptr); /** The size of the raw buffer. */ @@ -507,7 +763,7 @@ yaml_free(void *ptr); * @returns @c 1 on success, @c 0 on error. */ -int +YAML_DECLARE(int) yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); /** @} */ diff --git a/src/api.c b/src/api.c index aa183afb..50f118a2 100644 --- a/src/api.c +++ b/src/api.c @@ -11,7 +11,7 @@ * Allocate a dynamic memory block. */ -void * +YAML_DECLARE(void *) yaml_malloc(size_t size) { return malloc(size ? size : 1); @@ -21,7 +21,7 @@ yaml_malloc(size_t size) * Reallocate a dynamic memory block. */ -void * +YAML_DECLARE(void *) yaml_realloc(void *ptr, size_t size) { return ptr ? realloc(ptr, size ? size : 1) : malloc(size ? size : 1); @@ -31,7 +31,7 @@ yaml_realloc(void *ptr, size_t size) * Free a dynamic memory block. 
*/ -void +YAML_DECLARE(void) yaml_free(void *ptr) { if (ptr) free(ptr); @@ -41,7 +41,7 @@ yaml_free(void *ptr) * Create a new parser object. */ -yaml_parser_t * +YAML_DECLARE(yaml_parser_t *) yaml_parser_new(void) { yaml_parser_t *parser; @@ -82,7 +82,7 @@ yaml_parser_new(void) * Destroy a parser object. */ -void +YAML_DECLARE(void) yaml_parser_delete(yaml_parser_t *parser) { assert(parser); /* Non-NULL parser object expected. */ @@ -136,7 +136,7 @@ yaml_file_read_handler(void *data, unsigned char *buffer, size_t size, * Set a string input. */ -void +YAML_DECLARE(void) yaml_parser_set_input_string(yaml_parser_t *parser, unsigned char *input, size_t size) { @@ -156,7 +156,7 @@ yaml_parser_set_input_string(yaml_parser_t *parser, * Set a file input. */ -void +YAML_DECLARE(void) yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file) { assert(parser); /* Non-NULL parser object expected. */ @@ -171,7 +171,7 @@ yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file) * Set a generic input. */ -void +YAML_DECLARE(void) yaml_parser_set_input(yaml_parser_t *parser, yaml_read_handler_t *handler, void *data) { @@ -187,7 +187,7 @@ yaml_parser_set_input(yaml_parser_t *parser, * Set the source encoding. */ -void +YAML_DECLARE(void) yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding) { assert(parser); /* Non-NULL parser object expected. */ @@ -196,3 +196,207 @@ yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding) parser->encoding = encoding; } +/* + * Create a token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_token_new(yaml_token_type_t type, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_malloc(sizeof(yaml_token_t)); + + if (!token) return NULL; + + memset(token, 0, sizeof(yaml_token_t)); + + token->type = type; + token->start_mark = start_mark; + token->end_mark = end_mark; + + return token; +} + +/* + * Create a STREAM-START token. 
+ */ + +YAML_DECLARE(yaml_token_t *) +yaml_stream_start_token(yaml_encoding_t encoding, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_STREAM_START_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.encoding = encoding; + + return token; +} + +/* + * Create a STREAM-END token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_stream_end_token(yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_STREAM_END_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + return token; +} + +/* + * Create a VERSION-DIRECTIVE token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_version_directive_token_new(int major, int minor, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_VERSION_DIRECTIVE_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.version_directive.major = major; + token->data.version_directive.minor = minor; + + return token; +} + +/* + * Create a TAG-DIRECTIVE token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_tag_directive_token_new(yaml_char_t *handle, yaml_char_t *prefix, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_TAG_DIRECTIVE_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.tag_directive.handle = handle; + token->data.tag_directive.prefix = prefix; + + return token; +} + +/* + * Create an ALIAS token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_alias_token_new(yaml_char_t *anchor, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_ALIAS_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.anchor = anchor; + + return token; +} + +/* + * Create an ANCHOR token. 
+ */ + +YAML_DECLARE(yaml_token_t *) +yaml_anchor_token_new(yaml_char_t *anchor, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_ANCHOR_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.anchor = anchor; + + return token; +} + +/* + * Create a TAG token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_tag_token_new(yaml_char_t *handle, yaml_char_t *suffix, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_TAG_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.tag.handle = handle; + token->data.tag.suffix = suffix; + + return token; +} + +/* + * Create a SCALAR token. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_scalar_token_new(yaml_char_t *value, size_t length, + yaml_scalar_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_token_t *token = yaml_token_new(YAML_SCALAR_TOKEN, + start_mark, end_mark); + + if (!token) return NULL; + + token->data.scalar.value = value; + token->data.scalar.length = length; + token->data.scalar.style = style; + + return token; +} + +/* + * Destroy a token object. + */ + +YAML_DECLARE(void) +yaml_token_delete(yaml_token_t *token) +{ + assert(token); /* Non-NULL token object expected. */ + + switch (token->type) + { + case YAML_TAG_DIRECTIVE_TOKEN: + yaml_free(token->data.tag_directive.handle); + yaml_free(token->data.tag_directive.prefix); + break; + + case YAML_ALIAS_TOKEN: + case YAML_ANCHOR_TOKEN: + yaml_free(token->data.anchor); + break; + + case YAML_TAG_TOKEN: + yaml_free(token->data.tag.handle); + yaml_free(token->data.tag.suffix); + break; + + case YAML_SCALAR_TOKEN: + yaml_free(token->data.scalar.value); + break; + } + + memset(token, 0, sizeof(yaml_token_t)); + + yaml_free(token); +} + diff --git a/src/reader.c b/src/reader.c index ac11323c..e4e6f1a9 100644 --- a/src/reader.c +++ b/src/reader.c @@ -11,7 +11,7 @@ * Set the reader error and return 0. 
*/ -int +static int yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, size_t offset, int value) { @@ -23,6 +23,96 @@ yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, return 0; } +/* + * Update the raw buffer. + */ + +static int +yaml_parser_update_raw_buffer(yaml_parser_t *parser) +{ + size_t size_read = 0; + + /* Return if the raw buffer is full. */ + + if (parser->raw_unread == YAML_RAW_BUFFER_SIZE) return 1; + + /* Return on EOF. */ + + if (parser->eof) return 1; + + /* Move the remaining bytes in the raw buffer to the beginning. */ + + if (parser->raw_unread && parser->raw_buffer < parser->raw_pointer) { + memmove(parser->raw_buffer, parser->raw_pointer, parser->raw_unread); + } + parser->raw_pointer = parser->raw_buffer; + + /* Call the read handler to fill the buffer. */ + + if (!parser->read_handler(parser->read_handler_data, + parser->raw_buffer + parser->raw_unread, + YAML_RAW_BUFFER_SIZE - parser->raw_unread, + &size_read)) { + return yaml_parser_set_reader_error(parser, "Input error", + parser->offset, -1); + } + parser->raw_unread += size_read; + if (!size_read) { + parser->eof = 1; + } + + return 1; +} + +/* + * Determine the input stream encoding by checking the BOM symbol. If no BOM is + * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. + */ + +#define BOM_UTF8 "\xef\xbb\xbf" +#define BOM_UTF16LE "\xff\xfe" +#define BOM_UTF16BE "\xfe\xff" + +static int +yaml_parser_determine_encoding(yaml_parser_t *parser) +{ + /* Ensure that we had enough bytes in the raw buffer. */ + + while (!parser->eof && parser->raw_unread < 3) { + if (!yaml_parser_update_raw_buffer(parser)) { + return 0; + } + } + + /* Determine the encoding. 
*/ + + if (parser->raw_unread >= 2 + && !memcmp(parser->raw_pointer, BOM_UTF16LE, 2)) { + parser->encoding = YAML_UTF16LE_ENCODING; + parser->raw_pointer += 2; + parser->raw_unread -= 2; + parser->offset += 2; + } + else if (parser->raw_unread >= 2 + && !memcmp(parser->raw_pointer, BOM_UTF16BE, 2)) { + parser->encoding = YAML_UTF16BE_ENCODING; + parser->raw_pointer += 2; + parser->raw_unread -= 2; + parser->offset += 2; + } + else if (parser->raw_unread >= 3 + && !memcmp(parser->raw_pointer, BOM_UTF8, 3)) { + parser->encoding = YAML_UTF8_ENCODING; + parser->raw_pointer += 3; + parser->raw_unread -= 3; + parser->offset += 3; + } + else { + parser->encoding = YAML_UTF8_ENCODING; + } + + return 1; +} /* * Ensure that the buffer contains at least length characters. @@ -31,7 +121,7 @@ yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, * The length is supposed to be significantly less that the buffer size. */ -int +YAML_DECLARE(int) yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) { /* If the EOF flag is set and the raw buffer is empty, do nothing. */ @@ -345,94 +435,3 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) return 1; } -/* - * Determine the input stream encoding by checking the BOM symbol. If no BOM is - * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. - */ - -#define BOM_UTF8 "\xef\xbb\xbf" -#define BOM_UTF16LE "\xff\xfe" -#define BOM_UTF16BE "\xfe\xff" - -int -yaml_parser_determine_encoding(yaml_parser_t *parser) -{ - /* Ensure that we had enough bytes in the raw buffer. */ - - while (!parser->eof && parser->raw_unread < 3) { - if (!yaml_parser_update_raw_buffer(parser)) { - return 0; - } - } - - /* Determine the encoding. 
*/ - - if (parser->raw_unread >= 2 - && !memcmp(parser->raw_pointer, BOM_UTF16LE, 2)) { - parser->encoding = YAML_UTF16LE_ENCODING; - parser->raw_pointer += 2; - parser->raw_unread -= 2; - parser->offset += 2; - } - else if (parser->raw_unread >= 2 - && !memcmp(parser->raw_pointer, BOM_UTF16BE, 2)) { - parser->encoding = YAML_UTF16BE_ENCODING; - parser->raw_pointer += 2; - parser->raw_unread -= 2; - parser->offset += 2; - } - else if (parser->raw_unread >= 3 - && !memcmp(parser->raw_pointer, BOM_UTF8, 3)) { - parser->encoding = YAML_UTF8_ENCODING; - parser->raw_pointer += 3; - parser->raw_unread -= 3; - parser->offset += 3; - } - else { - parser->encoding = YAML_UTF8_ENCODING; - } - - return 1; -} - -/* - * Update the raw buffer. - */ - -int -yaml_parser_update_raw_buffer(yaml_parser_t *parser) -{ - size_t size_read = 0; - - /* Return if the raw buffer is full. */ - - if (parser->raw_unread == YAML_RAW_BUFFER_SIZE) return 1; - - /* Return on EOF. */ - - if (parser->eof) return 1; - - /* Move the remaining bytes in the raw buffer to the beginning. */ - - if (parser->raw_unread && parser->raw_buffer < parser->raw_pointer) { - memmove(parser->raw_buffer, parser->raw_pointer, parser->raw_unread); - } - parser->raw_pointer = parser->raw_buffer; - - /* Call the read handler to fill the buffer. */ - - if (!parser->read_handler(parser->read_handler_data, - parser->raw_buffer + parser->raw_unread, - YAML_RAW_BUFFER_SIZE - parser->raw_unread, - &size_read)) { - return yaml_parser_set_reader_error(parser, "Input error", - parser->offset, -1); - } - parser->raw_unread += size_read; - if (!size_read) { - parser->eof = 1; - } - - return 1; -} - From 460af32c1e15c968bff672c74167ca5b8a2302ab Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Tue, 6 Jun 2006 19:43:34 +0000 Subject: [PATCH 07/73] Add scanner definitions. Add a basic description of all tokens produced by the scanner. 
--- include/yaml/yaml.h | 117 +++++++- src/Makefile.am | 2 +- src/scanner.c | 667 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 782 insertions(+), 4 deletions(-) create mode 100644 src/scanner.c diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 6faf7df8..7ee6b28c 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -20,7 +20,7 @@ extern "C" { #include /** - * @defgroup Export Definitions + * @defgroup export Export Definitions * @{ */ @@ -114,7 +114,7 @@ typedef struct { /** @} */ /** - * @defgroup Node Styles + * @defgroup styles Node Styles * @{ */ @@ -151,7 +151,7 @@ typedef enum { /** @} */ /** - * @defgroup Tokens + * @defgroup tokens Tokens * @{ */ @@ -532,6 +532,33 @@ typedef struct { unsigned char *current; } yaml_string_input_t; +/** + * This structure holds information about a potential simple key. + */ + +typedef struct { + /** Is a simple key possible? */ + int possible; + + /** Is a simple key required? */ + int required; + + /** The number of the token. */ + size_t token_number; + + /** The position index. */ + size_t index; + + /** The position line. */ + size_t line; + + /** The position column. */ + size_t column; + + /** The position mark. */ + yaml_mark_t mark; +} yaml_simple_key_t; + /** * The parser structure. * @@ -619,6 +646,60 @@ typedef struct { * @} */ + /** + * @name Scanner stuff + * @{ + */ + + /** Have we started to scan the input stream? */ + int stream_start_produced; + + /** Have we reached the end of the input stream? */ + int stream_end_produced; + + /** The number of unclosed '[' and '{' indicators. */ + int flow_level; + + /** The tokens queue, which contains the current produced tokens. */ + yaml_token_t *tokens; + + /** The size of the tokens queue. */ + size_t tokens_size; + + /** The head of the tokens queue. */ + size_t tokens_head; + + /** The tail of the tokens queue. */ + size_t tokens_tail; + + /** The number of tokens fetched from the tokens queue. 
*/ + size_t tokens_parsed; + + /** The stack of indentation levels. */ + int *indents; + + /** The size of the indents stack. */ + size_t indents_size; + + /** The number of items in the indents stack. */ + size_t indents_length; + + /** The current indentation level. */ + int indent; + + /** May a simple key occur at the current position? */ + int simple_key_allowed; + + /** The stack of potential simple keys. */ + yaml_simple_key_t *simple_keys; + + /** The size of the simple keys stack. */ + size_t simple_keys_size; + + /** + * @} + */ + } yaml_parser_t; /** @@ -694,6 +775,36 @@ yaml_parser_set_input(yaml_parser_t *parser, YAML_DECLARE(void) yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); +/** + * Get the next token. + * + * The token is removed from the internal token queue and the application is + * responsible for destroying the token object. + * + * @param[in] parser A parser object. + * + * @returns A token object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_parser_get_token(yaml_parser_t *parser); + +/** + * Peek the next token. + * + * The token is not removed from the internal token queue and will be returned + * again on a subsequent call of @c yaml_parser_get_token or + * @c yaml_parser_peek_token. The application should not destroy the token + * object. + * + * @param[in] parser A parser object. + * + * @returns A token object, or @c NULL on error. 
+ */ + +YAML_DECLARE(yaml_token_t *) +yaml_parser_peek_token(yaml_parser_t *parser); + /** @} */ /* diff --git a/src/Makefile.am b/src/Makefile.am index 6816d814..d30f3dd1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = version.c api.c reader.c +libyaml_la_SOURCES = version.c api.c reader.c scanner.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/scanner.c b/src/scanner.c new file mode 100644 index 00000000..530bd91e --- /dev/null +++ b/src/scanner.c @@ -0,0 +1,667 @@ + +/* + * Introduction + * ************ + * + * The following notes assume that you are familiar with the YAML specification + * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in + * some cases we are less restrictive than it requires. + * + * The process of transforming a YAML stream into a sequence of events is + * divided into two steps: Scanning and Parsing. + * + * The Scanner transforms the input stream into a sequence of tokens, while the + * Parser transforms the sequence of tokens produced by the Scanner into a + * sequence of parsing events. + * + * The Scanner is rather clever and complicated. The Parser, on the contrary, + * is a straightforward implementation of a recursive-descent parser (or, + * LL(1) parser, as it is usually called). + * + * Actually there are two issues of Scanning that might be called "clever", the + * rest is quite straightforward. The issues are "block collection start" and + * "simple keys". Both issues are explained below in detail. + * + * Here the Scanning step is explained and implemented. We start with the list + * of all the tokens produced by the Scanner together with short descriptions. + * + * Now, tokens: + * + * STREAM-START(encoding) # The stream start. + * STREAM-END # The stream end. 
+ * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive. + * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive. + * DOCUMENT-START # '---' + * DOCUMENT-END # '...' + * BLOCK-SEQUENCE-START # Indentation increase denoting a block + * BLOCK-MAPPING-START # sequence or a block mapping. + * BLOCK-END # Indentation decrease. + * FLOW-SEQUENCE-START # '[' + * FLOW-SEQUENCE-END # ']' + * FLOW-MAPPING-START # '{' + * FLOW-MAPPING-END # '}' + * BLOCK-ENTRY # '-' + * FLOW-ENTRY # ',' + * KEY # '?' or nothing (simple keys). + * VALUE # ':' + * ALIAS(anchor) # '*anchor' + * ANCHOR(anchor) # '&anchor' + * TAG(handle,suffix) # '!handle!suffix' + * SCALAR(value,style) # A scalar. + * + * The following two tokens are "virtual" tokens denoting the beginning and the + * end of the stream: + * + * STREAM-START(encoding) + * STREAM-END + * + * We pass the information about the input stream encoding with the + * STREAM-START token. + * + * The next two tokens are responsible for directives: + * + * VERSION-DIRECTIVE(major,minor) + * TAG-DIRECTIVE(handle,prefix) + * + * Example: + * + * %YAML 1.1 + * %TAG ! !foo + * %TAG !yaml! tag:yaml.org,2002: + * --- + * + * The corresponding sequence of tokens: + * + * STREAM-START(utf-8) + * VERSION-DIRECTIVE(1,1) + * TAG-DIRECTIVE("!","!foo") + * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:") + * DOCUMENT-START + * STREAM-END + * + * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole + * line. + * + * The document start and end indicators are represented by: + * + * DOCUMENT-START + * DOCUMENT-END + * + * Note that if a YAML stream contains an implicit document (without '---' + * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be + * produced. + * + * In the following examples, we present whole documents together with the + * produced tokens. + * + * 1. An implicit document: + * + * 'a scalar' + * + * Tokens: + * + * STREAM-START(utf-8) + * SCALAR("a scalar",single-quoted) + * STREAM-END + * + * 2. 
An explicit document: + * + * --- + * 'a scalar' + * ... + * + * Tokens: + * + * STREAM-START(utf-8) + * DOCUMENT-START + * SCALAR("a scalar",single-quoted) + * DOCUMENT-END + * STREAM-END + * + * 3. Several documents in a stream: + * + * 'a scalar' + * --- + * 'another scalar' + * --- + * 'yet another scalar' + * + * Tokens: + * + * STREAM-START(utf-8) + * SCALAR("a scalar",single-quoted) + * DOCUMENT-START + * SCALAR("another scalar",single-quoted) + * DOCUMENT-START + * SCALAR("yet another scalar",single-quoted) + * STREAM-END + * + * We have already introduced the SCALAR token above. The following tokens are + * used to describe aliases, anchors, tag, and scalars: + * + * ALIAS(anchor) + * ANCHOR(anchor) + * TAG(handle,suffix) + * SCALAR(value,style) + * + * The following series of examples illustrate the usage of these tokens: + * + * 1. A recursive sequence: + * + * &A [ *A ] + * + * Tokens: + * + * STREAM-START(utf-8) + * ANCHOR("A") + * FLOW-SEQUENCE-START + * ALIAS("A") + * FLOW-SEQUENCE-END + * STREAM-END + * + * 2. A tagged scalar: + * + * !!float "3.14" # A good approximation. + * + * Tokens: + * + * STREAM-START(utf-8) + * TAG("!!","float") + * SCALAR("3.14",double-quoted) + * STREAM-END + * + * 3. Various scalar styles: + * + * --- # Implicit empty plain scalars do not produce tokens. + * --- a plain scalar + * --- 'a single-quoted scalar' + * --- "a double-quoted scalar" + * --- |- + * a literal scalar + * --- >- + * a folded + * scalar + * + * Tokens: + * + * STREAM-START(utf-8) + * DOCUMENT-START + * DOCUMENT-START + * SCALAR("a plain scalar",plain) + * DOCUMENT-START + * SCALAR("a single-quoted scalar",single-quoted) + * DOCUMENT-START + * SCALAR("a double-quoted scalar",double-quoted) + * DOCUMENT-START + * SCALAR("a literal scalar",literal) + * DOCUMENT-START + * SCALAR("a folded scalar",folded) + * STREAM-END + * + * Now it's time to review collection-related tokens. 
We will start with + * flow collections: + * + * FLOW-SEQUENCE-START + * FLOW-SEQUENCE-END + * FLOW-MAPPING-START + * FLOW-MAPPING-END + * FLOW-ENTRY + * KEY + * VALUE + * + * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and + * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}' + * correspondingly. FLOW-ENTRY represent the ',' indicator. Finally the + * indicators '?' and ':', which are used for denoting mapping keys and values, + * are represented by the KEY and VALUE tokens. + * + * The following examples show flow collections: + * + * 1. A flow sequence: + * + * [item 1, item 2, item 3] + * + * Tokens: + * + * STREAM-START(utf-8) + * FLOW-SEQUENCE-START + * SCALAR("item 1",plain) + * FLOW-ENTRY + * SCALAR("item 2",plain) + * FLOW-ENTRY + * SCALAR("item 3",plain) + * FLOW-SEQUENCE-END + * STREAM-END + * + * 2. A flow mapping: + * + * { + * a simple key: a value, # Note that the KEY token is produced. + * ? a complex key: another value, + * } + * + * Tokens: + * + * STREAM-START(utf-8) + * FLOW-MAPPING-START + * KEY + * SCALAR("a simple key",plain) + * VALUE + * SCALAR("a value",plain) + * FLOW-ENTRY + * KEY + * SCALAR("a complex key",plain) + * VALUE + * SCALAR("another value",plain) + * FLOW-ENTRY + * FLOW-MAPPING-END + * STREAM-END + * + * A simple key is a key which is not denoted by the '?' indicator. Note that + * the Scanner still produce the KEY token whenever it encounters a simple key. + * + * For scanning block collections, the following tokens are used (note that we + * repeat KEY and VALUE here): + * + * BLOCK-SEQUENCE-START + * BLOCK-MAPPING-START + * BLOCK-END + * BLOCK-ENTRY + * KEY + * VALUE + * + * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation + * increase that precedes a block collection (cf. the INDENT token in Python). + * The token BLOCK-END denote indentation decrease that ends a block collection + * (cf. the DEDENT token in Python). 
However YAML has some syntax peculiarities + * that make detection of these tokens more complex. + * + * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators + * '-', '?', and ':' correspondingly. + * + * The following examples show how the tokens BLOCK-SEQUENCE-START, + * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner: + * + * 1. Block sequences: + * + * - item 1 + * - item 2 + * - + * - item 3.1 + * - item 3.2 + * - + * key 1: value 1 + * key 2: value 2 + * + * Tokens: + * + * STREAM-START(utf-8) + * BLOCK-SEQUENCE-START + * BLOCK-ENTRY + * SCALAR("item 1",plain) + * BLOCK-ENTRY + * SCALAR("item 2",plain) + * BLOCK-ENTRY + * BLOCK-SEQUENCE-START + * BLOCK-ENTRY + * SCALAR("item 3.1",plain) + * BLOCK-ENTRY + * SCALAR("item 3.2",plain) + * BLOCK-END + * BLOCK-ENTRY + * BLOCK-MAPPING-START + * KEY + * SCALAR("key 1",plain) + * VALUE + * SCALAR("value 1",plain) + * KEY + * SCALAR("key 2",plain) + * VALUE + * SCALAR("value 2",plain) + * BLOCK-END + * BLOCK-END + * STREAM-END + * + * 2. Block mappings: + * + * a simple key: a value # The KEY token is produced here. + * ? a complex key + * : another value + * a mapping: + * key 1: value 1 + * key 2: value 2 + * a sequence: + * - item 1 + * - item 2 + * + * Tokens: + * + * STREAM-START(utf-8) + * BLOCK-MAPPING-START + * KEY + * SCALAR("a simple key",plain) + * VALUE + * SCALAR("a value",plain) + * KEY + * SCALAR("a complex key",plain) + * VALUE + * SCALAR("another value",plain) + * KEY + * SCALAR("a mapping",plain) + * VALUE + * BLOCK-MAPPING-START + * KEY + * SCALAR("key 1",plain) + * VALUE + * SCALAR("value 1",plain) + * KEY + * SCALAR("key 2",plain) + * VALUE + * SCALAR("value 2",plain) + * BLOCK-END + * KEY + * SCALAR("a sequence",plain) + * VALUE + * BLOCK-SEQUENCE-START + * BLOCK-ENTRY + * SCALAR("item 1",plain) + * BLOCK-ENTRY + * SCALAR("item 2",plain) + * BLOCK-END + * BLOCK-END + * STREAM-END + * + * YAML does not always require a new block collection to start on a new + * line. 
If the current line contains only '-', '?', and ':' indicators, a new + * block collection may start at the current line. The following examples + * illustrate this case: + * + * 1. Collections in a sequence: + * + * - - item 1 + * - item 2 + * - key 1: value 1 + * key 2: value 2 + * - ? complex key + * : complex value + * + * Tokens: + * + * STREAM-START(utf-8) + * BLOCK-SEQUENCE-START + * BLOCK-ENTRY + * BLOCK-SEQUENCE-START + * BLOCK-ENTRY + * SCALAR("item 1",plain) + * BLOCK-ENTRY + * SCALAR("item 2",plain) + * BLOCK-END + * BLOCK-ENTRY + * BLOCK-MAPPING-START + * KEY + * SCALAR("key 1",plain) + * VALUE + * SCALAR("value 1",plain) + * KEY + * SCALAR("key 2",plain) + * VALUE + * SCALAR("value 2",plain) + * BLOCK-END + * BLOCK-ENTRY + * BLOCK-MAPPING-START + * KEY + * SCALAR("complex key") + * VALUE + * SCALAR("complex value") + * BLOCK-END + * BLOCK-END + * STREAM-END + * + * 2. Collections in a mapping: + * + * ? a sequence + * : - item 1 + * - item 2 + * ? a mapping + * : key 1: value 1 + * key 2: value 2 + * + * Tokens: + * + * STREAM-START(utf-8) + * BLOCK-MAPPING-START + * KEY + * SCALAR("a sequence",plain) + * VALUE + * BLOCK-SEQUENCE-START + * BLOCK-ENTRY + * SCALAR("item 1",plain) + * BLOCK-ENTRY + * SCALAR("item 2",plain) + * BLOCK-END + * KEY + * SCALAR("a mapping",plain) + * VALUE + * BLOCK-MAPPING-START + * KEY + * SCALAR("key 1",plain) + * VALUE + * SCALAR("value 1",plain) + * KEY + * SCALAR("key 2",plain) + * VALUE + * SCALAR("value 2",plain) + * BLOCK-END + * BLOCK-END + * STREAM-END + * + * YAML also permits non-indented sequences if they are included into a block + * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced: + * + * key: + * - item 1 # BLOCK-SEQUENCE-START is NOT produced here. 
+ * - item 2 + * + * Tokens: + * + * STREAM-START(utf-8) + * BLOCK-MAPPING-START + * KEY + * SCALAR("key",plain) + * VALUE + * BLOCK-ENTRY + * SCALAR("item 1",plain) + * BLOCK-ENTRY + * SCALAR("item 2",plain) + * BLOCK-END + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include + +#include + +/* + * Public API declarations. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_parser_get_token(yaml_parser_t *parser); + +YAML_DECLARE(yaml_token_t *) +yaml_parser_peek_token(yaml_parser_t *parser); + +/* + * High-level token API. + */ + +static int +yaml_parser_fetch_more_tokens(yaml_parser_t *parser); + +static int +yaml_parser_fetch_next_token(yaml_parser_t *parser); + +/* + * Potential simple keys. + */ + +static int +yaml_parser_stale_simple_keys(yaml_parser_t *parser); + +static int +yaml_parser_save_simple_key(yaml_parser_t *parser); + +static int +yaml_parser_remove_simple_key(yaml_parser_t *parser); + +/* + * Indentation treatment. + */ + +static int +yaml_parser_add_indent(yaml_parser_t *parser); + +static int +yaml_parser_remove_indent(yaml_parser_t *parser); + +/* + * Token fetchers. 
+ */ + +static int +yaml_parser_fetch_stream_start(yaml_parser_t *parser); + +static int +yaml_parser_fetch_stream_end(yaml_parser_t *parser); + +static int +yaml_parser_fetch_directive(yaml_parser_t *parser); + +static int +yaml_parser_fetch_document_start(yaml_parser_t *parser); + +static int +yaml_parser_fetch_document_end(yaml_parser_t *parser); + +static int +yaml_parser_fetch_document_indicator(yaml_parser_t *parser, + yaml_token_type_t type); + +static int +yaml_parser_fetch_flow_sequence_start(yaml_parser_t *parser); + +static int +yaml_parser_fetch_flow_mapping_start(yaml_parser_t *parser); + +static int +yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, + yaml_token_type_t type); + +static int +yaml_parser_fetch_flow_sequence_end(yaml_parser_t *parser); + +static int +yaml_parser_fetch_flow_mapping_end(yaml_parser_t *parser); + +static int +yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, + yaml_token_type_t type); + +static int +yaml_parser_fetch_flow_entry(yaml_parser_t *parser); + +static int +yaml_parser_fetch_block_entry(yaml_parser_t *parser); + +static int +yaml_parser_fetch_key(yaml_parser_t *parser); + +static int +yaml_parser_fetch_value(yaml_parser_t *parser); + +static int +yaml_parser_fetch_alias(yaml_parser_t *parser); + +static int +yaml_parser_fetch_anchor(yaml_parser_t *parser); + +static int +yaml_parser_fetch_tag(yaml_parser_t *parser); + +static int +yaml_parser_fetch_literal_scalar(yaml_parser_t *parser); + +static int +yaml_parser_fetch_folded_scalar(yaml_parser_t *parser); + +static int +yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal); + +static int +yaml_parser_fetch_single_quoted_scalar(yaml_parser_t *parser); + +static int +yaml_parser_fetch_double_quoted_scalar(yaml_parser_t *parser); + +static int +yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single); + +static int +yaml_parser_fetch_plain_scalar(yaml_parser_t *parser); + +/* + * Token scanners. 
+ */ + +static int +yaml_parser_scan_to_next_token(yaml_parser_t *parser); + +static yaml_token_t * +yaml_parser_scan_directive(yaml_parser_t *parser); + +static int +yaml_parser_scan_directive_name(yaml_parser_t *parser, + yaml_mark_t start_mark, yaml_char_t **name); + +static int +yaml_parser_scan_yaml_directive_value(yaml_parser_t *parser, + yaml_mark_t start_mark, int *major, int *minor); + +static int +yaml_parser_scan_yaml_directive_number(yaml_parser_t *parser, + yaml_mark_t start_mark, int *number); + +static int +yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, + yaml_char_t **handle, yaml_char_t **prefix); + +static yaml_token_t * +yaml_parser_scan_anchor(yaml_parser_t *parser, + yaml_token_type_t type); + +static yaml_token_t * +yaml_parser_scan_tag(yaml_parser_t *parser); + +static int +yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_char_t **handle); + +static int +yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_char_t **url); + +static yaml_token_t * +yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal); + +static int +yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser, + yaml_mark_t start_mark, int *chomping, int *increment); + +static yaml_token_t * +yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single); + +static yaml_token_t * +yaml_parser_scan_plain_scalar(yaml_parser_t *parser); + From 5e2975adebfa592f733b20715d61d912249766b3 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Wed, 7 Jun 2006 20:30:22 +0000 Subject: [PATCH 08/73] Implementing Scanner: macros and high-level functions. 
--- include/yaml/yaml.h | 20 +- src/api.c | 66 ++++++- src/scanner.c | 437 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 494 insertions(+), 29 deletions(-) diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 7ee6b28c..6acbb545 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -537,9 +537,6 @@ typedef struct { */ typedef struct { - /** Is a simple key possible? */ - int possible; - /** Is a simple key required? */ int required; @@ -585,6 +582,15 @@ typedef struct { /** The problematic value (@c -1 is none). */ int problem_value; + /** The problem position. */ + yaml_mark_t problem_mark; + + /** The error context. */ + const char *context; + + /** The context position. */ + yaml_mark_t context_mark; + /** * @} */ @@ -661,7 +667,7 @@ typedef struct { int flow_level; /** The tokens queue, which contains the current produced tokens. */ - yaml_token_t *tokens; + yaml_token_t **tokens; /** The size of the tokens queue. */ size_t tokens_size; @@ -691,7 +697,7 @@ typedef struct { int simple_key_allowed; /** The stack of potential simple keys. */ - yaml_simple_key_t *simple_keys; + yaml_simple_key_t **simple_keys; /** The size of the simple keys stack. */ size_t simple_keys_size; @@ -853,6 +859,10 @@ yaml_realloc(void *ptr, size_t size); YAML_DECLARE(void) yaml_free(void *ptr); +/** The initial size for various buffers. */ + +#define YAML_DEFAULT_SIZE 16 + /** The size of the raw buffer. */ #define YAML_RAW_BUFFER_SIZE 16384 diff --git a/src/api.c b/src/api.c index 50f118a2..c63da451 100644 --- a/src/api.c +++ b/src/api.c @@ -49,33 +49,76 @@ yaml_parser_new(void) /* Allocate the parser structure. */ parser = yaml_malloc(sizeof(yaml_parser_t)); - if (!parser) return NULL; + if (!parser) goto error; memset(parser, 0, sizeof(yaml_parser_t)); /* Allocate the raw buffer. 
*/ parser->raw_buffer = yaml_malloc(YAML_RAW_BUFFER_SIZE); - if (!parser->raw_buffer) { - yaml_free(parser); - return NULL; - } + if (!parser->raw_buffer) goto error; + memset(parser->raw_buffer, 0, YAML_RAW_BUFFER_SIZE); + parser->raw_pointer = parser->raw_buffer; parser->raw_unread = 0; /* Allocate the character buffer. */ parser->buffer = yaml_malloc(YAML_BUFFER_SIZE); - if (!parser->buffer) { - yaml_free(parser->raw_buffer); - yaml_free(parser); - return NULL; - } + if (!parser->buffer) goto error; + memset(parser->buffer, 0, YAML_BUFFER_SIZE); + parser->buffer_end = parser->buffer; parser->pointer = parser->buffer; parser->unread = 0; + /* Allocate the tokens queue. */ + + parser->tokens = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_token_t *)); + if (!parser->tokens) goto error; + memset(parser->tokens, 0, YAML_DEFAULT_SIZE*sizeof(yaml_token_t *)); + + parser->tokens_size = YAML_DEFAULT_SIZE; + parser->tokens_head = 0; + parser->tokens_tail = 0; + parser->tokens_parsed = 0; + + /* Allocate the indents stack. */ + + parser->indents = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(int)); + if (!parser->indents) goto error; + memset(parser->indents, 0, YAML_DEFAULT_SIZE*sizeof(int)); + + parser->indents_size = YAML_DEFAULT_SIZE; + parser->indents_length = 0; + + /* Allocate the stack of potential simple keys. */ + + parser->simple_keys = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *)); + if (!parser->simple_keys) goto error; + memset(parser->simple_keys, 0, YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *)); + + parser->simple_keys_size = YAML_DEFAULT_SIZE; + + /* Done. */ + return parser; + + /* On error, free allocated buffers. 
*/ + +error: + + if (!parser) return NULL; + + yaml_free(parser->simple_keys); + yaml_free(parser->indents); + yaml_free(parser->tokens); + yaml_free(parser->buffer); + yaml_free(parser->raw_buffer); + + yaml_free(parser); + + return NULL; } /* @@ -87,6 +130,9 @@ yaml_parser_delete(yaml_parser_t *parser) { assert(parser); /* Non-NULL parser object expected. */ + yaml_free(parser->simple_keys); + yaml_free(parser->indents); + yaml_free(parser->tokens); yaml_free(parser->buffer); yaml_free(parser->raw_buffer); diff --git a/src/scanner.c b/src/scanner.c index 530bd91e..2a555d0d 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -483,6 +483,107 @@ #include +/* + * Ensure that the buffer contains the required number of characters. + * Return 1 on success, 0 on failure (reader error or memory error). + */ + +#define UPDATE(parser,length) \ + (parser->unread >= (length) \ + ? 1 \ + : yaml_parser_update_buffer(parser, (length))) + +/* + * Check the octet at the specified position. + */ + +#define CHECK_AT(parser,octet,offset) \ + (parser->buffer[offset] == (yaml_char_t)(octet)) + +/* + * Check the current octet in the buffer. + */ + +#define CHECK(parser,octet) CHECK_AT(parser,(octet),0) + +/* + * Check if the character at the specified position is NUL. + */ + +#define IS_Z_AT(parser,offset) CHECK_AT(parser,'\0',(offset)) + +#define IS_Z(parser) IS_Z_AT(parser,0) + +/* + * Check if the character at the specified position is space. + */ + +#define IS_SPACE_AT(parser,offset) CHECK_AT(parser,' ',(offset)) + +#define IS_SPACE(parser) IS_SPACE_AT(parser,0) + +/* + * Check if the character at the specified position is tab. + */ + +#define IS_TAB_AT(parser,offset) CHECK_AT(parser,'\t',(offset)) + +#define IS_TAB(parser) IS_TAB_AT(parser,0) + +/* + * Check if the character at the specified position is blank (space or tab). 
+ */ + +#define IS_BLANK_AT(parser,offset) \ + (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset))) + +#define IS_BLANK(parser) IS_BLANK_AT(parser,0) + +/* + * Check if the character at the specified position is a line break. + */ + +#define IS_BREAK_AT(parser,offset) \ + (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \ + || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ + || (CHECK_AT(parser,'\xC2',(offset)) \ + && CHECK_AT(parser,'\x85',(offset+1))) /* NEL (#x85) */ \ + || (CHECK_AT(parser,'\xE2',(offset)) \ + && CHECK_AT(parser,'\x80',(offset+1)) \ + && CHECK_AT(parser,'\xA8',(offset+2))) /* LS (#x2028) */ \ + || (CHECK_AT(parser,'\xE2',(offset)) \ + && CHECK_AT(parser,'\x80',(offset+1)) \ + && CHECK_AT(parser,'\xA9',(offset+2)))) /* PS (#x2029) */ + +#define IS_BREAK(parser) IS_BREAK_AT(parser,0) + +/* + * Check if the character is a line break or NUL. + */ + +#define IS_BREAKZ_AT(parser,offset) \ + (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset))) + +#define IS_BREAKZ(parser) IS_BREAKZ_AT(parser,0) + +/* + * Check if the character is a line break, space, or NUL. + */ + +#define IS_SPACEZ_AT(parser,offset) \ + (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) + +#define IS_SPACEZ(parser) IS_SPACEZ_AT(parser,0) + +/* + * Check if the character is a line break, space, tab, or NUL. + */ + +#define IS_BLANKZ_AT(parser,offset) \ + (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) + +#define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0) + /* * Public API declarations. */ @@ -493,6 +594,17 @@ yaml_parser_get_token(yaml_parser_t *parser); YAML_DECLARE(yaml_token_t *) yaml_parser_peek_token(yaml_parser_t *parser); +/* + * Error handling. + */ + +static int +yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, + yaml_mark_t context_mark, const char *problem); + +static yaml_mark_t +yaml_parser_get_mark(yaml_parser_t *parser); + /* * High-level token API. 
*/ @@ -521,10 +633,10 @@ yaml_parser_remove_simple_key(yaml_parser_t *parser); */ static int -yaml_parser_add_indent(yaml_parser_t *parser); +yaml_parser_roll_indent(yaml_parser_t *parser, int column); static int -yaml_parser_remove_indent(yaml_parser_t *parser); +yaml_parser_unroll_indent(yaml_parser_t *parser, int column); /* * Token fetchers. @@ -590,21 +702,9 @@ yaml_parser_fetch_anchor(yaml_parser_t *parser); static int yaml_parser_fetch_tag(yaml_parser_t *parser); -static int -yaml_parser_fetch_literal_scalar(yaml_parser_t *parser); - -static int -yaml_parser_fetch_folded_scalar(yaml_parser_t *parser); - static int yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal); -static int -yaml_parser_fetch_single_quoted_scalar(yaml_parser_t *parser); - -static int -yaml_parser_fetch_double_quoted_scalar(yaml_parser_t *parser); - static int yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single); @@ -665,3 +765,312 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single); static yaml_token_t * yaml_parser_scan_plain_scalar(yaml_parser_t *parser); +/* + * Get the next token and remove it from the tokens queue. + */ + +YAML_DECLARE(yaml_token_t *) +yaml_parser_get_token(yaml_parser_t *parser) +{ + yaml_token_t *token; + + assert(parser); /* Non-NULL parser object is expected. */ + assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ + + /* Ensure that the tokens queue contains enough tokens. */ + + if (!yaml_parser_fetch_more_tokens(parser)) return NULL; + + /* Fetch the next token from the queue. */ + + token = parser->tokens[parser->tokens_head]; + + /* Move the queue head. */ + + parser->tokens[parser->tokens_head++] = NULL; + if (parser->tokens_head == parser->tokens_size) + parser->tokens_head = 0; + + parser->tokens_parsed++; + + return token; +} + +/* + * Get the next token, but don't remove it from the queue. 
+ */ + +YAML_DECLARE(yaml_token_t *) +yaml_parser_peek_token(yaml_parser_t *parser) +{ + assert(parser); /* Non-NULL parser object is expected. */ + assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ + + /* Ensure that the tokens queue contains enough tokens. */ + + if (!yaml_parser_fetch_more_tokens(parser)) return NULL; + + /* Fetch the next token from the queue. */ + + return parser->tokens[parser->tokens_head]; +} + +/* + * Set the scanner error and return 0. + */ + +static int +yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, + yaml_mark_t context_mark, const char *problem) +{ + parser->error = YAML_SCANNER_ERROR; + parser->context = context; + parser->context_mark = context_mark; + parser->problem = problem; + parser->problem_mark = yaml_parser_get_mark(parser); +} + +/* + * Get the mark for the current buffer position. + */ + +static yaml_mark_t +yaml_parser_get_mark(yaml_parser_t *parser) +{ + yaml_mark_t mark = { parser->index, parser->line, parser->column }; + + return mark; +} + + +/* + * Ensure that the tokens queue contains at least one token which can be + * returned to the Parser. + */ + +static int +yaml_parser_fetch_more_tokens(yaml_parser_t *parser) +{ + int need_more_tokens; + int k; + + /* While we need more tokens to fetch, do it. */ + + while (1) + { + /* + * Check if we really need to fetch more tokens. + */ + + need_more_tokens = 0; + + if (parser->tokens_head == parser->tokens_tail) + { + /* Queue is empty. */ + + need_more_tokens = 1; + } + else + { + /* Check if any potential simple key may occupy the head position. */ + + for (k = 0; k <= parser->flow_level; k++) { + yaml_simple_key_t *simple_key = parser->simple_keys[k]; + if (simple_key + && (simple_key->token_number == parser->tokens_parsed)) { + need_more_tokens = 1; + break; + } + } + } + + /* We are finished. */ + + if (!need_more_tokens) + break; + + /* Fetch the next token. 
*/ + + if (!yaml_parser_fetch_next_token(parser)) + return 0; + } + + return 1; +} + +/* + * The dispatcher for token fetchers. + */ + +static int +yaml_parser_fetch_next_token(yaml_parser_t *parser) +{ + /* Ensure that the buffer is initialized. */ + + if (!UPDATE(parser, 1)) + return 0; + + /* Check if we just started scanning. Fetch STREAM-START then. */ + + if (!parser->stream_start_produced) + return yaml_parser_fetch_stream_start(parser); + + /* Eat whitespaces and comments until we reach the next token. */ + + if (!yaml_parser_scan_to_next_token(parser)) + return 0; + + /* Check the indentation level against the current column. */ + + if (!yaml_parser_unroll_indent(parser, parser->column)) + return 0; + + /* + * Ensure that the buffer contains at least 4 characters. 4 is the length + * of the longest indicators ('--- ' and '... '). + */ + + if (!UPDATE(parser, 4)) + return 0; + + /* Is it the end of the stream? */ + + if (IS_Z(parser)) + return yaml_parser_fetch_stream_end(parser); + + /* Is it a directive? */ + + if (parser->column == 0 && CHECK(parser, '%')) + return yaml_parser_fetch_directive(parser); + + /* Is it the document start indicator? */ + + if (parser->column == 0 + && CHECK_AT(parser, '-', 0) + && CHECK_AT(parser, '-', 1) + && CHECK_AT(parser, '-', 2) + && IS_BLANKZ_AT(parser, 3)) + return yaml_parser_fetch_document_start(parser); + + /* Is it the document end indicator? */ + + if (parser->column == 0 + && CHECK_AT(parser, '.', 0) + && CHECK_AT(parser, '.', 1) + && CHECK_AT(parser, '.', 2) + && IS_BLANKZ_AT(parser, 3)) + return yaml_parser_fetch_document_start(parser); + + /* Is it the flow sequence start indicator? */ + + if (CHECK(parser, '[')) + return yaml_parser_fetch_flow_sequence_start(parser); + + /* Is it the flow mapping start indicator? */ + + if (CHECK(parser, '{')) + return yaml_parser_fetch_flow_mapping_start(parser); + + /* Is it the flow sequence end indicator? 
*/
+
+    if (CHECK(parser, ']'))
+        return yaml_parser_fetch_flow_sequence_end(parser);
+
+    /* Is it the flow mapping end indicator? */
+
+    if (CHECK(parser, '}'))
+        return yaml_parser_fetch_flow_mapping_end(parser);
+
+    /* Is it the flow entry indicator? */
+
+    if (CHECK(parser, ','))
+        return yaml_parser_fetch_flow_entry(parser);
+
+    /* Is it the block entry indicator? */
+
+    if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1))
+        return yaml_parser_fetch_block_entry(parser);
+
+    /*
+     * Is it the key indicator?
+     *
+     * In the flow context '?' is always the key indicator; in the block
+     * context it is an indicator only when followed by a blank, otherwise
+     * it starts a plain scalar (see the plain scalar rule below).
+     */
+
+    if (CHECK(parser, '?') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_key(parser);
+
+    /* Is it the value indicator?  The same rule as for '?' applies. */
+
+    if (CHECK(parser, ':') && (parser->flow_level || IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_value(parser);
+
+    /* Is it an alias? */
+
+    if (CHECK(parser, '*'))
+        return yaml_parser_fetch_alias(parser);
+
+    /* Is it an anchor? */
+
+    if (CHECK(parser, '&'))
+        return yaml_parser_fetch_anchor(parser);
+
+    /* Is it a tag? */
+
+    if (CHECK(parser, '!'))
+        return yaml_parser_fetch_tag(parser);
+
+    /* Is it a literal scalar? */
+
+    if (CHECK(parser, '|') && !parser->flow_level)
+        return yaml_parser_fetch_block_scalar(parser, 1);
+
+    /* Is it a folded scalar? */
+
+    if (CHECK(parser, '>') && !parser->flow_level)
+        return yaml_parser_fetch_block_scalar(parser, 0);
+
+    /* Is it a single-quoted scalar? */
+
+    if (CHECK(parser, '\''))
+        return yaml_parser_fetch_flow_scalar(parser, 1);
+
+    /* Is it a double-quoted scalar? */
+
+    if (CHECK(parser, '"'))
+        return yaml_parser_fetch_flow_scalar(parser, 0);
+
+    /*
+     * Is it a plain scalar?
+     *
+     * A plain scalar may start with any non-blank characters except
+     *
+     *      '-', '?', ':', ',', '[', ']', '{', '}',
+     *      '#', '&', '*', '!', '|', '>', '\'', '\"',
+     *      '%', '@', '`'.
+     *
+     * In the block context, it may also start with the characters
+     *
+     *      '-', '?', ':'
+     *
+     * if it is followed by a non-space character.
+     *
+     * The last rule is more restrictive than the specification requires.
+     */
+
+    if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?')
+                || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[')
+                || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}')
+                || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*')
+                || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>')
+                || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%')
+                || CHECK(parser, '@') || CHECK(parser, '`')) ||
+            (!parser->flow_level &&
+             (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) &&
+             /* The indicator must be followed by a NON-blank character. */
+             !IS_BLANKZ_AT(parser, 1)))
+        return yaml_parser_fetch_plain_scalar(parser);
+
+    /*
+     * If we don't determine the token type so far, it is an error.
+     */
+
+    return yaml_parser_set_scanner_error(parser, "while scanning for the next token",
+            yaml_parser_get_mark(parser), "found character that cannot start any token");
+}
+
From aadb786bdb0020bbb605670a33a776ef5d600b9e Mon Sep 17 00:00:00 2001
From: Kirill Simonov
Date: Thu, 8 Jun 2006 21:37:10 +0000
Subject: [PATCH 09/73] Scanner: implement everything except token scanners.

---
 src/scanner.c | 1157 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 1125 insertions(+), 32 deletions(-)

diff --git a/src/scanner.c b/src/scanner.c
index 2a555d0d..31fce5c6 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -498,7 +498,7 @@
  */
 
 #define CHECK_AT(parser,octet,offset) \
-    (parser->buffer[offset] == (yaml_char_t)(octet))
+    (parser->pointer[offset] == (yaml_char_t)(octet))
 
 /*
  * Check the current octet in the buffer.
  */
@@ -557,6 +557,11 @@
 
 #define IS_BREAK(parser) IS_BREAK_AT(parser,0)
 
+#define IS_CRLF_AT(parser,offset) \
+    (CHECK_AT(parser,'\r',(offset)) && CHECK_AT(parser,'\n',(offset)+1))
+
+#define IS_CRLF(parser) IS_CRLF_AT(parser,0)
+
 /*
  * Check if the character is a line break or NUL.
*/ @@ -584,6 +589,30 @@ #define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0) +/* + * Determine the width of the character. + */ + +#define WIDTH_AT(parser,offset) \ + ((parser->pointer[(offset)] & 0x80) == 0x00 ? 1 : \ + (parser->pointer[(offset)] & 0xE0) == 0xC0 ? 2 : \ + (parser->pointer[(offset)] & 0xF0) == 0xE0 ? 3 : \ + (parser->pointer[(offset)] & 0xF8) == 0xF0 ? 4 : 0) + +#define WIDTH(parser) WIDTH_AT(parser,0) + +/* + * Advance the buffer pointer. + */ + +#define FORWARD(parser) \ + (parser->index ++, \ + ((IS_BREAK(parser) && !IS_CRLF(parser)) ? \ + (parser->line ++, parser->column = 0) : \ + (parser->column ++)), \ + parser->unread --, \ + parser->pointer += WIDTH(parser)) + /* * Public API declarations. */ @@ -628,12 +657,30 @@ yaml_parser_save_simple_key(yaml_parser_t *parser); static int yaml_parser_remove_simple_key(yaml_parser_t *parser); +static int +yaml_parser_increase_flow_level(yaml_parser_t *parser); + +static int +yaml_parser_decrease_flow_level(yaml_parser_t *parser); + +/* + * Token manipulation. + */ + +static int +yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token); + +static int +yaml_parser_insert_token(yaml_parser_t *parser, + int number, yaml_token_t *token); + /* * Indentation treatment. 
*/ static int -yaml_parser_roll_indent(yaml_parser_t *parser, int column); +yaml_parser_roll_indent(yaml_parser_t *parser, int column, + int number, yaml_token_type_t type, yaml_mark_t mark); static int yaml_parser_unroll_indent(yaml_parser_t *parser, int column); @@ -651,32 +698,14 @@ yaml_parser_fetch_stream_end(yaml_parser_t *parser); static int yaml_parser_fetch_directive(yaml_parser_t *parser); -static int -yaml_parser_fetch_document_start(yaml_parser_t *parser); - -static int -yaml_parser_fetch_document_end(yaml_parser_t *parser); - static int yaml_parser_fetch_document_indicator(yaml_parser_t *parser, yaml_token_type_t type); -static int -yaml_parser_fetch_flow_sequence_start(yaml_parser_t *parser); - -static int -yaml_parser_fetch_flow_mapping_start(yaml_parser_t *parser); - static int yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, yaml_token_type_t type); -static int -yaml_parser_fetch_flow_sequence_end(yaml_parser_t *parser); - -static int -yaml_parser_fetch_flow_mapping_end(yaml_parser_t *parser); - static int yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, yaml_token_type_t type); @@ -694,10 +723,7 @@ static int yaml_parser_fetch_value(yaml_parser_t *parser); static int -yaml_parser_fetch_alias(yaml_parser_t *parser); - -static int -yaml_parser_fetch_anchor(yaml_parser_t *parser); +yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type); static int yaml_parser_fetch_tag(yaml_parser_t *parser); @@ -950,7 +976,8 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) && CHECK_AT(parser, '-', 1) && CHECK_AT(parser, '-', 2) && IS_BLANKZ_AT(parser, 3)) - return yaml_parser_fetch_document_start(parser); + return yaml_parser_fetch_document_indicator(parser, + YAML_DOCUMENT_START_TOKEN); /* Is it the document end indicator? 
*/ @@ -959,27 +986,32 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) && CHECK_AT(parser, '.', 1) && CHECK_AT(parser, '.', 2) && IS_BLANKZ_AT(parser, 3)) - return yaml_parser_fetch_document_start(parser); + return yaml_parser_fetch_document_indicator(parser, + YAML_DOCUMENT_END_TOKEN); /* Is it the flow sequence start indicator? */ if (CHECK(parser, '[')) - return yaml_parser_fetch_flow_sequence_start(parser); + return yaml_parser_fetch_flow_collection_start(parser, + YAML_FLOW_SEQUENCE_START_TOKEN); /* Is it the flow mapping start indicator? */ if (CHECK(parser, '{')) - return yaml_parser_fetch_flow_mapping_start(parser); + return yaml_parser_fetch_flow_collection_start(parser, + YAML_FLOW_MAPPING_START_TOKEN); /* Is it the flow sequence end indicator? */ if (CHECK(parser, ']')) - return yaml_parser_fetch_flow_sequence_end(parser); + return yaml_parser_fetch_flow_collection_end(parser, + YAML_FLOW_SEQUENCE_END_TOKEN); /* Is it the flow mapping end indicator? */ if (CHECK(parser, '}')) - return yaml_parser_fetch_flow_mapping_end(parser); + return yaml_parser_fetch_flow_collection_end(parser, + YAML_FLOW_MAPPING_END_TOKEN); /* Is it the flow entry indicator? */ @@ -1004,12 +1036,12 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) /* Is it an alias? */ if (CHECK(parser, '*')) - return yaml_parser_fetch_alias(parser); + return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN); /* Is it an anchor? */ if (CHECK(parser, '&')) - return yaml_parser_fetch_anchor(parser); + return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN); /* Is it a tag? */ @@ -1074,3 +1106,1064 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) yaml_parser_get_mark(parser), "found character that cannot start any token"); } +/* + * Check the list of potential simple keys and remove the positions that + * cannot contain simple keys anymore. 
+ */
+
+static int
+yaml_parser_stale_simple_keys(yaml_parser_t *parser)
+{
+    int level;
+
+    /* Check for a potential simple key for each flow level. */
+
+    for (level = 0; level <= parser->flow_level; level++)
+    {
+        yaml_simple_key_t *simple_key = parser->simple_keys[level];
+
+        /*
+         * The specification requires that a simple key
+         *
+         *  - is limited to a single line,
+         *  - is shorter than 1024 characters.
+         *
+         * A key is therefore stale when it started on an earlier line or
+         * more than 1024 characters before the current position.  The
+         * constant is added to the key index (not to the current index);
+         * otherwise the test holds for every key and no key could survive.
+         */
+
+        if (simple_key && (simple_key->line < parser->line ||
+                    simple_key->index+1024 < parser->index)) {
+
+            /* Check if the potential simple key to be removed is required. */
+
+            if (simple_key->required) {
+                return yaml_parser_set_scanner_error(parser,
+                        "while scanning a simple key", simple_key->mark,
+                        "could not found expected ':'");
+            }
+
+            yaml_free(simple_key);
+            parser->simple_keys[level] = NULL;
+        }
+    }
+
+    return 1;
+}
+
+/*
+ * Check if a simple key may start at the current position and add it if
+ * needed.
+ */
+
+static int
+yaml_parser_save_simple_key(yaml_parser_t *parser)
+{
+    /*
+     * A simple key is required at the current position if the scanner is in
+     * the block context and the current column coincides with the indentation
+     * level.
+     */
+
+    int required = (!parser->flow_level && parser->indent == parser->column);
+
+    /*
+     * A simple key is required only when it is the first token in the current
+     * line.  Therefore it is always allowed.  But we add a check anyway.
+     */
+
+    assert(parser->simple_key_allowed || !required);    /* Impossible. */
+
+    /*
+     * If the current position may start a simple key, save it.
+ */ + + if (parser->simple_key_allowed) + { + yaml_simple_key_t simple_key = { required, + parser->tokens_parsed + parser->tokens_tail - parser->tokens_head, + parser->index, parser->line, parser->column, + yaml_parser_get_mark(parser) }; + + if (!yaml_parser_remove_simple_key(parser)) return 0; + + parser->simple_keys[parser->flow_level] = + yaml_malloc(sizeof(yaml_simple_key_t)); + if (!parser->simple_keys[parser->flow_level]) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + *(parser->simple_keys[parser->flow_level]) = simple_key; + } + + return 1; +} + +/* + * Remove a potential simple key at the current flow level. + */ + +static int +yaml_parser_remove_simple_key(yaml_parser_t *parser) +{ + yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level]; + + if (simple_key) + { + /* If the key is required, it is an error. */ + + if (simple_key->required) { + return yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key->mark, + "could not found expected ':'"); + } + + /* Remove the key from the list. */ + + yaml_free(simple_key); + parser->simple_keys[parser->flow_level] = NULL; + } + + return 1; +} + +/* + * Increase the flow level and resize the simple key list if needed. + */ + +static int +yaml_parser_increase_flow_level(yaml_parser_t *parser) +{ + /* Check if we need to resize the list. */ + + if (parser->flow_level == parser->simple_keys_size-1) + { + yaml_simple_key_t **new_simple_keys = + yaml_realloc(parser->simple_keys, + sizeof(yaml_simple_key_t *) * parser->simple_keys_size * 2); + + if (!new_simple_keys) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + memset(new_simple_keys+parser->simple_keys_size, 0, + sizeof(yaml_simple_key_t *)*parser->simple_keys_size); + + parser->simple_keys = new_simple_keys; + parser->simple_keys_size *= 2; + } + + /* Increase the flow level and reset the simple key. 
*/ + + parser->simple_keys[++parser->flow_level] = NULL; + + return 1; +} + +/* + * Decrease the flow level. + */ + +static int +yaml_parser_decrease_flow_level(yaml_parser_t *parser) +{ + assert(parser->flow_level); /* Greater than 0. */ + assert(!parser->simple_keys[parser->flow_level]); /* Must be removed. */ + + parser->flow_level --; + + return 1; +} + +/* + * Add a token to the tail of the tokens queue. + */ + +static int +yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token) +{ + return yaml_parser_insert_token(parser, -1, token); +} + +/* + * Insert the token into the tokens queue. The number parameter is the + * ordinal number of the token. If the number is equal to -1, add the token + * to the tail of the queue. + */ + +static int +yaml_parser_insert_token(yaml_parser_t *parser, + int number, yaml_token_t *token) +{ + /* The index of the token in the queue. */ + + int index = (number == -1) + ? parser->tokens_tail - parser->tokens_head + : number - parser->tokens_parsed; + + assert(index >= 0 && index <= (parser->tokens_tail-parser->tokens_head)); + + /* Check if we need to resize the queue. */ + + if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) + { + yaml_token_t **new_tokens = yaml_realloc(parser->tokens, + sizeof(yaml_token_t *) * parser->tokens_size * 2); + + if (!new_tokens) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + memset(new_tokens+parser->tokens_size, 0, + sizeof(yaml_token_t *)*parser->tokens_size); + + parser->tokens = new_tokens; + parser->tokens_size *= 2; + } + + /* Check if we need to move the queue to the beginning of the buffer. 
*/ + + if (parser->tokens_tail == parser->tokens_size) + { + if (parser->tokens_head < parser->tokens_tail) { + memmove(parser->tokens, parser->tokens+parser->tokens_head, + sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head)); + } + parser->tokens_tail -= parser->tokens_head; + parser->tokens_head = 0; + } + + /* Check if we need to free space within the queue. */ + + if (index < (parser->tokens_tail-parser->tokens_head)) { + memmove(parser->tokens+parser->tokens_head+index+1, + parser->tokens+parser->tokens_head+index, + sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head-index)); + } + + /* Insert the token. */ + + parser->tokens[parser->tokens_head+index] = token; + parser->tokens_tail ++; + + return 1; +} + +/* + * Push the current indentation level to the stack and set the new level + * the current column is greater than the indentation level. In this case, + * append or insert the specified token into the token queue. + * + */ + +static int +yaml_parser_roll_indent(yaml_parser_t *parser, int column, + int number, yaml_token_type_t type, yaml_mark_t mark) +{ + yaml_token_t *token; + + /* In the flow context, do nothing. */ + + if (parser->flow_level) + return 1; + + if (parser->indent < column) + { + /* Check if we need to expand the indents stack. */ + + if (parser->indents_length == parser->indents_size) + { + int *new_indents = yaml_realloc(parser->indents, + sizeof(int) * parser->indents_size * 2); + + if (!new_indents) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + memset(new_indents+parser->indents_size, 0, + sizeof(int)*parser->indents_size); + + parser->indents = new_indents; + parser->indents_size *= 2; + } + + /* + * Push the current indentation level to the stack and set the new + * indentation level. + */ + + parser->indents[parser->indents_length++] = parser->indent; + parser->indent = column; + + /* Create a token. 
*/ + + token = yaml_token_new(type, mark, mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Insert the token into the queue. */ + + if (!yaml_parser_insert_token(parser, number, token)) { + yaml_token_delete(token); + return 0; + } + } + + return 1; +} + +/* + * Pop indentation levels from the indents stack until the current level + * becomes less or equal to the column. For each intendation level, append + * the BLOCK-END token. + */ + + +static int +yaml_parser_unroll_indent(yaml_parser_t *parser, int column) +{ + yaml_token_t *token; + + /* In the flow context, do nothing. */ + + if (parser->flow_level) + return 1; + + /* Loop through the intendation levels in the stack. */ + + while (parser->indent > column) + { + yaml_mark_t mark = yaml_parser_get_mark(parser); + + /* Create a token. */ + + token = yaml_token_new(YAML_BLOCK_END_TOKEN, mark, mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + /* Pop the indentation level. */ + + assert(parser->indents_length); /* Non-empty stack expected. */ + + parser->indent = parser->indents[--parser->indents_length]; + } + + return 1; +} + +/* + * Initialize the scanner and produce the STREAM-START token. + */ + +static int +yaml_parser_fetch_stream_start(yaml_parser_t *parser) +{ + yaml_mark_t mark = yaml_parser_get_mark(parser); + yaml_token_t *token; + + /* Set the initial indentation. */ + + parser->indent = -1; + + /* A simple key is allowed at the beginning of the stream. */ + + parser->simple_key_allowed = 1; + + /* We have started. */ + + parser->stream_start_produced = 1; + + /* Create the STREAM-START token. */ + + token = yaml_stream_start_token_new(parser->encoding, mark, mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. 
*/ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the STREAM-END token and shut down the scanner. + */ + +static int +yaml_parser_fetch_stream_end(yaml_parser_t *parser) +{ + yaml_mark_t mark = yaml_parser_get_mark(parser); + yaml_token_t *token; + + /* Reset the indentation level. */ + + if (!yaml_parser_unroll_indent(parser, -1)) + return 0; + + /* We have finished. */ + + parser->stream_end_produced = 1; + + /* Create the STREAM-END token. */ + + token = yaml_stream_end_token_new(mark, mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the YAML-DIRECTIVE or TAG-DIRECTIVE token. + */ + +static int +yaml_parser_fetch_directive(yaml_parser_t *parser) +{ + yaml_token_t *token; + + /* Reset the indentation level. */ + + if (!yaml_parser_unroll_indent(parser, -1)) + return 0; + + /* Reset simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + parser->simple_key_allowed = 0; + + /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */ + + token = yaml_parser_scan_directive(parser); + if (!token) return 0; + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the DOCUMENT-START or DOCUMENT-END token. + */ + +static int +yaml_parser_fetch_document_indicator(yaml_parser_t *parser, + yaml_token_type_t type) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t *token; + + /* Reset the indentation level. */ + + if (!yaml_parser_unroll_indent(parser, -1)) + return 0; + + /* Reset simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + parser->simple_key_allowed = 0; + + /* Consume the token. 
*/ + + start_mark = yaml_parser_get_mark(parser); + + FORWARD(parser); + FORWARD(parser); + FORWARD(parser); + + end_mark = yaml_parser_get_mark(parser); + + /* Create the DOCUMENT-START or DOCUMENT-END token. */ + + token = yaml_token_new(type, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. + */ + +static int +yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, + yaml_token_type_t type) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t *token; + + /* The indicators '[' and '{' may start a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* Increase the flow level. */ + + if (!yaml_parser_increase_flow_level(parser)) + return 0; + + /* A simple key may follow the indicators '[' and '{'. */ + + parser->simple_key_allowed = 1; + + /* Consume the token. */ + + start_mark = yaml_parser_get_mark(parser); + FORWARD(parser); + end_mark = yaml_parser_get_mark(parser); + + /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */ + + token = yaml_token_new(type, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. + */ + +static int +yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, + yaml_token_type_t type) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t *token; + + /* Reset any potential simple key on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Decrease the flow level. 
*/ + + if (!yaml_parser_decrease_flow_level(parser)) + return 0; + + /* No simple keys after the indicators ']' and '}'. */ + + parser->simple_key_allowed = 0; + + /* Consume the token. */ + + start_mark = yaml_parser_get_mark(parser); + FORWARD(parser); + end_mark = yaml_parser_get_mark(parser); + + /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */ + + token = yaml_token_new(type, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the FLOW-ENTRY token. + */ + +static int +yaml_parser_fetch_flow_entry(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t *token; + + /* Reset any potential simple keys on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Simple keys are allowed after ','. */ + + parser->simple_key_allowed = 1; + + /* Consume the token. */ + + start_mark = yaml_parser_get_mark(parser); + FORWARD(parser); + end_mark = yaml_parser_get_mark(parser); + + /* Create the FLOW-ENTRY token. */ + + token = yaml_token_new(YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the BLOCK-ENTRY token. + */ + +static int +yaml_parser_fetch_block_entry(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t *token; + + /* Check if the scanner is in the block context. */ + + if (!parser->flow_level) + { + /* Check if we are allowed to start a new entry. 
*/ + + if (!parser->simple_key_allowed) { + return yaml_parser_set_scanner_error(parser, NULL, + yaml_parser_get_mark(parser), + "block sequence entries are not allowed in this context"); + } + + /* Add the BLOCK-SEQUENCE-START token if needed. */ + + if (!yaml_parser_roll_indent(parser, parser->column, -1, + YAML_BLOCK_SEQUENCE_START_TOKEN, yaml_parser_get_mark(parser))) + return 0; + } + else + { + /* + * It is an error for the '-' indicator to occur in the flow context, + * but we let the Parser detect and report about it because the Parser + * is able to point to the context. + */ + } + + /* Reset any potential simple keys on the current flow level. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* Simple keys are allowed after '-'. */ + + parser->simple_key_allowed = 1; + + /* Consume the token. */ + + start_mark = yaml_parser_get_mark(parser); + FORWARD(parser); + end_mark = yaml_parser_get_mark(parser); + + /* Create the BLOCK-ENTRY token. */ + + token = yaml_token_new(YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the KEY token. + */ + +static int +yaml_parser_fetch_key(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_token_t *token; + + /* In the block context, additional checks are required. */ + + if (!parser->flow_level) + { + /* Check if we are allowed to start a new key (not nessesary simple). */ + + if (!parser->simple_key_allowed) { + return yaml_parser_set_scanner_error(parser, NULL, + yaml_parser_get_mark(parser), + "mapping keys are not allowed in this context"); + } + + /* Add the BLOCK-MAPPING-START token if needed. 
*/
+
+        if (!yaml_parser_roll_indent(parser, parser->column, -1,
+                    YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser)))
+            return 0;
+    }
+
+    /* Reset any potential simple keys on the current flow level. */
+
+    if (!yaml_parser_remove_simple_key(parser))
+        return 0;
+
+    /* Simple keys are allowed after '?' in the block context. */
+
+    parser->simple_key_allowed = (!parser->flow_level);
+
+    /* Consume the token. */
+
+    start_mark = yaml_parser_get_mark(parser);
+    FORWARD(parser);
+    end_mark = yaml_parser_get_mark(parser);
+
+    /* Create the KEY token. */
+
+    token = yaml_token_new(YAML_KEY_TOKEN, start_mark, end_mark);
+    if (!token) {
+        parser->error = YAML_MEMORY_ERROR;
+        return 0;
+    }
+
+    /* Append the token to the queue. */
+
+    if (!yaml_parser_append_token(parser, token)) {
+        yaml_token_delete(token);
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Produce the VALUE token.
+ *
+ * If a potential simple key is saved for the current flow level, the KEY
+ * token (and, in the block context, the BLOCK-MAPPING-START token) is
+ * inserted retroactively at the position where the key started.  Returns 1
+ * on success and 0 on error.
+ */
+
+static int
+yaml_parser_fetch_value(yaml_parser_t *parser)
+{
+    yaml_mark_t start_mark, end_mark;
+    yaml_token_t *token;
+
+    /* Have we found a simple key? */
+
+    if (parser->simple_keys[parser->flow_level])
+    {
+        yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level];
+
+        /* Create the KEY token. */
+
+        token = yaml_token_new(YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
+        if (!token) {
+            parser->error = YAML_MEMORY_ERROR;
+            return 0;
+        }
+
+        /* Insert the token into the queue. */
+
+        if (!yaml_parser_insert_token(parser, simple_key->token_number, token)) {
+            yaml_token_delete(token);
+            return 0;
+        }
+
+        /*
+         * In the block context, we may need to add the BLOCK-MAPPING-START
+         * token.  The mapping is indented at the column where the key
+         * starts, not at the column of the ':' indicator.
+         */
+
+        if (!yaml_parser_roll_indent(parser, simple_key->column,
+                    simple_key->token_number,
+                    YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
+            return 0;
+
+        /*
+         * Remove the simple key from the list.  The key has just been
+         * matched by ':', so it is released directly:
+         * yaml_parser_remove_simple_key() treats the removal of a required
+         * key as an error.
+         */
+
+        yaml_free(simple_key);
+        parser->simple_keys[parser->flow_level] = NULL;
+
+        /* A simple key cannot follow another simple key.
*/ + + parser->simple_key_allowed = 0; + } + else + { + /* The ':' indicator follows a complex key. */ + + /* In the block context, extra checks are required. */ + + if (!parser->flow_level) + { + /* Check if we are allowed to start a complex value. */ + + if (!parser->simple_key_allowed) { + return yaml_parser_set_scanner_error(parser, NULL, + yaml_parser_get_mark(parser), + "mapping values are not allowed in this context"); + } + + /* Add the BLOCK-MAPPING-START token if needed. */ + + if (!yaml_parser_roll_indent(parser, parser->column, -1, + YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser))) + return 0; + } + + /* Remove a potential simple key from the list. */ + + if (!yaml_parser_remove_simple_key(parser)) return 0; + + /* Simple keys after ':' are allowed in the block context. */ + + parser->simple_key_allowed = (!parser->flow_level); + } + + /* Consume the token. */ + + start_mark = yaml_parser_get_mark(parser); + FORWARD(parser); + end_mark = yaml_parser_get_mark(parser); + + /* Create the VALUE token. */ + + token = yaml_token_new(YAML_VALUE_TOKEN, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the ALIAS or ANCHOR token. + */ + +static int +yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) +{ + yaml_token_t *token; + + /* An anchor or an alias could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow an anchor or an alias. */ + + parser->simple_key_allowed = 0; + + /* Create the ALIAS or ANCHOR token. */ + + token = yaml_parser_scan_anchor(parser, type); + if (!token) return 0; + + /* Append the token to the queue. 
*/ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the TAG token. + */ + +static int +yaml_parser_fetch_tag(yaml_parser_t *parser) +{ + yaml_token_t *token; + + /* A tag could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow a tag. */ + + parser->simple_key_allowed = 0; + + /* Create the TAG token. */ + + token = yaml_parser_scan_tag(parser); + if (!token) return 0; + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens. + */ + +static int +yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) +{ + yaml_token_t *token; + + /* Remove any potential simple keys. */ + + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + /* A simple key may follow a block scalar. */ + + parser->simple_key_allowed = 1; + + /* Create the SCALAR token. */ + + token = yaml_parser_scan_block_scalar(parser, literal); + if (!token) return 0; + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens. + */ + +static int +yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) +{ + yaml_token_t *token; + + /* A plain scalar could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow a flow scalar. */ + + parser->simple_key_allowed = 0; + + /* Create the SCALAR token. */ + + token = yaml_parser_scan_flow_scalar(parser, single); + if (!token) return 0; + + /* Append the token to the queue. 
*/ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + +/* + * Produce the SCALAR(...,plain) token. + */ + +static int +yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) +{ + yaml_token_t *token; + + /* A plain scalar could be a simple key. */ + + if (!yaml_parser_save_simple_key(parser)) + return 0; + + /* A simple key cannot follow a flow scalar. */ + + parser->simple_key_allowed = 0; + + /* Create the SCALAR token. */ + + token = yaml_parser_scan_plain_scalar(parser); + if (!token) return 0; + + /* Append the token to the queue. */ + + if (!yaml_parser_append_token(parser, token)) { + yaml_token_delete(token); + return 0; + } + + return 1; +} + From badfb19a9ce487a80df23061cf3966c1f68baed7 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sun, 11 Jun 2006 17:29:09 +0000 Subject: [PATCH 10/73] Implement scanners for directives, anchors, and tags. --- src/scanner.c | 1087 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 1026 insertions(+), 61 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 31fce5c6..07723bcc 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -506,6 +506,71 @@ #define CHECK(parser,octet) CHECK_AT(parser,(octet),0) +/* + * Check if the character at the specified position is an alphabetical + * character, a digit, '_', or '-'. + */ + +#define IS_ALPHA_AT(parser,offset) \ + ((parser->pointer[offset] >= (yaml_char_t) '0' && \ + parser->pointer[offset] <= (yaml_char_t) '9') || \ + (parser->pointer[offset] >= (yaml_char_t) 'A' && \ + parser->pointer[offset] <= (yaml_char_t) 'Z') || \ + (parser->pointer[offset] >= (yaml_char_t) 'a' && \ + parser->pointer[offset] <= (yaml_char_t) 'z') || \ + parser->pointer[offset] == '_' || \ + parser->pointer[offset] == '-') + +#define IS_ALPHA(parser) IS_ALPHA_AT(parser,0) + +/* + * Check if the character at the specified position is a digit. 
+ */ + +#define IS_DIGIT_AT(parser,offset) \ + ((parser->pointer[offset] >= (yaml_char_t) '0' && \ + parser->pointer[offset] <= (yaml_char_t) '9')) + +#define IS_DIGIT(parser) IS_DIGIT_AT(parser,0) + +/* + * Get the value of a digit. + */ + +#define AS_DIGIT_AT(parser,offset) \ + (parser->pointer[offset] - (yaml_char_t) '0') + +#define AS_DIGIT(parser) AS_DIGIT_AT(parser,0) + +/* + * Check if the character at the specified position is a hex-digit. + */ + +#define IS_HEX_AT(parser,offset) \ + ((parser->pointer[offset] >= (yaml_char_t) '0' && \ + parser->pointer[offset] <= (yaml_char_t) '9') || \ + (parser->pointer[offset] >= (yaml_char_t) 'A' && \ + parser->pointer[offset] <= (yaml_char_t) 'F') || \ + (parser->pointer[offset] >= (yaml_char_t) 'a' && \ + parser->pointer[offset] <= (yaml_char_t) 'f')) + +#define IS_HEX(parser) IS_HEX_AT(parser,0) + +/* + * Get the value of a hex-digit. + */ + +#define AS_HEX_AT(parser,offset) \ + ((parser->pointer[offset] >= (yaml_char_t) 'A' && \ + parser->pointer[offset] <= (yaml_char_t) 'F') ? \ + (parser->pointer[offset] - (yaml_char_t) 'A' + 10) : \ + (parser->pointer[offset] >= (yaml_char_t) 'a' && \ + parser->pointer[offset] <= (yaml_char_t) 'f') ? \ + (parser->pointer[offset] - (yaml_char_t) 'a' + 10) : \ + (parser->pointer[offset] - (yaml_char_t) '0')) + +#define AS_HEX(parser) AS_HEX_AT(parser,0) + /* * Check if the character at the specified position is NUL. */ @@ -514,6 +579,17 @@ #define IS_Z(parser) IS_Z_AT(parser,0) +/* + * Check if the character at the specified position is BOM. + */ + +#define IS_BOM_AT(parser,offset) \ + (CHECK_AT(parser,'\xEF',(offset)) \ + && CHECK_AT(parser,'\xBB',(offset)+1) \ + && CHECK_AT(parser,'\xBF',(offset)+1)) /* BOM (#xFEFF) */ + +#define IS_BOM(parser) IS_BOM_AT(parser,0) + /* * Check if the character at the specified position is space. 
*/ @@ -547,13 +623,13 @@ (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \ || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ || (CHECK_AT(parser,'\xC2',(offset)) \ - && CHECK_AT(parser,'\x85',(offset+1))) /* NEL (#x85) */ \ + && CHECK_AT(parser,'\x85',(offset)+1)) /* NEL (#x85) */ \ || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset+1)) \ - && CHECK_AT(parser,'\xA8',(offset+2))) /* LS (#x2028) */ \ + && CHECK_AT(parser,'\x80',(offset)+1) \ + && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \ || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset+1)) \ - && CHECK_AT(parser,'\xA9',(offset+2)))) /* LS (#x2029) */ + && CHECK_AT(parser,'\x80',(offset)+1) \ + && CHECK_AT(parser,'\xA9',(offset)+2))) /* LS (#x2029) */ #define IS_BREAK(parser) IS_BREAK_AT(parser,0) @@ -605,14 +681,56 @@ * Advance the buffer pointer. */ -#define FORWARD(parser) \ +#define FORWARD(parser) \ (parser->index ++, \ - ((IS_BREAK(parser) && !IS_CRLF(parser)) ? \ - (parser->line ++, parser->column = 0) : \ - (parser->column ++)), \ + parser->column ++, \ parser->unread --, \ parser->pointer += WIDTH(parser)) +#define FORWARD_LINE(parser) \ + (IS_CRLF(parser) ? \ + (parser->index += 2, \ + parser->column = 0, \ + parser->unread -= 2, \ + parser->pointer += 2) : \ + IS_BREAK(parser) ? \ + (parser->index ++, \ + parser->column = 0, \ + parser->unread --, \ + parser->pointer += WIDTH(parser)) : 0) + +/* + * Resize a string if needed. + */ + +#define RESIZE(parser,string) \ + (string.pointer-string.buffer+5 < string.size ? 1 : \ + yaml_parser_resize_string(parser, &string)) + +/* + * Copy a character to a string buffer and advance pointers. + */ + +#define COPY(parser,string) \ + (((*parser->pointer & 0x80) == 0x00 ? \ + (*(string.pointer++) = *(parser->pointer++)) : \ + (*parser->pointer & 0xE0) == 0xC0 ? \ + (*(string.pointer++) = *(parser->pointer++), \ + *(string.pointer++) = *(parser->pointer++)) : \ + (*parser->pointer & 0xF0) == 0xE0 ? 
\ + (*(string.pointer++) = *(parser->pointer++), \ + *(string.pointer++) = *(parser->pointer++), \ + *(string.pointer++) = *(parser->pointer++)) : \ + (*parser->pointer & 0xF8) == 0xF0 ? \ + (*(string.pointer++) = *(parser->pointer++), \ + *(string.pointer++) = *(parser->pointer++), \ + *(string.pointer++) = *(parser->pointer++), \ + *(string.pointer++) = *(parser->pointer++)) : 0), \ + parser->index ++, \ + parser->column ++, \ + parser->unread --) + + /* * Public API declarations. */ @@ -634,6 +752,26 @@ yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, static yaml_mark_t yaml_parser_get_mark(yaml_parser_t *parser); +/* + * Buffers and lists. + */ + +typedef struct { + yaml_char_t *buffer; + yaml_char_t *pointer; + size_t size; +} yaml_string_t; + +static yaml_string_t +yaml_parser_new_string(yaml_parser_t *parser); + +static int +yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string); + +static int +yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, + size_t item_size); + /* * High-level token API. 
*/ @@ -752,16 +890,16 @@ yaml_parser_scan_directive_name(yaml_parser_t *parser, yaml_mark_t start_mark, yaml_char_t **name); static int -yaml_parser_scan_yaml_directive_value(yaml_parser_t *parser, +yaml_parser_scan_version_directive_value(yaml_parser_t *parser, yaml_mark_t start_mark, int *major, int *minor); static int -yaml_parser_scan_yaml_directive_number(yaml_parser_t *parser, +yaml_parser_scan_version_directive_number(yaml_parser_t *parser, yaml_mark_t start_mark, int *number); static int yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, - yaml_char_t **handle, yaml_char_t **prefix); + yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix); static yaml_token_t * yaml_parser_scan_anchor(yaml_parser_t *parser, @@ -776,7 +914,11 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, static int yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, - yaml_mark_t start_mark, yaml_char_t **url); + yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri); + +static int +yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_string_t *string); static yaml_token_t * yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal); @@ -841,6 +983,78 @@ yaml_parser_peek_token(yaml_parser_t *parser) return parser->tokens[parser->tokens_head]; } +/* + * Create a new string. + */ + +static yaml_string_t +yaml_parser_new_string(yaml_parser_t *parser) +{ + yaml_string_t string = { NULL, NULL, 0 }; + + string.buffer = yaml_malloc(YAML_DEFAULT_SIZE); + if (!string.buffer) { + parser->error = YAML_MEMORY_ERROR; + return string; + } + + memset(string.buffer, 0, YAML_DEFAULT_SIZE); + string.pointer = string.buffer; + string.size = YAML_DEFAULT_SIZE; + + return string; +} + +/* + * Double the size of a string. 
+ */ + +static int +yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string) +{ + yaml_char_t *new_buffer = yaml_realloc(string->buffer, string->size*2); + + if (!new_buffer) { + yaml_free(string->buffer); + string->buffer = NULL; + string->pointer = NULL; + string->size = 0; + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + memset(new_buffer+string->size, 0, string->size); + + string->pointer = new_buffer + (string->buffer-string->pointer); + string->buffer = new_buffer; + string->size *= 2; + + return 1; +} + +/* + * Double a list. + */ + +static int +yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, + size_t item_size) +{ + void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2); + + if (!new_buffer) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + memset(new_buffer+(*size), 0, item_size*(*size)); + + *buffer = new_buffer; + *size *= 2; + + return 1; +} + /* * Set the scanner error and return 0. */ @@ -1234,22 +1448,10 @@ yaml_parser_increase_flow_level(yaml_parser_t *parser) { /* Check if we need to resize the list. */ - if (parser->flow_level == parser->simple_keys_size-1) - { - yaml_simple_key_t **new_simple_keys = - yaml_realloc(parser->simple_keys, - sizeof(yaml_simple_key_t *) * parser->simple_keys_size * 2); - - if (!new_simple_keys) { - parser->error = YAML_MEMORY_ERROR; + if (parser->flow_level == parser->simple_keys_size-1) { + if (!yaml_parser_resize_list(parser, (void **)&parser->simple_keys, + &parser->simple_keys_size, sizeof(yaml_simple_key_t *))) return 0; - } - - memset(new_simple_keys+parser->simple_keys_size, 0, - sizeof(yaml_simple_key_t *)*parser->simple_keys_size); - - parser->simple_keys = new_simple_keys; - parser->simple_keys_size *= 2; } /* Increase the flow level and reset the simple key. */ @@ -1304,21 +1506,10 @@ yaml_parser_insert_token(yaml_parser_t *parser, /* Check if we need to resize the queue. 
*/ - if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) - { - yaml_token_t **new_tokens = yaml_realloc(parser->tokens, - sizeof(yaml_token_t *) * parser->tokens_size * 2); - - if (!new_tokens) { - parser->error = YAML_MEMORY_ERROR; + if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) { + if (!yaml_parser_resize_list(parser, (void **)&parser->tokens, + &parser->tokens_size, sizeof(yaml_token_t *))) return 0; - } - - memset(new_tokens+parser->tokens_size, 0, - sizeof(yaml_token_t *)*parser->tokens_size); - - parser->tokens = new_tokens; - parser->tokens_size *= 2; } /* Check if we need to move the queue to the beginning of the buffer. */ @@ -1371,21 +1562,10 @@ yaml_parser_roll_indent(yaml_parser_t *parser, int column, { /* Check if we need to expand the indents stack. */ - if (parser->indents_length == parser->indents_size) - { - int *new_indents = yaml_realloc(parser->indents, - sizeof(int) * parser->indents_size * 2); - - if (!new_indents) { - parser->error = YAML_MEMORY_ERROR; + if (parser->indents_length == parser->indents_size) { + if (!yaml_parser_resize_list(parser, (void **)&parser->indents, + &parser->indents_size, sizeof(int))) return 0; - } - - memset(new_indents+parser->indents_size, 0, - sizeof(int)*parser->indents_size); - - parser->indents = new_indents; - parser->indents_size *= 2; } /* @@ -1940,7 +2120,8 @@ yaml_parser_fetch_value(yaml_parser_t *parser) /* Remove the simple key from the list. */ - if (!yaml_parser_remove_simple_key(parser)) return 0; + yaml_free(simple_key); + parser->simple_keys[parser->flow_level] = NULL; /* A simple key cannot follow another simple key. */ @@ -1969,10 +2150,6 @@ yaml_parser_fetch_value(yaml_parser_t *parser) return 0; } - /* Remove a potential simple key from the list. */ - - if (!yaml_parser_remove_simple_key(parser)) return 0; - /* Simple keys after ':' are allowed in the block context. 
*/ parser->simple_key_allowed = (!parser->flow_level); @@ -2167,3 +2344,791 @@ yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) return 1; } +/* + * Eat whitespaces and comments until the next token is found. + */ + +static int +yaml_parser_scan_to_next_token(yaml_parser_t *parser) +{ + /* Until the next token is not found. */ + + while (1) + { + /* Allow the BOM mark to start a line. */ + + if (!UPDATE(parser, 1)) return 0; + + if (parser->column == 0 && IS_BOM(parser)) + FORWARD(parser); + + /* + * Eat whitespaces. + * + * Tabs are allowed: + * + * - in the flow context; + * - in the block context, but not at the beginning of the line or + * after '-', '?', or ':' (complex value). + */ + + if (!UPDATE(parser, 1)) return 0; + + while (CHECK(parser,' ') || + ((parser->flow_level || !parser->simple_key_allowed) && + CHECK(parser, '\t'))) { + FORWARD(parser); + if (!UPDATE(parser, 1)) return 0; + } + + /* Eat a comment until a line break. */ + + if (CHECK(parser, '#')) { + while (!IS_BREAKZ(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) return 0; + } + } + + /* If it is a line break, eat it. */ + + if (IS_BREAK(parser)) + { + if (!UPDATE(parser, 2)) return 0; + FORWARD_LINE(parser); + + /* In the block context, a new line may start a simple key. */ + + if (!parser->flow_level) { + parser->simple_key_allowed = 1; + } + } + else + { + /* We have found a token. */ + + break; + } + } + + return 1; +} + +/* + * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * %TAG !yaml! tag:yaml.org,2002: \n + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + */ + +static yaml_token_t * +yaml_parser_scan_directive(yaml_parser_t *parser) +{ + yaml_mark_t start_mark, end_mark; + yaml_char_t *name = NULL; + int major, minor; + yaml_char_t *handle = NULL, *prefix = NULL; + yaml_token_t *token = NULL; + + /* Eat '%'. 
*/ + + start_mark = yaml_parser_get_mark(parser); + + FORWARD(parser); + + /* Scan the directive name. */ + + if (!yaml_parser_scan_directive_name(parser, start_mark, &name)) + goto error; + + /* Is it a YAML directive? */ + + if (strcmp((char *)name, "YAML") == 0) + { + /* Scan the VERSION directive value. */ + + if (!yaml_parser_scan_version_directive_value(parser, start_mark, + &major, &minor)) + goto error; + + end_mark = yaml_parser_get_mark(parser); + + /* Create a VERSION-DIRECTIVE token. */ + + token = yaml_version_directive_token_new(major, minor, + start_mark, end_mark); + if (!token) goto error; + } + + /* Is it a TAG directive? */ + + else if (strcmp((char *)name, "TAG") == 0) + { + /* Scan the TAG directive value. */ + + if (!yaml_parser_scan_tag_directive_value(parser, start_mark, + &handle, &prefix)) + goto error; + + end_mark = yaml_parser_get_mark(parser); + + /* Create a TAG-DIRECTIVE token. */ + + token = yaml_tag_directive_token_new(handle, prefix, + start_mark, end_mark); + if (!token) goto error; + } + + /* Unknown directive. */ + + else + { + yaml_parser_set_scanner_error(parser, "While scanning a directive", + start_mark, "found uknown directive name"); + goto error; + } + + /* Eat the rest of the line including any comments. */ + + while (IS_BLANK(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) goto error; + } + + if (CHECK(parser, '#')) { + while (!IS_BREAKZ(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) goto error; + } + } + + /* Check if we are at the end of the line. */ + + if (!IS_BREAKZ(parser)) { + yaml_parser_set_scanner_error(parser, "While scanning a directive", + start_mark, "did not found expected comment or line break"); + goto error; + } + + /* Eat a line break. 
*/ + + if (IS_BREAK(parser)) { + if (!UPDATE(parser, 2)) goto error; + FORWARD_LINE(parser); + } + + yaml_free(name); + + return token; + +error: + yaml_free(token); + yaml_free(prefix); + yaml_free(handle); + yaml_free(name); + return NULL; +} + +/* + * Scan the directive name. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^^^^ + * %TAG !yaml! tag:yaml.org,2002: \n + * ^^^ + */ + +static int +yaml_parser_scan_directive_name(yaml_parser_t *parser, + yaml_mark_t start_mark, yaml_char_t **name) +{ + yaml_string_t string = yaml_parser_new_string(parser); + + if (!string.buffer) goto error; + + /* Consume the directive name. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_ALPHA(parser)) + { + if (!RESIZE(parser, string)) goto error; + COPY(parser, string); + if (!UPDATE(parser, 1)) goto error; + } + + /* Check if the name is empty. */ + + if (string.buffer == string.pointer) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "cannot found expected directive name"); + goto error; + } + + /* Check for an blank character after the name. */ + + if (!IS_BLANKZ(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a directive", + start_mark, "found unexpected non-alphabetical character"); + goto error; + } + + *name = string.buffer; + + return 1; + +error: + yaml_free(string.buffer); + return 0; +} + +/* + * Scan the value of VERSION-DIRECTIVE. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^^^^^^ + */ + +static int +yaml_parser_scan_version_directive_value(yaml_parser_t *parser, + yaml_mark_t start_mark, int *major, int *minor) +{ + /* Eat whitespaces. */ + + if (!UPDATE(parser, 1)) return 0; + + while (IS_BLANK(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) return 0; + } + + /* Consume the major version number. */ + + if (!yaml_parser_scan_version_directive_number(parser, start_mark, major)) + return 0; + + /* Eat '.'. 
*/ + + if (!CHECK(parser, '.')) { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected digit or '.' character"); + } + + FORWARD(parser); + + /* Consume the minor version number. */ + + if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor)) + return 0; +} + +#define MAX_NUMBER_LENGTH 9 + +/* + * Scan the version number of VERSION-DIRECTIVE. + * + * Scope: + * %YAML 1.1 # a comment \n + * ^ + * %YAML 1.1 # a comment \n + * ^ + */ + +static int +yaml_parser_scan_version_directive_number(yaml_parser_t *parser, + yaml_mark_t start_mark, int *number) +{ + int value = 0; + size_t length = 0; + + /* Repeat while the next character is digit. */ + + if (!UPDATE(parser, 1)) return 0; + + while (IS_DIGIT(parser)) + { + /* Check if the number is too long. */ + + if (++length > MAX_NUMBER_LENGTH) { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "found extremely long version number"); + } + + value = value*10 + AS_DIGIT(parser); + + FORWARD(parser); + + if (!UPDATE(parser, 1)) return 0; + } + + /* Check if the number was present. */ + + if (!length) { + return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", + start_mark, "did not find expected version number"); + } + + *number = value; + + return 1; +} + +/* + * Scan the value of a TAG-DIRECTIVE token. + * + * Scope: + * %TAG !yaml! tag:yaml.org,2002: \n + * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + */ + +static int +yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, + yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix) +{ + yaml_char_t *handle_value = NULL; + yaml_char_t *prefix_value = NULL; + + /* Eat whitespaces. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_BLANK(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) goto error; + } + + /* Scan a handle. 
*/ + + if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value)) + goto error; + + /* Expect a whitespace. */ + + if (!UPDATE(parser, 1)) goto error; + + if (!IS_BLANK(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace"); + goto error; + } + + /* Eat whitespaces. */ + + while (IS_BLANK(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) goto error; + } + + /* Scan a prefix. */ + + if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value)) + goto error; + + /* Expect a whitespace or line break. */ + + if (!UPDATE(parser, 1)) goto error; + + if (!IS_BLANKZ(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", + start_mark, "did not find expected whitespace or line break"); + goto error; + } + + *handle = handle_value; + *prefix = prefix_value; + + return 1; + +error: + yaml_free(handle_value); + yaml_free(prefix_value); + return 0; +} + +static yaml_token_t * +yaml_parser_scan_anchor(yaml_parser_t *parser, + yaml_token_type_t type) +{ + int length = 0; + yaml_mark_t start_mark, end_mark; + yaml_token_t *token = NULL; + yaml_string_t string = yaml_parser_new_string(parser); + + if (!string.buffer) goto error; + + /* Eat the indicator character. */ + + start_mark = yaml_parser_get_mark(parser); + + FORWARD(parser); + + /* Consume the value. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_ALPHA(parser)) { + if (!RESIZE(parser, string)) goto error; + COPY(parser, string); + if (!UPDATE(parser, 1)) goto error; + length ++; + } + + end_mark = yaml_parser_get_mark(parser); + + /* + * Check if length of the anchor is greater than 0 and it is followed by + * a whitespace character or one of the indicators: + * + * '?', ':', ',', ']', '}', '%', '@', '`'. 
+ */ + + if (!length || !(IS_BLANKZ(parser) || CHECK(parser, '?') || CHECK(parser, ':') || + CHECK(parser, ',') || CHECK(parser, ']') || CHECK(parser, '}') || + CHECK(parser, '%') || CHECK(parser, '@') || CHECK(parser, '`'))) { + yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ? + "while scanning an anchor" : "while scanning an alias", start_mark, + "did not find expected alphabetic or numeric character"); + goto error; + } + + /* Create a token. */ + + token = type == YAML_ANCHOR_TOKEN ? + yaml_anchor_token_new(string.buffer, start_mark, end_mark) : + yaml_alias_token_new(string.buffer, start_mark, end_mark); + if (!token) goto error; + + return token; + +error: + yaml_free(string.buffer); + yaml_free(token); + return 0; +} + +/* + * Scan a TAG token. + */ + +static yaml_token_t * +yaml_parser_scan_tag(yaml_parser_t *parser) +{ + yaml_char_t *handle = NULL; + yaml_char_t *suffix = NULL; + yaml_token_t *token = NULL; + yaml_mark_t start_mark, end_mark; + + start_mark = yaml_parser_get_mark(parser); + + /* Check if the tag is in the canonical form. */ + + if (!UPDATE(parser, 2)) goto error; + + if (CHECK_AT(parser, '<', 1)) + { + /* Set the handle to '' */ + + handle = yaml_malloc(1); + if (!handle) goto error; + handle[0] = '\0'; + + /* Eat '!<' */ + + FORWARD(parser); + FORWARD(parser); + + /* Consume the tag value. */ + + if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) + goto error; + + /* Check for '>' and eat it. */ + + if (!CHECK(parser, '>')) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not find the expected '>'"); + goto error; + } + + FORWARD(parser); + } + else + { + /* The tag has either the '!suffix' or the '!handle!suffix' form. */ + + /* First, try to scan a handle. */ + + if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle)) + goto error; + + /* Check if it is, indeed, handle. */ + + if (handle[0] == '!' 
&& handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!') + { + /* Scan the suffix now. */ + + if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix)) + goto error; + } + else + { + /* It wasn't a handle after all. Scan the rest of the tag. */ + + if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix)) + goto error; + + /* Set the handle to '!'. */ + + yaml_free(handle); + handle = yaml_malloc(2); + if (!handle) goto error; + handle[0] = '!'; + handle[1] = '\0'; + } + } + + /* Check the character which ends the tag. */ + + if (!UPDATE(parser, 1)) goto error; + + if (!IS_BLANKZ(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a tag", + start_mark, "did not found expected whitespace or line break"); + goto error; + } + + end_mark = yaml_parser_get_mark(parser); + + /* Create a token. */ + + token = yaml_tag_token_new(handle, suffix, start_mark, end_mark); + if (!token) goto error; + + return token; + +error: + yaml_free(handle); + yaml_free(suffix); + return NULL; +} + +/* + * Scan a tag handle. + */ + +static int +yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_char_t **handle) +{ + yaml_string_t string = yaml_parser_new_string(parser); + + if (!string.buffer) goto error; + + /* Check the initial '!' character. */ + + if (!UPDATE(parser, 1)) goto error; + + if (!CHECK(parser, '!')) { + yaml_parser_set_scanner_error(parser, directive ? + "while scanning a tag directive" : "while scanning a tag", + start_mark, "did not find expected '!'"); + goto error; + } + + /* Copy the '!' character. */ + + COPY(parser, string); + + /* Copy all subsequent alphabetical and numerical characters. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_ALPHA(parser)) + { + if (!RESIZE(parser, string)) goto error; + COPY(parser, string); + if (!UPDATE(parser, 1)) goto error; + } + + /* Check if the trailing character is '!' and copy it. 
*/ + + if (CHECK(parser, '!')) + { + if (!RESIZE(parser, string)) goto error; + COPY(parser, string); + } + else + { + /* + * It's not really a tag handle. If it's a %TAG directive, it's an + * error. If it's a tag token, it must be a part of URI. + */ + + if (directive) { + yaml_parser_set_scanner_error(parser, "while parsing a directive", + start_mark, "did not find expected '!'"); + goto error; + } + } + + *handle = string.buffer; + + return 1; + +error: + yaml_free(string.buffer); + return 0; +} + +/* + * Scan a tag. + */ + +static int +yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, + yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri) +{ + size_t length = head ? strlen((char *)head) : 0; + yaml_string_t string = yaml_parser_new_string(parser); + + if (!string.buffer) goto error; + + /* Resize the string to include the head. */ + + while (string.size <= length) { + if (!yaml_parser_resize_string(parser, &string)) goto error; + } + + /* Copy the head if needed. */ + + if (length) { + memcpy(string.buffer, head, length); + string.pointer += length; + } + + /* Scan the tag. */ + + if (!UPDATE(parser, 1)) goto error; + + /* + * The set of characters that may appear in URI is as follows: + * + * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', + * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', + * '%'. + */ + + while (IS_ALPHA(parser) || CHECK(parser, ';') || CHECK(parser, '/') || + CHECK(parser, '?') || CHECK(parser, ':') || CHECK(parser, '@') || + CHECK(parser, '&') || CHECK(parser, '=') || CHECK(parser, '+') || + CHECK(parser, '$') || CHECK(parser, ',') || CHECK(parser, '.') || + CHECK(parser, '!') || CHECK(parser, '~') || CHECK(parser, '*') || + CHECK(parser, '\'') || CHECK(parser, '(') || CHECK(parser, ')') || + CHECK(parser, '[') || CHECK(parser, ']') || CHECK(parser, '%')) + { + if (!RESIZE(parser, string)) goto error; + + /* Check if it is a URI-escape sequence. 
*/ + + if (CHECK(parser, '%')) { + if (!yaml_parser_scan_uri_escapes(parser, + directive, start_mark, &string)) goto error; + } + else { + COPY(parser, string); + } + + length ++; + if (!UPDATE(parser, 1)) goto error; + } + + /* Check if the tag is non-empty. */ + + if (!length) { + yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "did not find expected tag URI"); + goto error; + } + + *uri = string.buffer; + + return 1; + +error: + yaml_free(string.buffer); + return 0; +} + +/* + * Decode an URI-escape sequence corresponding to a single UTF-8 character. + */ + +static int +yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, + yaml_mark_t start_mark, yaml_string_t *string) +{ + int width = 0; + + /* Decode the required number of characters. */ + + do { + + unsigned char octet = 0; + + /* Check for a URI-escaped octet. */ + + if (!UPDATE(parser, 3)) return 0; + + if (!(CHECK(parser, '%') && IS_HEX_AT(parser, 1) && IS_HEX_AT(parser, 2))) { + return yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "did not find URI escaped octet"); + } + + /* Get the octet. */ + + octet = (AS_HEX_AT(parser, 1) << 4) + AS_HEX_AT(parser, 2); + + /* If it is the leading octet, determine the length of the UTF-8 sequence. */ + + if (!width) + { + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + if (!width) { + return yaml_parser_set_scanner_error(parser, directive ? + "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "found an incorrect leading UTF-8 octet"); + } + } + else + { + /* Check if the trailing octet is correct. */ + + if ((octet & 0xC0) != 0x80) { + return yaml_parser_set_scanner_error(parser, directive ? 
+ "while parsing a %TAG directive" : "while parsing a tag", + start_mark, "found an incorrect trailing UTF-8 octet"); + } + } + + /* Copy the octet and move the pointers. */ + + *(string->pointer++) = octet; + FORWARD(parser); + FORWARD(parser); + FORWARD(parser); + + } while (--width); + + return 1; +} + From 339b45df067d4b9e6d492f38c518fda02ea4b426 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 16 Jun 2006 20:42:55 +0000 Subject: [PATCH 11/73] Implement the block scalar scanner. --- src/scanner.c | 432 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 412 insertions(+), 20 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index 07723bcc..949be39d 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -629,7 +629,7 @@ && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \ || (CHECK_AT(parser,'\xE2',(offset)) \ && CHECK_AT(parser,'\x80',(offset)+1) \ - && CHECK_AT(parser,'\xA9',(offset)+2))) /* LS (#x2029) */ + && CHECK_AT(parser,'\xA9',(offset)+2))) /* PS (#x2029) */ #define IS_BREAK(parser) IS_BREAK_AT(parser,0) @@ -691,11 +691,13 @@ (IS_CRLF(parser) ? \ (parser->index += 2, \ parser->column = 0, \ + parser->line ++, \ parser->unread -= 2, \ parser->pointer += 2) : \ IS_BREAK(parser) ? \ (parser->index ++, \ parser->column = 0, \ + parser->line ++, \ parser->unread --, \ parser->pointer += WIDTH(parser)) : 0) @@ -704,8 +706,8 @@ */ #define RESIZE(parser,string) \ - (string.pointer-string.buffer+5 < string.size ? 1 : \ - yaml_parser_resize_string(parser, &string)) + ((string).pointer-(string).buffer+5 < (string).size ? 1 : \ + yaml_parser_resize_string(parser, &(string))) /* * Copy a character to a string buffer and advance pointers. @@ -713,23 +715,68 @@ #define COPY(parser,string) \ (((*parser->pointer & 0x80) == 0x00 ? \ - (*(string.pointer++) = *(parser->pointer++)) : \ + (*((string).pointer++) = *(parser->pointer++)) : \ (*parser->pointer & 0xE0) == 0xC0 ? 
\ - (*(string.pointer++) = *(parser->pointer++), \ - *(string.pointer++) = *(parser->pointer++)) : \ + (*((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++)) : \ (*parser->pointer & 0xF0) == 0xE0 ? \ - (*(string.pointer++) = *(parser->pointer++), \ - *(string.pointer++) = *(parser->pointer++), \ - *(string.pointer++) = *(parser->pointer++)) : \ + (*((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++)) : \ (*parser->pointer & 0xF8) == 0xF0 ? \ - (*(string.pointer++) = *(parser->pointer++), \ - *(string.pointer++) = *(parser->pointer++), \ - *(string.pointer++) = *(parser->pointer++), \ - *(string.pointer++) = *(parser->pointer++)) : 0), \ + (*((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++)) : 0), \ parser->index ++, \ parser->column ++, \ parser->unread --) - + +/* + * Copy a line break character to a string buffer and advance pointers. + */ + +#define COPY_LINE(parser,string) \ + ((CHECK_AT(parser,'\r',0) && CHECK_AT(parser,'\n',1)) ? /* CR LF -> LF */ \ + (*((string).pointer++) = (yaml_char_t) '\n', \ + parser->pointer += 2, \ + parser->index += 2, \ + parser->column = 0, \ + parser->line ++, \ + parser->unread -= 2) : \ + (CHECK_AT(parser,'\r',0) || CHECK_AT(parser,'\n',0)) ? /* CR|LF -> LF */ \ + (*((string).pointer++) = (yaml_char_t) '\n', \ + parser->pointer ++, \ + parser->index ++, \ + parser->column = 0, \ + parser->line ++, \ + parser->unread --) : \ + (CHECK_AT(parser,'\xC2',0) && CHECK_AT(parser,'\x85',1)) ? 
/* NEL -> LF */ \ + (*((string).pointer++) = (yaml_char_t) '\n', \ + parser->pointer += 2, \ + parser->index ++, \ + parser->column = 0, \ + parser->line ++, \ + parser->unread --) : \ + (CHECK_AT(parser,'\xE2',0) && \ + CHECK_AT(parser,'\x80',1) && \ + (CHECK_AT(parser,'\xA8',2) || \ + CHECK_AT(parser,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ + (*((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++), \ + *((string).pointer++) = *(parser->pointer++), \ + parser->index ++, \ + parser->column = 0, \ + parser->line ++, \ + parser->unread --) : 0) + +/* + * Append a string to another string and clear the former string. + */ + +#define JOIN(parser,head_string,tail_string) \ + (yaml_parser_join_string(parser, &(head_string), &(tail_string)) && \ + yaml_parser_clear_string(parser, &(tail_string))) /* * Public API declarations. @@ -768,6 +815,13 @@ yaml_parser_new_string(yaml_parser_t *parser); static int yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string); +static int +yaml_parser_join_string(yaml_parser_t *parser, + yaml_string_t *string1, yaml_string_t *string2); + +static int +yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string); + static int yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, size_t item_size); @@ -923,6 +977,11 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, static yaml_token_t * yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal); +static int +yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, + int *indent, yaml_string_t *breaks, + yaml_mark_t start_mark, yaml_mark_t *end_mark); + static int yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser, yaml_mark_t start_mark, int *chomping, int *increment); @@ -1032,6 +1091,42 @@ yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string) return 1; } +/* + * Append a string to another string. 
+ */ + +static int +yaml_parser_join_string(yaml_parser_t *parser, + yaml_string_t *string1, yaml_string_t *string2) +{ + if (string2->buffer == string2->pointer) return 1; + + while (string1->pointer - string1->buffer + string2->pointer - string2->buffer + 1 + > string1->size) { + if (!yaml_parser_resize_string(parser, string1)) return 0; + } + + memcpy(string1->pointer, string2->buffer, string2->pointer-string2->buffer); + + return 1; +} + +/* + * Fill the string with NULs and move the pointer to the beginning. + */ + +static int +yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string) +{ + if (string->buffer == string->pointer) return 1; + + memset(string->buffer, 0, string->pointer-string->buffer); + + string->pointer = string->buffer; + + return 1; +} + /* * Double a list. */ @@ -2460,7 +2555,10 @@ yaml_parser_scan_directive(yaml_parser_t *parser) token = yaml_version_directive_token_new(major, minor, start_mark, end_mark); - if (!token) goto error; + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } } /* Is it a TAG directive? */ @@ -2479,14 +2577,17 @@ yaml_parser_scan_directive(yaml_parser_t *parser) token = yaml_tag_directive_token_new(handle, prefix, start_mark, end_mark); - if (!token) goto error; + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } } /* Unknown directive. */ else { - yaml_parser_set_scanner_error(parser, "While scanning a directive", + yaml_parser_set_scanner_error(parser, "while scanning a directive", start_mark, "found uknown directive name"); goto error; } @@ -2508,7 +2609,7 @@ yaml_parser_scan_directive(yaml_parser_t *parser) /* Check if we are at the end of the line. 
*/ if (!IS_BREAKZ(parser)) { - yaml_parser_set_scanner_error(parser, "While scanning a directive", + yaml_parser_set_scanner_error(parser, "while scanning a directive", start_mark, "did not found expected comment or line break"); goto error; } @@ -2801,7 +2902,10 @@ yaml_parser_scan_anchor(yaml_parser_t *parser, token = type == YAML_ANCHOR_TOKEN ? yaml_anchor_token_new(string.buffer, start_mark, end_mark) : yaml_alias_token_new(string.buffer, start_mark, end_mark); - if (!token) goto error; + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } return token; @@ -2907,7 +3011,10 @@ yaml_parser_scan_tag(yaml_parser_t *parser) /* Create a token. */ token = yaml_tag_token_new(handle, suffix, start_mark, end_mark); - if (!token) goto error; + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } return token; @@ -3132,3 +3239,288 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, return 1; } +/* + * Scan a block scalar. + */ + +static yaml_token_t * +yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) +{ + yaml_mark_t start_mark; + yaml_mark_t end_mark; + yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t line_break = yaml_parser_new_string(parser); + yaml_string_t breaks = yaml_parser_new_string(parser); + yaml_token_t *token = NULL; + int chomping = 0; + int increment = 0; + int indent = 0; + int leading_blank = 0; + int trailing_blank = 0; + + if (!string.buffer) goto error; + if (!line_break.buffer) goto error; + if (!breaks.buffer) goto error; + + /* Eat the indicator '|' or '>'. */ + + start_mark = yaml_parser_get_mark(parser); + + FORWARD(parser); + + /* Scan the additional block scalar indicators. */ + + if (!UPDATE(parser, 1)) goto error; + + /* Check for a chomping indicator. */ + + if (CHECK(parser, '+') || CHECK(parser, '-')) + { + /* Set the chomping method and eat the indicator. */ + + chomping = CHECK(parser, '+') ? 
+1 : -1; + + FORWARD(parser); + + /* Check for an indentation indicator. */ + + if (!UPDATE(parser, 1)) goto error; + + if (IS_DIGIT(parser)) + { + /* Check that the intendation is greater than 0. */ + + if (CHECK(parser, '0')) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an intendation indicator equal to 0"); + goto error; + } + + /* Get the intendation level and eat the indicator. */ + + increment = AS_DIGIT(parser); + + FORWARD(parser); + } + } + + /* Do the same as above, but in the opposite order. */ + + else if (IS_DIGIT(parser)) + { + if (CHECK(parser, '0')) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found an intendation indicator equal to 0"); + goto error; + } + + increment = AS_DIGIT(parser); + + FORWARD(parser); + + if (!UPDATE(parser, 1)) goto error; + + if (CHECK(parser, '+') || CHECK(parser, '-')) { + chomping = CHECK(parser, '+') ? +1 : -1; + FORWARD(parser); + } + } + + /* Eat whitespaces and comments to the end of the line. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_BLANK(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) goto error; + } + + if (CHECK(parser, '#')) { + while (!IS_BREAKZ(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) goto error; + } + } + + /* Check if we are at the end of the line. */ + + if (!IS_BREAKZ(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "did not found expected comment or line break"); + goto error; + } + + /* Eat a line break. */ + + if (IS_BREAK(parser)) { + if (!UPDATE(parser, 2)) goto error; + FORWARD_LINE(parser); + } + + end_mark = yaml_parser_get_mark(parser); + + /* Set the intendation level if it was specified. */ + + if (increment) { + indent = parser->indent >= 0 ? parser->indent+increment : increment; + } + + /* Scan the leading line breaks and determine the indentation level if needed. 
*/ + + if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &breaks, + start_mark, &end_mark)) goto error; + + /* Scan the block scalar content. */ + + if (!UPDATE(parser, 1)) goto error; + + while (parser->column == indent && !IS_Z(parser)) + { + /* + * We are at the beginning of a non-empty line. + */ + + /* Is it a trailing whitespace? */ + + trailing_blank = IS_BLANK(parser); + + /* Check if we need to fold the leading line break. */ + + if (!literal && (*line_break.buffer == '\n') + && !leading_blank && !trailing_blank) + { + /* Do we need to join the lines by space? */ + + if (*breaks.buffer == '\0') { + if (!RESIZE(parser, string)) goto error; + *(string.pointer ++) = ' '; + } + + yaml_parser_clear_string(parser, &line_break); + } + else { + if (!JOIN(parser, string, line_break)) goto error; + } + + /* Append the remaining line breaks. */ + + if (!JOIN(parser, string, breaks)) goto error; + + /* Is it a leading whitespace? */ + + leading_blank = IS_BLANK(parser); + + /* Consume the current line. */ + + while (!IS_BREAKZ(parser)) { + if (!RESIZE(parser, string)) goto error; + COPY(parser, string); + if (!UPDATE(parser, 1)) goto error; + } + + /* Consume the line break. */ + + if (!UPDATE(parser, 2)) goto error; + + COPY_LINE(parser, line_break); + + /* Eat the following intendation spaces and line breaks. */ + + if (!yaml_parser_scan_block_scalar_breaks(parser, + &indent, &breaks, start_mark, &end_mark)) goto error; + } + + /* Chomp the tail. */ + + if (chomping != -1) { + if (!JOIN(parser, string, line_break)) goto error; + } + if (chomping == 1) { + if (!JOIN(parser, string, breaks)) goto error; + } + + /* Create a token. */ + + token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, + literal ? 
YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE, + start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + yaml_free(line_break.buffer); + yaml_free(breaks.buffer); + + return token; + +error: + yaml_free(string.buffer); + yaml_free(line_break.buffer); + yaml_free(breaks.buffer); + + return NULL; +} + +/* + * Scan intendation spaces and line breaks for a block scalar. Determine the + * intendation level if needed. + */ + +static int +yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, + int *indent, yaml_string_t *breaks, + yaml_mark_t start_mark, yaml_mark_t *end_mark) +{ + int max_indent = 0; + + *end_mark = yaml_parser_get_mark(parser); + + /* Eat the intendation spaces and line breaks. */ + + while (1) + { + /* Eat the intendation spaces. */ + + if (!UPDATE(parser, 1)) return 0; + + while ((!*indent || parser->column < *indent) && IS_SPACE(parser)) { + FORWARD(parser); + if (!UPDATE(parser, 1)) return 0; + } + + if (parser->column > max_indent) + max_indent = parser->column; + + /* Check for a tab character messing the intendation. */ + + if ((!*indent || parser->column < *indent) && IS_TAB(parser)) { + return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", + start_mark, "found a tab character where an intendation space is expected"); + } + + /* Have we found a non-empty line? */ + + if (!IS_BREAK(parser)) break; + + /* Consume the line break. */ + + if (!UPDATE(parser, 2)) return 0; + if (!RESIZE(parser, *breaks)) return 0; + COPY_LINE(parser, *breaks); + *end_mark = yaml_parser_get_mark(parser); + } + + /* Determine the indentation level if needed. 
*/ + + if (!*indent) { + *indent = max_indent; + if (*indent < parser->indent + 1) + *indent = parser->indent + 1; + if (*indent < 1) + *indent = 1; + } + + return 1; +} + From b0d6dcfc0956796b2dc012446b9eb3e8d3d9c3c8 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sun, 18 Jun 2006 17:20:25 +0000 Subject: [PATCH 12/73] The scanner is completed (not tested though). --- src/api.c | 4 +- src/scanner.c | 597 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 577 insertions(+), 24 deletions(-) diff --git a/src/api.c b/src/api.c index c63da451..2af17c36 100644 --- a/src/api.c +++ b/src/api.c @@ -268,7 +268,7 @@ yaml_token_new(yaml_token_type_t type, */ YAML_DECLARE(yaml_token_t *) -yaml_stream_start_token(yaml_encoding_t encoding, +yaml_stream_start_token_new(yaml_encoding_t encoding, yaml_mark_t start_mark, yaml_mark_t end_mark) { yaml_token_t *token = yaml_token_new(YAML_STREAM_START_TOKEN, @@ -286,7 +286,7 @@ yaml_stream_start_token(yaml_encoding_t encoding, */ YAML_DECLARE(yaml_token_t *) -yaml_stream_end_token(yaml_mark_t start_mark, yaml_mark_t end_mark) +yaml_stream_end_token_new(yaml_mark_t start_mark, yaml_mark_t end_mark) { yaml_token_t *token = yaml_token_new(YAML_STREAM_END_TOKEN, start_mark, end_mark); diff --git a/src/scanner.c b/src/scanner.c index 949be39d..e701aa52 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -982,10 +982,6 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, int *indent, yaml_string_t *breaks, yaml_mark_t start_mark, yaml_mark_t *end_mark); -static int -yaml_parser_scan_block_scalar_indicators(yaml_parser_t *parser, - yaml_mark_t start_mark, int *chomping, int *increment); - static yaml_token_t * yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single); @@ -3249,8 +3245,8 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) yaml_mark_t start_mark; yaml_mark_t end_mark; yaml_string_t string = yaml_parser_new_string(parser); - yaml_string_t line_break = 
yaml_parser_new_string(parser); - yaml_string_t breaks = yaml_parser_new_string(parser); + yaml_string_t leading_break = yaml_parser_new_string(parser); + yaml_string_t trailing_breaks = yaml_parser_new_string(parser); yaml_token_t *token = NULL; int chomping = 0; int increment = 0; @@ -3259,8 +3255,8 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) int trailing_blank = 0; if (!string.buffer) goto error; - if (!line_break.buffer) goto error; - if (!breaks.buffer) goto error; + if (!leading_break.buffer) goto error; + if (!trailing_breaks.buffer) goto error; /* Eat the indicator '|' or '>'. */ @@ -3367,7 +3363,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Scan the leading line breaks and determine the indentation level if needed. */ - if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &breaks, + if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks, start_mark, &end_mark)) goto error; /* Scan the block scalar content. */ @@ -3386,25 +3382,25 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Check if we need to fold the leading line break. */ - if (!literal && (*line_break.buffer == '\n') + if (!literal && (*leading_break.buffer == '\n') && !leading_blank && !trailing_blank) { /* Do we need to join the lines by space? */ - if (*breaks.buffer == '\0') { + if (*trailing_breaks.buffer == '\0') { if (!RESIZE(parser, string)) goto error; *(string.pointer ++) = ' '; } - yaml_parser_clear_string(parser, &line_break); + yaml_parser_clear_string(parser, &leading_break); } else { - if (!JOIN(parser, string, line_break)) goto error; + if (!JOIN(parser, string, leading_break)) goto error; } /* Append the remaining line breaks. */ - if (!JOIN(parser, string, breaks)) goto error; + if (!JOIN(parser, string, trailing_breaks)) goto error; /* Is it a leading whitespace? 
*/ @@ -3422,21 +3418,21 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) if (!UPDATE(parser, 2)) goto error; - COPY_LINE(parser, line_break); + COPY_LINE(parser, leading_break); /* Eat the following intendation spaces and line breaks. */ if (!yaml_parser_scan_block_scalar_breaks(parser, - &indent, &breaks, start_mark, &end_mark)) goto error; + &indent, &trailing_breaks, start_mark, &end_mark)) goto error; } /* Chomp the tail. */ if (chomping != -1) { - if (!JOIN(parser, string, line_break)) goto error; + if (!JOIN(parser, string, leading_break)) goto error; } if (chomping == 1) { - if (!JOIN(parser, string, breaks)) goto error; + if (!JOIN(parser, string, trailing_breaks)) goto error; } /* Create a token. */ @@ -3449,15 +3445,15 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) return 0; } - yaml_free(line_break.buffer); - yaml_free(breaks.buffer); + yaml_free(leading_break.buffer); + yaml_free(trailing_breaks.buffer); return token; error: yaml_free(string.buffer); - yaml_free(line_break.buffer); - yaml_free(breaks.buffer); + yaml_free(leading_break.buffer); + yaml_free(trailing_breaks.buffer); return NULL; } @@ -3524,3 +3520,560 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, return 1; } +/* + * Scan a quoted scalar. + */ + +static yaml_token_t * +yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) +{ + yaml_mark_t start_mark; + yaml_mark_t end_mark; + yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t leading_break = yaml_parser_new_string(parser); + yaml_string_t trailing_breaks = yaml_parser_new_string(parser); + yaml_string_t whitespaces = yaml_parser_new_string(parser); + yaml_token_t *token = NULL; + int leading_blanks; + + if (!string.buffer) goto error; + if (!leading_break.buffer) goto error; + if (!trailing_breaks.buffer) goto error; + if (!whitespaces.buffer) goto error; + + /* Eat the left quote. 
*/ + + start_mark = yaml_parser_get_mark(parser); + + FORWARD(parser); + + /* Consume the content of the quoted scalar. */ + + while (1) + { + /* Check that there are no document indicators at the beginning of the line. */ + + if (!UPDATE(parser, 4)) goto error; + + if (parser->column == 0 && + ((CHECK_AT(parser, '-', 0) && + CHECK_AT(parser, '-', 1) && + CHECK_AT(parser, '-', 2)) || + (CHECK_AT(parser, '.', 0) && + CHECK_AT(parser, '.', 1) && + CHECK_AT(parser, '.', 2))) && + IS_BLANKZ_AT(parser, 3)) + { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected document indicator"); + goto error; + } + + /* Check for EOF. */ + + if (IS_Z(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", + start_mark, "found unexpected end of stream"); + goto error; + } + + /* Consume non-blank characters. */ + + if (!UPDATE(parser, 2)) goto error; + if (!RESIZE(parser, string)) goto error; + + leading_blanks = 0; + + while (!IS_BLANKZ(parser)) + { + /* Check for an escaped single quote. */ + + if (single && CHECK_AT(parser, '\'', 0) && CHECK_AT(parser, '\'', 1)) + { + *(string.pointer++) = '\''; + FORWARD(parser); + FORWARD(parser); + } + + /* Check for the right quote. */ + + else if (CHECK(parser, single ? '\'' : '"')) + { + break; + } + + /* Check for an escaped line break. */ + + else if (!single && CHECK(parser, '\\') && IS_BREAK_AT(parser, 1)) + { + if (!UPDATE(parser, 3)) goto error; + FORWARD(parser); + FORWARD_LINE(parser); + leading_blanks = 1; + break; + } + + /* Check for an escape sequence. */ + + else if (!single && CHECK(parser, '\\')) + { + int code_length = 0; + + /* Check the escape character. 
*/ + + switch (parser->pointer[1]) + { + case '0': + *(string.pointer++) = '\0'; + break; + + case 'a': + *(string.pointer++) = '\x07'; + break; + + case 'b': + *(string.pointer++) = '\x08'; + break; + + case 't': + case '\t': + *(string.pointer++) = '\x09'; + break; + + case 'n': + *(string.pointer++) = '\x0A'; + break; + + case 'v': + *(string.pointer++) = '\x0B'; + break; + + case 'f': + *(string.pointer++) = '\x0C'; + break; + + case 'r': + *(string.pointer++) = '\x0D'; + break; + + case 'e': + *(string.pointer++) = '\x1B'; + break; + + case ' ': + *(string.pointer++) = '\x20'; + break; + + case '"': + *(string.pointer++) = '"'; + break; + + case '\'': + *(string.pointer++) = '\''; + break; + + case 'N': /* NEL (#x85) */ + *(string.pointer++) = '\xC2'; + *(string.pointer++) = '\x85'; + break; + + case '_': /* #xA0 */ + *(string.pointer++) = '\xC2'; + *(string.pointer++) = '\xA0'; + break; + + case 'L': /* LS (#x2028) */ + *(string.pointer++) = '\xE2'; + *(string.pointer++) = '\x80'; + *(string.pointer++) = '\xA8'; + break; + + case 'P': /* PS (#x2029) */ + *(string.pointer++) = '\xE2'; + *(string.pointer++) = '\x80'; + *(string.pointer++) = '\xA8'; + break; + + case 'x': + code_length = 2; + break; + + case 'u': + code_length = 4; + break; + + case 'U': + code_length = 8; + break; + + default: + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found unknown escape character"); + goto error; + } + + FORWARD(parser); + FORWARD(parser); + + /* Consume an arbitrary escape code. */ + + if (code_length) + { + unsigned int value = 0; + int k; + + /* Scan the character value. 
*/ + + if (!UPDATE(parser, code_length)) goto error; + + for (k = 0; k < code_length; k ++) { + if (!IS_HEX_AT(parser, k)) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "did not find expected hexdecimal number"); + goto error; + } + value = (value << 4) + AS_HEX_AT(parser, k); + } + + /* Check the value and write the character. */ + + if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) { + yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", + start_mark, "found invalid Unicode character escape code"); + goto error; + } + + if (value <= 0x7F) { + *(string.pointer++) = value; + } + else if (value <= 0x7FF) { + *(string.pointer++) = 0xC0 + (value >> 6); + *(string.pointer++) = 0x80 + (value & 0x3F); + } + else if (value <= 0xFFFF) { + *(string.pointer++) = 0xE0 + (value >> 12); + *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); + *(string.pointer++) = 0x80 + (value & 0x3F); + } + else { + *(string.pointer++) = 0xF0 + (value >> 18); + *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F); + *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F); + *(string.pointer++) = 0x80 + (value & 0x3F); + } + + /* Advance the pointer. */ + + for (k = 0; k < code_length; k ++) { + FORWARD(parser); + } + } + } + + else + { + /* It is a non-escaped non-blank character. */ + + COPY(parser, string); + } + + if (!UPDATE(parser, 2)) goto error; + if (!RESIZE(parser, string)) goto error; + } + + /* Check if we are at the end of the scalar. */ + + if (CHECK(parser, single ? '\'' : '"')) + break; + + /* Consume blank characters. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_BLANK(parser) || IS_BREAK(parser)) + { + if (IS_BLANK(parser)) + { + /* Consume a space or a tab character. */ + + if (!leading_blanks) { + if (!RESIZE(parser, whitespaces)) goto error; + COPY(parser, whitespaces); + } + } + else + { + if (!UPDATE(parser, 2)) goto error; + + /* Check if it is a first line break. 
*/ + + if (!leading_blanks) + { + yaml_parser_clear_string(parser, &whitespaces); + COPY_LINE(parser, leading_break); + leading_blanks = 1; + } + else + { + if (!RESIZE(parser, trailing_breaks)) goto error; + COPY_LINE(parser, trailing_breaks); + } + } + if (!UPDATE(parser, 1)) goto error; + } + + /* Join the whitespaces or fold line breaks. */ + + if (!RESIZE(parser, string)) goto error; + + if (leading_blanks) + { + /* Do we need to fold line breaks? */ + + if (leading_break.buffer[0] == '\n') { + if (trailing_breaks.buffer[0] == '\0') { + *(string.pointer++) = ' '; + } + else { + if (!JOIN(parser, string, trailing_breaks)) goto error; + } + yaml_parser_clear_string(parser, &leading_break); + } + else { + if (!JOIN(parser, string, leading_break)) goto error; + if (!JOIN(parser, string, trailing_breaks)) goto error; + } + } + else + { + if (!JOIN(parser, string, whitespaces)) goto error; + } + } + + /* Eat the right quote. */ + + FORWARD(parser); + + end_mark = yaml_parser_get_mark(parser); + + /* Create a token. */ + + token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, + single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE, + start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + yaml_free(leading_break.buffer); + yaml_free(trailing_breaks.buffer); + yaml_free(whitespaces.buffer); + + return token; + +error: + yaml_free(string.buffer); + yaml_free(leading_break.buffer); + yaml_free(trailing_breaks.buffer); + yaml_free(whitespaces.buffer); + + return NULL; +} + +/* + * Scan a plain scalar. 
+ */ + +static yaml_token_t * +yaml_parser_scan_plain_scalar(yaml_parser_t *parser) +{ + yaml_mark_t start_mark; + yaml_mark_t end_mark; + yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t leading_break = yaml_parser_new_string(parser); + yaml_string_t trailing_breaks = yaml_parser_new_string(parser); + yaml_string_t whitespaces = yaml_parser_new_string(parser); + yaml_token_t *token = NULL; + int leading_blanks = 0; + int indent = parser->indent+1; + + if (!string.buffer) goto error; + if (!leading_break.buffer) goto error; + if (!trailing_breaks.buffer) goto error; + if (!whitespaces.buffer) goto error; + + start_mark = yaml_parser_get_mark(parser); + + /* Consume the content of the plain scalar. */ + + while (1) + { + /* Check for a document indicator. */ + + if (!UPDATE(parser, 4)) goto error; + + if (parser->column == 0 && + ((CHECK_AT(parser, '-', 0) && + CHECK_AT(parser, '-', 1) && + CHECK_AT(parser, '-', 2)) || + (CHECK_AT(parser, '.', 0) && + CHECK_AT(parser, '.', 1) && + CHECK_AT(parser, '.', 2))) && + IS_BLANKZ_AT(parser, 3)) break; + + /* Check for a comment. */ + + if (CHECK(parser, '#')) + break; + + /* Consume non-blank characters. */ + + while (!IS_BLANKZ(parser)) + { + /* Check for 'x:x' in the flow context. */ + + if (parser->flow_level && CHECK(parser, ':') && !IS_BLANKZ_AT(parser, 1)) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found unexpected ':'"); + goto error; + } + + /* Check for indicators that may end a plain scalar. */ + + if ((CHECK(parser, ':') && IS_BLANKZ_AT(parser, 1)) || + (parser->flow_level && + (CHECK(parser, ',') || CHECK(parser, ':') || + CHECK(parser, '?') || CHECK(parser, '[') || + CHECK(parser, ']') || CHECK(parser, '{') || + CHECK(parser, '}')))) + break; + + /* Check if we need to join whitespaces and breaks. 
*/ + + if (leading_blanks || whitespaces.buffer != whitespaces.pointer) + { + if (!RESIZE(parser, string)) goto error; + + if (leading_blanks) + { + /* Do we need to fold line breaks? */ + + if (leading_break.buffer[0] == '\n') { + if (trailing_breaks.buffer[0] == '\0') { + *(string.pointer++) = ' '; + } + else { + if (!JOIN(parser, string, trailing_breaks)) goto error; + } + yaml_parser_clear_string(parser, &leading_break); + } + else { + if (!JOIN(parser, string, leading_break)) goto error; + if (!JOIN(parser, string, trailing_breaks)) goto error; + } + + leading_blanks = 0; + } + else + { + if (!JOIN(parser, string, whitespaces)) goto error; + } + } + + /* Copy the character. */ + + if (!RESIZE(parser, string)) goto error; + + COPY(parser, string); + + end_mark = yaml_parser_get_mark(parser); + + if (!UPDATE(parser, 2)) goto error; + } + + /* Is it the end? */ + + if (!(IS_BLANK(parser) || IS_BREAK(parser))) + break; + + /* Consume blank characters. */ + + if (!UPDATE(parser, 1)) goto error; + + while (IS_BLANK(parser) || IS_BREAK(parser)) + { + if (IS_BLANK(parser)) + { + /* Check for tab character that abuse intendation. */ + + if (leading_blanks && parser->column < indent && IS_TAB(parser)) { + yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", + start_mark, "found a tab character that violate intendation"); + break; + } + + /* Consume a space or a tab character. */ + + if (!leading_blanks) { + if (!RESIZE(parser, whitespaces)) goto error; + COPY(parser, whitespaces); + } + } + else + { + if (!UPDATE(parser, 2)) goto error; + + /* Check if it is a first line break. */ + + if (!leading_blanks) + { + yaml_parser_clear_string(parser, &whitespaces); + COPY_LINE(parser, leading_break); + leading_blanks = 1; + } + else + { + if (!RESIZE(parser, trailing_breaks)) goto error; + COPY_LINE(parser, trailing_breaks); + } + } + if (!UPDATE(parser, 1)) goto error; + } + + /* Check intendation level. 
*/ + + if (parser->column < indent) + break; + } + + /* Create a token. */ + + token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, + YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); + if (!token) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + /* Note that we change the 'simple_key_allowed' flag. */ + + if (leading_blanks) { + parser->simple_key_allowed = 1; + } + + yaml_free(leading_break.buffer); + yaml_free(trailing_breaks.buffer); + yaml_free(whitespaces.buffer); + + return token; + +error: + yaml_free(string.buffer); + yaml_free(leading_break.buffer); + yaml_free(trailing_breaks.buffer); + yaml_free(whitespaces.buffer); + + return NULL; +} + From 3b8e342aaf4df9308d6b1f1ddd092f3bf8fff4d6 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 19 Jun 2006 20:27:22 +0000 Subject: [PATCH 13/73] Fix numerous bugs in the Scanner. --- src/scanner.c | 92 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/src/scanner.c b/src/scanner.c index e701aa52..8313acd2 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -1011,11 +1011,13 @@ yaml_parser_get_token(yaml_parser_t *parser) /* Move the queue head. 
*/ parser->tokens[parser->tokens_head++] = NULL; - if (parser->tokens_head == parser->tokens_size) - parser->tokens_head = 0; parser->tokens_parsed++; + if (token->type == YAML_STREAM_END_TOKEN) { + parser->stream_end_produced = 1; + } + return token; } @@ -1080,7 +1082,7 @@ yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string) memset(new_buffer+string->size, 0, string->size); - string->pointer = new_buffer + (string->buffer-string->pointer); + string->pointer = new_buffer + (string->pointer-string->buffer); string->buffer = new_buffer; string->size *= 2; @@ -1103,6 +1105,7 @@ yaml_parser_join_string(yaml_parser_t *parser, } memcpy(string1->pointer, string2->buffer, string2->pointer-string2->buffer); + string1->pointer += string2->pointer-string2->buffer; return 1; } @@ -1138,7 +1141,7 @@ yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, return 0; } - memset(new_buffer+(*size), 0, item_size*(*size)); + memset(new_buffer+item_size*(*size), 0, item_size*(*size)); *buffer = new_buffer; *size *= 2; @@ -1159,6 +1162,8 @@ yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, parser->context_mark = context_mark; parser->problem = problem; parser->problem_mark = yaml_parser_get_mark(parser); + + return 0; } /* @@ -1205,6 +1210,9 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) { /* Check if any potential simple key may occupy the head position. */ + if (!yaml_parser_stale_simple_keys(parser)) + return 0; + for (k = 0; k <= parser->flow_level; k++) { yaml_simple_key_t *simple_key = parser->simple_keys[k]; if (simple_key @@ -1251,6 +1259,11 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) if (!yaml_parser_scan_to_next_token(parser)) return 0; + /* Remove obsolete potential simple keys. */ + + if (!yaml_parser_stale_simple_keys(parser)) + return 0; + /* Check the indentation level against the current column. 
*/ if (!yaml_parser_unroll_indent(parser, parser->column)) @@ -1330,12 +1343,12 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) /* Is it the key indicator? */ - if (CHECK(parser, '?') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1))) + if (CHECK(parser, '?') && (parser->flow_level || IS_BLANKZ_AT(parser, 1))) return yaml_parser_fetch_key(parser); /* Is it the value indicator? */ - if (CHECK(parser, ':') && (!parser->flow_level || IS_BLANKZ_AT(parser, 1))) + if (CHECK(parser, ':') && (parser->flow_level || IS_BLANKZ_AT(parser, 1))) return yaml_parser_fetch_value(parser); /* Is it an alias? */ @@ -1382,7 +1395,8 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * '#', '&', '*', '!', '|', '>', '\'', '\"', * '%', '@', '`'. * - * In the block context, it may also start with the characters + * In the block context (and, for the '-' indicator, in the flow context + * too), it may also start with the characters * * '-', '?', ':' * @@ -1398,9 +1412,9 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>') || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%') || CHECK(parser, '@') || CHECK(parser, '`')) || + (CHECK(parser, '-') && !IS_BLANK_AT(parser, 1)) || (!parser->flow_level && - (CHECK(parser, '-') || CHECK(parser, '?') || CHECK(parser, ':')) && - IS_BLANKZ_AT(parser, 1))) + (CHECK(parser, '?') || CHECK(parser, ':')) && !IS_BLANKZ_AT(parser, 1))) return yaml_parser_fetch_plain_scalar(parser); /* @@ -1435,7 +1449,7 @@ yaml_parser_stale_simple_keys(yaml_parser_t *parser) */ if (simple_key && (simple_key->line < parser->line || - simple_key->index < parser->index+1024)) { + simple_key->index+1024 < parser->index)) { /* Check if the potential simple key to be removed is required. */ @@ -1789,9 +1803,12 @@ yaml_parser_fetch_stream_end(yaml_parser_t *parser) if (!yaml_parser_unroll_indent(parser, -1)) return 0; - /* We have finished. */ + /* Reset simple keys. 
*/ - parser->stream_end_produced = 1; + if (!yaml_parser_remove_simple_key(parser)) + return 0; + + parser->simple_key_allowed = 0; /* Create the STREAM-END token. */ @@ -2204,7 +2221,7 @@ yaml_parser_fetch_value(yaml_parser_t *parser) /* In the block context, we may need to add the BLOCK-MAPPING-START token. */ - if (!yaml_parser_roll_indent(parser, parser->column, + if (!yaml_parser_roll_indent(parser, simple_key->column, simple_key->token_number, YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark)) return 0; @@ -2989,6 +3006,16 @@ yaml_parser_scan_tag(yaml_parser_t *parser) if (!handle) goto error; handle[0] = '!'; handle[1] = '\0'; + + /* + * A special case: the '!' tag. + */ + + if (suffix[0] == '\0') { + yaml_char_t *tmp = handle; + handle = suffix; + suffix = tmp; + } } } @@ -3068,12 +3095,13 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, else { /* - * It's not really a tag handle. If it's a %TAG directive, it's an - * error. If it's a tag token, it must be a part of URI. + * It's either the '!' tag or not really a tag handle. If it's a %TAG + * directive, it's an error. If it's a tag token, it must be a part of + * URI. */ - if (directive) { - yaml_parser_set_scanner_error(parser, "while parsing a directive", + if (directive && !(string.buffer[0] == '!' && string.buffer[1] == '\0')) { + yaml_parser_set_scanner_error(parser, "while parsing a tag directive", start_mark, "did not find expected '!'"); goto error; } @@ -3107,11 +3135,15 @@ yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, if (!yaml_parser_resize_string(parser, &string)) goto error; } - /* Copy the head if needed. */ + /* + * Copy the head if needed. + * + * Note that we don't copy the leading '!' character. + */ - if (length) { - memcpy(string.buffer, head, length); - string.pointer += length; + if (length > 1) { + memcpy(string.buffer, head+1, length-1); + string.pointer += length-1; } /* Scan the tag. 
*/ @@ -3672,6 +3704,10 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) *(string.pointer++) = '\''; break; + case '\\': + *(string.pointer++) = '\\'; + break; + case 'N': /* NEL (#x85) */ *(string.pointer++) = '\xC2'; *(string.pointer++) = '\x85'; @@ -3691,7 +3727,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) case 'P': /* PS (#x2029) */ *(string.pointer++) = '\xE2'; *(string.pointer++) = '\x80'; - *(string.pointer++) = '\xA8'; + *(string.pointer++) = '\xA9'; break; case 'x': @@ -3800,6 +3836,9 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) if (!RESIZE(parser, whitespaces)) goto error; COPY(parser, whitespaces); } + else { + FORWARD(parser); + } } else { @@ -3931,7 +3970,7 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) while (!IS_BLANKZ(parser)) { - /* Check for 'x:x' in the flow context. */ + /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */ if (parser->flow_level && CHECK(parser, ':') && !IS_BLANKZ_AT(parser, 1)) { yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", @@ -4010,7 +4049,7 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) if (leading_blanks && parser->column < indent && IS_TAB(parser)) { yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", start_mark, "found a tab character that violate intendation"); - break; + goto error; } /* Consume a space or a tab character. */ @@ -4019,6 +4058,9 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) if (!RESIZE(parser, whitespaces)) goto error; COPY(parser, whitespaces); } + else { + FORWARD(parser); + } } else { @@ -4043,7 +4085,7 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) /* Check intendation level. 
*/ - if (parser->column < indent) + if (!parser->flow_level && parser->column < indent) break; } From 201292cb69d318aeff3333d2eda2b7e37720b9c9 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 3 Jul 2006 13:34:57 +0000 Subject: [PATCH 14/73] Add event constructors and destructors. --- include/yaml/yaml.h | 277 ++++++++++++++++++++++++++++++++++++++++---- src/api.c | 259 ++++++++++++++++++++++++++++++++++++++++- src/reader.c | 2 +- 3 files changed, 511 insertions(+), 27 deletions(-) diff --git a/include/yaml/yaml.h b/include/yaml/yaml.h index 6acbb545..7ca4a9bb 100644 --- a/include/yaml/yaml.h +++ b/include/yaml/yaml.h @@ -77,6 +77,22 @@ yaml_get_version(int *major, int *minor, int *patch); /** The character type (UTF-8 octet). */ typedef unsigned char yaml_char_t; +/** The version directive data. */ +typedef struct { + /** The major version number. */ + int major; + /** The minor version number. */ + int minor; +} yaml_version_directive_t; + +/** The tag directive data. */ +typedef struct { + /** The tag handle. */ + yaml_char_t *handle; + /** The tag prefix. */ + yaml_char_t *prefix; +} yaml_tag_directive_t; + /** The stream encoding. */ typedef enum { YAML_ANY_ENCODING, @@ -194,11 +210,23 @@ typedef struct { /** The token data. */ union { - /** The stream encoding (for @c YAML_STREAM_START_TOKEN). */ - yaml_encoding_t encoding; + /** The stream start (for @c YAML_STREAM_START_TOKEN). */ + struct { + /** The stream encoding. */ + yaml_encoding_t encoding; + } stream_start; + + /** The alias (for @c YAML_ALIAS_TOKEN). */ + struct { + /** The alias value. */ + yaml_char_t *value; + } alias; - /** The anchor (for @c YAML_ALIAS_TOKEN and @c YAML_ANCHOR_TOKEN). */ - yaml_char_t *anchor; + /** The anchor (for @c YAML_ANCHOR_TOKEN). */ + struct { + /** The anchor value. */ + yaml_char_t *value; + } anchor; /** The tag (for @c YAML_TAG_TOKEN). 
*/ struct { @@ -419,8 +447,12 @@ yaml_token_delete(yaml_token_t *token); /** @} */ -/* +/** + * @defgroup events Events + * @{ + */ +/** Event types. */ typedef enum { YAML_STREAM_START_EVENT, YAML_STREAM_END_EVENT, @@ -438,57 +470,258 @@ typedef enum { YAML_MAPPING_END_EVENT } yaml_event_type_t; +/** The event structure. */ typedef struct { + + /** The event type. */ yaml_event_type_t type; + + /** The event data. */ union { + + /** The stream parameters (for @c YAML_STREAM_START_EVENT). */ struct { + /** The document encoding. */ yaml_encoding_t encoding; } stream_start; + + /** The document parameters (for @c YAML_DOCUMENT_START_EVENT). */ struct { - struct { - int major; - int minor; - } version; - struct { - char *handle; - char *prefix; - } **tag_pairs; + /** The version directive. */ + yaml_version_directive_t *version_directive; + /** The list of tag directives. */ + yaml_tag_directive_t **tag_directives; + /** Is the document indicator implicit? */ int implicit; } document_start; + + /** The document end parameters (for @c YAML_DOCUMENT_END_EVENT). */ struct { + /** Is the document end indicator implicit? */ int implicit; } document_end; + + /** The alias parameters (for @c YAML_ALIAS_EVENT). */ struct { - char *anchor; + /** The anchor. */ + yaml_char_t *anchor; } alias; + + /** The scalar parameters (for @c YAML_SCALAR_EVENT). */ struct { - char *anchor; - char *tag; - char *value; + /** The anchor. */ + yaml_char_t *anchor; + /** The tag. */ + yaml_char_t *tag; + /** The scalar value. */ + yaml_char_t *value; + /** The length of the scalar value. */ size_t length; + /** Is the tag optional for the plain style? */ int plain_implicit; + /** Is the tag optional for any non-plain style? */ int quoted_implicit; + /** The scalar style. */ yaml_scalar_style_t style; } scalar; + + /** The sequence parameters (for @c YAML_SEQUENCE_START_EVENT). */ struct { - char *anchor; - char *tag; + /** The anchor. */ + yaml_char_t *anchor; + /** The tag. 
*/ + yaml_char_t *tag; + /** Is the tag optional? */ int implicit; + /** The sequence style. */ yaml_sequence_style_t style; } sequence_start; + + /** The mapping parameters (for @c YAML_MAPPING_START_EVENT). */ struct { - char *anchor; - char *tag; + /** The anchor. */ + yaml_char_t *anchor; + /** The tag. */ + yaml_char_t *tag; + /** Is the tag optional? */ int implicit; + /** The mapping style. */ yaml_mapping_style_t style; } mapping_start; + } data; + + /** The beginning of the token. */ yaml_mark_t start_mark; + + /** The end of the token. */ yaml_mark_t end_mark; } yaml_event_t; -*/ +/** + * Create a new @c YAML_STREAM_START_EVENT event. + * + * @param[in] encoding The stream encoding. + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_stream_start_event_new(yaml_encoding_t encoding, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_STREAM_END_TOKEN event. + * + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_stream_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_DOCUMENT_START_EVENT event. + * + * @param[in] version_directive The version directive or @c NULL. + * @param[in] tag_directives A list of tag directives or @c NULL. + * @param[in] implicit Is the document indicator present? + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. 
+ */ + +YAML_DECLARE(yaml_event_t *) +yaml_document_start_event_new(yaml_version_directive_t *version_directive, + yaml_tag_directive_t **tag_directives, int implicit, + yaml_mark_t start_mark, yaml_mark_t end_mark); +/** + * Create a new @c YAML_DOCUMENT_END_EVENT event. + * + * @param[in] implicit Is the document end indicator present? + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_document_end_event_new(int implicit, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_ALIAS_EVENT event. + * + * @param[in] anchor The anchor value. + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_alias_event_new(yaml_char_t *anchor, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_SCALAR_EVENT event. + * + * @param[in] anchor The anchor value or @c NULL. + * @param[in] tag The tag value or @c NULL. + * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] plain_implicit Is the tag optional for the plain style? + * @param[in] quoted_implicit Is the tag optional for any non-plain style? + * @param[in] style The scalar style. + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_scalar_event_new(yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_SEQUENCE_START_EVENT event. + * + * @param[in] anchor The anchor value or @c NULL. 
+ * @param[in] tag The tag value or @c NULL. + * @param[in] implicit Is the tag optional? + * @param[in] style The sequence style. + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_sequence_start_new(yaml_char_t *anchor, yaml_char_t *tag, + int implicit, yaml_sequence_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_SEQUENCE_END_EVENT event. + * + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_sequence_end_new(yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_MAPPING_START_EVENT event. + * + * @param[in] anchor The anchor value or @c NULL. + * @param[in] tag The tag value or @c NULL. + * @param[in] implicit Is the tag optional? + * @param[in] style The mapping style. + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_mapping_start_new(yaml_char_t *anchor, yaml_char_t *tag, + int implicit, yaml_mapping_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Create a new @c YAML_MAPPING_END_EVENT event. + * + * @param[in] start_mark The beginning of the event. + * @param[in] end_mark The end of the event. + * + * @returns A new event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_mapping_end_new(yaml_mark_t start_mark, yaml_mark_t end_mark); + +/** + * Destroy an event object. + * + * @param[in] event An event object. 
+ */ + +YAML_DECLARE(void) +yaml_event_delete(yaml_event_t *event); + +/** @} */ /** * @defgroup parser Parser Definitions diff --git a/src/api.c b/src/api.c index 2af17c36..88e03e76 100644 --- a/src/api.c +++ b/src/api.c @@ -276,7 +276,7 @@ yaml_stream_start_token_new(yaml_encoding_t encoding, if (!token) return NULL; - token->data.encoding = encoding; + token->data.stream_start.encoding = encoding; return token; } @@ -347,7 +347,7 @@ yaml_alias_token_new(yaml_char_t *anchor, if (!token) return NULL; - token->data.anchor = anchor; + token->data.alias.value = anchor; return token; } @@ -365,7 +365,7 @@ yaml_anchor_token_new(yaml_char_t *anchor, if (!token) return NULL; - token->data.anchor = anchor; + token->data.anchor.value = anchor; return token; } @@ -427,8 +427,11 @@ yaml_token_delete(yaml_token_t *token) break; case YAML_ALIAS_TOKEN: + yaml_free(token->data.alias.value); + break; + case YAML_ANCHOR_TOKEN: - yaml_free(token->data.anchor); + yaml_free(token->data.anchor.value); break; case YAML_TAG_TOKEN: @@ -446,3 +449,251 @@ yaml_token_delete(yaml_token_t *token) yaml_free(token); } +/* + * Create an event. + */ + +static yaml_event_t * +yaml_event_new(yaml_event_type_t type, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_malloc(sizeof(yaml_event_t)); + + if (!event) return NULL; + + memset(event, 0, sizeof(yaml_event_t)); + + event->type = type; + event->start_mark = start_mark; + event->end_mark = end_mark; + + return event; +} + +/* + * Create a STREAM-START event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_stream_start_event_new(yaml_encoding_t encoding, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_STREAM_START_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.stream_start.encoding = encoding; + + return event; +} + +/* + * Create a STREAM-END event. 
+ */ + +YAML_DECLARE(yaml_event_t *) +yaml_stream_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + return yaml_event_new(YAML_STREAM_END_EVENT, start_mark, end_mark); +} + +/* + * Create a DOCUMENT-START event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_document_start_event_new(yaml_version_directive_t *version_directive, + yaml_tag_directive_t **tag_directives, int implicit, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_DOCUMENT_START_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.document_start.version_directive = version_directive; + event->data.document_start.tag_directives = tag_directives; + event->data.document_start.implicit = implicit; + + return event; +} + +/* + * Create a DOCUMENT-END event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_document_end_event_new(int implicit, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_DOCUMENT_END_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.document_end.implicit = implicit; + + return event; +} + +/* + * Create an ALIAS event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_alias_event_new(yaml_char_t *anchor, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_ALIAS_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.alias.anchor = anchor; + + return event; +} + +/* + * Create a SCALAR event. 
+ */ + +YAML_DECLARE(yaml_event_t *) +yaml_scalar_event_new(yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_SCALAR_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.scalar.anchor = anchor; + event->data.scalar.tag = tag; + event->data.scalar.value = value; + event->data.scalar.length = length; + event->data.scalar.plain_implicit = plain_implicit; + event->data.scalar.quoted_implicit = quoted_implicit; + event->data.scalar.style = style; + + return event; +} + +/* + * Create a SEQUENCE-START event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_sequence_start_new(yaml_char_t *anchor, yaml_char_t *tag, + int implicit, yaml_sequence_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_SEQUENCE_START_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.sequence_start.anchor = anchor; + event->data.sequence_start.tag = tag; + event->data.sequence_start.implicit = implicit; + event->data.sequence_start.style = style; + + return event; +} + +/* + * Create a SEQUENCE-END event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_sequence_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + return yaml_event_new(YAML_SEQUENCE_END_EVENT, start_mark, end_mark); +} + +/* + * Create a MAPPING-START event. 
+ */ + +YAML_DECLARE(yaml_event_t *) +yaml_mapping_start_new(yaml_char_t *anchor, yaml_char_t *tag, + int implicit, yaml_mapping_style_t style, + yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + yaml_event_t *event = yaml_event_new(YAML_MAPPING_START_EVENT, + start_mark, end_mark); + + if (!event) return NULL; + + event->data.mapping_start.anchor = anchor; + event->data.mapping_start.tag = tag; + event->data.mapping_start.implicit = implicit; + event->data.mapping_start.style = style; + + return event; +} + +/* + * Create a MAPPING-END event. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_mapping_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) +{ + return yaml_event_new(YAML_MAPPING_END_EVENT, start_mark, end_mark); +} + +/* + * Destroy an event object. + */ + +YAML_DECLARE(void) +yaml_event_delete(yaml_event_t *event) +{ + assert(event); /* Non-NULL event object expected. */ + + switch (event->type) + { + case YAML_DOCUMENT_START_EVENT: + yaml_free(event->data.document_start.version_directive); + if (event->data.document_start.tag_directives) { + yaml_tag_directive_t **tag_directive; + for (tag_directive = event->data.document_start.tag_directives; + *tag_directive; tag_directive++) { + yaml_free((*tag_directive)->handle); + yaml_free((*tag_directive)->prefix); + yaml_free(*tag_directive); + } + yaml_free(event->data.document_start.tag_directives); + } + break; + + case YAML_ALIAS_EVENT: + yaml_free(event->data.alias.anchor); + break; + + case YAML_SCALAR_EVENT: + yaml_free(event->data.scalar.anchor); + yaml_free(event->data.scalar.tag); + yaml_free(event->data.scalar.value); + break; + + case YAML_SEQUENCE_START_EVENT: + yaml_free(event->data.sequence_start.anchor); + yaml_free(event->data.sequence_start.tag); + break; + + case YAML_MAPPING_START_EVENT: + yaml_free(event->data.mapping_start.anchor); + yaml_free(event->data.mapping_start.tag); + break; + } + + memset(event, 0, sizeof(yaml_event_t)); + + yaml_free(event); +} + diff --git a/src/reader.c 
b/src/reader.c index e4e6f1a9..946d298d 100644 --- a/src/reader.c +++ b/src/reader.c @@ -148,7 +148,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) size_t size = parser->buffer_end - parser->pointer; memmove(parser->buffer, parser->pointer, size); parser->pointer = parser->buffer; - parser->buffer_end -= size; + parser->buffer_end = parser->buffer + size; } else if (parser->pointer == parser->buffer_end) { parser->pointer = parser->buffer; From 9c9b6fb000a00b8ad63e461f14aa4f81eb0e74c1 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 3 Jul 2006 13:49:14 +0000 Subject: [PATCH 15/73] Move yaml/yaml.h to yaml.h and merge version.c to api.c. --- include/Makefile.am | 2 +- include/{yaml => }/yaml.h | 0 include/yaml/yaml_error.h | 25 ------------------------- include/yaml/yaml_version.h | 33 --------------------------------- src/Makefile.am | 2 +- src/api.c | 16 +++++++++++++++- src/reader.c | 2 +- src/scanner.c | 2 +- src/version.c | 21 --------------------- tests/test-reader.c | 2 +- tests/test-version.c | 2 +- 11 files changed, 21 insertions(+), 86 deletions(-) rename include/{yaml => }/yaml.h (100%) delete mode 100644 include/yaml/yaml_error.h delete mode 100644 include/yaml/yaml_version.h delete mode 100644 src/version.c diff --git a/include/Makefile.am b/include/Makefile.am index 5c5f7dd6..3c7323c4 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,4 +1,4 @@ -INCLUDES = yaml/yaml.h #yaml/yaml_version.h yaml/yaml_error.h +INCLUDES = yaml.h DOXYGEN_CFG = $(top_srcdir)/doc/doxygen.cfg nobase_include_HEADERS = $(INCLUDES) diff --git a/include/yaml/yaml.h b/include/yaml.h similarity index 100% rename from include/yaml/yaml.h rename to include/yaml.h diff --git a/include/yaml/yaml_error.h b/include/yaml/yaml_error.h deleted file mode 100644 index df0ca7dd..00000000 --- a/include/yaml/yaml_error.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef YAML_ERROR_H -#define YAML_ERROR_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum 
{ - YAML_NO_ERROR, - - YAML_MEMORY_ERROR, - - YAML_READER_ERROR, - YAML_SCANNER_ERROR, - YAML_PARSER_ERROR, - - YAML_WRITER_ERROR, - YAML_EMITTER_ERROR -} yaml_error_type_t; - -#ifdef __cplusplus -} -#endif - -#endif /* #ifndef YAML_ERROR_H */ diff --git a/include/yaml/yaml_version.h b/include/yaml/yaml_version.h deleted file mode 100644 index 9718db24..00000000 --- a/include/yaml/yaml_version.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * @file yaml_version.h - * @brief Version information. - * - * Do not include yaml_version.h directly. - */ - -#ifndef YAML_VERSION_H -#define YAML_VERSION_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * @brief Get the library version. - */ - -const char * -yaml_get_version_string(void); - -/* - * @brief Get the library version numbers. - */ - -void -yaml_get_version(int *major, int *minor, int *patch); - -#ifdef __cplusplus -} -#endif - -#endif /* #ifndef YAML_VERSION_H */ diff --git a/src/Makefile.am b/src/Makefile.am index d30f3dd1..c8797dc0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = version.c api.c reader.c scanner.c +libyaml_la_SOURCES = api.c reader.c scanner.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index 88e03e76..30477536 100644 --- a/src/api.c +++ b/src/api.c @@ -3,10 +3,24 @@ #include #endif -#include +#include #include +YAML_DECLARE(const char *) +yaml_get_version_string(void) +{ + return YAML_VERSION_STRING; +} + +YAML_DECLARE(void) +yaml_get_version(int *major, int *minor, int *patch) +{ + *major = YAML_VERSION_MAJOR; + *minor = YAML_VERSION_MINOR; + *patch = YAML_VERSION_PATCH; +} + /* * Allocate a dynamic memory block. 
*/ diff --git a/src/reader.c b/src/reader.c index 946d298d..9cc8e7bb 100644 --- a/src/reader.c +++ b/src/reader.c @@ -3,7 +3,7 @@ #include #endif -#include +#include #include diff --git a/src/scanner.c b/src/scanner.c index 8313acd2..45af0c19 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -479,7 +479,7 @@ #include #endif -#include +#include #include diff --git a/src/version.c b/src/version.c deleted file mode 100644 index b7f5904f..00000000 --- a/src/version.c +++ /dev/null @@ -1,21 +0,0 @@ - -#if HAVE_CONFIG_H -#include -#endif - -#include - -const char * -yaml_get_version_string(void) -{ - return YAML_VERSION_STRING; -} - -void -yaml_get_version(int *major, int *minor, int *patch) -{ - *major = YAML_VERSION_MAJOR; - *minor = YAML_VERSION_MINOR; - *patch = YAML_VERSION_PATCH; -} - diff --git a/tests/test-reader.c b/tests/test-reader.c index 53e4e7a0..c5ce2790 100644 --- a/tests/test-reader.c +++ b/tests/test-reader.c @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/tests/test-version.c b/tests/test-version.c index 578d6780..ab4f93c2 100644 --- a/tests/test-version.c +++ b/tests/test-version.c @@ -1,4 +1,4 @@ -#include +#include #include #include From b6a55737d8063ef3e31a4fa0cf8d9a151a4a95c6 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Tue, 4 Jul 2006 19:39:56 +0000 Subject: [PATCH 16/73] Start working on the parser. --- include/yaml.h | 103 +++++++++++++++++++++ src/Makefile.am | 2 +- src/api.c | 24 +++++ src/parser.c | 237 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 src/parser.c diff --git a/include/yaml.h b/include/yaml.h index 7ca4a9bb..0a2bab07 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -789,6 +789,36 @@ typedef struct { yaml_mark_t mark; } yaml_simple_key_t; +/** + * The states of the parser. 
+ */ +typedef enum { + YAML_PARSE_END_STATE, + YAML_PARSE_STREAM_START_STATE, + YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE, + YAML_PARSE_DOCUMENT_START_STATE, + YAML_PARSE_DOCUMENT_CONTENT_STATE, + YAML_PARSE_DOCUMENT_END_STATE, + YAML_PARSE_BLOCK_NODE_STATE, + YAML_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE, + YAML_PARSE_FLOW_NODE_STATE, + YAML_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE, + YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE, + YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE, + YAML_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE, + YAML_PARSE_BLOCK_MAPPING_KEY_STATE, + YAML_PARSE_BLOCK_MAPPING_VALUE_STATE, + YAML_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE, + YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE, + YAML_PARSE_FLOW_MAPPING_KEY_STATE, + YAML_PARSE_FLOW_MAPPING_VALUE_STATE, + YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE +} yaml_parser_state_t; + /** * The parser structure. * @@ -939,6 +969,51 @@ typedef struct { * @} */ + /** + * @name Parser stuff + * @{ + */ + + /** The parser states stack. */ + yaml_parser_state_t *states; + + /** The size of the parser states stack. */ + size_t states_size; + + /** The number of items in the parser states stack. */ + size_t states_length; + + /** The current parser state. */ + yaml_parser_state_t state; + + /** The stack of marks. */ + yaml_mark_t *marks; + + /** The size of the marks stack. */ + size_t marks_size; + + /** The number of items in the marks stack. */ + size_t marks_length; + + /** The current event. */ + yaml_event_t *current_event; + + /** The YAML version directive. */ + yaml_version_directive_t *version_directive; + + /** The list of TAG directives. */ + yaml_tag_directive_t **tag_directives; + + /** The size of the TAG directives list. */ + size_t tag_directives_size; + + /** The number of items in the TAG directives list. 
*/ + size_t tag_directives_length; + + /** + * @} + */ + } yaml_parser_t; /** @@ -1044,6 +1119,34 @@ yaml_parser_get_token(yaml_parser_t *parser); YAML_DECLARE(yaml_token_t *) yaml_parser_peek_token(yaml_parser_t *parser); +/** + * Get the next event. + * + * The application is responsible for destroing the event object. + * + * @param[in] parser A parser object. + * + * @returns An event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_parser_get_event(yaml_parser_t *parser); + +/** + * Peek the next event. + * + * The event will be returned again on a subsequent call of + * @c yaml_parser_get_event or @c yaml_parser_peek_event. The application + * should not destroy the event object. + * + * @param[in] parser A parser object. + * + * @returns An event object, or @c NULL on error. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_parser_peek_event(yaml_parser_t *parser); + /** @} */ /* diff --git a/src/Makefile.am b/src/Makefile.am index c8797dc0..c7f6da2a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = api.c reader.c scanner.c +libyaml_la_SOURCES = api.c reader.c scanner.c parser.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index 30477536..51a1b31f 100644 --- a/src/api.c +++ b/src/api.c @@ -114,6 +114,26 @@ yaml_parser_new(void) parser->simple_keys_size = YAML_DEFAULT_SIZE; + /* Allocate the stack of parser states. */ + + parser->states = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_parser_state_t)); + if (!parser->states) goto error; + memset(parser->states, 0, YAML_DEFAULT_SIZE*sizeof(yaml_parser_state_t)); + + parser->states_size = YAML_DEFAULT_SIZE; + + /* Set the initial state. */ + + parser->state = YAML_PARSE_STREAM_START_STATE; + + /* Allocate the list of TAG directives. 
*/ + + parser->tag_directives = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); + if (!parser->tag_directives) goto error; + memset(parser->tag_directives, 0, YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); + + parser->tag_directives_size = YAML_DEFAULT_SIZE; + /* Done. */ return parser; @@ -124,6 +144,8 @@ yaml_parser_new(void) if (!parser) return NULL; + yaml_free(parser->tag_directives); + yaml_free(parser->states); yaml_free(parser->simple_keys); yaml_free(parser->indents); yaml_free(parser->tokens); @@ -144,6 +166,8 @@ yaml_parser_delete(yaml_parser_t *parser) { assert(parser); /* Non-NULL parser object expected. */ + yaml_free(parser->tag_directives); + yaml_free(parser->states); yaml_free(parser->simple_keys); yaml_free(parser->indents); yaml_free(parser->tokens); diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 00000000..6928193c --- /dev/null +++ b/src/parser.c @@ -0,0 +1,237 @@ + +/* + * The parser implements the following grammar: + * + * stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + * implicit_document ::= block_node DOCUMENT-END* + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + * block_node_or_indentless_sequence ::= + * ALIAS + * | properties (block_content | indentless_block_sequence)? + * | block_content + * | indentless_block_sequence + * block_node ::= ALIAS + * | properties block_content? + * | block_content + * flow_node ::= ALIAS + * | properties flow_content? + * | flow_content + * properties ::= TAG ANCHOR? | ANCHOR TAG? 
+ * block_content ::= block_collection | flow_collection | SCALAR + * flow_content ::= flow_collection | SCALAR + * block_collection ::= block_sequence | block_mapping + * flow_collection ::= flow_sequence | flow_mapping + * block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + * indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + * block_mapping ::= BLOCK-MAPPING_START + * ((KEY block_node_or_indentless_sequence?)? + * (VALUE block_node_or_indentless_sequence?)?)* + * BLOCK-END + * flow_sequence ::= FLOW-SEQUENCE-START + * (flow_sequence_entry FLOW-ENTRY)* + * flow_sequence_entry? + * FLOW-SEQUENCE-END + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * flow_mapping ::= FLOW-MAPPING-START + * (flow_mapping_entry FLOW-ENTRY)* + * flow_mapping_entry? + * FLOW-MAPPING-END + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include + +#include + +/* + * Public API declarations. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_parser_get_event(yaml_parser_t *parser); + +YAML_DECLARE(yaml_event_t *) +yaml_parser_peek_event(yaml_parser_t *parser); + +/* + * State functions. 
+ */ + +static yaml_event_t * +yaml_parser_state_machine(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_stream_start(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit); + +static yaml_event_t * +yaml_parser_parse_document_content(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_document_end(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_node(yaml_parser_t *parser, + int block, int indentless_sequence); + +static yaml_event_t * +yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, int first); + +static yaml_event_t * +yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, int first); + +static yaml_event_t * +yaml_parser_parse_block_mapping_value(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first); + +static yaml_event_t * +yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty); + +/* + * Get the next event and advance the parser. + */ + +YAML_DECLARE(yaml_event_t *) +yaml_parser_get_event(yaml_parser_t *parser) +{ + yaml_event_t *value; + + /* Update the current event if needed. */ + + if (!parser->current_event) { + parser->current_event = yaml_parser_state_machine(parser); + } + + /* Return and clear the current event. */ + + value = parser->current_event; + parser->current_event = NULL; + return value; +} + +/* + * Peek the next event. 
+ */ + +YAML_DECLARE(yaml_event_t *) +yaml_parser_peek_event(yaml_parser_t *parser) +{ + yaml_event_t *value; + + /* Update the current event if needed. */ + + if (!parser->current_event) { + parser->current_event = yaml_parser_state_machine(parser); + } + + /* Return the current event. */ + + return parser->current_event; +} + +/* + * State dispatcher. + */ + +static yaml_event_t * +yaml_parser_state_machine(yaml_parser_t *parser) +{ + assert (parser->state != YAML_PARSE_END_STATE); + + switch (parser->state) + { + case YAML_PARSE_STREAM_START_STATE: + return yaml_parser_parse_stream_start(parser); + + case YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE: + return yaml_parser_parse_document_start(parser, 1); + + case YAML_PARSE_DOCUMENT_START_STATE: + return yaml_parser_parse_document_start(parser, 0); + + case YAML_PARSE_DOCUMENT_CONTENT_STATE: + return yaml_parser_parse_document_content(parser); + + case YAML_PARSE_DOCUMENT_END_STATE: + return yaml_parser_parse_document_end(parser); + + case YAML_PARSE_BLOCK_NODE_STATE: + return yaml_parser_parse_node(parser, 1, 0); + + case YAML_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: + return yaml_parser_parse_node(parser, 1, 1); + + case YAML_PARSE_FLOW_NODE_STATE: + return yaml_parser_parse_node(parser, 0, 0); + + case YAML_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: + return yaml_parser_parse_block_sequence_entry(parser, 1); + + case YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_block_sequence_entry(parser, 0); + + case YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_indentless_sequence_entry(parser); + + case YAML_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: + return yaml_parser_parse_block_mapping_key(parser, 1); + + case YAML_PARSE_BLOCK_MAPPING_KEY_STATE: + return yaml_parser_parse_block_mapping_key(parser, 0); + + case YAML_PARSE_BLOCK_MAPPING_VALUE_STATE: + return yaml_parser_parse_block_mapping_value(parser); + + case YAML_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: + return 
yaml_parser_parse_flow_sequence_entry(parser, 1); + + case YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE: + return yaml_parser_parse_flow_sequence_entry(parser, 0); + + case YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_key(parser); + + case YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_value(parser); + + case YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: + return yaml_parser_parse_flow_sequence_entry_mapping_end(parser); + + case YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: + return yaml_parser_parse_flow_mapping_key(parser, 1); + + case YAML_PARSE_FLOW_MAPPING_KEY_STATE: + return yaml_parser_parse_flow_mapping_key(parser, 0); + + case YAML_PARSE_FLOW_MAPPING_VALUE_STATE: + return yaml_parser_parse_flow_mapping_value(parser, 0); + + case YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: + return yaml_parser_parse_flow_mapping_value(parser, 1); + } + assert(1); +} + From 3113e5361cbbd92e85cd609c0fb49ade2bc342e9 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Wed, 5 Jul 2006 19:58:30 +0000 Subject: [PATCH 17/73] Implement half of the parsers. 
--- include/yaml.h | 8 +- src/api.c | 4 +- src/parser.c | 766 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 772 insertions(+), 6 deletions(-) diff --git a/include/yaml.h b/include/yaml.h index 0a2bab07..f195bf42 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -666,7 +666,7 @@ yaml_scalar_event_new(yaml_char_t *anchor, yaml_char_t *tag, */ YAML_DECLARE(yaml_event_t *) -yaml_sequence_start_new(yaml_char_t *anchor, yaml_char_t *tag, +yaml_sequence_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, int implicit, yaml_sequence_style_t style, yaml_mark_t start_mark, yaml_mark_t end_mark); @@ -680,7 +680,7 @@ yaml_sequence_start_new(yaml_char_t *anchor, yaml_char_t *tag, */ YAML_DECLARE(yaml_event_t *) -yaml_sequence_end_new(yaml_mark_t start_mark, yaml_mark_t end_mark); +yaml_sequence_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark); /** * Create a new @c YAML_MAPPING_START_EVENT event. @@ -696,7 +696,7 @@ yaml_sequence_end_new(yaml_mark_t start_mark, yaml_mark_t end_mark); */ YAML_DECLARE(yaml_event_t *) -yaml_mapping_start_new(yaml_char_t *anchor, yaml_char_t *tag, +yaml_mapping_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, int implicit, yaml_mapping_style_t style, yaml_mark_t start_mark, yaml_mark_t end_mark); @@ -710,7 +710,7 @@ yaml_mapping_start_new(yaml_char_t *anchor, yaml_char_t *tag, */ YAML_DECLARE(yaml_event_t *) -yaml_mapping_end_new(yaml_mark_t start_mark, yaml_mark_t end_mark); +yaml_mapping_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark); /** * Destroy an event object. 
diff --git a/src/api.c b/src/api.c index 51a1b31f..81d515b8 100644 --- a/src/api.c +++ b/src/api.c @@ -625,7 +625,7 @@ yaml_scalar_event_new(yaml_char_t *anchor, yaml_char_t *tag, */ YAML_DECLARE(yaml_event_t *) -yaml_sequence_start_new(yaml_char_t *anchor, yaml_char_t *tag, +yaml_sequence_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, int implicit, yaml_sequence_style_t style, yaml_mark_t start_mark, yaml_mark_t end_mark) { @@ -657,7 +657,7 @@ yaml_sequence_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) */ YAML_DECLARE(yaml_event_t *) -yaml_mapping_start_new(yaml_char_t *anchor, yaml_char_t *tag, +yaml_mapping_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, int implicit, yaml_mapping_style_t style, yaml_mark_t start_mark, yaml_mark_t end_mark) { diff --git a/src/parser.c b/src/parser.c index 6928193c..03a9ebb9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -57,6 +57,33 @@ yaml_parser_get_event(yaml_parser_t *parser); YAML_DECLARE(yaml_event_t *) yaml_parser_peek_event(yaml_parser_t *parser); +/* + * Error handling. + */ + +static int +yaml_parser_set_parser_error(yaml_parser_t *parser, + const char *problem, yaml_mark_t problem_mark); + +static int +yaml_parser_set_parser_error_context(yaml_parser_t *parser, + const char *context, yaml_mark_t context_mark, + const char *problem, yaml_mark_t problem_mark); + +/* + * Buffers and lists. + */ + +static int +yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, + size_t item_size); + +static int +yaml_parser_append_state(yaml_parser_t *parser, yaml_parser_state_t state); + +static int +yaml_parser_append_mark(yaml_parser_t *parser, yaml_mark_t mark); + /* * State functions. */ @@ -110,6 +137,16 @@ yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first); static yaml_event_t * yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty); +/* + * Utility functions. 
+ */ + +static yaml_event_t * +yaml_parser_process_empty_scalar(yaml_parser_t *parser, yaml_mark_t mark); + +static int +yaml_parser_process_directives(yaml_parser_t *parser); + /* * Get the next event and advance the parser. */ @@ -152,6 +189,69 @@ yaml_parser_peek_event(yaml_parser_t *parser) return parser->current_event; } +/* + * Set parser error. + */ + +static int +yaml_parser_set_parser_error(yaml_parser_t *parser, + const char *problem, yaml_mark_t problem_mark) +{ + parser->error = YAML_PARSER_ERROR; + parser->problem = problem; + parser->problem_mark = problem_mark; + + return 0; +} + +static int +yaml_parser_set_parser_error_context(yaml_parser_t *parser, + const char *context, yaml_mark_t context_mark, + const char *problem, yaml_mark_t problem_mark) +{ + parser->error = YAML_PARSER_ERROR; + parser->context = context; + parser->context_mark = context_mark; + parser->problem = problem; + parser->problem_mark = problem_mark; + + return 0; +} + +/* + * Push a state to the state stack. + */ + +static int +yaml_parser_append_state(yaml_parser_t *parser, yaml_parser_state_t state) +{ + if (parser->states_length == parser->states_size-1) { + if (!yaml_parser_resize_list(parser, (void **)&parser->states, + &parser->states_size, sizeof(yaml_parser_state_t))) + return 0; + } + + parser->states[parser->states_length++] = state; + return 1; +} + +/* + * Push a mark to the mark stack. + */ + +static int +yaml_parser_append_mark(yaml_parser_t *parser, yaml_mark_t mark) +{ + if (parser->marks_length == parser->marks_size-1) { + if (!yaml_parser_resize_list(parser, (void **)&parser->marks, + &parser->marks_size, sizeof(yaml_mark_t))) + return 0; + } + + parser->marks[parser->marks_length++] = mark; + return 1; +} + /* * State dispatcher. */ @@ -235,3 +335,669 @@ yaml_parser_state_machine(yaml_parser_t *parser) assert(1); } +/* + * Parse the production: + * stream ::= STREAM-START implicit_document? 
explicit_document* STREAM-END + * ************ + */ + +static yaml_event_t * +yaml_parser_parse_stream_start(yaml_parser_t *parser) +{ + yaml_token_t *token; + yaml_event_t *event; + + token = yaml_parser_get_token(parser); + if (!token) return NULL; + + assert(token->type == YAML_STREAM_START_TOKEN); + + event = yaml_stream_start_event_new(token->data.stream_start.encoding, + token->start_mark, token->start_mark); + yaml_token_delete(token); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + + parser->state = YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE; + + return event; +} + +/* + * Parse the productions: + * implicit_document ::= block_node DOCUMENT-END* + * * + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + * ************************* + */ + +static yaml_event_t * +yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) +{ + yaml_token_t *token; + yaml_event_t *event; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + /* Parse an implicit document. */ + + if (implicit && token->type != YAML_VERSION_DIRECTIVE_TOKEN && + token->type != YAML_TAG_DIRECTIVE_TOKEN && + token->type != YAML_DOCUMENT_START_TOKEN && + token->type != YAML_STREAM_END_TOKEN) + { + if (!yaml_parser_process_directives(parser)) return NULL; + if (!yaml_parser_append_state(parser, YAML_PARSE_DOCUMENT_END_STATE)) + return NULL; + parser->state = YAML_PARSE_BLOCK_NODE_STATE; + event = yaml_document_start_event_new( + parser->version_directive, parser->tag_directives, 1, + token->start_mark, token->start_mark); + if (!event) return NULL; + return event; + } + + /* Parse an explicit document. 
*/ + + else if (token->type != YAML_STREAM_END_TOKEN) + { + yaml_mark_t start_mark, end_mark; + start_mark = token->start_mark; + if (!yaml_parser_process_directives(parser)) return NULL; + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_DOCUMENT_START_TOKEN) { + yaml_parser_set_parser_error(parser, + "did not found expected ", token->start_mark); + return NULL; + } + token = yaml_parser_get_token(parser); + end_mark = token->end_mark; + yaml_token_delete(token); + if (!yaml_parser_append_state(parser, YAML_PARSE_DOCUMENT_END_STATE)) + return NULL; + parser->state = YAML_PARSE_DOCUMENT_CONTENT_STATE; + event = yaml_document_start_event_new( + parser->version_directive, parser->tag_directives, 0, + start_mark, end_mark); + if (!event) return NULL; + return event; + } + + /* Parse the stream end. */ + + else + { + token = yaml_parser_get_token(parser); + parser->state = YAML_PARSE_END_STATE; + event = yaml_stream_end_event_new(token->start_mark, token->end_mark); + yaml_token_delete(token); + return event; + } +} + +/* + * Parse the productions: + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + * *********** + */ + +static yaml_event_t * +yaml_parser_parse_document_content(yaml_parser_t *parser) +{ + yaml_token_t *token; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type == YAML_VERSION_DIRECTIVE_TOKEN || + token->type == YAML_TAG_DIRECTIVE_TOKEN || + token->type == YAML_DOCUMENT_START_TOKEN || + token->type == YAML_DOCUMENT_END_TOKEN || + token->type == YAML_STREAM_END_TOKEN) { + parser->state = parser->states[--parser->states_length]; + return yaml_parser_process_empty_scalar(parser, token->start_mark); + } + else { + return yaml_parser_parse_node(parser, 1, 0); + } +} + +/* + * Parse the productions: + * implicit_document ::= block_node DOCUMENT-END* + * ************* + * explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? 
DOCUMENT-END* + * ************* + */ + +static yaml_event_t * +yaml_parser_parse_document_end(yaml_parser_t *parser) +{ + yaml_token_t *token; + yaml_event_t *event; + yaml_mark_t start_mark, end_mark; + int implicit = 1; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + start_mark = end_mark = token->start_mark; + + while (token->type == YAML_DOCUMENT_END_TOKEN) { + end_mark = token->end_mark; + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + implicit = 0; + } + + event = yaml_document_end_event_new(implicit, start_mark, end_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + return event; +} + +/* + * Parse the productions: + * block_node_or_indentless_sequence ::= + * ALIAS + * ***** + * | properties (block_content | indentless_block_sequence)? + * ********** * + * | block_content | indentless_block_sequence + * * + * block_node ::= ALIAS + * ***** + * | properties block_content? + * ********** * + * | block_content + * * + * flow_node ::= ALIAS + * ***** + * | properties flow_content? + * ********** * + * | flow_content + * * + * properties ::= TAG ANCHOR? | ANCHOR TAG? 
+ * ************************* + * block_content ::= block_collection | flow_collection | SCALAR + * ****** + * flow_content ::= flow_collection | SCALAR + * ****** + */ + +static yaml_event_t * +yaml_parser_parse_node(yaml_parser_t *parser, + int block, int indentless_sequence) +{ + yaml_token_t *token; + yaml_event_t *event; + yaml_char_t *anchor = NULL; + yaml_char_t *tag_handle = NULL; + yaml_char_t *tag_suffix = NULL; + yaml_char_t *tag = NULL; + yaml_mark_t start_mark, end_mark, tag_mark; + int implicit; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type == YAML_ALIAS_TOKEN) + { + token = yaml_parser_get_token(parser); + event = yaml_alias_event_new(token->data.alias.value, + token->start_mark, token->end_mark); + if (!event) { + yaml_token_delete(token); + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + yaml_free(token); + return event; + } + + else + { + start_mark = end_mark = token->start_mark; + + if (token->type == YAML_ANCHOR_TOKEN) + { + token = yaml_parser_get_token(parser); + anchor = token->data.anchor.value; + start_mark = token->start_mark; + end_mark = token->end_mark; + yaml_free(token); + token = yaml_parser_peek_token(parser); + if (!token) goto error; + if (token->type == YAML_TAG_TOKEN) + { + token = yaml_parser_get_token(parser); + tag_handle = token->data.tag.handle; + tag_suffix = token->data.tag.suffix; + tag_mark = token->start_mark; + end_mark = token->end_mark; + yaml_free(token); + token = yaml_parser_peek_token(parser); + if (!token) goto error; + } + } + else if (token->type == YAML_TAG_TOKEN) + { + token = yaml_parser_get_token(parser); + tag_handle = token->data.tag.handle; + tag_suffix = token->data.tag.suffix; + start_mark = tag_mark = token->start_mark; + end_mark = token->end_mark; + yaml_free(token); + token = yaml_parser_peek_token(parser); + if (!token) goto error; + if (token->type == YAML_ANCHOR_TOKEN) + { + token = yaml_parser_get_token(parser); + anchor = 
token->data.anchor.value; + end_mark = token->end_mark; + yaml_free(token); + token = yaml_parser_peek_token(parser); + if (!token) goto error; + } + } + + if (tag_handle) { + if (!*tag_handle) { + tag = tag_suffix; + yaml_free(tag_handle); + tag_handle = tag_suffix = NULL; + } + else { + yaml_tag_directive_t **tag_directive = parser->tag_directives; + for (tag_directive = parser->tag_directives; + *tag_directive; tag_directive++) { + if (strcmp((char *)(*tag_directive)->handle, (char *)tag_handle) == 0) { + size_t prefix_len = strlen((char *)(*tag_directive)->prefix); + size_t suffix_len = strlen((char *)tag_suffix); + tag = yaml_malloc(prefix_len+suffix_len+1); + if (!tag) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + memcpy(tag, (*tag_directive)->handle, prefix_len); + memcpy(tag+prefix_len, tag_suffix, suffix_len); + tag[prefix_len+suffix_len] = '\0'; + yaml_free(tag_handle); + yaml_free(tag_suffix); + tag_handle = tag_suffix = NULL; + break; + } + } + if (*tag_directive) { + yaml_parser_set_parser_error_context(parser, + "while parsing a node", start_mark, + "found undefined tag handle", tag_mark); + goto error; + } + } + } + + implicit = (!tag || !*tag); + if (indentless_sequence && token->type == YAML_BLOCK_ENTRY_TOKEN) { + end_mark = token->end_mark; + parser->state = YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE; + event = yaml_sequence_start_event_new(anchor, tag, implicit, + YAML_BLOCK_SEQUENCE_STYLE, start_mark, end_mark); + if (!event) goto error; + } + else { + if (token->type == YAML_SCALAR_TOKEN) { + int plain_implicit = 0; + int quoted_implicit = 0; + token = yaml_parser_get_token(parser); + end_mark = token->end_mark; + if ((token->data.scalar.style == YAML_PLAIN_SCALAR_STYLE && !tag) + || strcmp((char *)tag, "!") == 0) { + plain_implicit = 1; + } + else if (!tag) { + quoted_implicit = 1; + } + parser->state = parser->states[--parser->states_length]; + event = yaml_scalar_event_new(anchor, tag, + token->data.scalar.value, 
token->data.scalar.length, + plain_implicit, quoted_implicit, + token->data.scalar.style, start_mark, end_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + yaml_token_delete(token); + goto error; + } + yaml_free(token); + } + else if (token->type == YAML_FLOW_SEQUENCE_START_TOKEN) { + end_mark = token->end_mark; + parser->state = YAML_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE; + event = yaml_sequence_start_event_new(anchor, tag, implicit, + YAML_FLOW_SEQUENCE_STYLE, start_mark, end_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + } + else if (token->type == YAML_FLOW_MAPPING_START_TOKEN) { + end_mark = token->end_mark; + parser->state = YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE; + event = yaml_mapping_start_event_new(anchor, tag, implicit, + YAML_FLOW_MAPPING_STYLE, start_mark, end_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + } + else if (block && token->type == YAML_BLOCK_SEQUENCE_START_TOKEN) { + end_mark = token->end_mark; + parser->state = YAML_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE; + event = yaml_sequence_start_event_new(anchor, tag, implicit, + YAML_BLOCK_SEQUENCE_STYLE, start_mark, end_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + } + else if (block && token->type == YAML_BLOCK_MAPPING_START_TOKEN) { + end_mark = token->end_mark; + parser->state = YAML_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE; + event = yaml_mapping_start_event_new(anchor, tag, implicit, + YAML_BLOCK_MAPPING_STYLE, start_mark, end_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + } + else if (anchor || tag) { + yaml_char_t *value = yaml_malloc(1); + if (!value) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + value[0] = '\0'; + event = yaml_scalar_event_new(anchor, tag, value, 0, + implicit, 0, YAML_PLAIN_SCALAR_STYLE, + start_mark, end_mark); + if (!event) { + yaml_free(value); + parser->error = YAML_MEMORY_ERROR; + goto error; + } + } + else { + 
yaml_parser_set_parser_error_context(parser, + (block ? "while parsing a block node" + : "while parsing a flow node"), start_mark, + "did not found expected node content", token->start_mark); + goto error; + } + return event; + } + } + +error: + yaml_free(anchor); + yaml_free(tag_handle); + yaml_free(tag_suffix); + yaml_free(tag); + + return NULL; +} + +/* + * Parse the productions: + * block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + * ******************** *********** * ********* + */ + +static yaml_event_t * +yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, int first) +{ + yaml_token_t *token; + yaml_event_t *event; + + if (first) { + token = yaml_parser_get_token(parser); + if (!yaml_parser_append_mark(parser, token->start_mark)) { + yaml_token_delete(token); + return NULL; + } + yaml_token_delete(token); + } + + token = yaml_parser_get_token(parser); + if (!token) return NULL; + + if (token->type == YAML_BLOCK_ENTRY_TOKEN) + { + yaml_mark_t mark = token->end_mark; + yaml_token_delete(token); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_BLOCK_ENTRY_TOKEN && + token->type != YAML_BLOCK_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 1, 0); + } + else { + parser->state = YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE; + return yaml_parser_process_empty_scalar(parser, mark); + } + } + + else if (token->type == YAML_BLOCK_END_TOKEN) + { + parser->state = parser->states[--parser->states_length]; + event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); + yaml_token_delete(token); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + return event; + } + + else + { + yaml_parser_set_parser_error_context(parser, + "while parsing a block collection", parser->marks[parser->marks_length-1], + "did not found expected '-' indicator", token->start_mark); + 
yaml_token_delete(token); + return NULL; + } +} + +/* + * Parse the productions: + * indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + * *********** * + */ + +static yaml_event_t * +yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser) +{ + yaml_token_t *token; + yaml_event_t *event; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type == YAML_BLOCK_ENTRY_TOKEN) + { + yaml_mark_t mark = token->end_mark; + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (token->type != YAML_BLOCK_ENTRY_TOKEN && + token->type != YAML_BLOCK_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 1, 0); + } + else { + parser->state = YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE; + return yaml_parser_process_empty_scalar(parser, mark); + } + } + + else + { + parser->state = parser->states[--parser->states_length]; + event = yaml_sequence_end_event_new(token->start_mark, token->start_mark); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + return event; + } +} + +/* + * Parse the productions: + * block_mapping ::= BLOCK-MAPPING_START + * ******************* + * ((KEY block_node_or_indentless_sequence?)? 
+ * *** * + * (VALUE block_node_or_indentless_sequence?)?)* + * + * BLOCK-END + * ********* + */ + +static yaml_event_t * +yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, int first) +{ + yaml_token_t *token; + yaml_event_t *event; + + if (first) { + token = yaml_parser_get_token(parser); + if (!yaml_parser_append_mark(parser, token->start_mark)) { + yaml_token_delete(token); + return NULL; + } + yaml_token_delete(token); + } + + token = yaml_parser_get_token(parser); + if (!token) return NULL; + + if (token->type == YAML_KEY_TOKEN) + { + yaml_mark_t mark = token->end_mark; + yaml_token_delete(token); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_KEY_TOKEN && + token->type != YAML_VALUE_TOKEN && + token->type != YAML_BLOCK_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_BLOCK_MAPPING_VALUE_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 1, 1); + } + else { + parser->state = YAML_PARSE_BLOCK_MAPPING_VALUE_STATE; + return yaml_parser_process_empty_scalar(parser, mark); + } + } + + else if (token->type == YAML_BLOCK_END_TOKEN) + { + parser->state = parser->states[--parser->states_length]; + event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); + yaml_token_delete(token); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + return event; + } + + else + { + yaml_parser_set_parser_error_context(parser, + "while parsing a block mapping", parser->marks[parser->marks_length-1], + "did not found expected key", token->start_mark); + yaml_token_delete(token); + return NULL; + } +} + +/* + * Parse the productions: + * block_mapping ::= BLOCK-MAPPING_START + * + * ((KEY block_node_or_indentless_sequence?)? 
+ * + * (VALUE block_node_or_indentless_sequence?)?)* + * ***** * + * BLOCK-END + * + */ + +static yaml_event_t * +yaml_parser_parse_block_mapping_value(yaml_parser_t *parser) +{ + yaml_token_t *token; + yaml_event_t *event; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type == YAML_VALUE_TOKEN) + { + yaml_mark_t mark = token->end_mark; + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_KEY_TOKEN && + token->type != YAML_VALUE_TOKEN && + token->type != YAML_BLOCK_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_BLOCK_MAPPING_KEY_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 1, 1); + } + else { + parser->state = YAML_PARSE_BLOCK_MAPPING_KEY_STATE; + return yaml_parser_process_empty_scalar(parser, mark); + } + } + + else + { + parser->state = YAML_PARSE_BLOCK_MAPPING_KEY_STATE; + return yaml_parser_process_empty_scalar(parser, token->start_mark); + } +} + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser); + +static yaml_event_t * +yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first); + +static yaml_event_t * +yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty); + From bfaf9d8fc012d4dbca2e9217ca8b9a0449ba6f4d Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 7 Jul 2006 19:34:25 +0000 Subject: [PATCH 18/73] Complete the Parser (it requires refactoring though) and fix some bugs. 
--- src/api.c | 16 +- src/parser.c | 495 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/scanner.c | 2 + 3 files changed, 498 insertions(+), 15 deletions(-) diff --git a/src/api.c b/src/api.c index 81d515b8..85271a5a 100644 --- a/src/api.c +++ b/src/api.c @@ -126,6 +126,14 @@ yaml_parser_new(void) parser->state = YAML_PARSE_STREAM_START_STATE; + /* Allocate the stack of marks. */ + + parser->marks = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_mark_t)); + if (!parser->marks) goto error; + memset(parser->marks, 0, YAML_DEFAULT_SIZE*sizeof(yaml_mark_t)); + + parser->marks_size = YAML_DEFAULT_SIZE; + /* Allocate the list of TAG directives. */ parser->tag_directives = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); @@ -145,6 +153,7 @@ yaml_parser_new(void) if (!parser) return NULL; yaml_free(parser->tag_directives); + yaml_free(parser->marks); yaml_free(parser->states); yaml_free(parser->simple_keys); yaml_free(parser->indents); @@ -166,7 +175,8 @@ yaml_parser_delete(yaml_parser_t *parser) { assert(parser); /* Non-NULL parser object expected. 
*/ - yaml_free(parser->tag_directives); + /*yaml_free(parser->tag_directives);*/ + yaml_free(parser->marks); yaml_free(parser->states); yaml_free(parser->simple_keys); yaml_free(parser->indents); @@ -696,7 +706,7 @@ yaml_event_delete(yaml_event_t *event) switch (event->type) { case YAML_DOCUMENT_START_EVENT: - yaml_free(event->data.document_start.version_directive); + /*yaml_free(event->data.document_start.version_directive); if (event->data.document_start.tag_directives) { yaml_tag_directive_t **tag_directive; for (tag_directive = event->data.document_start.tag_directives; @@ -706,7 +716,7 @@ yaml_event_delete(yaml_event_t *event) yaml_free(*tag_directive); } yaml_free(event->data.document_start.tag_directives); - } + }*/ break; case YAML_ALIAS_EVENT: diff --git a/src/parser.c b/src/parser.c index 03a9ebb9..af3aad85 100644 --- a/src/parser.c +++ b/src/parser.c @@ -78,6 +78,10 @@ static int yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, size_t item_size); +static int +yaml_parser_append_tag_directive(yaml_parser_t *parser, + yaml_tag_directive_t *tag_directive); + static int yaml_parser_append_state(yaml_parser_t *parser, yaml_parser_state_t state); @@ -218,6 +222,48 @@ yaml_parser_set_parser_error_context(yaml_parser_t *parser, return 0; } +/* + * Double a list. + */ + +static int +yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, + size_t item_size) +{ + void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2); + + if (!new_buffer) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + + memset(new_buffer+item_size*(*size), 0, item_size*(*size)); + + *buffer = new_buffer; + *size *= 2; + + return 1; +} + +/* + * Push a tag directive to the directive list. 
+ */ + +static int +yaml_parser_append_tag_directive(yaml_parser_t *parser, + yaml_tag_directive_t *tag_directive) +{ + if (parser->tag_directives_length == parser->tag_directives_size-1) { + if (!yaml_parser_resize_list(parser, (void **)&parser->tag_directives, + &parser->tag_directives_size, sizeof(yaml_tag_directive_t))) + return 0; + } + + parser->tag_directives[parser->tag_directives_length++] = tag_directive; + parser->tag_directives[parser->tag_directives_length] = NULL; + return 1; +} + /* * Push a state to the state stack. */ @@ -396,7 +442,10 @@ yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) event = yaml_document_start_event_new( parser->version_directive, parser->tag_directives, 1, token->start_mark, token->start_mark); - if (!event) return NULL; + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } return event; } @@ -423,7 +472,10 @@ yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) event = yaml_document_start_event_new( parser->version_directive, parser->tag_directives, 0, start_mark, end_mark); - if (!event) return NULL; + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } return event; } @@ -435,6 +487,10 @@ yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) parser->state = YAML_PARSE_END_STATE; event = yaml_stream_end_event_new(token->start_mark, token->end_mark); yaml_token_delete(token); + if (!event) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } return event; } } @@ -495,11 +551,26 @@ yaml_parser_parse_document_end(yaml_parser_t *parser) implicit = 0; } + parser->version_directive = NULL; + parser->tag_directives = NULL; + parser->tag_directives = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); + if (!parser->tag_directives) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + memset(parser->tag_directives, 0, YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); + parser->tag_directives_size = YAML_DEFAULT_SIZE; + 
parser->tag_directives_length = 0; + + + event = yaml_document_end_event_new(implicit, start_mark, end_mark); if (!event) { parser->error = YAML_MEMORY_ERROR; return NULL; } + parser->state = YAML_PARSE_DOCUMENT_START_STATE; + return event; } @@ -550,6 +621,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, if (token->type == YAML_ALIAS_TOKEN) { + parser->state = parser->states[--parser->states_length]; token = yaml_parser_get_token(parser); event = yaml_alias_event_new(token->data.alias.value, token->start_mark, token->end_mark); @@ -626,7 +698,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, parser->error = YAML_MEMORY_ERROR; goto error; } - memcpy(tag, (*tag_directive)->handle, prefix_len); + memcpy(tag, (*tag_directive)->prefix, prefix_len); memcpy(tag+prefix_len, tag_suffix, suffix_len); tag[prefix_len+suffix_len] = '\0'; yaml_free(tag_handle); @@ -635,7 +707,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, break; } } - if (*tag_directive) { + if (!*tag_directive) { yaml_parser_set_parser_error_context(parser, "while parsing a node", start_mark, "found undefined tag handle", tag_mark); @@ -651,6 +723,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, event = yaml_sequence_start_event_new(anchor, tag, implicit, YAML_BLOCK_SEQUENCE_STYLE, start_mark, end_mark); if (!event) goto error; + return event; } else { if (token->type == YAML_SCALAR_TOKEN) { @@ -659,7 +732,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, token = yaml_parser_get_token(parser); end_mark = token->end_mark; if ((token->data.scalar.style == YAML_PLAIN_SCALAR_STYLE && !tag) - || strcmp((char *)tag, "!") == 0) { + || (tag && strcmp((char *)tag, "!") == 0)) { plain_implicit = 1; } else if (!tag) { @@ -724,6 +797,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, goto error; } value[0] = '\0'; + parser->state = parser->states[--parser->states_length]; event = yaml_scalar_event_new(anchor, tag, value, 0, implicit, 0, YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); @@ -799,6 +873,7 @@ 
yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, int first) else if (token->type == YAML_BLOCK_END_TOKEN) { parser->state = parser->states[--parser->states_length]; + parser->marks_length --; event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); yaml_token_delete(token); if (!event) { @@ -916,7 +991,8 @@ yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, int first) else if (token->type == YAML_BLOCK_END_TOKEN) { parser->state = parser->states[--parser->states_length]; - event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); + parser->marks_length --; + event = yaml_mapping_end_event_new(token->start_mark, token->end_mark); yaml_token_delete(token); if (!event) { parser->error = YAML_MEMORY_ERROR; @@ -983,21 +1059,416 @@ yaml_parser_parse_block_mapping_value(yaml_parser_t *parser) } } +/* + * Parse the productions: + * flow_sequence ::= FLOW-SEQUENCE-START + * ******************* + * (flow_sequence_entry FLOW-ENTRY)* + * * ********** + * flow_sequence_entry? + * * + * FLOW-SEQUENCE-END + * ***************** + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+ * * + */ + static yaml_event_t * -yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first); +yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first) +{ + yaml_token_t *token; + yaml_event_t *event; + + if (first) { + token = yaml_parser_get_token(parser); + if (!yaml_parser_append_mark(parser, token->start_mark)) { + yaml_token_delete(token); + return NULL; + } + yaml_token_delete(token); + } + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type != YAML_FLOW_SEQUENCE_END_TOKEN) + { + if (!first) { + if (token->type == YAML_FLOW_ENTRY_TOKEN) { + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + } + else { + yaml_parser_set_parser_error_context(parser, + "while parsing a flow sequence", parser->marks[parser->marks_length-1], + "did not found expected ',' or ']'", token->start_mark); + return NULL; + } + } + + if (token->type == YAML_KEY_TOKEN) { + token = yaml_parser_get_token(parser); + parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE; + event = yaml_mapping_start_event_new(NULL, NULL, + 1, YAML_FLOW_MAPPING_STYLE, + token->start_mark, token->end_mark); + yaml_token_delete(token); + return event; + } + + else if (token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 0, 0); + } + } + + parser->state = parser->states[--parser->states_length]; + parser->marks_length --; + token = yaml_parser_get_token(parser); + event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); + yaml_token_delete(token); + return event; +} + +/* + * Parse the productions: + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+ * *** * + */ static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser); +yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser) +{ + yaml_token_t *token; + yaml_event_t *event; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type != YAML_VALUE_TOKEN && token->type != YAML_FLOW_ENTRY_TOKEN + && token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 0, 0); + } + else { + parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE; + event = yaml_parser_process_empty_scalar(parser, token->end_mark); + yaml_token_delete(token); + return event; + } +} + +/* + * Parse the productions: + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * ***** * + */ static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser); +yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser) +{ + yaml_token_t *token; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + if (token->type == YAML_VALUE_TOKEN) { + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_FLOW_ENTRY_TOKEN + && token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 0, 0); + } + } + parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE; + return yaml_parser_process_empty_scalar(parser, token->start_mark); +} + +/* + * Parse the productions: + * flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
+ * * + */ static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser); +yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser) +{ + yaml_token_t *token; + + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + + parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE; + + return yaml_mapping_end_event_new(token->start_mark, token->start_mark); +} + +/* + * Parse the productions: + * flow_mapping ::= FLOW-MAPPING-START + * ****************** + * (flow_mapping_entry FLOW-ENTRY)* + * * ********** + * flow_mapping_entry? + * ****************** + * FLOW-MAPPING-END + * **************** + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * * *** * + */ static yaml_event_t * -yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first); +yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first) +{ + yaml_token_t *token; + yaml_event_t *event; + + if (first) { + token = yaml_parser_get_token(parser); + if (!yaml_parser_append_mark(parser, token->start_mark)) { + yaml_token_delete(token); + return NULL; + } + yaml_token_delete(token); + } + + token = yaml_parser_peek_token(parser); + if (!token) return; + + if (token->type != YAML_FLOW_MAPPING_END_TOKEN) + { + if (!first) { + if (token->type == YAML_FLOW_ENTRY_TOKEN) { + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return; + } + else { + yaml_parser_set_parser_error_context(parser, + "while parsing a flow mapping", parser->marks[parser->marks_length-1], + "did not found expected ',' or '}'", token->start_mark); + return NULL; + } + } + + if (token->type == YAML_KEY_TOKEN) { + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_VALUE_TOKEN + && token->type != YAML_FLOW_ENTRY_TOKEN + && token->type != YAML_FLOW_MAPPING_END_TOKEN) { + if 
(!yaml_parser_append_state(parser, + YAML_PARSE_FLOW_MAPPING_VALUE_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 0, 0); + } + else { + parser->state = YAML_PARSE_FLOW_MAPPING_VALUE_STATE; + return yaml_parser_process_empty_scalar(parser, token->start_mark); + } + } + else if (token->type != YAML_FLOW_MAPPING_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 0, 0); + } + } + + parser->state = parser->states[--parser->states_length]; + parser->marks_length --; + token = yaml_parser_get_token(parser); + event = yaml_mapping_end_event_new(token->start_mark, token->end_mark); + yaml_token_delete(token); + return event; +} + +/* + * Parse the productions: + * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + * * ***** * + */ static yaml_event_t * -yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty); +yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty) +{ + yaml_token_t *token; + yaml_event_t *event; + + token = yaml_parser_peek_token(parser); + if (!token) return; + + if (empty) { + parser->state = YAML_PARSE_FLOW_MAPPING_KEY_STATE; + return yaml_parser_process_empty_scalar(parser, token->start_mark); + } + + if (token->type == YAML_VALUE_TOKEN) { + yaml_token_delete(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return NULL; + if (token->type != YAML_FLOW_ENTRY_TOKEN + && token->type != YAML_FLOW_MAPPING_END_TOKEN) { + if (!yaml_parser_append_state(parser, + YAML_PARSE_FLOW_MAPPING_KEY_STATE)) + return NULL; + return yaml_parser_parse_node(parser, 0, 0); + } + } + + parser->state = YAML_PARSE_FLOW_MAPPING_KEY_STATE; + return yaml_parser_process_empty_scalar(parser, token->start_mark); +} + +/* + * Generate an empty scalar event. 
+ */ + +static yaml_event_t * +yaml_parser_process_empty_scalar(yaml_parser_t *parser, yaml_mark_t mark) +{ + yaml_event_t *event; + yaml_char_t *value; + + value = yaml_malloc(1); + if (!value) { + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + value[0] = '\0'; + + event = yaml_scalar_event_new(NULL, NULL, value, 0, + 1, 0, YAML_PLAIN_SCALAR_STYLE, mark, mark); + if (!event) { + yaml_free(value); + parser->error = YAML_MEMORY_ERROR; + return NULL; + } + + return event; +} + +/* + * Parse directives. + */ + +static int +yaml_parser_process_directives(yaml_parser_t *parser) +{ + yaml_tag_directive_t default_tag_directives[] = { + {(yaml_char_t *)"!", (yaml_char_t *)"!"}, + {(yaml_char_t *)"!!", (yaml_char_t *)"tag:yaml.org,2002:"}, + {NULL, NULL} + }; + yaml_tag_directive_t *ref; + yaml_tag_directive_t *default_tag_directive; + yaml_tag_directive_t **tag_directive; + yaml_token_t *token; + + token = yaml_parser_peek_token(parser); + if (!token) return 0; + + while (token->type == YAML_VERSION_DIRECTIVE_TOKEN || + token->type == YAML_TAG_DIRECTIVE_TOKEN) + { + if (token->type == YAML_VERSION_DIRECTIVE_TOKEN) { + if (parser->version_directive) { + return yaml_parser_set_parser_error(parser, + "found duplicate %YAML directive", token->start_mark); + } + if (token->data.version_directive.major != 1 + && token->data.version_directive.minor != 1) { + return yaml_parser_set_parser_error(parser, + "found incompatible YAML document", token->start_mark); + } + parser->version_directive = yaml_malloc(sizeof(yaml_version_directive_t)); + if (!parser->version_directive) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + parser->version_directive->major = token->data.version_directive.major; + parser->version_directive->minor = token->data.version_directive.minor; + } + + else if (token->type == YAML_TAG_DIRECTIVE_TOKEN) { + yaml_tag_directive_t value = { + token->data.tag_directive.handle, + token->data.tag_directive.prefix + }; + for (tag_directive = 
parser->tag_directives; + *tag_directive; tag_directive++) { + if (strcmp((char *)value.handle, + (char *)(*tag_directive)->handle) == 0) { + return yaml_parser_set_parser_error(parser, + "found duplicate %TAG directive", token->start_mark); + } + } + ref = yaml_malloc(sizeof(yaml_tag_directive_t)); + if (!ref) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + *ref = value; + if (!yaml_parser_append_tag_directive(parser, ref)) { + yaml_free(ref); + return 0; + } + } + yaml_free(yaml_parser_get_token(parser)); + token = yaml_parser_peek_token(parser); + if (!token) return 0; + } + + for (default_tag_directive = default_tag_directives; + default_tag_directive->handle; default_tag_directive++) { + int found = 0; + for (tag_directive = parser->tag_directives; + *tag_directive; tag_directive++) { + if (strcmp((char *)default_tag_directive->handle, + (char *)(*tag_directive)->handle) == 0) { + found = 1; + break; + } + } + if (!found) { + ref = yaml_malloc(sizeof(yaml_tag_directive_t)); + if (!ref) { + parser->error = YAML_MEMORY_ERROR; + return 0; + } + ref->handle = yaml_malloc(strlen((char *)default_tag_directive->handle)+1); + if (!ref->handle) { + yaml_free(ref); + parser->error = YAML_MEMORY_ERROR; + return 0; + } + ref->prefix = yaml_malloc(strlen((char *)default_tag_directive->prefix)+1); + if (!ref->prefix) { + yaml_free(ref->handle); + yaml_free(ref); + parser->error = YAML_MEMORY_ERROR; + return 0; + } + memcpy(ref->handle, default_tag_directive->handle, + strlen((char *)default_tag_directive->handle)+1); + memcpy(ref->prefix, default_tag_directive->prefix, + strlen((char *)default_tag_directive->prefix)+1); + if (!yaml_parser_append_tag_directive(parser, ref)) { + yaml_free(ref); + return 0; + } + } + } + return 1; +} diff --git a/src/scanner.c b/src/scanner.c index 45af0c19..7500f425 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -2739,6 +2739,8 @@ yaml_parser_scan_version_directive_value(yaml_parser_t *parser, if 
(!yaml_parser_scan_version_directive_number(parser, start_mark, minor)) return 0; + + return 1; } #define MAX_NUMBER_LENGTH 9 From ccaeae1cab158a4801ae9f031e80977adbd2c019 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 21 Jul 2006 13:50:32 +0000 Subject: [PATCH 19/73] Refactor internal and external API. --- include/yaml.h | 710 ++++-------------- src/api.c | 648 +++++------------ src/parser.c | 1202 +++++++++++++++--------------- src/reader.c | 205 +++--- src/scanner.c | 1654 +++++++++++++++++------------------------- src/yaml_private.h | 205 ++++++ tests/Makefile.am | 2 +- tests/run-parser.c | 46 ++ tests/run-scanner.c | 46 ++ tests/test-reader.c | 114 +-- tests/test-version.c | 5 + 11 files changed, 2018 insertions(+), 2819 deletions(-) create mode 100644 src/yaml_private.h create mode 100644 tests/run-parser.c create mode 100644 tests/run-scanner.c diff --git a/include/yaml.h b/include/yaml.h index f195bf42..ab1cf6a6 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -147,7 +147,6 @@ typedef enum { YAML_FOLDED_SCALAR_STYLE } yaml_scalar_style_t; - /** Sequence styles. */ typedef enum { YAML_ANY_SEQUENCE_STYLE, @@ -161,7 +160,8 @@ typedef enum { YAML_ANY_MAPPING_STYLE, YAML_BLOCK_MAPPING_STYLE, - YAML_FLOW_MAPPING_STYLE + YAML_FLOW_MAPPING_STYLE, + YAML_FLOW_SET_MAPPING_STYLE } yaml_mapping_style_t; /** @} */ @@ -173,6 +173,8 @@ typedef enum { /** Token types. */ typedef enum { + YAML_NO_TOKEN, + YAML_STREAM_START_TOKEN, YAML_STREAM_END_TOKEN, @@ -232,20 +234,16 @@ typedef struct { struct { /** The tag handle. */ yaml_char_t *handle; - /** The tag suffix. */ yaml_char_t *suffix; } tag; /** The scalar value (for @c YAML_SCALAR_TOKEN). */ struct { - /** The scalar value. */ yaml_char_t *value; - /** The length of the scalar value. */ size_t length; - /** The scalar style. */ yaml_scalar_style_t style; } scalar; @@ -254,7 +252,6 @@ typedef struct { struct { /** The major version number. */ int major; - /** The minor version number. 
*/ int minor; } version_directive; @@ -263,181 +260,21 @@ typedef struct { struct { /** The tag handle. */ yaml_char_t *handle; - /** The tag prefix. */ yaml_char_t *prefix; } tag_directive; + } data; /** The beginning of the token. */ yaml_mark_t start_mark; - /** The end of the token. */ yaml_mark_t end_mark; } yaml_token_t; /** - * Create a new token without assigning any data. - * - * This function can be used for constructing indicator tokens: - * @c YAML_DOCUMENT_START, @c YAML_DOCUMENT_END, - * @c YAML_BLOCK_SEQUENCE_START_TOKEN, @c YAML_BLOCK_MAPPING_START_TOKEN, - * @c YAML_BLOCK_END_TOKEN, - * @c YAML_FLOW_SEQUENCE_START_TOKEN, @c YAML_FLOW_SEQUENCE_END_TOKEN, - * @c YAML_FLOW_MAPPING_START_TOKEN, @c YAML_FLOW_MAPPING_END_TOKEN, - * @c YAML_BLOCK_ENTRY_TOKEN, @c YAML_FLOW_ENTRY_TOKEN, - * @c YAML_KEY_TOKEN, @c YAML_VALUE_TOKEN. - * - * @param[in] type The token type. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_token_new(yaml_token_type_t type, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_STREAM_START_TOKEN token with the specified encoding. - * - * @param[in] encoding The stream encoding. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_stream_start_token_new(yaml_encoding_t encoding, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_STREAM_END_TOKEN token. - * - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. 
- */ - -YAML_DECLARE(yaml_token_t *) -yaml_stream_end_token_new(yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_VERSION_DIRECTIVE_TOKEN token with the specified - * version numbers. - * - * @param[in] major The major version number. - * @param[in] minor The minor version number. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_version_directive_token_new(int major, int minor, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_TAG_DIRECTIVE_TOKEN token with the specified tag - * handle and prefix. - * - * Note that the @a handle and the @a prefix pointers will be freed by - * the token descructor. - * - * @param[in] handle The tag handle. - * @param[in] prefix The tag prefix. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_tag_directive_token_new(yaml_char_t *handle, yaml_char_t *prefix, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_ALIAS_TOKEN token with the specified anchor. - * - * Note that the @a anchor pointer will be freed by the token descructor. - * - * @param[in] anchor The anchor. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_alias_token_new(yaml_char_t *anchor, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_ANCHOR_TOKEN token with the specified anchor. - * - * Note that the @a anchor pointer will be freed by the token descructor. - * - * @param[in] anchor The anchor. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. 
- * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_anchor_token_new(yaml_char_t *anchor, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_TAG_TOKEN token with the specified tag handle and - * suffix. - * - * Note that the @a handle and the @a suffix pointers will be freed by - * the token descructor. - * - * @param[in] handle The tag handle. - * @param[in] suffix The tag suffix. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_tag_token_new(yaml_char_t *handle, yaml_char_t *suffix, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_SCALAR_TOKEN token with the specified scalar value, - * length, and style. - * - * Note that the scalar value may contain the @c NUL character, therefore - * the value length is also required. The scalar value always ends with - * @c NUL. - * - * Note that the @a value pointer will be freed by the token descructor. - * - * @param[in] value The scalar value. - * @param[in] length The value length. - * @param[in] style The scalar style. - * @param[in] start_mark The beginning of the token. - * @param[in] end_mark The end of the token. - * - * @returns A new token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_scalar_token_new(yaml_char_t *value, size_t length, - yaml_scalar_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Destroy a token object. + * Free any memory allocated for a token object. * * @param[in] token A token object. */ @@ -454,6 +291,8 @@ yaml_token_delete(yaml_token_t *token); /** Event types. */ typedef enum { + YAML_NO_EVENT, + YAML_STREAM_START_EVENT, YAML_STREAM_END_EVENT, @@ -489,8 +328,15 @@ typedef struct { struct { /** The version directive. 
*/ yaml_version_directive_t *version_directive; + /** The list of tag directives. */ - yaml_tag_directive_t **tag_directives; + struct { + /** The beginning of the tag directives list. */ + yaml_tag_directive_t *start; + /** The end of the tag directives list. */ + yaml_tag_directive_t *end; + } tag_directives; + /** Is the document indicator implicit? */ int implicit; } document_start; @@ -553,167 +399,13 @@ typedef struct { /** The beginning of the token. */ yaml_mark_t start_mark; - /** The end of the token. */ yaml_mark_t end_mark; -} yaml_event_t; - -/** - * Create a new @c YAML_STREAM_START_EVENT event. - * - * @param[in] encoding The stream encoding. - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_stream_start_event_new(yaml_encoding_t encoding, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_STREAM_END_TOKEN event. - * - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_stream_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_DOCUMENT_START_EVENT event. - * - * @param[in] version_directive The version directive or @c NULL. - * @param[in] tag_directives A list of tag directives or @c NULL. - * @param[in] implicit Is the document indicator present? - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_document_start_event_new(yaml_version_directive_t *version_directive, - yaml_tag_directive_t **tag_directives, int implicit, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_DOCUMENT_END_EVENT event. 
- * - * @param[in] implicit Is the document end indicator present? - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_document_end_event_new(int implicit, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_ALIAS_EVENT event. - * - * @param[in] anchor The anchor value. - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_alias_event_new(yaml_char_t *anchor, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_SCALAR_EVENT event. - * - * @param[in] anchor The anchor value or @c NULL. - * @param[in] tag The tag value or @c NULL. - * @param[in] value The scalar value. - * @param[in] length The length of the scalar value. - * @param[in] plain_implicit Is the tag optional for the plain style? - * @param[in] quoted_implicit Is the tag optional for any non-plain style? - * @param[in] style The scalar style. - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_scalar_event_new(yaml_char_t *anchor, yaml_char_t *tag, - yaml_char_t *value, size_t length, - int plain_implicit, int quoted_implicit, - yaml_scalar_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_SEQUENCE_START_EVENT event. - * - * @param[in] anchor The anchor value or @c NULL. - * @param[in] tag The tag value or @c NULL. - * @param[in] implicit Is the tag optional? - * @param[in] style The sequence style. - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. 
- */ - -YAML_DECLARE(yaml_event_t *) -yaml_sequence_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, - int implicit, yaml_sequence_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_SEQUENCE_END_EVENT event. - * - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ -YAML_DECLARE(yaml_event_t *) -yaml_sequence_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_MAPPING_START_EVENT event. - * - * @param[in] anchor The anchor value or @c NULL. - * @param[in] tag The tag value or @c NULL. - * @param[in] implicit Is the tag optional? - * @param[in] style The mapping style. - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_mapping_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, - int implicit, yaml_mapping_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark); - -/** - * Create a new @c YAML_MAPPING_END_EVENT event. - * - * @param[in] start_mark The beginning of the event. - * @param[in] end_mark The end of the event. - * - * @returns A new event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_mapping_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark); +} yaml_event_t; /** - * Destroy an event object. + * Free any memory allocated for an event object. * * @param[in] event An event object. */ @@ -749,42 +441,20 @@ yaml_event_delete(yaml_event_t *event); typedef int yaml_read_handler_t(void *data, unsigned char *buffer, size_t size, size_t *size_read); -/** - * This structure holds a string input specified by - * @c yaml_parser_set_input_string. - */ - -typedef struct { - /** The string start pointer. */ - unsigned char *start; - - /** The string end pointer. 
*/ - unsigned char *end; - - /** The string current position. */ - unsigned char *current; -} yaml_string_input_t; - /** * This structure holds information about a potential simple key. */ typedef struct { + /** Is a simple key possible? */ + int possible; + /** Is a simple key required? */ int required; /** The number of the token. */ size_t token_number; - /** The position index. */ - size_t index; - - /** The position line. */ - size_t line; - - /** The position column. */ - size_t column; - /** The position mark. */ yaml_mark_t mark; } yaml_simple_key_t; @@ -793,7 +463,6 @@ typedef struct { * The states of the parser. */ typedef enum { - YAML_PARSE_END_STATE, YAML_PARSE_STREAM_START_STATE, YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE, YAML_PARSE_DOCUMENT_START_STATE, @@ -816,7 +485,8 @@ typedef enum { YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE, YAML_PARSE_FLOW_MAPPING_KEY_STATE, YAML_PARSE_FLOW_MAPPING_VALUE_STATE, - YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE + YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE, + YAML_PARSE_END_STATE } yaml_parser_state_t; /** @@ -835,22 +505,16 @@ typedef struct { /** Error type. */ yaml_error_type_t error; - /** Error description. */ const char *problem; - /** The byte about which the problem occured. */ size_t problem_offset; - /** The problematic value (@c -1 is none). */ int problem_value; - /** The problem position. */ yaml_mark_t problem_mark; - /** The error context. */ const char *context; - /** The context position. */ yaml_mark_t context_mark; @@ -869,29 +533,51 @@ typedef struct { /** A pointer for passing to the read handler. */ void *read_handler_data; + /** Standard (string or file) input data. */ + union { + /** String input data. */ + struct { + /** The string start pointer. */ + unsigned char *start; + /** The string end pointer. */ + unsigned char *end; + /** The string current position. */ + unsigned char *current; + } string; + + /** File input data. 
*/ + FILE *file; + } input; + /** EOF flag */ int eof; - /** The pointer to the beginning of the working buffer. */ - yaml_char_t *buffer; - - /** The pointer to the end of the working buffer. */ - yaml_char_t *buffer_end; - - /** The pointer to the current character in the working buffer. */ - yaml_char_t *pointer; - - /** The number of unread characters in the working buffer. */ + /** The working buffer. */ + struct { + /* The beginning of the buffer. */ + yaml_char_t *start; + /* The end of the buffer. */ + yaml_char_t *end; + /* The current position of the buffer. */ + yaml_char_t *pointer; + /* The last filled position of the buffer. */ + yaml_char_t *last; + } buffer; + + /* The number of unread characters in the buffer. */ size_t unread; - /** The pointer to the beginning of the raw buffer. */ - unsigned char *raw_buffer; - - /** The pointer to the current character in the raw buffer. */ - unsigned char *raw_pointer; - - /** The number of unread bytes in the raw buffer. */ - size_t raw_unread; + /** The raw buffer. */ + struct { + /** The beginning of the buffer. */ + unsigned char *start; + /** The end of the buffer. */ + unsigned char *end; + /** The current position of the buffer. */ + unsigned char *pointer; + /** The last filled position of the buffer. */ + unsigned char *last; + } raw_buffer; /** The input encoding. */ yaml_encoding_t encoding; @@ -899,17 +585,8 @@ typedef struct { /** The offset of the current position (in bytes). */ size_t offset; - /** The index of the current position (in characters). */ - size_t index; - - /** The line of the current position (starting from @c 0). */ - size_t line; - - /** The column of the current position (starting from @c 0). */ - size_t column; - - /* String input structure. */ - yaml_string_input_t string_input; + /** The mark of the current position. */ + yaml_mark_t mark; /** * @} @@ -929,29 +606,33 @@ typedef struct { /** The number of unclosed '[' and '{' indicators. 
*/ int flow_level; - /** The tokens queue, which contains the current produced tokens. */ - yaml_token_t **tokens; - - /** The size of the tokens queue. */ - size_t tokens_size; - - /** The head of the tokens queue. */ - size_t tokens_head; - - /** The tail of the tokens queue. */ - size_t tokens_tail; - - /** The number of tokens fetched from the tokens queue. */ + /** The tokens queue. */ + struct { + /** The beginning of the tokens queue. */ + yaml_token_t *start; + /** The end of the tokens queue. */ + yaml_token_t *end; + /** The head of the tokens queue. */ + yaml_token_t *head; + /** The tail of the tokens queue. */ + yaml_token_t *tail; + } tokens; + + /** The number of tokens fetched from the queue. */ size_t tokens_parsed; - /** The stack of indentation levels. */ - int *indents; - - /** The size of the indents stack. */ - size_t indents_size; + /* Does the tokens queue contain a token ready for dequeueing. */ + int token_available; - /** The number of items in the indents stack. */ - size_t indents_length; + /** The indentation levels stack. */ + struct { + /** The beginning of the stack. */ + int *start; + /** The end of the stack. */ + int *end; + /** The top of the stack. */ + int *top; + } indents; /** The current indentation level. */ int indent; @@ -959,11 +640,15 @@ typedef struct { /** May a simple key occur at the current position? */ int simple_key_allowed; - /** The stack of potential simple keys. */ - yaml_simple_key_t **simple_keys; - - /** The size of the simple keys stack. */ - size_t simple_keys_size; + /** The stack of simple keys. */ + struct { + /** The beginning of the stack. */ + yaml_simple_key_t *start; + /** The end of the stack. */ + yaml_simple_key_t *end; + /** The top of the stack. */ + yaml_simple_key_t *top; + } simple_keys; /** * @} @@ -975,40 +660,37 @@ typedef struct { */ /** The parser states stack. */ - yaml_parser_state_t *states; - - /** The size of the parser states stack. 
*/ - size_t states_size; - - /** The number of items in the parser states stack. */ - size_t states_length; + struct { + /** The beginning of the stack. */ + yaml_parser_state_t *start; + /** The end of the stack. */ + yaml_parser_state_t *end; + /** The top of the stack. */ + yaml_parser_state_t *top; + } states; /** The current parser state. */ yaml_parser_state_t state; /** The stack of marks. */ - yaml_mark_t *marks; - - /** The size of the marks stack. */ - size_t marks_size; - - /** The number of items in the marks stack. */ - size_t marks_length; - - /** The current event. */ - yaml_event_t *current_event; - - /** The YAML version directive. */ - yaml_version_directive_t *version_directive; + struct { + /** The beginning of the stack. */ + yaml_mark_t *start; + /** The end of the stack. */ + yaml_mark_t *end; + /** The top of the stack. */ + yaml_mark_t *top; + } marks; /** The list of TAG directives. */ - yaml_tag_directive_t **tag_directives; - - /** The size of the TAG directives list. */ - size_t tag_directives_size; - - /** The number of items in the TAG directives list. */ - size_t tag_directives_length; + struct { + /** The beginning of the list. */ + yaml_tag_directive_t *start; + /** The end of the list. */ + yaml_tag_directive_t *end; + /** The top of the list. */ + yaml_tag_directive_t *top; + } tag_directives; /** * @} @@ -1017,16 +699,18 @@ typedef struct { } yaml_parser_t; /** - * Create a new parser. + * Initialize a parser. * * This function creates a new parser object. An application is responsible * for destroying the object using the @c yaml_parser_delete function. * - * @returns A new parser object; @c NULL on error. + * @param[in] parser An empty parser object. + * + * @returns #c 1 if the function succeeded, @c 0 on error. */ -YAML_DECLARE(yaml_parser_t *) -yaml_parser_new(void); +YAML_DECLARE(int) +yaml_parser_initialize(yaml_parser_t *parser); /** * Destroy a parser. 
@@ -1090,141 +774,65 @@ YAML_DECLARE(void) yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); /** - * Get the next token. + * Scan the input stream and produce the next token. * - * The token is removed from the internal token queue and the application is - * responsible for destroing the token object. - * - * @param[in] parser A parser object. + * Call the function repeatedly to produce a sequence of tokens corresponding + * to the input stream. The initial token has the type + * @c YAML_STREAM_START_TOKEN while the ending token has the type + * @c YAML_STREAM_END_TOKEN. * - * @returns A token object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_parser_get_token(yaml_parser_t *parser); - -/** - * Peek the next token. + * An application is responsible for freeing any buffers associated with the + * produced token object using the @c yaml_token_delete function. * - * The token is not removed from the internal token queue and will be returned - * again on a subsequent call of @c yaml_parser_get_token or - * @c yaml_parser_peek_token. The application should not destroy the token - * object. + * An application must not alternate the calls of @c yaml_parser_scan with the + * calls of @c yaml_parser_parse. Doing this will break the parser. * * @param[in] parser A parser object. + * @param[out] token An empty token object. * - * @returns A token object, or @c NULL on error. + * @returns @c 1 if the function succeeded, @c 0 on error. */ -YAML_DECLARE(yaml_token_t *) -yaml_parser_peek_token(yaml_parser_t *parser); +YAML_DECLARE(int) +yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); /** - * Get the next event. + * Parse the input stream and produce the next parsing event. * - * The application is responsible for destroing the event object. + * Call the function repeatedly to produce a sequence of events corresponding + * to the input stream.
The initial event has the type + * @c YAML_STREAM_START_EVENT while the ending event has the type + * @c YAML_STREAM_END_EVENT. * - * @param[in] parser A parser object. + * An application is responsible for freeing any buffers associated with the + * produced event object using the @c yaml_event_delete function. * - * @returns An event object, or @c NULL on error. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_parser_get_event(yaml_parser_t *parser); - -/** - * Peek the next event. - * - * The event will be returned again on a subsequent call of - * @c yaml_parser_get_event or @c yaml_parser_peek_event. The application - * should not destroy the event object. + * An application must not alternate the calls of @c yaml_parser_scan with the + * calls of @c yaml_parser_parse. Doing this will break the parser. * * @param[in] parser A parser object. + * @param[out] event An empty event object. * - * @returns An event object, or @c NULL on error. + * @returns @c 1 if the function succeeded, @c 0 on error. */ -YAML_DECLARE(yaml_event_t *) -yaml_parser_peek_event(yaml_parser_t *parser); +YAML_DECLARE(int) +yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); /** @} */ /* typedef struct { } yaml_emitter_t; -*/ - -/** - * @defgroup internal Internal Definitions - * @{ - */ - -/** - * Allocate a dynamic memory block. - * - * @param[in] size Size of a memory block, \c 0 is valid. - * - * @returns @c yaml_malloc returns a pointer to a newly allocated memory block, - * or @c NULL if it failed. - */ - -YAML_DECLARE(void *) -yaml_malloc(size_t size); - -/** - * Reallocate a dynamic memory block. - * - * @param[in] ptr A pointer to an existing memory block, \c NULL is - * valid. - * @param[in] size A size of a new block, \c 0 is valid. - * - * @returns @c yaml_realloc returns a pointer to a reallocated memory block, - * or @c NULL if it failed. - */ - -YAML_DECLARE(void *) -yaml_realloc(void *ptr, size_t size); - -/** - * Free a dynamic memory block.
- * - * @param[in] ptr A pointer to an existing memory block, \c NULL is - * valid. - */ - -YAML_DECLARE(void) -yaml_free(void *ptr); - -/** The initial size for various buffers. */ - -#define YAML_DEFAULT_SIZE 16 - -/** The size of the raw buffer. */ - -#define YAML_RAW_BUFFER_SIZE 16384 - -/** - * The size of the buffer. - * - * We allocate enough space for decoding the whole raw buffer. - */ - -#define YAML_BUFFER_SIZE (YAML_RAW_BUFFER_SIZE*3) - -/** - * Ensure that the buffer contains at least @a length characters. - * - * @param[in] parser A parser object. - * @param[in] length The number of characters in the buffer. - * - * @returns @c 1 on success, @c 0 on error. - */ YAML_DECLARE(int) -yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); +yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); -/** @} */ +YAML_DECLARE(int) +yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, + yaml_encoding_t encoding); +*/ #ifdef __cplusplus } diff --git a/src/api.c b/src/api.c index 85271a5a..0dd3c424 100644 --- a/src/api.c +++ b/src/api.c @@ -1,11 +1,9 @@ -#if HAVE_CONFIG_H -#include -#endif +#include "yaml_private.h" -#include - -#include +/* + * Get the library version. + */ YAML_DECLARE(const char *) yaml_get_version_string(void) @@ -13,6 +11,10 @@ yaml_get_version_string(void) return YAML_VERSION_STRING; } +/* + * Get the library version numbers. + */ + YAML_DECLARE(void) yaml_get_version(int *major, int *minor, int *patch) { @@ -52,118 +54,152 @@ yaml_free(void *ptr) } /* - * Create a new parser object. + * Duplicate a string. */ -YAML_DECLARE(yaml_parser_t *) -yaml_parser_new(void) +YAML_DECLARE(char *) +yaml_strdup(const char *str) { - yaml_parser_t *parser; - - /* Allocate the parser structure. */ - - parser = yaml_malloc(sizeof(yaml_parser_t)); - if (!parser) goto error; - - memset(parser, 0, sizeof(yaml_parser_t)); + return strdup(str); +} - /* Allocate the raw buffer. */ +/* + * Extend a string. 
+ */ - parser->raw_buffer = yaml_malloc(YAML_RAW_BUFFER_SIZE); - if (!parser->raw_buffer) goto error; - memset(parser->raw_buffer, 0, YAML_RAW_BUFFER_SIZE); +YAML_DECLARE(int) +yaml_string_extend(yaml_char_t **start, + yaml_char_t **pointer, yaml_char_t **end) +{ + void *new_start = yaml_realloc(*start, (*end - *start)*2); - parser->raw_pointer = parser->raw_buffer; - parser->raw_unread = 0; + if (!new_start) return 0; - /* Allocate the character buffer. */ + memset(new_start + (*end - *start), 0, *end - *start); - parser->buffer = yaml_malloc(YAML_BUFFER_SIZE); - if (!parser->buffer) goto error; - memset(parser->buffer, 0, YAML_BUFFER_SIZE); + *pointer = new_start + (*pointer - *start); + *end = new_start + (*end - *start)*2; + *start = new_start; - parser->buffer_end = parser->buffer; - parser->pointer = parser->buffer; - parser->unread = 0; + return 1; +} - /* Allocate the tokens queue. */ +/* + * Append a string B to a string A. + */ - parser->tokens = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_token_t *)); - if (!parser->tokens) goto error; - memset(parser->tokens, 0, YAML_DEFAULT_SIZE*sizeof(yaml_token_t *)); +YAML_DECLARE(int) +yaml_string_join( + yaml_char_t **a_start, yaml_char_t **a_pointer, yaml_char_t **a_end, + yaml_char_t **b_start, yaml_char_t **b_pointer, yaml_char_t **b_end) +{ + if (*b_start == *b_pointer) + return 1; - parser->tokens_size = YAML_DEFAULT_SIZE; - parser->tokens_head = 0; - parser->tokens_tail = 0; - parser->tokens_parsed = 0; + while (*a_end - *a_pointer <= *b_pointer - *b_start) { + if (!yaml_string_extend(a_start, a_pointer, a_end)) + return 0; + } - /* Allocate the indents stack. 
*/ + memcpy(*a_pointer, *b_start, *b_pointer - *b_start); + *a_pointer += *b_pointer - *b_start; - parser->indents = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(int)); - if (!parser->indents) goto error; - memset(parser->indents, 0, YAML_DEFAULT_SIZE*sizeof(int)); + return 1; +} - parser->indents_size = YAML_DEFAULT_SIZE; - parser->indents_length = 0; +/* + * Extend a stack. + */ - /* Allocate the stack of potential simple keys. */ +YAML_DECLARE(int) +yaml_stack_extend(void **start, void **top, void **end) +{ + void *new_start = yaml_realloc(*start, (*end - *start)*2); - parser->simple_keys = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *)); - if (!parser->simple_keys) goto error; - memset(parser->simple_keys, 0, YAML_DEFAULT_SIZE*sizeof(yaml_simple_key_t *)); + if (!new_start) return 0; - parser->simple_keys_size = YAML_DEFAULT_SIZE; + *top = new_start + (*top - *start); + *end = new_start + (*end - *start)*2; + *start = new_start; - /* Allocate the stack of parser states. */ + return 1; +} - parser->states = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_parser_state_t)); - if (!parser->states) goto error; - memset(parser->states, 0, YAML_DEFAULT_SIZE*sizeof(yaml_parser_state_t)); +/* + * Extend or move a queue. + */ - parser->states_size = YAML_DEFAULT_SIZE; +YAML_DECLARE(int) +yaml_queue_extend(void **start, void **head, void **tail, void **end) +{ + /* Check if we need to resize the queue. */ - /* Set the initial state. */ + if (*start == *head && *tail == *end) { + void *new_start = yaml_realloc(*start, (*end - *start)*2); - parser->state = YAML_PARSE_STREAM_START_STATE; + if (!new_start) return 0; - /* Allocate the stack of marks. 
*/ + *head = new_start + (*head - *start); + *tail = new_start + (*tail - *start); + *end = new_start + (*end - *start)*2; + *start = new_start; + } - parser->marks = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_mark_t)); - if (!parser->marks) goto error; - memset(parser->marks, 0, YAML_DEFAULT_SIZE*sizeof(yaml_mark_t)); + /* Check if we need to move the queue at the beginning of the buffer. */ - parser->marks_size = YAML_DEFAULT_SIZE; + if (*tail == *end) { + if (*head != *tail) { + memmove(*start, *head, *tail - *head); + } + *tail -= *head - *start; + *head = *start; + } - /* Allocate the list of TAG directives. */ + return 1; +} - parser->tag_directives = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); - if (!parser->tag_directives) goto error; - memset(parser->tag_directives, 0, YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); - parser->tag_directives_size = YAML_DEFAULT_SIZE; +/* + * Create a new parser object. + */ - /* Done. */ +YAML_DECLARE(int) +yaml_parser_initialize(yaml_parser_t *parser) +{ + assert(parser); /* Non-NULL parser object expected. */ - return parser; + memset(parser, 0, sizeof(yaml_parser_t)); + if (!BUFFER_INIT(parser, parser->raw_buffer, RAW_BUFFER_SIZE)) + goto error; + if (!BUFFER_INIT(parser, parser->buffer, BUFFER_SIZE)) + goto error; + if (!QUEUE_INIT(parser, parser->tokens, INITIAL_QUEUE_SIZE)) + goto error; + if (!STACK_INIT(parser, parser->indents, INITIAL_STACK_SIZE)) + goto error; + if (!STACK_INIT(parser, parser->simple_keys, INITIAL_STACK_SIZE)) + goto error; + if (!STACK_INIT(parser, parser->states, INITIAL_STACK_SIZE)) + goto error; + if (!STACK_INIT(parser, parser->marks, INITIAL_STACK_SIZE)) + goto error; + if (!STACK_INIT(parser, parser->tag_directives, INITIAL_STACK_SIZE)) + goto error; - /* On error, free allocated buffers. 
*/ + return 1; error: - if (!parser) return NULL; - - yaml_free(parser->tag_directives); - yaml_free(parser->marks); - yaml_free(parser->states); - yaml_free(parser->simple_keys); - yaml_free(parser->indents); - yaml_free(parser->tokens); - yaml_free(parser->buffer); - yaml_free(parser->raw_buffer); + BUFFER_DEL(parser, parser->raw_buffer); + BUFFER_DEL(parser, parser->buffer); + QUEUE_DEL(parser, parser->tokens); + STACK_DEL(parser, parser->indents); + STACK_DEL(parser, parser->simple_keys); + STACK_DEL(parser, parser->states); + STACK_DEL(parser, parser->marks); + STACK_DEL(parser, parser->tag_directives); - yaml_free(parser); - - return NULL; + return 0; } /* @@ -175,18 +211,24 @@ yaml_parser_delete(yaml_parser_t *parser) { assert(parser); /* Non-NULL parser object expected. */ - /*yaml_free(parser->tag_directives);*/ - yaml_free(parser->marks); - yaml_free(parser->states); - yaml_free(parser->simple_keys); - yaml_free(parser->indents); - yaml_free(parser->tokens); - yaml_free(parser->buffer); - yaml_free(parser->raw_buffer); + BUFFER_DEL(parser, parser->raw_buffer); + BUFFER_DEL(parser, parser->buffer); + while (!QUEUE_EMPTY(parser, parser->tokens)) { + yaml_token_delete(&DEQUEUE(parser, parser->tokens)); + } + QUEUE_DEL(parser, parser->tokens); + STACK_DEL(parser, parser->indents); + STACK_DEL(parser, parser->simple_keys); + STACK_DEL(parser, parser->states); + STACK_DEL(parser, parser->marks); + while (!STACK_EMPTY(parser, parser->tag_directives)) { + yaml_tag_directive_t tag_directive = POP(parser, parser->tag_directives); + yaml_free(tag_directive.handle); + yaml_free(tag_directive.prefix); + } + STACK_DEL(parser, parser->tag_directives); memset(parser, 0, sizeof(yaml_parser_t)); - - yaml_free(parser); } /* @@ -197,19 +239,19 @@ static int yaml_string_read_handler(void *data, unsigned char *buffer, size_t size, size_t *size_read) { - yaml_string_input_t *input = data; + yaml_parser_t *parser = data; - if (input->current == input->end) { + if 
(parser->input.string.current == parser->input.string.end) { *size_read = 0; return 1; } - if (size > (input->end - input->current)) { - size = input->end - input->current; + if (size > (parser->input.string.end - parser->input.string.current)) { + size = parser->input.string.end - parser->input.string.current; } - memcpy(buffer, input->current, size); - input->current += size; + memcpy(buffer, parser->input.string.current, size); + parser->input.string.current += size; *size_read = size; return 1; } @@ -222,8 +264,10 @@ static int yaml_file_read_handler(void *data, unsigned char *buffer, size_t size, size_t *size_read) { - *size_read = fread(buffer, 1, size, (FILE *)data); - return !ferror((FILE *)data); + yaml_parser_t *parser = data; + + *size_read = fread(buffer, 1, size, parser->input.file); + return !ferror(parser->input.file); } /* @@ -238,12 +282,12 @@ yaml_parser_set_input_string(yaml_parser_t *parser, assert(!parser->read_handler); /* You can set the source only once. */ assert(input); /* Non-NULL input string expected. */ - parser->string_input.start = input; - parser->string_input.current = input; - parser->string_input.end = input+size; - parser->read_handler = yaml_string_read_handler; - parser->read_handler_data = &parser->string_input; + parser->read_handler_data = parser; + + parser->input.string.start = input; + parser->input.string.current = input; + parser->input.string.end = input+size; } /* @@ -258,7 +302,9 @@ yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file) assert(file); /* Non-NULL file object expected. */ parser->read_handler = yaml_file_read_handler; - parser->read_handler_data = file; + parser->read_handler_data = parser; + + parser->input.file = file; } /* @@ -290,174 +336,6 @@ yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding) parser->encoding = encoding; } -/* - * Create a token. 
- */ - -YAML_DECLARE(yaml_token_t *) -yaml_token_new(yaml_token_type_t type, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_malloc(sizeof(yaml_token_t)); - - if (!token) return NULL; - - memset(token, 0, sizeof(yaml_token_t)); - - token->type = type; - token->start_mark = start_mark; - token->end_mark = end_mark; - - return token; -} - -/* - * Create a STREAM-START token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_stream_start_token_new(yaml_encoding_t encoding, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_STREAM_START_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.stream_start.encoding = encoding; - - return token; -} - -/* - * Create a STREAM-END token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_stream_end_token_new(yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_STREAM_END_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - return token; -} - -/* - * Create a VERSION-DIRECTIVE token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_version_directive_token_new(int major, int minor, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_VERSION_DIRECTIVE_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.version_directive.major = major; - token->data.version_directive.minor = minor; - - return token; -} - -/* - * Create a TAG-DIRECTIVE token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_tag_directive_token_new(yaml_char_t *handle, yaml_char_t *prefix, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_TAG_DIRECTIVE_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.tag_directive.handle = handle; - token->data.tag_directive.prefix = prefix; - - return token; -} - -/* - * Create an ALIAS token. 
- */ - -YAML_DECLARE(yaml_token_t *) -yaml_alias_token_new(yaml_char_t *anchor, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_ALIAS_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.alias.value = anchor; - - return token; -} - -/* - * Create an ANCHOR token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_anchor_token_new(yaml_char_t *anchor, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_ANCHOR_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.anchor.value = anchor; - - return token; -} - -/* - * Create a TAG token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_tag_token_new(yaml_char_t *handle, yaml_char_t *suffix, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_TAG_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.tag.handle = handle; - token->data.tag.suffix = suffix; - - return token; -} - -/* - * Create a SCALAR token. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_scalar_token_new(yaml_char_t *value, size_t length, - yaml_scalar_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_token_t *token = yaml_token_new(YAML_SCALAR_TOKEN, - start_mark, end_mark); - - if (!token) return NULL; - - token->data.scalar.value = value; - token->data.scalar.length = length; - token->data.scalar.style = style; - - return token; -} - /* * Destroy a token object. */ @@ -493,205 +371,6 @@ yaml_token_delete(yaml_token_t *token) } memset(token, 0, sizeof(yaml_token_t)); - - yaml_free(token); -} - -/* - * Create an event. 
- */ - -static yaml_event_t * -yaml_event_new(yaml_event_type_t type, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_malloc(sizeof(yaml_event_t)); - - if (!event) return NULL; - - memset(event, 0, sizeof(yaml_event_t)); - - event->type = type; - event->start_mark = start_mark; - event->end_mark = end_mark; - - return event; -} - -/* - * Create a STREAM-START event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_stream_start_event_new(yaml_encoding_t encoding, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_STREAM_START_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.stream_start.encoding = encoding; - - return event; -} - -/* - * Create a STREAM-END event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_stream_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - return yaml_event_new(YAML_STREAM_END_EVENT, start_mark, end_mark); -} - -/* - * Create a DOCUMENT-START event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_document_start_event_new(yaml_version_directive_t *version_directive, - yaml_tag_directive_t **tag_directives, int implicit, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_DOCUMENT_START_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.document_start.version_directive = version_directive; - event->data.document_start.tag_directives = tag_directives; - event->data.document_start.implicit = implicit; - - return event; -} - -/* - * Create a DOCUMENT-END event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_document_end_event_new(int implicit, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_DOCUMENT_END_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.document_end.implicit = implicit; - - return event; -} - -/* - * Create an ALIAS event. 
- */ - -YAML_DECLARE(yaml_event_t *) -yaml_alias_event_new(yaml_char_t *anchor, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_ALIAS_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.alias.anchor = anchor; - - return event; -} - -/* - * Create a SCALAR event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_scalar_event_new(yaml_char_t *anchor, yaml_char_t *tag, - yaml_char_t *value, size_t length, - int plain_implicit, int quoted_implicit, - yaml_scalar_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_SCALAR_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.scalar.anchor = anchor; - event->data.scalar.tag = tag; - event->data.scalar.value = value; - event->data.scalar.length = length; - event->data.scalar.plain_implicit = plain_implicit; - event->data.scalar.quoted_implicit = quoted_implicit; - event->data.scalar.style = style; - - return event; -} - -/* - * Create a SEQUENCE-START event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_sequence_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, - int implicit, yaml_sequence_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_SEQUENCE_START_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.sequence_start.anchor = anchor; - event->data.sequence_start.tag = tag; - event->data.sequence_start.implicit = implicit; - event->data.sequence_start.style = style; - - return event; -} - -/* - * Create a SEQUENCE-END event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_sequence_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - return yaml_event_new(YAML_SEQUENCE_END_EVENT, start_mark, end_mark); -} - -/* - * Create a MAPPING-START event. 
- */ - -YAML_DECLARE(yaml_event_t *) -yaml_mapping_start_event_new(yaml_char_t *anchor, yaml_char_t *tag, - int implicit, yaml_mapping_style_t style, - yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - yaml_event_t *event = yaml_event_new(YAML_MAPPING_START_EVENT, - start_mark, end_mark); - - if (!event) return NULL; - - event->data.mapping_start.anchor = anchor; - event->data.mapping_start.tag = tag; - event->data.mapping_start.implicit = implicit; - event->data.mapping_start.style = style; - - return event; -} - -/* - * Create a MAPPING-END event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_mapping_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) -{ - return yaml_event_new(YAML_MAPPING_END_EVENT, start_mark, end_mark); } /* @@ -701,22 +380,21 @@ yaml_mapping_end_event_new(yaml_mark_t start_mark, yaml_mark_t end_mark) YAML_DECLARE(void) yaml_event_delete(yaml_event_t *event) { + yaml_tag_directive_t *tag_directive; + assert(event); /* Non-NULL event object expected. */ switch (event->type) { case YAML_DOCUMENT_START_EVENT: - /*yaml_free(event->data.document_start.version_directive); - if (event->data.document_start.tag_directives) { - yaml_tag_directive_t **tag_directive; - for (tag_directive = event->data.document_start.tag_directives; - *tag_directive; tag_directive++) { - yaml_free((*tag_directive)->handle); - yaml_free((*tag_directive)->prefix); - yaml_free(*tag_directive); - } - yaml_free(event->data.document_start.tag_directives); - }*/ + yaml_free(event->data.document_start.version_directive); + for (tag_directive = event->data.document_start.tag_directives.start; + tag_directive != event->data.document_start.tag_directives.end; + tag_directive++) { + yaml_free(tag_directive->handle); + yaml_free(tag_directive->prefix); + } + yaml_free(event->data.document_start.tag_directives.start); break; case YAML_ALIAS_EVENT: @@ -741,7 +419,5 @@ yaml_event_delete(yaml_event_t *event) } memset(event, 0, sizeof(yaml_event_t)); - - yaml_free(event); } diff 
--git a/src/parser.c b/src/parser.c index af3aad85..fe9e1719 100644 --- a/src/parser.c +++ b/src/parser.c @@ -39,23 +39,99 @@ * flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? */ -#if HAVE_CONFIG_H -#include -#endif +#include "yaml_private.h" -#include +/* + * Event initializers. + */ -#include +#define EVENT_INIT(event,event_type,event_start_mark,event_end_mark) \ + (memset(&(event), 0, sizeof(yaml_event_t)), \ + (event).type = (event_type), \ + (event).start_mark = (event_start_mark), \ + (event).end_mark = (event_end_mark)) + +#define STREAM_START_EVENT_INIT(event,event_encoding,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_STREAM_START_EVENT,(start_mark),(end_mark)), \ + (event).data.stream_start.encoding = (event_encoding)) + +#define STREAM_END_EVENT_INIT(event,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_STREAM_END_EVENT,(start_mark),(end_mark))) + +#define DOCUMENT_START_EVENT_INIT(event,event_version_directive, \ + event_tag_directives_start,event_tag_directives_end,event_implicit,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_DOCUMENT_START_EVENT,(start_mark),(end_mark)), \ + (event).data.document_start.version_directive = (event_version_directive), \ + (event).data.document_start.tag_directives.start = (event_tag_directives_start), \ + (event).data.document_start.tag_directives.end = (event_tag_directives_end), \ + (event).data.document_start.implicit = (event_implicit)) + +#define DOCUMENT_END_EVENT_INIT(event,event_implicit,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_DOCUMENT_END_EVENT,(start_mark),(end_mark)), \ + (event).data.document_end.implicit = (event_implicit)) + +#define ALIAS_EVENT_INIT(event,event_anchor,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_ALIAS_EVENT,(start_mark),(end_mark)), \ + (event).data.alias.anchor = (event_anchor)) + +#define SCALAR_EVENT_INIT(event,event_anchor,event_tag,event_value,event_length, \ + event_plain_implicit, event_quoted_implicit,event_style,start_mark,end_mark) \ 
+ (EVENT_INIT((event),YAML_SCALAR_EVENT,(start_mark),(end_mark)), \ + (event).data.scalar.anchor = (event_anchor), \ + (event).data.scalar.tag = (event_tag), \ + (event).data.scalar.value = (event_value), \ + (event).data.scalar.length = (event_length), \ + (event).data.scalar.plain_implicit = (event_plain_implicit), \ + (event).data.scalar.quoted_implicit = (event_quoted_implicit), \ + (event).data.scalar.style = (event_style)) + +#define SEQUENCE_START_EVENT_INIT(event,event_anchor,event_tag, \ + event_implicit,event_style,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_SEQUENCE_START_EVENT,(start_mark),(end_mark)), \ + (event).data.sequence_start.anchor = (event_anchor), \ + (event).data.sequence_start.tag = (event_tag), \ + (event).data.sequence_start.implicit = (event_implicit), \ + (event).data.sequence_start.style = (event_style)) + +#define SEQUENCE_END_EVENT_INIT(event,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_SEQUENCE_END_EVENT,(start_mark),(end_mark))) + +#define MAPPING_START_EVENT_INIT(event,event_anchor,event_tag, \ + event_implicit,event_style,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_MAPPING_START_EVENT,(start_mark),(end_mark)), \ + (event).data.mapping_start.anchor = (event_anchor), \ + (event).data.mapping_start.tag = (event_tag), \ + (event).data.mapping_start.implicit = (event_implicit), \ + (event).data.mapping_start.style = (event_style)) + +#define MAPPING_END_EVENT_INIT(event,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_MAPPING_END_EVENT,(start_mark),(end_mark))) /* - * Public API declarations. + * Peek the next token in the token queue. */ -YAML_DECLARE(yaml_event_t *) -yaml_parser_get_event(yaml_parser_t *parser); +#define PEEK_TOKEN(parser) \ + ((parser->token_available || yaml_parser_fetch_more_tokens(parser)) ? \ + parser->tokens.head : NULL) -YAML_DECLARE(yaml_event_t *) -yaml_parser_peek_event(yaml_parser_t *parser); +/* + * Remove the next token from the queue (must be called after PEEK_TOKEN). 
+ */ + +#define SKIP_TOKEN(parser) \ + (parser->token_available = 0, \ + parser->tokens_parsed ++, \ + parser->stream_end_produced = \ + (parser->tokens.head->type == YAML_STREAM_END_TOKEN), \ + parser->tokens.head ++) + +/* + * Public API declarations. + */ + +YAML_DECLARE(int) +yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); /* * Error handling. @@ -71,126 +147,109 @@ yaml_parser_set_parser_error_context(yaml_parser_t *parser, const char *problem, yaml_mark_t problem_mark); /* - * Buffers and lists. + * State functions. */ static int -yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, - size_t item_size); +yaml_parser_state_machine(yaml_parser_t *parser, yaml_event_t *event); static int -yaml_parser_append_tag_directive(yaml_parser_t *parser, - yaml_tag_directive_t *tag_directive); +yaml_parser_parse_stream_start(yaml_parser_t *parser, yaml_event_t *event); static int -yaml_parser_append_state(yaml_parser_t *parser, yaml_parser_state_t state); +yaml_parser_parse_document_start(yaml_parser_t *parser, yaml_event_t *event, + int implicit); static int -yaml_parser_append_mark(yaml_parser_t *parser, yaml_mark_t mark); - -/* - * State functions. 
- */ +yaml_parser_parse_document_content(yaml_parser_t *parser, yaml_event_t *event); -static yaml_event_t * -yaml_parser_state_machine(yaml_parser_t *parser); - -static yaml_event_t * -yaml_parser_parse_stream_start(yaml_parser_t *parser); - -static yaml_event_t * -yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit); - -static yaml_event_t * -yaml_parser_parse_document_content(yaml_parser_t *parser); - -static yaml_event_t * -yaml_parser_parse_document_end(yaml_parser_t *parser); +static int +yaml_parser_parse_document_end(yaml_parser_t *parser, yaml_event_t *event); -static yaml_event_t * -yaml_parser_parse_node(yaml_parser_t *parser, +static int +yaml_parser_parse_node(yaml_parser_t *parser, yaml_event_t *event, int block, int indentless_sequence); -static yaml_event_t * -yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, int first); +static int +yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, + yaml_event_t *event, int first); -static yaml_event_t * -yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser); +static int +yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser, + yaml_event_t *event); -static yaml_event_t * -yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, int first); +static int +yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, + yaml_event_t *event, int first); -static yaml_event_t * -yaml_parser_parse_block_mapping_value(yaml_parser_t *parser); +static int +yaml_parser_parse_block_mapping_value(yaml_parser_t *parser, + yaml_event_t *event); -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first); +static int +yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, + yaml_event_t *event, int first); -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser); +static int +yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser, + yaml_event_t *event); -static 
yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser); +static int +yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser, + yaml_event_t *event); -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser); +static int +yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser, + yaml_event_t *event); -static yaml_event_t * -yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first); +static int +yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, + yaml_event_t *event, int first); -static yaml_event_t * -yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty); +static int +yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, + yaml_event_t *event, int empty); /* * Utility functions. */ -static yaml_event_t * -yaml_parser_process_empty_scalar(yaml_parser_t *parser, yaml_mark_t mark); +static int +yaml_parser_process_empty_scalar(yaml_parser_t *parser, + yaml_event_t *event, yaml_mark_t mark); static int -yaml_parser_process_directives(yaml_parser_t *parser); +yaml_parser_process_directives(yaml_parser_t *parser, + yaml_version_directive_t **version_directive_ref, + yaml_tag_directive_t **tag_directives_start_ref, + yaml_tag_directive_t **tag_directives_end_ref); + +static int +yaml_parser_append_tag_directive(yaml_parser_t *parser, + yaml_tag_directive_t value, int allow_duplicates, yaml_mark_t mark); /* - * Get the next event and advance the parser. + * Get the next event. */ -YAML_DECLARE(yaml_event_t *) -yaml_parser_get_event(yaml_parser_t *parser) +YAML_DECLARE(int) +yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event) { - yaml_event_t *value; + assert(parser); /* Non-NULL parser object is expected. */ + assert(event); /* Non-NULL event object is expected. */ - /* Update the current event if needed. 
*/ - - if (!parser->current_event) { - parser->current_event = yaml_parser_state_machine(parser); - } + /* No events after the end of the stream or error. */ - /* Return and clear the current event. */ + if (parser->stream_end_produced || parser->error || + parser->state == YAML_PARSE_END_STATE) { + memset(event, 0, sizeof(yaml_event_t)); - value = parser->current_event; - parser->current_event = NULL; - return value; -} - -/* - * Peek the next event. - */ - -YAML_DECLARE(yaml_event_t *) -yaml_parser_peek_event(yaml_parser_t *parser) -{ - yaml_event_t *value; - - /* Update the current event if needed. */ - - if (!parser->current_event) { - parser->current_event = yaml_parser_state_machine(parser); + return 1; } - /* Return the current event. */ + /* Generate the next event. */ - return parser->current_event; + return yaml_parser_state_machine(parser, event); } /* @@ -222,163 +281,88 @@ yaml_parser_set_parser_error_context(yaml_parser_t *parser, return 0; } -/* - * Double a list. - */ - -static int -yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, - size_t item_size) -{ - void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2); - - if (!new_buffer) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - memset(new_buffer+item_size*(*size), 0, item_size*(*size)); - - *buffer = new_buffer; - *size *= 2; - - return 1; -} - -/* - * Push a tag directive to the directive list. - */ - -static int -yaml_parser_append_tag_directive(yaml_parser_t *parser, - yaml_tag_directive_t *tag_directive) -{ - if (parser->tag_directives_length == parser->tag_directives_size-1) { - if (!yaml_parser_resize_list(parser, (void **)&parser->tag_directives, - &parser->tag_directives_size, sizeof(yaml_tag_directive_t))) - return 0; - } - - parser->tag_directives[parser->tag_directives_length++] = tag_directive; - parser->tag_directives[parser->tag_directives_length] = NULL; - return 1; -} - -/* - * Push a state to the state stack. 
- */ - -static int -yaml_parser_append_state(yaml_parser_t *parser, yaml_parser_state_t state) -{ - if (parser->states_length == parser->states_size-1) { - if (!yaml_parser_resize_list(parser, (void **)&parser->states, - &parser->states_size, sizeof(yaml_parser_state_t))) - return 0; - } - - parser->states[parser->states_length++] = state; - return 1; -} - -/* - * Push a mark to the mark stack. - */ - -static int -yaml_parser_append_mark(yaml_parser_t *parser, yaml_mark_t mark) -{ - if (parser->marks_length == parser->marks_size-1) { - if (!yaml_parser_resize_list(parser, (void **)&parser->marks, - &parser->marks_size, sizeof(yaml_mark_t))) - return 0; - } - - parser->marks[parser->marks_length++] = mark; - return 1; -} /* * State dispatcher. */ -static yaml_event_t * -yaml_parser_state_machine(yaml_parser_t *parser) +static int +yaml_parser_state_machine(yaml_parser_t *parser, yaml_event_t *event) { - assert (parser->state != YAML_PARSE_END_STATE); - switch (parser->state) { case YAML_PARSE_STREAM_START_STATE: - return yaml_parser_parse_stream_start(parser); + return yaml_parser_parse_stream_start(parser, event); case YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE: - return yaml_parser_parse_document_start(parser, 1); + return yaml_parser_parse_document_start(parser, event, 1); case YAML_PARSE_DOCUMENT_START_STATE: - return yaml_parser_parse_document_start(parser, 0); + return yaml_parser_parse_document_start(parser, event, 0); case YAML_PARSE_DOCUMENT_CONTENT_STATE: - return yaml_parser_parse_document_content(parser); + return yaml_parser_parse_document_content(parser, event); case YAML_PARSE_DOCUMENT_END_STATE: - return yaml_parser_parse_document_end(parser); + return yaml_parser_parse_document_end(parser, event); case YAML_PARSE_BLOCK_NODE_STATE: - return yaml_parser_parse_node(parser, 1, 0); + return yaml_parser_parse_node(parser, event, 1, 0); case YAML_PARSE_BLOCK_NODE_OR_INDENTLESS_SEQUENCE_STATE: - return yaml_parser_parse_node(parser, 1, 1); + return 
yaml_parser_parse_node(parser, event, 1, 1); case YAML_PARSE_FLOW_NODE_STATE: - return yaml_parser_parse_node(parser, 0, 0); + return yaml_parser_parse_node(parser, event, 0, 0); case YAML_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE: - return yaml_parser_parse_block_sequence_entry(parser, 1); + return yaml_parser_parse_block_sequence_entry(parser, event, 1); case YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE: - return yaml_parser_parse_block_sequence_entry(parser, 0); + return yaml_parser_parse_block_sequence_entry(parser, event, 0); case YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE: - return yaml_parser_parse_indentless_sequence_entry(parser); + return yaml_parser_parse_indentless_sequence_entry(parser, event); case YAML_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE: - return yaml_parser_parse_block_mapping_key(parser, 1); + return yaml_parser_parse_block_mapping_key(parser, event, 1); case YAML_PARSE_BLOCK_MAPPING_KEY_STATE: - return yaml_parser_parse_block_mapping_key(parser, 0); + return yaml_parser_parse_block_mapping_key(parser, event, 0); case YAML_PARSE_BLOCK_MAPPING_VALUE_STATE: - return yaml_parser_parse_block_mapping_value(parser); + return yaml_parser_parse_block_mapping_value(parser, event); case YAML_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE: - return yaml_parser_parse_flow_sequence_entry(parser, 1); + return yaml_parser_parse_flow_sequence_entry(parser, event, 1); case YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE: - return yaml_parser_parse_flow_sequence_entry(parser, 0); + return yaml_parser_parse_flow_sequence_entry(parser, event, 0); case YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE: - return yaml_parser_parse_flow_sequence_entry_mapping_key(parser); + return yaml_parser_parse_flow_sequence_entry_mapping_key(parser, event); case YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE: - return yaml_parser_parse_flow_sequence_entry_mapping_value(parser); + return yaml_parser_parse_flow_sequence_entry_mapping_value(parser, event); case YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE: - 
return yaml_parser_parse_flow_sequence_entry_mapping_end(parser); + return yaml_parser_parse_flow_sequence_entry_mapping_end(parser, event); case YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE: - return yaml_parser_parse_flow_mapping_key(parser, 1); + return yaml_parser_parse_flow_mapping_key(parser, event, 1); case YAML_PARSE_FLOW_MAPPING_KEY_STATE: - return yaml_parser_parse_flow_mapping_key(parser, 0); + return yaml_parser_parse_flow_mapping_key(parser, event, 0); case YAML_PARSE_FLOW_MAPPING_VALUE_STATE: - return yaml_parser_parse_flow_mapping_value(parser, 0); + return yaml_parser_parse_flow_mapping_value(parser, event, 0); case YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE: - return yaml_parser_parse_flow_mapping_value(parser, 1); + return yaml_parser_parse_flow_mapping_value(parser, event, 1); + + default: + assert(1); /* Invalid state. */ } - assert(1); } /* @@ -387,28 +371,25 @@ yaml_parser_state_machine(yaml_parser_t *parser) * ************ */ -static yaml_event_t * -yaml_parser_parse_stream_start(yaml_parser_t *parser) +static int +yaml_parser_parse_stream_start(yaml_parser_t *parser, yaml_event_t *event) { yaml_token_t *token; - yaml_event_t *event; - - token = yaml_parser_get_token(parser); - if (!token) return NULL; - assert(token->type == YAML_STREAM_START_TOKEN); + token = PEEK_TOKEN(parser); + if (!token) return 0; - event = yaml_stream_start_event_new(token->data.stream_start.encoding, - token->start_mark, token->start_mark); - yaml_token_delete(token); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; + if (token->type != YAML_STREAM_START_TOKEN) { + return yaml_parser_set_parser_error(parser, + "did not found expected ", token->start_mark); } parser->state = YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE; + STREAM_START_EVENT_INIT(*event, token->data.stream_start.encoding, + token->start_mark, token->start_mark); + SKIP_TOKEN(parser); - return event; + return 1; } /* @@ -419,14 +400,19 @@ yaml_parser_parse_stream_start(yaml_parser_t *parser) * 
************************* */ -static yaml_event_t * -yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) +static int +yaml_parser_parse_document_start(yaml_parser_t *parser, yaml_event_t *event, + int implicit) { yaml_token_t *token; - yaml_event_t *event; + yaml_version_directive_t *version_directive = NULL; + struct { + yaml_tag_directive_t *start; + yaml_tag_directive_t *end; + } tag_directives = { NULL, NULL }; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; /* Parse an implicit document. */ @@ -435,18 +421,14 @@ yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) token->type != YAML_DOCUMENT_START_TOKEN && token->type != YAML_STREAM_END_TOKEN) { - if (!yaml_parser_process_directives(parser)) return NULL; - if (!yaml_parser_append_state(parser, YAML_PARSE_DOCUMENT_END_STATE)) - return NULL; + if (!yaml_parser_process_directives(parser, NULL, NULL, NULL)) + return 0; + if (!PUSH(parser, parser->states, YAML_PARSE_DOCUMENT_END_STATE)) + return 0; parser->state = YAML_PARSE_BLOCK_NODE_STATE; - event = yaml_document_start_event_new( - parser->version_directive, parser->tag_directives, 1, + DOCUMENT_START_EVENT_INIT(*event, NULL, NULL, NULL, 1, token->start_mark, token->start_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + return 1; } /* Parse an explicit document. 
*/ @@ -455,44 +437,48 @@ yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) { yaml_mark_t start_mark, end_mark; start_mark = token->start_mark; - if (!yaml_parser_process_directives(parser)) return NULL; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + if (!yaml_parser_process_directives(parser, &version_directive, + &tag_directives.start, &tag_directives.end)) + return 0; + token = PEEK_TOKEN(parser); + if (!token) goto error; if (token->type != YAML_DOCUMENT_START_TOKEN) { yaml_parser_set_parser_error(parser, "did not found expected ", token->start_mark); - return NULL; + goto error; } - token = yaml_parser_get_token(parser); - end_mark = token->end_mark; - yaml_token_delete(token); - if (!yaml_parser_append_state(parser, YAML_PARSE_DOCUMENT_END_STATE)) - return NULL; + if (!PUSH(parser, parser->states, YAML_PARSE_DOCUMENT_END_STATE)) + goto error; parser->state = YAML_PARSE_DOCUMENT_CONTENT_STATE; - event = yaml_document_start_event_new( - parser->version_directive, parser->tag_directives, 0, + end_mark = token->end_mark; + DOCUMENT_START_EVENT_INIT(*event, version_directive, + tag_directives.start, tag_directives.end, 0, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + SKIP_TOKEN(parser); + version_directive = NULL; + tag_directives.start = tag_directives.end = NULL; + return 1; } /* Parse the stream end. 
*/ else { - token = yaml_parser_get_token(parser); parser->state = YAML_PARSE_END_STATE; - event = yaml_stream_end_event_new(token->start_mark, token->end_mark); - yaml_token_delete(token); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + STREAM_END_EVENT_INIT(*event, token->start_mark, token->end_mark); + SKIP_TOKEN(parser); + return 1; } + +error: + yaml_free(version_directive); + while (tag_directives.start != tag_directives.end) { + yaml_free(tag_directives.end[-1].handle); + yaml_free(tag_directives.end[-1].prefix); + tag_directives.end --; + } + yaml_free(tag_directives.start); + return 0; } /* @@ -501,24 +487,25 @@ yaml_parser_parse_document_start(yaml_parser_t *parser, int implicit) * *********** */ -static yaml_event_t * -yaml_parser_parse_document_content(yaml_parser_t *parser) +static int +yaml_parser_parse_document_content(yaml_parser_t *parser, yaml_event_t *event) { yaml_token_t *token; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_VERSION_DIRECTIVE_TOKEN || token->type == YAML_TAG_DIRECTIVE_TOKEN || token->type == YAML_DOCUMENT_START_TOKEN || token->type == YAML_DOCUMENT_END_TOKEN || token->type == YAML_STREAM_END_TOKEN) { - parser->state = parser->states[--parser->states_length]; - return yaml_parser_process_empty_scalar(parser, token->start_mark); + parser->state = POP(parser, parser->states); + return yaml_parser_process_empty_scalar(parser, event, + token->start_mark); } else { - return yaml_parser_parse_node(parser, 1, 0); + return yaml_parser_parse_node(parser, event, 1, 0); } } @@ -530,48 +517,36 @@ yaml_parser_parse_document_content(yaml_parser_t *parser) * ************* */ -static yaml_event_t * -yaml_parser_parse_document_end(yaml_parser_t *parser) +static int +yaml_parser_parse_document_end(yaml_parser_t *parser, yaml_event_t *event) { yaml_token_t *token; - yaml_event_t *event; yaml_mark_t 
start_mark, end_mark; int implicit = 1; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; start_mark = end_mark = token->start_mark; while (token->type == YAML_DOCUMENT_END_TOKEN) { end_mark = token->end_mark; - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; implicit = 0; } - parser->version_directive = NULL; - parser->tag_directives = NULL; - parser->tag_directives = yaml_malloc(YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); - if (!parser->tag_directives) { - parser->error = YAML_MEMORY_ERROR; - return NULL; + while (!STACK_EMPTY(parser, parser->tag_directives)) { + yaml_tag_directive_t tag_directive = POP(parser, parser->tag_directives); + yaml_free(tag_directive.handle); + yaml_free(tag_directive.prefix); } - memset(parser->tag_directives, 0, YAML_DEFAULT_SIZE*sizeof(yaml_tag_directive_t *)); - parser->tag_directives_size = YAML_DEFAULT_SIZE; - parser->tag_directives_length = 0; - - - event = yaml_document_end_event_new(implicit, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } parser->state = YAML_PARSE_DOCUMENT_START_STATE; + DOCUMENT_END_EVENT_INIT(*event, implicit, start_mark, end_mark); - return event; + return 1; } /* @@ -603,12 +578,11 @@ yaml_parser_parse_document_end(yaml_parser_t *parser) * ****** */ -static yaml_event_t * -yaml_parser_parse_node(yaml_parser_t *parser, +static int +yaml_parser_parse_node(yaml_parser_t *parser, yaml_event_t *event, int block, int indentless_sequence) { yaml_token_t *token; - yaml_event_t *event; yaml_char_t *anchor = NULL; yaml_char_t *tag_handle = NULL; yaml_char_t *tag_suffix = NULL; @@ -616,22 +590,16 @@ yaml_parser_parse_node(yaml_parser_t *parser, yaml_mark_t start_mark, end_mark, tag_mark; int implicit; - token = yaml_parser_peek_token(parser); - 
if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_ALIAS_TOKEN) { - parser->state = parser->states[--parser->states_length]; - token = yaml_parser_get_token(parser); - event = yaml_alias_event_new(token->data.alias.value, + parser->state = POP(parser, parser->states); + ALIAS_EVENT_INIT(*event, token->data.alias.value, token->start_mark, token->end_mark); - if (!event) { - yaml_token_delete(token); - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - yaml_free(token); - return event; + SKIP_TOKEN(parser); + return 1; } else @@ -640,42 +608,38 @@ yaml_parser_parse_node(yaml_parser_t *parser, if (token->type == YAML_ANCHOR_TOKEN) { - token = yaml_parser_get_token(parser); anchor = token->data.anchor.value; start_mark = token->start_mark; end_mark = token->end_mark; - yaml_free(token); - token = yaml_parser_peek_token(parser); + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); if (!token) goto error; if (token->type == YAML_TAG_TOKEN) { - token = yaml_parser_get_token(parser); tag_handle = token->data.tag.handle; tag_suffix = token->data.tag.suffix; tag_mark = token->start_mark; end_mark = token->end_mark; - yaml_free(token); - token = yaml_parser_peek_token(parser); + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); if (!token) goto error; } } else if (token->type == YAML_TAG_TOKEN) { - token = yaml_parser_get_token(parser); tag_handle = token->data.tag.handle; tag_suffix = token->data.tag.suffix; start_mark = tag_mark = token->start_mark; end_mark = token->end_mark; - yaml_free(token); - token = yaml_parser_peek_token(parser); + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); if (!token) goto error; if (token->type == YAML_ANCHOR_TOKEN) { - token = yaml_parser_get_token(parser); anchor = token->data.anchor.value; end_mark = token->end_mark; - yaml_free(token); - token = yaml_parser_peek_token(parser); + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); if (!token) goto error; } } @@ -687,18 +651,19 @@ 
yaml_parser_parse_node(yaml_parser_t *parser, tag_handle = tag_suffix = NULL; } else { - yaml_tag_directive_t **tag_directive = parser->tag_directives; - for (tag_directive = parser->tag_directives; - *tag_directive; tag_directive++) { - if (strcmp((char *)(*tag_directive)->handle, (char *)tag_handle) == 0) { - size_t prefix_len = strlen((char *)(*tag_directive)->prefix); + yaml_tag_directive_t *tag_directive; + for (tag_directive = parser->tag_directives.start; + tag_directive != parser->tag_directives.top; + tag_directive ++) { + if (strcmp((char *)tag_directive->handle, (char *)tag_handle) == 0) { + size_t prefix_len = strlen((char *)tag_directive->prefix); size_t suffix_len = strlen((char *)tag_suffix); tag = yaml_malloc(prefix_len+suffix_len+1); if (!tag) { parser->error = YAML_MEMORY_ERROR; goto error; } - memcpy(tag, (*tag_directive)->prefix, prefix_len); + memcpy(tag, tag_directive->prefix, prefix_len); memcpy(tag+prefix_len, tag_suffix, suffix_len); tag[prefix_len+suffix_len] = '\0'; yaml_free(tag_handle); @@ -707,7 +672,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, break; } } - if (!*tag_directive) { + if (!tag) { yaml_parser_set_parser_error_context(parser, "while parsing a node", start_mark, "found undefined tag handle", tag_mark); @@ -720,16 +685,14 @@ yaml_parser_parse_node(yaml_parser_t *parser, if (indentless_sequence && token->type == YAML_BLOCK_ENTRY_TOKEN) { end_mark = token->end_mark; parser->state = YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE; - event = yaml_sequence_start_event_new(anchor, tag, implicit, + SEQUENCE_START_EVENT_INIT(*event, anchor, tag, implicit, YAML_BLOCK_SEQUENCE_STYLE, start_mark, end_mark); - if (!event) goto error; - return event; + return 1; } else { if (token->type == YAML_SCALAR_TOKEN) { int plain_implicit = 0; int quoted_implicit = 0; - token = yaml_parser_get_token(parser); end_mark = token->end_mark; if ((token->data.scalar.style == YAML_PLAIN_SCALAR_STYLE && !tag) || (tag && strcmp((char *)tag, "!") == 0)) { @@ 
-738,57 +701,41 @@ yaml_parser_parse_node(yaml_parser_t *parser, else if (!tag) { quoted_implicit = 1; } - parser->state = parser->states[--parser->states_length]; - event = yaml_scalar_event_new(anchor, tag, + parser->state = POP(parser, parser->states); + SCALAR_EVENT_INIT(*event, anchor, tag, token->data.scalar.value, token->data.scalar.length, plain_implicit, quoted_implicit, token->data.scalar.style, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - yaml_token_delete(token); - goto error; - } - yaml_free(token); + SKIP_TOKEN(parser); + return 1; } else if (token->type == YAML_FLOW_SEQUENCE_START_TOKEN) { end_mark = token->end_mark; parser->state = YAML_PARSE_FLOW_SEQUENCE_FIRST_ENTRY_STATE; - event = yaml_sequence_start_event_new(anchor, tag, implicit, + SEQUENCE_START_EVENT_INIT(*event, anchor, tag, implicit, YAML_FLOW_SEQUENCE_STYLE, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - goto error; - } + return 1; } else if (token->type == YAML_FLOW_MAPPING_START_TOKEN) { end_mark = token->end_mark; parser->state = YAML_PARSE_FLOW_MAPPING_FIRST_KEY_STATE; - event = yaml_mapping_start_event_new(anchor, tag, implicit, + MAPPING_START_EVENT_INIT(*event, anchor, tag, implicit, YAML_FLOW_MAPPING_STYLE, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - goto error; - } + return 1; } else if (block && token->type == YAML_BLOCK_SEQUENCE_START_TOKEN) { end_mark = token->end_mark; parser->state = YAML_PARSE_BLOCK_SEQUENCE_FIRST_ENTRY_STATE; - event = yaml_sequence_start_event_new(anchor, tag, implicit, + SEQUENCE_START_EVENT_INIT(*event, anchor, tag, implicit, YAML_BLOCK_SEQUENCE_STYLE, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - goto error; - } + return 1; } else if (block && token->type == YAML_BLOCK_MAPPING_START_TOKEN) { end_mark = token->end_mark; parser->state = YAML_PARSE_BLOCK_MAPPING_FIRST_KEY_STATE; - event = yaml_mapping_start_event_new(anchor, 
tag, implicit, + MAPPING_START_EVENT_INIT(*event, anchor, tag, implicit, YAML_BLOCK_MAPPING_STYLE, start_mark, end_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - goto error; - } + return 1; } else if (anchor || tag) { yaml_char_t *value = yaml_malloc(1); @@ -797,15 +744,11 @@ yaml_parser_parse_node(yaml_parser_t *parser, goto error; } value[0] = '\0'; - parser->state = parser->states[--parser->states_length]; - event = yaml_scalar_event_new(anchor, tag, value, 0, + parser->state = POP(parser, parser->states); + SCALAR_EVENT_INIT(*event, anchor, tag, value, 0, implicit, 0, YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); - if (!event) { - yaml_free(value); - parser->error = YAML_MEMORY_ERROR; - goto error; - } + return 1; } else { yaml_parser_set_parser_error_context(parser, @@ -814,7 +757,6 @@ yaml_parser_parse_node(yaml_parser_t *parser, "did not found expected node content", token->start_mark); goto error; } - return event; } } @@ -824,7 +766,7 @@ yaml_parser_parse_node(yaml_parser_t *parser, yaml_free(tag_suffix); yaml_free(tag); - return NULL; + return 0; } /* @@ -833,63 +775,55 @@ yaml_parser_parse_node(yaml_parser_t *parser, * ******************** *********** * ********* */ -static yaml_event_t * -yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, int first) +static int +yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, + yaml_event_t *event, int first) { yaml_token_t *token; - yaml_event_t *event; if (first) { - token = yaml_parser_get_token(parser); - if (!yaml_parser_append_mark(parser, token->start_mark)) { - yaml_token_delete(token); - return NULL; - } - yaml_token_delete(token); + token = PEEK_TOKEN(parser); + if (!PUSH(parser, parser->marks, token->start_mark)) + return 0; + SKIP_TOKEN(parser); } - token = yaml_parser_get_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_BLOCK_ENTRY_TOKEN) { yaml_mark_t mark = token->end_mark; - 
yaml_token_delete(token); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_BLOCK_ENTRY_TOKEN && token->type != YAML_BLOCK_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 1, 0); + return 0; + return yaml_parser_parse_node(parser, event, 1, 0); } else { parser->state = YAML_PARSE_BLOCK_SEQUENCE_ENTRY_STATE; - return yaml_parser_process_empty_scalar(parser, mark); + return yaml_parser_process_empty_scalar(parser, event, mark); } } else if (token->type == YAML_BLOCK_END_TOKEN) { - parser->state = parser->states[--parser->states_length]; - parser->marks_length --; - event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); - yaml_token_delete(token); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + parser->state = POP(parser, parser->states); + POP(parser, parser->marks); + SEQUENCE_END_EVENT_INIT(*event, token->start_mark, token->end_mark); + SKIP_TOKEN(parser); + return 1; } else { - yaml_parser_set_parser_error_context(parser, - "while parsing a block collection", parser->marks[parser->marks_length-1], + return yaml_parser_set_parser_error_context(parser, + "while parsing a block collection", POP(parser, parser->marks), "did not found expected '-' indicator", token->start_mark); - yaml_token_delete(token); - return NULL; } } @@ -899,42 +833,41 @@ yaml_parser_parse_block_sequence_entry(yaml_parser_t *parser, int first) * *********** * */ -static yaml_event_t * -yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser) +static int +yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser, + yaml_event_t *event) { yaml_token_t *token; - yaml_event_t *event; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = 
PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_BLOCK_ENTRY_TOKEN) { yaml_mark_t mark = token->end_mark; - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_BLOCK_ENTRY_TOKEN && + token->type != YAML_KEY_TOKEN && + token->type != YAML_VALUE_TOKEN && token->type != YAML_BLOCK_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 1, 0); + return 0; + return yaml_parser_parse_node(parser, event, 1, 0); } else { parser->state = YAML_PARSE_INDENTLESS_SEQUENCE_ENTRY_STATE; - return yaml_parser_process_empty_scalar(parser, mark); + return yaml_parser_process_empty_scalar(parser, event, mark); } } else { - parser->state = parser->states[--parser->states_length]; - event = yaml_sequence_end_event_new(token->start_mark, token->start_mark); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + parser->state = POP(parser, parser->states); + SEQUENCE_END_EVENT_INIT(*event, token->start_mark, token->start_mark); + return 1; } } @@ -950,64 +883,56 @@ yaml_parser_parse_indentless_sequence_entry(yaml_parser_t *parser) * ********* */ -static yaml_event_t * -yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, int first) +static int +yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, + yaml_event_t *event, int first) { yaml_token_t *token; - yaml_event_t *event; if (first) { - token = yaml_parser_get_token(parser); - if (!yaml_parser_append_mark(parser, token->start_mark)) { - yaml_token_delete(token); - return NULL; - } - yaml_token_delete(token); + token = PEEK_TOKEN(parser); + if (!PUSH(parser, parser->marks, token->start_mark)) + return 0; + SKIP_TOKEN(parser); } - token = yaml_parser_get_token(parser); - if (!token) return NULL; + token = 
PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_KEY_TOKEN) { yaml_mark_t mark = token->end_mark; - yaml_token_delete(token); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_KEY_TOKEN && token->type != YAML_VALUE_TOKEN && token->type != YAML_BLOCK_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_BLOCK_MAPPING_VALUE_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 1, 1); + return 0; + return yaml_parser_parse_node(parser, event, 1, 1); } else { parser->state = YAML_PARSE_BLOCK_MAPPING_VALUE_STATE; - return yaml_parser_process_empty_scalar(parser, mark); + return yaml_parser_process_empty_scalar(parser, event, mark); } } else if (token->type == YAML_BLOCK_END_TOKEN) { - parser->state = parser->states[--parser->states_length]; - parser->marks_length --; - event = yaml_mapping_end_event_new(token->start_mark, token->end_mark); - yaml_token_delete(token); - if (!event) { - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + parser->state = POP(parser, parser->states); + POP(parser, parser->marks); + MAPPING_END_EVENT_INIT(*event, token->start_mark, token->end_mark); + SKIP_TOKEN(parser); + return 1; } else { - yaml_parser_set_parser_error_context(parser, - "while parsing a block mapping", parser->marks[parser->marks_length-1], + return yaml_parser_set_parser_error_context(parser, + "while parsing a block mapping", POP(parser, parser->marks), "did not found expected key", token->start_mark); - yaml_token_delete(token); - return NULL; } } @@ -1023,39 +948,39 @@ yaml_parser_parse_block_mapping_key(yaml_parser_t *parser, int first) * */ -static yaml_event_t * -yaml_parser_parse_block_mapping_value(yaml_parser_t *parser) +static int +yaml_parser_parse_block_mapping_value(yaml_parser_t *parser, + yaml_event_t *event) { yaml_token_t *token; - yaml_event_t 
*event; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_VALUE_TOKEN) { yaml_mark_t mark = token->end_mark; - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_KEY_TOKEN && token->type != YAML_VALUE_TOKEN && token->type != YAML_BLOCK_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_BLOCK_MAPPING_KEY_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 1, 1); + return 0; + return yaml_parser_parse_node(parser, event, 1, 1); } else { parser->state = YAML_PARSE_BLOCK_MAPPING_KEY_STATE; - return yaml_parser_process_empty_scalar(parser, mark); + return yaml_parser_process_empty_scalar(parser, event, mark); } } else { parser->state = YAML_PARSE_BLOCK_MAPPING_KEY_STATE; - return yaml_parser_process_empty_scalar(parser, token->start_mark); + return yaml_parser_process_empty_scalar(parser, event, token->start_mark); } } @@ -1073,64 +998,59 @@ yaml_parser_parse_block_mapping_value(yaml_parser_t *parser) * * */ -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first) +static int +yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, + yaml_event_t *event, int first) { yaml_token_t *token; - yaml_event_t *event; if (first) { - token = yaml_parser_get_token(parser); - if (!yaml_parser_append_mark(parser, token->start_mark)) { - yaml_token_delete(token); - return NULL; - } - yaml_token_delete(token); + token = PEEK_TOKEN(parser); + if (!PUSH(parser, parser->marks, token->start_mark)) + return 0; + SKIP_TOKEN(parser); } - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { if (!first) { if 
(token->type == YAML_FLOW_ENTRY_TOKEN) { - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; } else { - yaml_parser_set_parser_error_context(parser, - "while parsing a flow sequence", parser->marks[parser->marks_length-1], + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow sequence", POP(parser, parser->marks), "did not found expected ',' or ']'", token->start_mark); - return NULL; } } if (token->type == YAML_KEY_TOKEN) { - token = yaml_parser_get_token(parser); parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_KEY_STATE; - event = yaml_mapping_start_event_new(NULL, NULL, + MAPPING_START_EVENT_INIT(*event, NULL, NULL, 1, YAML_FLOW_MAPPING_STYLE, token->start_mark, token->end_mark); - yaml_token_delete(token); - return event; + SKIP_TOKEN(parser); + return 1; } else if (token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 0, 0); + return 0; + return yaml_parser_parse_node(parser, event, 0, 0); } } - parser->state = parser->states[--parser->states_length]; - parser->marks_length --; - token = yaml_parser_get_token(parser); - event = yaml_sequence_end_event_new(token->start_mark, token->end_mark); - yaml_token_delete(token); - return event; + parser->state = POP(parser, parser->states); + POP(parser, parser->marks); + SEQUENCE_END_EVENT_INIT(*event, token->start_mark, token->end_mark); + SKIP_TOKEN(parser); + return 1; } /* @@ -1139,27 +1059,27 @@ yaml_parser_parse_flow_sequence_entry(yaml_parser_t *parser, int first) * *** * */ -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser) +static int +yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser, + yaml_event_t *event) { 
yaml_token_t *token; - yaml_event_t *event; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_VALUE_TOKEN && token->type != YAML_FLOW_ENTRY_TOKEN && token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 0, 0); + return 0; + return yaml_parser_parse_node(parser, event, 0, 0); } else { + yaml_mark_t mark = token->end_mark; + SKIP_TOKEN(parser); parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_VALUE_STATE; - event = yaml_parser_process_empty_scalar(parser, token->end_mark); - yaml_token_delete(token); - return event; + return yaml_parser_process_empty_scalar(parser, event, mark); } } @@ -1169,28 +1089,29 @@ yaml_parser_parse_flow_sequence_entry_mapping_key(yaml_parser_t *parser) * ***** * */ -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser) +static int +yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser, + yaml_event_t *event) { yaml_token_t *token; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type == YAML_VALUE_TOKEN) { - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_FLOW_ENTRY_TOKEN && token->type != YAML_FLOW_SEQUENCE_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 0, 0); + return 0; + return yaml_parser_parse_node(parser, event, 0, 0); } } parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_MAPPING_END_STATE; - return 
yaml_parser_process_empty_scalar(parser, token->start_mark); + return yaml_parser_process_empty_scalar(parser, event, token->start_mark); } /* @@ -1199,17 +1120,19 @@ yaml_parser_parse_flow_sequence_entry_mapping_value(yaml_parser_t *parser) * * */ -static yaml_event_t * -yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser) +static int +yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser, + yaml_event_t *event) { yaml_token_t *token; - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + token = PEEK_TOKEN(parser); + if (!token) return 0; parser->state = YAML_PARSE_FLOW_SEQUENCE_ENTRY_STATE; - return yaml_mapping_end_event_new(token->start_mark, token->start_mark); + MAPPING_END_EVENT_INIT(*event, token->start_mark, token->start_mark); + return 1; } /* @@ -1226,71 +1149,68 @@ yaml_parser_parse_flow_sequence_entry_mapping_end(yaml_parser_t *parser) * * *** * */ -static yaml_event_t * -yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first) +static int +yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, + yaml_event_t *event, int first) { yaml_token_t *token; - yaml_event_t *event; if (first) { - token = yaml_parser_get_token(parser); - if (!yaml_parser_append_mark(parser, token->start_mark)) { - yaml_token_delete(token); - return NULL; - } - yaml_token_delete(token); + token = PEEK_TOKEN(parser); + if (!PUSH(parser, parser->marks, token->start_mark)) + return 0; + SKIP_TOKEN(parser); } - token = yaml_parser_peek_token(parser); - if (!token) return; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_FLOW_MAPPING_END_TOKEN) { if (!first) { if (token->type == YAML_FLOW_ENTRY_TOKEN) { - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; } else { - yaml_parser_set_parser_error_context(parser, - "while parsing a flow mapping", 
parser->marks[parser->marks_length-1], + return yaml_parser_set_parser_error_context(parser, + "while parsing a flow mapping", POP(parser, parser->marks), "did not found expected ',' or '}'", token->start_mark); - return NULL; } } if (token->type == YAML_KEY_TOKEN) { - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_VALUE_TOKEN && token->type != YAML_FLOW_ENTRY_TOKEN && token->type != YAML_FLOW_MAPPING_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_FLOW_MAPPING_VALUE_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 0, 0); + return 0; + return yaml_parser_parse_node(parser, event, 0, 0); } else { parser->state = YAML_PARSE_FLOW_MAPPING_VALUE_STATE; - return yaml_parser_process_empty_scalar(parser, token->start_mark); + return yaml_parser_process_empty_scalar(parser, event, + token->start_mark); } } else if (token->type != YAML_FLOW_MAPPING_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_FLOW_MAPPING_EMPTY_VALUE_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 0, 0); + return 0; + return yaml_parser_parse_node(parser, event, 0, 0); } } - parser->state = parser->states[--parser->states_length]; - parser->marks_length --; - token = yaml_parser_get_token(parser); - event = yaml_mapping_end_event_new(token->start_mark, token->end_mark); - yaml_token_delete(token); - return event; + parser->state = POP(parser, parser->states); + POP(parser, parser->marks); + MAPPING_END_EVENT_INIT(*event, token->start_mark, token->end_mark); + SKIP_TOKEN(parser); + return 1; } /* @@ -1299,63 +1219,59 @@ yaml_parser_parse_flow_mapping_key(yaml_parser_t *parser, int first) * * ***** * */ -static yaml_event_t * -yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, int empty) +static int 
+yaml_parser_parse_flow_mapping_value(yaml_parser_t *parser, + yaml_event_t *event, int empty) { yaml_token_t *token; - yaml_event_t *event; - token = yaml_parser_peek_token(parser); - if (!token) return; + token = PEEK_TOKEN(parser); + if (!token) return 0; if (empty) { parser->state = YAML_PARSE_FLOW_MAPPING_KEY_STATE; - return yaml_parser_process_empty_scalar(parser, token->start_mark); + return yaml_parser_process_empty_scalar(parser, event, + token->start_mark); } if (token->type == YAML_VALUE_TOKEN) { - yaml_token_delete(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return NULL; + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) return 0; if (token->type != YAML_FLOW_ENTRY_TOKEN && token->type != YAML_FLOW_MAPPING_END_TOKEN) { - if (!yaml_parser_append_state(parser, + if (!PUSH(parser, parser->states, YAML_PARSE_FLOW_MAPPING_KEY_STATE)) - return NULL; - return yaml_parser_parse_node(parser, 0, 0); + return 0; + return yaml_parser_parse_node(parser, event, 0, 0); } } parser->state = YAML_PARSE_FLOW_MAPPING_KEY_STATE; - return yaml_parser_process_empty_scalar(parser, token->start_mark); + return yaml_parser_process_empty_scalar(parser, event, token->start_mark); } /* * Generate an empty scalar event. 
*/ -static yaml_event_t * -yaml_parser_process_empty_scalar(yaml_parser_t *parser, yaml_mark_t mark) +static int +yaml_parser_process_empty_scalar(yaml_parser_t *parser, yaml_event_t *event, + yaml_mark_t mark) { - yaml_event_t *event; yaml_char_t *value; value = yaml_malloc(1); if (!value) { parser->error = YAML_MEMORY_ERROR; - return NULL; + return 0; } value[0] = '\0'; - event = yaml_scalar_event_new(NULL, NULL, value, 0, + SCALAR_EVENT_INIT(*event, NULL, NULL, value, 0, 1, 0, YAML_PLAIN_SCALAR_STYLE, mark, mark); - if (!event) { - yaml_free(value); - parser->error = YAML_MEMORY_ERROR; - return NULL; - } - return event; + return 1; } /* @@ -1363,41 +1279,53 @@ yaml_parser_process_empty_scalar(yaml_parser_t *parser, yaml_mark_t mark) */ static int -yaml_parser_process_directives(yaml_parser_t *parser) +yaml_parser_process_directives(yaml_parser_t *parser, + yaml_version_directive_t **version_directive_ref, + yaml_tag_directive_t **tag_directives_start_ref, + yaml_tag_directive_t **tag_directives_end_ref) { yaml_tag_directive_t default_tag_directives[] = { {(yaml_char_t *)"!", (yaml_char_t *)"!"}, {(yaml_char_t *)"!!", (yaml_char_t *)"tag:yaml.org,2002:"}, {NULL, NULL} }; - yaml_tag_directive_t *ref; yaml_tag_directive_t *default_tag_directive; - yaml_tag_directive_t **tag_directive; + yaml_version_directive_t *version_directive = NULL; + struct { + yaml_tag_directive_t *start; + yaml_tag_directive_t *end; + yaml_tag_directive_t *top; + } tag_directives = { NULL, NULL, NULL }; yaml_token_t *token; - token = yaml_parser_peek_token(parser); - if (!token) return 0; + if (!STACK_INIT(parser, tag_directives, INITIAL_STACK_SIZE)) + goto error; + + token = PEEK_TOKEN(parser); + if (!token) goto error; while (token->type == YAML_VERSION_DIRECTIVE_TOKEN || token->type == YAML_TAG_DIRECTIVE_TOKEN) { if (token->type == YAML_VERSION_DIRECTIVE_TOKEN) { - if (parser->version_directive) { - return yaml_parser_set_parser_error(parser, + if (version_directive) { + 
yaml_parser_set_parser_error(parser, "found duplicate %YAML directive", token->start_mark); + goto error; } if (token->data.version_directive.major != 1 && token->data.version_directive.minor != 1) { - return yaml_parser_set_parser_error(parser, + yaml_parser_set_parser_error(parser, "found incompatible YAML document", token->start_mark); + goto error; } - parser->version_directive = yaml_malloc(sizeof(yaml_version_directive_t)); - if (!parser->version_directive) { + version_directive = yaml_malloc(sizeof(yaml_version_directive_t)); + if (!version_directive) { parser->error = YAML_MEMORY_ERROR; - return 0; + goto error; } - parser->version_directive->major = token->data.version_directive.major; - parser->version_directive->minor = token->data.version_directive.minor; + version_directive->major = token->data.version_directive.major; + version_directive->minor = token->data.version_directive.minor; } else if (token->type == YAML_TAG_DIRECTIVE_TOKEN) { @@ -1405,70 +1333,84 @@ yaml_parser_process_directives(yaml_parser_t *parser) token->data.tag_directive.handle, token->data.tag_directive.prefix }; - for (tag_directive = parser->tag_directives; - *tag_directive; tag_directive++) { - if (strcmp((char *)value.handle, - (char *)(*tag_directive)->handle) == 0) { - return yaml_parser_set_parser_error(parser, - "found duplicate %TAG directive", token->start_mark); - } - } - ref = yaml_malloc(sizeof(yaml_tag_directive_t)); - if (!ref) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - *ref = value; - if (!yaml_parser_append_tag_directive(parser, ref)) { - yaml_free(ref); - return 0; - } + if (!yaml_parser_append_tag_directive(parser, value, 0, + token->start_mark)) + goto error; + if (!PUSH(parser, tag_directives, value)) + goto error; } - yaml_free(yaml_parser_get_token(parser)); - token = yaml_parser_peek_token(parser); - if (!token) return 0; - } + SKIP_TOKEN(parser); + token = PEEK_TOKEN(parser); + if (!token) goto error; + } + for (default_tag_directive = 
default_tag_directives; default_tag_directive->handle; default_tag_directive++) { - int found = 0; - for (tag_directive = parser->tag_directives; - *tag_directive; tag_directive++) { - if (strcmp((char *)default_tag_directive->handle, - (char *)(*tag_directive)->handle) == 0) { - found = 1; - break; - } + if (!yaml_parser_append_tag_directive(parser, *default_tag_directive, 1, + token->start_mark)) + goto error; + } + + if (version_directive_ref) { + *version_directive_ref = version_directive; + } + if (tag_directives_start_ref) { + if (STACK_EMPTY(parser, tag_directives)) { + *tag_directives_start_ref = *tag_directives_end_ref = NULL; } - if (!found) { - ref = yaml_malloc(sizeof(yaml_tag_directive_t)); - if (!ref) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - ref->handle = yaml_malloc(strlen((char *)default_tag_directive->handle)+1); - if (!ref->handle) { - yaml_free(ref); - parser->error = YAML_MEMORY_ERROR; - return 0; - } - ref->prefix = yaml_malloc(strlen((char *)default_tag_directive->prefix)+1); - if (!ref->prefix) { - yaml_free(ref->handle); - yaml_free(ref); - parser->error = YAML_MEMORY_ERROR; - return 0; - } - memcpy(ref->handle, default_tag_directive->handle, - strlen((char *)default_tag_directive->handle)+1); - memcpy(ref->prefix, default_tag_directive->prefix, - strlen((char *)default_tag_directive->prefix)+1); - if (!yaml_parser_append_tag_directive(parser, ref)) { - yaml_free(ref); - return 0; - } + else { + *tag_directives_start_ref = tag_directives.start; + *tag_directives_end_ref = tag_directives.end; } } + return 1; + +error: + yaml_free(version_directive); + while (!STACK_EMPTY(parser, tag_directives)) { + yaml_tag_directive_t tag_directive = POP(parser, tag_directives); + yaml_free(tag_directive.handle); + yaml_free(tag_directive.prefix); + } + STACK_DEL(parser, tag_directives); + return 0; +} + +static int +yaml_parser_append_tag_directive(yaml_parser_t *parser, + yaml_tag_directive_t value, int allow_duplicates, yaml_mark_t mark) 
+{ + yaml_tag_directive_t *tag_directive; + yaml_tag_directive_t copy = { NULL, NULL }; + int length; + + for (tag_directive = parser->tag_directives.start; + tag_directive != parser->tag_directives.top; tag_directive ++) { + if (strcmp((char *)value.handle, (char *)tag_directive->handle) == 0) { + if (allow_duplicates) + return 1; + return yaml_parser_set_parser_error(parser, + "found duplicate %TAG directive", mark); + } + } + + copy.handle = (yaml_char_t *)yaml_strdup((char *)value.handle); + copy.prefix = (yaml_char_t *)yaml_strdup((char *)value.prefix); + if (!copy.handle || !copy.prefix) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } + + if (!PUSH(parser, parser->tag_directives, copy)) + goto error; + + return 1; + +error: + yaml_free(copy.handle); + yaml_free(copy.prefix); + return 0; } diff --git a/src/reader.c b/src/reader.c index 9cc8e7bb..99a51db0 100644 --- a/src/reader.c +++ b/src/reader.c @@ -1,11 +1,22 @@ -#if HAVE_CONFIG_H -#include -#endif +#include "yaml_private.h" -#include +/* + * Declarations. + */ + +static int +yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, + size_t offset, int value); -#include +static int +yaml_parser_update_raw_buffer(yaml_parser_t *parser); + +static int +yaml_parser_determine_encoding(yaml_parser_t *parser); + +YAML_DECLARE(int) +yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); /* * Set the reader error and return 0. @@ -24,61 +35,25 @@ yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem, } /* - * Update the raw buffer. + * Byte order marks. */ -static int -yaml_parser_update_raw_buffer(yaml_parser_t *parser) -{ - size_t size_read = 0; - - /* Return if the raw buffer is full. */ - - if (parser->raw_unread == YAML_RAW_BUFFER_SIZE) return 1; - - /* Return on EOF. */ - - if (parser->eof) return 1; - - /* Move the remaining bytes in the raw buffer to the beginning. 
*/ - - if (parser->raw_unread && parser->raw_buffer < parser->raw_pointer) { - memmove(parser->raw_buffer, parser->raw_pointer, parser->raw_unread); - } - parser->raw_pointer = parser->raw_buffer; - - /* Call the read handler to fill the buffer. */ - - if (!parser->read_handler(parser->read_handler_data, - parser->raw_buffer + parser->raw_unread, - YAML_RAW_BUFFER_SIZE - parser->raw_unread, - &size_read)) { - return yaml_parser_set_reader_error(parser, "Input error", - parser->offset, -1); - } - parser->raw_unread += size_read; - if (!size_read) { - parser->eof = 1; - } - - return 1; -} +#define BOM_UTF8 "\xef\xbb\xbf" +#define BOM_UTF16LE "\xff\xfe" +#define BOM_UTF16BE "\xfe\xff" /* * Determine the input stream encoding by checking the BOM symbol. If no BOM is * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure. */ -#define BOM_UTF8 "\xef\xbb\xbf" -#define BOM_UTF16LE "\xff\xfe" -#define BOM_UTF16BE "\xfe\xff" - static int yaml_parser_determine_encoding(yaml_parser_t *parser) { /* Ensure that we had enough bytes in the raw buffer. */ - while (!parser->eof && parser->raw_unread < 3) { + while (!parser->eof + && parser->raw_buffer.last - parser->raw_buffer.pointer < 3) { if (!yaml_parser_update_raw_buffer(parser)) { return 0; } @@ -86,25 +61,22 @@ yaml_parser_determine_encoding(yaml_parser_t *parser) /* Determine the encoding. 
*/ - if (parser->raw_unread >= 2 - && !memcmp(parser->raw_pointer, BOM_UTF16LE, 2)) { + if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2 + && !memcmp(parser->raw_buffer.pointer, BOM_UTF16LE, 2)) { parser->encoding = YAML_UTF16LE_ENCODING; - parser->raw_pointer += 2; - parser->raw_unread -= 2; + parser->raw_buffer.pointer += 2; parser->offset += 2; } - else if (parser->raw_unread >= 2 - && !memcmp(parser->raw_pointer, BOM_UTF16BE, 2)) { + else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2 + && !memcmp(parser->raw_buffer.pointer, BOM_UTF16BE, 2)) { parser->encoding = YAML_UTF16BE_ENCODING; - parser->raw_pointer += 2; - parser->raw_unread -= 2; + parser->raw_buffer.pointer += 2; parser->offset += 2; } - else if (parser->raw_unread >= 3 - && !memcmp(parser->raw_pointer, BOM_UTF8, 3)) { + else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 3 + && !memcmp(parser->raw_buffer.pointer, BOM_UTF8, 3)) { parser->encoding = YAML_UTF8_ENCODING; - parser->raw_pointer += 3; - parser->raw_unread -= 3; + parser->raw_buffer.pointer += 3; parser->offset += 3; } else { @@ -115,7 +87,52 @@ yaml_parser_determine_encoding(yaml_parser_t *parser) } /* - * Ensure that the buffer contains at least length characters. + * Update the raw buffer. + */ + +static int +yaml_parser_update_raw_buffer(yaml_parser_t *parser) +{ + size_t size_read = 0; + + /* Return if the raw buffer is full. */ + + if (parser->raw_buffer.start == parser->raw_buffer.pointer + && parser->raw_buffer.last == parser->raw_buffer.end) + return 1; + + /* Return on EOF. */ + + if (parser->eof) return 1; + + /* Move the remaining bytes in the raw buffer to the beginning. 
*/ + + if (parser->raw_buffer.start < parser->raw_buffer.pointer + && parser->raw_buffer.pointer < parser->raw_buffer.last) { + memmove(parser->raw_buffer.start, parser->raw_buffer.pointer, + parser->raw_buffer.last - parser->raw_buffer.pointer); + } + parser->raw_buffer.last -= + parser->raw_buffer.pointer - parser->raw_buffer.start; + parser->raw_buffer.pointer = parser->raw_buffer.start; + + /* Call the read handler to fill the buffer. */ + + if (!parser->read_handler(parser->read_handler_data, parser->raw_buffer.last, + parser->raw_buffer.end - parser->raw_buffer.last, &size_read)) { + return yaml_parser_set_reader_error(parser, "Input error", + parser->offset, -1); + } + parser->raw_buffer.last += size_read; + if (!size_read) { + parser->eof = 1; + } + + return 1; +} + +/* + * Ensure that the buffer contains at least `length` characters. * Return 1 on success, 0 on failure. * * The length is supposed to be significantly less that the buffer size. @@ -124,9 +141,11 @@ yaml_parser_determine_encoding(yaml_parser_t *parser) YAML_DECLARE(int) yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) { + assert(parser->read_handler); /* Read handler must be set. */ + /* If the EOF flag is set and the raw buffer is empty, do nothing. */ - if (parser->eof && !parser->raw_unread) + if (parser->eof && parser->raw_buffer.pointer == parser->raw_buffer.last) return 1; /* Return if the buffer contains enough characters. */ @@ -143,16 +162,16 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Move the unread characters to the beginning of the buffer. 
*/ - if (parser->buffer < parser->pointer - && parser->pointer < parser->buffer_end) { - size_t size = parser->buffer_end - parser->pointer; - memmove(parser->buffer, parser->pointer, size); - parser->pointer = parser->buffer; - parser->buffer_end = parser->buffer + size; + if (parser->buffer.start < parser->buffer.pointer + && parser->buffer.pointer < parser->buffer.last) { + size_t size = parser->buffer.last - parser->buffer.pointer; + memmove(parser->buffer.start, parser->buffer.pointer, size); + parser->buffer.pointer = parser->buffer.start; + parser->buffer.last = parser->buffer.start + size; } - else if (parser->pointer == parser->buffer_end) { - parser->pointer = parser->buffer; - parser->buffer_end = parser->buffer; + else if (parser->buffer.pointer == parser->buffer.last) { + parser->buffer.pointer = parser->buffer.start; + parser->buffer.last = parser->buffer.start; } /* Fill the buffer until it has enough characters. */ @@ -165,13 +184,14 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Decode the raw buffer. */ - while (parser->raw_unread) + while (parser->raw_buffer.pointer != parser->raw_buffer.last) { unsigned int value, value2; int incomplete = 0; unsigned char octet; unsigned int width; int k, low, high; + int raw_unread = parser->raw_buffer.last - parser->raw_buffer.pointer; /* Decode the next character. */ @@ -201,7 +221,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Determine the length of the UTF-8 sequence. */ - octet = parser->raw_pointer[0]; + octet = parser->raw_buffer.pointer[0]; width = (octet & 0x80) == 0x00 ? 1 : (octet & 0xE0) == 0xC0 ? 2 : (octet & 0xF0) == 0xE0 ? 3 : @@ -216,7 +236,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Check if the raw buffer contains an incomplete character. 
*/ - if (width > parser->raw_unread) { + if (width > raw_unread) { if (parser->eof) { return yaml_parser_set_reader_error(parser, "Incomplete UTF-8 octet sequence", @@ -237,7 +257,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) for (k = 1; k < width; k ++) { - octet = parser->raw_pointer[k]; + octet = parser->raw_buffer.pointer[k]; /* Check if the octet is valid. */ @@ -304,7 +324,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Check for incomplete UTF-16 character. */ - if (parser->raw_unread < 2) { + if (raw_unread < 2) { if (parser->eof) { return yaml_parser_set_reader_error(parser, "Incomplete UTF-16 character", @@ -316,8 +336,8 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Get the character. */ - value = parser->raw_pointer[low] - + (parser->raw_pointer[high] << 8); + value = parser->raw_buffer.pointer[low] + + (parser->raw_buffer.pointer[high] << 8); /* Check for unexpected low surrogate area. */ @@ -334,7 +354,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Check for incomplete surrogate pair. */ - if (parser->raw_unread < 4) { + if (raw_unread < 4) { if (parser->eof) { return yaml_parser_set_reader_error(parser, "Incomplete UTF-16 surrogate pair", @@ -346,8 +366,8 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Get the next character. */ - unsigned int value2 = parser->raw_pointer[low+2] - + (parser->raw_pointer[high+2] << 8); + unsigned int value2 = parser->raw_buffer.pointer[low+2] + + (parser->raw_buffer.pointer[high+2] << 8); /* Check for a low surrogate area. */ @@ -390,33 +410,32 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Move the raw pointers. */ - parser->raw_pointer += width; - parser->raw_unread -= width; + parser->raw_buffer.pointer += width; parser->offset += width; /* Finally put the character into the buffer. 
*/ /* 0000 0000-0000 007F -> 0xxxxxxx */ if (value <= 0x7F) { - *(parser->buffer_end++) = value; + *(parser->buffer.last++) = value; } /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ else if (value <= 0x7FF) { - *(parser->buffer_end++) = 0xC0 + (value >> 6); - *(parser->buffer_end++) = 0x80 + (value & 0x3F); + *(parser->buffer.last++) = 0xC0 + (value >> 6); + *(parser->buffer.last++) = 0x80 + (value & 0x3F); } /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ else if (value <= 0xFFFF) { - *(parser->buffer_end++) = 0xE0 + (value >> 12); - *(parser->buffer_end++) = 0x80 + ((value >> 6) & 0x3F); - *(parser->buffer_end++) = 0x80 + (value & 0x3F); + *(parser->buffer.last++) = 0xE0 + (value >> 12); + *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F); + *(parser->buffer.last++) = 0x80 + (value & 0x3F); } /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ else { - *(parser->buffer_end++) = 0xF0 + (value >> 18); - *(parser->buffer_end++) = 0x80 + ((value >> 12) & 0x3F); - *(parser->buffer_end++) = 0x80 + ((value >> 6) & 0x3F); - *(parser->buffer_end++) = 0x80 + (value & 0x3F); + *(parser->buffer.last++) = 0xF0 + (value >> 18); + *(parser->buffer.last++) = 0x80 + ((value >> 12) & 0x3F); + *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F); + *(parser->buffer.last++) = 0x80 + (value & 0x3F); } parser->unread ++; @@ -425,7 +444,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* On EOF, put NUL into the buffer and return. */ if (parser->eof) { - *(parser->buffer_end++) = '\0'; + *(parser->buffer.last++) = '\0'; parser->unread ++; return 1; } diff --git a/src/scanner.c b/src/scanner.c index 7500f425..90a8979f 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -475,30 +475,24 @@ * BLOCK-END */ -#if HAVE_CONFIG_H -#include -#endif - -#include - -#include +#include "yaml_private.h" /* * Ensure that the buffer contains the required number of characters. * Return 1 on success, 0 on failure (reader error or memory error). 
*/ -#define UPDATE(parser,length) \ - (parser->unread >= (length) \ - ? 1 \ +#define CACHE(parser,length) \ + (parser->unread >= (length) \ + ? 1 \ : yaml_parser_update_buffer(parser, (length))) /* * Check the octet at the specified position. */ -#define CHECK_AT(parser,octet,offset) \ - (parser->pointer[offset] == (yaml_char_t)(octet)) +#define CHECK_AT(parser,octet,offset) \ + (parser->buffer.pointer[offset] == (yaml_char_t)(octet)) /* * Check the current octet in the buffer. @@ -511,15 +505,15 @@ * character, a digit, '_', or '-'. */ -#define IS_ALPHA_AT(parser,offset) \ - ((parser->pointer[offset] >= (yaml_char_t) '0' && \ - parser->pointer[offset] <= (yaml_char_t) '9') || \ - (parser->pointer[offset] >= (yaml_char_t) 'A' && \ - parser->pointer[offset] <= (yaml_char_t) 'Z') || \ - (parser->pointer[offset] >= (yaml_char_t) 'a' && \ - parser->pointer[offset] <= (yaml_char_t) 'z') || \ - parser->pointer[offset] == '_' || \ - parser->pointer[offset] == '-') +#define IS_ALPHA_AT(parser,offset) \ + ((parser->buffer.pointer[offset] >= (yaml_char_t) '0' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) '9') || \ + (parser->buffer.pointer[offset] >= (yaml_char_t) 'A' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) 'Z') || \ + (parser->buffer.pointer[offset] >= (yaml_char_t) 'a' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) 'z') || \ + parser->buffer.pointer[offset] == '_' || \ + parser->buffer.pointer[offset] == '-') #define IS_ALPHA(parser) IS_ALPHA_AT(parser,0) @@ -527,9 +521,9 @@ * Check if the character at the specified position is a digit. */ -#define IS_DIGIT_AT(parser,offset) \ - ((parser->pointer[offset] >= (yaml_char_t) '0' && \ - parser->pointer[offset] <= (yaml_char_t) '9')) +#define IS_DIGIT_AT(parser,offset) \ + ((parser->buffer.pointer[offset] >= (yaml_char_t) '0' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) '9')) #define IS_DIGIT(parser) IS_DIGIT_AT(parser,0) @@ -537,8 +531,8 @@ * Get the value of a digit. 
*/ -#define AS_DIGIT_AT(parser,offset) \ - (parser->pointer[offset] - (yaml_char_t) '0') +#define AS_DIGIT_AT(parser,offset) \ + (parser->buffer.pointer[offset] - (yaml_char_t) '0') #define AS_DIGIT(parser) AS_DIGIT_AT(parser,0) @@ -546,13 +540,13 @@ * Check if the character at the specified position is a hex-digit. */ -#define IS_HEX_AT(parser,offset) \ - ((parser->pointer[offset] >= (yaml_char_t) '0' && \ - parser->pointer[offset] <= (yaml_char_t) '9') || \ - (parser->pointer[offset] >= (yaml_char_t) 'A' && \ - parser->pointer[offset] <= (yaml_char_t) 'F') || \ - (parser->pointer[offset] >= (yaml_char_t) 'a' && \ - parser->pointer[offset] <= (yaml_char_t) 'f')) +#define IS_HEX_AT(parser,offset) \ + ((parser->buffer.pointer[offset] >= (yaml_char_t) '0' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) '9') || \ + (parser->buffer.pointer[offset] >= (yaml_char_t) 'A' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) 'F') || \ + (parser->buffer.pointer[offset] >= (yaml_char_t) 'a' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) 'f')) #define IS_HEX(parser) IS_HEX_AT(parser,0) @@ -560,14 +554,14 @@ * Get the value of a hex-digit. */ -#define AS_HEX_AT(parser,offset) \ - ((parser->pointer[offset] >= (yaml_char_t) 'A' && \ - parser->pointer[offset] <= (yaml_char_t) 'F') ? \ - (parser->pointer[offset] - (yaml_char_t) 'A' + 10) : \ - (parser->pointer[offset] >= (yaml_char_t) 'a' && \ - parser->pointer[offset] <= (yaml_char_t) 'f') ? \ - (parser->pointer[offset] - (yaml_char_t) 'a' + 10) : \ - (parser->pointer[offset] - (yaml_char_t) '0')) +#define AS_HEX_AT(parser,offset) \ + ((parser->buffer.pointer[offset] >= (yaml_char_t) 'A' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) 'F') ? \ + (parser->buffer.pointer[offset] - (yaml_char_t) 'A' + 10) : \ + (parser->buffer.pointer[offset] >= (yaml_char_t) 'a' && \ + parser->buffer.pointer[offset] <= (yaml_char_t) 'f') ? 
\ + (parser->buffer.pointer[offset] - (yaml_char_t) 'a' + 10) : \ + (parser->buffer.pointer[offset] - (yaml_char_t) '0')) #define AS_HEX(parser) AS_HEX_AT(parser,0) @@ -583,9 +577,9 @@ * Check if the character at the specified position is BOM. */ -#define IS_BOM_AT(parser,offset) \ - (CHECK_AT(parser,'\xEF',(offset)) \ - && CHECK_AT(parser,'\xBB',(offset)+1) \ +#define IS_BOM_AT(parser,offset) \ + (CHECK_AT(parser,'\xEF',(offset)) \ + && CHECK_AT(parser,'\xBB',(offset)+1) \ && CHECK_AT(parser,'\xBF',(offset)+1)) /* BOM (#xFEFF) */ #define IS_BOM(parser) IS_BOM_AT(parser,0) @@ -619,21 +613,21 @@ * Check if the character at the specified position is a line break. */ -#define IS_BREAK_AT(parser,offset) \ - (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \ - || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ - || (CHECK_AT(parser,'\xC2',(offset)) \ - && CHECK_AT(parser,'\x85',(offset)+1)) /* NEL (#x85) */ \ - || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset)+1) \ - && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \ - || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset)+1) \ +#define IS_BREAK_AT(parser,offset) \ + (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \ + || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ + || (CHECK_AT(parser,'\xC2',(offset)) \ + && CHECK_AT(parser,'\x85',(offset)+1)) /* NEL (#x85) */ \ + || (CHECK_AT(parser,'\xE2',(offset)) \ + && CHECK_AT(parser,'\x80',(offset)+1) \ + && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \ + || (CHECK_AT(parser,'\xE2',(offset)) \ + && CHECK_AT(parser,'\x80',(offset)+1) \ && CHECK_AT(parser,'\xA9',(offset)+2))) /* PS (#x2029) */ #define IS_BREAK(parser) IS_BREAK_AT(parser,0) -#define IS_CRLF_AT(parser,offset) \ +#define IS_CRLF_AT(parser,offset) \ (CHECK_AT(parser,'\r',(offset)) && CHECK_AT(parser,'\n',(offset)+1)) #define IS_CRLF(parser) IS_CRLF_AT(parser,0) @@ -642,7 +636,7 @@ * Check if the character is a line break or NUL. 
*/ -#define IS_BREAKZ_AT(parser,offset) \ +#define IS_BREAKZ_AT(parser,offset) \ (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset))) #define IS_BREAKZ(parser) IS_BREAKZ_AT(parser,0) @@ -651,7 +645,7 @@ * Check if the character is a line break, space, or NUL. */ -#define IS_SPACEZ_AT(parser,offset) \ +#define IS_SPACEZ_AT(parser,offset) \ (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) #define IS_SPACEZ(parser) IS_SPACEZ_AT(parser,0) @@ -660,7 +654,7 @@ * Check if the character is a line break, space, tab, or NUL. */ -#define IS_BLANKZ_AT(parser,offset) \ +#define IS_BLANKZ_AT(parser,offset) \ (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) #define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0) @@ -669,11 +663,11 @@ * Determine the width of the character. */ -#define WIDTH_AT(parser,offset) \ - ((parser->pointer[(offset)] & 0x80) == 0x00 ? 1 : \ - (parser->pointer[(offset)] & 0xE0) == 0xC0 ? 2 : \ - (parser->pointer[(offset)] & 0xF0) == 0xE0 ? 3 : \ - (parser->pointer[(offset)] & 0xF8) == 0xF0 ? 4 : 0) +#define WIDTH_AT(parser,offset) \ + ((parser->buffer.pointer[offset] & 0x80) == 0x00 ? 1 : \ + (parser->buffer.pointer[offset] & 0xE0) == 0xC0 ? 2 : \ + (parser->buffer.pointer[offset] & 0xF0) == 0xE0 ? 3 : \ + (parser->buffer.pointer[offset] & 0xF8) == 0xF0 ? 4 : 0) #define WIDTH(parser) WIDTH_AT(parser,0) @@ -681,156 +675,157 @@ * Advance the buffer pointer. */ -#define FORWARD(parser) \ - (parser->index ++, \ - parser->column ++, \ - parser->unread --, \ - parser->pointer += WIDTH(parser)) - -#define FORWARD_LINE(parser) \ - (IS_CRLF(parser) ? \ - (parser->index += 2, \ - parser->column = 0, \ - parser->line ++, \ - parser->unread -= 2, \ - parser->pointer += 2) : \ - IS_BREAK(parser) ? \ - (parser->index ++, \ - parser->column = 0, \ - parser->line ++, \ - parser->unread --, \ - parser->pointer += WIDTH(parser)) : 0) - -/* - * Resize a string if needed. 
- */ +#define SKIP(parser) \ + (parser->mark.index ++, \ + parser->mark.column ++, \ + parser->unread --, \ + parser->buffer.pointer += WIDTH(parser)) -#define RESIZE(parser,string) \ - ((string).pointer-(string).buffer+5 < (string).size ? 1 : \ - yaml_parser_resize_string(parser, &(string))) +#define SKIP_LINE(parser) \ + (IS_CRLF(parser) ? \ + (parser->mark.index += 2, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread -= 2, \ + parser->buffer.pointer += 2) : \ + IS_BREAK(parser) ? \ + (parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread --, \ + parser->buffer.pointer += WIDTH(parser)) : 0) /* * Copy a character to a string buffer and advance pointers. */ -#define COPY(parser,string) \ - (((*parser->pointer & 0x80) == 0x00 ? \ - (*((string).pointer++) = *(parser->pointer++)) : \ - (*parser->pointer & 0xE0) == 0xC0 ? \ - (*((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++)) : \ - (*parser->pointer & 0xF0) == 0xE0 ? \ - (*((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++)) : \ - (*parser->pointer & 0xF8) == 0xF0 ? \ - (*((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++)) : 0), \ - parser->index ++, \ - parser->column ++, \ - parser->unread --) +#define READ(parser,string) \ + (STRING_EXTEND(parser,string) ? \ + (((*parser->buffer.pointer & 0x80) == 0x00 ? \ + (*((string).pointer++) = *(parser->buffer.pointer++)) : \ + (*parser->buffer.pointer & 0xE0) == 0xC0 ? \ + (*((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++)) : \ + (*parser->buffer.pointer & 0xF0) == 0xE0 ? 
\ + (*((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++)) : \ + (*parser->buffer.pointer & 0xF8) == 0xF0 ? \ + (*((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++)) : 0), \ + parser->mark.index ++, \ + parser->mark.column ++, \ + parser->unread --, \ + 1) : 0) /* * Copy a line break character to a string buffer and advance pointers. */ -#define COPY_LINE(parser,string) \ - ((CHECK_AT(parser,'\r',0) && CHECK_AT(parser,'\n',1)) ? /* CR LF -> LF */ \ +#define READ_LINE(parser,string) \ + (STRING_EXTEND(parser,string) ? \ + (((CHECK_AT(parser,'\r',0) && CHECK_AT(parser,'\n',1)) ? /* CR LF -> LF */ \ (*((string).pointer++) = (yaml_char_t) '\n', \ - parser->pointer += 2, \ - parser->index += 2, \ - parser->column = 0, \ - parser->line ++, \ + parser->buffer.pointer += 2, \ + parser->mark.index += 2, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ parser->unread -= 2) : \ (CHECK_AT(parser,'\r',0) || CHECK_AT(parser,'\n',0)) ? /* CR|LF -> LF */ \ (*((string).pointer++) = (yaml_char_t) '\n', \ - parser->pointer ++, \ - parser->index ++, \ - parser->column = 0, \ - parser->line ++, \ + parser->buffer.pointer ++, \ + parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ parser->unread --) : \ (CHECK_AT(parser,'\xC2',0) && CHECK_AT(parser,'\x85',1)) ? 
/* NEL -> LF */ \ (*((string).pointer++) = (yaml_char_t) '\n', \ - parser->pointer += 2, \ - parser->index ++, \ - parser->column = 0, \ - parser->line ++, \ + parser->buffer.pointer += 2, \ + parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ parser->unread --) : \ (CHECK_AT(parser,'\xE2',0) && \ CHECK_AT(parser,'\x80',1) && \ (CHECK_AT(parser,'\xA8',2) || \ CHECK_AT(parser,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ - (*((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++), \ - *((string).pointer++) = *(parser->pointer++), \ - parser->index ++, \ - parser->column = 0, \ - parser->line ++, \ - parser->unread --) : 0) + (*((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + *((string).pointer++) = *(parser->buffer.pointer++), \ + parser->mark.index ++, \ + parser->mark.column = 0, \ + parser->mark.line ++, \ + parser->unread --) : 0), \ + 1) : 0) /* - * Append a string to another string and clear the former string. + * Token initializers. */ -#define JOIN(parser,head_string,tail_string) \ - (yaml_parser_join_string(parser, &(head_string), &(tail_string)) && \ - yaml_parser_clear_string(parser, &(tail_string))) +#define TOKEN_INIT(token,token_type,token_start_mark,token_end_mark) \ + (memset(&(token), 0, sizeof(yaml_token_t)), \ + (token).type = (token_type), \ + (token).start_mark = (token_start_mark), \ + (token).end_mark = (token_end_mark)) -/* - * Public API declarations. 
- */ +#define STREAM_START_TOKEN_INIT(token,token_encoding,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_STREAM_START_TOKEN,(start_mark),(end_mark)), \ + (token).data.stream_start.encoding = (token_encoding)) -YAML_DECLARE(yaml_token_t *) -yaml_parser_get_token(yaml_parser_t *parser); +#define STREAM_END_TOKEN_INIT(token,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_STREAM_END_TOKEN,(start_mark),(end_mark))) -YAML_DECLARE(yaml_token_t *) -yaml_parser_peek_token(yaml_parser_t *parser); - -/* - * Error handling. - */ +#define ALIAS_TOKEN_INIT(token,token_value,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_ALIAS_TOKEN,(start_mark),(end_mark)), \ + (token).data.alias.value = (token_value)) -static int -yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, - yaml_mark_t context_mark, const char *problem); +#define ANCHOR_TOKEN_INIT(token,token_value,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_ANCHOR_TOKEN,(start_mark),(end_mark)), \ + (token).data.anchor.value = (token_value)) -static yaml_mark_t -yaml_parser_get_mark(yaml_parser_t *parser); +#define TAG_TOKEN_INIT(token,token_handle,token_suffix,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_TAG_TOKEN,(start_mark),(end_mark)), \ + (token).data.tag.handle = (token_handle), \ + (token).data.tag.suffix = (token_suffix)) -/* - * Buffers and lists. 
- */ +#define SCALAR_TOKEN_INIT(token,token_value,token_length,token_style,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_SCALAR_TOKEN,(start_mark),(end_mark)), \ + (token).data.scalar.value = (token_value), \ + (token).data.scalar.length = (token_length), \ + (token).data.scalar.style = (token_style)) -typedef struct { - yaml_char_t *buffer; - yaml_char_t *pointer; - size_t size; -} yaml_string_t; +#define VERSION_DIRECTIVE_TOKEN_INIT(token,token_major,token_minor,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_VERSION_DIRECTIVE_TOKEN,(start_mark),(end_mark)), \ + (token).data.version_directive.major = (token_major), \ + (token).data.version_directive.minor = (token_minor)) -static yaml_string_t -yaml_parser_new_string(yaml_parser_t *parser); +#define TAG_DIRECTIVE_TOKEN_INIT(token,token_handle,token_prefix,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_TAG_DIRECTIVE_TOKEN,(start_mark),(end_mark)), \ + (token).data.tag_directive.handle = (token_handle), \ + (token).data.tag_directive.prefix = (token_prefix)) -static int -yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string); +/* + * Public API declarations. + */ -static int -yaml_parser_join_string(yaml_parser_t *parser, - yaml_string_t *string1, yaml_string_t *string2); +YAML_DECLARE(int) +yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); -static int -yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string); +/* + * Error handling. + */ static int -yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, - size_t item_size); +yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, + yaml_mark_t context_mark, const char *problem); /* * High-level token API. */ -static int +YAML_DECLARE(int) yaml_parser_fetch_more_tokens(yaml_parser_t *parser); static int @@ -855,17 +850,6 @@ yaml_parser_increase_flow_level(yaml_parser_t *parser); static int yaml_parser_decrease_flow_level(yaml_parser_t *parser); -/* - * Token manipulation. 
- */ - -static int -yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token); - -static int -yaml_parser_insert_token(yaml_parser_t *parser, - int number, yaml_token_t *token); - /* * Indentation treatment. */ @@ -936,8 +920,8 @@ yaml_parser_fetch_plain_scalar(yaml_parser_t *parser); static int yaml_parser_scan_to_next_token(yaml_parser_t *parser); -static yaml_token_t * -yaml_parser_scan_directive(yaml_parser_t *parser); +static int +yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token); static int yaml_parser_scan_directive_name(yaml_parser_t *parser, @@ -955,12 +939,12 @@ static int yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix); -static yaml_token_t * -yaml_parser_scan_anchor(yaml_parser_t *parser, +static int +yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, yaml_token_type_t type); -static yaml_token_t * -yaml_parser_scan_tag(yaml_parser_t *parser); +static int +yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token); static int yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, @@ -974,178 +958,57 @@ static int yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_string_t *string); -static yaml_token_t * -yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal); +static int +yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, + int literal); static int yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, int *indent, yaml_string_t *breaks, yaml_mark_t start_mark, yaml_mark_t *end_mark); -static yaml_token_t * -yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single); +static int +yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, + int single); -static yaml_token_t * -yaml_parser_scan_plain_scalar(yaml_parser_t *parser); +static int +yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t 
*token); /* - * Get the next token and remove it from the tokens queue. + * Get the next token. */ -YAML_DECLARE(yaml_token_t *) -yaml_parser_get_token(yaml_parser_t *parser) +YAML_DECLARE(int) +yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) { - yaml_token_t *token; - assert(parser); /* Non-NULL parser object is expected. */ - assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ - - /* Ensure that the tokens queue contains enough tokens. */ - - if (!yaml_parser_fetch_more_tokens(parser)) return NULL; - - /* Fetch the next token from the queue. */ - - token = parser->tokens[parser->tokens_head]; + assert(token); /* Non-NULL token object is expected. */ - /* Move the queue head. */ + /* No tokens after STREAM-END or error. */ - parser->tokens[parser->tokens_head++] = NULL; + if (parser->stream_end_produced || parser->error) { + memset(token, 0, sizeof(yaml_token_t)); - parser->tokens_parsed++; - - if (token->type == YAML_STREAM_END_TOKEN) { - parser->stream_end_produced = 1; + return 1; } - return token; -} - -/* - * Get the next token, but don't remove it from the queue. - */ - -YAML_DECLARE(yaml_token_t *) -yaml_parser_peek_token(yaml_parser_t *parser) -{ - assert(parser); /* Non-NULL parser object is expected. */ - assert(!parser->stream_end_produced); /* No tokens after STREAM-END. */ - /* Ensure that the tokens queue contains enough tokens. */ - if (!yaml_parser_fetch_more_tokens(parser)) return NULL; - - /* Fetch the next token from the queue. */ - - return parser->tokens[parser->tokens_head]; -} - -/* - * Create a new string. 
- */ - -static yaml_string_t -yaml_parser_new_string(yaml_parser_t *parser) -{ - yaml_string_t string = { NULL, NULL, 0 }; - - string.buffer = yaml_malloc(YAML_DEFAULT_SIZE); - if (!string.buffer) { - parser->error = YAML_MEMORY_ERROR; - return string; - } - - memset(string.buffer, 0, YAML_DEFAULT_SIZE); - string.pointer = string.buffer; - string.size = YAML_DEFAULT_SIZE; - - return string; -} - -/* - * Double the size of a string. - */ - -static int -yaml_parser_resize_string(yaml_parser_t *parser, yaml_string_t *string) -{ - yaml_char_t *new_buffer = yaml_realloc(string->buffer, string->size*2); - - if (!new_buffer) { - yaml_free(string->buffer); - string->buffer = NULL; - string->pointer = NULL; - string->size = 0; - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - memset(new_buffer+string->size, 0, string->size); - - string->pointer = new_buffer + (string->pointer-string->buffer); - string->buffer = new_buffer; - string->size *= 2; - - return 1; -} - -/* - * Append a string to another string. - */ - -static int -yaml_parser_join_string(yaml_parser_t *parser, - yaml_string_t *string1, yaml_string_t *string2) -{ - if (string2->buffer == string2->pointer) return 1; - - while (string1->pointer - string1->buffer + string2->pointer - string2->buffer + 1 - > string1->size) { - if (!yaml_parser_resize_string(parser, string1)) return 0; + if (!parser->token_available) { + if (!yaml_parser_fetch_more_tokens(parser)) + return 0; } - memcpy(string1->pointer, string2->buffer, string2->pointer-string2->buffer); - string1->pointer += string2->pointer-string2->buffer; - - return 1; -} - -/* - * Fill the string with NULs and move the pointer to the beginning. - */ - -static int -yaml_parser_clear_string(yaml_parser_t *parser, yaml_string_t *string) -{ - if (string->buffer == string->pointer) return 1; - - memset(string->buffer, 0, string->pointer-string->buffer); - - string->pointer = string->buffer; - - return 1; -} - -/* - * Double a list. 
- */ - -static int -yaml_parser_resize_list(yaml_parser_t *parser, void **buffer, size_t *size, - size_t item_size) -{ - void *new_buffer = yaml_realloc(*buffer, item_size*(*size)*2); + /* Fetch the next token from the queue. */ + + *token = DEQUEUE(parser, parser->tokens); + parser->token_available = 0; + parser->tokens_parsed ++; - if (!new_buffer) { - parser->error = YAML_MEMORY_ERROR; - return 0; + if (token->type == YAML_STREAM_END_TOKEN) { + parser->stream_end_produced = 1; } - memset(new_buffer+item_size*(*size), 0, item_size*(*size)); - - *buffer = new_buffer; - *size *= 2; - return 1; } @@ -1161,34 +1024,20 @@ yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, parser->context = context; parser->context_mark = context_mark; parser->problem = problem; - parser->problem_mark = yaml_parser_get_mark(parser); + parser->problem_mark = parser->mark; return 0; } -/* - * Get the mark for the current buffer position. - */ - -static yaml_mark_t -yaml_parser_get_mark(yaml_parser_t *parser) -{ - yaml_mark_t mark = { parser->index, parser->line, parser->column }; - - return mark; -} - - /* * Ensure that the tokens queue contains at least one token which can be * returned to the Parser. */ -static int +YAML_DECLARE(int) yaml_parser_fetch_more_tokens(yaml_parser_t *parser) { int need_more_tokens; - int k; /* While we need more tokens to fetch, do it. */ @@ -1200,7 +1049,7 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) need_more_tokens = 0; - if (parser->tokens_head == parser->tokens_tail) + if (parser->tokens.head == parser->tokens.tail) { /* Queue is empty. */ @@ -1208,15 +1057,17 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) } else { + yaml_simple_key_t *simple_key; + /* Check if any potential simple key may occupy the head position. 
*/ if (!yaml_parser_stale_simple_keys(parser)) return 0; - for (k = 0; k <= parser->flow_level; k++) { - yaml_simple_key_t *simple_key = parser->simple_keys[k]; - if (simple_key - && (simple_key->token_number == parser->tokens_parsed)) { + for (simple_key = parser->simple_keys.start; + simple_key != parser->simple_keys.top; simple_key++) { + if (simple_key->possible + && simple_key->token_number == parser->tokens_parsed) { need_more_tokens = 1; break; } @@ -1234,6 +1085,8 @@ yaml_parser_fetch_more_tokens(yaml_parser_t *parser) return 0; } + parser->token_available = 1; + return 1; } @@ -1246,7 +1099,7 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) { /* Ensure that the buffer is initialized. */ - if (!UPDATE(parser, 1)) + if (!CACHE(parser, 1)) return 0; /* Check if we just started scanning. Fetch STREAM-START then. */ @@ -1266,7 +1119,7 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) /* Check the indentation level against the current column. */ - if (!yaml_parser_unroll_indent(parser, parser->column)) + if (!yaml_parser_unroll_indent(parser, parser->mark.column)) return 0; /* @@ -1274,7 +1127,7 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * of the longest indicators ('--- ' and '... '). */ - if (!UPDATE(parser, 4)) + if (!CACHE(parser, 4)) return 0; /* Is it the end of the stream? */ @@ -1284,12 +1137,12 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) /* Is it a directive? */ - if (parser->column == 0 && CHECK(parser, '%')) + if (parser->mark.column == 0 && CHECK(parser, '%')) return yaml_parser_fetch_directive(parser); /* Is it the document start indicator? */ - if (parser->column == 0 + if (parser->mark.column == 0 && CHECK_AT(parser, '-', 0) && CHECK_AT(parser, '-', 1) && CHECK_AT(parser, '-', 2) @@ -1299,7 +1152,7 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) /* Is it the document end indicator? 
*/ - if (parser->column == 0 + if (parser->mark.column == 0 && CHECK_AT(parser, '.', 0) && CHECK_AT(parser, '.', 1) && CHECK_AT(parser, '.', 2) @@ -1421,8 +1274,9 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * If we don't determine the token type so far, it is an error. */ - return yaml_parser_set_scanner_error(parser, "while scanning for the next token", - yaml_parser_get_mark(parser), "found character that cannot start any token"); + return yaml_parser_set_scanner_error(parser, + "while scanning for the next token", parser->mark, + "found character that cannot start any token"); } /* @@ -1433,14 +1287,13 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) static int yaml_parser_stale_simple_keys(yaml_parser_t *parser) { - int level; + yaml_simple_key_t *simple_key; /* Check for a potential simple key for each flow level. */ - for (level = 0; level <= parser->flow_level; level++) + for (simple_key = parser->simple_keys.start; + simple_key != parser->simple_keys.top; simple_key ++) { - yaml_simple_key_t *simple_key = parser->simple_keys[level]; - /* * The specification requires that a simple key * @@ -1448,8 +1301,9 @@ yaml_parser_stale_simple_keys(yaml_parser_t *parser) * - is shorter than 1024 characters. */ - if (simple_key && (simple_key->line < parser->line || - simple_key->index+1024 < parser->index)) { + if (simple_key->possible + && (simple_key->mark.line < parser->mark.line + || simple_key->mark.index+1024 < parser->mark.index)) { /* Check if the potential simple key to be removed is required. */ @@ -1459,8 +1313,7 @@ yaml_parser_stale_simple_keys(yaml_parser_t *parser) "could not found expected ':'"); } - yaml_free(simple_key); - parser->simple_keys[level] = NULL; + simple_key->possible = 0; } } @@ -1481,7 +1334,8 @@ yaml_parser_save_simple_key(yaml_parser_t *parser) * level. 
*/ - int required = (!parser->flow_level && parser->indent == parser->column); + int required = (!parser->flow_level + && parser->indent == parser->mark.column); /* * A simple key is required only when it is the first token in the current @@ -1496,21 +1350,13 @@ yaml_parser_save_simple_key(yaml_parser_t *parser) if (parser->simple_key_allowed) { - yaml_simple_key_t simple_key = { required, - parser->tokens_parsed + parser->tokens_tail - parser->tokens_head, - parser->index, parser->line, parser->column, - yaml_parser_get_mark(parser) }; + yaml_simple_key_t simple_key = { 1, required, + parser->tokens_parsed + parser->tokens.tail - parser->tokens.head, + parser->mark }; if (!yaml_parser_remove_simple_key(parser)) return 0; - parser->simple_keys[parser->flow_level] = - yaml_malloc(sizeof(yaml_simple_key_t)); - if (!parser->simple_keys[parser->flow_level]) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - *(parser->simple_keys[parser->flow_level]) = simple_key; + *(parser->simple_keys.top-1) = simple_key; } return 1; @@ -1523,9 +1369,9 @@ yaml_parser_save_simple_key(yaml_parser_t *parser) static int yaml_parser_remove_simple_key(yaml_parser_t *parser) { - yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level]; + yaml_simple_key_t *simple_key = parser->simple_keys.top-1; - if (simple_key) + if (simple_key->possible) { /* If the key is required, it is an error. */ @@ -1534,12 +1380,11 @@ yaml_parser_remove_simple_key(yaml_parser_t *parser) "while scanning a simple key", simple_key->mark, "could not found expected ':'"); } + } - /* Remove the key from the list. */ + /* Remove the key from the stack. */ - yaml_free(simple_key); - parser->simple_keys[parser->flow_level] = NULL; - } + simple_key->possible = 0; return 1; } @@ -1551,17 +1396,16 @@ yaml_parser_remove_simple_key(yaml_parser_t *parser) static int yaml_parser_increase_flow_level(yaml_parser_t *parser) { - /* Check if we need to resize the list. 
*/ + yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } }; - if (parser->flow_level == parser->simple_keys_size-1) { - if (!yaml_parser_resize_list(parser, (void **)&parser->simple_keys, - &parser->simple_keys_size, sizeof(yaml_simple_key_t *))) - return 0; - } + /* Reset the simple key on the next level. */ - /* Increase the flow level and reset the simple key. */ + if (!PUSH(parser, parser->simple_keys, empty_simple_key)) + return 0; + + /* Increase the flow level. */ - parser->simple_keys[++parser->flow_level] = NULL; + parser->flow_level++; return 1; } @@ -1573,75 +1417,11 @@ yaml_parser_increase_flow_level(yaml_parser_t *parser) static int yaml_parser_decrease_flow_level(yaml_parser_t *parser) { - assert(parser->flow_level); /* Greater than 0. */ - assert(!parser->simple_keys[parser->flow_level]); /* Must be removed. */ - - parser->flow_level --; - - return 1; -} - -/* - * Add a token to the tail of the tokens queue. - */ - -static int -yaml_parser_append_token(yaml_parser_t *parser, yaml_token_t *token) -{ - return yaml_parser_insert_token(parser, -1, token); -} - -/* - * Insert the token into the tokens queue. The number parameter is the - * ordinal number of the token. If the number is equal to -1, add the token - * to the tail of the queue. - */ - -static int -yaml_parser_insert_token(yaml_parser_t *parser, - int number, yaml_token_t *token) -{ - /* The index of the token in the queue. */ - - int index = (number == -1) - ? parser->tokens_tail - parser->tokens_head - : number - parser->tokens_parsed; - - assert(index >= 0 && index <= (parser->tokens_tail-parser->tokens_head)); - - /* Check if we need to resize the queue. */ - - if (parser->tokens_head == 0 && parser->tokens_tail == parser->tokens_size) { - if (!yaml_parser_resize_list(parser, (void **)&parser->tokens, - &parser->tokens_size, sizeof(yaml_token_t *))) - return 0; - } - - /* Check if we need to move the queue to the beginning of the buffer. 
*/ - - if (parser->tokens_tail == parser->tokens_size) - { - if (parser->tokens_head < parser->tokens_tail) { - memmove(parser->tokens, parser->tokens+parser->tokens_head, - sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head)); - } - parser->tokens_tail -= parser->tokens_head; - parser->tokens_head = 0; + if (parser->flow_level) { + parser->flow_level --; + POP(parser, parser->simple_keys); } - /* Check if we need to free space within the queue. */ - - if (index < (parser->tokens_tail-parser->tokens_head)) { - memmove(parser->tokens+parser->tokens_head+index+1, - parser->tokens+parser->tokens_head+index, - sizeof(yaml_token_t *)*(parser->tokens_tail-parser->tokens_head-index)); - } - - /* Insert the token. */ - - parser->tokens[parser->tokens_head+index] = token; - parser->tokens_tail ++; - return 1; } @@ -1656,7 +1436,7 @@ static int yaml_parser_roll_indent(yaml_parser_t *parser, int column, int number, yaml_token_type_t type, yaml_mark_t mark) { - yaml_token_t *token; + yaml_token_t token; /* In the flow context, do nothing. */ @@ -1665,35 +1445,28 @@ yaml_parser_roll_indent(yaml_parser_t *parser, int column, if (parser->indent < column) { - /* Check if we need to expand the indents stack. */ - - if (parser->indents_length == parser->indents_size) { - if (!yaml_parser_resize_list(parser, (void **)&parser->indents, - &parser->indents_size, sizeof(int))) - return 0; - } - /* * Push the current indentation level to the stack and set the new * indentation level. */ - parser->indents[parser->indents_length++] = parser->indent; + if (!PUSH(parser, parser->indents, parser->indent)) + return 0; + parser->indent = column; - /* Create a token. */ + /* Create a token and insert it into the queue. */ - token = yaml_token_new(type, mark, mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - /* Insert the token into the queue. 
*/ + TOKEN_INIT(token, type, mark, mark); - if (!yaml_parser_insert_token(parser, number, token)) { - yaml_token_delete(token); - return 0; + if (number == -1) { + if (!ENQUEUE(parser, parser->tokens, token)) + return 0; + } + else { + if (!QUEUE_INSERT(parser, + parser->tokens, number - parser->tokens_parsed, token)) + return 0; } } @@ -1710,7 +1483,7 @@ yaml_parser_roll_indent(yaml_parser_t *parser, int column, static int yaml_parser_unroll_indent(yaml_parser_t *parser, int column) { - yaml_token_t *token; + yaml_token_t token; /* In the flow context, do nothing. */ @@ -1721,28 +1494,16 @@ yaml_parser_unroll_indent(yaml_parser_t *parser, int column) while (parser->indent > column) { - yaml_mark_t mark = yaml_parser_get_mark(parser); + /* Create a token and append it to the queue. */ - /* Create a token. */ + TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark); - token = yaml_token_new(YAML_BLOCK_END_TOKEN, mark, mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } - - /* Append the token to the queue. */ - - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); - return 0; - } /* Pop the indentation level. */ - assert(parser->indents_length); /* Non-empty stack expected. */ - - parser->indent = parser->indents[--parser->indents_length]; + parser->indent = POP(parser, parser->indents); } return 1; @@ -1755,13 +1516,18 @@ yaml_parser_unroll_indent(yaml_parser_t *parser, int column) static int yaml_parser_fetch_stream_start(yaml_parser_t *parser) { - yaml_mark_t mark = yaml_parser_get_mark(parser); - yaml_token_t *token; + yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } }; + yaml_token_t token; /* Set the initial indentation. */ parser->indent = -1; + /* Initialize the simple key stack. */ + + if (!PUSH(parser, parser->simple_keys, simple_key)) + return 0; + /* A simple key is allowed at the beginning of the stream. 
*/ parser->simple_key_allowed = 1; @@ -1770,20 +1536,13 @@ yaml_parser_fetch_stream_start(yaml_parser_t *parser) parser->stream_start_produced = 1; - /* Create the STREAM-START token. */ + /* Create the STREAM-START token and append it to the queue. */ - token = yaml_stream_start_token_new(parser->encoding, mark, mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - /* Append the token to the queue. */ + STREAM_START_TOKEN_INIT(token, parser->encoding, + parser->mark, parser->mark); - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -1795,8 +1554,7 @@ yaml_parser_fetch_stream_start(yaml_parser_t *parser) static int yaml_parser_fetch_stream_end(yaml_parser_t *parser) { - yaml_mark_t mark = yaml_parser_get_mark(parser); - yaml_token_t *token; + yaml_token_t token; /* Reset the indentation level. */ @@ -1810,32 +1568,24 @@ yaml_parser_fetch_stream_end(yaml_parser_t *parser) parser->simple_key_allowed = 0; - /* Create the STREAM-END token. */ + /* Create the STREAM-END token and append it to the queue. */ - token = yaml_stream_end_token_new(mark, mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - /* Append the token to the queue. */ + STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark); - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } /* - * Produce the YAML-DIRECTIVE or TAG-DIRECTIVE token. + * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token. */ static int yaml_parser_fetch_directive(yaml_parser_t *parser) { - yaml_token_t *token; + yaml_token_t token; /* Reset the indentation level. */ @@ -1851,13 +1601,13 @@ yaml_parser_fetch_directive(yaml_parser_t *parser) /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. 
*/ - token = yaml_parser_scan_directive(parser); - if (!token) return 0; + if (!yaml_parser_scan_directive(parser, &token)) + return 0; /* Append the token to the queue. */ - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); return 0; } @@ -1873,7 +1623,7 @@ yaml_parser_fetch_document_indicator(yaml_parser_t *parser, yaml_token_type_t type) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; /* Reset the indentation level. */ @@ -1889,28 +1639,22 @@ yaml_parser_fetch_document_indicator(yaml_parser_t *parser, /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; - FORWARD(parser); - FORWARD(parser); - FORWARD(parser); + SKIP(parser); + SKIP(parser); + SKIP(parser); - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* Create the DOCUMENT-START or DOCUMENT-END token. */ - token = yaml_token_new(type, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } + TOKEN_INIT(token, type, start_mark, end_mark); /* Append the token to the queue. */ - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -1924,7 +1668,7 @@ yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, yaml_token_type_t type) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; /* The indicators '[' and '{' may start a simple key. */ @@ -1942,24 +1686,18 @@ yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); - FORWARD(parser); - end_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. 
*/ - token = yaml_token_new(type, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } + TOKEN_INIT(token, type, start_mark, end_mark); /* Append the token to the queue. */ - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -1973,7 +1711,7 @@ yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, yaml_token_type_t type) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; /* Reset any potential simple key on the current flow level. */ @@ -1991,24 +1729,18 @@ yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); - FORWARD(parser); - end_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */ - token = yaml_token_new(type, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } + TOKEN_INIT(token, type, start_mark, end_mark); /* Append the token to the queue. */ - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -2021,7 +1753,7 @@ static int yaml_parser_fetch_flow_entry(yaml_parser_t *parser) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; /* Reset any potential simple keys on the current flow level. */ @@ -2034,24 +1766,16 @@ yaml_parser_fetch_flow_entry(yaml_parser_t *parser) /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); - FORWARD(parser); - end_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; - /* Create the FLOW-ENTRY token. */ + /* Create the FLOW-ENTRY token and append it to the queue. 
*/ - token = yaml_token_new(YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } + TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark); - /* Append the token to the queue. */ - - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -2064,7 +1788,7 @@ static int yaml_parser_fetch_block_entry(yaml_parser_t *parser) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; /* Check if the scanner is in the block context. */ @@ -2073,15 +1797,14 @@ yaml_parser_fetch_block_entry(yaml_parser_t *parser) /* Check if we are allowed to start a new entry. */ if (!parser->simple_key_allowed) { - return yaml_parser_set_scanner_error(parser, NULL, - yaml_parser_get_mark(parser), + return yaml_parser_set_scanner_error(parser, NULL, parser->mark, "block sequence entries are not allowed in this context"); } /* Add the BLOCK-SEQUENCE-START token if needed. */ - if (!yaml_parser_roll_indent(parser, parser->column, -1, - YAML_BLOCK_SEQUENCE_START_TOKEN, yaml_parser_get_mark(parser))) + if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, + YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark)) return 0; } else @@ -2104,24 +1827,16 @@ yaml_parser_fetch_block_entry(yaml_parser_t *parser) /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); - FORWARD(parser); - end_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; - /* Create the BLOCK-ENTRY token. */ + /* Create the BLOCK-ENTRY token and append it to the queue. */ - token = yaml_token_new(YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - /* Append the token to the queue. 
*/ + TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark); - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -2134,7 +1849,7 @@ static int yaml_parser_fetch_key(yaml_parser_t *parser) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; /* In the block context, additional checks are required. */ @@ -2143,15 +1858,14 @@ yaml_parser_fetch_key(yaml_parser_t *parser) /* Check if we are allowed to start a new key (not nessesary simple). */ if (!parser->simple_key_allowed) { - return yaml_parser_set_scanner_error(parser, NULL, - yaml_parser_get_mark(parser), + return yaml_parser_set_scanner_error(parser, NULL, parser->mark, "mapping keys are not allowed in this context"); } /* Add the BLOCK-MAPPING-START token if needed. */ - if (!yaml_parser_roll_indent(parser, parser->column, -1, - YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser))) + if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, + YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) return 0; } @@ -2166,24 +1880,16 @@ yaml_parser_fetch_key(yaml_parser_t *parser) /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); - FORWARD(parser); - end_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; - /* Create the KEY token. */ + /* Create the KEY token and append it to the queue. */ - token = yaml_token_new(YAML_KEY_TOKEN, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - /* Append the token to the queue. 
*/ + TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark); - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -2196,40 +1902,32 @@ static int yaml_parser_fetch_value(yaml_parser_t *parser) { yaml_mark_t start_mark, end_mark; - yaml_token_t *token; + yaml_token_t token; + yaml_simple_key_t *simple_key = parser->simple_keys.top-1; /* Have we found a simple key? */ - if (parser->simple_keys[parser->flow_level]) + if (simple_key->possible) { - yaml_simple_key_t *simple_key = parser->simple_keys[parser->flow_level]; - - /* Create the KEY token. */ - token = yaml_token_new(YAML_KEY_TOKEN, simple_key->mark, simple_key->mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } + /* Create the KEY token and insert it into the queue. */ - /* Insert the token into the queue. */ + TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark); - if (!yaml_parser_insert_token(parser, simple_key->token_number, token)) { - yaml_token_delete(token); + if (!QUEUE_INSERT(parser, parser->tokens, + simple_key->token_number - parser->tokens_parsed, token)) return 0; - } /* In the block context, we may need to add the BLOCK-MAPPING-START token. */ - if (!yaml_parser_roll_indent(parser, simple_key->column, + if (!yaml_parser_roll_indent(parser, simple_key->mark.column, simple_key->token_number, YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark)) return 0; - /* Remove the simple key from the list. */ + /* Remove the simple key. */ - yaml_free(simple_key); - parser->simple_keys[parser->flow_level] = NULL; + simple_key->possible = 0; /* A simple key cannot follow another simple key. */ @@ -2246,15 +1944,14 @@ yaml_parser_fetch_value(yaml_parser_t *parser) /* Check if we are allowed to start a complex value. 
*/ if (!parser->simple_key_allowed) { - return yaml_parser_set_scanner_error(parser, NULL, - yaml_parser_get_mark(parser), + return yaml_parser_set_scanner_error(parser, NULL, parser->mark, "mapping values are not allowed in this context"); } /* Add the BLOCK-MAPPING-START token if needed. */ - if (!yaml_parser_roll_indent(parser, parser->column, -1, - YAML_BLOCK_MAPPING_START_TOKEN, yaml_parser_get_mark(parser))) + if (!yaml_parser_roll_indent(parser, parser->mark.column, -1, + YAML_BLOCK_MAPPING_START_TOKEN, parser->mark)) return 0; } @@ -2265,24 +1962,16 @@ yaml_parser_fetch_value(yaml_parser_t *parser) /* Consume the token. */ - start_mark = yaml_parser_get_mark(parser); - FORWARD(parser); - end_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; + SKIP(parser); + end_mark = parser->mark; - /* Create the VALUE token. */ + /* Create the VALUE token and append it to the queue. */ - token = yaml_token_new(YAML_VALUE_TOKEN, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - - /* Append the token to the queue. */ + TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark); - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) return 0; - } return 1; } @@ -2294,7 +1983,7 @@ yaml_parser_fetch_value(yaml_parser_t *parser) static int yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) { - yaml_token_t *token; + yaml_token_t token; /* An anchor or an alias could be a simple key. */ @@ -2305,18 +1994,15 @@ yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) parser->simple_key_allowed = 0; - /* Create the ALIAS or ANCHOR token. */ - - token = yaml_parser_scan_anchor(parser, type); - if (!token) return 0; + /* Create the ALIAS or ANCHOR token and append it to the queue. */ - /* Append the token to the queue. 
*/ + if (!yaml_parser_scan_anchor(parser, &token, type)) + return 0; - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); return 0; } - return 1; } @@ -2327,7 +2013,7 @@ yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type) static int yaml_parser_fetch_tag(yaml_parser_t *parser) { - yaml_token_t *token; + yaml_token_t token; /* A tag could be a simple key. */ @@ -2338,15 +2024,13 @@ yaml_parser_fetch_tag(yaml_parser_t *parser) parser->simple_key_allowed = 0; - /* Create the TAG token. */ - - token = yaml_parser_scan_tag(parser); - if (!token) return 0; + /* Create the TAG token and append it to the queue. */ - /* Append the token to the queue. */ + if (!yaml_parser_scan_tag(parser, &token)) + return 0; - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); return 0; } @@ -2360,7 +2044,7 @@ yaml_parser_fetch_tag(yaml_parser_t *parser) static int yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) { - yaml_token_t *token; + yaml_token_t token; /* Remove any potential simple keys. */ @@ -2371,15 +2055,13 @@ yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) parser->simple_key_allowed = 1; - /* Create the SCALAR token. */ - - token = yaml_parser_scan_block_scalar(parser, literal); - if (!token) return 0; + /* Create the SCALAR token and append it to the queue. */ - /* Append the token to the queue. 
*/ + if (!yaml_parser_scan_block_scalar(parser, &token, literal)) + return 0; - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); return 0; } @@ -2393,7 +2075,7 @@ yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal) static int yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) { - yaml_token_t *token; + yaml_token_t token; /* A plain scalar could be a simple key. */ @@ -2404,15 +2086,13 @@ yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) parser->simple_key_allowed = 0; - /* Create the SCALAR token. */ - - token = yaml_parser_scan_flow_scalar(parser, single); - if (!token) return 0; + /* Create the SCALAR token and append it to the queue. */ - /* Append the token to the queue. */ + if (!yaml_parser_scan_flow_scalar(parser, &token, single)) + return 0; - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); return 0; } @@ -2426,7 +2106,7 @@ yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single) static int yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) { - yaml_token_t *token; + yaml_token_t token; /* A plain scalar could be a simple key. */ @@ -2437,15 +2117,13 @@ yaml_parser_fetch_plain_scalar(yaml_parser_t *parser) parser->simple_key_allowed = 0; - /* Create the SCALAR token. */ + /* Create the SCALAR token and append it to the queue. */ - token = yaml_parser_scan_plain_scalar(parser); - if (!token) return 0; - - /* Append the token to the queue. */ + if (!yaml_parser_scan_plain_scalar(parser, &token)) + return 0; - if (!yaml_parser_append_token(parser, token)) { - yaml_token_delete(token); + if (!ENQUEUE(parser, parser->tokens, token)) { + yaml_token_delete(&token); return 0; } @@ -2465,10 +2143,10 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) { /* Allow the BOM mark to start a line. 
*/ - if (!UPDATE(parser, 1)) return 0; + if (!CACHE(parser, 1)) return 0; - if (parser->column == 0 && IS_BOM(parser)) - FORWARD(parser); + if (parser->mark.column == 0 && IS_BOM(parser)) + SKIP(parser); /* * Eat whitespaces. @@ -2480,21 +2158,21 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) * after '-', '?', or ':' (complex value). */ - if (!UPDATE(parser, 1)) return 0; + if (!CACHE(parser, 1)) return 0; while (CHECK(parser,' ') || ((parser->flow_level || !parser->simple_key_allowed) && CHECK(parser, '\t'))) { - FORWARD(parser); - if (!UPDATE(parser, 1)) return 0; + SKIP(parser); + if (!CACHE(parser, 1)) return 0; } /* Eat a comment until a line break. */ if (CHECK(parser, '#')) { while (!IS_BREAKZ(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) return 0; + SKIP(parser); + if (!CACHE(parser, 1)) return 0; } } @@ -2502,8 +2180,8 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) if (IS_BREAK(parser)) { - if (!UPDATE(parser, 2)) return 0; - FORWARD_LINE(parser); + if (!CACHE(parser, 2)) return 0; + SKIP_LINE(parser); /* In the block context, a new line may start a simple key. */ @@ -2532,20 +2210,19 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ -static yaml_token_t * -yaml_parser_scan_directive(yaml_parser_t *parser) +int +yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) { yaml_mark_t start_mark, end_mark; yaml_char_t *name = NULL; int major, minor; yaml_char_t *handle = NULL, *prefix = NULL; - yaml_token_t *token = NULL; /* Eat '%'. */ - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; - FORWARD(parser); + SKIP(parser); /* Scan the directive name. */ @@ -2562,16 +2239,12 @@ yaml_parser_scan_directive(yaml_parser_t *parser) &major, &minor)) goto error; - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* Create a VERSION-DIRECTIVE token. 
*/ - token = yaml_version_directive_token_new(major, minor, + VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } } /* Is it a TAG directive? */ @@ -2584,16 +2257,12 @@ yaml_parser_scan_directive(yaml_parser_t *parser) &handle, &prefix)) goto error; - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* Create a TAG-DIRECTIVE token. */ - token = yaml_tag_directive_token_new(handle, prefix, + TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } } /* Unknown directive. */ @@ -2607,15 +2276,17 @@ yaml_parser_scan_directive(yaml_parser_t *parser) /* Eat the rest of the line including any comments. */ + if (!CACHE(parser, 1)) goto error; + while (IS_BLANK(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) goto error; + SKIP(parser); + if (!CACHE(parser, 1)) goto error; } if (CHECK(parser, '#')) { while (!IS_BREAKZ(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) goto error; + SKIP(parser); + if (!CACHE(parser, 1)) goto error; } } @@ -2630,20 +2301,19 @@ yaml_parser_scan_directive(yaml_parser_t *parser) /* Eat a line break. */ if (IS_BREAK(parser)) { - if (!UPDATE(parser, 2)) goto error; - FORWARD_LINE(parser); + if (!CACHE(parser, 2)) goto error; + SKIP_LINE(parser); } yaml_free(name); - return token; + return 1; error: - yaml_free(token); yaml_free(prefix); yaml_free(handle); yaml_free(name); - return NULL; + return 0; } /* @@ -2660,24 +2330,23 @@ static int yaml_parser_scan_directive_name(yaml_parser_t *parser, yaml_mark_t start_mark, yaml_char_t **name) { - yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t string = NULL_STRING; - if (!string.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; /* Consume the directive name. 
*/ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_ALPHA(parser)) { - if (!RESIZE(parser, string)) goto error; - COPY(parser, string); - if (!UPDATE(parser, 1)) goto error; + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; } /* Check if the name is empty. */ - if (string.buffer == string.pointer) { + if (string.start == string.pointer) { yaml_parser_set_scanner_error(parser, "while scanning a directive", start_mark, "cannot found expected directive name"); goto error; @@ -2691,12 +2360,12 @@ yaml_parser_scan_directive_name(yaml_parser_t *parser, goto error; } - *name = string.buffer; + *name = string.start; return 1; error: - yaml_free(string.buffer); + STRING_DEL(parser, string); return 0; } @@ -2714,11 +2383,11 @@ yaml_parser_scan_version_directive_value(yaml_parser_t *parser, { /* Eat whitespaces. */ - if (!UPDATE(parser, 1)) return 0; + if (!CACHE(parser, 1)) return 0; while (IS_BLANK(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) return 0; + SKIP(parser); + if (!CACHE(parser, 1)) return 0; } /* Consume the major version number. */ @@ -2733,7 +2402,7 @@ yaml_parser_scan_version_directive_value(yaml_parser_t *parser, start_mark, "did not find expected digit or '.' character"); } - FORWARD(parser); + SKIP(parser); /* Consume the minor version number. */ @@ -2764,7 +2433,7 @@ yaml_parser_scan_version_directive_number(yaml_parser_t *parser, /* Repeat while the next character is digit. */ - if (!UPDATE(parser, 1)) return 0; + if (!CACHE(parser, 1)) return 0; while (IS_DIGIT(parser)) { @@ -2777,9 +2446,9 @@ yaml_parser_scan_version_directive_number(yaml_parser_t *parser, value = value*10 + AS_DIGIT(parser); - FORWARD(parser); + SKIP(parser); - if (!UPDATE(parser, 1)) return 0; + if (!CACHE(parser, 1)) return 0; } /* Check if the number was present. */ @@ -2811,11 +2480,11 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, /* Eat whitespaces. 
*/ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_BLANK(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) goto error; + SKIP(parser); + if (!CACHE(parser, 1)) goto error; } /* Scan a handle. */ @@ -2825,7 +2494,7 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, /* Expect a whitespace. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; if (!IS_BLANK(parser)) { yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", @@ -2836,8 +2505,8 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, /* Eat whitespaces. */ while (IS_BLANK(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) goto error; + SKIP(parser); + if (!CACHE(parser, 1)) goto error; } /* Scan a prefix. */ @@ -2847,7 +2516,7 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, /* Expect a whitespace or line break. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; if (!IS_BLANKZ(parser)) { yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", @@ -2866,35 +2535,33 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, return 0; } -static yaml_token_t * -yaml_parser_scan_anchor(yaml_parser_t *parser, +static int +yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, yaml_token_type_t type) { int length = 0; yaml_mark_t start_mark, end_mark; - yaml_token_t *token = NULL; - yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t string = NULL_STRING; - if (!string.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; /* Eat the indicator character. */ - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; - FORWARD(parser); + SKIP(parser); /* Consume the value. 
*/ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_ALPHA(parser)) { - if (!RESIZE(parser, string)) goto error; - COPY(parser, string); - if (!UPDATE(parser, 1)) goto error; + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; length ++; } - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* * Check if length of the anchor is greater than 0 and it is followed by @@ -2914,19 +2581,17 @@ yaml_parser_scan_anchor(yaml_parser_t *parser, /* Create a token. */ - token = type == YAML_ANCHOR_TOKEN ? - yaml_anchor_token_new(string.buffer, start_mark, end_mark) : - yaml_alias_token_new(string.buffer, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; + if (type == YAML_ANCHOR_TOKEN) { + ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark); + } + else { + ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark); } - return token; + return 1; error: - yaml_free(string.buffer); - yaml_free(token); + STRING_DEL(parser, string); return 0; } @@ -2934,19 +2599,18 @@ yaml_parser_scan_anchor(yaml_parser_t *parser, * Scan a TAG token. */ -static yaml_token_t * -yaml_parser_scan_tag(yaml_parser_t *parser) +static int +yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) { yaml_char_t *handle = NULL; yaml_char_t *suffix = NULL; - yaml_token_t *token = NULL; yaml_mark_t start_mark, end_mark; - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; /* Check if the tag is in the canonical form. */ - if (!UPDATE(parser, 2)) goto error; + if (!CACHE(parser, 2)) goto error; if (CHECK_AT(parser, '<', 1)) { @@ -2958,8 +2622,8 @@ yaml_parser_scan_tag(yaml_parser_t *parser) /* Eat '!<' */ - FORWARD(parser); - FORWARD(parser); + SKIP(parser); + SKIP(parser); /* Consume the tag value. 
*/ @@ -2974,7 +2638,7 @@ yaml_parser_scan_tag(yaml_parser_t *parser) goto error; } - FORWARD(parser); + SKIP(parser); } else { @@ -3010,7 +2674,8 @@ yaml_parser_scan_tag(yaml_parser_t *parser) handle[1] = '\0'; /* - * A special case: the '!' tag. + * A special case: the '!' tag. Set the handle to '' and the + * suffix to '!'. */ if (suffix[0] == '\0') { @@ -3023,7 +2688,7 @@ yaml_parser_scan_tag(yaml_parser_t *parser) /* Check the character which ends the tag. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; if (!IS_BLANKZ(parser)) { yaml_parser_set_scanner_error(parser, "while scanning a tag", @@ -3031,22 +2696,18 @@ yaml_parser_scan_tag(yaml_parser_t *parser) goto error; } - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* Create a token. */ - token = yaml_tag_token_new(handle, suffix, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } + TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark); - return token; + return 1; error: yaml_free(handle); yaml_free(suffix); - return NULL; + return 0; } /* @@ -3057,13 +2718,13 @@ static int yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_char_t **handle) { - yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t string = NULL_STRING; - if (!string.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; /* Check the initial '!' character. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; if (!CHECK(parser, '!')) { yaml_parser_set_scanner_error(parser, directive ? @@ -3074,25 +2735,23 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, /* Copy the '!' character. */ - COPY(parser, string); + if (!READ(parser, string)) goto error; /* Copy all subsequent alphabetical and numerical characters. 
*/ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_ALPHA(parser)) { - if (!RESIZE(parser, string)) goto error; - COPY(parser, string); - if (!UPDATE(parser, 1)) goto error; + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; } /* Check if the trailing character is '!' and copy it. */ if (CHECK(parser, '!')) { - if (!RESIZE(parser, string)) goto error; - COPY(parser, string); + if (!READ(parser, string)) goto error; } else { @@ -3102,19 +2761,19 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, * URI. */ - if (directive && !(string.buffer[0] == '!' && string.buffer[1] == '\0')) { + if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) { yaml_parser_set_scanner_error(parser, "while parsing a tag directive", start_mark, "did not find expected '!'"); goto error; } } - *handle = string.buffer; + *handle = string.start; return 1; error: - yaml_free(string.buffer); + STRING_DEL(parser, string); return 0; } @@ -3127,14 +2786,17 @@ yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri) { size_t length = head ? strlen((char *)head) : 0; - yaml_string_t string = yaml_parser_new_string(parser); + yaml_string_t string = NULL_STRING; - if (!string.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; /* Resize the string to include the head. */ - while (string.size <= length) { - if (!yaml_parser_resize_string(parser, &string)) goto error; + while (string.end - string.start <= length) { + if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) { + parser->error = YAML_MEMORY_ERROR; + goto error; + } } /* @@ -3144,13 +2806,13 @@ yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, */ if (length > 1) { - memcpy(string.buffer, head+1, length-1); + memcpy(string.start, head+1, length-1); string.pointer += length-1; } /* Scan the tag. 
*/ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; /* * The set of characters that may appear in URI is as follows: @@ -3168,8 +2830,6 @@ yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, CHECK(parser, '\'') || CHECK(parser, '(') || CHECK(parser, ')') || CHECK(parser, '[') || CHECK(parser, ']') || CHECK(parser, '%')) { - if (!RESIZE(parser, string)) goto error; - /* Check if it is a URI-escape sequence. */ if (CHECK(parser, '%')) { @@ -3177,28 +2837,31 @@ yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, directive, start_mark, &string)) goto error; } else { - COPY(parser, string); + if (!READ(parser, string)) goto error; } length ++; - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; } /* Check if the tag is non-empty. */ if (!length) { + if (!STRING_EXTEND(parser, string)) + goto error; + yaml_parser_set_scanner_error(parser, directive ? "while parsing a %TAG directive" : "while parsing a tag", start_mark, "did not find expected tag URI"); goto error; } - *uri = string.buffer; + *uri = string.start; return 1; error: - yaml_free(string.buffer); + STRING_DEL(parser, string); return 0; } @@ -3220,7 +2883,7 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, /* Check for a URI-escaped octet. */ - if (!UPDATE(parser, 3)) return 0; + if (!CACHE(parser, 3)) return 0; if (!(CHECK(parser, '%') && IS_HEX_AT(parser, 1) && IS_HEX_AT(parser, 2))) { return yaml_parser_set_scanner_error(parser, directive ? @@ -3260,9 +2923,9 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, /* Copy the octet and move the pointers. */ *(string->pointer++) = octet; - FORWARD(parser); - FORWARD(parser); - FORWARD(parser); + SKIP(parser); + SKIP(parser); + SKIP(parser); } while (--width); @@ -3273,34 +2936,34 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, * Scan a block scalar. 
*/ -static yaml_token_t * -yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) +static int +yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, + int literal) { yaml_mark_t start_mark; yaml_mark_t end_mark; - yaml_string_t string = yaml_parser_new_string(parser); - yaml_string_t leading_break = yaml_parser_new_string(parser); - yaml_string_t trailing_breaks = yaml_parser_new_string(parser); - yaml_token_t *token = NULL; + yaml_string_t string = NULL_STRING; + yaml_string_t leading_break = NULL_STRING; + yaml_string_t trailing_breaks = NULL_STRING; int chomping = 0; int increment = 0; int indent = 0; int leading_blank = 0; int trailing_blank = 0; - if (!string.buffer) goto error; - if (!leading_break.buffer) goto error; - if (!trailing_breaks.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; /* Eat the indicator '|' or '>'. */ - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; - FORWARD(parser); + SKIP(parser); /* Scan the additional block scalar indicators. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; /* Check for a chomping indicator. */ @@ -3310,11 +2973,11 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) chomping = CHECK(parser, '+') ? +1 : -1; - FORWARD(parser); + SKIP(parser); /* Check for an indentation indicator. 
*/ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; if (IS_DIGIT(parser)) { @@ -3330,7 +2993,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) increment = AS_DIGIT(parser); - FORWARD(parser); + SKIP(parser); } } @@ -3346,29 +3009,30 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) increment = AS_DIGIT(parser); - FORWARD(parser); + SKIP(parser); - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; if (CHECK(parser, '+') || CHECK(parser, '-')) { chomping = CHECK(parser, '+') ? +1 : -1; - FORWARD(parser); + + SKIP(parser); } } /* Eat whitespaces and comments to the end of the line. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_BLANK(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) goto error; + SKIP(parser); + if (!CACHE(parser, 1)) goto error; } if (CHECK(parser, '#')) { while (!IS_BREAKZ(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) goto error; + SKIP(parser); + if (!CACHE(parser, 1)) goto error; } } @@ -3383,11 +3047,11 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Eat a line break. */ if (IS_BREAK(parser)) { - if (!UPDATE(parser, 2)) goto error; - FORWARD_LINE(parser); + if (!CACHE(parser, 2)) goto error; + SKIP_LINE(parser); } - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* Set the intendation level if it was specified. */ @@ -3402,9 +3066,9 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Scan the block scalar content. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; - while (parser->column == indent && !IS_Z(parser)) + while (parser->mark.column == indent && !IS_Z(parser)) { /* * We are at the beginning of a non-empty line. @@ -3416,25 +3080,27 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Check if we need to fold the leading line break. 
*/ - if (!literal && (*leading_break.buffer == '\n') + if (!literal && (*leading_break.start == '\n') && !leading_blank && !trailing_blank) { /* Do we need to join the lines by space? */ - if (*trailing_breaks.buffer == '\0') { - if (!RESIZE(parser, string)) goto error; + if (*trailing_breaks.start == '\0') { + if (!STRING_EXTEND(parser, string)) goto error; *(string.pointer ++) = ' '; } - yaml_parser_clear_string(parser, &leading_break); + CLEAR(parser, leading_break); } else { if (!JOIN(parser, string, leading_break)) goto error; + CLEAR(parser, leading_break); } /* Append the remaining line breaks. */ if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, trailing_breaks); /* Is it a leading whitespace? */ @@ -3443,16 +3109,15 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Consume the current line. */ while (!IS_BREAKZ(parser)) { - if (!RESIZE(parser, string)) goto error; - COPY(parser, string); - if (!UPDATE(parser, 1)) goto error; + if (!READ(parser, string)) goto error; + if (!CACHE(parser, 1)) goto error; } /* Consume the line break. */ - if (!UPDATE(parser, 2)) goto error; + if (!CACHE(parser, 2)) goto error; - COPY_LINE(parser, leading_break); + if (!READ_LINE(parser, leading_break)) goto error; /* Eat the following intendation spaces and line breaks. */ @@ -3471,25 +3136,21 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, int literal) /* Create a token. */ - token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, + SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, literal ? 
YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - yaml_free(leading_break.buffer); - yaml_free(trailing_breaks.buffer); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); - return token; + return 1; error: - yaml_free(string.buffer); - yaml_free(leading_break.buffer); - yaml_free(trailing_breaks.buffer); + STRING_DEL(parser, string); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); - return NULL; + return 0; } /* @@ -3504,7 +3165,7 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, { int max_indent = 0; - *end_mark = yaml_parser_get_mark(parser); + *end_mark = parser->mark; /* Eat the intendation spaces and line breaks. */ @@ -3512,19 +3173,19 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, { /* Eat the intendation spaces. */ - if (!UPDATE(parser, 1)) return 0; + if (!CACHE(parser, 1)) return 0; - while ((!*indent || parser->column < *indent) && IS_SPACE(parser)) { - FORWARD(parser); - if (!UPDATE(parser, 1)) return 0; + while ((!*indent || parser->mark.column < *indent) && IS_SPACE(parser)) { + SKIP(parser); + if (!CACHE(parser, 1)) return 0; } - if (parser->column > max_indent) - max_indent = parser->column; + if (parser->mark.column > max_indent) + max_indent = parser->mark.column; /* Check for a tab character messing the intendation. */ - if ((!*indent || parser->column < *indent) && IS_TAB(parser)) { + if ((!*indent || parser->mark.column < *indent) && IS_TAB(parser)) { return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", start_mark, "found a tab character where an intendation space is expected"); } @@ -3535,10 +3196,9 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, /* Consume the line break. 
*/ - if (!UPDATE(parser, 2)) return 0; - if (!RESIZE(parser, *breaks)) return 0; - COPY_LINE(parser, *breaks); - *end_mark = yaml_parser_get_mark(parser); + if (!CACHE(parser, 2)) return 0; + if (!READ_LINE(parser, *breaks)) return 0; + *end_mark = parser->mark; } /* Determine the indentation level if needed. */ @@ -3558,28 +3218,28 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, * Scan a quoted scalar. */ -static yaml_token_t * -yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) +static int +yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, + int single) { yaml_mark_t start_mark; yaml_mark_t end_mark; - yaml_string_t string = yaml_parser_new_string(parser); - yaml_string_t leading_break = yaml_parser_new_string(parser); - yaml_string_t trailing_breaks = yaml_parser_new_string(parser); - yaml_string_t whitespaces = yaml_parser_new_string(parser); - yaml_token_t *token = NULL; + yaml_string_t string = NULL_STRING; + yaml_string_t leading_break = NULL_STRING; + yaml_string_t trailing_breaks = NULL_STRING; + yaml_string_t whitespaces = NULL_STRING; int leading_blanks; - if (!string.buffer) goto error; - if (!leading_break.buffer) goto error; - if (!trailing_breaks.buffer) goto error; - if (!whitespaces.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; /* Eat the left quote. */ - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; - FORWARD(parser); + SKIP(parser); /* Consume the content of the quoted scalar. */ @@ -3587,9 +3247,9 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) { /* Check that there are no document indicators at the beginning of the line. 
*/ - if (!UPDATE(parser, 4)) goto error; + if (!CACHE(parser, 4)) goto error; - if (parser->column == 0 && + if (parser->mark.column == 0 && ((CHECK_AT(parser, '-', 0) && CHECK_AT(parser, '-', 1) && CHECK_AT(parser, '-', 2)) || @@ -3613,8 +3273,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) /* Consume non-blank characters. */ - if (!UPDATE(parser, 2)) goto error; - if (!RESIZE(parser, string)) goto error; + if (!CACHE(parser, 2)) goto error; leading_blanks = 0; @@ -3624,9 +3283,10 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) if (single && CHECK_AT(parser, '\'', 0) && CHECK_AT(parser, '\'', 1)) { + if (!STRING_EXTEND(parser, string)) goto error; *(string.pointer++) = '\''; - FORWARD(parser); - FORWARD(parser); + SKIP(parser); + SKIP(parser); } /* Check for the right quote. */ @@ -3640,9 +3300,9 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) else if (!single && CHECK(parser, '\\') && IS_BREAK_AT(parser, 1)) { - if (!UPDATE(parser, 3)) goto error; - FORWARD(parser); - FORWARD_LINE(parser); + if (!CACHE(parser, 3)) goto error; + SKIP(parser); + SKIP_LINE(parser); leading_blanks = 1; break; } @@ -3653,9 +3313,11 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) { int code_length = 0; + if (!STRING_EXTEND(parser, string)) goto error; + /* Check the escape character. */ - switch (parser->pointer[1]) + switch (parser->buffer.pointer[1]) { case '0': *(string.pointer++) = '\0'; @@ -3750,8 +3412,8 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) goto error; } - FORWARD(parser); - FORWARD(parser); + SKIP(parser); + SKIP(parser); /* Consume an arbitrary escape code. */ @@ -3762,7 +3424,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) /* Scan the character value. 
*/ - if (!UPDATE(parser, code_length)) goto error; + if (!CACHE(parser, code_length)) goto error; for (k = 0; k < code_length; k ++) { if (!IS_HEX_AT(parser, k)) { @@ -3803,7 +3465,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) /* Advance the pointer. */ for (k = 0; k < code_length; k ++) { - FORWARD(parser); + SKIP(parser); } } } @@ -3812,11 +3474,10 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) { /* It is a non-escaped non-blank character. */ - COPY(parser, string); + if (!READ(parser, string)) goto error; } - if (!UPDATE(parser, 2)) goto error; - if (!RESIZE(parser, string)) goto error; + if (!CACHE(parser, 2)) goto error; } /* Check if we are at the end of the scalar. */ @@ -3826,7 +3487,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) /* Consume blank characters. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_BLANK(parser) || IS_BREAK(parser)) { @@ -3835,116 +3496,112 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, int single) /* Consume a space or a tab character. */ if (!leading_blanks) { - if (!RESIZE(parser, whitespaces)) goto error; - COPY(parser, whitespaces); + if (!READ(parser, whitespaces)) goto error; } else { - FORWARD(parser); + SKIP(parser); } } else { - if (!UPDATE(parser, 2)) goto error; + if (!CACHE(parser, 2)) goto error; /* Check if it is a first line break. */ if (!leading_blanks) { - yaml_parser_clear_string(parser, &whitespaces); - COPY_LINE(parser, leading_break); + CLEAR(parser, whitespaces); + if (!READ_LINE(parser, leading_break)) goto error; leading_blanks = 1; } else { - if (!RESIZE(parser, trailing_breaks)) goto error; - COPY_LINE(parser, trailing_breaks); + if (!READ_LINE(parser, trailing_breaks)) goto error; } } - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; } /* Join the whitespaces or fold line breaks. 
*/ - if (!RESIZE(parser, string)) goto error; - if (leading_blanks) { /* Do we need to fold line breaks? */ - if (leading_break.buffer[0] == '\n') { - if (trailing_breaks.buffer[0] == '\0') { + if (leading_break.start[0] == '\n') { + if (trailing_breaks.start[0] == '\0') { + if (!STRING_EXTEND(parser, string)) goto error; *(string.pointer++) = ' '; } else { if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, trailing_breaks); } - yaml_parser_clear_string(parser, &leading_break); + CLEAR(parser, leading_break); } else { if (!JOIN(parser, string, leading_break)) goto error; if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, leading_break); + CLEAR(parser, trailing_breaks); } } else { if (!JOIN(parser, string, whitespaces)) goto error; + CLEAR(parser, whitespaces); } } /* Eat the right quote. */ - FORWARD(parser); + SKIP(parser); - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; /* Create a token. */ - token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, + SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } - yaml_free(leading_break.buffer); - yaml_free(trailing_breaks.buffer); - yaml_free(whitespaces.buffer); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); - return token; + return 1; error: - yaml_free(string.buffer); - yaml_free(leading_break.buffer); - yaml_free(trailing_breaks.buffer); - yaml_free(whitespaces.buffer); + STRING_DEL(parser, string); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); - return NULL; + return 0; } /* * Scan a plain scalar. 
*/ -static yaml_token_t * -yaml_parser_scan_plain_scalar(yaml_parser_t *parser) +static int +yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) { yaml_mark_t start_mark; yaml_mark_t end_mark; - yaml_string_t string = yaml_parser_new_string(parser); - yaml_string_t leading_break = yaml_parser_new_string(parser); - yaml_string_t trailing_breaks = yaml_parser_new_string(parser); - yaml_string_t whitespaces = yaml_parser_new_string(parser); - yaml_token_t *token = NULL; + yaml_string_t string = NULL_STRING; + yaml_string_t leading_break = NULL_STRING; + yaml_string_t trailing_breaks = NULL_STRING; + yaml_string_t whitespaces = NULL_STRING; int leading_blanks = 0; int indent = parser->indent+1; - if (!string.buffer) goto error; - if (!leading_break.buffer) goto error; - if (!trailing_breaks.buffer) goto error; - if (!whitespaces.buffer) goto error; + if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; + if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; - start_mark = yaml_parser_get_mark(parser); + start_mark = parser->mark; /* Consume the content of the plain scalar. */ @@ -3952,9 +3609,9 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) { /* Check for a document indicator. */ - if (!UPDATE(parser, 4)) goto error; + if (!CACHE(parser, 4)) goto error; - if (parser->column == 0 && + if (parser->mark.column == 0 && ((CHECK_AT(parser, '-', 0) && CHECK_AT(parser, '-', 1) && CHECK_AT(parser, '-', 2)) || @@ -3992,26 +3649,28 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) /* Check if we need to join whitespaces and breaks. 
*/ - if (leading_blanks || whitespaces.buffer != whitespaces.pointer) + if (leading_blanks || whitespaces.start != whitespaces.pointer) { - if (!RESIZE(parser, string)) goto error; - if (leading_blanks) { /* Do we need to fold line breaks? */ - if (leading_break.buffer[0] == '\n') { - if (trailing_breaks.buffer[0] == '\0') { + if (leading_break.start[0] == '\n') { + if (trailing_breaks.start[0] == '\0') { + if (!STRING_EXTEND(parser, string)) goto error; *(string.pointer++) = ' '; } else { if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, trailing_breaks); } - yaml_parser_clear_string(parser, &leading_break); + CLEAR(parser, leading_break); } else { if (!JOIN(parser, string, leading_break)) goto error; if (!JOIN(parser, string, trailing_breaks)) goto error; + CLEAR(parser, leading_break); + CLEAR(parser, trailing_breaks); } leading_blanks = 0; @@ -4019,18 +3678,17 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) else { if (!JOIN(parser, string, whitespaces)) goto error; + CLEAR(parser, whitespaces); } } /* Copy the character. */ - if (!RESIZE(parser, string)) goto error; - - COPY(parser, string); + if (!READ(parser, string)) goto error; - end_mark = yaml_parser_get_mark(parser); + end_mark = parser->mark; - if (!UPDATE(parser, 2)) goto error; + if (!CACHE(parser, 2)) goto error; } /* Is it the end? */ @@ -4040,7 +3698,7 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) /* Consume blank characters. */ - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; while (IS_BLANK(parser) || IS_BREAK(parser)) { @@ -4048,7 +3706,7 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) { /* Check for tab character that abuse intendation. 
*/ - if (leading_blanks && parser->column < indent && IS_TAB(parser)) { + if (leading_blanks && parser->mark.column < indent && IS_TAB(parser)) { yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", start_mark, "found a tab character that violate intendation"); goto error; @@ -4057,48 +3715,42 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) /* Consume a space or a tab character. */ if (!leading_blanks) { - if (!RESIZE(parser, whitespaces)) goto error; - COPY(parser, whitespaces); + if (!READ(parser, whitespaces)) goto error; } else { - FORWARD(parser); + SKIP(parser); } } else { - if (!UPDATE(parser, 2)) goto error; + if (!CACHE(parser, 2)) goto error; /* Check if it is a first line break. */ if (!leading_blanks) { - yaml_parser_clear_string(parser, &whitespaces); - COPY_LINE(parser, leading_break); + CLEAR(parser, whitespaces); + if (!READ_LINE(parser, leading_break)) goto error; leading_blanks = 1; } else { - if (!RESIZE(parser, trailing_breaks)) goto error; - COPY_LINE(parser, trailing_breaks); + if (!READ_LINE(parser, trailing_breaks)) goto error; } } - if (!UPDATE(parser, 1)) goto error; + if (!CACHE(parser, 1)) goto error; } /* Check intendation level. */ - if (!parser->flow_level && parser->column < indent) + if (!parser->flow_level && parser->mark.column < indent) break; } /* Create a token. */ - token = yaml_scalar_token_new(string.buffer, string.pointer-string.buffer, + SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start, YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark); - if (!token) { - parser->error = YAML_MEMORY_ERROR; - return 0; - } /* Note that we change the 'simple_key_allowed' flag. 
*/ @@ -4106,18 +3758,18 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser) parser->simple_key_allowed = 1; } - yaml_free(leading_break.buffer); - yaml_free(trailing_breaks.buffer); - yaml_free(whitespaces.buffer); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); - return token; + return 1; error: - yaml_free(string.buffer); - yaml_free(leading_break.buffer); - yaml_free(trailing_breaks.buffer); - yaml_free(whitespaces.buffer); + STRING_DEL(parser, string); + STRING_DEL(parser, leading_break); + STRING_DEL(parser, trailing_breaks); + STRING_DEL(parser, whitespaces); - return NULL; + return 0; } diff --git a/src/yaml_private.h b/src/yaml_private.h new file mode 100644 index 00000000..d7ac644d --- /dev/null +++ b/src/yaml_private.h @@ -0,0 +1,205 @@ + +#if HAVE_CONFIG_H +#include +#endif + +#include + +#include + +/* + * Memory management. + */ + +YAML_DECLARE(void *) +yaml_malloc(size_t size); + +YAML_DECLARE(void *) +yaml_realloc(void *ptr, size_t size); + +YAML_DECLARE(void) +yaml_free(void *ptr); + +YAML_DECLARE(char *) +yaml_strdup(const char *); + +/* + * Reader: Ensure that the buffer contains at least `length` characters. + */ + +YAML_DECLARE(int) +yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); + +/* + * Scanner: Ensure that the token stack contains at least one token ready. + */ + +YAML_DECLARE(int) +yaml_parser_fetch_more_tokens(yaml_parser_t *parser); + +/* + * The size of the raw buffer. + */ + +#define RAW_BUFFER_SIZE 16384 + +/* + * The size of the buffer. + * + * It should be possible to decode the whole raw buffer. + */ + +#define BUFFER_SIZE (RAW_BUFFER_SIZE*3) + +/* + * The size of other stacks and queues. + */ + +#define INITIAL_STACK_SIZE 16 +#define INITIAL_QUEUE_SIZE 16 +#define INITIAL_STRING_SIZE 16 + +/* + * Buffer management. + */ + +#define BUFFER_INIT(context,buffer,size) \ + (((buffer).start = yaml_malloc(size)) ? 
\ + ((buffer).last = (buffer).pointer = (buffer).start, \ + (buffer).end = (buffer).start+(size), \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +#define BUFFER_DEL(context,buffer) \ + (yaml_free((buffer).start), \ + (buffer).start = (buffer).pointer = (buffer).end = 0) + +/* + * String management. + */ + +typedef struct { + yaml_char_t *start; + yaml_char_t *end; + yaml_char_t *pointer; +} yaml_string_t; + +YAML_DECLARE(int) +yaml_string_extend(yaml_char_t **start, + yaml_char_t **pointer, yaml_char_t **end); + +YAML_DECLARE(int) +yaml_string_join( + yaml_char_t **a_start, yaml_char_t **a_pointer, yaml_char_t **a_end, + yaml_char_t **b_start, yaml_char_t **b_pointer, yaml_char_t **b_end); + +#define NULL_STRING { NULL, NULL, NULL } + +#define STRING_INIT(context,string,size) \ + (((string).start = yaml_malloc(size)) ? \ + ((string).pointer = (string).start, \ + (string).end = (string).start+(size), \ + memset((string).start, 0, (size)), \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +#define STRING_DEL(context,string) \ + (yaml_free((string).start), \ + (string).start = (string).pointer = (string).end = 0) + +#define STRING_EXTEND(context,string) \ + (((string).pointer+5 < (string).end) \ + || yaml_string_extend(&(string).start, \ + &(string).pointer, &(string).end)) + +#define CLEAR(context,string) \ + ((string).pointer = (string).start, \ + memset((string).start, 0, (string).end-(string).start)) + +#define JOIN(context,string_a,string_b) \ + ((yaml_string_join(&(string_a).start, &(string_a).pointer, \ + &(string_a).end, &(string_b).start, \ + &(string_b).pointer, &(string_b).end)) ? \ + ((string_b).pointer = (string_b).start, \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +/* + * Stack and queue management. 
+ */ + +YAML_DECLARE(int) +yaml_stack_extend(void **start, void **top, void **end); + +YAML_DECLARE(int) +yaml_queue_extend(void **start, void **head, void **tail, void **end); + +#define STACK_INIT(context,stack,size) \ + (((stack).start = yaml_malloc((size)*sizeof(*(stack).start))) ? \ + ((stack).top = (stack).start, \ + (stack).end = (stack).start+(size), \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +#define STACK_DEL(context,stack) \ + (yaml_free((stack).start), \ + (stack).start = (stack).top = (stack).end = 0) + +#define STACK_EMPTY(context,stack) \ + ((stack).start == (stack).top) + +#define PUSH(context,stack,value) \ + (((stack).top != (stack).end \ + || yaml_stack_extend((void **)&(stack).start, \ + (void **)&(stack).top, (void **)&(stack).end)) ? \ + (*((stack).top++) = value, \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +#define POP(context,stack) \ + (*(--(stack).top)) + +#define QUEUE_INIT(context,queue,size) \ + (((queue).start = yaml_malloc((size)*sizeof(*(queue).start))) ? \ + ((queue).head = (queue).tail = (queue).start, \ + (queue).end = (queue).start+(size), \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +#define QUEUE_DEL(context,queue) \ + (yaml_free((queue).start), \ + (queue).start = (queue).head = (queue).tail = (queue).end = 0) + +#define QUEUE_EMPTY(context,queue) \ + ((queue).head == (queue).tail) + +#define ENQUEUE(context,queue,value) \ + (((queue).tail != (queue).end \ + || yaml_queue_extend((void **)&(queue).start, (void **)&(queue).head, \ + (void **)&(queue).tail, (void **)&(queue).end)) ? \ + (*((queue).tail++) = value, \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + +#define DEQUEUE(context,queue) \ + (*((queue).head++)) + +#define QUEUE_INSERT(context,queue,index,value) \ + (((queue).tail != (queue).end \ + || yaml_queue_extend((void **)&(queue).start, (void **)&(queue).head, \ + (void **)&(queue).tail, (void **)&(queue).end)) ? 
\ + (memmove((queue).head+(index)+1,(queue).head+(index), \ + ((queue).tail-(queue).head-(index))*sizeof(*(queue).start)), \ + *((queue).head+(index)) = value, \ + (queue).tail++, \ + 1) : \ + ((context)->error = YAML_MEMORY_ERROR, \ + 0)) + diff --git a/tests/Makefile.am b/tests/Makefile.am index 8f699070..e2f9e9c0 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include LDADD = $(top_builddir)/src/libyaml.la TESTS = test-version test-reader -check_PROGRAMS = test-version test-reader +check_PROGRAMS = test-version test-reader run-scanner run-parser diff --git a/tests/run-parser.c b/tests/run-parser.c new file mode 100644 index 00000000..16f4ce15 --- /dev/null +++ b/tests/run-parser.c @@ -0,0 +1,46 @@ +#include + +#include +#include +#include + +int +main(int argc, char *argv[]) +{ + FILE *file; + yaml_parser_t parser; + yaml_event_t event; + int done = 0; + int count = 0; + + if (argc != 2) { + printf("Usage: %s file.yaml\n", argv[0]); + return 0; + } + file = fopen(argv[1], "rb"); + assert(file); + + assert(yaml_parser_initialize(&parser)); + + yaml_parser_set_input_file(&parser, file); + + while (!done) + { + assert(yaml_parser_parse(&parser, &event)); + + done = (event.type == YAML_STREAM_END_EVENT); + + yaml_event_delete(&event); + + count ++; + } + + yaml_parser_delete(&parser); + + fclose(file); + + printf("Parsing the file '%s': %d events\n", argv[1], count); + + return 0; +} + diff --git a/tests/run-scanner.c b/tests/run-scanner.c new file mode 100644 index 00000000..e3a67f29 --- /dev/null +++ b/tests/run-scanner.c @@ -0,0 +1,46 @@ +#include + +#include +#include +#include + +int +main(int argc, char *argv[]) +{ + FILE *file; + yaml_parser_t parser; + yaml_token_t token; + int done = 0; + int count = 0; + + if (argc != 2) { + printf("Usage: %s file.yaml\n", argv[0]); + return 0; + } + file = fopen(argv[1], "rb"); + assert(file); + + assert(yaml_parser_initialize(&parser)); + + 
yaml_parser_set_input_file(&parser, file); + + while (!done) + { + assert(yaml_parser_scan(&parser, &token)); + + done = (token.type == YAML_STREAM_END_TOKEN); + + yaml_token_delete(&token); + + count ++; + } + + yaml_parser_delete(&parser); + + fclose(file); + + printf("Parsing the file '%s': %d tokens\n", argv[1], count); + + return 0; +} + diff --git a/tests/test-reader.c b/tests/test-reader.c index c5ce2790..49dc8746 100644 --- a/tests/test-reader.c +++ b/tests/test-reader.c @@ -1,5 +1,8 @@ #include +YAML_DECLARE(int) +yaml_parser_update_buffer(yaml_parser_t *parser, size_t length); + #include #include #include @@ -101,14 +104,15 @@ test_case boms[] = { {"no bom (utf-8)", "Hi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82!", 13}, {"bom (utf-8)", "\xef\xbb\xbfHi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82!", 13}, {"bom (utf-16-le)", "\xff\xfeH\x00i\x00 \x00i\x00s\x00 \x00\x1f\x04@\x04""8\x04""2\x04""5\x04""B\x04!", 13}, - {"bom (utf-16-be)", "\xfe\xff\x00H\x00i\x00 \x00i\x00s\x00 \x04\x1f\x04@\x04""8\x04""2\x04""5\x04""B!", 13} + {"bom (utf-16-be)", "\xfe\xff\x00H\x00i\x00 \x00i\x00s\x00 \x04\x1f\x04@\x04""8\x04""2\x04""5\x04""B!", 13}, + {NULL, NULL, 0} }; char *bom_original = "Hi is \xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82"; int check_utf8_sequences(void) { - yaml_parser_t *parser; + yaml_parser_t parser; int failed = 0; int k; printf("checking utf-8 sequences...\n"); @@ -121,10 +125,9 @@ int check_utf8_sequences(void) printf("\t%s:\n", title); while(1) { while (*end != '|' && *end != '!') end++; - parser = yaml_parser_new(); - assert(parser); - yaml_parser_set_input_string(parser, (unsigned char *)start, end-start); - result = yaml_parser_update_buffer(parser, end-start); + yaml_parser_initialize(&parser); + yaml_parser_set_input_string(&parser, (unsigned char *)start, end-start); + result = yaml_parser_update_buffer(&parser, end-start); if (result != check) { printf("\t\t- "); failed ++; @@ -132,22 +135,22 @@ int 
check_utf8_sequences(void) else { printf("\t\t+ "); } - if (!parser->error) { + if (!parser.error) { printf("(no error)\n"); } - else if (parser->error == YAML_READER_ERROR) { - if (parser->problem_value != -1) { + else if (parser.error == YAML_READER_ERROR) { + if (parser.problem_value != -1) { printf("(reader error: %s: #%X at %d)\n", - parser->problem, parser->problem_value, parser->problem_offset); + parser.problem, parser.problem_value, parser.problem_offset); } else { printf("(reader error: %s at %d)\n", - parser->problem, parser->problem_offset); + parser.problem, parser.problem_offset); } } if (*end == '!') break; start = ++end; - yaml_parser_delete(parser); + yaml_parser_delete(&parser); }; printf("\n"); } @@ -157,7 +160,7 @@ int check_utf8_sequences(void) int check_boms(void) { - yaml_parser_t *parser; + yaml_parser_t parser; int failed = 0; int k; printf("checking boms...\n"); @@ -169,28 +172,27 @@ int check_boms(void) char *end = start; while (*end != '!') end++; printf("\t%s: ", title); - parser = yaml_parser_new(); - assert(parser); - yaml_parser_set_input_string(parser, (unsigned char *)start, end-start); - result = yaml_parser_update_buffer(parser, end-start); + yaml_parser_initialize(&parser); + yaml_parser_set_input_string(&parser, (unsigned char *)start, end-start); + result = yaml_parser_update_buffer(&parser, end-start); if (!result) { - printf("- (reader error: %s at %d)\n", parser->problem, parser->problem_offset); + printf("- (reader error: %s at %d)\n", parser.problem, parser.problem_offset); failed++; } else { - if (parser->unread != check) { - printf("- (length=%d while expected length=%d)\n", parser->unread, check); + if (parser.unread != check) { + printf("- (length=%d while expected length=%d)\n", parser.unread, check); failed++; } - else if (memcmp(parser->buffer, bom_original, check) != 0) { - printf("- (value '%s' does not equal to the original value '%s')\n", parser->buffer, bom_original); + else if (memcmp(parser.buffer.start, 
bom_original, check) != 0) { + printf("- (value '%s' does not equal to the original value '%s')\n", parser.buffer.start, bom_original); failed++; } else { printf("+\n"); } } - yaml_parser_delete(parser); + yaml_parser_delete(&parser); } printf("checking boms: %d fail(s)\n", failed); return failed; @@ -200,7 +202,7 @@ int check_boms(void) int check_long_utf8(void) { - yaml_parser_t *parser; + yaml_parser_t parser; int k = 0; int j; int failed = 0; @@ -221,18 +223,17 @@ int check_long_utf8(void) buffer[k++] = '\xaf'; } } - parser = yaml_parser_new(); - assert(parser); - yaml_parser_set_input_string(parser, buffer, 3+LONG*2); + yaml_parser_initialize(&parser); + yaml_parser_set_input_string(&parser, buffer, 3+LONG*2); for (k = 0; k < LONG; k++) { - if (!parser->unread) { - if (!yaml_parser_update_buffer(parser, 1)) { - printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + if (!parser.unread) { + if (!yaml_parser_update_buffer(&parser, 1)) { + printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset); failed = 1; break; } } - if (!parser->unread) { + if (!parser.unread) { printf("\tnot enough characters at %d\n", k); failed = 1; break; @@ -245,27 +246,27 @@ int check_long_utf8(void) ch0 = '\xd0'; ch1 = '\xaf'; } - if (parser->pointer[0] != ch0 || parser->pointer[1] != ch1) { + if (parser.buffer.pointer[0] != ch0 || parser.buffer.pointer[1] != ch1) { printf("\tincorrect UTF-8 sequence: %X %X instead of %X %X\n", - (int)parser->pointer[0], (int)parser->pointer[1], + (int)parser.buffer.pointer[0], (int)parser.buffer.pointer[1], (int)ch0, (int)ch1); failed = 1; break; } - parser->pointer += 2; - parser->unread -= 1; + parser.buffer.pointer += 2; + parser.unread -= 1; } if (!failed) { - if (!yaml_parser_update_buffer(parser, 1)) { - printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + if (!yaml_parser_update_buffer(&parser, 1)) { + printf("\treader error: %s at %d\n", parser.problem, 
parser.problem_offset); failed = 1; } - else if (parser->pointer[0] != '\0') { - printf("\texpected NUL, found %X (eof=%d, unread=%d)\n", (int)parser->pointer[0], parser->eof, parser->unread); + else if (parser.buffer.pointer[0] != '\0') { + printf("\texpected NUL, found %X (eof=%d, unread=%d)\n", (int)parser.buffer.pointer[0], parser.eof, parser.unread); failed = 1; } } - yaml_parser_delete(parser); + yaml_parser_delete(&parser); free(buffer); printf("checking a long utf8 sequence: %d fail(s)\n", failed); return failed; @@ -273,7 +274,7 @@ int check_long_utf8(void) int check_long_utf16(void) { - yaml_parser_t *parser; + yaml_parser_t parser; int k = 0; int j; int failed = 0; @@ -293,18 +294,17 @@ int check_long_utf16(void) buffer[k++] = '\x04'; } } - parser = yaml_parser_new(); - assert(parser); - yaml_parser_set_input_string(parser, buffer, 2+LONG*2); + yaml_parser_initialize(&parser); + yaml_parser_set_input_string(&parser, buffer, 2+LONG*2); for (k = 0; k < LONG; k++) { - if (!parser->unread) { - if (!yaml_parser_update_buffer(parser, 1)) { - printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + if (!parser.unread) { + if (!yaml_parser_update_buffer(&parser, 1)) { + printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset); failed = 1; break; } } - if (!parser->unread) { + if (!parser.unread) { printf("\tnot enough characters at %d\n", k); failed = 1; break; @@ -317,27 +317,27 @@ int check_long_utf16(void) ch0 = '\xd0'; ch1 = '\xaf'; } - if (parser->pointer[0] != ch0 || parser->pointer[1] != ch1) { + if (parser.buffer.pointer[0] != ch0 || parser.buffer.pointer[1] != ch1) { printf("\tincorrect UTF-8 sequence: %X %X instead of %X %X\n", - (int)parser->pointer[0], (int)parser->pointer[1], + (int)parser.buffer.pointer[0], (int)parser.buffer.pointer[1], (int)ch0, (int)ch1); failed = 1; break; } - parser->pointer += 2; - parser->unread -= 1; + parser.buffer.pointer += 2; + parser.unread -= 1; } if (!failed) { - if 
(!yaml_parser_update_buffer(parser, 1)) { - printf("\treader error: %s at %d\n", parser->problem, parser->problem_offset); + if (!yaml_parser_update_buffer(&parser, 1)) { + printf("\treader error: %s at %d\n", parser.problem, parser.problem_offset); failed = 1; } - else if (parser->pointer[0] != '\0') { - printf("\texpected NUL, found %X (eof=%d, unread=%d)\n", (int)parser->pointer[0], parser->eof, parser->unread); + else if (parser.buffer.pointer[0] != '\0') { + printf("\texpected NUL, found %X (eof=%d, unread=%d)\n", (int)parser.buffer.pointer[0], parser.eof, parser.unread); failed = 1; } } - yaml_parser_delete(parser); + yaml_parser_delete(&parser); free(buffer); printf("checking a long utf16 sequence: %d fail(s)\n", failed); return failed; diff --git a/tests/test-version.c b/tests/test-version.c index ab4f93c2..5982f7d7 100644 --- a/tests/test-version.c +++ b/tests/test-version.c @@ -16,5 +16,10 @@ main(void) sprintf(buf, "%d.%d.%d", major, minor, patch); assert(strcmp(buf, yaml_get_version_string()) == 0); + /* Print structure sizes. */ + printf("sizeof(token) = %d\n", sizeof(yaml_token_t)); + printf("sizeof(event) = %d\n", sizeof(yaml_event_t)); + printf("sizeof(parser) = %d\n", sizeof(yaml_parser_t)); + return 0; } From df1c95466e91343f55c48c064a7c3e6fee3ec3a0 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 21 Jul 2006 18:29:00 +0000 Subject: [PATCH 20/73] Fix some leaks, segfaults and warnings. 
--- src/api.c | 6 +++++ src/parser.c | 9 +++++-- src/reader.c | 9 ++++--- src/scanner.c | 2 +- tests/run-parser.c | 57 ++++++++++++++++++++++++++++----------------- tests/run-scanner.c | 57 ++++++++++++++++++++++++++++----------------- 6 files changed, 90 insertions(+), 50 deletions(-) diff --git a/src/api.c b/src/api.c index 0dd3c424..23f4ff20 100644 --- a/src/api.c +++ b/src/api.c @@ -368,6 +368,9 @@ yaml_token_delete(yaml_token_t *token) case YAML_SCALAR_TOKEN: yaml_free(token->data.scalar.value); break; + + default: + break; } memset(token, 0, sizeof(yaml_token_t)); @@ -416,6 +419,9 @@ yaml_event_delete(yaml_event_t *event) yaml_free(event->data.mapping_start.anchor); yaml_free(event->data.mapping_start.tag); break; + + default: + break; } memset(event, 0, sizeof(yaml_event_t)); diff --git a/src/parser.c b/src/parser.c index fe9e1719..2b81b0c2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -363,6 +363,8 @@ yaml_parser_state_machine(yaml_parser_t *parser, yaml_event_t *event) default: assert(1); /* Invalid state. 
*/ } + + return 0; } /* @@ -1358,12 +1360,16 @@ yaml_parser_process_directives(yaml_parser_t *parser, if (tag_directives_start_ref) { if (STACK_EMPTY(parser, tag_directives)) { *tag_directives_start_ref = *tag_directives_end_ref = NULL; + STACK_DEL(parser, tag_directives); } else { *tag_directives_start_ref = tag_directives.start; - *tag_directives_end_ref = tag_directives.end; + *tag_directives_end_ref = tag_directives.top; } } + else { + STACK_DEL(parser, tag_directives); + } return 1; @@ -1384,7 +1390,6 @@ yaml_parser_append_tag_directive(yaml_parser_t *parser, { yaml_tag_directive_t *tag_directive; yaml_tag_directive_t copy = { NULL, NULL }; - int length; for (tag_directive = parser->tag_directives.start; tag_directive != parser->tag_directives.top; tag_directive ++) { diff --git a/src/reader.c b/src/reader.c index 99a51db0..ea189c26 100644 --- a/src/reader.c +++ b/src/reader.c @@ -186,10 +186,10 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) while (parser->raw_buffer.pointer != parser->raw_buffer.last) { - unsigned int value, value2; + unsigned int value = 0, value2 = 0; int incomplete = 0; unsigned char octet; - unsigned int width; + unsigned int width = 0; int k, low, high; int raw_unread = parser->raw_buffer.last - parser->raw_buffer.pointer; @@ -366,7 +366,7 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) /* Get the next character. */ - unsigned int value2 = parser->raw_buffer.pointer[low+2] + value2 = parser->raw_buffer.pointer[low+2] + (parser->raw_buffer.pointer[high+2] << 8); /* Check for a low surrogate area. */ @@ -386,6 +386,9 @@ yaml_parser_update_buffer(yaml_parser_t *parser, size_t length) } break; + + default: + assert(1); /* Impossible. */ } /* Check if the raw buffer contains enough bytes to form a character. 
*/ diff --git a/src/scanner.c b/src/scanner.c index 90a8979f..a9b78ff1 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -3601,7 +3601,7 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error; if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error; - start_mark = parser->mark; + start_mark = end_mark = parser->mark; /* Consume the content of the plain scalar. */ diff --git a/tests/run-parser.c b/tests/run-parser.c index 16f4ce15..8c1a5dbc 100644 --- a/tests/run-parser.c +++ b/tests/run-parser.c @@ -7,39 +7,52 @@ int main(int argc, char *argv[]) { - FILE *file; - yaml_parser_t parser; - yaml_event_t event; - int done = 0; - int count = 0; - - if (argc != 2) { - printf("Usage: %s file.yaml\n", argv[0]); + int number; + + if (argc < 2) { + printf("Usage: %s file1.yaml ...\n", argv[0]); return 0; } - file = fopen(argv[1], "rb"); - assert(file); - assert(yaml_parser_initialize(&parser)); + for (number = 1; number < argc; number ++) + { + FILE *file; + yaml_parser_t parser; + yaml_event_t event; + int done = 0; + int count = 0; + int error = 0; - yaml_parser_set_input_file(&parser, file); + printf("[%d] Parsing '%s': ", number, argv[number]); + fflush(stdout); - while (!done) - { - assert(yaml_parser_parse(&parser, &event)); + file = fopen(argv[number], "rb"); + assert(file); - done = (event.type == YAML_STREAM_END_EVENT); + assert(yaml_parser_initialize(&parser)); - yaml_event_delete(&event); + yaml_parser_set_input_file(&parser, file); - count ++; - } + while (!done) + { + if (!yaml_parser_parse(&parser, &event)) { + error = 1; + break; + } - yaml_parser_delete(&parser); + done = (event.type == YAML_STREAM_END_EVENT); - fclose(file); + yaml_event_delete(&event); - printf("Parsing the file '%s': %d events\n", argv[1], count); + count ++; + } + + yaml_parser_delete(&parser); + + assert(!fclose(file)); + + printf("%s (%d events)\n", (error ? 
"FAILURE" : "SUCCESS"), count); + } return 0; } diff --git a/tests/run-scanner.c b/tests/run-scanner.c index e3a67f29..2c8d33e5 100644 --- a/tests/run-scanner.c +++ b/tests/run-scanner.c @@ -7,39 +7,52 @@ int main(int argc, char *argv[]) { - FILE *file; - yaml_parser_t parser; - yaml_token_t token; - int done = 0; - int count = 0; - - if (argc != 2) { - printf("Usage: %s file.yaml\n", argv[0]); + int number; + + if (argc < 2) { + printf("Usage: %s file1.yaml ...\n", argv[0]); return 0; } - file = fopen(argv[1], "rb"); - assert(file); - assert(yaml_parser_initialize(&parser)); + for (number = 1; number < argc; number ++) + { + FILE *file; + yaml_parser_t parser; + yaml_token_t token; + int done = 0; + int count = 0; + int error = 0; - yaml_parser_set_input_file(&parser, file); + printf("[%d] Scanning '%s': ", number, argv[number]); + fflush(stdout); - while (!done) - { - assert(yaml_parser_scan(&parser, &token)); + file = fopen(argv[number], "rb"); + assert(file); - done = (token.type == YAML_STREAM_END_TOKEN); + assert(yaml_parser_initialize(&parser)); - yaml_token_delete(&token); + yaml_parser_set_input_file(&parser, file); - count ++; - } + while (!done) + { + if (!yaml_parser_scan(&parser, &token)) { + error = 1; + break; + } - yaml_parser_delete(&parser); + done = (token.type == YAML_STREAM_END_TOKEN); - fclose(file); + yaml_token_delete(&token); - printf("Parsing the file '%s': %d tokens\n", argv[1], count); + count ++; + } + + yaml_parser_delete(&parser); + + assert(!fclose(file)); + + printf("%s (%d tokens)\n", (error ? "FAILURE" : "SUCCESS"), count); + } return 0; } From cece0a9fcd59847821ff39f6a43d4a42b5641689 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sun, 23 Jul 2006 11:57:36 +0000 Subject: [PATCH 21/73] Add Emitter definitions and implement the Writer. 
--- include/yaml.h | 553 ++++++++++++++++++++++++++++++++++++++++++++- src/Makefile.am | 2 +- src/api.c | 231 ++++++++++++++++++- src/parser.c | 66 ------ src/scanner.c | 46 ---- src/writer.c | 138 +++++++++++ src/yaml_private.h | 134 ++++++++++- 7 files changed, 1042 insertions(+), 128 deletions(-) create mode 100644 src/writer.c diff --git a/include/yaml.h b/include/yaml.h index ab1cf6a6..204872ab 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -101,6 +101,15 @@ typedef enum { YAML_UTF16BE_ENCODING } yaml_encoding_t; +/** Line break types. */ + +typedef enum { + YAML_ANY_BREAK, + YAML_CR_BREAK, + YAML_LN_BREAK, + YAML_CRLN_BREAK +} yaml_break_t; + /** Many bad things could happen with the parser and emitter. */ typedef enum { YAML_NO_ERROR, @@ -435,7 +444,7 @@ yaml_event_delete(yaml_event_t *event); * * @returns On success, the handler should return @c 1. If the handler failed, * the returned value should be @c 0. On EOF, the handler should set the - * @a length to @c 0 and return @c 1. + * @a size_read to @c 0 and return @c 1. */ typedef int yaml_read_handler_t(void *data, unsigned char *buffer, size_t size, @@ -554,13 +563,13 @@ typedef struct { /** The working buffer. */ struct { - /* The beginning of the buffer. */ + /** The beginning of the buffer. */ yaml_char_t *start; - /* The end of the buffer. */ + /** The end of the buffer. */ yaml_char_t *end; - /* The current position of the buffer. */ + /** The current position of the buffer. */ yaml_char_t *pointer; - /* The last filled position of the buffer. */ + /** The last filled position of the buffer. */ yaml_char_t *last; } buffer; @@ -706,7 +715,7 @@ typedef struct { * * @param[in] parser An empty parser object. * - * @returns #c 1 if the function succeeded, @c 0 on error. + * @returns @c 1 if the function succeeded, @c 0 on error. */ YAML_DECLARE(int) @@ -737,7 +746,6 @@ YAML_DECLARE(void) yaml_parser_set_input_string(yaml_parser_t *parser, unsigned char *input, size_t size); - /** * Set a file input. 
* @@ -821,18 +829,545 @@ yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); /** @} */ -/* +/** + * @defgroup emitter Emitter Definitions + * @{ + */ + +/** + * The prototype of a write handler. + * + * The write handler is called when the emitter needs to flush the accumulated + * characters to the output. The handler should write @a size bytes of the + * @a buffer to the output. + * + * @param[in] data A pointer to an application data specified by + * @c yaml_emitter_set_output. + * @param[in] buffer The buffer with bytes to be written. + * @param[in] size The size of the buffer. + * + * @returns On success, the handler should return @c 1. If the handler failed, + * the returned value should be @c 0. + */ + +typedef int yaml_write_handler_t(void *data, unsigned char *buffer, size_t size); + +/** The emitter states. */ +typedef enum { + YAML_EMIT_STREAM_START_STATE, + YAML_EMIT_FIRST_DOCUMENT_START_STATE, + YAML_EMIT_DOCUMENT_START_STATE, + YAML_EMIT_DOCUMENT_CONTENT_STATE, + YAML_EMIT_DOCUMENT_END_STATE, + YAML_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE, + YAML_EMIT_FLOW_SEQUENCE_ITEM_STATE, + YAML_EMIT_FLOW_MAPPING_FIRST_KEY_STATE, + YAML_EMIT_FLOW_MAPPING_KEY_STATE, + YAML_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE, + YAML_EMIT_FLOW_MAPPING_VALUE_STATE, + YAML_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE, + YAML_EMIT_BLOCK_SEQUENCE_ITEM_STATE, + YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, + YAML_EMIT_BLOCK_MAPPING_KEY_STATE, + YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, + YAML_EMIT_BLOCK_MAPPING_VALUE_STATE +} yaml_emitter_state_t; + +/** + * The emitter structure. + * + * All members are internal. Manage the structure using the @c yaml_emitter_ + * family of functions. + */ + typedef struct { + + /** + * @name Error handling + * @{ + */ + + /** Error type. */ + yaml_error_type_t error; + /** Error description. */ + const char *problem; + + /** + * @} + */ + + /** + * @name Writer stuff + * @{ + */ + + /** Write handler.
*/ + yaml_write_handler_t *write_handler; + + /** A pointer for passing to the write handler. */ + void *write_handler_data; + + /** Standard (string or file) output data. */ + union { + /** String output data. */ + struct { + /** The buffer pointer. */ + unsigned char *buffer; + /** The buffer size. */ + size_t size; + /** The number of written bytes. */ + size_t *size_written; + } string; + + /** File output data. */ + FILE *file; + } output; + + /** The working buffer. */ + struct { + /** The beginning of the buffer. */ + yaml_char_t *start; + /** The end of the buffer. */ + yaml_char_t *end; + /** The current position of the buffer. */ + yaml_char_t *pointer; + /** The last filled position of the buffer. */ + yaml_char_t *last; + } buffer; + + /** The raw buffer. */ + struct { + /** The beginning of the buffer. */ + unsigned char *start; + /** The end of the buffer. */ + unsigned char *end; + /** The current position of the buffer. */ + unsigned char *pointer; + /** The last filled position of the buffer. */ + unsigned char *last; + } raw_buffer; + + /** The stream encoding. */ + yaml_encoding_t encoding; + + /** + * @} + */ + + /** + * @name Emitter stuff + * @{ + */ + + /** Is the output in the canonical style? */ + int canonical; + /** The number of indentation spaces. */ + int best_indent; + /** The preferred width of the output lines. */ + int best_width; + /** Allow unescaped non-ASCII characters? */ + int unicode; + /** The preferred line break. */ + yaml_break_t line_break; + + /** The stack of states. */ + struct { + /** The beginning of the stack. */ + yaml_emitter_state_t *start; + /** The end of the stack. */ + yaml_emitter_state_t *end; + /** The top of the stack. */ + yaml_emitter_state_t *top; + } states; + + /** The current emitter state. */ + yaml_emitter_state_t state; + + /** The event queue. */ + struct { + /** The beginning of the event queue. */ + yaml_event_t *start; + /** The end of the event queue.
*/ + yaml_event_t *end; + /** The head of the event queue. */ + yaml_event_t *head; + /** The tail of the event queue. */ + yaml_event_t *tail; + } events; + + /** The current event. */ + yaml_event_t event; + + /** The stack of indentation levels. */ + struct { + /** The beginning of the stack. */ + int *start; + /** The end of the stack. */ + int *end; + /** The top of the stack. */ + int *top; + } indents; + + /** The list of tag directives. */ + struct { + /** The beginning of the list. */ + yaml_tag_directive_t *start; + /** The end of the list. */ + yaml_tag_directive_t *end; + /** The top of the list. */ + yaml_tag_directive_t *top; + } tag_directives; + + /** The current indentation level. */ + int indent; + + /** The current flow level. */ + int flow_level; + + /** Is it the document root context? */ + int root_context; + /** Is it a sequence context? */ + int sequence_context; + /** Is it a mapping context? */ + int mapping_context; + /** Is it a simple mapping key context? */ + int simple_key_context; + + /** The current line. */ + int line; + /** The current column. */ + int column; + /** Was the last character a whitespace? */ + int whitespace; + /** Was the last character an indentation character (' ', '-', '?', ':')? */ + int indention; + + /** + * @} + */ + } yaml_emitter_t; +/** + * Initialize an emitter. + * + * This function creates a new emitter object. An application is responsible + * for destroying the object using the @c yaml_emitter_delete function. + * + * @param[in] emitter An empty emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_initialize(yaml_emitter_t *emitter); + +/** + * Destroy an emitter. + * + * @param[in] emitter An emitter object. + */ + +YAML_DECLARE(void) +yaml_emitter_delete(yaml_emitter_t *emitter); + +/** + * Set a string output. + * + * The emitter will write the output characters to the @a output buffer of the + * size @a size.
The emitter will set @a size_written to the number of written + * bytes. If the buffer is smaller than required, the emitter produces the + * YAML_WRITER_ERROR error. + * + * @param[in] emitter An emitter object. + * @param[in] output An output buffer. + * @param[in] size The buffer size. + * @param[out] size_written The pointer to save the number of written bytes. + */ + +YAML_DECLARE(void) +yaml_emitter_set_output_string(yaml_emitter_t *emitter, + unsigned char *output, size_t size, size_t *size_written); + +/** + * Set a file output. + * + * @a file should be a file object open for writing. The application is + * responsible for closing the @a file. + * + * @param[in] emitter An emitter object. + * @param[in] file An open file. + */ + +YAML_DECLARE(void) +yaml_emitter_set_output_file(yaml_emitter_t *emitter, FILE *file); + +/** + * Set a generic output handler. + * + * @param[in] emitter An emitter object. + * @param[in] handler A write handler. + * @param[in] data Any application data for passing to the write handler. + */ + +YAML_DECLARE(void) +yaml_emitter_set_output(yaml_emitter_t *emitter, + yaml_write_handler_t *handler, void *data); + +/** + * Set the output encoding. + * + * @param[in] emitter An emitter object. + * @param[in] encoding The output encoding. + */ + +YAML_DECLARE(void) +yaml_emitter_set_encoding(yaml_emitter_t *emitter, yaml_encoding_t encoding); + +/** + * Set if the output should be in the "canonical" format as in the YAML + * specification. + * + * @param[in] emitter An emitter object. + * @param[in] canonical If the output is canonical. + */ + +YAML_DECLARE(void) +yaml_emitter_set_canonical(yaml_emitter_t *emitter, int canonical); + +/** + * Set the indentation increment. + * + * @param[in] emitter An emitter object. + * @param[in] indent The indentation increment (> 1). + */ + +YAML_DECLARE(void) +yaml_emitter_set_indent(yaml_emitter_t *emitter, int indent); + +/** + * Set the preferred line width. @c 0 means unlimited.
+ * + * @param[in] emitter An emitter object. + * @param[in] width The preferred line width. + */ + +YAML_DECLARE(void) +yaml_emitter_set_width(yaml_emitter_t *emitter, int width); + +/** + * Set if unescaped non-ASCII characters are allowed. + * + * @param[in] emitter An emitter object. + * @param[in] unicode If unescaped Unicode characters are allowed. + */ + +YAML_DECLARE(void) +yaml_emitter_set_unicode(yaml_emitter_t *emitter, int unicode); + +/** + * Set the preferred line break. + * + * @param[in] emitter An emitter object. + * @param[in] line_break The preferred line break. + */ + +YAML_DECLARE(void) +yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break); + +/** + * Emit an event. + * + * The event object may be generated using the @c yaml_parser_parse function. + * The emitter will destroy the event object if the function succeeds. If the + * function fails, the application is responsible for destroying the event + * object. + * + * @param[in] emitter An emitter object. + * @param[in] event An event object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + YAML_DECLARE(int) yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); +/** + * Emit the STREAM-START event. + * + * @param[in] emitter An emitter object. + * @param[in] encoding The stream encoding. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + YAML_DECLARE(int) yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, yaml_encoding_t encoding); -*/ +/** + * Emit the STREAM-END event. + * + * @param[in] emitter An emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_stream_end(yaml_emitter_t *emitter); + +/** + * Emit the DOCUMENT-START event. + * + * The @a implicit argument is considered as a stylistic parameter and may be + * ignored by the emitter. + * + * @param[in] emitter An emitter object.
+ * @param[in] version_directive The %YAML directive value or @c NULL. + * @param[in] tag_directives_start The beginning of the %TAG directives list. + * @param[in] tag_directives_end The end of the %TAG directives list. + * @param[in] implicit If the document start indicator is implicit. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_document_start(yaml_emitter_t *emitter, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int implicit); + +/** + * Emit the DOCUMENT-END event. + * + * The @a implicit argument is considered as a stylistic parameter and may be + * ignored by the emitter. + * + * @param[in] emitter An emitter object. + * @param[in] implicit If the document end indicator is implicit. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_document_end(yaml_emitter_t *emitter, int implicit); + +/** + * Emit an ALIAS event. + * + * @param[in] emitter An emitter object. + * @param[in] anchor The anchor value. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_char_t *anchor); + +/** + * Emit a SCALAR event. + * + * The @a style argument may be ignored by the emitter. + * + * Either the @a tag attribute or one of the @a plain_implicit and + * @a quoted_implicit flags must be set. + * + * @param[in] emitter An emitter object. + * @param[in] anchor The scalar anchor or @c NULL. + * @param[in] tag The scalar tag or @c NULL. + * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] plain_implicit If the tag may be omitted for the plain style. + * @param[in] quoted_implicit If the tag may be omitted for any non-plain style. + * @param[in] style The scalar style. 
+ * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_scalar(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style); + +/** + * Emit a SEQUENCE-START event. + * + * The @a style argument may be ignored by the emitter. + * + * Either the @a tag attribute or the @a implicit flag must be set. + * + * @param[in] emitter An emitter object. + * @param[in] anchor The sequence anchor or @c NULL. + * @param[in] tag The sequence tag or @c NULL. + * @param[in] implicit If the tag may be omitted. + * @param[in] style The sequence style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_sequence_style_t style); + +/** + * Emit a SEQUENCE-END event. + * + * @param[in] emitter An emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_sequence_end(yaml_emitter_t *emitter); + +/** + * Emit a MAPPING-START event. + * + * The @a style argument may be ignored by the emitter. + * + * Either the @a tag attribute or the @a implicit flag must be set. + * + * @param[in] emitter An emitter object. + * @param[in] anchor The mapping anchor or @c NULL. + * @param[in] tag The mapping tag or @c NULL. + * @param[in] implicit If the tag may be omitted. + * @param[in] style The mapping style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_mapping_style_t style); + +/** + * Emit a MAPPING-END event. + * + * @param[in] emitter An emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. 
+ */ + +YAML_DECLARE(int) +yaml_emitter_emit_mapping_end(yaml_emitter_t *emitter); + +/** + * Flush the accumulated characters to the output. + * + * @param[in] emitter An emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_flush(yaml_emitter_t *emitter); + +/** @} */ #ifdef __cplusplus } diff --git a/src/Makefile.am b/src/Makefile.am index c7f6da2a..25a5bc82 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = api.c reader.c scanner.c parser.c +libyaml_la_SOURCES = api.c reader.c scanner.c parser.c writer.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index 23f4ff20..90d86fdd 100644 --- a/src/api.c +++ b/src/api.c @@ -169,9 +169,9 @@ yaml_parser_initialize(yaml_parser_t *parser) assert(parser); /* Non-NULL parser object expected. */ memset(parser, 0, sizeof(yaml_parser_t)); - if (!BUFFER_INIT(parser, parser->raw_buffer, RAW_BUFFER_SIZE)) + if (!BUFFER_INIT(parser, parser->raw_buffer, INPUT_RAW_BUFFER_SIZE)) goto error; - if (!BUFFER_INIT(parser, parser->buffer, BUFFER_SIZE)) + if (!BUFFER_INIT(parser, parser->buffer, INPUT_BUFFER_SIZE)) goto error; if (!QUEUE_INIT(parser, parser->tokens, INITIAL_QUEUE_SIZE)) goto error; @@ -336,6 +336,233 @@ yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding) parser->encoding = encoding; } +/* + * Create a new emitter object. + */ + +YAML_DECLARE(int) +yaml_emitter_initialize(yaml_emitter_t *emitter) +{ + assert(emitter); /* Non-NULL emitter object expected. 
*/ + + memset(emitter, 0, sizeof(yaml_emitter_t)); + if (!BUFFER_INIT(emitter, emitter->buffer, OUTPUT_BUFFER_SIZE)) + goto error; + if (!BUFFER_INIT(emitter, emitter->raw_buffer, OUTPUT_RAW_BUFFER_SIZE)) + goto error; + if (!STACK_INIT(emitter, emitter->states, INITIAL_STACK_SIZE)) + goto error; + if (!QUEUE_INIT(emitter, emitter->events, INITIAL_QUEUE_SIZE)) + goto error; + if (!STACK_INIT(emitter, emitter->indents, INITIAL_STACK_SIZE)) + goto error; + if (!STACK_INIT(emitter, emitter->tag_directives, INITIAL_STACK_SIZE)) + goto error; + + return 1; + +error: + + BUFFER_DEL(emitter, emitter->buffer); + BUFFER_DEL(emitter, emitter->raw_buffer); + STACK_DEL(emitter, emitter->states); + QUEUE_DEL(emitter, emitter->events); + STACK_DEL(emitter, emitter->indents); + STACK_DEL(emitter, emitter->tag_directives); + + return 0; +} + +/* + * Destroy an emitter object. + */ + +YAML_DECLARE(void) +yaml_emitter_delete(yaml_emitter_t *emitter) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + + BUFFER_DEL(emitter, emitter->buffer); + BUFFER_DEL(emitter, emitter->raw_buffer); + STACK_DEL(emitter, emitter->states); + /* Destroy the queued events first, then release the queue storage itself. */ + while (!QUEUE_EMPTY(emitter, emitter->events)) { + yaml_event_delete(&DEQUEUE(emitter, emitter->events)); + } + QUEUE_DEL(emitter, emitter->events); + STACK_DEL(emitter, emitter->indents); + while (!STACK_EMPTY(emitter, emitter->tag_directives)) { + yaml_tag_directive_t tag_directive = POP(emitter, emitter->tag_directives); + yaml_free(tag_directive.handle); + yaml_free(tag_directive.prefix); + } + STACK_DEL(emitter, emitter->tag_directives); + + memset(emitter, 0, sizeof(yaml_emitter_t)); +} + +/* + * String write handler.
+ */ + +static int +yaml_string_write_handler(void *data, unsigned char *buffer, size_t size) +{ + yaml_emitter_t *emitter = data; + + /* Not enough room left in the output string: copy what fits and fail. */ + if (emitter->output.string.size - *emitter->output.string.size_written + < size) { + memcpy(emitter->output.string.buffer + + *emitter->output.string.size_written, + buffer, + emitter->output.string.size + - *emitter->output.string.size_written); + *emitter->output.string.size_written = emitter->output.string.size; + return 0; + } + + memcpy(emitter->output.string.buffer + + *emitter->output.string.size_written, buffer, size); + *emitter->output.string.size_written += size; + return 1; +} + +/* + * File write handler. + */ + +static int +yaml_file_write_handler(void *data, unsigned char *buffer, size_t size) +{ + yaml_emitter_t *emitter = data; + + return (fwrite(buffer, 1, size, emitter->output.file) == size); +} +/* + * Set a string output. + */ + +YAML_DECLARE(void) +yaml_emitter_set_output_string(yaml_emitter_t *emitter, + unsigned char *output, size_t size, size_t *size_written) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + assert(!emitter->write_handler); /* You can set the output only once. */ + assert(output); /* Non-NULL output string expected. */ + + emitter->write_handler = yaml_string_write_handler; + emitter->write_handler_data = emitter; + + emitter->output.string.buffer = output; + emitter->output.string.size = size; + emitter->output.string.size_written = size_written; + *size_written = 0; +} + +/* + * Set a file output. + */ + +YAML_DECLARE(void) +yaml_emitter_set_output_file(yaml_emitter_t *emitter, FILE *file) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + assert(!emitter->write_handler); /* You can set the output only once. */ + assert(file); /* Non-NULL file object expected. */ + + emitter->write_handler = yaml_file_write_handler; + emitter->write_handler_data = emitter; + + emitter->output.file = file; +} + +/* + * Set a generic output handler.
+ */ + +YAML_DECLARE(void) +yaml_emitter_set_output(yaml_emitter_t *emitter, + yaml_write_handler_t *handler, void *data) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + assert(!emitter->write_handler); /* You can set the output only once. */ + assert(handler); /* Non-NULL handler object expected. */ + + emitter->write_handler = handler; + emitter->write_handler_data = data; +} + +/* + * Set the output encoding. + */ + +YAML_DECLARE(void) +yaml_emitter_set_encoding(yaml_emitter_t *emitter, yaml_encoding_t encoding) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + assert(!emitter->encoding); /* You can set encoding only once. */ + + emitter->encoding = encoding; +} + +/* + * Set the canonical output style. + */ + +YAML_DECLARE(void) +yaml_emitter_set_canonical(yaml_emitter_t *emitter, int canonical) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + + emitter->canonical = (canonical != 0); +} + +/* + * Set the indentation increment. + */ + +YAML_DECLARE(void) +yaml_emitter_set_indent(yaml_emitter_t *emitter, int indent) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + + emitter->best_indent = (1 < indent && indent < 10) ? indent : 2; +} + +/* + * Set the preferred line width. + */ + +YAML_DECLARE(void) +yaml_emitter_set_width(yaml_emitter_t *emitter, int width) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + + emitter->best_width = (width > 0) ? width : 0; +} + +/* + * Set if unescaped non-ASCII characters are allowed. + */ + +YAML_DECLARE(void) +yaml_emitter_set_unicode(yaml_emitter_t *emitter, int unicode) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + + emitter->unicode = (unicode != 0); +} + +/* + * Set the preferred line break character. + */ + +YAML_DECLARE(void) +yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break) +{ + assert(emitter); /* Non-NULL emitter object expected. */ + + emitter->line_break = line_break; +} + /* * Destroy a token object. 
*/ diff --git a/src/parser.c b/src/parser.c index 2b81b0c2..020d5d68 100644 --- a/src/parser.c +++ b/src/parser.c @@ -41,72 +41,6 @@ #include "yaml_private.h" -/* - * Event initializers. - */ - -#define EVENT_INIT(event,event_type,event_start_mark,event_end_mark) \ - (memset(&(event), 0, sizeof(yaml_event_t)), \ - (event).type = (event_type), \ - (event).start_mark = (event_start_mark), \ - (event).end_mark = (event_end_mark)) - -#define STREAM_START_EVENT_INIT(event,event_encoding,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_STREAM_START_EVENT,(start_mark),(end_mark)), \ - (event).data.stream_start.encoding = (event_encoding)) - -#define STREAM_END_EVENT_INIT(event,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_STREAM_END_EVENT,(start_mark),(end_mark))) - -#define DOCUMENT_START_EVENT_INIT(event,event_version_directive, \ - event_tag_directives_start,event_tag_directives_end,event_implicit,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_DOCUMENT_START_EVENT,(start_mark),(end_mark)), \ - (event).data.document_start.version_directive = (event_version_directive), \ - (event).data.document_start.tag_directives.start = (event_tag_directives_start), \ - (event).data.document_start.tag_directives.end = (event_tag_directives_end), \ - (event).data.document_start.implicit = (event_implicit)) - -#define DOCUMENT_END_EVENT_INIT(event,event_implicit,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_DOCUMENT_END_EVENT,(start_mark),(end_mark)), \ - (event).data.document_end.implicit = (event_implicit)) - -#define ALIAS_EVENT_INIT(event,event_anchor,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_ALIAS_EVENT,(start_mark),(end_mark)), \ - (event).data.alias.anchor = (event_anchor)) - -#define SCALAR_EVENT_INIT(event,event_anchor,event_tag,event_value,event_length, \ - event_plain_implicit, event_quoted_implicit,event_style,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_SCALAR_EVENT,(start_mark),(end_mark)), \ - (event).data.scalar.anchor = (event_anchor), \ - 
(event).data.scalar.tag = (event_tag), \ - (event).data.scalar.value = (event_value), \ - (event).data.scalar.length = (event_length), \ - (event).data.scalar.plain_implicit = (event_plain_implicit), \ - (event).data.scalar.quoted_implicit = (event_quoted_implicit), \ - (event).data.scalar.style = (event_style)) - -#define SEQUENCE_START_EVENT_INIT(event,event_anchor,event_tag, \ - event_implicit,event_style,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_SEQUENCE_START_EVENT,(start_mark),(end_mark)), \ - (event).data.sequence_start.anchor = (event_anchor), \ - (event).data.sequence_start.tag = (event_tag), \ - (event).data.sequence_start.implicit = (event_implicit), \ - (event).data.sequence_start.style = (event_style)) - -#define SEQUENCE_END_EVENT_INIT(event,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_SEQUENCE_END_EVENT,(start_mark),(end_mark))) - -#define MAPPING_START_EVENT_INIT(event,event_anchor,event_tag, \ - event_implicit,event_style,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_MAPPING_START_EVENT,(start_mark),(end_mark)), \ - (event).data.mapping_start.anchor = (event_anchor), \ - (event).data.mapping_start.tag = (event_tag), \ - (event).data.mapping_start.implicit = (event_implicit), \ - (event).data.mapping_start.style = (event_style)) - -#define MAPPING_END_EVENT_INIT(event,start_mark,end_mark) \ - (EVENT_INIT((event),YAML_MAPPING_END_EVENT,(start_mark),(end_mark))) - /* * Peek the next token in the token queue. */ diff --git a/src/scanner.c b/src/scanner.c index a9b78ff1..bb811276 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -760,52 +760,6 @@ parser->unread --) : 0), \ 1) : 0) -/* - * Token initializers. 
- */ - -#define TOKEN_INIT(token,token_type,token_start_mark,token_end_mark) \ - (memset(&(token), 0, sizeof(yaml_token_t)), \ - (token).type = (token_type), \ - (token).start_mark = (token_start_mark), \ - (token).end_mark = (token_end_mark)) - -#define STREAM_START_TOKEN_INIT(token,token_encoding,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_STREAM_START_TOKEN,(start_mark),(end_mark)), \ - (token).data.stream_start.encoding = (token_encoding)) - -#define STREAM_END_TOKEN_INIT(token,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_STREAM_END_TOKEN,(start_mark),(end_mark))) - -#define ALIAS_TOKEN_INIT(token,token_value,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_ALIAS_TOKEN,(start_mark),(end_mark)), \ - (token).data.alias.value = (token_value)) - -#define ANCHOR_TOKEN_INIT(token,token_value,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_ANCHOR_TOKEN,(start_mark),(end_mark)), \ - (token).data.anchor.value = (token_value)) - -#define TAG_TOKEN_INIT(token,token_handle,token_suffix,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_TAG_TOKEN,(start_mark),(end_mark)), \ - (token).data.tag.handle = (token_handle), \ - (token).data.tag.suffix = (token_suffix)) - -#define SCALAR_TOKEN_INIT(token,token_value,token_length,token_style,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_SCALAR_TOKEN,(start_mark),(end_mark)), \ - (token).data.scalar.value = (token_value), \ - (token).data.scalar.length = (token_length), \ - (token).data.scalar.style = (token_style)) - -#define VERSION_DIRECTIVE_TOKEN_INIT(token,token_major,token_minor,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_VERSION_DIRECTIVE_TOKEN,(start_mark),(end_mark)), \ - (token).data.version_directive.major = (token_major), \ - (token).data.version_directive.minor = (token_minor)) - -#define TAG_DIRECTIVE_TOKEN_INIT(token,token_handle,token_prefix,start_mark,end_mark) \ - (TOKEN_INIT((token),YAML_TAG_DIRECTIVE_TOKEN,(start_mark),(end_mark)), \ - (token).data.tag_directive.handle = (token_handle), \ - 
(token).data.tag_directive.prefix = (token_prefix)) - /* * Public API declarations. */ diff --git a/src/writer.c b/src/writer.c new file mode 100644 index 00000000..21313722 --- /dev/null +++ b/src/writer.c @@ -0,0 +1,138 @@ + +#include "yaml_private.h" + +/* + * Declarations. + */ + +static int +yaml_emitter_set_writer_error(yaml_emitter_t *emitter, const char *problem); + +YAML_DECLARE(int) +yaml_emitter_flush(yaml_emitter_t *emitter); + +/* + * Set the writer error and return 0. + */ + +static int +yaml_emitter_set_writer_error(yaml_emitter_t *emitter, const char *problem) +{ + emitter->error = YAML_WRITER_ERROR; + emitter->problem = problem; + + return 0; +} + +/* + * Flush the output buffer. + */ + +YAML_DECLARE(int) +yaml_emitter_flush(yaml_emitter_t *emitter) +{ + int low, high; + + assert(emitter); /* Non-NULL emitter object is expected. */ + assert(emitter->write_handler); /* Write handler must be set. */ + assert(emitter->encoding); /* Output encoding must be set. */ + + /* Check if the buffer is empty. */ + + if (emitter->buffer.start == emitter->buffer.last) { + return 1; + } + + /* If the output encoding is UTF-8, we don't need to recode the buffer. */ + + if (emitter->encoding == YAML_UTF8_ENCODING) + { + if (emitter->write_handler(emitter->write_handler_data, + emitter->buffer.start, + emitter->buffer.last - emitter->buffer.start)) { + emitter->buffer.last = emitter->buffer.start; + emitter->buffer.pointer = emitter->buffer.start; + return 1; + } + else { + return yaml_emitter_set_writer_error(emitter, "Write error"); + } + } + + /* Recode the buffer into the raw buffer. */ + + low = (emitter->encoding == YAML_UTF16LE_ENCODING ? 0 : 1); + high = (emitter->encoding == YAML_UTF16LE_ENCODING ? 1 : 0); + + while (emitter->buffer.pointer != emitter->buffer.last) + { + unsigned char octet; + unsigned int width; + unsigned int value; + int k; + + /* + * See the "reader.c" code for more details on UTF-8 encoding. 
Note + * that we assume that the buffer contains a valid UTF-8 sequence. + */ + + /* Read the next UTF-8 character. */ + + octet = emitter->buffer.pointer[0]; + + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + + value = (octet & 0x80) == 0x00 ? octet & 0x7F : + (octet & 0xE0) == 0xC0 ? octet & 0x1F : + (octet & 0xF0) == 0xE0 ? octet & 0x0F : + (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; + + for (k = 1; k < width; k ++) { + octet = emitter->buffer.pointer[k]; + value = (value << 6) + (octet & 0x3F); + } + + emitter->buffer.pointer += width; + + /* Write the character. */ + + if (value < 0x10000) + { + emitter->raw_buffer.last[high] = value >> 8; + emitter->raw_buffer.last[low] = value & 0xFF; + + emitter->raw_buffer.last += 2; + } + else + { + /* Write the character using a surrogate pair (check "reader.c"). With 0x10000 subtracted, the high surrogate is 0xD800 + (value >> 10) and the low one is 0xDC00 + (value & 0x3FF), so only bits 8-9 reach the low surrogate's high byte. */ + + value -= 0x10000; + emitter->raw_buffer.last[high] = 0xD8 + (value >> 18); + emitter->raw_buffer.last[low] = (value >> 10) & 0xFF; + emitter->raw_buffer.last[high+2] = 0xDC + ((value >> 8) & 0x03); + emitter->raw_buffer.last[low+2] = value & 0xFF; + + emitter->raw_buffer.last += 4; + } + } + + /* Write the raw buffer. */ + + if (emitter->write_handler(emitter->write_handler_data, + emitter->raw_buffer.start, + emitter->raw_buffer.last - emitter->raw_buffer.start)) { + emitter->buffer.last = emitter->buffer.start; + emitter->buffer.pointer = emitter->buffer.start; + emitter->raw_buffer.last = emitter->raw_buffer.start; + emitter->raw_buffer.pointer = emitter->raw_buffer.start; + return 1; + } + else { + return yaml_emitter_set_writer_error(emitter, "Write error"); + } +} + diff --git a/src/yaml_private.h b/src/yaml_private.h index d7ac644d..7304d8d3 100644 --- a/src/yaml_private.h +++ b/src/yaml_private.h @@ -38,18 +38,32 @@ YAML_DECLARE(int) yaml_parser_fetch_more_tokens(yaml_parser_t *parser); /* - * The size of the raw buffer. + * The size of the input raw buffer.
*/ -#define RAW_BUFFER_SIZE 16384 +#define INPUT_RAW_BUFFER_SIZE 16384 /* - * The size of the buffer. + * The size of the input buffer. * * It should be possible to decode the whole raw buffer. */ -#define BUFFER_SIZE (RAW_BUFFER_SIZE*3) +#define INPUT_BUFFER_SIZE (INPUT_RAW_BUFFER_SIZE*3) + +/* + * The size of the output buffer. + */ + +#define OUTPUT_BUFFER_SIZE 16384 + +/* + * The size of the output raw buffer. + * + * It should be possible to encode the whole output buffer. + */ + +#define OUTPUT_RAW_BUFFER_SIZE (OUTPUT_BUFFER_SIZE*2+2) /* * The size of other stacks and queues. @@ -203,3 +217,115 @@ yaml_queue_extend(void **start, void **head, void **tail, void **end); ((context)->error = YAML_MEMORY_ERROR, \ 0)) +/* + * Token initializers. + */ + +#define TOKEN_INIT(token,token_type,token_start_mark,token_end_mark) \ + (memset(&(token), 0, sizeof(yaml_token_t)), \ + (token).type = (token_type), \ + (token).start_mark = (token_start_mark), \ + (token).end_mark = (token_end_mark)) + +#define STREAM_START_TOKEN_INIT(token,token_encoding,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_STREAM_START_TOKEN,(start_mark),(end_mark)), \ + (token).data.stream_start.encoding = (token_encoding)) + +#define STREAM_END_TOKEN_INIT(token,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_STREAM_END_TOKEN,(start_mark),(end_mark))) + +#define ALIAS_TOKEN_INIT(token,token_value,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_ALIAS_TOKEN,(start_mark),(end_mark)), \ + (token).data.alias.value = (token_value)) + +#define ANCHOR_TOKEN_INIT(token,token_value,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_ANCHOR_TOKEN,(start_mark),(end_mark)), \ + (token).data.anchor.value = (token_value)) + +#define TAG_TOKEN_INIT(token,token_handle,token_suffix,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_TAG_TOKEN,(start_mark),(end_mark)), \ + (token).data.tag.handle = (token_handle), \ + (token).data.tag.suffix = (token_suffix)) + +#define 
SCALAR_TOKEN_INIT(token,token_value,token_length,token_style,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_SCALAR_TOKEN,(start_mark),(end_mark)), \ + (token).data.scalar.value = (token_value), \ + (token).data.scalar.length = (token_length), \ + (token).data.scalar.style = (token_style)) + +#define VERSION_DIRECTIVE_TOKEN_INIT(token,token_major,token_minor,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_VERSION_DIRECTIVE_TOKEN,(start_mark),(end_mark)), \ + (token).data.version_directive.major = (token_major), \ + (token).data.version_directive.minor = (token_minor)) + +#define TAG_DIRECTIVE_TOKEN_INIT(token,token_handle,token_prefix,start_mark,end_mark) \ + (TOKEN_INIT((token),YAML_TAG_DIRECTIVE_TOKEN,(start_mark),(end_mark)), \ + (token).data.tag_directive.handle = (token_handle), \ + (token).data.tag_directive.prefix = (token_prefix)) + +/* + * Event initializers. + */ + +#define EVENT_INIT(event,event_type,event_start_mark,event_end_mark) \ + (memset(&(event), 0, sizeof(yaml_event_t)), \ + (event).type = (event_type), \ + (event).start_mark = (event_start_mark), \ + (event).end_mark = (event_end_mark)) + +#define STREAM_START_EVENT_INIT(event,event_encoding,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_STREAM_START_EVENT,(start_mark),(end_mark)), \ + (event).data.stream_start.encoding = (event_encoding)) + +#define STREAM_END_EVENT_INIT(event,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_STREAM_END_EVENT,(start_mark),(end_mark))) + +#define DOCUMENT_START_EVENT_INIT(event,event_version_directive, \ + event_tag_directives_start,event_tag_directives_end,event_implicit,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_DOCUMENT_START_EVENT,(start_mark),(end_mark)), \ + (event).data.document_start.version_directive = (event_version_directive), \ + (event).data.document_start.tag_directives.start = (event_tag_directives_start), \ + (event).data.document_start.tag_directives.end = (event_tag_directives_end), \ + (event).data.document_start.implicit = 
(event_implicit)) + +#define DOCUMENT_END_EVENT_INIT(event,event_implicit,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_DOCUMENT_END_EVENT,(start_mark),(end_mark)), \ + (event).data.document_end.implicit = (event_implicit)) + +#define ALIAS_EVENT_INIT(event,event_anchor,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_ALIAS_EVENT,(start_mark),(end_mark)), \ + (event).data.alias.anchor = (event_anchor)) + +#define SCALAR_EVENT_INIT(event,event_anchor,event_tag,event_value,event_length, \ + event_plain_implicit, event_quoted_implicit,event_style,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_SCALAR_EVENT,(start_mark),(end_mark)), \ + (event).data.scalar.anchor = (event_anchor), \ + (event).data.scalar.tag = (event_tag), \ + (event).data.scalar.value = (event_value), \ + (event).data.scalar.length = (event_length), \ + (event).data.scalar.plain_implicit = (event_plain_implicit), \ + (event).data.scalar.quoted_implicit = (event_quoted_implicit), \ + (event).data.scalar.style = (event_style)) + +#define SEQUENCE_START_EVENT_INIT(event,event_anchor,event_tag, \ + event_implicit,event_style,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_SEQUENCE_START_EVENT,(start_mark),(end_mark)), \ + (event).data.sequence_start.anchor = (event_anchor), \ + (event).data.sequence_start.tag = (event_tag), \ + (event).data.sequence_start.implicit = (event_implicit), \ + (event).data.sequence_start.style = (event_style)) + +#define SEQUENCE_END_EVENT_INIT(event,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_SEQUENCE_END_EVENT,(start_mark),(end_mark))) + +#define MAPPING_START_EVENT_INIT(event,event_anchor,event_tag, \ + event_implicit,event_style,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_MAPPING_START_EVENT,(start_mark),(end_mark)), \ + (event).data.mapping_start.anchor = (event_anchor), \ + (event).data.mapping_start.tag = (event_tag), \ + (event).data.mapping_start.implicit = (event_implicit), \ + (event).data.mapping_start.style = (event_style)) + +#define 
MAPPING_END_EVENT_INIT(event,start_mark,end_mark) \ + (EVENT_INIT((event),YAML_MAPPING_END_EVENT,(start_mark),(end_mark))) + From 04cf09a2986afd0bbd39c6ea6c31f11321874d08 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Tue, 25 Jul 2006 20:54:28 +0000 Subject: [PATCH 22/73] Add yaml_emitter_emit_* set of functions. --- include/yaml.h | 4 +- src/Makefile.am | 2 +- src/api.c | 12 +- src/emitter.c | 408 +++++++++++++++++++++++++++++++++++++++++++++ src/parser.c | 4 +- src/yaml_private.h | 4 +- 6 files changed, 423 insertions(+), 11 deletions(-) create mode 100644 src/emitter.c diff --git a/include/yaml.h b/include/yaml.h index 204872ab..a4fc4e8a 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -1139,14 +1139,14 @@ yaml_emitter_set_canonical(yaml_emitter_t *emitter, int canonical); * Set the intendation increment. * * @param[in] emitter An emitter object. - * @param[in] indent The indentation increment (> 1). + * @param[in] indent The indentation increment (1 < . < 10). */ YAML_DECLARE(void) yaml_emitter_set_indent(yaml_emitter_t *emitter, int indent); /** - * Set the preferred line width. @c 0 means unlimited. + * Set the preferred line width. @c -1 means unlimited. * * @param[in] emitter An emitter object. * @param[in] width The preferred line width. diff --git a/src/Makefile.am b/src/Makefile.am index 25a5bc82..cc815e73 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = api.c reader.c scanner.c parser.c writer.c +libyaml_la_SOURCES = api.c reader.c scanner.c parser.c writer.c emitter.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index 90d86fdd..8d5f624f 100644 --- a/src/api.c +++ b/src/api.c @@ -57,10 +57,13 @@ yaml_free(void *ptr) * Duplicate a string. 
*/ -YAML_DECLARE(char *) -yaml_strdup(const char *str) +YAML_DECLARE(yaml_char_t *) +yaml_strdup(const yaml_char_t *str) { - return strdup(str); + if (!str) + return NULL; + + return (yaml_char_t *)strdup((char *)str); } /* @@ -389,6 +392,7 @@ yaml_emitter_delete(yaml_emitter_t *emitter) yaml_event_delete(&DEQUEUE(emitter, emitter->events)); } STACK_DEL(emitter, emitter->indents); + yaml_event_delete(&emitter->event); while (!STACK_EMPTY(empty, emitter->tag_directives)) { yaml_tag_directive_t tag_directive = POP(emitter, emitter->tag_directives); yaml_free(tag_directive.handle); @@ -536,7 +540,7 @@ yaml_emitter_set_width(yaml_emitter_t *emitter, int width) { assert(emitter); /* Non-NULL emitter object expected. */ - emitter->best_width = (width > 0) ? width : 0; + emitter->best_width = (width >= 0) ? width : -1; } /* diff --git a/src/emitter.c b/src/emitter.c new file mode 100644 index 00000000..e659e3e0 --- /dev/null +++ b/src/emitter.c @@ -0,0 +1,408 @@ + +#include "yaml_private.h" + +/* + * API functions. 
+ */ + +YAML_DECLARE(int) +yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); + +YAML_DECLARE(int) +yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, + yaml_encoding_t encoding); + +YAML_DECLARE(int) +yaml_emitter_emit_stream_end(yaml_emitter_t *emitter); + +YAML_DECLARE(int) +yaml_emitter_emit_document_start(yaml_emitter_t *emitter, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int implicit); + +YAML_DECLARE(int) +yaml_emitter_emit_document_end(yaml_emitter_t *emitter, int implicit); + +YAML_DECLARE(int) +yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_char_t *anchor); + +YAML_DECLARE(int) +yaml_emitter_emit_scalar(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style); + +YAML_DECLARE(int) +yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_sequence_style_t style); + +YAML_DECLARE(int) +yaml_emitter_emit_sequence_end(yaml_emitter_t *emitter); + +YAML_DECLARE(int) +yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_mapping_style_t style); + +YAML_DECLARE(int) +yaml_emitter_emit_mapping_end(yaml_emitter_t *emitter); + +/* + * Emit STREAM-START. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, + yaml_encoding_t encoding) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is expected. */ + + STREAM_START_EVENT_INIT(event, encoding, mark, mark); + + return yaml_emitter_emit(emitter, &event); +} + +/* + * Emit STREAM-END. 
+ */ + +YAML_DECLARE(int) +yaml_emitter_emit_stream_end(yaml_emitter_t *emitter) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is expected. */ + + STREAM_END_EVENT_INIT(event, mark, mark); + + return yaml_emitter_emit(emitter, &event); +} + +/* + * Emit DOCUMENT-START. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_document_start(yaml_emitter_t *emitter, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int implicit) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_version_directive_t *version_directive_copy = NULL; + struct { + yaml_tag_directive_t *start; + yaml_tag_directive_t *end; + yaml_tag_directive_t *top; + } tag_directives_copy = { NULL, NULL, NULL }; + yaml_tag_directive_t value = { NULL, NULL }; + + assert(emitter); /* Non-NULL emitter object is expected. */ + assert((tag_directives_start && tag_directives_end) || + (tag_directives_start == tag_directives_end)); + /* Valid tag directives are expected. 
*/ + + if (version_directive) { + version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)); + if (!version_directive_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + version_directive_copy->major = version_directive->major; + version_directive_copy->minor = version_directive->minor; + } + + if (tag_directives_start != tag_directives_end) { + yaml_tag_directive_t *tag_directive; + if (!STACK_INIT(emitter, tag_directives_copy, INITIAL_STACK_SIZE)) + goto error; + for (tag_directive = tag_directives_start; + tag_directive != tag_directives_end; tag_directive ++) { + value.handle = yaml_strdup(tag_directive->handle); + value.prefix = yaml_strdup(tag_directive->prefix); + if (!value.handle || !value.prefix) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + if (!PUSH(emitter, tag_directives_copy, value)) + goto error; + value.handle = NULL; + value.prefix = NULL; + } + } + + DOCUMENT_START_EVENT_INIT(event, version_directive_copy, + tag_directives_copy.start, tag_directives_copy.end, + implicit, mark, mark); + + if (yaml_emitter_emit(emitter, &event)) { + return 1; + } + +error: + yaml_free(version_directive_copy); + while (!STACK_EMPTY(emitter, tag_directives_copy)) { + yaml_tag_directive_t value = POP(emitter, tag_directives_copy); + yaml_free(value.handle); + yaml_free(value.prefix); + } + STACK_DEL(emitter, tag_directives_copy); + yaml_free(value.handle); + yaml_free(value.prefix); + + return 0; +} + +/* + * Emit DOCUMENT-END. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_document_end(yaml_emitter_t *emitter, int implicit) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is expected. */ + + DOCUMENT_END_EVENT_INIT(event, implicit, mark, mark); + + return yaml_emitter_emit(emitter, &event); +} + +/* + * Emit ALIAS. 
+ */ + +YAML_DECLARE(int) +yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_char_t *anchor) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + + assert(emitter); /* Non-NULL emitter object is expected. */ + assert(anchor); /* Non-NULL anchor is expected. */ + + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) { + emitter->error = YAML_MEMORY_ERROR; + return 0; + } + + ALIAS_EVENT_INIT(event, anchor_copy, mark, mark); + + if (yaml_emitter_emit(emitter, &event)) { + return 1; + } + + yaml_free(anchor_copy); + + return 0; +} + +/* + * Emit SCALAR. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_scalar(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + yaml_char_t *tag_copy = NULL; + yaml_char_t *value_copy = NULL; + + assert(emitter); /* Non-NULL emitter object is expected. */ + assert(value); /* Non-NULL value is expected. */ + + if (anchor) { + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + } + + if (tag) { + tag_copy = yaml_strdup(tag); + if (!tag_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + } + + value_copy = yaml_malloc(length+1); + if (!value_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + memcpy(value_copy, value, length); + value_copy[length] = '\0'; + + SCALAR_EVENT_INIT(event, anchor_copy, tag_copy, value_copy, length, + plain_implicit, quoted_implicit, style, mark, mark); + + if (yaml_emitter_emit(emitter, &event)) { + return 1; + } + +error: + yaml_free(anchor_copy); + yaml_free(tag_copy); + yaml_free(value_copy); + + return 0; +} + +/* + * Emit SEQUENCE-START.
+ */ + +YAML_DECLARE(int) +yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_sequence_style_t style) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + yaml_char_t *tag_copy = NULL; + + assert(emitter); /* Non-NULL emitter object is expected. */ + + if (anchor) { + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + } + + if (tag) { + tag_copy = yaml_strdup(tag); + if (!tag_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + } + + SEQUENCE_START_EVENT_INIT(event, anchor_copy, tag_copy, + implicit, style, mark, mark); + + if (yaml_emitter_emit(emitter, &event)) { + return 1; + } + +error: + yaml_free(anchor_copy); + yaml_free(tag_copy); + + return 0; +} + +/* + * Emit SEQUENCE-END. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_sequence_end(yaml_emitter_t *emitter) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is expected. */ + + SEQUENCE_END_EVENT_INIT(event, mark, mark); + + return yaml_emitter_emit(emitter, &event); +} + +/* + * Emit MAPPING-START. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_mapping_style_t style) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + yaml_char_t *tag_copy = NULL; + + assert(emitter); /* Non-NULL emitter object is expected. 
*/ + + if (anchor) { + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + } + + if (tag) { + tag_copy = yaml_strdup(tag); + if (!tag_copy) { + emitter->error = YAML_MEMORY_ERROR; + goto error; + } + } + + MAPPING_START_EVENT_INIT(event, anchor_copy, tag_copy, + implicit, style, mark, mark); + + if (yaml_emitter_emit(emitter, &event)) { + return 1; + } + +error: + yaml_free(anchor_copy); + yaml_free(tag_copy); + + return 0; +} + +/* + * Emit MAPPING-END. + */ + +YAML_DECLARE(int) +yaml_emitter_emit_mapping_end(yaml_emitter_t *emitter) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is expected. */ + + MAPPING_END_EVENT_INIT(event, mark, mark); + + return yaml_emitter_emit(emitter, &event); +} + +/* + * Emit an event. + */ + +YAML_DECLARE(int) +yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event) +{ + return 0; +} + diff --git a/src/parser.c b/src/parser.c index 020d5d68..ed3c019a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1335,8 +1335,8 @@ yaml_parser_append_tag_directive(yaml_parser_t *parser, } } - copy.handle = (yaml_char_t *)yaml_strdup((char *)value.handle); - copy.prefix = (yaml_char_t *)yaml_strdup((char *)value.prefix); + copy.handle = yaml_strdup(value.handle); + copy.prefix = yaml_strdup(value.prefix); if (!copy.handle || !copy.prefix) { parser->error = YAML_MEMORY_ERROR; goto error; diff --git a/src/yaml_private.h b/src/yaml_private.h index 7304d8d3..faa855bd 100644 --- a/src/yaml_private.h +++ b/src/yaml_private.h @@ -20,8 +20,8 @@ yaml_realloc(void *ptr, size_t size); YAML_DECLARE(void) yaml_free(void *ptr); -YAML_DECLARE(char *) -yaml_strdup(const char *); +YAML_DECLARE(yaml_char_t *) +yaml_strdup(const yaml_char_t *); /* * Reader: Ensure that the buffer contains at least `length` characters. 
From 4abcb1f8e29691f16027e605a932ede9adb64632 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Wed, 26 Jul 2006 20:32:16 +0000 Subject: [PATCH 23/73] Implement Emitter state machine. --- include/yaml.h | 343 ++++++++------- src/api.c | 359 +++++++++++++++- src/emitter.c | 1006 ++++++++++++++++++++++++++++++++------------ src/parser.c | 10 +- src/scanner.c | 6 +- src/yaml_private.h | 1 + 6 files changed, 1278 insertions(+), 447 deletions(-) diff --git a/include/yaml.h b/include/yaml.h index a4fc4e8a..62840687 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -169,8 +169,8 @@ typedef enum { YAML_ANY_MAPPING_STYLE, YAML_BLOCK_MAPPING_STYLE, - YAML_FLOW_MAPPING_STYLE, - YAML_FLOW_SET_MAPPING_STYLE + YAML_FLOW_MAPPING_STYLE +/* YAML_FLOW_SET_MAPPING_STYLE */ } yaml_mapping_style_t; /** @} */ @@ -413,6 +413,169 @@ typedef struct { } yaml_event_t; +/** + * Create the STREAM-START event. + * + * @param[in] event An empty event object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_stream_start_event_initialize(yaml_event_t *event, + yaml_encoding_t encoding); + +/** + * Create the STREAM-END event. + * + * @param[in] event An empty event object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_stream_end_event_initialize(yaml_event_t *event); + +/** + * Create the DOCUMENT-START event. + * + * The @a implicit argument is considered as a stylistic parameter and may be + * ignored by the emitter. + * + * @param[in] event An empty event object. + * @param[in] version_directive The %YAML directive value or @c NULL. + * @param[in] tag_directives_start The beginning of the %TAG directives list. + * @param[in] tag_directives_end The end of the %TAG directives list. + * @param[in] implicit If the document start indicator is implicit. + * + * @returns @c 1 if the function succeeded, @c 0 on error. 
+ */ + +YAML_DECLARE(int) +yaml_document_start_event_initialize(yaml_event_t *event, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int implicit); + +/** + * Create the DOCUMENT-END event. + * + * The @a implicit argument is considered as a stylistic parameter and may be + * ignored by the emitter. + * + * @param[in] event An empty event object. + * @param[in] implicit If the document end indicator is implicit. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_document_end_event_initialize(yaml_event_t *event, int implicit); + +/** + * Create an ALIAS event. + * + * @param[in] event An empty event object. + * @param[in] anchor The anchor value. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor); + +/** + * Create a SCALAR event. + * + * The @a style argument may be ignored by the emitter. + * + * Either the @a tag attribute or one of the @a plain_implicit and + * @a quoted_implicit flags must be set. + * + * @param[in] event An empty event object. + * @param[in] anchor The scalar anchor or @c NULL. + * @param[in] tag The scalar tag or @c NULL. + * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] plain_implicit If the tag may be omitted for the plain style. + * @param[in] quoted_implicit If the tag may be omitted for any non-plain style. + * @param[in] style The scalar style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_scalar_event_initialize(yaml_event_t *event, + yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style); + +/** + * Create a SEQUENCE-START event. + * + * The @a style argument may be ignored by the emitter. 
+ * + * Either the @a tag attribute or the @a implicit flag must be set. + * + * @param[in] event An empty event object. + * @param[in] anchor The sequence anchor or @c NULL. + * @param[in] tag The sequence tag or @c NULL. + * @param[in] implicit If the tag may be omitted. + * @param[in] style The sequence style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_start_event_initialize(yaml_event_t *event, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_sequence_style_t style); + +/** + * Create a SEQUENCE-END event. + * + * @param[in] event An empty event object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_end_event_initialize(yaml_event_t *event); + +/** + * Create a MAPPING-START event. + * + * The @a style argument may be ignored by the emitter. + * + * Either the @a tag attribute or the @a implicit flag must be set. + * + * @param[in] event An empty event object. + * @param[in] anchor The mapping anchor or @c NULL. + * @param[in] tag The mapping tag or @c NULL. + * @param[in] implicit If the tag may be omitted. + * @param[in] style The mapping style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_mapping_start_event_initialize(yaml_event_t *event, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_mapping_style_t style); + +/** + * Create a MAPPING-END event. + * + * @param[in] event An empty event object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_mapping_end_event_initialize(yaml_event_t *event); + /** * Free any memory allocated for an event object. 
* @@ -870,7 +1033,8 @@ typedef enum { YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE, YAML_EMIT_BLOCK_MAPPING_KEY_STATE, YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE, - YAML_EMIT_BLOCK_MAPPING_VALUE_STATE + YAML_EMIT_BLOCK_MAPPING_VALUE_STATE, + YAML_EMIT_END_STATE } yaml_emitter_state_t; /** @@ -995,9 +1159,6 @@ typedef struct { yaml_event_t *tail; } events; - /** The current event. */ - yaml_event_t event; - /** The stack of indentation levels. */ struct { /** The beginning of the stack. */ @@ -1179,9 +1340,9 @@ yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break); * Emit an event. * * The event object may be generated using the @c yaml_parser_parse function. - * The emitter will destroy the event object if the function succeeds. If the - * function fails, the application is responsible for destroing the event - * object. + * The emitter takes the responsibility for the event object and destroys its + * content after it is emitted. The event object is destroyed even if the + * function fails. * * @param[in] emitter An emitter object. * @param[in] event An event object. @@ -1192,170 +1353,6 @@ yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break); YAML_DECLARE(int) yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); -/** - * Emit the STREAM-START event. - * - * @param[in] emitter An emitter object. - * @param[in] encoding The stream encoding. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, - yaml_encoding_t encoding); - -/** - * Emit the STREAM-END event. - * - * @param[in] emitter An emitter object. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_stream_end(yaml_emitter_t *emitter); - -/** - * Emit the DOCUMENT-START event. - * - * The @a implicit argument is considered as a stylistic parameter and may be - * ignored by the emitter. 
- * - * @param[in] emitter An emitter object. - * @param[in] version_directive The %YAML directive value or @c NULL. - * @param[in] tag_directives_start The beginning of the %TAG directives list. - * @param[in] tag_directives_end The end of the %TAG directives list. - * @param[in] implicit If the document start indicator is implicit. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_document_start(yaml_emitter_t *emitter, - yaml_version_directive_t *version_directive, - yaml_tag_directive_t *tag_directives_start, - yaml_tag_directive_t *tag_directives_end, - int implicit); - -/** - * Emit the DOCUMENT-END event. - * - * The @a implicit argument is considered as a stylistic parameter and may be - * ignored by the emitter. - * - * @param[in] emitter An emitter object. - * @param[in] implicit If the document end indicator is implicit. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_document_end(yaml_emitter_t *emitter, int implicit); - -/** - * Emit an ALIAS event. - * - * @param[in] emitter An emitter object. - * @param[in] anchor The anchor value. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_char_t *anchor); - -/** - * Emit a SCALAR event. - * - * The @a style argument may be ignored by the emitter. - * - * Either the @a tag attribute or one of the @a plain_implicit and - * @a quoted_implicit flags must be set. - * - * @param[in] emitter An emitter object. - * @param[in] anchor The scalar anchor or @c NULL. - * @param[in] tag The scalar tag or @c NULL. - * @param[in] value The scalar value. - * @param[in] length The length of the scalar value. - * @param[in] plain_implicit If the tag may be omitted for the plain style. - * @param[in] quoted_implicit If the tag may be omitted for any non-plain style. - * @param[in] style The scalar style. 
- * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_scalar(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, - yaml_char_t *value, size_t length, - int plain_implicit, int quoted_implicit, - yaml_scalar_style_t style); - -/** - * Emit a SEQUENCE-START event. - * - * The @a style argument may be ignored by the emitter. - * - * Either the @a tag attribute or the @a implicit flag must be set. - * - * @param[in] emitter An emitter object. - * @param[in] anchor The sequence anchor or @c NULL. - * @param[in] tag The sequence tag or @c NULL. - * @param[in] implicit If the tag may be omitted. - * @param[in] style The sequence style. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, int implicit, - yaml_sequence_style_t style); - -/** - * Emit a SEQUENCE-END event. - * - * @param[in] emitter An emitter object. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_sequence_end(yaml_emitter_t *emitter); - -/** - * Emit a MAPPING-START event. - * - * The @a style argument may be ignored by the emitter. - * - * Either the @a tag attribute or the @a implicit flag must be set. - * - * @param[in] emitter An emitter object. - * @param[in] anchor The mapping anchor or @c NULL. - * @param[in] tag The mapping tag or @c NULL. - * @param[in] implicit If the tag may be omitted. - * @param[in] style The mapping style. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, int implicit, - yaml_mapping_style_t style); - -/** - * Emit a MAPPING-END event. - * - * @param[in] emitter An emitter object. - * - * @returns @c 1 if the function succeeded, @c 0 on error. 
- */ - -YAML_DECLARE(int) -yaml_emitter_emit_mapping_end(yaml_emitter_t *emitter); - /** * Flush the accumulated characters to the output. * diff --git a/src/api.c b/src/api.c index 8d5f624f..4eaa6676 100644 --- a/src/api.c +++ b/src/api.c @@ -392,7 +392,6 @@ yaml_emitter_delete(yaml_emitter_t *emitter) yaml_event_delete(&DEQUEUE(emitter, emitter->events)); } STACK_DEL(emitter, emitter->indents); - yaml_event_delete(&emitter->event); while (!STACK_EMPTY(empty, emitter->tag_directives)) { yaml_tag_directive_t tag_directive = POP(emitter, emitter->tag_directives); yaml_free(tag_directive.handle); @@ -607,6 +606,364 @@ yaml_token_delete(yaml_token_t *token) memset(token, 0, sizeof(yaml_token_t)); } +/* + * Check if a string is a valid UTF-8 sequence. + * + * Check 'reader.c' for more details on UTF-8 encoding. + */ + +static int +yaml_check_utf8(yaml_char_t *start, size_t length) +{ + yaml_char_t *end = start+length; + yaml_char_t *pointer = start; + + while (pointer < end) { + unsigned char octet; + unsigned int width; + unsigned int value; + int k; + + octet = pointer[0]; + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + value = (octet & 0x80) == 0x00 ? octet & 0x7F : + (octet & 0xE0) == 0xC0 ? octet & 0x1F : + (octet & 0xF0) == 0xE0 ? octet & 0x0F : + (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0; + if (!width) return 0; + if (pointer+width > end) return 0; + for (k = 1; k < width; k ++) { + octet = pointer[k]; + if ((octet & 0xC0) != 0x80) return 0; + value = (value << 6) + (octet & 0x3F); + } + if (!((width == 1) || + (width == 2 && value >= 0x80) || + (width == 3 && value >= 0x800) || + (width == 4 && value >= 0x10000))) return 0; + + pointer += width; + } + + return 1; +} + +/* + * Create STREAM-START. 
+ */ + +YAML_DECLARE(int) +yaml_stream_start_event_initialize(yaml_event_t *event, + yaml_encoding_t encoding) +{ + yaml_mark_t mark = { 0, 0, 0 }; + + assert(event); /* Non-NULL event object is expected. */ + + STREAM_START_EVENT_INIT(*event, encoding, mark, mark); + + return 1; +} + +/* + * Create STREAM-END. + */ + +YAML_DECLARE(int) +yaml_stream_end_event_initialize(yaml_event_t *event) +{ + yaml_mark_t mark = { 0, 0, 0 }; + + assert(event); /* Non-NULL event object is expected. */ + + STREAM_END_EVENT_INIT(*event, mark, mark); + + return 1; +} + +/* + * Create DOCUMENT-START. + */ + +YAML_DECLARE(int) +yaml_document_start_event_initialize(yaml_event_t *event, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int implicit) +{ + struct { + yaml_error_type_t error; + } context; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_version_directive_t *version_directive_copy = NULL; + struct { + yaml_tag_directive_t *start; + yaml_tag_directive_t *end; + yaml_tag_directive_t *top; + } tag_directives_copy = { NULL, NULL, NULL }; + yaml_tag_directive_t value = { NULL, NULL }; + + assert(event); /* Non-NULL event object is expected. */ + assert((tag_directives_start && tag_directives_end) || + (tag_directives_start == tag_directives_end)); + /* Valid tag directives are expected. 
*/ + + if (version_directive) { + version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)); + if (!version_directive_copy) goto error; + version_directive_copy->major = version_directive->major; + version_directive_copy->minor = version_directive->minor; + } + + if (tag_directives_start != tag_directives_end) { + yaml_tag_directive_t *tag_directive; + if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) + goto error; + for (tag_directive = tag_directives_start; + tag_directive != tag_directives_end; tag_directive ++) { + assert(tag_directive->handle); + assert(tag_directive->prefix); + if (!yaml_check_utf8(tag_directive->handle, + strlen((char *)tag_directive->handle))) + goto error; + if (!yaml_check_utf8(tag_directive->prefix, + strlen((char *)tag_directive->prefix))) + goto error; + value.handle = yaml_strdup(tag_directive->handle); + value.prefix = yaml_strdup(tag_directive->prefix); + if (!value.handle || !value.prefix) goto error; + if (!PUSH(&context, tag_directives_copy, value)) + goto error; + value.handle = NULL; + value.prefix = NULL; + } + } + + DOCUMENT_START_EVENT_INIT(*event, version_directive_copy, + tag_directives_copy.start, tag_directives_copy.end, + implicit, mark, mark); + + return 1; + +error: + yaml_free(version_directive_copy); + while (!STACK_EMPTY(context, tag_directives_copy)) { + yaml_tag_directive_t value = POP(context, tag_directives_copy); + yaml_free(value.handle); + yaml_free(value.prefix); + } + STACK_DEL(context, tag_directives_copy); + yaml_free(value.handle); + yaml_free(value.prefix); + + return 0; +} + +/* + * Create DOCUMENT-END. + */ + +YAML_DECLARE(int) +yaml_document_end_event_initialize(yaml_event_t *event, int implicit) +{ + yaml_mark_t mark = { 0, 0, 0 }; + + assert(event); /* Non-NULL event object is expected. */ + + DOCUMENT_END_EVENT_INIT(*event, implicit, mark, mark); + + return 1; +} + +/* + * Create ALIAS.
+ */ + +YAML_DECLARE(int) +yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor) +{ + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + + assert(event); /* Non-NULL event object is expected. */ + assert(anchor); /* Non-NULL anchor is expected. */ + + if (!yaml_check_utf8(anchor, strlen((char *)anchor))) return 0; + + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) + return 0; + + ALIAS_EVENT_INIT(*event, anchor_copy, mark, mark); + + return 1; +} + +/* + * Create SCALAR. + */ + +YAML_DECLARE(int) +yaml_scalar_event_initialize(yaml_event_t *event, + yaml_char_t *anchor, yaml_char_t *tag, + yaml_char_t *value, size_t length, + int plain_implicit, int quoted_implicit, + yaml_scalar_style_t style) +{ + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + yaml_char_t *tag_copy = NULL; + yaml_char_t *value_copy = NULL; + + assert(event); /* Non-NULL event object is expected. */ + assert(value); /* Non-NULL value is expected. */ + + + if (anchor) { + if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) goto error; + } + + if (tag) { + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; + } + + if (!yaml_check_utf8(value, length)) goto error; + value_copy = yaml_malloc(length+1); + if (!value_copy) goto error; + memcpy(value_copy, value, length); + value_copy[length] = '\0'; + + SCALAR_EVENT_INIT(*event, anchor_copy, tag_copy, value_copy, length, + plain_implicit, quoted_implicit, style, mark, mark); + + return 1; + +error: + yaml_free(anchor_copy); + yaml_free(tag_copy); + yaml_free(value_copy); + + return 0; +} + +/* + * Create SEQUENCE-START.
+ */ + +YAML_DECLARE(int) +yaml_sequence_start_event_initialize(yaml_event_t *event, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_sequence_style_t style) +{ + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + yaml_char_t *tag_copy = NULL; + + assert(event); /* Non-NULL event object is expected. */ + + if (anchor) { + if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) goto error; + } + + if (tag) { + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; + } + + SEQUENCE_START_EVENT_INIT(*event, anchor_copy, tag_copy, + implicit, style, mark, mark); + + return 1; + +error: + yaml_free(anchor_copy); + yaml_free(tag_copy); + + return 0; +} + +/* + * Create SEQUENCE-END. + */ + +YAML_DECLARE(int) +yaml_sequence_end_event_initialize(yaml_event_t *event) +{ + yaml_mark_t mark = { 0, 0, 0 }; + + assert(event); /* Non-NULL event object is expected. */ + + SEQUENCE_END_EVENT_INIT(*event, mark, mark); + + return 1; +} + +/* + * Create MAPPING-START. + */ + +YAML_DECLARE(int) +yaml_mapping_start_event_initialize(yaml_event_t *event, + yaml_char_t *anchor, yaml_char_t *tag, int implicit, + yaml_mapping_style_t style) +{ + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *anchor_copy = NULL; + yaml_char_t *tag_copy = NULL; + + assert(event); /* Non-NULL event object is expected. */ + + if (anchor) { + if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; + anchor_copy = yaml_strdup(anchor); + if (!anchor_copy) goto error; + } + + if (tag) { + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; + } + + MAPPING_START_EVENT_INIT(*event, anchor_copy, tag_copy, + implicit, style, mark, mark); + + return 1; + +error: + yaml_free(anchor_copy); + yaml_free(tag_copy); + + return 0; +} + +/* + * Create MAPPING-END. 
+ */ + +YAML_DECLARE(int) +yaml_mapping_end_event_initialize(yaml_event_t *event) +{ + yaml_mark_t mark = { 0, 0, 0 }; + + assert(event); /* Non-NULL event object is expected. */ + + MAPPING_END_EVENT_INIT(*event, mark, mark); + + return 1; +} + /* * Destroy an event object. */ diff --git a/src/emitter.c b/src/emitter.c index e659e3e0..41ed3fc6 100644 --- a/src/emitter.c +++ b/src/emitter.c @@ -8,401 +8,869 @@ YAML_DECLARE(int) yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); -YAML_DECLARE(int) -yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, - yaml_encoding_t encoding); +/* + * Utility functions. + */ -YAML_DECLARE(int) -yaml_emitter_emit_stream_end(yaml_emitter_t *emitter); +static int +yaml_emitter_set_emitter_error(yaml_emitter_t *emitter, const char *problem); -YAML_DECLARE(int) +static int +yaml_emitter_need_more_events(yaml_emitter_t *emitter); + +static int +yaml_emitter_append_tag_directive(yaml_emitter_t *emitter, + yaml_tag_directive_t value, int allow_duplicates); + +static int +yaml_emitter_increase_indent(yaml_emitter_t *emitter, + int flow, int indentless); + +/* + * State functions. 
+ */ + +static int +yaml_emitter_state_machine(yaml_emitter_t *emitter, yaml_event_t *event); + +static int +yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, + yaml_event_t *event); + +static int yaml_emitter_emit_document_start(yaml_emitter_t *emitter, - yaml_version_directive_t *version_directive, - yaml_tag_directive_t *tag_directives_start, - yaml_tag_directive_t *tag_directives_end, - int implicit); + yaml_event_t *event, int first); -YAML_DECLARE(int) -yaml_emitter_emit_document_end(yaml_emitter_t *emitter, int implicit); +static int +yaml_emitter_emit_document_content(yaml_emitter_t *emitter, + yaml_event_t *event); -YAML_DECLARE(int) -yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_char_t *anchor); +static int +yaml_emitter_emit_document_end(yaml_emitter_t *emitter, + yaml_event_t *event); -YAML_DECLARE(int) -yaml_emitter_emit_scalar(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, +static int +yaml_emitter_emit_flow_sequence_item(yaml_emitter_t *emitter, + yaml_event_t *event, int first); + +static int +yaml_emitter_emit_flow_mapping_key(yaml_emitter_t *emitter, + yaml_event_t *event, int first); + +static int +yaml_emitter_emit_flow_mapping_value(yaml_emitter_t *emitter, + yaml_event_t *event, int simple); + +static int +yaml_emitter_emit_block_sequence_item(yaml_emitter_t *emitter, + yaml_event_t *event, int first); + +static int +yaml_emitter_emit_block_mapping_key(yaml_emitter_t *emitter, + yaml_event_t *event, int first); + +static int +yaml_emitter_emit_block_mapping_value(yaml_emitter_t *emitter, + yaml_event_t *event, int simple); + +static int +yaml_emitter_emit_node(yaml_emitter_t *emitter, yaml_event_t *event, + int root, int sequence, int mapping, int simple_key); + +static int +yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_event_t *event); + +static int +yaml_emitter_emit_scalar(yaml_emitter_t *emitter, yaml_event_t *event); + +static int +yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, 
yaml_event_t *event); + +static int +yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, yaml_event_t *event); + +/* + * Checkers. + */ + +static int +yaml_emitter_check_empty_document(yaml_emitter_t *emitter); + +static int +yaml_emitter_check_empty_sequence(yaml_emitter_t *emitter); + +static int +yaml_emitter_check_empty_mapping(yaml_emitter_t *emitter); + +static int +yaml_emitter_check_simple_key(yaml_emitter_t *emitter); + +/* + * Processors. + */ + +static int +yaml_emitter_process_anchor(yaml_emitter_t *emitter, + yaml_char_t *anchor, int alias); + +static int +yaml_emitter_process_tag(yaml_emitter_t *emitter, + yaml_char_t *tag); + +static int +yaml_emitter_process_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length, int plain_implicit, int quoted_implicit, yaml_scalar_style_t style); -YAML_DECLARE(int) -yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, int implicit, - yaml_sequence_style_t style); +/* + * Writers. + */ -YAML_DECLARE(int) -yaml_emitter_emit_sequence_end(yaml_emitter_t *emitter); +static int +yaml_emitter_write_bom(yaml_emitter_t *emitter); -YAML_DECLARE(int) -yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, int implicit, - yaml_mapping_style_t style); +static int +yaml_emitter_write_version_directive(yaml_emitter_t *emitter, + yaml_version_directive_t version_directive); -YAML_DECLARE(int) -yaml_emitter_emit_mapping_end(yaml_emitter_t *emitter); +static int +yaml_emitter_write_tag_directive(yaml_emitter_t *emitter, + yaml_tag_directive_t tag_directive); + +static int +yaml_emitter_write_indent(yaml_emitter_t *emitter); + +static int +yaml_emitter_write_indicator(yaml_emitter_t *emitter, + char *indicator, int need_whitespace, + int is_whitespace, int is_indention); /* - * Emit STREAM-START. + * Set an emitter error and return 0. 
*/ -YAML_DECLARE(int) -yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, - yaml_encoding_t encoding) +static int +yaml_emitter_set_emitter_error(yaml_emitter_t *emitter, const char *problem) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; + emitter->error = YAML_EMITTER_ERROR; + emitter->problem = problem; - assert(emitter); /* Non-NULL emitter object is expected. */ - - STREAM_START_EVENT_INIT(event, encoding, mark, mark); - - return yaml_emitter_emit(emitter, &event); + return 0; } /* - * Emit STREAM-END. + * Emit an event. */ YAML_DECLARE(int) -yaml_emitter_emit_stream_end(yaml_emitter_t *emitter) +yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - - assert(emitter); /* Non-NULL emitter object is expected. */ + if (!ENQUEUE(emitter, emitter->events, *event)) { + yaml_event_delete(event); + return 0; + } - STREAM_END_EVENT_INIT(event, mark, mark); + while (!yaml_emitter_need_more_events(emitter)) { + if (!yaml_emitter_state_machine(emitter, emitter->events.head)) { + return 0; + } + DEQUEUE(emitter, emitter->events); + } - return yaml_emitter_emit(emitter, &event); + return 1; } /* - * Emit DOCUMENT-START. + * Check if we need to accumulate more events before emitting. 
+ * + * We accumulate extra + * - 1 event for DOCUMENT-START + * - 2 events for SEQUENCE-START + * - 3 events for MAPPING-START */ -YAML_DECLARE(int) -yaml_emitter_emit_document_start(yaml_emitter_t *emitter, - yaml_version_directive_t *version_directive, - yaml_tag_directive_t *tag_directives_start, - yaml_tag_directive_t *tag_directives_end, - int implicit) +static int +yaml_emitter_need_more_events(yaml_emitter_t *emitter) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - yaml_version_directive_t *version_directive_copy = NULL; - struct { - yaml_tag_directive_t *start; - yaml_tag_directive_t *end; - yaml_tag_directive_t *top; - } tag_directives_copy = { NULL, NULL, NULL }; - yaml_tag_directive_t value = { NULL, NULL }; - - assert(emitter); /* Non-NULL emitter object is expected. */ - assert((tag_directives_start && tag_directives_end) || - (tag_directives_start == tag_directives_end)); - /* Valid tag directives are expected. */ - - if (version_directive) { - version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)); - if (!version_directive_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; - } - version_directive_copy->major = version_directive->major; - version_directive_copy->minor = version_directive->minor; + int level = 0; + int accumulate = 0; + yaml_event_t *event; + + if (QUEUE_EMPTY(emitter, emitter->events)) + return 1; + + switch (emitter->events.head->type) { + case YAML_DOCUMENT_START_EVENT: + accumulate = 1; + break; + case YAML_SEQUENCE_START_EVENT: + accumulate = 2; + break; + case YAML_MAPPING_START_EVENT: + accumulate = 3; + break; + default: + return 0; } - if (tag_directives_start != tag_directives_end) { - yaml_tag_directive_t *tag_directive; - if (!STACK_INIT(emitter, tag_directives_copy, INITIAL_STACK_SIZE)) - goto error; - for (tag_directive = tag_directives_start; - tag_directive != tag_directives_end; tag_directive ++) { - value.handle = yaml_strdup(tag_directive->handle); - value.prefix = 
yaml_strdup(tag_directive->prefix); - if (!value.handle || !value.prefix) { - emitter->error = YAML_MEMORY_ERROR; - goto error; - } - if (!PUSH(emitter, tag_directives_copy, value)) - goto error; - value.handle = NULL; - value.prefix = NULL; + if (emitter->events.tail - emitter->events.head > accumulate) + return 0; + + for (event = emitter->events.head; event != emitter->events.tail; event ++) { + switch (event->type) { + case YAML_STREAM_START_EVENT: + case YAML_DOCUMENT_START_EVENT: + case YAML_SEQUENCE_START_EVENT: + case YAML_MAPPING_START_EVENT: + level += 1; + break; + case YAML_STREAM_END_EVENT: + case YAML_DOCUMENT_END_EVENT: + case YAML_SEQUENCE_END_EVENT: + case YAML_MAPPING_END_EVENT: + level -= 1; + break; + default: + break; } + if (!level) + return 0; } - DOCUMENT_START_EVENT_INIT(event, version_directive_copy, - tag_directives_copy.start, tag_directives_copy.end, - implicit, mark, mark); + return 1; +} - if (yaml_emitter_emit(emitter, &event)) { - return 1; +/* + * Append a directive to the directives stack. 
+ */ + +static int +yaml_emitter_append_tag_directive(yaml_emitter_t *emitter, + yaml_tag_directive_t value, int allow_duplicates) +{ + yaml_tag_directive_t *tag_directive; + yaml_tag_directive_t copy = { NULL, NULL }; + + for (tag_directive = emitter->tag_directives.start; + tag_directive != emitter->tag_directives.top; tag_directive ++) { + if (strcmp((char *)value.handle, (char *)tag_directive->handle) == 0) { + if (allow_duplicates) + return 1; + return yaml_emitter_set_emitter_error(emitter, + "duplicate %TAG directive"); + } } -error: - yaml_free(version_directive_copy); - while (!STACK_EMPTY(emitter, tag_directives_copy)) { - yaml_tag_directive_t value = POP(emitter, tag_directives_copy); - yaml_free(value.handle); - yaml_free(value.prefix); + copy.handle = yaml_strdup(value.handle); + copy.prefix = yaml_strdup(value.prefix); + if (!copy.handle || !copy.prefix) { + emitter->error = YAML_MEMORY_ERROR; + goto error; } - STACK_DEL(emitter, tag_directives_copy); - yaml_free(value.handle); - yaml_free(value.prefix); + if (!PUSH(emitter, emitter->tag_directives, copy)) + goto error; + + return 1; + +error: + yaml_free(copy.handle); + yaml_free(copy.prefix); return 0; } /* - * Emit DOCUMENT-END. + * Increase the indentation level. */ -YAML_DECLARE(int) -yaml_emitter_emit_document_end(yaml_emitter_t *emitter, int implicit) +static int +yaml_emitter_increase_indent(yaml_emitter_t *emitter, + int flow, int indentless) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - - assert(emitter); /* Non-NULL emitter object is expected. */ + if (!PUSH(emitter, emitter->indents, emitter->indent)) + return 0; - DOCUMENT_END_EVENT_INIT(event, implicit, mark, mark); + if (emitter->indent < 0) { + emitter->indent = flow ? emitter->best_indent : 0; + } + else if (!indentless) { + emitter->indent += emitter->best_indent; + } - return yaml_emitter_emit(emitter, &event); + return 1; } /* - * Emit ALIAS. + * State dispatcher. 
*/ -YAML_DECLARE(int) -yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_char_t *anchor) +static int +yaml_emitter_state_machine(yaml_emitter_t *emitter, yaml_event_t *event) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - yaml_char_t *anchor_copy = NULL; + switch (emitter->state) + { + case YAML_EMIT_STREAM_START_STATE: + return yaml_emitter_emit_stream_start(emitter, event); - assert(emitter); /* Non-NULL emitter object is expected. */ - assert(anchor); /* Non-NULL anchor is expected. */ + case YAML_EMIT_FIRST_DOCUMENT_START_STATE: + return yaml_emitter_emit_document_start(emitter, event, 1); - anchor_copy = yaml_strdup(anchor); - if (!anchor_copy) { - emitter->error = YAML_MEMORY_ERROR; - return 0; - } + case YAML_EMIT_DOCUMENT_START_STATE: + return yaml_emitter_emit_document_start(emitter, event, 0); - ALIAS_EVENT_INIT(event, anchor_copy, mark, mark); + case YAML_EMIT_DOCUMENT_CONTENT_STATE: + return yaml_emitter_emit_document_content(emitter, event); - if (yaml_emitter_emit(emitter, &event)) { - return 1; - } + case YAML_EMIT_DOCUMENT_END_STATE: + return yaml_emitter_emit_document_end(emitter, event); + + case YAML_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, 1); + + case YAML_EMIT_FLOW_SEQUENCE_ITEM_STATE: + return yaml_emitter_emit_flow_sequence_item(emitter, event, 0); + + case YAML_EMIT_FLOW_MAPPING_FIRST_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, 1); + + case YAML_EMIT_FLOW_MAPPING_KEY_STATE: + return yaml_emitter_emit_flow_mapping_key(emitter, event, 0); + + case YAML_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE: + return yaml_emitter_emit_flow_mapping_value(emitter, event, 1); + + case YAML_EMIT_FLOW_MAPPING_VALUE_STATE: + return yaml_emitter_emit_flow_mapping_value(emitter, event, 0); - yaml_free(anchor_copy); + case YAML_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE: + return yaml_emitter_emit_block_sequence_item(emitter, event, 1); + + case 
YAML_EMIT_BLOCK_SEQUENCE_ITEM_STATE: + return yaml_emitter_emit_block_sequence_item(emitter, event, 0); + + case YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE: + return yaml_emitter_emit_block_mapping_key(emitter, event, 1); + + case YAML_EMIT_BLOCK_MAPPING_KEY_STATE: + return yaml_emitter_emit_block_mapping_key(emitter, event, 0); + + case YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE: + return yaml_emitter_emit_block_mapping_value(emitter, event, 1); + + case YAML_EMIT_BLOCK_MAPPING_VALUE_STATE: + return yaml_emitter_emit_block_mapping_value(emitter, event, 0); + + case YAML_EMIT_END_STATE: + return yaml_emitter_set_emitter_error(emitter, + "expected nothing after STREAM-END"); + + default: + assert(1); /* Invalid state. */ + } return 0; } /* - * Emit SCALAR. + * Expect STREAM-START. */ -YAML_DECLARE(int) -yaml_emitter_emit_scalar(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, - yaml_char_t *value, size_t length, - int plain_implicit, int quoted_implicit, - yaml_scalar_style_t style) +static int +yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, + yaml_event_t *event) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - yaml_char_t *anchor_copy = NULL; - yaml_char_t *tag_copy = NULL; - yaml_char_t *value_copy = NULL; - - assert(emitter); /* Non-NULL emitter object is expected. */ - assert(value); /* Non-NULL anchor is expected. 
*/ - - if (anchor) { - anchor_copy = yaml_strdup(anchor); - if (!anchor_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; + if (event->type == YAML_STREAM_START_EVENT) + { + if (!emitter->encoding) { + emitter->encoding = event->data.stream_start.encoding; } - } - if (tag) { - tag_copy = yaml_strdup(tag); - if (!tag_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; + if (!emitter->encoding) { + emitter->encoding = YAML_UTF8_ENCODING; } - } - value_copy = yaml_malloc(length+1); - if (!value_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; + if (emitter->best_indent < 2 || emitter->best_indent > 9) { + emitter->best_indent = 2; + } + + if (emitter->best_width >= 0 + && emitter->best_width <= emitter->best_indent*2) { + emitter->best_width = 80; + } + + if (emitter->best_width < 0) { + emitter->best_width = INT_MAX; + } + + if (!emitter->line_break) { + emitter->line_break = YAML_LN_BREAK; + } + + emitter->indent = -1; + + emitter->line = 0; + emitter->column = 0; + emitter->whitespace = 1; + emitter->indention = 1; + + if (emitter->encoding != YAML_UTF8_ENCODING) { + if (!yaml_emitter_write_bom(emitter)) + return 0; + } + + emitter->state = YAML_EMIT_FIRST_DOCUMENT_START_STATE; + + return 1; } - memcpy(value_copy, value, length); - value_copy[length] = '\0'; - SCALAR_EVENT_INIT(event, anchor_copy, tag_copy, value_copy, length, - plain_implicit, quoted_implicit, style, mark, mark); + return yaml_emitter_set_emitter_error(emitter, + "expected STREAM-START"); +} + +static int +yaml_emitter_emit_document_start(yaml_emitter_t *emitter, + yaml_event_t *event, int first) +{ + if (event->type == YAML_DOCUMENT_START_EVENT) + { + yaml_tag_directive_t default_tag_directives[] = { + {(yaml_char_t *)"!", (yaml_char_t *)"!"}, + {(yaml_char_t *)"!!", (yaml_char_t *)"tag:yaml.org,2002:"}, + {NULL, NULL} + }; + yaml_tag_directive_t *tag_directive; + int implicit; + + if (event->data.document_start.version_directive) { + if 
(event->data.document_start.version_directive->major != 1 + || event->data.document_start.version_directive-> minor != 1) { + return yaml_emitter_set_emitter_error(emitter, + "incompatible %YAML directive"); + } + } + + for (tag_directive = event->data.document_start.tag_directives.start; + tag_directive != event->data.document_start.tag_directives.end; + tag_directive ++) { + if (!yaml_emitter_append_tag_directive(emitter, *tag_directive, 0)) + return 0; + } + + for (tag_directive = default_tag_directives; + tag_directive->handle; tag_directive ++) { + if (!yaml_emitter_append_tag_directive(emitter, *tag_directive, 1)) + return 0; + } + + implicit = event->data.document_start.implicit; + if (!first || emitter->canonical) { + implicit = 0; + } + + if (event->data.document_start.version_directive) { + implicit = 0; + if (!yaml_emitter_write_version_directive(emitter, + *event->data.document_start.version_directive)) + return 0; + } + + if (event->data.document_start.tag_directives.start + != event->data.document_start.tag_directives.end) { + implicit = 0; + for (tag_directive = event->data.document_start.tag_directives.start; + tag_directive != event->data.document_start.tag_directives.end; + tag_directive ++) { + if (!yaml_emitter_write_tag_directive(emitter, *tag_directive)) + return 0; + } + } + + if (yaml_emitter_check_empty_document(emitter)) { + implicit = 0; + } + + if (!implicit) { + if (!yaml_emitter_write_indent(emitter)) + return 0; + if (!yaml_emitter_write_indicator(emitter, "---", 1, 0, 0)) + return 0; + if (emitter->canonical) { + if (!yaml_emitter_write_indent(emitter)) + return 0; + } + } + + emitter->state = YAML_EMIT_DOCUMENT_CONTENT_STATE; - if (yaml_emitter_emit(emitter, &event)) { return 1; } -error: - yaml_free(anchor_copy); - yaml_free(tag_copy); - yaml_free(value_copy); + else if (event->type == YAML_STREAM_END_EVENT) + { + if (!yaml_emitter_flush(emitter)) + return 0; - return 0; + emitter->state = YAML_EMIT_END_STATE; + + return 1; + } + + 
return yaml_emitter_set_emitter_error(emitter, + "expected DOCUMENT-START or STREAM-END"); } -/* - * Emit SEQUENCE-START. - */ +static int +yaml_emitter_emit_document_content(yaml_emitter_t *emitter, + yaml_event_t *event) +{ + if (!PUSH(emitter, emitter->states, YAML_EMIT_DOCUMENT_END_STATE)) + return 0; -YAML_DECLARE(int) -yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, int implicit, - yaml_sequence_style_t style) + return yaml_emitter_emit_node(emitter, event, 1, 0, 0, 0); +} + +static int +yaml_emitter_emit_document_end(yaml_emitter_t *emitter, + yaml_event_t *event) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - yaml_char_t *anchor_copy = NULL; - yaml_char_t *tag_copy = NULL; - - assert(emitter); /* Non-NULL emitter object is expected. */ - - if (anchor) { - anchor_copy = yaml_strdup(anchor); - if (!anchor_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; + if (event->type == YAML_DOCUMENT_END_EVENT) + { + if (!yaml_emitter_write_indent(emitter)) + return 0; + if (!event->data.document_end.implicit) { + if (!yaml_emitter_write_indicator(emitter, "...", 1, 0, 0)) + return 0; + if (!yaml_emitter_write_indent(emitter)) + return 0; } + if (!yaml_emitter_flush(emitter)) + return 0; + + emitter->state = YAML_EMIT_DOCUMENT_START_STATE; + + return 1; } - if (tag) { - tag_copy = yaml_strdup(tag); - if (!tag_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; - } + return yaml_emitter_set_emitter_error(emitter, + "expected DOCUMENT-END"); +} + +static int +yaml_emitter_emit_flow_sequence_item(yaml_emitter_t *emitter, + yaml_event_t *event, int first) +{ + if (first) + { + if (!yaml_emitter_write_indicator(emitter, "[", 1, 1, 0)) + return 0; + if (!yaml_emitter_increase_indent(emitter, 1, 0)) + return 0; + emitter->flow_level ++; } - SEQUENCE_START_EVENT_INIT(event, anchor_copy, tag_copy, - implicit, style, mark, mark); + if (event->type == YAML_SEQUENCE_END_EVENT) + { + emitter->flow_level 
--; + emitter->indent = POP(emitter, emitter->indents); + if (emitter->canonical && !first) { + if (!yaml_emitter_write_indicator(emitter, ",", 0, 0, 0)) + return 0; + if (!yaml_emitter_write_indent(emitter)) + return 0; + } + if (!yaml_emitter_write_indicator(emitter, "]", 0, 0, 0)) + return 0; + emitter->state = POP(emitter, emitter->states); - if (yaml_emitter_emit(emitter, &event)) { return 1; } -error: - yaml_free(anchor_copy); - yaml_free(tag_copy); + if (emitter->canonical || emitter->column > emitter->best_width) { + if (!yaml_emitter_write_indent(emitter)) + return 0; + } + if (PUSH(emitter, emitter->states, YAML_EMIT_FLOW_SEQUENCE_ITEM_STATE)) + return 0; - return 0; + return yaml_emitter_emit_node(emitter, event, 0, 1, 0, 0); } -/* - * Emit SEQUENCE-END. - */ - -YAML_DECLARE(int) -yaml_emitter_emit_sequence_end(yaml_emitter_t *emitter) +static int +yaml_emitter_emit_flow_mapping_key(yaml_emitter_t *emitter, + yaml_event_t *event, int first) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; + if (first) + { + if (!yaml_emitter_write_indicator(emitter, "{", 1, 1, 0)) + return 0; + if (!yaml_emitter_increase_indent(emitter, 1, 0)) + return 0; + emitter->flow_level ++; + } - assert(emitter); /* Non-NULL emitter object is expected. 
*/ + if (event->type == YAML_MAPPING_END_EVENT) + { + emitter->flow_level --; + emitter->indent = POP(emitter, emitter->indents); + if (emitter->canonical && !first) { + if (!yaml_emitter_write_indicator(emitter, ",", 0, 0, 0)) + return 0; + if (!yaml_emitter_write_indent(emitter)) + return 0; + } + if (!yaml_emitter_write_indicator(emitter, "}", 0, 0, 0)) + return 0; + emitter->state = POP(emitter, emitter->states); - SEQUENCE_END_EVENT_INIT(event, mark, mark); + return 1; + } - return yaml_emitter_emit(emitter, &event); -} + if (!first) { + if (!yaml_emitter_write_indicator(emitter, ",", 0, 0, 0)) + return 0; + } + if (emitter->canonical || emitter->column > emitter->best_width) { + if (!yaml_emitter_write_indent(emitter)) + return 0; + } -/* - * Emit MAPPING-START. - */ + if (!emitter->canonical && yaml_emitter_check_simple_key(emitter)) + { + if (!PUSH(emitter, emitter->states, + YAML_EMIT_FLOW_MAPPING_SIMPLE_VALUE_STATE)) + return 0; -YAML_DECLARE(int) -yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, - yaml_char_t *anchor, yaml_char_t *tag, int implicit, - yaml_mapping_style_t style) + return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 1); + } + else + { + if (!yaml_emitter_write_indicator(emitter, "?", 1, 0, 0)) + return 0; + if (!PUSH(emitter, emitter->states, + YAML_EMIT_FLOW_MAPPING_VALUE_STATE)) + return 0; + + return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 0); + } +} + +static int +yaml_emitter_emit_flow_mapping_value(yaml_emitter_t *emitter, + yaml_event_t *event, int simple) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; - yaml_char_t *anchor_copy = NULL; - yaml_char_t *tag_copy = NULL; - - assert(emitter); /* Non-NULL emitter object is expected. 
*/ - - if (anchor) { - anchor_copy = yaml_strdup(anchor); - if (!anchor_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; + if (simple) { + if (!yaml_emitter_write_indicator(emitter, ":", 0, 0, 0)) + return 0; + } + else { + if (emitter->canonical || emitter->column > emitter->best_width) { + if (!yaml_emitter_write_indent(emitter)) + return 0; } + if (!yaml_emitter_write_indicator(emitter, ":", 1, 0, 0)) + return 0; } + if (!PUSH(emitter, emitter->states, YAML_EMIT_FLOW_MAPPING_KEY_STATE)) + return 0; + return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 0); +} - if (tag) { - tag_copy = yaml_strdup(tag); - if (!tag_copy) { - emitter->error = YAML_MEMORY_ERROR; - goto error; - } +static int +yaml_emitter_emit_block_sequence_item(yaml_emitter_t *emitter, + yaml_event_t *event, int first) +{ + if (first) + { + if (!yaml_emitter_increase_indent(emitter, 0, + (emitter->mapping_context && !emitter->indention))) + return 0; } - MAPPING_START_EVENT_INIT(event, anchor_copy, tag_copy, - implicit, style, mark, mark); + if (event->type == YAML_SEQUENCE_END_EVENT) + { + emitter->indent = POP(emitter, emitter->indents); + emitter->state = POP(emitter, emitter->states); - if (yaml_emitter_emit(emitter, &event)) { return 1; } -error: - yaml_free(anchor_copy); - yaml_free(tag_copy); + if (!yaml_emitter_write_indent(emitter)) + return 0; + if (!yaml_emitter_write_indicator(emitter, "-", 1, 0, 1)) + return 0; + if (!PUSH(emitter, emitter->states, + YAML_EMIT_BLOCK_SEQUENCE_ITEM_STATE)) + return 0; - return 0; + return yaml_emitter_emit_node(emitter, event, 0, 1, 0, 0); } -/* - * Emit MAPPING-END. 
- */ - -YAML_DECLARE(int) -yaml_emitter_emit_mapping_end(yaml_emitter_t *emitter) +static int +yaml_emitter_emit_block_mapping_key(yaml_emitter_t *emitter, + yaml_event_t *event, int first) { - yaml_event_t event; - yaml_mark_t mark = { 0, 0, 0 }; + if (first) + { + if (!yaml_emitter_increase_indent(emitter, 0, 0)) + return 0; + } + + if (event->type == YAML_MAPPING_END_EVENT) + { + emitter->indent = POP(emitter, emitter->indents); + emitter->state = POP(emitter, emitter->states); + + return 1; + } - assert(emitter); /* Non-NULL emitter object is expected. */ + if (!yaml_emitter_write_indent(emitter)) + return 0; - MAPPING_END_EVENT_INIT(event, mark, mark); + if (yaml_emitter_check_simple_key(emitter)) + { + if (!PUSH(emitter, emitter->states, + YAML_EMIT_BLOCK_MAPPING_SIMPLE_VALUE_STATE)) + return 0; - return yaml_emitter_emit(emitter, &event); + return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 1); + } + else + { + if (!yaml_emitter_write_indicator(emitter, "?", 1, 0, 1)) + return 0; + if (!PUSH(emitter, emitter->states, + YAML_EMIT_BLOCK_MAPPING_VALUE_STATE)) + return 0; + + return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 0); + } } -/* - * Emit an event. 
- */ +static int +yaml_emitter_emit_block_mapping_value(yaml_emitter_t *emitter, + yaml_event_t *event, int simple) +{ + if (simple) { + if (!yaml_emitter_write_indicator(emitter, ":", 0, 0, 0)) + return 0; + } + else { + if (!yaml_emitter_write_indent(emitter)) + return 0; + if (!yaml_emitter_write_indicator(emitter, ":", 1, 0, 1)) + return 0; + } + if (!PUSH(emitter, emitter->states, + YAML_EMIT_BLOCK_MAPPING_KEY_STATE)) + return 0; -YAML_DECLARE(int) -yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event) + return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 0); +} + +static int +yaml_emitter_emit_node(yaml_emitter_t *emitter, yaml_event_t *event, + int root, int sequence, int mapping, int simple_key) { + emitter->root_context = root; + emitter->sequence_context = sequence; + emitter->mapping_context = mapping; + emitter->simple_key_context = simple_key; + + switch (event->type) + { + case YAML_ALIAS_EVENT: + return yaml_emitter_emit_alias(emitter, event); + + case YAML_SCALAR_EVENT: + return yaml_emitter_emit_scalar(emitter, event); + + case YAML_SEQUENCE_START_EVENT: + return yaml_emitter_emit_sequence_start(emitter, event); + + case YAML_MAPPING_START_EVENT: + return yaml_emitter_emit_mapping_start(emitter, event); + + default: + return yaml_emitter_set_emitter_error(emitter, + "expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS"); + } + return 0; } +static int +yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_event_t *event) +{ + if (!yaml_emitter_process_anchor(emitter, event->data.alias.anchor, 1)) + return 0; + emitter->state = POP(emitter, emitter->states); + + return 1; +} + +static int +yaml_emitter_emit_scalar(yaml_emitter_t *emitter, yaml_event_t *event) +{ + if (!yaml_emitter_process_anchor(emitter, event->data.scalar.anchor, 0)) + return 0; + if (!yaml_emitter_process_tag(emitter, event->data.scalar.tag)) + return 0; + if (!yaml_emitter_increase_indent(emitter, 1, 0)) + return 0; + if (!yaml_emitter_process_scalar(emitter, + 
event->data.scalar.value, event->data.scalar.length, + event->data.scalar.plain_implicit, + event->data.scalar.quoted_implicit, + event->data.scalar.style)) + return 0; + emitter->indent = POP(emitter, emitter->indents); + emitter->state = POP(emitter, emitter->states); + + return 1; +} + +static int +yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, yaml_event_t *event) +{ + if (!yaml_emitter_process_anchor(emitter, + event->data.sequence_start.anchor, 0)) + return 0; + if (!yaml_emitter_process_tag(emitter, + event->data.sequence_start.tag)) + return 0; + + if (emitter->flow_level || emitter->canonical + || event->data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE + || yaml_emitter_check_empty_sequence(emitter)) { + emitter->state = YAML_EMIT_FLOW_SEQUENCE_FIRST_ITEM_STATE; + } + else { + emitter->state = YAML_EMIT_BLOCK_SEQUENCE_FIRST_ITEM_STATE; + } + + return 1; +} + +static int +yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, yaml_event_t *event) +{ + if (!yaml_emitter_process_anchor(emitter, + event->data.mapping_start.anchor, 0)) + return 0; + if (!yaml_emitter_process_tag(emitter, + event->data.mapping_start.tag)) + return 0; + + if (emitter->flow_level || emitter->canonical + || event->data.mapping_start.style == YAML_FLOW_MAPPING_STYLE + || yaml_emitter_check_empty_mapping(emitter)) { + emitter->state = YAML_EMIT_FLOW_MAPPING_FIRST_KEY_STATE; + } + else { + emitter->state = YAML_EMIT_BLOCK_MAPPING_FIRST_KEY_STATE; + } + + return 1; +} + diff --git a/src/parser.c b/src/parser.c index ed3c019a..0b5b5437 100644 --- a/src/parser.c +++ b/src/parser.c @@ -172,12 +172,14 @@ yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event) assert(parser); /* Non-NULL parser object is expected. */ assert(event); /* Non-NULL event object is expected. */ + /* Erase the event object. */ + + memset(event, 0, sizeof(yaml_event_t)); + /* No events after the end of the stream or error. 
*/ if (parser->stream_end_produced || parser->error || parser->state == YAML_PARSE_END_STATE) { - memset(event, 0, sizeof(yaml_event_t)); - return 1; } @@ -1318,6 +1320,10 @@ yaml_parser_process_directives(yaml_parser_t *parser, return 0; } +/* + * Append a tag directive to the directives stack. + */ + static int yaml_parser_append_tag_directive(yaml_parser_t *parser, yaml_tag_directive_t value, int allow_duplicates, yaml_mark_t mark) diff --git a/src/scanner.c b/src/scanner.c index bb811276..42657631 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -938,11 +938,13 @@ yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) assert(parser); /* Non-NULL parser object is expected. */ assert(token); /* Non-NULL token object is expected. */ + /* Erase the token object. */ + + memset(token, 0, sizeof(yaml_token_t)); + /* No tokens after STREAM-END or error. */ if (parser->stream_end_produced || parser->error) { - memset(token, 0, sizeof(yaml_token_t)); - return 1; } diff --git a/src/yaml_private.h b/src/yaml_private.h index faa855bd..efd1d43f 100644 --- a/src/yaml_private.h +++ b/src/yaml_private.h @@ -6,6 +6,7 @@ #include #include +#include /* * Memory management. From b704365fe1f1507da1f4e89efb9c4e50e32acff4 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Fri, 28 Jul 2006 20:09:34 +0000 Subject: [PATCH 24/73] Implement everything except tag and scalar writers. --- include/yaml.h | 42 ++ src/emitter.c | 1044 ++++++++++++++++++++++++++++++++++++++++++-- src/scanner.c | 515 ++++++++-------------- src/writer.c | 3 + src/yaml_private.h | 249 +++++++++++ 5 files changed, 1469 insertions(+), 384 deletions(-) diff --git a/include/yaml.h b/include/yaml.h index 62840687..0408a67f 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -1203,6 +1203,48 @@ typedef struct { /** If the last character was an indentation character (' ', '-', '?', ':')? */ int indention; + /** Anchor analysis. */ + struct { + /** The anchor value. */ + yaml_char_t *anchor; + /** The anchor length. 
*/ + size_t anchor_length; + /** Is it an alias? */ + int alias; + } anchor_data; + + /** Tag analysis. */ + struct { + /** The tag handle. */ + yaml_char_t *handle; + /** The tag handle length. */ + size_t handle_length; + /** The tag suffix. */ + yaml_char_t *suffix; + /** The tag suffix length. */ + size_t suffix_length; + } tag_data; + + /** Scalar analysis. */ + struct { + /** The scalar value. */ + yaml_char_t *value; + /** The scalar length. */ + size_t length; + /** Does the scalar contain line breaks? */ + int multiline; + /** Can the scalar be expessed in the flow plain style? */ + int flow_plain_allowed; + /** Can the scalar be expressed in the block plain style? */ + int block_plain_allowed; + /** Can the scalar be expressed in the single quoted style? */ + int single_quoted_allowed; + /** Can the scalar be expressed in the literal or folded styles? */ + int block_allowed; + /** The output style. */ + yaml_scalar_style_t style; + } scalar_data; + /** * @} */ diff --git a/src/emitter.c b/src/emitter.c index 41ed3fc6..b8d3dc97 100644 --- a/src/emitter.c +++ b/src/emitter.c @@ -1,6 +1,62 @@ #include "yaml_private.h" +/* + * Flush the buffer if needed. + */ + +#define FLUSH(emitter) \ + ((emitter->buffer.pointer+5 < emitter->buffer.end) \ + || yaml_emitter_flush(emitter)) + +/* + * Put a character to the output buffer. + */ + +#define PUT(emitter,value) \ + (FLUSH(emitter) \ + && (*(emitter->buffer.pointer++) = (yaml_char_t)(value), \ + emitter->column ++, \ + 1)) + +/* + * Put a line break to the output buffer. + */ + +#define PUT_BREAK(emitter) \ + (FLUSH(emitter) \ + && ((emitter->line_break == YAML_CR_BREAK ? \ + (*(emitter->buffer.pointer++) = (yaml_char_t) '\r') : \ + emitter->line_break == YAML_LN_BREAK ? \ + (*(emitter->buffer.pointer++) = (yaml_char_t) '\n') : \ + emitter->line_break == YAML_CRLN_BREAK ? 
\ + (*(emitter->buffer.pointer++) = (yaml_char_t) '\r', \ + *(emitter->buffer.pointer++) = (yaml_char_t) '\n') : 0), \ + emitter->column = 0, \ + emitter->line ++, \ + 1)) + +/* + * Copy a character from a string into buffer. + */ + +#define WRITE(emitter,string) \ + (FLUSH(emitter) \ + && (COPY(emitter->buffer,string), \ + emitter->column ++, \ + 1)) + +/* + * Copy a line break character from a string into buffer. + */ + +#define WRITE_BREAK(emitter,string) \ + (FLUSH(emitter) \ + && (COPY(emitter->buffer,string), \ + emitter->column = 0, \ + emitter->line ++, \ + 1)) + /* * API functions. */ @@ -105,39 +161,57 @@ yaml_emitter_check_empty_mapping(yaml_emitter_t *emitter); static int yaml_emitter_check_simple_key(yaml_emitter_t *emitter); +static int +yaml_emitter_select_scalar_style(yaml_emitter_t *emitter, yaml_event_t *event); + /* * Processors. */ static int -yaml_emitter_process_anchor(yaml_emitter_t *emitter, - yaml_char_t *anchor, int alias); +yaml_emitter_process_anchor(yaml_emitter_t *emitter); static int -yaml_emitter_process_tag(yaml_emitter_t *emitter, - yaml_char_t *tag); +yaml_emitter_process_tag(yaml_emitter_t *emitter); static int -yaml_emitter_process_scalar(yaml_emitter_t *emitter, - yaml_char_t *value, size_t length, - int plain_implicit, int quoted_implicit, - yaml_scalar_style_t style); +yaml_emitter_process_scalar(yaml_emitter_t *emitter); /* - * Writers. + * Analyzers. 
*/ static int -yaml_emitter_write_bom(yaml_emitter_t *emitter); - -static int -yaml_emitter_write_version_directive(yaml_emitter_t *emitter, +yaml_emitter_analyze_version_directive(yaml_emitter_t *emitter, yaml_version_directive_t version_directive); static int -yaml_emitter_write_tag_directive(yaml_emitter_t *emitter, +yaml_emitter_analyze_tag_directive(yaml_emitter_t *emitter, yaml_tag_directive_t tag_directive); +static int +yaml_emitter_analyze_anchor(yaml_emitter_t *emitter, + yaml_char_t *anchor, int alias); + +static int +yaml_emitter_analyze_tag(yaml_emitter_t *emitter, + yaml_char_t *tag); + +static int +yaml_emitter_analyze_scalar(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length); + +static int +yaml_emitter_analyze_event(yaml_emitter_t *emitter, + yaml_event_t *event); + +/* + * Writers. + */ + +static int +yaml_emitter_write_bom(yaml_emitter_t *emitter); + static int yaml_emitter_write_indent(yaml_emitter_t *emitter); @@ -146,6 +220,38 @@ yaml_emitter_write_indicator(yaml_emitter_t *emitter, char *indicator, int need_whitespace, int is_whitespace, int is_indention); +static int +yaml_emitter_write_anchor(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length); + +static int +yaml_emitter_write_tag_handle(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length); + +static int +yaml_emitter_write_tag_content(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length); + +static int +yaml_emitter_write_plain_scalar(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length, int allow_breaks); + +static int +yaml_emitter_write_single_quoted_scalar(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length, int allow_breaks); + +static int +yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length, int allow_breaks); + +static int +yaml_emitter_write_literal_scalar(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length); + +static int 
+yaml_emitter_write_folded_scalar(yaml_emitter_t *emitter, + yaml_char_t *value, size_t length); + /* * Set an emitter error and return 0. */ @@ -172,9 +278,10 @@ yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event) } while (!yaml_emitter_need_more_events(emitter)) { - if (!yaml_emitter_state_machine(emitter, emitter->events.head)) { + if (!yaml_emitter_analyze_event(emitter, emitter->events.head)) + return 0; + if (!yaml_emitter_state_machine(emitter, emitter->events.head)) return 0; - } DEQUEUE(emitter, emitter->events); } @@ -428,6 +535,10 @@ yaml_emitter_emit_stream_start(yaml_emitter_t *emitter, "expected STREAM-START"); } +/* + * Expect DOCUMENT-START or STREAM-END. + */ + static int yaml_emitter_emit_document_start(yaml_emitter_t *emitter, yaml_event_t *event, int first) @@ -443,16 +554,16 @@ yaml_emitter_emit_document_start(yaml_emitter_t *emitter, int implicit; if (event->data.document_start.version_directive) { - if (event->data.document_start.version_directive->major != 1 - || event->data.document_start.version_directive-> minor != 1) { - return yaml_emitter_set_emitter_error(emitter, - "incompatible %YAML directive"); - } + if (!yaml_emitter_analyze_version_directive(emitter, + *event->data.document_start.version_directive)) + return 0; } for (tag_directive = event->data.document_start.tag_directives.start; tag_directive != event->data.document_start.tag_directives.end; tag_directive ++) { + if (!yaml_emitter_analyze_tag_directive(emitter, *tag_directive)) + return 0; if (!yaml_emitter_append_tag_directive(emitter, *tag_directive, 0)) return 0; } @@ -470,8 +581,11 @@ yaml_emitter_emit_document_start(yaml_emitter_t *emitter, if (event->data.document_start.version_directive) { implicit = 0; - if (!yaml_emitter_write_version_directive(emitter, - *event->data.document_start.version_directive)) + if (!yaml_emitter_write_indicator(emitter, "%YAML", 1, 0, 0)) + return 0; + if (!yaml_emitter_write_indicator(emitter, "1.1", 1, 0, 0)) + return 0; + if 
(!yaml_emitter_write_indent(emitter)) return 0; } @@ -481,7 +595,15 @@ yaml_emitter_emit_document_start(yaml_emitter_t *emitter, for (tag_directive = event->data.document_start.tag_directives.start; tag_directive != event->data.document_start.tag_directives.end; tag_directive ++) { - if (!yaml_emitter_write_tag_directive(emitter, *tag_directive)) + if (!yaml_emitter_write_indicator(emitter, "%TAG", 1, 0, 0)) + return 0; + if (!yaml_emitter_write_tag_handle(emitter, tag_directive->handle, + strlen((char *)tag_directive->handle))) + return 0; + if (!yaml_emitter_write_tag_content(emitter, tag_directive->prefix, + strlen((char *)tag_directive->prefix))) + return 0; + if (!yaml_emitter_write_indent(emitter)) return 0; } } @@ -520,6 +642,10 @@ yaml_emitter_emit_document_start(yaml_emitter_t *emitter, "expected DOCUMENT-START or STREAM-END"); } +/* + * Expect the root node. + */ + static int yaml_emitter_emit_document_content(yaml_emitter_t *emitter, yaml_event_t *event) @@ -530,6 +656,10 @@ yaml_emitter_emit_document_content(yaml_emitter_t *emitter, return yaml_emitter_emit_node(emitter, event, 1, 0, 0, 0); } +/* + * Expect DOCUMENT-END. + */ + static int yaml_emitter_emit_document_end(yaml_emitter_t *emitter, yaml_event_t *event) @@ -556,6 +686,10 @@ yaml_emitter_emit_document_end(yaml_emitter_t *emitter, "expected DOCUMENT-END"); } +/* + * Expect a flow item node. + */ + static int yaml_emitter_emit_flow_sequence_item(yaml_emitter_t *emitter, yaml_event_t *event, int first) @@ -596,6 +730,10 @@ yaml_emitter_emit_flow_sequence_item(yaml_emitter_t *emitter, return yaml_emitter_emit_node(emitter, event, 0, 1, 0, 0); } +/* + * Expect a flow key node. + */ + static int yaml_emitter_emit_flow_mapping_key(yaml_emitter_t *emitter, yaml_event_t *event, int first) @@ -655,6 +793,10 @@ yaml_emitter_emit_flow_mapping_key(yaml_emitter_t *emitter, } } +/* + * Expect a flow value node. 
+ */ + static int yaml_emitter_emit_flow_mapping_value(yaml_emitter_t *emitter, yaml_event_t *event, int simple) @@ -676,6 +818,10 @@ yaml_emitter_emit_flow_mapping_value(yaml_emitter_t *emitter, return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 0); } +/* + * Expect a block item node. + */ + static int yaml_emitter_emit_block_sequence_item(yaml_emitter_t *emitter, yaml_event_t *event, int first) @@ -706,6 +852,10 @@ yaml_emitter_emit_block_sequence_item(yaml_emitter_t *emitter, return yaml_emitter_emit_node(emitter, event, 0, 1, 0, 0); } +/* + * Expect a block key node. + */ + static int yaml_emitter_emit_block_mapping_key(yaml_emitter_t *emitter, yaml_event_t *event, int first) @@ -747,6 +897,10 @@ yaml_emitter_emit_block_mapping_key(yaml_emitter_t *emitter, } } +/* + * Expect a block value node. + */ + static int yaml_emitter_emit_block_mapping_value(yaml_emitter_t *emitter, yaml_event_t *event, int simple) @@ -768,6 +922,10 @@ yaml_emitter_emit_block_mapping_value(yaml_emitter_t *emitter, return yaml_emitter_emit_node(emitter, event, 0, 0, 1, 0); } +/* + * Expect a node. + */ + static int yaml_emitter_emit_node(yaml_emitter_t *emitter, yaml_event_t *event, int root, int sequence, int mapping, int simple_key) @@ -799,30 +957,36 @@ yaml_emitter_emit_node(yaml_emitter_t *emitter, yaml_event_t *event, return 0; } +/* + * Expect ALIAS. + */ + static int yaml_emitter_emit_alias(yaml_emitter_t *emitter, yaml_event_t *event) { - if (!yaml_emitter_process_anchor(emitter, event->data.alias.anchor, 1)) + if (!yaml_emitter_process_anchor(emitter)) return 0; emitter->state = POP(emitter, emitter->states); return 1; } +/* + * Expect SCALAR. 
+ */ + static int yaml_emitter_emit_scalar(yaml_emitter_t *emitter, yaml_event_t *event) { - if (!yaml_emitter_process_anchor(emitter, event->data.scalar.anchor, 0)) + if (!yaml_emitter_select_scalar_style(emitter, event)) return 0; - if (!yaml_emitter_process_tag(emitter, event->data.scalar.tag)) + if (!yaml_emitter_process_anchor(emitter)) + return 0; + if (!yaml_emitter_process_tag(emitter)) return 0; if (!yaml_emitter_increase_indent(emitter, 1, 0)) return 0; - if (!yaml_emitter_process_scalar(emitter, - event->data.scalar.value, event->data.scalar.length, - event->data.scalar.plain_implicit, - event->data.scalar.quoted_implicit, - event->data.scalar.style)) + if (!yaml_emitter_process_scalar(emitter)) return 0; emitter->indent = POP(emitter, emitter->indents); emitter->state = POP(emitter, emitter->states); @@ -830,14 +994,16 @@ yaml_emitter_emit_scalar(yaml_emitter_t *emitter, yaml_event_t *event) return 1; } +/* + * Expect SEQUENCE-START. + */ + static int yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, yaml_event_t *event) { - if (!yaml_emitter_process_anchor(emitter, - event->data.sequence_start.anchor, 0)) + if (!yaml_emitter_process_anchor(emitter)) return 0; - if (!yaml_emitter_process_tag(emitter, - event->data.sequence_start.tag)) + if (!yaml_emitter_process_tag(emitter)) return 0; if (emitter->flow_level || emitter->canonical @@ -852,14 +1018,16 @@ yaml_emitter_emit_sequence_start(yaml_emitter_t *emitter, yaml_event_t *event) return 1; } +/* + * Expect MAPPING-START. 
+ */ + static int yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, yaml_event_t *event) { - if (!yaml_emitter_process_anchor(emitter, - event->data.mapping_start.anchor, 0)) + if (!yaml_emitter_process_anchor(emitter)) return 0; - if (!yaml_emitter_process_tag(emitter, - event->data.mapping_start.tag)) + if (!yaml_emitter_process_tag(emitter)) return 0; if (emitter->flow_level || emitter->canonical @@ -874,3 +1042,799 @@ yaml_emitter_emit_mapping_start(yaml_emitter_t *emitter, yaml_event_t *event) return 1; } +/* + * Check if the document content is an empty scalar. + */ + +static int +yaml_emitter_check_empty_document(yaml_emitter_t *emitter) +{ + return 0; +} + +/* + * Check if the next events represent an empty sequence. + */ + +static int +yaml_emitter_check_empty_sequence(yaml_emitter_t *emitter) +{ + if (emitter->events.tail - emitter->events.head < 2) + return 0; + + return (emitter->events.head[0].type == YAML_SEQUENCE_START_EVENT + && emitter->events.head[1].type == YAML_SEQUENCE_END_EVENT); +} + +/* + * Check if the next events represent an empty mapping. + */ + +static int +yaml_emitter_check_empty_mapping(yaml_emitter_t *emitter) +{ + if (emitter->events.tail - emitter->events.head < 2) + return 0; + + return (emitter->events.head[0].type == YAML_MAPPING_START_EVENT + && emitter->events.head[1].type == YAML_MAPPING_END_EVENT); +} + +/* + * Check if the next node can be expressed as a simple key. 
+ */ + +static int +yaml_emitter_check_simple_key(yaml_emitter_t *emitter) +{ + yaml_event_t *event = emitter->events.head; + size_t length = 0; + + switch (event->type) + { + case YAML_ALIAS_EVENT: + length += emitter->anchor_data.anchor_length; + break; + + case YAML_SCALAR_EVENT: + if (emitter->scalar_data.multiline) + return 0; + length += emitter->anchor_data.anchor_length + + emitter->tag_data.handle_length + + emitter->tag_data.suffix_length + + emitter->scalar_data.length; + break; + + case YAML_SEQUENCE_START_EVENT: + if (!yaml_emitter_check_empty_sequence(emitter)) + return 0; + length += emitter->anchor_data.anchor_length + + emitter->tag_data.handle_length + + emitter->tag_data.suffix_length; + break; + + case YAML_MAPPING_START_EVENT: + if (!yaml_emitter_check_empty_sequence(emitter)) + return 0; + length += emitter->anchor_data.anchor_length + + emitter->tag_data.handle_length + + emitter->tag_data.suffix_length; + break; + + default: + return 0; + } + + if (length > 128) + return 0; + + return 1; +} + +/* + * Determine an acceptable scalar style. 
+ */ + +static int +yaml_emitter_select_scalar_style(yaml_emitter_t *emitter, yaml_event_t *event) +{ + yaml_scalar_style_t style = event->data.scalar.style; + + if (style == YAML_ANY_SCALAR_STYLE) + style = YAML_PLAIN_SCALAR_STYLE; + + if (emitter->canonical) + style = YAML_DOUBLE_QUOTED_SCALAR_STYLE; + + if (emitter->simple_key_context && emitter->scalar_data.multiline) + style = YAML_DOUBLE_QUOTED_SCALAR_STYLE; + + if (style == YAML_PLAIN_SCALAR_STYLE) + { + if ((emitter->flow_level && !emitter->scalar_data.flow_plain_allowed) + || (!emitter->flow_level && !emitter->scalar_data.block_plain_allowed)) + style = YAML_SINGLE_QUOTED_SCALAR_STYLE; + if (!emitter->scalar_data.length + && (emitter->flow_level || emitter->simple_key_context)) + style = YAML_SINGLE_QUOTED_SCALAR_STYLE; + if (!event->data.scalar.plain_implicit + && !emitter->tag_data.handle && !emitter->tag_data.suffix) + style = YAML_SINGLE_QUOTED_SCALAR_STYLE; + } + + if (style == YAML_SINGLE_QUOTED_SCALAR_STYLE) + { + if (!emitter->scalar_data.single_quoted_allowed) + style = YAML_DOUBLE_QUOTED_SCALAR_STYLE; + } + + if (style == YAML_LITERAL_SCALAR_STYLE || style == YAML_FOLDED_SCALAR_STYLE) + { + if (!emitter->scalar_data.block_allowed) + style = YAML_DOUBLE_QUOTED_SCALAR_STYLE; + } + + if (!emitter->tag_data.handle && !emitter->tag_data.suffix) + { + if (!event->data.scalar.plain_implicit + && !event->data.scalar.quoted_implicit) { + return yaml_emitter_set_emitter_error(emitter, + "neither tag nor implicit flags are specified"); + } + + if (event->data.scalar.plain_implicit + && style != YAML_PLAIN_SCALAR_STYLE) { + emitter->tag_data.handle = (yaml_char_t *)"!"; + emitter->tag_data.handle_length = 1; + } + } + + emitter->scalar_data.style = style; + + return 1; +} + +/* + * Write an achor. + */ + +static int +yaml_emitter_process_anchor(yaml_emitter_t *emitter) +{ + if (!emitter->anchor_data.anchor) + return 1; + + if (!yaml_emitter_write_indicator(emitter, + (emitter->anchor_data.alias ? 
"*" : "&"), 1, 0, 0)) + return 0; + + return yaml_emitter_write_anchor(emitter, + emitter->anchor_data.anchor, emitter->anchor_data.anchor_length); +} + +/* + * Write a tag. + */ + +static int +yaml_emitter_process_tag(yaml_emitter_t *emitter) +{ + if (!emitter->tag_data.handle && !emitter->tag_data.suffix) + return 1; + + if (emitter->tag_data.handle) + { + if (!yaml_emitter_write_tag_handle(emitter, emitter->tag_data.handle, + emitter->tag_data.handle_length)) + return 0; + if (emitter->tag_data.suffix) { + if (!yaml_emitter_write_tag_content(emitter, emitter->tag_data.suffix, + emitter->tag_data.suffix_length)) + return 0; + } + } + else + { + if (!yaml_emitter_write_indicator(emitter, "!<", 1, 0, 0)) + return 0; + if (!yaml_emitter_write_tag_content(emitter, emitter->tag_data.suffix, + emitter->tag_data.suffix_length)) + return 0; + if (!yaml_emitter_write_indicator(emitter, ">", 0, 0, 0)) + return 0; + } + + return 1; +} + +/* + * Write a scalar. + */ + +static int +yaml_emitter_process_scalar(yaml_emitter_t *emitter) +{ + switch (emitter->scalar_data.style) + { + case YAML_PLAIN_SCALAR_STYLE: + return yaml_emitter_write_plain_scalar(emitter, + emitter->scalar_data.value, emitter->scalar_data.length, + !emitter->simple_key_context); + + case YAML_SINGLE_QUOTED_SCALAR_STYLE: + return yaml_emitter_write_single_quoted_scalar(emitter, + emitter->scalar_data.value, emitter->scalar_data.length, + !emitter->simple_key_context); + + case YAML_DOUBLE_QUOTED_SCALAR_STYLE: + return yaml_emitter_write_double_quoted_scalar(emitter, + emitter->scalar_data.value, emitter->scalar_data.length, + !emitter->simple_key_context); + + case YAML_LITERAL_SCALAR_STYLE: + return yaml_emitter_write_literal_scalar(emitter, + emitter->scalar_data.value, emitter->scalar_data.length); + + case YAML_FOLDED_SCALAR_STYLE: + return yaml_emitter_write_folded_scalar(emitter, + emitter->scalar_data.value, emitter->scalar_data.length); + + default: + assert(1); /* Impossible. 
*/ + } + + return 0; +} + +/* + * Check if a %YAML directive is valid. + */ + +static int +yaml_emitter_analyze_version_directive(yaml_emitter_t *emitter, + yaml_version_directive_t version_directive) +{ + if (version_directive.major != 1 || version_directive.minor != 1) { + return yaml_emitter_set_emitter_error(emitter, + "incompatible %YAML directive"); + } + + return 1; +} + +/* + * Check if a %TAG directive is valid. + */ + +static int +yaml_emitter_analyze_tag_directive(yaml_emitter_t *emitter, + yaml_tag_directive_t tag_directive) +{ + yaml_string_t handle = STRING(tag_directive.handle, + strlen((char *)tag_directive.handle)); + yaml_string_t prefix = STRING(tag_directive.prefix, + strlen((char *)tag_directive.prefix)); + + if (handle.start == handle.end) { + return yaml_emitter_set_emitter_error(emitter, + "tag handle must not be empty"); + } + + if (handle.start[0] != '!') { + return yaml_emitter_set_emitter_error(emitter, + "tag handle must start with '!'"); + } + + if (handle.end[-1] != '!') { + return yaml_emitter_set_emitter_error(emitter, + "tag handle must end with '!'"); + } + + handle.pointer ++; + + while (handle.pointer != handle.end-1) { + if (!IS_ALPHA(handle)) { + return yaml_emitter_set_emitter_error(emitter, + "tag handle must contain alphanumerical characters only"); + } + MOVE(handle); + } + + if (prefix.start == prefix.end) { + return yaml_emitter_set_emitter_error(emitter, + "tag prefix must not be empty"); + } + + return 1; +} + +/* + * Check if an anchor is valid. + */ + +static int +yaml_emitter_analyze_anchor(yaml_emitter_t *emitter, + yaml_char_t *anchor, int alias) +{ + yaml_string_t string = STRING(anchor, strlen((char *)anchor)); + + if (string.start == string.end) { + return yaml_emitter_set_emitter_error(emitter, alias ? + "alias value must not be empty" : + "anchor value must not be empty"); + } + + while (string.pointer != string.end) { + if (!IS_ALPHA(string)) { + return yaml_emitter_set_emitter_error(emitter, alias ? 
+ "alias value must contain alphanumerical characters only" : + "anchor value must contain alphanumerical characters only"); + } + MOVE(string); + } +} + +/* + * Check if a tag is valid. + */ + +static int +yaml_emitter_analyze_tag(yaml_emitter_t *emitter, + yaml_char_t *tag) +{ + yaml_string_t string = STRING(tag, strlen((char *)tag)); + yaml_tag_directive_t *tag_directive; + + if (string.start == string.end) { + return yaml_emitter_set_emitter_error(emitter, + "tag value must not be empty"); + } + + for (tag_directive = emitter->tag_directives.start; + tag_directive != emitter->tag_directives.end; tag_directive ++) { + size_t prefix_length = strlen((char *)tag_directive->prefix); + if (prefix_length < (string.end - string.start) + && strncmp((char *)tag_directive->prefix, (char *)string.start, + prefix_length) == 0) + { + emitter->tag_data.handle = tag_directive->handle; + emitter->tag_data.handle_length = + strlen((char *)tag_directive->handle); + emitter->tag_data.suffix = string.start + prefix_length; + emitter->tag_data.suffix_length = + (string.end - string.start) - prefix_length; + return 1; + } + } + + emitter->tag_data.suffix = string.start; + emitter->tag_data.suffix_length = string.end - string.start; + + return 1; +} + +/* + * Check if a scalar is valid. 
+ */
+
+static int
+yaml_emitter_analyze_scalar(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length)
+{
+    yaml_string_t string = STRING(value, length);
+    /*
+     * Overview: the scalar is recorded in emitter->scalar_data and scanned
+     * once, character by character.  The local flags below collect what was
+     * seen; after the loop they are turned into the multiline and
+     * xxx_allowed members of emitter->scalar_data.  Every path visible here
+     * returns 1.
+     */
+    int block_indicators = 0;
+    int flow_indicators = 0;
+    int line_breaks = 0;
+    int special_characters = 0;
+
+    int inline_spaces = 0;          /* NOTE(review): set in the loop, never read. */
+    int inline_breaks = 0;          /* NOTE(review): set in the loop, never read. */
+    int leading_spaces = 0;
+    int leading_breaks = 0;
+    int trailing_spaces = 0;        /* NOTE(review): never set in this function. */
+    int trailing_breaks = 0;        /* NOTE(review): never set in this function. */
+    int inline_breaks_spaces = 0;
+    int mixed_breaks_spaces = 0;
+
+    int preceeded_by_space = 0;
+    int followed_by_space = 0;
+    int spaces = 0;                 /* The current whitespace run contains a space. */
+    int breaks = 0;                 /* The current whitespace run contains a break. */
+    int mixed = 0;                  /* A break was seen after a space in the run. */
+    int leading = 0;                /* The run started at the first character. */
+
+    emitter->scalar_data.value = value;
+    emitter->scalar_data.length = length;
+
+    if (string.start == string.end)     /* Empty scalar: fixed answer, no scan. */
+    {
+        emitter->scalar_data.multiline = 0;
+        emitter->scalar_data.flow_plain_allowed = 0;
+        emitter->scalar_data.block_plain_allowed = 1;
+        emitter->scalar_data.single_quoted_allowed = 1;
+        emitter->scalar_data.block_allowed = 0;
+
+        return 1;
+    }
+    /* A scalar starting with three dashes or three dots sets both flags. */
+    if ((CHECK_AT(string, '-', 0)
+                && CHECK_AT(string, '-', 1)
+                && CHECK_AT(string, '-', 2))
+            || (CHECK_AT(string, '.', 0)
+                && CHECK_AT(string, '.', 1)
+                && CHECK_AT(string, '.', 2))) {
+        block_indicators = 1;
+        flow_indicators = 1;
+    }
+
+    preceeded_by_space = 1;     /* The start of the scalar counts as preceded by space. */
+    followed_by_space = IS_BLANKZ_AT(string, WIDTH(string));    /* Tests the character after the current one. */
+
+    while (string.pointer != string.end)
+    {
+        if (string.start == string.pointer)     /* Indicator checks for the first character. */
+        {
+            if (CHECK(string, '#') || CHECK(string, ',')
+                    || CHECK(string, '[') || CHECK(string, ']')
+                    || CHECK(string, '{') || CHECK(string, '}')
+                    || CHECK(string, '&') || CHECK(string, '*')
+                    || CHECK(string, '!') || CHECK(string, '|')
+                    || CHECK(string, '>') || CHECK(string, '\'')
+                    || CHECK(string, '"') || CHECK(string, '%')
+                    || CHECK(string, '@') || CHECK(string, '`')) {
+                flow_indicators = 1;
+                block_indicators = 1;
+            }
+
+            if (CHECK(string, '?') || CHECK(string, ':')) {
+                flow_indicators = 1;
+                if (followed_by_space) {
+                    block_indicators = 1;
+                }
+            }
+
+            if (CHECK(string, '-') && followed_by_space) {
+                flow_indicators = 1;
+                block_indicators = 1;
+            }
+        }
+        else                                    /* Indicator checks for every later character. */
+        {
+            if (CHECK(string, ',') || CHECK(string, '?')
+                    || CHECK(string, '[') || CHECK(string, ']')
+                    || CHECK(string, '{') || CHECK(string, '}')) {
+                flow_indicators = 1;
+            }
+
+            if (CHECK(string, ':')) {
+                flow_indicators = 1;
+                if (followed_by_space) {
+                    block_indicators = 1;
+                }
+            }
+
+            if (CHECK(string, '#') && preceeded_by_space) {
+                flow_indicators = 1;
+                block_indicators = 1;
+            }
+        }
+        /* Non-printable characters, and non-ASCII ones unless emitter->unicode is set, are special. */
+        if (!IS_PRINTABLE(string)
+                || (!IS_ASCII(string) && !emitter->unicode)) {
+            special_characters = 1;
+        }
+
+        if (IS_BREAK(string)) {
+            line_breaks = 1;
+        }
+        /* Track the current run of spaces and breaks. */
+        if (IS_SPACE(string))
+        {
+            spaces = 1;
+            if (string.start == string.pointer) {
+                leading = 1;
+            }
+        }
+
+        else if (IS_BREAK(string))
+        {
+            if (spaces) {
+                mixed = 1;
+            }
+            breaks = 1;
+            if (string.start == string.pointer) {
+                leading = 1;
+            }
+        }
+
+        else if (spaces || breaks)      /* A run has just ended: classify it and reset the run state. */
+        {
+            if (leading) {
+                if (spaces && breaks) {
+                    mixed_breaks_spaces = 1;
+                }
+                else if (spaces) {
+                    leading_spaces = 1;
+                }
+                else if (breaks) {
+                    leading_breaks = 1;
+                }
+            }
+            else {
+                if (mixed) {
+                    mixed_breaks_spaces = 1;
+                }
+                else if (spaces && breaks) {
+                    inline_breaks_spaces = 1;
+                }
+                else if (spaces) {
+                    inline_spaces = 1;
+                }
+                else if (breaks) {
+                    inline_breaks = 1;
+                }
+            }
+            spaces = breaks = mixed = leading = 0;
+        }
+        /* Advance, refreshing the look-behind and look-ahead flags. */
+        preceeded_by_space = IS_BLANKZ(string);
+        MOVE(string);
+        if (string.pointer != string.end) {
+            followed_by_space = IS_BLANKZ_AT(string, WIDTH(string));
+        }
+    }
+    /*
+     * NOTE(review): a run of spaces or breaks that reaches the end of the
+     * scalar is not classified here, because the loop ends without visiting
+     * the branch above; trailing_spaces and trailing_breaks stay 0.
+     */
+    emitter->scalar_data.multiline = line_breaks;
+    /* Start with every style allowed, then withdraw styles flag by flag. */
+    emitter->scalar_data.flow_plain_allowed = 1;
+    emitter->scalar_data.block_plain_allowed = 1;
+    emitter->scalar_data.single_quoted_allowed = 1;
+    emitter->scalar_data.block_allowed = 1;
+
+    if (leading_spaces || leading_breaks || trailing_spaces) {
+        emitter->scalar_data.flow_plain_allowed = 0;
+        emitter->scalar_data.block_plain_allowed = 0;
+        emitter->scalar_data.block_allowed = 0;
+    }
+
+    if (trailing_breaks) {
+        emitter->scalar_data.flow_plain_allowed = 0;
+        emitter->scalar_data.block_plain_allowed = 0;
+    }
+
+    if (inline_breaks_spaces) {
+        emitter->scalar_data.flow_plain_allowed = 0;
+        emitter->scalar_data.block_plain_allowed = 0;
+        emitter->scalar_data.single_quoted_allowed = 0;
+    }
+
+    if (mixed_breaks_spaces || special_characters) {
+        emitter->scalar_data.flow_plain_allowed = 0;
+        emitter->scalar_data.block_plain_allowed = 0;
+        emitter->scalar_data.single_quoted_allowed = 0;
+        emitter->scalar_data.block_allowed = 0;
+    }
+
+    if (line_breaks) {
+        emitter->scalar_data.flow_plain_allowed = 0;
+        emitter->scalar_data.block_plain_allowed = 0;
+    }
+
+    if (flow_indicators) {
+        emitter->scalar_data.flow_plain_allowed = 0;
+    }
+
+    if (block_indicators) {
+        emitter->scalar_data.block_plain_allowed = 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Check if the event data is valid.
+ *
+ * Resets the anchor, tag handle, tag suffix and scalar value/length fields
+ * of the emitter, then analyzes whatever the event carries: the anchor of
+ * an alias, and the optional anchor and tag of a scalar, sequence start or
+ * mapping start.  A tag is analyzed only in canonical mode or when the node
+ * is not marked implicit.  The value of a scalar is always analyzed.
+ * Returns 0 as soon as one of the analyzers returns 0 and 1 otherwise; all
+ * other event types are accepted without checks.
+ */
+
+static int
+yaml_emitter_analyze_event(yaml_emitter_t *emitter,
+        yaml_event_t *event)
+{
+    emitter->anchor_data.anchor = NULL;
+    emitter->anchor_data.anchor_length = 0;
+    emitter->tag_data.handle = NULL;
+    emitter->tag_data.handle_length = 0;
+    emitter->tag_data.suffix = NULL;
+    emitter->tag_data.suffix_length = 0;
+    emitter->scalar_data.value = NULL;
+    emitter->scalar_data.length = 0;
+
+    switch (event->type)
+    {
+        case YAML_ALIAS_EVENT:
+            if (!yaml_emitter_analyze_anchor(emitter,
+                        event->data.alias.anchor, 1))
+                return 0;
+            return 1;
+
+        case YAML_SCALAR_EVENT:
+            if (event->data.scalar.anchor) {
+                if (!yaml_emitter_analyze_anchor(emitter,
+                            event->data.scalar.anchor, 0))
+                    return 0;
+            }
+            if (event->data.scalar.tag && (emitter->canonical ||
+                        (!event->data.scalar.plain_implicit
+                         && !event->data.scalar.quoted_implicit))) {
+                if (!yaml_emitter_analyze_tag(emitter, event->data.scalar.tag))
+                    return 0;
+            }
+            if (!yaml_emitter_analyze_scalar(emitter,
+                        event->data.scalar.value, event->data.scalar.length))
+                return 0;
+            return 1;
+
+        case YAML_SEQUENCE_START_EVENT:
+            if (event->data.sequence_start.anchor) {
+                if (!yaml_emitter_analyze_anchor(emitter,
+                            event->data.sequence_start.anchor, 0))
+                    return 0;
+            }
+            if (event->data.sequence_start.tag && (emitter->canonical ||
+                        !event->data.sequence_start.implicit)) {
+                if (!yaml_emitter_analyze_tag(emitter,
+                            event->data.sequence_start.tag))
+                    return 0;
+            }
+            return 1;
+
+        case YAML_MAPPING_START_EVENT:
+            if (event->data.mapping_start.anchor) {
+                if (!yaml_emitter_analyze_anchor(emitter,
+                            event->data.mapping_start.anchor, 0))
+                    return 0;
+            }
+            if (event->data.mapping_start.tag && (emitter->canonical ||
+                        !event->data.mapping_start.implicit)) {
+                if (!yaml_emitter_analyze_tag(emitter,
+                            event->data.mapping_start.tag))
+                    return 0;
+            }
+            return 1;
+
+        default:
+            return 1;
+    }
+}
+
+/*
+ * Write the BOM character.
+ *
+ * After FLUSH(emitter) succeeds, the three bytes EF BB BF are stored at
+ * emitter->buffer.pointer, which is advanced past them.  Returns 0 if
+ * FLUSH fails and 1 otherwise.
+ */
+
+static int
+yaml_emitter_write_bom(yaml_emitter_t *emitter)
+{
+    if (!FLUSH(emitter)) return 0;
+
+    *(emitter->buffer.pointer++) = (yaml_char_t) '\xEF';
+    *(emitter->buffer.pointer++) = (yaml_char_t) '\xBB';
+    *(emitter->buffer.pointer++) = (yaml_char_t) '\xBF';
+
+    return 1;
+}
+/*
+ * Move the output to the current indentation column.
+ *
+ * A negative emitter->indent is treated as 0.  A line break is written
+ * first unless the emitter is still in the indentation of the line and the
+ * column is before the target, or exactly at it with whitespace just
+ * written.  Spaces are then written while emitter->column is below the
+ * target (presumably PUT advances the column -- confirm against the macro).
+ * Sets the whitespace and indention flags.  Returns 0 if a write fails.
+ */
+static int
+yaml_emitter_write_indent(yaml_emitter_t *emitter)
+{
+    int indent = (emitter->indent >= 0) ? emitter->indent : 0;
+
+    if (!emitter->indention || emitter->column > indent
+            || (emitter->column == indent && !emitter->whitespace)) {
+        if (!PUT_BREAK(emitter)) return 0;
+    }
+
+    while (emitter->column < indent) {
+        if (!PUT(emitter, ' ')) return 0;
+    }
+
+    emitter->whitespace = 1;
+    emitter->indention = 1;
+
+    return 1;
+}
+/*
+ * Write an indicator string.
+ *
+ * indicator is a NUL-terminated string.  When need_whitespace is set and
+ * the last output was not whitespace, a single space is written first.
+ * Afterwards emitter->whitespace is set to is_whitespace, and
+ * emitter->indention stays set only if is_indention is also set.
+ * Returns 0 if a write fails and 1 otherwise.
+ */
+static int
+yaml_emitter_write_indicator(yaml_emitter_t *emitter,
+        char *indicator, int need_whitespace,
+        int is_whitespace, int is_indention)
+{
+    yaml_string_t string = STRING((yaml_char_t *)indicator, strlen(indicator));
+
+    if (need_whitespace && !emitter->whitespace) {
+        if (!PUT(emitter, ' ')) return 0;
+    }
+
+    while (string.pointer != string.end) {
+        if (!WRITE(emitter, string)) return 0;
+    }
+
+    emitter->whitespace = is_whitespace;
+    emitter->indention = (emitter->indention && is_indention);
+
+    return 1;
+}
+/*
+ * Write the length bytes of an anchor name as they are, then clear the
+ * whitespace and indention flags.  Returns 0 if a write fails.
+ */
+static int
+yaml_emitter_write_anchor(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length)
+{
+    yaml_string_t string = STRING(value, length);
+
+    while (string.pointer != string.end) {
+        if (!WRITE(emitter, string)) return 0;
+    }
+
+    emitter->whitespace = 0;
+    emitter->indention = 0;
+
+    return 1;
+}
+/*
+ * Write the length bytes of a tag handle as they are, then clear the
+ * whitespace and indention flags.  Returns 0 if a write fails.
+ */
+static int
+yaml_emitter_write_tag_handle(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length)
+{
+    yaml_string_t string = STRING(value, length);
+
+    while (string.pointer != string.end) {
+        if (!WRITE(emitter, string)) return 0;
+    }
+
+    emitter->whitespace = 0;
+    emitter->indention = 0;
+
+    return 1;
+}
+/*
+ * Placeholder: writes nothing, ignores its arguments and returns 0.
+ */
+static int
+yaml_emitter_write_tag_content(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length)
+{
+    return 0;
+}
+/*
+ * Placeholder: writes nothing, ignores its arguments and returns 0.
+ */
+static int
+yaml_emitter_write_plain_scalar(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length, int allow_breaks)
+{
+    return 0;
+}
+/*
+ * Placeholder: writes nothing, ignores its arguments and returns 0.
+ */
+static int
+yaml_emitter_write_single_quoted_scalar(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length, int allow_breaks)
+{
+    return 0;
+}
+/*
+ * Placeholder: writes nothing, ignores its arguments and returns 0.
+ */
+static int
+yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length, int allow_breaks)
+{
+    return 0;
+}
+/*
+ * Placeholder: writes nothing, ignores its arguments and returns 0.
+ */
+static int
+yaml_emitter_write_literal_scalar(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length)
+{
+    return 0;
+}
+/*
+ * Placeholder: writes nothing, ignores its arguments and returns 0.
+ */
+static int
+yaml_emitter_write_folded_scalar(yaml_emitter_t *emitter,
+        yaml_char_t *value, size_t length)
+{
+    return 0;
+}
+
diff --git a/src/scanner.c b/src/scanner.c
index 42657631..1414401d 100644
--- a/src/scanner.c
+++ b/src/scanner.c
@@ -487,190 +487,6 @@
      ? 1 \
      : yaml_parser_update_buffer(parser, (length)))
 
-/*
- * Check the octet at the specified position.
- */
-
-#define CHECK_AT(parser,octet,offset) \
-    (parser->buffer.pointer[offset] == (yaml_char_t)(octet))
-
-/*
- * Check the current octet in the buffer.
- */
-
-#define CHECK(parser,octet) CHECK_AT(parser,(octet),0)
-
-/*
- * Check if the character at the specified position is an alphabetical
- * character, a digit, '_', or '-'.
- */
-
-#define IS_ALPHA_AT(parser,offset) \
-    ((parser->buffer.pointer[offset] >= (yaml_char_t) '0' && \
-      parser->buffer.pointer[offset] <= (yaml_char_t) '9') || \
-     (parser->buffer.pointer[offset] >= (yaml_char_t) 'A' && \
-      parser->buffer.pointer[offset] <= (yaml_char_t) 'Z') || \
-     (parser->buffer.pointer[offset] >= (yaml_char_t) 'a' && \
-      parser->buffer.pointer[offset] <= (yaml_char_t) 'z') || \
-     parser->buffer.pointer[offset] == '_' || \
-     parser->buffer.pointer[offset] == '-')
-
-#define IS_ALPHA(parser) IS_ALPHA_AT(parser,0)
-
-/*
- * Check if the character at the specified position is a digit.
- */
-
-#define IS_DIGIT_AT(parser,offset) \
-    ((parser->buffer.pointer[offset] >= (yaml_char_t) '0' && \
-      parser->buffer.pointer[offset] <= (yaml_char_t) '9'))
-
-#define IS_DIGIT(parser) IS_DIGIT_AT(parser,0)
-
-/*
- * Get the value of a digit.
- */
-
-#define AS_DIGIT_AT(parser,offset) \
-    (parser->buffer.pointer[offset] - (yaml_char_t) '0')
-
-#define AS_DIGIT(parser) AS_DIGIT_AT(parser,0)
-
-/*
- * Check if the character at the specified position is a hex-digit.
- */ - -#define IS_HEX_AT(parser,offset) \ - ((parser->buffer.pointer[offset] >= (yaml_char_t) '0' && \ - parser->buffer.pointer[offset] <= (yaml_char_t) '9') || \ - (parser->buffer.pointer[offset] >= (yaml_char_t) 'A' && \ - parser->buffer.pointer[offset] <= (yaml_char_t) 'F') || \ - (parser->buffer.pointer[offset] >= (yaml_char_t) 'a' && \ - parser->buffer.pointer[offset] <= (yaml_char_t) 'f')) - -#define IS_HEX(parser) IS_HEX_AT(parser,0) - -/* - * Get the value of a hex-digit. - */ - -#define AS_HEX_AT(parser,offset) \ - ((parser->buffer.pointer[offset] >= (yaml_char_t) 'A' && \ - parser->buffer.pointer[offset] <= (yaml_char_t) 'F') ? \ - (parser->buffer.pointer[offset] - (yaml_char_t) 'A' + 10) : \ - (parser->buffer.pointer[offset] >= (yaml_char_t) 'a' && \ - parser->buffer.pointer[offset] <= (yaml_char_t) 'f') ? \ - (parser->buffer.pointer[offset] - (yaml_char_t) 'a' + 10) : \ - (parser->buffer.pointer[offset] - (yaml_char_t) '0')) - -#define AS_HEX(parser) AS_HEX_AT(parser,0) - -/* - * Check if the character at the specified position is NUL. - */ - -#define IS_Z_AT(parser,offset) CHECK_AT(parser,'\0',(offset)) - -#define IS_Z(parser) IS_Z_AT(parser,0) - -/* - * Check if the character at the specified position is BOM. - */ - -#define IS_BOM_AT(parser,offset) \ - (CHECK_AT(parser,'\xEF',(offset)) \ - && CHECK_AT(parser,'\xBB',(offset)+1) \ - && CHECK_AT(parser,'\xBF',(offset)+1)) /* BOM (#xFEFF) */ - -#define IS_BOM(parser) IS_BOM_AT(parser,0) - -/* - * Check if the character at the specified position is space. - */ - -#define IS_SPACE_AT(parser,offset) CHECK_AT(parser,' ',(offset)) - -#define IS_SPACE(parser) IS_SPACE_AT(parser,0) - -/* - * Check if the character at the specified position is tab. - */ - -#define IS_TAB_AT(parser,offset) CHECK_AT(parser,'\t',(offset)) - -#define IS_TAB(parser) IS_TAB_AT(parser,0) - -/* - * Check if the character at the specified position is blank (space or tab). 
- */ - -#define IS_BLANK_AT(parser,offset) \ - (IS_SPACE_AT(parser,(offset)) || IS_TAB_AT(parser,(offset))) - -#define IS_BLANK(parser) IS_BLANK_AT(parser,0) - -/* - * Check if the character at the specified position is a line break. - */ - -#define IS_BREAK_AT(parser,offset) \ - (CHECK_AT(parser,'\r',(offset)) /* CR (#xD)*/ \ - || CHECK_AT(parser,'\n',(offset)) /* LF (#xA) */ \ - || (CHECK_AT(parser,'\xC2',(offset)) \ - && CHECK_AT(parser,'\x85',(offset)+1)) /* NEL (#x85) */ \ - || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset)+1) \ - && CHECK_AT(parser,'\xA8',(offset)+2)) /* LS (#x2028) */ \ - || (CHECK_AT(parser,'\xE2',(offset)) \ - && CHECK_AT(parser,'\x80',(offset)+1) \ - && CHECK_AT(parser,'\xA9',(offset)+2))) /* PS (#x2029) */ - -#define IS_BREAK(parser) IS_BREAK_AT(parser,0) - -#define IS_CRLF_AT(parser,offset) \ - (CHECK_AT(parser,'\r',(offset)) && CHECK_AT(parser,'\n',(offset)+1)) - -#define IS_CRLF(parser) IS_CRLF_AT(parser,0) - -/* - * Check if the character is a line break or NUL. - */ - -#define IS_BREAKZ_AT(parser,offset) \ - (IS_BREAK_AT(parser,(offset)) || IS_Z_AT(parser,(offset))) - -#define IS_BREAKZ(parser) IS_BREAKZ_AT(parser,0) - -/* - * Check if the character is a line break, space, or NUL. - */ - -#define IS_SPACEZ_AT(parser,offset) \ - (IS_SPACE_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) - -#define IS_SPACEZ(parser) IS_SPACEZ_AT(parser,0) - -/* - * Check if the character is a line break, space, tab, or NUL. - */ - -#define IS_BLANKZ_AT(parser,offset) \ - (IS_BLANK_AT(parser,(offset)) || IS_BREAKZ_AT(parser,(offset))) - -#define IS_BLANKZ(parser) IS_BLANKZ_AT(parser,0) - -/* - * Determine the width of the character. - */ - -#define WIDTH_AT(parser,offset) \ - ((parser->buffer.pointer[offset] & 0x80) == 0x00 ? 1 : \ - (parser->buffer.pointer[offset] & 0xE0) == 0xC0 ? 2 : \ - (parser->buffer.pointer[offset] & 0xF0) == 0xE0 ? 3 : \ - (parser->buffer.pointer[offset] & 0xF8) == 0xF0 ? 
4 : 0) - -#define WIDTH(parser) WIDTH_AT(parser,0) - /* * Advance the buffer pointer. */ @@ -679,21 +495,21 @@ (parser->mark.index ++, \ parser->mark.column ++, \ parser->unread --, \ - parser->buffer.pointer += WIDTH(parser)) + parser->buffer.pointer += WIDTH(parser->buffer)) #define SKIP_LINE(parser) \ - (IS_CRLF(parser) ? \ + (IS_CRLF(parser->buffer) ? \ (parser->mark.index += 2, \ parser->mark.column = 0, \ parser->mark.line ++, \ parser->unread -= 2, \ parser->buffer.pointer += 2) : \ - IS_BREAK(parser) ? \ + IS_BREAK(parser->buffer) ? \ (parser->mark.index ++, \ parser->mark.column = 0, \ parser->mark.line ++, \ parser->unread --, \ - parser->buffer.pointer += WIDTH(parser)) : 0) + parser->buffer.pointer += WIDTH(parser->buffer)) : 0) /* * Copy a character to a string buffer and advance pointers. @@ -701,20 +517,7 @@ #define READ(parser,string) \ (STRING_EXTEND(parser,string) ? \ - (((*parser->buffer.pointer & 0x80) == 0x00 ? \ - (*((string).pointer++) = *(parser->buffer.pointer++)) : \ - (*parser->buffer.pointer & 0xE0) == 0xC0 ? \ - (*((string).pointer++) = *(parser->buffer.pointer++), \ - *((string).pointer++) = *(parser->buffer.pointer++)) : \ - (*parser->buffer.pointer & 0xF0) == 0xE0 ? \ - (*((string).pointer++) = *(parser->buffer.pointer++), \ - *((string).pointer++) = *(parser->buffer.pointer++), \ - *((string).pointer++) = *(parser->buffer.pointer++)) : \ - (*parser->buffer.pointer & 0xF8) == 0xF0 ? \ - (*((string).pointer++) = *(parser->buffer.pointer++), \ - *((string).pointer++) = *(parser->buffer.pointer++), \ - *((string).pointer++) = *(parser->buffer.pointer++), \ - *((string).pointer++) = *(parser->buffer.pointer++)) : 0), \ + (COPY(string,parser->buffer), \ parser->mark.index ++, \ parser->mark.column ++, \ parser->unread --, \ @@ -726,31 +529,34 @@ #define READ_LINE(parser,string) \ (STRING_EXTEND(parser,string) ? \ - (((CHECK_AT(parser,'\r',0) && CHECK_AT(parser,'\n',1)) ? 
/* CR LF -> LF */ \ + (((CHECK_AT(parser->buffer,'\r',0) \ + && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \ (*((string).pointer++) = (yaml_char_t) '\n', \ parser->buffer.pointer += 2, \ parser->mark.index += 2, \ parser->mark.column = 0, \ parser->mark.line ++, \ parser->unread -= 2) : \ - (CHECK_AT(parser,'\r',0) || CHECK_AT(parser,'\n',0)) ? /* CR|LF -> LF */ \ + (CHECK_AT(parser->buffer,'\r',0) \ + || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \ (*((string).pointer++) = (yaml_char_t) '\n', \ parser->buffer.pointer ++, \ parser->mark.index ++, \ parser->mark.column = 0, \ parser->mark.line ++, \ parser->unread --) : \ - (CHECK_AT(parser,'\xC2',0) && CHECK_AT(parser,'\x85',1)) ? /* NEL -> LF */ \ + (CHECK_AT(parser->buffer,'\xC2',0) \ + && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \ (*((string).pointer++) = (yaml_char_t) '\n', \ parser->buffer.pointer += 2, \ parser->mark.index ++, \ parser->mark.column = 0, \ parser->mark.line ++, \ parser->unread --) : \ - (CHECK_AT(parser,'\xE2',0) && \ - CHECK_AT(parser,'\x80',1) && \ - (CHECK_AT(parser,'\xA8',2) || \ - CHECK_AT(parser,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ + (CHECK_AT(parser->buffer,'\xE2',0) && \ + CHECK_AT(parser->buffer,'\x80',1) && \ + (CHECK_AT(parser->buffer,'\xA8',2) || \ + CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \ (*((string).pointer++) = *(parser->buffer.pointer++), \ *((string).pointer++) = *(parser->buffer.pointer++), \ *((string).pointer++) = *(parser->buffer.pointer++), \ @@ -1088,111 +894,113 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) /* Is it the end of the stream? */ - if (IS_Z(parser)) + if (IS_Z(parser->buffer)) return yaml_parser_fetch_stream_end(parser); /* Is it a directive? */ - if (parser->mark.column == 0 && CHECK(parser, '%')) + if (parser->mark.column == 0 && CHECK(parser->buffer, '%')) return yaml_parser_fetch_directive(parser); /* Is it the document start indicator? 
*/ if (parser->mark.column == 0 - && CHECK_AT(parser, '-', 0) - && CHECK_AT(parser, '-', 1) - && CHECK_AT(parser, '-', 2) - && IS_BLANKZ_AT(parser, 3)) + && CHECK_AT(parser->buffer, '-', 0) + && CHECK_AT(parser->buffer, '-', 1) + && CHECK_AT(parser->buffer, '-', 2) + && IS_BLANKZ_AT(parser->buffer, 3)) return yaml_parser_fetch_document_indicator(parser, YAML_DOCUMENT_START_TOKEN); /* Is it the document end indicator? */ if (parser->mark.column == 0 - && CHECK_AT(parser, '.', 0) - && CHECK_AT(parser, '.', 1) - && CHECK_AT(parser, '.', 2) - && IS_BLANKZ_AT(parser, 3)) + && CHECK_AT(parser->buffer, '.', 0) + && CHECK_AT(parser->buffer, '.', 1) + && CHECK_AT(parser->buffer, '.', 2) + && IS_BLANKZ_AT(parser->buffer, 3)) return yaml_parser_fetch_document_indicator(parser, YAML_DOCUMENT_END_TOKEN); /* Is it the flow sequence start indicator? */ - if (CHECK(parser, '[')) + if (CHECK(parser->buffer, '[')) return yaml_parser_fetch_flow_collection_start(parser, YAML_FLOW_SEQUENCE_START_TOKEN); /* Is it the flow mapping start indicator? */ - if (CHECK(parser, '{')) + if (CHECK(parser->buffer, '{')) return yaml_parser_fetch_flow_collection_start(parser, YAML_FLOW_MAPPING_START_TOKEN); /* Is it the flow sequence end indicator? */ - if (CHECK(parser, ']')) + if (CHECK(parser->buffer, ']')) return yaml_parser_fetch_flow_collection_end(parser, YAML_FLOW_SEQUENCE_END_TOKEN); /* Is it the flow mapping end indicator? */ - if (CHECK(parser, '}')) + if (CHECK(parser->buffer, '}')) return yaml_parser_fetch_flow_collection_end(parser, YAML_FLOW_MAPPING_END_TOKEN); /* Is it the flow entry indicator? */ - if (CHECK(parser, ',')) + if (CHECK(parser->buffer, ',')) return yaml_parser_fetch_flow_entry(parser); /* Is it the block entry indicator? */ - if (CHECK(parser, '-') && IS_BLANKZ_AT(parser, 1)) + if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1)) return yaml_parser_fetch_block_entry(parser); /* Is it the key indicator? 
*/ - if (CHECK(parser, '?') && (parser->flow_level || IS_BLANKZ_AT(parser, 1))) + if (CHECK(parser->buffer, '?') + && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) return yaml_parser_fetch_key(parser); /* Is it the value indicator? */ - if (CHECK(parser, ':') && (parser->flow_level || IS_BLANKZ_AT(parser, 1))) + if (CHECK(parser->buffer, ':') + && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1))) return yaml_parser_fetch_value(parser); /* Is it an alias? */ - if (CHECK(parser, '*')) + if (CHECK(parser->buffer, '*')) return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN); /* Is it an anchor? */ - if (CHECK(parser, '&')) + if (CHECK(parser->buffer, '&')) return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN); /* Is it a tag? */ - if (CHECK(parser, '!')) + if (CHECK(parser->buffer, '!')) return yaml_parser_fetch_tag(parser); /* Is it a literal scalar? */ - if (CHECK(parser, '|') && !parser->flow_level) + if (CHECK(parser->buffer, '|') && !parser->flow_level) return yaml_parser_fetch_block_scalar(parser, 1); /* Is it a folded scalar? */ - if (CHECK(parser, '>') && !parser->flow_level) + if (CHECK(parser->buffer, '>') && !parser->flow_level) return yaml_parser_fetch_block_scalar(parser, 0); /* Is it a single-quoted scalar? */ - if (CHECK(parser, '\'')) + if (CHECK(parser->buffer, '\'')) return yaml_parser_fetch_flow_scalar(parser, 1); /* Is it a double-quoted scalar? */ - if (CHECK(parser, '"')) + if (CHECK(parser->buffer, '"')) return yaml_parser_fetch_flow_scalar(parser, 0); /* @@ -1214,16 +1022,20 @@ yaml_parser_fetch_next_token(yaml_parser_t *parser) * The last rule is more restrictive than the specification requires. 
*/ - if (!(IS_BLANKZ(parser) || CHECK(parser, '-') || CHECK(parser, '?') - || CHECK(parser, ':') || CHECK(parser, ',') || CHECK(parser, '[') - || CHECK(parser, ']') || CHECK(parser, '{') || CHECK(parser, '}') - || CHECK(parser, '#') || CHECK(parser, '&') || CHECK(parser, '*') - || CHECK(parser, '!') || CHECK(parser, '|') || CHECK(parser, '>') - || CHECK(parser, '\'') || CHECK(parser, '"') || CHECK(parser, '%') - || CHECK(parser, '@') || CHECK(parser, '`')) || - (CHECK(parser, '-') && !IS_BLANK_AT(parser, 1)) || + if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-') + || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':') + || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[') + || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') + || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#') + || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*') + || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|') + || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'') + || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%') + || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) || + (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) || (!parser->flow_level && - (CHECK(parser, '?') || CHECK(parser, ':')) && !IS_BLANKZ_AT(parser, 1))) + (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')) + && !IS_BLANKZ_AT(parser->buffer, 1))) return yaml_parser_fetch_plain_scalar(parser); /* @@ -2101,7 +1913,7 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) if (!CACHE(parser, 1)) return 0; - if (parser->mark.column == 0 && IS_BOM(parser)) + if (parser->mark.column == 0 && IS_BOM(parser->buffer)) SKIP(parser); /* @@ -2116,17 +1928,17 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) if (!CACHE(parser, 1)) return 0; - while (CHECK(parser,' ') || + while (CHECK(parser->buffer,' ') || ((parser->flow_level || !parser->simple_key_allowed) && - CHECK(parser, '\t'))) { + CHECK(parser->buffer, '\t'))) { 
SKIP(parser); if (!CACHE(parser, 1)) return 0; } /* Eat a comment until a line break. */ - if (CHECK(parser, '#')) { - while (!IS_BREAKZ(parser)) { + if (CHECK(parser->buffer, '#')) { + while (!IS_BREAKZ(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) return 0; } @@ -2134,7 +1946,7 @@ yaml_parser_scan_to_next_token(yaml_parser_t *parser) /* If it is a line break, eat it. */ - if (IS_BREAK(parser)) + if (IS_BREAK(parser->buffer)) { if (!CACHE(parser, 2)) return 0; SKIP_LINE(parser); @@ -2234,13 +2046,13 @@ yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) if (!CACHE(parser, 1)) goto error; - while (IS_BLANK(parser)) { + while (IS_BLANK(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) goto error; } - if (CHECK(parser, '#')) { - while (!IS_BREAKZ(parser)) { + if (CHECK(parser->buffer, '#')) { + while (!IS_BREAKZ(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) goto error; } @@ -2248,7 +2060,7 @@ yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) /* Check if we are at the end of the line. */ - if (!IS_BREAKZ(parser)) { + if (!IS_BREAKZ(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a directive", start_mark, "did not found expected comment or line break"); goto error; @@ -2256,7 +2068,7 @@ yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token) /* Eat a line break. */ - if (IS_BREAK(parser)) { + if (IS_BREAK(parser->buffer)) { if (!CACHE(parser, 2)) goto error; SKIP_LINE(parser); } @@ -2294,7 +2106,7 @@ yaml_parser_scan_directive_name(yaml_parser_t *parser, if (!CACHE(parser, 1)) goto error; - while (IS_ALPHA(parser)) + while (IS_ALPHA(parser->buffer)) { if (!READ(parser, string)) goto error; if (!CACHE(parser, 1)) goto error; @@ -2310,7 +2122,7 @@ yaml_parser_scan_directive_name(yaml_parser_t *parser, /* Check for an blank character after the name. 
*/ - if (!IS_BLANKZ(parser)) { + if (!IS_BLANKZ(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a directive", start_mark, "found unexpected non-alphabetical character"); goto error; @@ -2341,7 +2153,7 @@ yaml_parser_scan_version_directive_value(yaml_parser_t *parser, if (!CACHE(parser, 1)) return 0; - while (IS_BLANK(parser)) { + while (IS_BLANK(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) return 0; } @@ -2353,7 +2165,7 @@ yaml_parser_scan_version_directive_value(yaml_parser_t *parser, /* Eat '.'. */ - if (!CHECK(parser, '.')) { + if (!CHECK(parser->buffer, '.')) { return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive", start_mark, "did not find expected digit or '.' character"); } @@ -2391,7 +2203,7 @@ yaml_parser_scan_version_directive_number(yaml_parser_t *parser, if (!CACHE(parser, 1)) return 0; - while (IS_DIGIT(parser)) + while (IS_DIGIT(parser->buffer)) { /* Check if the number is too long. */ @@ -2400,7 +2212,7 @@ yaml_parser_scan_version_directive_number(yaml_parser_t *parser, start_mark, "found extremely long version number"); } - value = value*10 + AS_DIGIT(parser); + value = value*10 + AS_DIGIT(parser->buffer); SKIP(parser); @@ -2438,7 +2250,7 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, if (!CACHE(parser, 1)) goto error; - while (IS_BLANK(parser)) { + while (IS_BLANK(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) goto error; } @@ -2452,7 +2264,7 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, if (!CACHE(parser, 1)) goto error; - if (!IS_BLANK(parser)) { + if (!IS_BLANK(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", start_mark, "did not find expected whitespace"); goto error; @@ -2460,7 +2272,7 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, /* Eat whitespaces. 
*/ - while (IS_BLANK(parser)) { + while (IS_BLANK(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) goto error; } @@ -2474,7 +2286,7 @@ yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, if (!CACHE(parser, 1)) goto error; - if (!IS_BLANKZ(parser)) { + if (!IS_BLANKZ(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive", start_mark, "did not find expected whitespace or line break"); goto error; @@ -2511,7 +2323,7 @@ yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, if (!CACHE(parser, 1)) goto error; - while (IS_ALPHA(parser)) { + while (IS_ALPHA(parser->buffer)) { if (!READ(parser, string)) goto error; if (!CACHE(parser, 1)) goto error; length ++; @@ -2526,9 +2338,11 @@ yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, * '?', ':', ',', ']', '}', '%', '@', '`'. */ - if (!length || !(IS_BLANKZ(parser) || CHECK(parser, '?') || CHECK(parser, ':') || - CHECK(parser, ',') || CHECK(parser, ']') || CHECK(parser, '}') || - CHECK(parser, '%') || CHECK(parser, '@') || CHECK(parser, '`'))) { + if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?') + || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',') + || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}') + || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@') + || CHECK(parser->buffer, '`'))) { yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ? "while scanning an anchor" : "while scanning an alias", start_mark, "did not find expected alphabetic or numeric character"); @@ -2568,7 +2382,7 @@ yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) if (!CACHE(parser, 2)) goto error; - if (CHECK_AT(parser, '<', 1)) + if (CHECK_AT(parser->buffer, '<', 1)) { /* Set the handle to '' */ @@ -2588,7 +2402,7 @@ yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) /* Check for '>' and eat it. 
*/ - if (!CHECK(parser, '>')) { + if (!CHECK(parser->buffer, '>')) { yaml_parser_set_scanner_error(parser, "while scanning a tag", start_mark, "did not find the expected '>'"); goto error; @@ -2646,7 +2460,7 @@ yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token) if (!CACHE(parser, 1)) goto error; - if (!IS_BLANKZ(parser)) { + if (!IS_BLANKZ(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a tag", start_mark, "did not found expected whitespace or line break"); goto error; @@ -2682,7 +2496,7 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, if (!CACHE(parser, 1)) goto error; - if (!CHECK(parser, '!')) { + if (!CHECK(parser->buffer, '!')) { yaml_parser_set_scanner_error(parser, directive ? "while scanning a tag directive" : "while scanning a tag", start_mark, "did not find expected '!'"); @@ -2697,7 +2511,7 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, if (!CACHE(parser, 1)) goto error; - while (IS_ALPHA(parser)) + while (IS_ALPHA(parser->buffer)) { if (!READ(parser, string)) goto error; if (!CACHE(parser, 1)) goto error; @@ -2705,7 +2519,7 @@ yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, /* Check if the trailing character is '!' and copy it. */ - if (CHECK(parser, '!')) + if (CHECK(parser->buffer, '!')) { if (!READ(parser, string)) goto error; } @@ -2778,17 +2592,21 @@ yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, * '%'. 
*/ - while (IS_ALPHA(parser) || CHECK(parser, ';') || CHECK(parser, '/') || - CHECK(parser, '?') || CHECK(parser, ':') || CHECK(parser, '@') || - CHECK(parser, '&') || CHECK(parser, '=') || CHECK(parser, '+') || - CHECK(parser, '$') || CHECK(parser, ',') || CHECK(parser, '.') || - CHECK(parser, '!') || CHECK(parser, '~') || CHECK(parser, '*') || - CHECK(parser, '\'') || CHECK(parser, '(') || CHECK(parser, ')') || - CHECK(parser, '[') || CHECK(parser, ']') || CHECK(parser, '%')) + while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';') + || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?') + || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@') + || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=') + || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$') + || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.') + || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~') + || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'') + || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')') + || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']') + || CHECK(parser->buffer, '%')) { /* Check if it is a URI-escape sequence. */ - if (CHECK(parser, '%')) { + if (CHECK(parser->buffer, '%')) { if (!yaml_parser_scan_uri_escapes(parser, directive, start_mark, &string)) goto error; } @@ -2841,7 +2659,9 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, if (!CACHE(parser, 3)) return 0; - if (!(CHECK(parser, '%') && IS_HEX_AT(parser, 1) && IS_HEX_AT(parser, 2))) { + if (!(CHECK(parser->buffer, '%') + && IS_HEX_AT(parser->buffer, 1) + && IS_HEX_AT(parser->buffer, 2))) { return yaml_parser_set_scanner_error(parser, directive ? "while parsing a %TAG directive" : "while parsing a tag", start_mark, "did not find URI escaped octet"); @@ -2849,7 +2669,7 @@ yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, /* Get the octet. 
*/ - octet = (AS_HEX_AT(parser, 1) << 4) + AS_HEX_AT(parser, 2); + octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2); /* If it is the leading octet, determine the length of the UTF-8 sequence. */ @@ -2923,11 +2743,11 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check for a chomping indicator. */ - if (CHECK(parser, '+') || CHECK(parser, '-')) + if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) { /* Set the chomping method and eat the indicator. */ - chomping = CHECK(parser, '+') ? +1 : -1; + chomping = CHECK(parser->buffer, '+') ? +1 : -1; SKIP(parser); @@ -2935,11 +2755,11 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, if (!CACHE(parser, 1)) goto error; - if (IS_DIGIT(parser)) + if (IS_DIGIT(parser->buffer)) { /* Check that the intendation is greater than 0. */ - if (CHECK(parser, '0')) { + if (CHECK(parser->buffer, '0')) { yaml_parser_set_scanner_error(parser, "while scanning a block scalar", start_mark, "found an intendation indicator equal to 0"); goto error; @@ -2947,7 +2767,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Get the intendation level and eat the indicator. */ - increment = AS_DIGIT(parser); + increment = AS_DIGIT(parser->buffer); SKIP(parser); } @@ -2955,22 +2775,22 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Do the same as above, but in the opposite order. */ - else if (IS_DIGIT(parser)) + else if (IS_DIGIT(parser->buffer)) { - if (CHECK(parser, '0')) { + if (CHECK(parser->buffer, '0')) { yaml_parser_set_scanner_error(parser, "while scanning a block scalar", start_mark, "found an intendation indicator equal to 0"); goto error; } - increment = AS_DIGIT(parser); + increment = AS_DIGIT(parser->buffer); SKIP(parser); if (!CACHE(parser, 1)) goto error; - if (CHECK(parser, '+') || CHECK(parser, '-')) { - chomping = CHECK(parser, '+') ? 
+1 : -1; + if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) { + chomping = CHECK(parser->buffer, '+') ? +1 : -1; SKIP(parser); } @@ -2980,13 +2800,13 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, if (!CACHE(parser, 1)) goto error; - while (IS_BLANK(parser)) { + while (IS_BLANK(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) goto error; } - if (CHECK(parser, '#')) { - while (!IS_BREAKZ(parser)) { + if (CHECK(parser->buffer, '#')) { + while (!IS_BREAKZ(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) goto error; } @@ -2994,7 +2814,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check if we are at the end of the line. */ - if (!IS_BREAKZ(parser)) { + if (!IS_BREAKZ(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a block scalar", start_mark, "did not found expected comment or line break"); goto error; @@ -3002,7 +2822,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Eat a line break. */ - if (IS_BREAK(parser)) { + if (IS_BREAK(parser->buffer)) { if (!CACHE(parser, 2)) goto error; SKIP_LINE(parser); } @@ -3024,7 +2844,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, if (!CACHE(parser, 1)) goto error; - while (parser->mark.column == indent && !IS_Z(parser)) + while (parser->mark.column == indent && !IS_Z(parser->buffer)) { /* * We are at the beginning of a non-empty line. @@ -3032,7 +2852,7 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Is it a trailing whitespace? */ - trailing_blank = IS_BLANK(parser); + trailing_blank = IS_BLANK(parser->buffer); /* Check if we need to fold the leading line break. */ @@ -3060,11 +2880,11 @@ yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Is it a leading whitespace? */ - leading_blank = IS_BLANK(parser); + leading_blank = IS_BLANK(parser->buffer); /* Consume the current line. 
*/ - while (!IS_BREAKZ(parser)) { + while (!IS_BREAKZ(parser->buffer)) { if (!READ(parser, string)) goto error; if (!CACHE(parser, 1)) goto error; } @@ -3131,7 +2951,8 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, if (!CACHE(parser, 1)) return 0; - while ((!*indent || parser->mark.column < *indent) && IS_SPACE(parser)) { + while ((!*indent || parser->mark.column < *indent) + && IS_SPACE(parser->buffer)) { SKIP(parser); if (!CACHE(parser, 1)) return 0; } @@ -3141,14 +2962,15 @@ yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, /* Check for a tab character messing the intendation. */ - if ((!*indent || parser->mark.column < *indent) && IS_TAB(parser)) { + if ((!*indent || parser->mark.column < *indent) + && IS_TAB(parser->buffer)) { return yaml_parser_set_scanner_error(parser, "while scanning a block scalar", start_mark, "found a tab character where an intendation space is expected"); } /* Have we found a non-empty line? */ - if (!IS_BREAK(parser)) break; + if (!IS_BREAK(parser->buffer)) break; /* Consume the line break. */ @@ -3206,13 +3028,13 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, if (!CACHE(parser, 4)) goto error; if (parser->mark.column == 0 && - ((CHECK_AT(parser, '-', 0) && - CHECK_AT(parser, '-', 1) && - CHECK_AT(parser, '-', 2)) || - (CHECK_AT(parser, '.', 0) && - CHECK_AT(parser, '.', 1) && - CHECK_AT(parser, '.', 2))) && - IS_BLANKZ_AT(parser, 3)) + ((CHECK_AT(parser->buffer, '-', 0) && + CHECK_AT(parser->buffer, '-', 1) && + CHECK_AT(parser->buffer, '-', 2)) || + (CHECK_AT(parser->buffer, '.', 0) && + CHECK_AT(parser->buffer, '.', 1) && + CHECK_AT(parser->buffer, '.', 2))) && + IS_BLANKZ_AT(parser->buffer, 3)) { yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", start_mark, "found unexpected document indicator"); @@ -3221,7 +3043,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check for EOF. 
*/ - if (IS_Z(parser)) { + if (IS_Z(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar", start_mark, "found unexpected end of stream"); goto error; @@ -3233,11 +3055,12 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, leading_blanks = 0; - while (!IS_BLANKZ(parser)) + while (!IS_BLANKZ(parser->buffer)) { /* Check for an escaped single quote. */ - if (single && CHECK_AT(parser, '\'', 0) && CHECK_AT(parser, '\'', 1)) + if (single && CHECK_AT(parser->buffer, '\'', 0) + && CHECK_AT(parser->buffer, '\'', 1)) { if (!STRING_EXTEND(parser, string)) goto error; *(string.pointer++) = '\''; @@ -3247,14 +3070,15 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check for the right quote. */ - else if (CHECK(parser, single ? '\'' : '"')) + else if (CHECK(parser->buffer, single ? '\'' : '"')) { break; } /* Check for an escaped line break. */ - else if (!single && CHECK(parser, '\\') && IS_BREAK_AT(parser, 1)) + else if (!single && CHECK(parser->buffer, '\\') + && IS_BREAK_AT(parser->buffer, 1)) { if (!CACHE(parser, 3)) goto error; SKIP(parser); @@ -3265,7 +3089,7 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check for an escape sequence. */ - else if (!single && CHECK(parser, '\\')) + else if (!single && CHECK(parser->buffer, '\\')) { int code_length = 0; @@ -3383,12 +3207,12 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, if (!CACHE(parser, code_length)) goto error; for (k = 0; k < code_length; k ++) { - if (!IS_HEX_AT(parser, k)) { + if (!IS_HEX_AT(parser->buffer, k)) { yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar", start_mark, "did not find expected hexdecimal number"); goto error; } - value = (value << 4) + AS_HEX_AT(parser, k); + value = (value << 4) + AS_HEX_AT(parser->buffer, k); } /* Check the value and write the character. 
*/ @@ -3438,16 +3262,16 @@ yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, /* Check if we are at the end of the scalar. */ - if (CHECK(parser, single ? '\'' : '"')) + if (CHECK(parser->buffer, single ? '\'' : '"')) break; /* Consume blank characters. */ if (!CACHE(parser, 1)) goto error; - while (IS_BLANK(parser) || IS_BREAK(parser)) + while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) { - if (IS_BLANK(parser)) + if (IS_BLANK(parser->buffer)) { /* Consume a space or a tab character. */ @@ -3568,26 +3392,28 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) if (!CACHE(parser, 4)) goto error; if (parser->mark.column == 0 && - ((CHECK_AT(parser, '-', 0) && - CHECK_AT(parser, '-', 1) && - CHECK_AT(parser, '-', 2)) || - (CHECK_AT(parser, '.', 0) && - CHECK_AT(parser, '.', 1) && - CHECK_AT(parser, '.', 2))) && - IS_BLANKZ_AT(parser, 3)) break; + ((CHECK_AT(parser->buffer, '-', 0) && + CHECK_AT(parser->buffer, '-', 1) && + CHECK_AT(parser->buffer, '-', 2)) || + (CHECK_AT(parser->buffer, '.', 0) && + CHECK_AT(parser->buffer, '.', 1) && + CHECK_AT(parser->buffer, '.', 2))) && + IS_BLANKZ_AT(parser->buffer, 3)) break; /* Check for a comment. */ - if (CHECK(parser, '#')) + if (CHECK(parser->buffer, '#')) break; /* Consume non-blank characters. */ - while (!IS_BLANKZ(parser)) + while (!IS_BLANKZ(parser->buffer)) { /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */ - if (parser->flow_level && CHECK(parser, ':') && !IS_BLANKZ_AT(parser, 1)) { + if (parser->flow_level + && CHECK(parser->buffer, ':') + && !IS_BLANKZ_AT(parser->buffer, 1)) { yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", start_mark, "found unexpected ':'"); goto error; @@ -3595,12 +3421,12 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) /* Check for indicators that may end a plain scalar. 
*/ - if ((CHECK(parser, ':') && IS_BLANKZ_AT(parser, 1)) || - (parser->flow_level && - (CHECK(parser, ',') || CHECK(parser, ':') || - CHECK(parser, '?') || CHECK(parser, '[') || - CHECK(parser, ']') || CHECK(parser, '{') || - CHECK(parser, '}')))) + if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1)) + || (parser->flow_level && + (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':') + || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[') + || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{') + || CHECK(parser->buffer, '}')))) break; /* Check if we need to join whitespaces and breaks. */ @@ -3649,20 +3475,21 @@ yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token) /* Is it the end? */ - if (!(IS_BLANK(parser) || IS_BREAK(parser))) + if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))) break; /* Consume blank characters. */ if (!CACHE(parser, 1)) goto error; - while (IS_BLANK(parser) || IS_BREAK(parser)) + while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)) { - if (IS_BLANK(parser)) + if (IS_BLANK(parser->buffer)) { /* Check for tab character that abuse intendation. */ - if (leading_blanks && parser->mark.column < indent && IS_TAB(parser)) { + if (leading_blanks && parser->mark.column < indent + && IS_TAB(parser->buffer)) { yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", start_mark, "found a tab character that violate intendation"); goto error; diff --git a/src/writer.c b/src/writer.c index 21313722..ec0e477d 100644 --- a/src/writer.c +++ b/src/writer.c @@ -37,6 +37,9 @@ yaml_emitter_flush(yaml_emitter_t *emitter) assert(emitter->write_handler); /* Write handler must be set. */ assert(emitter->encoding); /* Output encoding must be set. */ + emitter->buffer.last = emitter->buffer.pointer; + emitter->buffer.pointer = emitter->buffer.start; + /* Check if the buffer is empty. 
*/ if (emitter->buffer.start == emitter->buffer.last) { diff --git a/src/yaml_private.h b/src/yaml_private.h index efd1d43f..08a02323 100644 --- a/src/yaml_private.h +++ b/src/yaml_private.h @@ -111,6 +111,8 @@ yaml_string_join( #define NULL_STRING { NULL, NULL, NULL } +#define STRING(string,length) { (string), (string)+(length), (string) } + #define STRING_INIT(context,string,size) \ (((string).start = yaml_malloc(size)) ? \ ((string).pointer = (string).start, \ @@ -142,6 +144,253 @@ yaml_string_join( ((context)->error = YAML_MEMORY_ERROR, \ 0)) +/* + * String check operations. + */ + +/* + * Check the octet at the specified position. + */ + +#define CHECK_AT(string,octet,offset) \ + ((string).pointer[offset] == (yaml_char_t)(octet)) + +/* + * Check the current octet in the buffer. + */ + +#define CHECK(string,octet) CHECK_AT((string),(octet),0) + +/* + * Check if the character at the specified position is an alphabetical + * character, a digit, '_', or '-'. + */ + +#define IS_ALPHA_AT(string,offset) \ + (((string).pointer[offset] >= (yaml_char_t) '0' && \ + (string).pointer[offset] <= (yaml_char_t) '9') || \ + ((string).pointer[offset] >= (yaml_char_t) 'A' && \ + (string).pointer[offset] <= (yaml_char_t) 'Z') || \ + ((string).pointer[offset] >= (yaml_char_t) 'a' && \ + (string).pointer[offset] <= (yaml_char_t) 'z') || \ + (string).pointer[offset] == '_' || \ + (string).pointer[offset] == '-') + +#define IS_ALPHA(string) IS_ALPHA_AT((string),0) + +/* + * Check if the character at the specified position is a digit. + */ + +#define IS_DIGIT_AT(string,offset) \ + (((string).pointer[offset] >= (yaml_char_t) '0' && \ + (string).pointer[offset] <= (yaml_char_t) '9')) + +#define IS_DIGIT(string) IS_DIGIT_AT((string),0) + +/* + * Get the value of a digit. + */ + +#define AS_DIGIT_AT(string,offset) \ + ((string).pointer[offset] - (yaml_char_t) '0') + +#define AS_DIGIT(string) AS_DIGIT_AT((string),0) + +/* + * Check if the character at the specified position is a hex-digit. 
+ */ + +#define IS_HEX_AT(string,offset) \ + (((string).pointer[offset] >= (yaml_char_t) '0' && \ + (string).pointer[offset] <= (yaml_char_t) '9') || \ + ((string).pointer[offset] >= (yaml_char_t) 'A' && \ + (string).pointer[offset] <= (yaml_char_t) 'F') || \ + ((string).pointer[offset] >= (yaml_char_t) 'a' && \ + (string).pointer[offset] <= (yaml_char_t) 'f')) + +#define IS_HEX(string) IS_HEX_AT((string),0) + +/* + * Get the value of a hex-digit. + */ + +#define AS_HEX_AT(string,offset) \ + (((string).pointer[offset] >= (yaml_char_t) 'A' && \ + (string).pointer[offset] <= (yaml_char_t) 'F') ? \ + ((string).pointer[offset] - (yaml_char_t) 'A' + 10) : \ + ((string).pointer[offset] >= (yaml_char_t) 'a' && \ + (string).pointer[offset] <= (yaml_char_t) 'f') ? \ + ((string).pointer[offset] - (yaml_char_t) 'a' + 10) : \ + ((string).pointer[offset] - (yaml_char_t) '0')) + +#define AS_HEX(string) AS_HEX_AT((string),0) + +/* + * Check if the character is ASCII. + */ + +#define IS_ASCII_AT(string,offset) \ + ((string).pointer[offset] <= (yaml_char_t) '\x7F') + +#define IS_ASCII(string) IS_ASCII_AT((string),0) + +/* + * Check if the character can be printed unescaped. + */ + +#define IS_PRINTABLE_AT(string,offset) \ + (((string).pointer[offset] == 0x0A) /* . == #x0A */ \ + || ((string).pointer[offset] >= 0x20 /* #x20 <= . <= #x7E */ \ + && (string).pointer[offset] <= 0x7E) \ + || ((string).pointer[offset] == 0xC2 /* #0xA0 <= . <= #xD7FF */ \ + && (string).pointer[offset+1] >= 0xA0) \ + || ((string).pointer[offset] > 0xC2 \ + && (string).pointer[offset] < 0xED) \ + || ((string).pointer[offset] == 0xED \ + && (string).pointer[offset+1] < 0xA0) \ + || ((string).pointer[offset] == 0xEE) \ + || ((string).pointer[offset] == 0xEF /* #xE000 <= . <= #xFFFD */ \ + && !((string).pointer[offset+1] == 0xBB /* && . 
!= #xFEFF */ \ + && (string).pointer[offset+2] == 0xBF) \ + && !((string).pointer[offset+1] == 0xBF \ + && ((string).pointer[offset+2] == 0xBE \ + || (string).pointer[offset+2] == 0xBF)))) + +#define IS_PRINTABLE(string) IS_PRINTABLE_AT((string),0) + +/* + * Check if the character at the specified position is NUL. + */ + +#define IS_Z_AT(string,offset) CHECK_AT((string),'\0',(offset)) + +#define IS_Z(string) IS_Z_AT((string),0) + +/* + * Check if the character at the specified position is BOM. + */ + +#define IS_BOM_AT(string,offset) \ + (CHECK_AT((string),'\xEF',(offset)) \ + && CHECK_AT((string),'\xBB',(offset)+1) \ + && CHECK_AT((string),'\xBF',(offset)+2)) /* BOM (#xFEFF) */ + +#define IS_BOM(string) IS_BOM_AT(string,0) + +/* + * Check if the character at the specified position is space. + */ + +#define IS_SPACE_AT(string,offset) CHECK_AT((string),' ',(offset)) + +#define IS_SPACE(string) IS_SPACE_AT((string),0) + +/* + * Check if the character at the specified position is tab. + */ + +#define IS_TAB_AT(string,offset) CHECK_AT((string),'\t',(offset)) + +#define IS_TAB(string) IS_TAB_AT((string),0) + +/* + * Check if the character at the specified position is blank (space or tab). + */ + +#define IS_BLANK_AT(string,offset) \ + (IS_SPACE_AT((string),(offset)) || IS_TAB_AT((string),(offset))) + +#define IS_BLANK(string) IS_BLANK_AT((string),0) + +/* + * Check if the character at the specified position is a line break. 
+ */ + +#define IS_BREAK_AT(string,offset) \ + (CHECK_AT((string),'\r',(offset)) /* CR (#xD)*/ \ + || CHECK_AT((string),'\n',(offset)) /* LF (#xA) */ \ + || (CHECK_AT((string),'\xC2',(offset)) \ + && CHECK_AT((string),'\x85',(offset)+1)) /* NEL (#x85) */ \ + || (CHECK_AT((string),'\xE2',(offset)) \ + && CHECK_AT((string),'\x80',(offset)+1) \ + && CHECK_AT((string),'\xA8',(offset)+2)) /* LS (#x2028) */ \ + || (CHECK_AT((string),'\xE2',(offset)) \ + && CHECK_AT((string),'\x80',(offset)+1) \ + && CHECK_AT((string),'\xA9',(offset)+2))) /* PS (#x2029) */ + +#define IS_BREAK(string) IS_BREAK_AT((string),0) + +#define IS_CRLF_AT(string,offset) \ + (CHECK_AT((string),'\r',(offset)) && CHECK_AT((string),'\n',(offset)+1)) + +#define IS_CRLF(string) IS_CRLF_AT((string),0) + +/* + * Check if the character is a line break or NUL. + */ + +#define IS_BREAKZ_AT(string,offset) \ + (IS_BREAK_AT((string),(offset)) || IS_Z_AT((string),(offset))) + +#define IS_BREAKZ(string) IS_BREAKZ_AT((string),0) + +/* + * Check if the character is a line break, space, or NUL. + */ + +#define IS_SPACEZ_AT(string,offset) \ + (IS_SPACE_AT((string),(offset)) || IS_BREAKZ_AT((string),(offset))) + +#define IS_SPACEZ(string) IS_SPACEZ_AT((string),0) + +/* + * Check if the character is a line break, space, tab, or NUL. + */ + +#define IS_BLANKZ_AT(string,offset) \ + (IS_BLANK_AT((string),(offset)) || IS_BREAKZ_AT((string),(offset))) + +#define IS_BLANKZ(string) IS_BLANKZ_AT((string),0) + +/* + * Determine the width of the character. + */ + +#define WIDTH_AT(string,offset) \ + (((string).pointer[offset] & 0x80) == 0x00 ? 1 : \ + ((string).pointer[offset] & 0xE0) == 0xC0 ? 2 : \ + ((string).pointer[offset] & 0xF0) == 0xE0 ? 3 : \ + ((string).pointer[offset] & 0xF8) == 0xF0 ? 4 : 0) + +#define WIDTH(string) WIDTH_AT((string),0) + +/* + * Move the string pointer to the next character. 
+ */ + +#define MOVE(string) ((string).pointer += WIDTH((string))) + +/* + * Copy a character and move the pointers of both strings. + */ + +#define COPY(string_a,string_b) \ + ((*(string_b).pointer & 0x80) == 0x00 ? \ + (*((string_a).pointer++) = *((string_b).pointer++)) : \ + (*(string_b).pointer & 0xE0) == 0xC0 ? \ + (*((string_a).pointer++) = *((string_b).pointer++), \ + *((string_a).pointer++) = *((string_b).pointer++)) : \ + (*(string_b).pointer & 0xF0) == 0xE0 ? \ + (*((string_a).pointer++) = *((string_b).pointer++), \ + *((string_a).pointer++) = *((string_b).pointer++), \ + *((string_a).pointer++) = *((string_b).pointer++)) : \ + (*(string_b).pointer & 0xF8) == 0xF0 ? \ + (*((string_a).pointer++) = *((string_b).pointer++), \ + *((string_a).pointer++) = *((string_b).pointer++), \ + *((string_a).pointer++) = *((string_b).pointer++), \ + *((string_a).pointer++) = *((string_b).pointer++)) : 0) + /* * Stack and queue management. */ From 1fb241f0d8b758e167790509d2fdcea940b5fcfb Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sat, 29 Jul 2006 22:10:26 +0000 Subject: [PATCH 25/73] Scalar writers are completed. --- src/api.c | 2 +- src/emitter.c | 431 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 417 insertions(+), 16 deletions(-) diff --git a/src/api.c b/src/api.c index 4eaa6676..1b9b2a2e 100644 --- a/src/api.c +++ b/src/api.c @@ -629,7 +629,7 @@ yaml_check_utf8(yaml_char_t *start, size_t length) (octet & 0xE0) == 0xC0 ? 2 : (octet & 0xF0) == 0xE0 ? 3 : (octet & 0xF8) == 0xF0 ? 4 : 0; - value = (octet & 0x80) == 0x00 ? octet & 0x7F : + value = (octet & 0x80) == 0x00 ? octet & 0x7F : (octet & 0xE0) == 0xC0 ? octet & 0x1F : (octet & 0xF0) == 0xE0 ? octet & 0x0F : (octet & 0xF8) == 0xF0 ? 
octet & 0x07 : 0; diff --git a/src/emitter.c b/src/emitter.c index b8d3dc97..ad3fc39a 100644 --- a/src/emitter.c +++ b/src/emitter.c @@ -52,10 +52,14 @@ #define WRITE_BREAK(emitter,string) \ (FLUSH(emitter) \ - && (COPY(emitter->buffer,string), \ - emitter->column = 0, \ - emitter->line ++, \ - 1)) + && (CHECK(string,'\n') ? \ + (PUT_BREAK(emitter), \ + string.pointer ++, \ + 1) : \ + (COPY(emitter->buffer,string), \ + emitter->column = 0, \ + emitter->line ++, \ + 1))) /* * API functions. @@ -230,7 +234,7 @@ yaml_emitter_write_tag_handle(yaml_emitter_t *emitter, static int yaml_emitter_write_tag_content(yaml_emitter_t *emitter, - yaml_char_t *value, size_t length); + yaml_char_t *value, size_t length, int need_whitespace); static int yaml_emitter_write_plain_scalar(yaml_emitter_t *emitter, @@ -244,6 +248,10 @@ static int yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length, int allow_breaks); +static int +yaml_emitter_determine_chomping(yaml_emitter_t *emitter, + yaml_string_t string); + static int yaml_emitter_write_literal_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length); @@ -601,7 +609,7 @@ yaml_emitter_emit_document_start(yaml_emitter_t *emitter, strlen((char *)tag_directive->handle))) return 0; if (!yaml_emitter_write_tag_content(emitter, tag_directive->prefix, - strlen((char *)tag_directive->prefix))) + strlen((char *)tag_directive->prefix), 1)) return 0; if (!yaml_emitter_write_indent(emitter)) return 0; @@ -1229,7 +1237,7 @@ yaml_emitter_process_tag(yaml_emitter_t *emitter) return 0; if (emitter->tag_data.suffix) { if (!yaml_emitter_write_tag_content(emitter, emitter->tag_data.suffix, - emitter->tag_data.suffix_length)) + emitter->tag_data.suffix_length, 0)) return 0; } } @@ -1238,7 +1246,7 @@ yaml_emitter_process_tag(yaml_emitter_t *emitter) if (!yaml_emitter_write_indicator(emitter, "!<", 1, 0, 0)) return 0; if (!yaml_emitter_write_tag_content(emitter, emitter->tag_data.suffix, - 
emitter->tag_data.suffix_length)) + emitter->tag_data.suffix_length, 0)) return 0; if (!yaml_emitter_write_indicator(emitter, ">", 0, 0, 0)) return 0; @@ -1786,6 +1794,10 @@ yaml_emitter_write_tag_handle(yaml_emitter_t *emitter, { yaml_string_t string = STRING(value, length); + if (!emitter->whitespace) { + if (!PUT(emitter, ' ')) return 0; + } + while (string.pointer != string.end) { if (!WRITE(emitter, string)) return 0; } @@ -1798,43 +1810,432 @@ yaml_emitter_write_tag_handle(yaml_emitter_t *emitter, static int yaml_emitter_write_tag_content(yaml_emitter_t *emitter, - yaml_char_t *value, size_t length) + yaml_char_t *value, size_t length, + int need_whitespace) { - return 0; + yaml_string_t string = STRING(value, length); + + if (need_whitespace && !emitter->whitespace) { + if (!PUT(emitter, ' ')) return 0; + } + + while (string.pointer != string.end) { + if (IS_ALPHA(string) + || CHECK(string, ';') || CHECK(string, '/') + || CHECK(string, '?') || CHECK(string, ':') + || CHECK(string, '@') || CHECK(string, '&') + || CHECK(string, '=') || CHECK(string, '+') + || CHECK(string, '$') || CHECK(string, ',') + || CHECK(string, '_') || CHECK(string, '.') + || CHECK(string, '~') || CHECK(string, '*') + || CHECK(string, '\'') || CHECK(string, '(') + || CHECK(string, ')') || CHECK(string, '[') + || CHECK(string, ']')) { + if (!WRITE(emitter, string)) return 0; + } + else { + int width = WIDTH(string); + unsigned int value; + while (width --) { + value = *(string.pointer++); + if (!PUT(emitter, '%')) return 0; + if (!PUT(emitter, (value >> 8) + + ((value >> 8) < 10 ? '0' : 'A' - 10))) + return 0; + if (!PUT(emitter, (value & 0x0F) + + ((value & 0x0F) < 10 ? 
'0' : 'A' - 10))) + return 0; + } + } + } + + emitter->whitespace = 0; + emitter->indention = 0; + + return 1; } static int yaml_emitter_write_plain_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length, int allow_breaks) { - return 0; + yaml_string_t string = STRING(value, length); + int spaces = 0; + int breaks = 0; + + if (!emitter->whitespace) { + if (!PUT(emitter, ' ')) return 0; + } + + while (string.pointer != string.end) + { + if (IS_SPACE(string)) + { + if (allow_breaks && !spaces + && emitter->column > emitter->best_width + && !IS_SPACE_AT(string, 1)) { + if (!yaml_emitter_write_indent(emitter)) return 0; + MOVE(string); + } + else { + if (!WRITE(emitter, string)) return 0; + } + spaces = 1; + } + else if (IS_BREAK(string)) + { + if (!breaks && CHECK(string, '\n')) { + if (!PUT_BREAK(emitter)) return 0; + } + if (!WRITE_BREAK(emitter, string)) return 0; + emitter->indention = 1; + breaks = 1; + } + else + { + if (breaks) { + if (!yaml_emitter_write_indent(emitter)) return 0; + } + if (!WRITE(emitter, string)) return 0; + emitter->indention = 0; + spaces = 0; + breaks = 0; + } + } + + emitter->whitespace = 0; + emitter->indention = 0; + + return 1; } static int yaml_emitter_write_single_quoted_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length, int allow_breaks) { - return 0; + yaml_string_t string = STRING(value, length); + int spaces = 0; + int breaks = 0; + + if (!yaml_emitter_write_indicator(emitter, "'", 1, 0, 0)) + return 0; + + while (string.pointer != string.end) + { + if (IS_SPACE(string)) + { + if (allow_breaks && !spaces + && emitter->column > emitter->best_width + && string.pointer != string.start + && string.pointer != string.end - 1 + && !IS_SPACE_AT(string, 1)) { + if (!yaml_emitter_write_indent(emitter)) return 0; + MOVE(string); + } + else { + if (!WRITE(emitter, string)) return 0; + } + spaces = 1; + } + else if (IS_BREAK(string)) + { + if (!breaks && CHECK(string, '\n')) { + if (!PUT_BREAK(emitter)) return 0; + 
} + if (!WRITE_BREAK(emitter, string)) return 0; + emitter->indention = 1; + breaks = 1; + } + else + { + if (breaks) { + if (!yaml_emitter_write_indent(emitter)) return 0; + } + if (CHECK(string, '\'')) { + if (!PUT(emitter, '\'')) return 0; + } + if (!WRITE(emitter, string)) return 0; + emitter->indention = 0; + spaces = 0; + breaks = 0; + } + } + + if (!yaml_emitter_write_indicator(emitter, "'", 0, 0, 0)) + return 0; + + emitter->whitespace = 0; + emitter->indention = 0; + + return 1; } static int yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length, int allow_breaks) { - return 0; + yaml_string_t string = STRING(value, length); + int spaces = 0; + + if (!yaml_emitter_write_indicator(emitter, "\"", 1, 0, 0)) + return 0; + + while (string.pointer != string.end) + { + if (!IS_PRINTABLE(string) || (!emitter->unicode && !IS_ASCII(string)) + || IS_BOM(string) || IS_BREAK(string) + || CHECK(string, '"') || CHECK(string, '\\')) + { + unsigned char octet; + unsigned int width; + unsigned int value; + int k; + + octet = string.pointer[0]; + width = (octet & 0x80) == 0x00 ? 1 : + (octet & 0xE0) == 0xC0 ? 2 : + (octet & 0xF0) == 0xE0 ? 3 : + (octet & 0xF8) == 0xF0 ? 4 : 0; + value = (octet & 0x80) == 0x00 ? octet & 0x7F : + (octet & 0xE0) == 0xC0 ? octet & 0x1F : + (octet & 0xF0) == 0xE0 ? octet & 0x0F : + (octet & 0xF8) == 0xF0 ? 
octet & 0x07 : 0; + for (k = 1; k < width; k ++) { + octet = string.pointer[k]; + value = (value << 6) + (octet & 0x3F); + } + string.pointer += width; + + if (!PUT(emitter, '\\')) return 0; + + switch (value) + { + case 0x00: + if (!PUT(emitter, '0')) return 0; + break; + + case 0x07: + if (!PUT(emitter, 'a')) return 0; + break; + + case 0x08: + if (!PUT(emitter, 'b')) return 0; + break; + + case 0x09: + if (!PUT(emitter, 't')) return 0; + break; + + case 0x0A: + if (!PUT(emitter, 'n')) return 0; + break; + + case 0x0B: + if (!PUT(emitter, 'v')) return 0; + break; + + case 0x0C: + if (!PUT(emitter, 'f')) return 0; + break; + + case 0x0D: + if (!PUT(emitter, 'r')) return 0; + break; + + case 0x1B: + if (!PUT(emitter, 'e')) return 0; + break; + + case 0x22: + if (!PUT(emitter, '\"')) return 0; + break; + + case 0x5C: + if (!PUT(emitter, '\\')) return 0; + break; + + case 0x85: + if (!PUT(emitter, 'N')) return 0; + break; + + case 0xA0: + if (!PUT(emitter, '_')) return 0; + break; + + case 0x2028: + if (!PUT(emitter, 'L')) return 0; + break; + + case 0x2029: + if (!PUT(emitter, 'P')) return 0; + break; + + default: + if (value <= 0xFF) { + if (!PUT(emitter, 'x')) return 0; + width = 2; + } + else if (value <= 0xFFFF) { + if (!PUT(emitter, 'u')) return 0; + width = 4; + } + else { + if (!PUT(emitter, 'U')) return 0; + width = 8; + } + for (k = width*4; k >= 0; k -= 4) { + if (!PUT(emitter, (value >> k) & 0x0F)) return 0; + } + } + spaces = 0; + } + else if (IS_SPACE(string)) + { + if (allow_breaks && !spaces + && emitter->column > emitter->best_width + && string.pointer != string.start + && string.pointer != string.end - 1) { + if (!yaml_emitter_write_indent(emitter)) return 0; + if (IS_SPACE_AT(string, 1)) { + if (!PUT(emitter, '\\')) return 0; + } + MOVE(string); + } + else { + if (!WRITE(emitter, string)) return 0; + } + spaces = 1; + } + else + { + if (!WRITE(emitter, string)) return 0; + spaces = 0; + } + } + + if (!yaml_emitter_write_indicator(emitter, "\"", 0, 
0, 0)) + return 0; + + emitter->whitespace = 0; + emitter->indention = 0; + + return 1; +} + +static int +yaml_emitter_determine_chomping(yaml_emitter_t *emitter, + yaml_string_t string) +{ + string.pointer = string.end; + if (string.start == string.pointer) + return -1; + while ((string.pointer[-1] & 0xC0) == 0x80) { + string.pointer --; + } + if (!IS_BREAK(string)) + return -1; + if (string.start == string.pointer) + return 0; + while ((string.pointer[-1] & 0xC0) == 0x80) { + string.pointer --; + } + if (!IS_BREAK(string)) + return 0; + return +1; + } static int yaml_emitter_write_literal_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length) { - return 0; + yaml_string_t string = STRING(value, length); + int chomp = yaml_emitter_determine_chomping(emitter, string); + int breaks = 0; + + if (!yaml_emitter_write_indicator(emitter, + chomp == -1 ? "|-" : chomp == +1 ? "|+" : "|", 1, 0, 0)) + return 0; + if (!yaml_emitter_write_indent(emitter)) + return 0; + + while (string.pointer != string.end) + { + if (IS_BREAK(string)) + { + if (!WRITE_BREAK(emitter, string)) return 0; + emitter->indention = 1; + breaks = 1; + } + else + { + if (breaks) { + if (!yaml_emitter_write_indent(emitter)) return 0; + } + if (!WRITE(emitter, string)) return 0; + emitter->indention = 0; + breaks = 0; + } + } + + if (!yaml_emitter_write_indent(emitter)) return 0; + + return 1; } static int yaml_emitter_write_folded_scalar(yaml_emitter_t *emitter, yaml_char_t *value, size_t length) { - return 0; + yaml_string_t string = STRING(value, length); + int chomp = yaml_emitter_determine_chomping(emitter, string); + int breaks = 0; + int leading_spaces = 1; + + if (!yaml_emitter_write_indicator(emitter, + chomp == -1 ? ">-" : chomp == +1 ? 
">+" : ">", 1, 0, 0)) + return 0; + if (!yaml_emitter_write_indent(emitter)) + return 0; + + while (string.pointer != string.end) + { + if (IS_BREAK(string)) + { + if (!breaks && !leading_spaces && CHECK(string, '\n')) { + int k = 0; + while (IS_BREAK_AT(string, k)) { + k += WIDTH_AT(string, k); + } + if (!IS_BLANK_AT(string, k)) { + if (!PUT_BREAK(emitter)) return 0; + } + } + if (!WRITE_BREAK(emitter, string)) return 0; + emitter->indention = 1; + breaks = 1; + } + else + { + if (breaks) { + if (!yaml_emitter_write_indent(emitter)) return 0; + leading_spaces = IS_BLANK(string); + } + if (!breaks && IS_SPACE(string) && !IS_SPACE_AT(string, 1) + && emitter->column > emitter->best_width) { + if (!yaml_emitter_write_indent(emitter)) return 0; + MOVE(string); + } + else { + if (!WRITE(emitter, string)) return 0; + } + emitter->indention = 0; + breaks = 0; + } + } + + if (!yaml_emitter_write_indent(emitter)) return 0; + + return 1; } From 0ff07a70e411127c4533350fc1852d5e85f41839 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sun, 30 Jul 2006 12:49:17 +0000 Subject: [PATCH 26/73] Fix Emitter bugs and leaks. 
--- src/api.c | 3 +- src/emitter.c | 75 +++++++++++++++++++++++++++++++++++------------ src/parser.c | 2 +- tests/Makefile.am | 2 +- 4 files changed, 60 insertions(+), 22 deletions(-) diff --git a/src/api.c b/src/api.c index 1b9b2a2e..dc7611d2 100644 --- a/src/api.c +++ b/src/api.c @@ -391,6 +391,7 @@ yaml_emitter_delete(yaml_emitter_t *emitter) while (!QUEUE_EMPTY(emitter, emitter->events)) { yaml_event_delete(&DEQUEUE(emitter, emitter->events)); } + QUEUE_DEL(emitter, emitter->events); STACK_DEL(emitter, emitter->indents); while (!STACK_EMPTY(empty, emitter->tag_directives)) { yaml_tag_directive_t tag_directive = POP(emitter, emitter->tag_directives); @@ -744,7 +745,7 @@ yaml_document_start_event_initialize(yaml_event_t *event, } DOCUMENT_START_EVENT_INIT(*event, version_directive_copy, - tag_directives_copy.start, tag_directives_copy.end, + tag_directives_copy.start, tag_directives_copy.top, implicit, mark, mark); return 1; diff --git a/src/emitter.c b/src/emitter.c index ad3fc39a..28eadcc6 100644 --- a/src/emitter.c +++ b/src/emitter.c @@ -290,7 +290,7 @@ yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event) return 0; if (!yaml_emitter_state_machine(emitter, emitter->events.head)) return 0; - DEQUEUE(emitter, emitter->events); + yaml_event_delete(&DEQUEUE(emitter, emitter->events)); } return 1; @@ -687,6 +687,13 @@ yaml_emitter_emit_document_end(yaml_emitter_t *emitter, emitter->state = YAML_EMIT_DOCUMENT_START_STATE; + while (!STACK_EMPTY(emitter, emitter->tag_directives)) { + yaml_tag_directive_t tag_directive = POP(emitter, + emitter->tag_directives); + yaml_free(tag_directive.handle); + yaml_free(tag_directive.prefix); + } + return 1; } @@ -695,6 +702,7 @@ yaml_emitter_emit_document_end(yaml_emitter_t *emitter, } /* + * * Expect a flow item node. 
*/ @@ -728,11 +736,16 @@ yaml_emitter_emit_flow_sequence_item(yaml_emitter_t *emitter, return 1; } + if (!first) { + if (!yaml_emitter_write_indicator(emitter, ",", 0, 0, 0)) + return 0; + } + if (emitter->canonical || emitter->column > emitter->best_width) { if (!yaml_emitter_write_indent(emitter)) return 0; } - if (PUSH(emitter, emitter->states, YAML_EMIT_FLOW_SEQUENCE_ITEM_STATE)) + if (!PUSH(emitter, emitter->states, YAML_EMIT_FLOW_SEQUENCE_ITEM_STATE)) return 0; return yaml_emitter_emit_node(emitter, event, 0, 1, 0, 0); @@ -1178,7 +1191,8 @@ yaml_emitter_select_scalar_style(yaml_emitter_t *emitter, yaml_event_t *event) if (style == YAML_LITERAL_SCALAR_STYLE || style == YAML_FOLDED_SCALAR_STYLE) { - if (!emitter->scalar_data.block_allowed) + if (!emitter->scalar_data.block_allowed + || emitter->flow_level || emitter->simple_key_context) style = YAML_DOUBLE_QUOTED_SCALAR_STYLE; } @@ -1340,7 +1354,7 @@ yaml_emitter_analyze_tag_directive(yaml_emitter_t *emitter, handle.pointer ++; - while (handle.pointer != handle.end-1) { + while (handle.pointer < handle.end-1) { if (!IS_ALPHA(handle)) { return yaml_emitter_set_emitter_error(emitter, "tag handle must contain alphanumerical characters only"); @@ -1380,6 +1394,12 @@ yaml_emitter_analyze_anchor(yaml_emitter_t *emitter, } MOVE(string); } + + emitter->anchor_data.anchor = string.start; + emitter->anchor_data.anchor_length = string.end - string.start; + emitter->anchor_data.alias = alias; + + return 1; } /* @@ -1399,7 +1419,7 @@ yaml_emitter_analyze_tag(yaml_emitter_t *emitter, } for (tag_directive = emitter->tag_directives.start; - tag_directive != emitter->tag_directives.end; tag_directive ++) { + tag_directive != emitter->tag_directives.top; tag_directive ++) { size_t prefix_length = strlen((char *)tag_directive->prefix); if (prefix_length < (string.end - string.start) && strncmp((char *)tag_directive->prefix, (char *)string.start, @@ -1586,6 +1606,25 @@ yaml_emitter_analyze_scalar(yaml_emitter_t *emitter, spaces = 
breaks = mixed = leading = 0; } + if ((spaces || breaks) && string.pointer == string.end-1) + { + if (spaces && breaks) { + mixed_breaks_spaces = 1; + } + else if (spaces) { + if (leading) { + leading_spaces = 1; + } + trailing_spaces = 1; + } + else if (breaks) { + if (leading) { + leading_breaks = 1; + } + trailing_breaks = 1; + } + } + preceeded_by_space = IS_BLANKZ(string); MOVE(string); if (string.pointer != string.end) { @@ -1839,8 +1878,8 @@ yaml_emitter_write_tag_content(yaml_emitter_t *emitter, while (width --) { value = *(string.pointer++); if (!PUT(emitter, '%')) return 0; - if (!PUT(emitter, (value >> 8) - + ((value >> 8) < 10 ? '0' : 'A' - 10))) + if (!PUT(emitter, (value >> 4) + + ((value >> 4) < 10 ? '0' : 'A' - 10))) return 0; if (!PUT(emitter, (value & 0x0F) + ((value & 0x0F) < 10 ? '0' : 'A' - 10))) @@ -2083,8 +2122,10 @@ yaml_emitter_write_double_quoted_scalar(yaml_emitter_t *emitter, if (!PUT(emitter, 'U')) return 0; width = 8; } - for (k = width*4; k >= 0; k -= 4) { - if (!PUT(emitter, (value >> k) & 0x0F)) return 0; + for (k = (width-1)*4; k >= 0; k -= 4) { + int digit = (value >> k) & 0x0F; + if (!PUT(emitter, digit + (digit < 10 ? 
'0' : 'A'-10))) + return 0; } } spaces = 0; @@ -2129,16 +2170,16 @@ yaml_emitter_determine_chomping(yaml_emitter_t *emitter, string.pointer = string.end; if (string.start == string.pointer) return -1; - while ((string.pointer[-1] & 0xC0) == 0x80) { + do { string.pointer --; - } + } while ((*string.pointer & 0xC0) == 0x80); if (!IS_BREAK(string)) return -1; if (string.start == string.pointer) return 0; - while ((string.pointer[-1] & 0xC0) == 0x80) { + do { string.pointer --; - } + } while ((*string.pointer & 0xC0) == 0x80); if (!IS_BREAK(string)) return 0; return +1; @@ -2178,8 +2219,6 @@ yaml_emitter_write_literal_scalar(yaml_emitter_t *emitter, } } - if (!yaml_emitter_write_indent(emitter)) return 0; - return 1; } @@ -2189,8 +2228,8 @@ yaml_emitter_write_folded_scalar(yaml_emitter_t *emitter, { yaml_string_t string = STRING(value, length); int chomp = yaml_emitter_determine_chomping(emitter, string); - int breaks = 0; - int leading_spaces = 1; + int breaks = 1; + int leading_spaces = 0; if (!yaml_emitter_write_indicator(emitter, chomp == -1 ? ">-" : chomp == +1 ? 
">+" : ">", 1, 0, 0)) @@ -2234,8 +2273,6 @@ yaml_emitter_write_folded_scalar(yaml_emitter_t *emitter, } } - if (!yaml_emitter_write_indent(emitter)) return 0; - return 1; } diff --git a/src/parser.c b/src/parser.c index 0b5b5437..c9f67af8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1252,7 +1252,7 @@ yaml_parser_process_directives(yaml_parser_t *parser, goto error; } if (token->data.version_directive.major != 1 - && token->data.version_directive.minor != 1) { + || token->data.version_directive.minor != 1) { yaml_parser_set_parser_error(parser, "found incompatible YAML document", token->start_mark); goto error; diff --git a/tests/Makefile.am b/tests/Makefile.am index e2f9e9c0..643e1eb2 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include LDADD = $(top_builddir)/src/libyaml.la TESTS = test-version test-reader -check_PROGRAMS = test-version test-reader run-scanner run-parser +check_PROGRAMS = test-version test-reader run-scanner run-parser run-emitter From 7a5632f84062918fffaa15a0952b93f583e18a9e Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sun, 30 Jul 2006 12:50:15 +0000 Subject: [PATCH 27/73] Add the run-emitter test. 
--- tests/run-emitter.c | 323 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 tests/run-emitter.c diff --git a/tests/run-emitter.c b/tests/run-emitter.c new file mode 100644 index 00000000..3008ab54 --- /dev/null +++ b/tests/run-emitter.c @@ -0,0 +1,323 @@ +#include + +#include +#include +#include +#include + +#define BUFFER_SIZE 65536 +#define MAX_EVENTS 1024 + +int copy_event(yaml_event_t *event_to, yaml_event_t *event_from) +{ + switch (event_from->type) + { + case YAML_STREAM_START_EVENT: + return yaml_stream_start_event_initialize(event_to, + event_from->data.stream_start.encoding); + + case YAML_STREAM_END_EVENT: + return yaml_stream_end_event_initialize(event_to); + + case YAML_DOCUMENT_START_EVENT: + return yaml_document_start_event_initialize(event_to, + event_from->data.document_start.version_directive, + event_from->data.document_start.tag_directives.start, + event_from->data.document_start.tag_directives.end, + event_from->data.document_start.implicit); + + case YAML_DOCUMENT_END_EVENT: + return yaml_document_end_event_initialize(event_to, + event_from->data.document_end.implicit); + + case YAML_ALIAS_EVENT: + return yaml_alias_event_initialize(event_to, + event_from->data.alias.anchor); + + case YAML_SCALAR_EVENT: + return yaml_scalar_event_initialize(event_to, + event_from->data.scalar.anchor, + event_from->data.scalar.tag, + event_from->data.scalar.value, + event_from->data.scalar.length, + event_from->data.scalar.plain_implicit, + event_from->data.scalar.quoted_implicit, + event_from->data.scalar.style); + + case YAML_SEQUENCE_START_EVENT: + return yaml_sequence_start_event_initialize(event_to, + event_from->data.sequence_start.anchor, + event_from->data.sequence_start.tag, + event_from->data.sequence_start.implicit, + event_from->data.sequence_start.style); + + case YAML_SEQUENCE_END_EVENT: + return yaml_sequence_end_event_initialize(event_to); + + case YAML_MAPPING_START_EVENT: + return 
yaml_mapping_start_event_initialize(event_to, + event_from->data.mapping_start.anchor, + event_from->data.mapping_start.tag, + event_from->data.mapping_start.implicit, + event_from->data.mapping_start.style); + + case YAML_MAPPING_END_EVENT: + return yaml_mapping_end_event_initialize(event_to); + + default: + assert(1); + } + + return 0; +} + +int compare_events(yaml_event_t *event1, yaml_event_t *event2) +{ + int k; + + if (event1->type != event2->type) + return 0; + + switch (event1->type) + { + case YAML_STREAM_START_EVENT: + return 1; + /* return (event1->data.stream_start.encoding == + event2->data.stream_start.encoding); */ + + case YAML_DOCUMENT_START_EVENT: + if ((event1->data.document_start.version_directive && !event2->data.document_start.version_directive) + || (!event1->data.document_start.version_directive && event2->data.document_start.version_directive) + || (event1->data.document_start.version_directive && event2->data.document_start.version_directive + && (event1->data.document_start.version_directive->major != event2->data.document_start.version_directive->major + || event1->data.document_start.version_directive->minor != event2->data.document_start.version_directive->minor))) + return 0; + if ((event1->data.document_start.tag_directives.end - event1->data.document_start.tag_directives.start) != + (event2->data.document_start.tag_directives.end - event2->data.document_start.tag_directives.start)) + return 0; + for (k = 0; k < (event1->data.document_start.tag_directives.end - event1->data.document_start.tag_directives.start); k ++) { + if ((strcmp((char *)event1->data.document_start.tag_directives.start[k].handle, + (char *)event2->data.document_start.tag_directives.start[k].handle) != 0) + || (strcmp((char *)event1->data.document_start.tag_directives.start[k].prefix, + (char *)event2->data.document_start.tag_directives.start[k].prefix) != 0)) + return 0; + } + /* if (event1->data.document_start.implicit != event2->data.document_start.implicit) + 
return 0; */ + return 1; + + case YAML_DOCUMENT_END_EVENT: + return 1; + /* return (event1->data.document_end.implicit == + event2->data.document_end.implicit); */ + + case YAML_ALIAS_EVENT: + return (strcmp((char *)event1->data.alias.anchor, + (char *)event2->data.alias.anchor) == 0); + + case YAML_SCALAR_EVENT: + if ((event1->data.scalar.anchor && !event2->data.scalar.anchor) + || (!event1->data.scalar.anchor && event2->data.scalar.anchor) + || (event1->data.scalar.anchor && event2->data.scalar.anchor + && strcmp((char *)event1->data.scalar.anchor, + (char *)event2->data.scalar.anchor) != 0)) + return 0; + if ((event1->data.scalar.tag && !event2->data.scalar.tag + && strcmp((char *)event1->data.scalar.tag, "!") != 0) + || (!event1->data.scalar.tag && event2->data.scalar.tag + && strcmp((char *)event2->data.scalar.tag, "!") != 0) + || (event1->data.scalar.tag && event2->data.scalar.tag + && strcmp((char *)event1->data.scalar.tag, + (char *)event2->data.scalar.tag) != 0)) + return 0; + if ((event1->data.scalar.length != event2->data.scalar.length) + || memcmp(event1->data.scalar.value, event2->data.scalar.value, + event1->data.scalar.length) != 0) + return 0; + if ((event1->data.scalar.plain_implicit != event2->data.scalar.plain_implicit) + || (event2->data.scalar.quoted_implicit != event2->data.scalar.quoted_implicit) + /* || (event2->data.scalar.style != event2->data.scalar.style) */) + return 0; + return 1; + + case YAML_SEQUENCE_START_EVENT: + if ((event1->data.sequence_start.anchor && !event2->data.sequence_start.anchor) + || (!event1->data.sequence_start.anchor && event2->data.sequence_start.anchor) + || (event1->data.sequence_start.anchor && event2->data.sequence_start.anchor + && strcmp((char *)event1->data.sequence_start.anchor, + (char *)event2->data.sequence_start.anchor) != 0)) + return 0; + if ((event1->data.sequence_start.tag && !event2->data.sequence_start.tag) + || (!event1->data.sequence_start.tag && event2->data.sequence_start.tag) + || 
(event1->data.sequence_start.tag && event2->data.sequence_start.tag + && strcmp((char *)event1->data.sequence_start.tag, + (char *)event2->data.sequence_start.tag) != 0)) + return 0; + if ((event1->data.sequence_start.implicit != event2->data.sequence_start.implicit) + /* || (event2->data.sequence_start.style != event2->data.sequence_start.style) */) + return 0; + return 1; + + case YAML_MAPPING_START_EVENT: + if ((event1->data.mapping_start.anchor && !event2->data.mapping_start.anchor) + || (!event1->data.mapping_start.anchor && event2->data.mapping_start.anchor) + || (event1->data.mapping_start.anchor && event2->data.mapping_start.anchor + && strcmp((char *)event1->data.mapping_start.anchor, + (char *)event2->data.mapping_start.anchor) != 0)) + return 0; + if ((event1->data.mapping_start.tag && !event2->data.mapping_start.tag) + || (!event1->data.mapping_start.tag && event2->data.mapping_start.tag) + || (event1->data.mapping_start.tag && event2->data.mapping_start.tag + && strcmp((char *)event1->data.mapping_start.tag, + (char *)event2->data.mapping_start.tag) != 0)) + return 0; + if ((event1->data.mapping_start.implicit != event2->data.mapping_start.implicit) + /* || (event2->data.mapping_start.style != event2->data.mapping_start.style) */) + return 0; + return 1; + + default: + return 1; + } +} + +int print_output(char *name, unsigned char *buffer, size_t size, int count) +{ + FILE *file; + char data[BUFFER_SIZE]; + size_t data_size = 1; + size_t total_size = 0; + if (count >= 0) { + printf("FAILED (at the event #%d)\nSOURCE:\n", count+1); + } + file = fopen(name, "rb"); + assert(file); + while (data_size > 0) { + data_size = fread(data, 1, BUFFER_SIZE, file); + assert(!ferror(file)); + if (!data_size) break; + assert(fwrite(data, 1, data_size, stdout) == data_size); + total_size += data_size; + if (feof(file)) break; + } + fclose(file); + printf("#### (length: %d)\n", total_size); + printf("OUTPUT:\n%s#### (length: %d)\n", buffer, size); + return 0; +} + +int 
+main(int argc, char *argv[]) +{ + int number; + int canonical = 0; + int unicode = 0; + + number = 1; + while (number < argc) { + if (strcmp(argv[number], "-c") == 0) { + canonical = 1; + } + else if (strcmp(argv[number], "-u") == 0) { + unicode = 1; + } + else if (argv[number][0] == '-') { + printf("Unknown option: '%s'\n", argv[number]); + return 0; + } + if (argv[number][0] == '-') { + if (number < argc-1) { + memmove(argv+number, argv+number+1, (argc-number-1)*sizeof(char *)); + } + argc --; + } + else { + number ++; + } + } + + if (argc < 2) { + printf("Usage: %s [-c] [-u] file1.yaml ...\n", argv[0]); + return 0; + } + + for (number = 1; number < argc; number ++) + { + FILE *file; + yaml_parser_t parser; + yaml_emitter_t emitter; + yaml_event_t event; + unsigned char buffer[BUFFER_SIZE]; + size_t written = 0; + yaml_event_t events[MAX_EVENTS]; + size_t event_number = 0; + int done = 0; + int count = 0; + int error = 0; + int k; + memset(buffer, 0, BUFFER_SIZE); + memset(events, 0, MAX_EVENTS); + + printf("[%d] Parsing, emitting, and parsing again '%s': ", number, argv[number]); + fflush(stdout); + + file = fopen(argv[number], "rb"); + assert(file); + + assert(yaml_parser_initialize(&parser)); + yaml_parser_set_input_file(&parser, file); + assert(yaml_emitter_initialize(&emitter)); + if (canonical) { + yaml_emitter_set_canonical(&emitter, 1); + } + if (unicode) { + yaml_emitter_set_unicode(&emitter, 1); + } + yaml_emitter_set_output_string(&emitter, buffer, BUFFER_SIZE, &written); + + while (!done) + { + if (!yaml_parser_parse(&parser, &event)) { + error = 1; + break; + } + + done = (event.type == YAML_STREAM_END_EVENT); + assert(event_number < MAX_EVENTS); + assert(copy_event(&(events[event_number++]), &event)); + assert(yaml_emitter_emit(&emitter, &event) || + (yaml_emitter_flush(&emitter) && print_output(argv[number], buffer, written, count))); + count ++; + } + + yaml_parser_delete(&parser); + assert(!fclose(file)); + yaml_emitter_delete(&emitter); + + if 
(!error) + { + count = done = 0; + assert(yaml_parser_initialize(&parser)); + yaml_parser_set_input_string(&parser, buffer, written); + + while (!done) + { + assert(yaml_parser_parse(&parser, &event) || print_output(argv[number], buffer, written, count)); + done = (event.type == YAML_STREAM_END_EVENT); + assert(compare_events(events+count, &event) || print_output(argv[number], buffer, written, count)); + yaml_event_delete(&event); + count ++; + } + yaml_parser_delete(&parser); + } + + for (k = 0; k < event_number; k ++) { + yaml_event_delete(events+k); + } + + printf("PASSED (length: %d)\n", written); + print_output(argv[number], buffer, written, -1); + } + + return 0; +} From 0e237f449b6e916ca4a09696f0bd276a9ce78935 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 31 Jul 2006 22:24:32 +0000 Subject: [PATCH 28/73] Add two examples and prepare the build system for distribution. --- include/yaml.h | 3 +- src/Makefile.am | 2 +- src/api.c | 7 +- tests/Makefile.am | 4 +- tests/example-deconstructor.c | 1130 +++++++++++++++++++++++++++++++++ tests/example-reformatter.c | 202 ++++++ 6 files changed, 1343 insertions(+), 5 deletions(-) create mode 100644 tests/example-deconstructor.c create mode 100644 tests/example-reformatter.c diff --git a/include/yaml.h b/include/yaml.h index 0408a67f..47de95d1 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -417,6 +417,7 @@ typedef struct { * Create the STREAM-START event. * * @param[in] event An empty event object. + * @param[in] encoding The stream encoding. * * @returns @c 1 if the function succeeded, @c 0 on error. 
*/ @@ -508,7 +509,7 @@ yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor); YAML_DECLARE(int) yaml_scalar_event_initialize(yaml_event_t *event, yaml_char_t *anchor, yaml_char_t *tag, - yaml_char_t *value, size_t length, + yaml_char_t *value, int length, int plain_implicit, int quoted_implicit, yaml_scalar_style_t style); diff --git a/src/Makefile.am b/src/Makefile.am index cc815e73..f9cb7a28 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = api.c reader.c scanner.c parser.c writer.c emitter.c +libyaml_la_SOURCES = yaml_private.h api.c reader.c scanner.c parser.c writer.c emitter.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index dc7611d2..83ca720c 100644 --- a/src/api.c +++ b/src/api.c @@ -811,7 +811,7 @@ yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor) YAML_DECLARE(int) yaml_scalar_event_initialize(yaml_event_t *event, yaml_char_t *anchor, yaml_char_t *tag, - yaml_char_t *value, size_t length, + yaml_char_t *value, int length, int plain_implicit, int quoted_implicit, yaml_scalar_style_t style) { @@ -823,7 +823,6 @@ yaml_scalar_event_initialize(yaml_event_t *event, assert(event); /* Non-NULL event object is expected. */ assert(value); /* Non-NULL anchor is expected. 
*/ - if (anchor) { if (!yaml_check_utf8(anchor, strlen((char *)anchor))) goto error; anchor_copy = yaml_strdup(anchor); @@ -836,6 +835,10 @@ yaml_scalar_event_initialize(yaml_event_t *event, if (!tag_copy) goto error; } + if (length < 0) { + length = strlen((char *)value); + } + if (!yaml_check_utf8(value, length)) goto error; value_copy = yaml_malloc(length+1); if (!value_copy) goto error; diff --git a/tests/Makefile.am b/tests/Makefile.am index 643e1eb2..e7880d65 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,4 +1,6 @@ AM_CPPFLAGS = -I$(top_srcdir)/include +AM_CFLAGS = -Wno-pointer-sign LDADD = $(top_builddir)/src/libyaml.la TESTS = test-version test-reader -check_PROGRAMS = test-version test-reader run-scanner run-parser run-emitter +check_PROGRAMS = test-version test-reader +noinst_PROGRAMS = run-scanner run-parser run-emitter example-reformatter example-deconstructor diff --git a/tests/example-deconstructor.c b/tests/example-deconstructor.c new file mode 100644 index 00000000..d41b5ae0 --- /dev/null +++ b/tests/example-deconstructor.c @@ -0,0 +1,1130 @@ + +#include + +#include +#include + +int +main(int argc, char *argv[]) +{ + int help = 0; + int canonical = 0; + int unicode = 0; + int k; + int done = 0; + + yaml_parser_t parser; + yaml_emitter_t emitter; + yaml_event_t input_event; + yaml_event_t output_event; + + /* Clear the objects. */ + + memset(&parser, 0, sizeof(parser)); + memset(&emitter, 0, sizeof(emitter)); + memset(&input_event, 0, sizeof(input_event)); + memset(&output_event, 0, sizeof(output_event)); + + /* Analyze command line options. 
*/ + + for (k = 1; k < argc; k ++) + { + if (strcmp(argv[k], "-h") == 0 + || strcmp(argv[k], "--help") == 0) { + help = 1; + } + + else if (strcmp(argv[k], "-c") == 0 + || strcmp(argv[k], "--canonical") == 0) { + canonical = 1; + } + + else if (strcmp(argv[k], "-u") == 0 + || strcmp(argv[k], "--unicode") == 0) { + unicode = 1; + } + + else { + fprintf(stderr, "Unrecognized option: %s\n" + "Try `%s --help` for more information.\n", + argv[k], argv[0]); + return 1; + } + } + + /* Display the help string. */ + + if (help) + { + printf("%s major); + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:int", number, -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'minor'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "minor", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write a number. */ + + sprintf(number, "%d", version->minor); + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:int", number, -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write '}'. */ + + if (!yaml_mapping_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Display the document tag directives. */ + + if (input_event.data.document_start.tag_directives.start + != input_event.data.document_start.tag_directives.end) + { + yaml_tag_directive_t *tag; + + /* Write 'tags'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "tags", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Start a block sequence. 
*/ + + if (!yaml_sequence_start_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:seq", 1, + YAML_BLOCK_SEQUENCE_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + for (tag = input_event.data.document_start.tag_directives.start; + tag != input_event.data.document_start.tag_directives.end; + tag ++) + { + /* Write '{'. */ + + if (!yaml_mapping_start_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:map", 1, + YAML_FLOW_MAPPING_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'handle'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "handle", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the tag directive handle. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + tag->handle, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'prefix'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "prefix", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the tag directive prefix. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + tag->prefix, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write '}'. */ + + if (!yaml_mapping_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* End a block sequence. 
*/ + + if (!yaml_sequence_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Write 'implicit'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "implicit", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write if the document is implicit. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:bool", + (input_event.data.document_start.implicit ? + "true" : "false"), -1, + 1, 0, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + break; + + case YAML_DOCUMENT_END_EVENT: + + /* Write 'type'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'DOCUMENT-END'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "DOCUMENT-END", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'implicit'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "implicit", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write if the document is implicit. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:bool", + (input_event.data.document_end.implicit ? + "true" : "false"), -1, + 1, 0, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + break; + + case YAML_ALIAS_EVENT: + + /* Write 'type'. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'ALIAS'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "ALIAS", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'anchor'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "anchor", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the alias anchor. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.alias.anchor, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + break; + + case YAML_SCALAR_EVENT: + + /* Write 'type'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'SCALAR'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "SCALAR", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display the scalar anchor. */ + + if (input_event.data.scalar.anchor) + { + /* Write 'anchor'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "anchor", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the scalar anchor. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.scalar.anchor, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Display the scalar tag. */ + + if (input_event.data.scalar.tag) + { + /* Write 'tag'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "tag", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the scalar tag. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.scalar.tag, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Display the scalar value. */ + + /* Write 'value'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "value", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the scalar value. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.scalar.value, + input_event.data.scalar.length, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display if the scalar tag is implicit. */ + + /* Write 'implicit'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "implicit", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write '{'. 
*/ + + if (!yaml_mapping_start_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:map", 1, + YAML_FLOW_MAPPING_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'plain'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "plain", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write if the scalar is implicit in the plain style. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:bool", + (input_event.data.scalar.plain_implicit ? + "true" : "false"), -1, + 1, 0, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'quoted'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "non-plain", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write if the scalar is implicit in a non-plain style. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:bool", + (input_event.data.scalar.quoted_implicit ? + "true" : "false"), -1, + 1, 0, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write '}'. */ + + if (!yaml_mapping_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display the style information. */ + + if (input_event.data.scalar.style) + { + yaml_scalar_style_t style = input_event.data.scalar.style; + + /* Write 'style'. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "style", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the scalar style. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + (style == YAML_PLAIN_SCALAR_STYLE ? "plain" : + style == YAML_SINGLE_QUOTED_SCALAR_STYLE ? + "single-quoted" : + style == YAML_DOUBLE_QUOTED_SCALAR_STYLE ? + "double-quoted" : + style == YAML_LITERAL_SCALAR_STYLE ? "literal" : + style == YAML_FOLDED_SCALAR_STYLE ? "folded" : + "unknown"), -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + break; + + case YAML_SEQUENCE_START_EVENT: + + /* Write 'type'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'SEQUENCE-START'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "SEQUENCE-START", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display the sequence anchor. */ + + if (input_event.data.sequence_start.anchor) + { + /* Write 'anchor'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "anchor", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the sequence anchor. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.sequence_start.anchor, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Display the sequence tag. */ + + if (input_event.data.sequence_start.tag) + { + /* Write 'tag'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "tag", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the sequence tag. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.sequence_start.tag, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Write 'implicit'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "implicit", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write if the sequence tag is implicit. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:bool", + (input_event.data.sequence_start.implicit ? + "true" : "false"), -1, + 1, 0, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display the style information. */ + + if (input_event.data.sequence_start.style) + { + yaml_sequence_style_t style + = input_event.data.sequence_start.style; + + /* Write 'style'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "style", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the scalar style. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + (style == YAML_BLOCK_SEQUENCE_STYLE ? "block" : + style == YAML_FLOW_SEQUENCE_STYLE ? "flow" : + "unknown"), -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + break; + + case YAML_SEQUENCE_END_EVENT: + + /* Write 'type'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'SEQUENCE-END'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "SEQUENCE-END", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + break; + + case YAML_MAPPING_START_EVENT: + + /* Write 'type'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'MAPPING-START'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "MAPPING-START", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display the mapping anchor. */ + + if (input_event.data.mapping_start.anchor) + { + /* Write 'anchor'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "anchor", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the mapping anchor. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.mapping_start.anchor, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Display the mapping tag. */ + + if (input_event.data.mapping_start.tag) + { + /* Write 'tag'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "tag", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the mapping tag. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + input_event.data.mapping_start.tag, -1, + 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Write 'implicit'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "implicit", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write if the mapping tag is implicit. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:bool", + (input_event.data.mapping_start.implicit ? + "true" : "false"), -1, + 1, 0, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Display the style information. */ + + if (input_event.data.mapping_start.style) + { + yaml_mapping_style_t style + = input_event.data.mapping_start.style; + + /* Write 'style'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "style", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write the scalar style. 
*/ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", + (style == YAML_BLOCK_MAPPING_STYLE ? "block" : + style == YAML_FLOW_MAPPING_STYLE ? "flow" : + "unknown"), -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + break; + + case YAML_MAPPING_END_EVENT: + + /* Write 'type'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "type", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Write 'MAPPING-END'. */ + + if (!yaml_scalar_event_initialize(&output_event, + NULL, "tag:yaml.org,2002:str", "MAPPING-END", -1, + 1, 1, YAML_PLAIN_SCALAR_STYLE)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + break; + + default: + /* It couldn't really happen. */ + break; + } + + /* Delete the event object. */ + + yaml_event_delete(&input_event); + + /* Create and emit a MAPPING-END event. */ + + if (!yaml_mapping_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + } + + /* Create and emit the SEQUENCE-END event. */ + + if (!yaml_sequence_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Create and emit the DOCUMENT-END event. */ + + if (!yaml_document_end_event_initialize(&output_event, 0)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + /* Create and emit the STREAM-END event. */ + + if (!yaml_stream_end_event_initialize(&output_event)) + goto event_error; + if (!yaml_emitter_emit(&emitter, &output_event)) + goto emitter_error; + + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 0; + +parser_error: + + /* Display a parser error message. 
*/ + + switch (parser.error) + { + case YAML_MEMORY_ERROR: + fprintf(stderr, "Memory error: Not enough memory for parsing\n"); + break; + + case YAML_READER_ERROR: + if (parser.problem_value != -1) { + fprintf(stderr, "Reader error: %s: #%X at %d\n", parser.problem, + parser.problem_value, parser.problem_offset); + } + else { + fprintf(stderr, "Reader error: %s at %d\n", parser.problem, + parser.problem_offset); + } + break; + + case YAML_SCANNER_ERROR: + if (parser.context) { + fprintf(stderr, "Scanner error: %s at line %d, column %d\n" + "%s at line %d, column %d\n", parser.context, + parser.context_mark.line, parser.context_mark.column, + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + else { + fprintf(stderr, "Scanner error: %s at line %d, column %d\n", + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + break; + + case YAML_PARSER_ERROR: + if (parser.context) { + fprintf(stderr, "Parser error: %s at line %d, column %d\n" + "%s at line %d, column %d\n", parser.context, + parser.context_mark.line, parser.context_mark.column, + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + else { + fprintf(stderr, "Parser error: %s at line %d, column %d\n", + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + break; + + default: + /* Couldn't happen. */ + fprintf(stderr, "Internal error\n"); + break; + } + + yaml_event_delete(&input_event); + yaml_event_delete(&output_event); + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 1; + +emitter_error: + + /* Display an emitter error message. 
*/ + + switch (emitter.error) + { + case YAML_MEMORY_ERROR: + fprintf(stderr, "Memory error: Not enough memory for emitting\n"); + break; + + case YAML_WRITER_ERROR: + fprintf(stderr, "Writer error: %s\n", emitter.problem); + break; + + case YAML_EMITTER_ERROR: + fprintf(stderr, "Emitter error: %s\n", emitter.problem); + break; + + default: + /* Couldn't happen. */ + fprintf(stderr, "Internal error\n"); + break; + } + + yaml_event_delete(&input_event); + yaml_event_delete(&output_event); + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 1; + +event_error: + + fprintf(stderr, "Memory error: Not enough memory for creating an event\n"); + + yaml_event_delete(&input_event); + yaml_event_delete(&output_event); + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 1; +} + diff --git a/tests/example-reformatter.c b/tests/example-reformatter.c new file mode 100644 index 00000000..a999fbed --- /dev/null +++ b/tests/example-reformatter.c @@ -0,0 +1,202 @@ + +#include <yaml.h> + +#include <stdlib.h> +#include <stdio.h> + +int +main(int argc, char *argv[]) +{ + int help = 0; + int canonical = 0; + int unicode = 0; + int k; + int done = 0; + + yaml_parser_t parser; + yaml_emitter_t emitter; + yaml_event_t event; + + /* Clear the objects. */ + + memset(&parser, 0, sizeof(parser)); + memset(&emitter, 0, sizeof(emitter)); + memset(&event, 0, sizeof(event)); + + /* Analyze command line options. */ + + for (k = 1; k < argc; k ++) + { + if (strcmp(argv[k], "-h") == 0 + || strcmp(argv[k], "--help") == 0) { + help = 1; + } + + else if (strcmp(argv[k], "-c") == 0 + || strcmp(argv[k], "--canonical") == 0) { + canonical = 1; + } + + else if (strcmp(argv[k], "-u") == 0 + || strcmp(argv[k], "--unicode") == 0) { + unicode = 1; + } + + else { + fprintf(stderr, "Unrecognized option: %s\n" + "Try `%s --help` for more information.\n", + argv[k], argv[0]); + return 1; + } + } + + /* Display the help string.
*/ + + if (help) + { + printf("%s [--canonical] [--unicode] <input >output\n" + "or\n%s -h | --help\nReformat a YAML stream\n\nOptions:\n" + "-h, --help\t\tdisplay this help and exit\n" + "-c, --canonical\t\toutput in the canonical YAML format\n" + "-u, --unicode\t\toutput unescaped non-ASCII characters\n", + argv[0], argv[0]); + return 0; + } + + /* Initialize the parser and emitter objects. */ + + if (!yaml_parser_initialize(&parser)) + goto parser_error; + + if (!yaml_emitter_initialize(&emitter)) + goto emitter_error; + + /* Set the parser parameters. */ + + yaml_parser_set_input_file(&parser, stdin); + + /* Set the emitter parameters. */ + + yaml_emitter_set_output_file(&emitter, stdout); + + yaml_emitter_set_canonical(&emitter, canonical); + yaml_emitter_set_unicode(&emitter, unicode); + + /* The main loop. */ + + while (!done) + { + /* Get the next event. */ + + if (!yaml_parser_parse(&parser, &event)) + goto parser_error; + + /* Check if this is the stream end. */ + + if (event.type == YAML_STREAM_END_EVENT) { + done = 1; + } + + /* Emit the event. */ + + if (!yaml_emitter_emit(&emitter, &event)) + goto emitter_error; + } + + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 0; + +parser_error: + + /* Display a parser error message.
*/ + + switch (parser.error) + { + case YAML_MEMORY_ERROR: + fprintf(stderr, "Memory error: Not enough memory for parsing\n"); + break; + + case YAML_READER_ERROR: + if (parser.problem_value != -1) { + fprintf(stderr, "Reader error: %s: #%X at %d\n", parser.problem, + parser.problem_value, parser.problem_offset); + } + else { + fprintf(stderr, "Reader error: %s at %d\n", parser.problem, + parser.problem_offset); + } + break; + + case YAML_SCANNER_ERROR: + if (parser.context) { + fprintf(stderr, "Scanner error: %s at line %d, column %d\n" + "%s at line %d, column %d\n", parser.context, + parser.context_mark.line, parser.context_mark.column, + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + else { + fprintf(stderr, "Scanner error: %s at line %d, column %d\n", + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + break; + + case YAML_PARSER_ERROR: + if (parser.context) { + fprintf(stderr, "Parser error: %s at line %d, column %d\n" + "%s at line %d, column %d\n", parser.context, + parser.context_mark.line, parser.context_mark.column, + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + else { + fprintf(stderr, "Parser error: %s at line %d, column %d\n", + parser.problem, parser.problem_mark.line, + parser.problem_mark.column); + } + break; + + default: + /* Couldn't happen. */ + fprintf(stderr, "Internal error\n"); + break; + } + + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 1; + +emitter_error: + + /* Display an emitter error message. */ + + switch (emitter.error) + { + case YAML_MEMORY_ERROR: + fprintf(stderr, "Memory error: Not enough memory for emitting\n"); + break; + + case YAML_WRITER_ERROR: + fprintf(stderr, "Writer error: %s\n", emitter.problem); + break; + + case YAML_EMITTER_ERROR: + fprintf(stderr, "Emitter error: %s\n", emitter.problem); + break; + + default: + /* Couldn't happen. 
*/ + fprintf(stderr, "Internal error\n"); + break; + } + + yaml_parser_delete(&parser); + yaml_emitter_delete(&emitter); + + return 1; +} + From 03f21e73690eae07fb83d1a5b5d969bee7288612 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Tue, 1 Aug 2006 11:28:16 +0000 Subject: [PATCH 29/73] Prepare the initial release. --- Makefile.am | 2 +- README | 19 +++-- announcement.msg | 23 ++++++ include/yaml.h | 147 +++++++++++++++++----------------- tests/example-deconstructor.c | 20 ++--- tests/example-reformatter.c | 20 ++--- 6 files changed, 132 insertions(+), 99 deletions(-) create mode 100644 announcement.msg diff --git a/Makefile.am b/Makefile.am index d1c309f6..1cf4114e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,7 +3,7 @@ SUBDIRS = include src . tests -EXTRA_DIST = doc/doxygen.cfg +EXTRA_DIST = README LICENSE doc/doxygen.cfg maintainer-clean-local: -rm -f aclocal.m4 config.h.in configure config/* diff --git a/README b/README index fdda6108..a762f97c 100644 --- a/README +++ b/README @@ -1,22 +1,29 @@ -libyaml - A C library for parsing and emitting YAML. +LibYAML - A C library for parsing and emitting YAML. -The project is in an early stage of development and not usable for end users. +The project is in an early stage of development. To build and install the library, run: +$ ./configure +$ make +# make install + +If you checked out the source code from the Subversion repository, run: $ ./bootstrap $ ./configure $ make -$ make check # make install -For more information, check the libyaml homepage: +For more information, check the LibYAML homepage: 'http://pyyaml.org/wiki/LibYAML'. Post your questions and opinions to the YAML-Core mailing list: 'http://lists.sourceforge.net/lists/listinfo/yaml-core'. -Submit bug reports and feature requests to the libyaml bug tracker: +Submit bug reports and feature requests to the LibYAML bug tracker: 'http://pyyaml.org/newticket?component=libyaml'. -libyaml is written by Kirill Simonov .
It is released +LibYAML is written by Kirill Simonov . It is released under the MIT license. See the file LICENSE for more details. + +This project is developed for the Python Software Foundation as a part of +Google Summer of Code under the mentorship of Clark Evans. diff --git a/announcement.msg b/announcement.msg new file mode 100644 index 00000000..a2dae20f --- /dev/null +++ b/announcement.msg @@ -0,0 +1,23 @@ +From: Kirill Simonov +To: yaml-core@lists.sourceforge.net +Subject: LibYAML-0.0.1: The initial release + +I'd like to present the initial release of LibYAML, a YAML parser and emitter +written in C. + +LibYAML homepage: http://pyyaml.org/wiki/LibYAML +TAR.GZ package: http://pyyaml.org/download/libyaml/yaml-0.0.1.tar.gz +SVN repository: http://svn.pyyaml.org/libyaml +Bug tracker: http://pyyaml.org/newticket?component=libyaml + +The library is functionally complete, but the documentation is scarce and the +API is subject to change. For more information, you may check the project +homepage, the doxygen-generated documentation in the `doc` directory of the +source distribution, and the examples `tests/example-reformatter.c` and +`tests/example-deconstructor.c`. + +LibYAML is written by Kirill Simonov . It is released +under the MIT license. See the file LICENSE for more details. + +This project is developed for the Python Software Foundation as a part of +Google Summer of Code under the mentorship of Clark Evans. diff --git a/include/yaml.h b/include/yaml.h index 47de95d1..8aec0be6 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -4,7 +4,7 @@ * * Include the header file with the code: * @code - * #include <yaml/yaml.h> + * #include <yaml.h> * @endcode */ @@ -285,7 +285,7 @@ typedef struct { /** * Free any memory allocated for a token object. * - * @param[in] token A token object. + * @param[in,out] token A token object. */ YAML_DECLARE(void) @@ -416,7 +416,7 @@ typedef struct { /** * Create the STREAM-START event. * - * @param[in] event An empty event object.
+ * @param[out] event An empty event object. * @param[in] encoding The stream encoding. * * @returns @c 1 if the function succeeded, @c 0 on error. @@ -429,7 +429,7 @@ yaml_stream_start_event_initialize(yaml_event_t *event, /** * Create the STREAM-END event. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -443,7 +443,7 @@ yaml_stream_end_event_initialize(yaml_event_t *event); * The @a implicit argument is considered as a stylistic parameter and may be * ignored by the emitter. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * @param[in] version_directive The %YAML directive value or @c NULL. * @param[in] tag_directives_start The beginning of the %TAG directives list. * @param[in] tag_directives_end The end of the %TAG directives list. @@ -465,7 +465,7 @@ yaml_document_start_event_initialize(yaml_event_t *event, * The @a implicit argument is considered as a stylistic parameter and may be * ignored by the emitter. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * @param[in] implicit If the document end indicator is implicit. * * @returns @c 1 if the function succeeded, @c 0 on error. @@ -477,7 +477,7 @@ yaml_document_end_event_initialize(yaml_event_t *event, int implicit); /** * Create an ALIAS event. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * @param[in] anchor The anchor value. * * @returns @c 1 if the function succeeded, @c 0 on error. @@ -494,7 +494,7 @@ yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor); * Either the @a tag attribute or one of the @a plain_implicit and * @a quoted_implicit flags must be set. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * @param[in] anchor The scalar anchor or @c NULL. * @param[in] tag The scalar tag or @c NULL. 
* @param[in] value The scalar value. @@ -520,7 +520,7 @@ yaml_scalar_event_initialize(yaml_event_t *event, * * Either the @a tag attribute or the @a implicit flag must be set. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * @param[in] anchor The sequence anchor or @c NULL. * @param[in] tag The sequence tag or @c NULL. * @param[in] implicit If the tag may be omitted. @@ -537,7 +537,7 @@ yaml_sequence_start_event_initialize(yaml_event_t *event, /** * Create a SEQUENCE-END event. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -552,7 +552,7 @@ yaml_sequence_end_event_initialize(yaml_event_t *event); * * Either the @a tag attribute or the @a implicit flag must be set. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * @param[in] anchor The mapping anchor or @c NULL. * @param[in] tag The mapping tag or @c NULL. * @param[in] implicit If the tag may be omitted. @@ -569,7 +569,7 @@ yaml_mapping_start_event_initialize(yaml_event_t *event, /** * Create a MAPPING-END event. * - * @param[in] event An empty event object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -580,7 +580,7 @@ yaml_mapping_end_event_initialize(yaml_event_t *event); /** * Free any memory allocated for an event object. * - * @param[in] event An event object. + * @param[in,out] event An event object. */ YAML_DECLARE(void) @@ -600,11 +600,11 @@ yaml_event_delete(yaml_event_t *event); * source. The handler should write not more than @a size bytes to the @a * buffer. The number of written bytes should be set to the @a length variable. * - * @param[in] data A pointer to an application data specified by - * @c yaml_parser_set_read_handler. - * @param[out] buffer The buffer to write the data from the source. - * @param[in] size The size of the buffer.
- * @param[out] size_read The actual number of bytes read from the source. + * @param[in,out] data A pointer to an application data specified by + * yaml_parser_set_input(). + * @param[out] buffer The buffer to write the data from the source. + * @param[in] size The size of the buffer. + * @param[out] size_read The actual number of bytes read from the source. * * @returns On success, the handler should return @c 1. If the handler failed, * the returned value should be @c 0. On EOF, the handler should set the @@ -875,9 +875,9 @@ typedef struct { * Initialize a parser. * * This function creates a new parser object. An application is responsible - * for destroying the object using the @c yaml_parser_delete function. + * for destroying the object using the yaml_parser_delete() function. * - * @param[in] parser An empty parser object. + * @param[out] parser An empty parser object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -888,7 +888,7 @@ yaml_parser_initialize(yaml_parser_t *parser); /** * Destroy a parser. * - * @param[in] parser A parser object. + * @param[in,out] parser A parser object. */ YAML_DECLARE(void) @@ -901,9 +901,9 @@ yaml_parser_delete(yaml_parser_t *parser); * exists. The application is responsible for destroing @a input after * destroying the @a parser. * - * @param[in] parser A parser object. - * @param[in] input A source data. - * @param[in] size The length of the source data in bytes. + * @param[in,out] parser A parser object. + * @param[in] input A source data. + * @param[in] size The length of the source data in bytes. */ YAML_DECLARE(void) @@ -916,8 +916,8 @@ yaml_parser_set_input_string(yaml_parser_t *parser, * @a file should be a file object open for reading. The application is * responsible for closing the @a file. * - * @param[in] parser A parser object. - * @param[in] file An open file. + * @param[in,out] parser A parser object. + * @param[in] file An open file. 
*/ YAML_DECLARE(void) @@ -926,9 +926,10 @@ yaml_parser_set_input_file(yaml_parser_t *parser, FILE *file); /** * Set a generic input handler. * - * @param[in] parser A parser object. - * @param[in] handler A read handler. - * @param[in] data Any application data for passing to the read handler. + * @param[in,out] parser A parser object. + * @param[in] handler A read handler. + * @param[in] data Any application data for passing to the read + * handler. */ YAML_DECLARE(void) @@ -938,8 +939,8 @@ yaml_parser_set_input(yaml_parser_t *parser, /** * Set the source encoding. * - * @param[in] parser A parser object. - * @param[in] encoding The source encoding. + * @param[in,out] parser A parser object. + * @param[in] encoding The source encoding. */ YAML_DECLARE(void) @@ -956,11 +957,11 @@ yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); * An application is responsible for freeing any buffers associated with the * produced token object using the @c yaml_token_delete function. * - * An application must not alternate the calls of @c yaml_parser_scan with the - * calls of @c yaml_parser_parse. Doing this will break the parser. + * An application must not alternate the calls of yaml_parser_scan() with the + * calls of yaml_parser_parse(). Doing this will break the parser. * - * @param[in] parser A parser object. - * @param[in] token An empty token object. + * @param[in,out] parser A parser object. + * @param[out] token An empty token object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -977,13 +978,13 @@ yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); * @c YAML_STREAM_END_EVENT. * * An application is responsible for freeing any buffers associated with the - * produced event object using the @c yaml_event_delete function. + * produced event object using the yaml_event_delete() function. * - * An application must not alternate the calls of @c yaml_parser_scan with the - * calls of @c yaml_parser_parse. 
Doing this will break the parser. + * An application must not alternate the calls of yaml_parser_scan() with the + * calls of yaml_parser_parse(). Doing this will break the parser. * - * @param[in] parser A parser object. - * @param[in] event An empty event object. + * @param[in,out] parser A parser object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -1005,10 +1006,10 @@ yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); * characters to the output. The handler should write @a size bytes of the * @a buffer to the output. * - * @param[in] data A pointer to an application data specified by - * @c yaml_emitter_set_write_handler. - * @param[out] buffer The buffer with bytes to be written. - * @param[in] size The size of the buffer. + * @param[in,out] data A pointer to an application data specified by + * yaml_emitter_set_output(). + * @param[in] buffer The buffer with bytes to be written. + * @param[in] size The size of the buffer. * * @returns On success, the handler should return @c 1. If the handler failed, * the returned value should be @c 0. @@ -1256,9 +1257,9 @@ typedef struct { * Initialize an emitter. * * This function creates a new emitter object. An application is responsible - * for destroying the object using the @c yaml_emitter_delete function. + * for destroying the object using the yaml_emitter_delete() function. * - * @param[in] emitter An empty parser object. + * @param[out] emitter An empty emitter object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -1269,7 +1270,7 @@ yaml_emitter_initialize(yaml_emitter_t *emitter); /** * Destroy an emitter. * - * @param[in] emitter An emitter object. + * @param[in,out] emitter An emitter object. */ YAML_DECLARE(void) @@ -1283,10 +1284,11 @@ yaml_emitter_delete(yaml_emitter_t *emitter); * bytes. If the buffer is smaller than required, the emitter produces the * YAML_WRITE_ERROR error.
* - * @param[in] emitter An emitter object. - * @param[in] output An output buffer. - * @param[in] size The buffer size. - * @param[in] size_written The pointer to save the number of written bytes. + * @param[in,out] emitter An emitter object. + * @param[out] output An output buffer. + * @param[in] size The buffer size. + * @param[out] size_written The pointer to save the number of written + * bytes. */ YAML_DECLARE(void) @@ -1299,8 +1301,8 @@ yaml_emitter_set_output_string(yaml_emitter_t *emitter, * @a file should be a file object open for writing. The application is * responsible for closing the @a file. * - * @param[in] emitter An emitter object. - * @param[in] file An open file. + * @param[in,out] emitter An emitter object. + * @param[in] file An open file. */ YAML_DECLARE(void) @@ -1309,9 +1311,10 @@ yaml_emitter_set_output_file(yaml_emitter_t *emitter, FILE *file); /** * Set a generic output handler. * - * @param[in] emitter An emitter object. - * @param[in] handler A write handler. - * @param[in] data Any application data for passing to the write handler. + * @param[in,out] emitter An emitter object. + * @param[in] handler A write handler. + * @param[in] data Any application data for passing to the write + * handler. */ YAML_DECLARE(void) @@ -1321,8 +1324,8 @@ yaml_emitter_set_output(yaml_emitter_t *emitter, /** * Set the output encoding. * - * @param[in] emitter An emitter object. - * @param[in] encoding The output encoding. + * @param[in,out] emitter An emitter object. + * @param[in] encoding The output encoding. */ YAML_DECLARE(void) @@ -1332,8 +1335,8 @@ yaml_emitter_set_encoding(yaml_emitter_t *emitter, yaml_encoding_t encoding); * Set if the output should be in the "canonical" format as in the YAML * specification. * - * @param[in] emitter An emitter object. - * @param[in] canonical If the output is canonical. + * @param[in,out] emitter An emitter object. + * @param[in] canonical If the output is canonical.
*/ YAML_DECLARE(void) @@ -1342,8 +1345,8 @@ yaml_emitter_set_canonical(yaml_emitter_t *emitter, int canonical); /** * Set the intendation increment. * - * @param[in] emitter An emitter object. - * @param[in] indent The indentation increment (1 < . < 10). + * @param[in,out] emitter An emitter object. + * @param[in] indent The indentation increment (1 < . < 10). */ YAML_DECLARE(void) @@ -1352,8 +1355,8 @@ yaml_emitter_set_indent(yaml_emitter_t *emitter, int indent); /** * Set the preferred line width. @c -1 means unlimited. * - * @param[in] emitter An emitter object. - * @param[in] width The preferred line width. + * @param[in,out] emitter An emitter object. + * @param[in] width The preferred line width. */ YAML_DECLARE(void) @@ -1362,8 +1365,8 @@ yaml_emitter_set_width(yaml_emitter_t *emitter, int width); /** * Set if unescaped non-ASCII characters are allowed. * - * @param[in] emitter An emitter object. - * @param[in] unicode If unescaped Unicode characters are allowed. + * @param[in,out] emitter An emitter object. + * @param[in] unicode If unescaped Unicode characters are allowed. */ YAML_DECLARE(void) @@ -1372,8 +1375,8 @@ yaml_emitter_set_unicode(yaml_emitter_t *emitter, int unicode); /** * Set the preferred line break. * - * @param[in] emitter An emitter object. - * @param[in] line_break The preferred line break. + * @param[in,out] emitter An emitter object. + * @param[in] line_break The preferred line break. */ YAML_DECLARE(void) @@ -1382,13 +1385,13 @@ yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break); /** * Emit an event. * - * The event object may be generated using the @c yaml_parser_parse function. + * The event object may be generated using the yaml_parser_parse() function. * The emitter takes the responsibility for the event object and destroys its * content after it is emitted. The event object is destroyed even if the * function fails. * - * @param[in] emitter An emitter object. - * @param[in] event An event object. 
+ * @param[in,out] emitter An emitter object. + * @param[in,out] event An event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -1399,7 +1402,7 @@ yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); /** * Flush the accumulated characters to the output. * - * @param[in] emitter An emitter object. + * @param[in,out] emitter An emitter object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ diff --git a/tests/example-deconstructor.c b/tests/example-deconstructor.c index d41b5ae0..57e6693c 100644 --- a/tests/example-deconstructor.c +++ b/tests/example-deconstructor.c @@ -1046,14 +1046,14 @@ main(int argc, char *argv[]) if (parser.context) { fprintf(stderr, "Scanner error: %s at line %d, column %d\n" "%s at line %d, column %d\n", parser.context, - parser.context_mark.line, parser.context_mark.column, - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.context_mark.line+1, parser.context_mark.column+1, + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } else { fprintf(stderr, "Scanner error: %s at line %d, column %d\n", - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } break; @@ -1061,14 +1061,14 @@ main(int argc, char *argv[]) if (parser.context) { fprintf(stderr, "Parser error: %s at line %d, column %d\n" "%s at line %d, column %d\n", parser.context, - parser.context_mark.line, parser.context_mark.column, - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.context_mark.line+1, parser.context_mark.column+1, + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } else { fprintf(stderr, "Parser error: %s at line %d, column %d\n", - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } break; diff 
--git a/tests/example-reformatter.c b/tests/example-reformatter.c index a999fbed..946d5561 100644 --- a/tests/example-reformatter.c +++ b/tests/example-reformatter.c @@ -133,14 +133,14 @@ main(int argc, char *argv[]) if (parser.context) { fprintf(stderr, "Scanner error: %s at line %d, column %d\n" "%s at line %d, column %d\n", parser.context, - parser.context_mark.line, parser.context_mark.column, - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.context_mark.line+1, parser.context_mark.column+1, + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } else { fprintf(stderr, "Scanner error: %s at line %d, column %d\n", - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } break; @@ -148,14 +148,14 @@ main(int argc, char *argv[]) if (parser.context) { fprintf(stderr, "Parser error: %s at line %d, column %d\n" "%s at line %d, column %d\n", parser.context, - parser.context_mark.line, parser.context_mark.column, - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.context_mark.line+1, parser.context_mark.column+1, + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } else { fprintf(stderr, "Parser error: %s at line %d, column %d\n", - parser.problem, parser.problem_mark.line, - parser.problem_mark.column); + parser.problem, parser.problem_mark.line+1, + parser.problem_mark.column+1); } break; From 2ef0340f88d9e32e564cc40a77346988b2127834 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Tue, 15 Aug 2006 17:21:01 +0000 Subject: [PATCH 30/73] Older versions of gcc do not know about -Wno-pointer-sign. 
--- tests/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Makefile.am b/tests/Makefile.am index e7880d65..bfc01d7d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,5 +1,5 @@ AM_CPPFLAGS = -I$(top_srcdir)/include -AM_CFLAGS = -Wno-pointer-sign +#AM_CFLAGS = -Wno-pointer-sign LDADD = $(top_builddir)/src/libyaml.la TESTS = test-version test-reader check_PROGRAMS = test-version test-reader From a5dab4db21662e6be49cb91a55d2425071fa74fe Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 11 Dec 2006 19:20:29 +0000 Subject: [PATCH 31/73] Force a new line at the end of the input stream even if there is no new line character. This fixes a nasty bug when libyaml hangs on documents like `[[[[`. Thanks ciaranm for reporting the bug. --- src/scanner.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/scanner.c b/src/scanner.c index 1414401d..b190c7ee 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -1324,6 +1324,13 @@ yaml_parser_fetch_stream_end(yaml_parser_t *parser) { yaml_token_t token; + /* Force new line. */ + + if (parser->mark.column != 0) { + parser->mark.column = 0; + parser->mark.line ++; + } + /* Reset the indentation level. */ if (!yaml_parser_unroll_indent(parser, -1)) From 50a3f268659c612611ee848d24ed3e712ee63c42 Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Mon, 11 Dec 2006 19:33:21 +0000 Subject: [PATCH 32/73] Add `const` qualifier for `yaml_parser_set_input_string` parameter `input`. --- include/yaml.h | 310 ++++++++++++++++++++++++++++++++++++++++++++- src/api.c | 183 +++++++++++++++++++++++++- src/yaml_private.h | 36 ++++++ 3 files changed, 522 insertions(+), 7 deletions(-) diff --git a/include/yaml.h b/include/yaml.h index 8aec0be6..afb62f07 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -406,9 +406,9 @@ typedef struct { } data; - /** The beginning of the token. */ + /** The beginning of the event. */ yaml_mark_t start_mark; - /** The end of the token. 
*/ + /** The end of the event. */ yaml_mark_t end_mark; } yaml_event_t; @@ -586,6 +586,304 @@ yaml_mapping_end_event_initialize(yaml_event_t *event); YAML_DECLARE(void) yaml_event_delete(yaml_event_t *event); +/** + * @defgroup nodes Nodes + * @{ + */ + +#define YAML_NULL_TAG "tag:yaml.org,2002:null" +#define YAML_BOOL_TAG "tag:yaml.org,2002:bool" +#define YAML_STR_TAG "tag:yaml.org,2002:str" +#define YAML_INT_TAG "tag:yaml.org,2002:int" +#define YAML_FLOAT_TAG "tag:yaml.org,2002:float" +#define YAML_TIMESTAMP_TAG "tag:yaml.org,2002:timestamp" + +#define YAML_SEQ_TAG "tag:yaml.org,2002:seq" +#define YAML_MAP_TAG "tag:yaml.org,2002:map" + +#define YAML_DEFAULT_SCALAR_TAG YAML_STR_TAG +#define YAML_DEFAULT_SEQUENCE_TAG YAML_SEQ_TAG +#define YAML_DEFAULT_MAPPING_STYLE YAML_MAP_TAG + +/** Node types. */ +typedef enum { + YAML_NO_NODE, + + YAML_SCALAR_NODE, + YAML_SEQUENCE_NODE, + YAML_MAPPING_NODE +} yaml_node_type_t; + +#if 0 + +typedef struct _yaml_node_t yaml_node_item_t; + +typedef struct { + yaml_node_item_t key; + yaml_node_item_t value; +} yaml_node_pair_t; + +/** The node structure. */ +typedef struct _yaml_node_t { + + /** The node type. */ + yaml_node_type_t type; + + /* The reference counter. */ + int references; + + /** The node data. */ + union { + + /** The scalar parameters (for @c YAML_SCALAR_NODE). */ + struct { + /** The tag. */ + yaml_char_t *tag; + /** The scalar value. */ + yaml_char_t *value; + /** The length of the scalar value. */ + size_t length; + /** The scalar style. */ + yaml_scalar_style_t style; + } scalar; + + /** The sequence parameters (for @c YAML_SEQUENCE_NODE). */ + struct { + /** The tag. */ + yaml_char_t *tag; + /** The stack of sequence items. */ + struct { + /** The beginning of the stack. */ + struct yaml_node_item_t *start; + /** The end of the stack. */ + struct yaml_node_item_t *end; + /** The top of the stack. */ + struct yaml_node_item_t *top; + } items; + /** The sequence style. 
*/ + yaml_sequence_style_t style; + } sequence; + + /** The mapping parameters (for @c YAML_MAPPING_NODE). */ + struct { + /** The tag. */ + yaml_char_t *tag; + /** The stack of mapping pairs. */ + struct { + /** The beginning of the stack. */ + struct yaml_node_pair_t *start; + /** The end of the stack. */ + struct yaml_node_pair_t *end; + /** The top of the stack. */ + struct yaml_node_pair_t *top; + } pairs; + /** The mapping style. */ + yaml_mapping_style_t style; + } mapping; + + } data; + + /** The beginning of the node. */ + yaml_mark_t start_mark; + /** The end of the node. */ + yaml_mark_t end_mark; + +} yaml_node_t; + +/** + * Create a SCALAR node. + * + * The @a style argument may be ignored by the emitter. + * + * @param[out] node An empty node object. + * @param[in] tag The scalar tag. + * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] style The scalar style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_scalar_node_initialize(yaml_node_t *node, + yaml_char_t *tag, yaml_char_t *value, int length, + yaml_scalar_style_t style); + +/** + * Create a SEQUENCE node. + * + * The @a style argument may be ignored by the emitter. + * + * @param[out] node An empty node object. + * @param[in] tag The sequence tag. + * @param[in] style The sequence style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_node_initialize(yaml_node_t *node, + yaml_char_t *tag, yaml_sequence_style_t style); + +/** + * Add an item to a SEQUENCE node + * + * @param[out] node A sequence node. + * @param[in] item An item node. +* + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_node_add_item(yaml_node_t *node, yaml_node_t *item) + +/** + * Create a SCALAR node and add it to a SEQUENCE node. + * + * @param[out] node A sequence node. + * @param[in] tag The scalar tag. 
+ * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] style The scalar style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_node_add_scalar_item(yaml_node_t *node, + yaml_char_t *tag, yaml_char_t *value, int length, + yaml_scalar_style_t style); + +/** + * Get the number of subnodes of a SEQUENCE node. + * + * @param[in] node A sequence node. + * + * @returns the number of subnodes. + */ + +YAML_DECLARE(size_t) +yaml_sequence_node_get_length(yaml_node_t *node); + +/** + * Get a subnode of a SEQUENCE node. + * + * @param[in] node A sequence node. + * @param[in] index The index of a subnode. + * @param[out] item A subnode. + */ + +YAML_DECLARE(void) +yaml_sequence_node_get_item(yaml_node_t *node, size_t index, + yaml_node_t *item); + +/** + * Create a MAPPING node. + * + * The @a style argument may be ignored by the emitter. + * + * @param[out] node An empty node object. + * @param[in] tag The mapping tag. + * @param[in] style The mapping style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_mapping_node_initialize(yaml_node_t *node, + yaml_char_t *tag, yaml_mapping_style_t style); + +/** + * Add a key/value pair of nodes to a MAPPING node. + * + * @param[out] node A mapping node. + * @param[in] key A key node. + * @param[in] value A value node. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_mapping_node_add_pair(yaml_node_t *node, + yaml_node_t *key, yaml_node_t *value) + +/** + * Create a scalar key and add the key/value pair to a MAPPING node. + * + * @param[out] node A mapping node. + * @param[in] key_tag The key node tag. + * @param[in] key_value The key node value. + * @param[in] key_length The length of the key node value. + * @param[in] key_style The key node style. + * @param[in] value A value node. 
+ * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_node_add_scalar_key_pair(yaml_node_t *node, + yaml_char_t *key_tag, yaml_char_t *key_value, int key_length, + yaml_scalar_style_t key_style, + yaml_node_t *value); + +/** + * Create a scalar key/value nodes and add the pair to a MAPPING node. + * + * @param[out] node A mapping node. + * @param[in] key_tag The key node tag. + * @param[in] key_value The key node value. + * @param[in] key_length The length of the key node value. + * @param[in] key_style The key node style. + * @param[in] value_tag The value node tag. + * @param[in] value_value The value node value. + * @param[in] value_length The length of the value node value. + * @param[in] value_style The value node style. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_sequence_node_add_scalar_pair(yaml_node_t *node, + yaml_char_t *key_tag, yaml_char_t *key_value, int key_length, + yaml_scalar_style_t key_style, + yaml_char_t *value_tag, yaml_char_t *value_value, int value_length, + yaml_scalar_style_t value_style); + +/** + * Get the number of subnode pairs of a MAPPING node. + * + * @param[in] node A mapping node. + * + * @returns the number of pairs. + */ + +YAML_DECLARE(size_t) +yaml_mapping_node_get_length(yaml_node_t *node); + +/** + * Get a subnode of a SEQUENCE node. + * + * @param[in] node A sequence node. + * @param[in] index The index of a subnode. + * @param[out] key The key subnode. + * @param[out] value The value subnode. + */ + +YAML_DECLARE(void) +yaml_mapping_node_get_pair(yaml_node_t *node, size_t index, + yaml_node_t *key, yaml_node_t *value); + +/** + * Delete a node and its subnodes. + * + * @param[out] node A node object. + */ + +YAML_DECLARE(void) +yaml_node_delete(yaml_node_t *node); + +#endif + /** @} */ /** @@ -711,11 +1009,11 @@ typedef struct { /** String input data. */ struct { /** The string start pointer. 
*/ - unsigned char *start; + const unsigned char *start; /** The string end pointer. */ - unsigned char *end; + const unsigned char *end; /** The string current position. */ - unsigned char *current; + const unsigned char *current; } string; /** File input data. */ @@ -908,7 +1206,7 @@ yaml_parser_delete(yaml_parser_t *parser); YAML_DECLARE(void) yaml_parser_set_input_string(yaml_parser_t *parser, - unsigned char *input, size_t size); + const unsigned char *input, size_t size); /** * Set a file input. diff --git a/src/api.c b/src/api.c index 83ca720c..a2d55435 100644 --- a/src/api.c +++ b/src/api.c @@ -279,7 +279,7 @@ yaml_file_read_handler(void *data, unsigned char *buffer, size_t size, YAML_DECLARE(void) yaml_parser_set_input_string(yaml_parser_t *parser, - unsigned char *input, size_t size) + const unsigned char *input, size_t size) { assert(parser); /* Non-NULL parser object expected. */ assert(!parser->read_handler); /* You can set the source only once. */ @@ -1019,3 +1019,184 @@ yaml_event_delete(yaml_event_t *event) memset(event, 0, sizeof(yaml_event_t)); } +#if 0 + +/* + * Create a SCALAR node. + */ + +YAML_DECLARE(int) +yaml_scalar_node_initialize(yaml_node_t *node, + yaml_char_t *tag, yaml_char_t *value, int length, + yaml_scalar_style_t style) +{ + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *tag_copy = NULL; + yaml_char_t *value_copy = NULL; + + assert(node); /* Non-NULL node object is expected. */ + assert(value); /* Non-NULL anchor is expected. 
*/ + + if (!tag) { + tag = YAML_DEFAULT_SCALAR_TAG; + } + + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; + + if (length < 0) { + length = strlen((char *)value); + } + + if (!yaml_check_utf8(value, length)) goto error; + value_copy = yaml_malloc(length+1); + if (!value_copy) goto error; + memcpy(value_copy, value, length); + value_copy[length] = '\0'; + + SCALAR_NODE_INIT(*node, tag_copy, value_copy, length, style, mark, mark); + + return 1; + +error: + yaml_free(tag_copy); + yaml_free(value_copy); + + return 0; +} + +/* + * Create a SEQUENCE node. + */ + +YAML_DECLARE(int) +yaml_sequence_node_initialize(yaml_node_t *node, + yaml_char_t *tag, yaml_sequence_style_t style) +{ + struct { + yaml_error_type_t error; + } context; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *tag_copy = NULL; + struct { + yaml_node_item_t *start; + yaml_node_item_t *end; + yaml_node_item_t *top; + } items = { NULL, NULL, NULL }; + + assert(node); /* Non-NULL node object is expected. */ + + if (!tag) { + tag = YAML_DEFAULT_SEQUENCE_TAG; + } + + if (tag) { + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; + } + + if (!STACK_INIT(context, items, INITIAL_STACK_SIZE)) goto error; + + SEQUENCE_NODE_INIT(*node, tag_copy, items.start, item.end, style, + mark, mark); + + return 1; + +error: + yaml_free(tag_copy); + STACK_DEL(context, items); + + return 0; +} + +/* + * Create a MAPPING node. + */ + +YAML_DECLARE(int) +yaml_mapping_node_initialize(yaml_node_t *node, + yaml_char_t *tag, yaml_mapping_style_t style) +{ + struct { + yaml_error_type_t error; + } context; + yaml_mark_t mark = { 0, 0, 0 }; + yaml_char_t *tag_copy = NULL; + struct { + yaml_node_pair_t *start; + yaml_node_pair_t *end; + yaml_node_pair_t *top; + } pairs = { NULL, NULL, NULL }; + + assert(node); /* Non-NULL node object is expected. 
*/ + + if (!tag) { + tag = YAML_DEFAULT_MAPPING_TAG; + } + + if (tag) { + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; + } + + if (!STACK_INIT(context, pairs, INITIAL_STACK_SIZE)) goto error; + + MAPPING_NODE_INIT(*node, tag_copy, pairs.start, pairs.end, style, + mark, mark); + + return 1; + +error: + yaml_free(tag_copy); + STACK_DEL(context, pairs); + + return 0; +} + +/* + * Delete a node and its subnodes. + */ + +YAML_DECLARE(void) +yaml_node_delete(yaml_node_t *node) +{ + struct { + yaml_error_type_t error; + } context; + struct { + yaml_node_item_t *start; + yaml_node_item_t *end; + yaml_node_item_t *head; + yaml_node_item_t *tail; + } queue = { NULL, NULL, NULL, NULL }; + + assert(node); /* Non-NULL node object is expected. */ + + if (node->type == YAML_SCALAR_NODE) { + yaml_free(node->data.scalar.tag); + yaml_free(node->data.scalar.value); + memset(node, 0, sizeof(yaml_node_t)); + return; + } + + if (!QUEUE_INIT(context, queue, INITIAL_QUEUE_SIZE)) goto error; + if (!ENQUEUE(context, queue, node)) goto error; + + while (!QUEUE_EMPTY(context, queue)) { + yaml_node_t node = DEQUEUE(context, queue); + if (node.type == YAML_SCALAR_NODE) { + if (!node->reference) + } + if (node->type == YAML_SEQUENCE_NODE) { + while (!STACK_EMPTY(context, node->data.sequence.items)) { + yaml_node_t *item = + } + } + } +} + +#endif + diff --git a/src/yaml_private.h b/src/yaml_private.h index 08a02323..33787357 100644 --- a/src/yaml_private.h +++ b/src/yaml_private.h @@ -579,3 +579,39 @@ yaml_queue_extend(void **start, void **head, void **tail, void **end); #define MAPPING_END_EVENT_INIT(event,start_mark,end_mark) \ (EVENT_INIT((event),YAML_MAPPING_END_EVENT,(start_mark),(end_mark))) +/* + * Node initializers. 
+ */ + +#define NODE_INIT(node,node_type,node_start_mark,node_end_mark) \ + (memset(&(node), 0, sizeof(yaml_node_t)), \ + (node).type = (node_type), \ + (node).start_mark = (node_start_mark), \ + (node).end_mark = (node_end_mark)) + +#define SCALAR_NODE_INIT(node,node_tag,node_value,node_length, \ + node_style,start_mark,end_mark) \ + (EVENT_INIT((node),YAML_SCALAR_NODE,(start_mark),(end_mark)), \ + (node).data.scalar.tag = (node_tag), \ + (node).data.scalar.value = (node_value), \ + (node).data.scalar.length = (node_length), \ + (node).data.scalar.style = (node_style)) + +#define SEQUENCE_NODE_INIT(node,node_tag,node_items_start,node_items_end, \ + node_style,start_mark,end_mark) \ + (NODE_INIT((node),YAML_SEQUENCE_NODE,(start_mark),(end_mark)), \ + (node).data.sequence.tag = (node_tag), \ + (node).data.sequence.items.start = (node_items_start), \ + (node).data.sequence.items.end = (node_items_end), \ + (node).data.sequence.items.top = (node_items_start), \ + (node).data.sequence.style = (node_style)) + +#define MAPPING_NODE_INIT(node,node_tag,node_pairs_start,node_pairs_end, \ + node_style,start_mark,end_mark) \ + (NODE_INIT((node),YAML_MAPPING_NODE,(start_mark),(end_mark)), \ + (node).data.mapping.tag = (node_tag), \ + (node).data.mapping.pairs.start = (node_pairs_start), \ + (node).data.mapping.pairs.end = (node_pairs_end), \ + (node).data.mapping.pairs.top = (node_pairs_start), \ + (node).data.mapping.style = (node_style)) + From a0cac6b42421a63dc82d8afd359f1c1c249e79ef Mon Sep 17 00:00:00 2001 From: Kirill Simonov Date: Sun, 7 Jan 2007 20:11:16 +0000 Subject: [PATCH 33/73] Add functions for constructing, parsing and emitting YAML documents. 
--- configure.ac | 2 +- include/yaml.h | 557 +++++++++++++-------- src/Makefile.am | 2 +- src/api.c | 309 +++++++++--- src/dumper.c | 394 +++++++++++++++ src/emitter.c | 26 +- src/loader.c | 429 ++++++++++++++++ src/yaml_private.h | 30 +- tests/Makefile.am | 4 +- tests/example-deconstructor-alt.c | 800 ++++++++++++++++++++++++++++++ tests/example-deconstructor.c | 116 ++--- tests/example-reformatter-alt.c | 217 ++++++++ tests/run-dumper.c | 305 ++++++++++++ tests/run-emitter.c | 2 +- tests/run-loader.c | 59 +++ 15 files changed, 2892 insertions(+), 360 deletions(-) create mode 100644 src/dumper.c create mode 100644 src/loader.c create mode 100644 tests/example-deconstructor-alt.c create mode 100644 tests/example-reformatter-alt.c create mode 100644 tests/run-dumper.c create mode 100644 tests/run-loader.c diff --git a/configure.ac b/configure.ac index 95ec3832..2aba3996 100644 --- a/configure.ac +++ b/configure.ac @@ -18,7 +18,7 @@ m4_define([YAML_BUGS], [http://pyyaml.org/newticket?component=libyaml]) # else: # YAML_AGE = 0 m4_define([YAML_RELEASE], 0) -m4_define([YAML_CURRENT], 0) +m4_define([YAML_CURRENT], 1) m4_define([YAML_REVISION], 0) m4_define([YAML_AGE], 0) diff --git a/include/yaml.h b/include/yaml.h index afb62f07..515bf7c7 100644 --- a/include/yaml.h +++ b/include/yaml.h @@ -59,9 +59,9 @@ yaml_get_version_string(void); /** * Get the library version numbers. * - * @param[out] major Major version number. - * @param[out] minor Minor version number. - * @param[out] patch Patch version number. + * @param[out] major Major version number. + * @param[out] minor Minor version number. + * @param[out] patch Patch version number. */ YAML_DECLARE(void) @@ -78,7 +78,7 @@ yaml_get_version(int *major, int *minor, int *patch); typedef unsigned char yaml_char_t; /** The version directive data. */ -typedef struct { +typedef struct yaml_version_directive_s { /** The major version number. */ int major; /** The minor version number. 
*/ @@ -86,7 +86,7 @@ typedef struct { } yaml_version_directive_t; /** The tag directive data. */ -typedef struct { +typedef struct yaml_tag_directive_s { /** The tag handle. */ yaml_char_t *handle; /** The tag prefix. */ @@ -94,7 +94,7 @@ typedef struct { } yaml_tag_directive_t; /** The stream encoding. */ -typedef enum { +typedef enum yaml_encoding_e { YAML_ANY_ENCODING, YAML_UTF8_ENCODING, YAML_UTF16LE_ENCODING, @@ -103,7 +103,7 @@ typedef enum { /** Line break types. */ -typedef enum { +typedef enum yaml_break_e { YAML_ANY_BREAK, YAML_CR_BREAK, YAML_LN_BREAK, @@ -111,7 +111,7 @@ typedef enum { } yaml_break_t; /** Many bad things could happen with the parser and emitter. */ -typedef enum { +typedef enum yaml_error_type_e { YAML_NO_ERROR, YAML_MEMORY_ERROR, @@ -119,13 +119,14 @@ typedef enum { YAML_READER_ERROR, YAML_SCANNER_ERROR, YAML_PARSER_ERROR, + YAML_COMPOSER_ERROR, YAML_WRITER_ERROR, YAML_EMITTER_ERROR } yaml_error_type_t; /** The pointer position. */ -typedef struct { +typedef struct yaml_mark_s { /** The position index. */ size_t index; @@ -144,7 +145,7 @@ typedef struct { */ /** Scalar styles. */ -typedef enum { +typedef enum yaml_scalar_style_e { YAML_ANY_SCALAR_STYLE, YAML_PLAIN_SCALAR_STYLE, @@ -157,7 +158,7 @@ typedef enum { } yaml_scalar_style_t; /** Sequence styles. */ -typedef enum { +typedef enum yaml_sequence_style_e { YAML_ANY_SEQUENCE_STYLE, YAML_BLOCK_SEQUENCE_STYLE, @@ -165,7 +166,7 @@ typedef enum { } yaml_sequence_style_t; /** Mapping styles. */ -typedef enum { +typedef enum yaml_mapping_style_e { YAML_ANY_MAPPING_STYLE, YAML_BLOCK_MAPPING_STYLE, @@ -181,7 +182,7 @@ typedef enum { */ /** Token types. */ -typedef enum { +typedef enum yaml_token_type_e { YAML_NO_TOKEN, YAML_STREAM_START_TOKEN, @@ -213,7 +214,7 @@ typedef enum { } yaml_token_type_t; /** The token structure. */ -typedef struct { +typedef struct yaml_token_s { /** The token type. 
*/ yaml_token_type_t type; @@ -299,7 +300,7 @@ yaml_token_delete(yaml_token_t *token); */ /** Event types. */ -typedef enum { +typedef enum yaml_event_type_e { YAML_NO_EVENT, YAML_STREAM_START_EVENT, @@ -319,7 +320,7 @@ typedef enum { } yaml_event_type_t; /** The event structure. */ -typedef struct { +typedef struct yaml_event_s { /** The event type. */ yaml_event_type_t type; @@ -416,8 +417,8 @@ typedef struct { /** * Create the STREAM-START event. * - * @param[out] event An empty event object. - * @param[in] encoding The stream encoding. + * @param[out] event An empty event object. + * @param[in] encoding The stream encoding. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -429,7 +430,7 @@ yaml_stream_start_event_initialize(yaml_event_t *event, /** * Create the STREAM-END event. * - * @param[out] event An empty event object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -443,11 +444,15 @@ yaml_stream_end_event_initialize(yaml_event_t *event); * The @a implicit argument is considered as a stylistic parameter and may be * ignored by the emitter. * - * @param[out] event An empty event object. - * @param[in] version_directive The %YAML directive value or @c NULL. - * @param[in] tag_directives_start The beginning of the %TAG directives list. - * @param[in] tag_directives_end The end of the %TAG directives list. - * @param[in] implicit If the document start indicator is implicit. + * @param[out] event An empty event object. + * @param[in] version_directive The %YAML directive value or + * @c NULL. + * @param[in] tag_directives_start The beginning of the %TAG + * directives list. + * @param[in] tag_directives_end The end of the %TAG directives + * list. + * @param[in] implicit If the document start indicator is + * implicit. * * @returns @c 1 if the function succeeded, @c 0 on error. 
*/ @@ -465,8 +470,8 @@ yaml_document_start_event_initialize(yaml_event_t *event, * The @a implicit argument is considered as a stylistic parameter and may be * ignored by the emitter. * - * @param[out] event An empty event object. - * @param[in] implicit If the document end indicator is implicit. + * @param[out] event An empty event object. + * @param[in] implicit If the document end indicator is implicit. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -477,8 +482,8 @@ yaml_document_end_event_initialize(yaml_event_t *event, int implicit); /** * Create an ALIAS event. * - * @param[out] event An empty event object. - * @param[in] anchor The anchor value. + * @param[out] event An empty event object. + * @param[in] anchor The anchor value. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -494,14 +499,16 @@ yaml_alias_event_initialize(yaml_event_t *event, yaml_char_t *anchor); * Either the @a tag attribute or one of the @a plain_implicit and * @a quoted_implicit flags must be set. * - * @param[out] event An empty event object. - * @param[in] anchor The scalar anchor or @c NULL. - * @param[in] tag The scalar tag or @c NULL. - * @param[in] value The scalar value. - * @param[in] length The length of the scalar value. - * @param[in] plain_implicit If the tag may be omitted for the plain style. - * @param[in] quoted_implicit If the tag may be omitted for any non-plain style. - * @param[in] style The scalar style. + * @param[out] event An empty event object. + * @param[in] anchor The scalar anchor or @c NULL. + * @param[in] tag The scalar tag or @c NULL. + * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] plain_implicit If the tag may be omitted for the plain + * style. + * @param[in] quoted_implicit If the tag may be omitted for any + * non-plain style. + * @param[in] style The scalar style. * * @returns @c 1 if the function succeeded, @c 0 on error. 
*/ @@ -520,11 +527,11 @@ yaml_scalar_event_initialize(yaml_event_t *event, * * Either the @a tag attribute or the @a implicit flag must be set. * - * @param[out] event An empty event object. - * @param[in] anchor The sequence anchor or @c NULL. - * @param[in] tag The sequence tag or @c NULL. - * @param[in] implicit If the tag may be omitted. - * @param[in] style The sequence style. + * @param[out] event An empty event object. + * @param[in] anchor The sequence anchor or @c NULL. + * @param[in] tag The sequence tag or @c NULL. + * @param[in] implicit If the tag may be omitted. + * @param[in] style The sequence style. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -537,7 +544,7 @@ yaml_sequence_start_event_initialize(yaml_event_t *event, /** * Create a SEQUENCE-END event. * - * @param[out] event An empty event object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -552,11 +559,11 @@ yaml_sequence_end_event_initialize(yaml_event_t *event); * * Either the @a tag attribute or the @a implicit flag must be set. * - * @param[out] event An empty event object. - * @param[in] anchor The mapping anchor or @c NULL. - * @param[in] tag The mapping tag or @c NULL. - * @param[in] implicit If the tag may be omitted. - * @param[in] style The mapping style. + * @param[out] event An empty event object. + * @param[in] anchor The mapping anchor or @c NULL. + * @param[in] tag The mapping tag or @c NULL. + * @param[in] implicit If the tag may be omitted. + * @param[in] style The mapping style. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -569,7 +576,7 @@ yaml_mapping_start_event_initialize(yaml_event_t *event, /** * Create a MAPPING-END event. * - * @param[out] event An empty event object. + * @param[out] event An empty event object. * * @returns @c 1 if the function succeeded, @c 0 on error. 
*/ @@ -580,7 +587,7 @@ yaml_mapping_end_event_initialize(yaml_event_t *event); /** * Free any memory allocated for an event object. * - * @param[out] event An event object. + * @param[in,out] event An event object. */ YAML_DECLARE(void) @@ -603,10 +610,10 @@ yaml_event_delete(yaml_event_t *event); #define YAML_DEFAULT_SCALAR_TAG YAML_STR_TAG #define YAML_DEFAULT_SEQUENCE_TAG YAML_SEQ_TAG -#define YAML_DEFAULT_MAPPING_STYLE YAML_MAP_TAG +#define YAML_DEFAULT_MAPPING_TAG YAML_MAP_TAG /** Node types. */ -typedef enum { +typedef enum yaml_node_type_e { YAML_NO_NODE, YAML_SCALAR_NODE, @@ -614,31 +621,34 @@ typedef enum { YAML_MAPPING_NODE } yaml_node_type_t; -#if 0 +/** The forward definition of a document node structure. */ +typedef struct yaml_node_s yaml_node_t; -typedef struct _yaml_node_t yaml_node_item_t; +/** An element of a sequence node. */ +typedef int yaml_node_item_t; -typedef struct { - yaml_node_item_t key; - yaml_node_item_t value; +/** An element of a mapping node. */ +typedef struct yaml_node_pair_s { + /** The key of the element. */ + int key; + /** The value of the element. */ + int value; } yaml_node_pair_t; /** The node structure. */ -typedef struct _yaml_node_t { +struct yaml_node_s { /** The node type. */ yaml_node_type_t type; - /* The reference counter. */ - int references; + /** The node tag. */ + yaml_char_t *tag; /** The node data. */ union { /** The scalar parameters (for @c YAML_SCALAR_NODE). */ struct { - /** The tag. */ - yaml_char_t *tag; /** The scalar value. */ yaml_char_t *value; /** The length of the scalar value. */ @@ -649,16 +659,14 @@ typedef struct _yaml_node_t { /** The sequence parameters (for @c YAML_SEQUENCE_NODE). */ struct { - /** The tag. */ - yaml_char_t *tag; /** The stack of sequence items. */ struct { /** The beginning of the stack. */ - struct yaml_node_item_t *start; + yaml_node_item_t *start; /** The end of the stack. */ - struct yaml_node_item_t *end; + yaml_node_item_t *end; /** The top of the stack. 
*/ - struct yaml_node_item_t *top; + yaml_node_item_t *top; } items; /** The sequence style. */ yaml_sequence_style_t style; @@ -666,16 +674,14 @@ typedef struct _yaml_node_t { /** The mapping parameters (for @c YAML_MAPPING_NODE). */ struct { - /** The tag. */ - yaml_char_t *tag; - /** The stack of mapping pairs. */ + /** The stack of mapping pairs (key, value). */ struct { /** The beginning of the stack. */ - struct yaml_node_pair_t *start; + yaml_node_pair_t *start; /** The end of the stack. */ - struct yaml_node_pair_t *end; + yaml_node_pair_t *end; /** The top of the stack. */ - struct yaml_node_pair_t *top; + yaml_node_pair_t *top; } pairs; /** The mapping style. */ yaml_mapping_style_t style; @@ -688,201 +694,191 @@ typedef struct _yaml_node_t { /** The end of the node. */ yaml_mark_t end_mark; -} yaml_node_t; +}; -/** - * Create a SCALAR node. - * - * The @a style argument may be ignored by the emitter. - * - * @param[out] node An empty node object. - * @param[in] tag The scalar tag. - * @param[in] value The scalar value. - * @param[in] length The length of the scalar value. - * @param[in] style The scalar style. - * - * @returns @c 1 if the function succeeded, @c 0 on error. - */ +/** The document structure. */ +typedef struct yaml_document_s { -YAML_DECLARE(int) -yaml_scalar_node_initialize(yaml_node_t *node, - yaml_char_t *tag, yaml_char_t *value, int length, - yaml_scalar_style_t style); + /** The document nodes. */ + struct { + /** The beginning of the stack. */ + yaml_node_t *start; + /** The end of the stack. */ + yaml_node_t *end; + /** The top of the stack. */ + yaml_node_t *top; + } nodes; + + /** The version directive. */ + yaml_version_directive_t *version_directive; + + /** The list of tag directives. */ + struct { + /** The beginning of the tag directives list. */ + yaml_tag_directive_t *start; + /** The end of the tag directives list. */ + yaml_tag_directive_t *end; + } tag_directives; + + /** Is the document start indicator implicit? 
*/ + int start_implicit; + /** Is the document end indicator implicit? */ + int end_implicit; + + /** The beginning of the document. */ + yaml_mark_t start_mark; + /** The end of the document. */ + yaml_mark_t end_mark; + +} yaml_document_t; /** - * Create a SEQUENCE node. - * - * The @a style argument may be ignored by the emitter. + * Create a YAML document. * - * @param[out] node An empty node object. - * @param[in] tag The sequence tag. - * @param[in] style The sequence style. + * @param[out] document An empty document object. + * @param[in] version_directive The %YAML directive value or + * @c NULL. + * @param[in] tag_directives_start The beginning of the %TAG + * directives list. + * @param[in] tag_directives_end The end of the %TAG directives + * list. + * @param[in] start_implicit If the document start indicator is + * implicit. + * @param[in] end_implicit If the document end indicator is + * implicit. * * @returns @c 1 if the function succeeded, @c 0 on error. */ YAML_DECLARE(int) -yaml_sequence_node_initialize(yaml_node_t *node, - yaml_char_t *tag, yaml_sequence_style_t style); +yaml_document_initialize(yaml_document_t *document, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int start_implicit, int end_implicit); /** - * Add an item to a SEQUENCE node + * Delete a YAML document and all its nodes. * - * @param[out] node A sequence node. - * @param[in] item An item node. -* - * @returns @c 1 if the function succeeded, @c 0 on error. + * @param[in,out] document A document object. */ -YAML_DECLARE(int) -yaml_sequence_node_add_item(yaml_node_t *node, yaml_node_t *item) +YAML_DECLARE(void) +yaml_document_delete(yaml_document_t *document); /** - * Create a SCALAR node and add it to a SEQUENCE node. + * Get a node of a YAML document. * - * @param[out] node A sequence node. - * @param[in] tag The scalar tag. - * @param[in] value The scalar value. 
- * @param[in] length The length of the scalar value. - * @param[in] style The scalar style. + * The pointer returned by this function is valid until any of the functions + * modifying the documents are called. * - * @returns @c 1 if the function succeeded, @c 0 on error. + * @param[in] document A document object. + * @param[in] node_id The node id. + * + * @returns the node object or @c NULL if @c node_id is out of range. */ -YAML_DECLARE(int) -yaml_sequence_node_add_scalar_item(yaml_node_t *node, - yaml_char_t *tag, yaml_char_t *value, int length, - yaml_scalar_style_t style); +YAML_DECLARE(yaml_node_t *) +yaml_document_get_node(yaml_document_t *document, int node_id); /** - * Get the number of subnodes of a SEQUENCE node. + * Get the root node of a YAML document. * - * @param[in] node A sequence node. + * The root object is the first object added to the document. * - * @returns the number of subnodes. - */ - -YAML_DECLARE(size_t) -yaml_sequence_node_get_length(yaml_node_t *node); - -/** - * Get a subnode of a SEQUENCE node. + * The pointer returned by this function is valid until any of the functions + * modifying the documents are called. + * + * An empty document produced by the parser signifies the end of a YAML + * stream. + * + * @param[in] document A document object. * - * @param[in] node A sequence node. - * @param[in] index The index of a subnode. - * @param[out] item A subnode. + * @returns the node object or @c NULL if the document is empty. */ -YAML_DECLARE(void) -yaml_sequence_node_get_item(yaml_node_t *node, size_t index, - yaml_node_t *item); +YAML_DECLARE(yaml_node_t *) +yaml_document_get_root_node(yaml_document_t *document); /** - * Create a MAPPING node. + * Create a SCALAR node and attach it to the document. * * The @a style argument may be ignored by the emitter. * - * @param[out] node An empty node object. - * @param[in] tag The mapping tag. - * @param[in] style The mapping style. + * @param[in,out] document A document object.
+ * @param[in] tag The scalar tag. + * @param[in] value The scalar value. + * @param[in] length The length of the scalar value. + * @param[in] style The scalar style. * - * @returns @c 1 if the function succeeded, @c 0 on error. + * @returns the node id or @c 0 on error. */ YAML_DECLARE(int) -yaml_mapping_node_initialize(yaml_node_t *node, - yaml_char_t *tag, yaml_mapping_style_t style); +yaml_document_add_scalar(yaml_document_t *document, + yaml_char_t *tag, yaml_char_t *value, int length, + yaml_scalar_style_t style); /** - * Add a key/value pair of nodes to a MAPPING node. + * Create a SEQUENCE node and attach it to the document. * - * @param[out] node A mapping node. - * @param[in] key A key node. - * @param[in] value A value node. + * The @a style argument may be ignored by the emitter. * - * @returns @c 1 if the function succeeded, @c 0 on error. + * @param[in,out] document A document object. + * @param[in] tag The sequence tag. + * @param[in] style The sequence style. + * + * @returns the node id or @c 0 on error. */ YAML_DECLARE(int) -yaml_mapping_node_add_pair(yaml_node_t *node, - yaml_node_t *key, yaml_node_t *value) +yaml_document_add_sequence(yaml_document_t *document, + yaml_char_t *tag, yaml_sequence_style_t style); /** - * Create a scalar key and add the key/value pair to a MAPPING node. + * Create a MAPPING node and attach it to the document. * - * @param[out] node A mapping node. - * @param[in] key_tag The key node tag. - * @param[in] key_value The key node value. - * @param[in] key_length The length of the key node value. - * @param[in] key_style The key node style. - * @param[in] value A value node. + * The @a style argument may be ignored by the emitter. * - * @returns @c 1 if the function succeeded, @c 0 on error. + * @param[in,out] document A document object. + * @param[in] tag The mapping tag. + * @param[in] style The mapping style. + * + * @returns the node id or @c 0 on error.
*/ YAML_DECLARE(int) -yaml_sequence_node_add_scalar_key_pair(yaml_node_t *node, - yaml_char_t *key_tag, yaml_char_t *key_value, int key_length, - yaml_scalar_style_t key_style, - yaml_node_t *value); +yaml_document_add_mapping(yaml_document_t *document, + yaml_char_t *tag, yaml_mapping_style_t style); /** - * Create a scalar key/value nodes and add the pair to a MAPPING node. - * - * @param[out] node A mapping node. - * @param[in] key_tag The key node tag. - * @param[in] key_value The key node value. - * @param[in] key_length The length of the key node value. - * @param[in] key_style The key node style. - * @param[in] value_tag The value node tag. - * @param[in] value_value The value node value. - * @param[in] value_length The length of the value node value. - * @param[in] value_style The value node style. + * Add an item to a SEQUENCE node. * + * @param[in,out] document A document object. + * @param[in] sequence The sequence node id. + * @param[in] item The item node id. +* * @returns @c 1 if the function succeeded, @c 0 on error. */ YAML_DECLARE(int) -yaml_sequence_node_add_scalar_pair(yaml_node_t *node, - yaml_char_t *key_tag, yaml_char_t *key_value, int key_length, - yaml_scalar_style_t key_style, - yaml_char_t *value_tag, yaml_char_t *value_value, int value_length, - yaml_scalar_style_t value_style); - -/** - * Get the number of subnode pairs of a MAPPING node. - * - * @param[in] node A mapping node. - * - * @returns the number of pairs. - */ - -YAML_DECLARE(size_t) -yaml_mapping_node_get_length(yaml_node_t *node); +yaml_document_append_sequence_item(yaml_document_t *document, + int sequence, int item); /** - * Get a subnode of a SEQUENCE node. + * Add a pair of a key and a value to a MAPPING node. * - * @param[in] node A sequence node. - * @param[in] index The index of a subnode. - * @param[out] key The key subnode. - * @param[out] value The value subnode. 
- */ - -YAML_DECLARE(void) -yaml_mapping_node_get_pair(yaml_node_t *node, size_t index, - yaml_node_t *key, yaml_node_t *value); - -/** - * Delete a node and its subnodes. - * - * @param[out] node A node object. + * @param[in,out] document A document object. + * @param[in] mapping The mapping node id. + * @param[in] key The key node id. + * @param[in] value The value node id. +* + * @returns @c 1 if the function succeeded, @c 0 on error. */ -YAML_DECLARE(void) -yaml_node_delete(yaml_node_t *node); - -#endif +YAML_DECLARE(int) +yaml_document_append_mapping_pair(yaml_document_t *document, + int mapping, int key, int value); /** @} */ @@ -916,7 +912,7 @@ typedef int yaml_read_handler_t(void *data, unsigned char *buffer, size_t size, * This structure holds information about a potential simple key. */ -typedef struct { +typedef struct yaml_simple_key_s { /** Is a simple key possible? */ int possible; @@ -933,7 +929,7 @@ typedef struct { /** * The states of the parser. */ -typedef enum { +typedef enum yaml_parser_state_e { YAML_PARSE_STREAM_START_STATE, YAML_PARSE_IMPLICIT_DOCUMENT_START_STATE, YAML_PARSE_DOCUMENT_START_STATE, @@ -960,6 +956,19 @@ typedef enum { YAML_PARSE_END_STATE } yaml_parser_state_t; +/** + * This structure holds aliases data. + */ + +typedef struct yaml_alias_data_s { + /** The anchor. */ + yaml_char_t *anchor; + /** The node id. */ + int index; + /** The anchor mark. */ + yaml_mark_t mark; +} yaml_alias_data_t; + /** * The parser structure. * @@ -967,7 +976,7 @@ typedef enum { * family of functions. */ -typedef struct { +typedef struct yaml_parser_s { /** * @name Error handling @@ -1167,6 +1176,28 @@ typedef struct { * @} */ + /** + * @name Dumper stuff + * @{ + */ + + /** The alias data. */ + struct { + /** The beginning of the list. */ + yaml_alias_data_t *start; + /** The end of the list. */ + yaml_alias_data_t *end; + /** The top of the list. */ + yaml_alias_data_t *top; + } aliases; + + /** The currently parsed document. 
*/ + yaml_document_t *document; + + /** + * @} + */ + } yaml_parser_t; /** @@ -1175,7 +1206,7 @@ typedef struct { * This function creates a new parser object. An application is responsible * for destroying the object using the yaml_parser_delete() function. * - * @param[out] parser An empty parser object. + * @param[out] parser An empty parser object. * * @returns @c 1 if the function succeeded, @c 0 on error. */ @@ -1256,7 +1287,8 @@ yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding); * produced token object using the @c yaml_token_delete function. * * An application must not alternate the calls of yaml_parser_scan() with the - * calls of yaml_parser_parse(). Doing this will break the parser. + * calls of yaml_parser_parse() or yaml_parser_load(). Doing this will break + * the parser. * * @param[in,out] parser A parser object. * @param[out] token An empty token object. @@ -1278,8 +1310,9 @@ yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); * An application is responsible for freeing any buffers associated with the * produced event object using the yaml_event_delete() function. * - * An application must not alternate the calls of yaml_parser_scan() with the - * calls of yaml_parser_parse(). Doing this will break the parser. + * An application must not alternate the calls of yaml_parser_parse() with the + * calls of yaml_parser_scan() or yaml_parser_load(). Doing this will break the + * parser. * * @param[in,out] parser A parser object. * @param[out] event An empty event object. @@ -1290,6 +1323,31 @@ yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token); YAML_DECLARE(int) yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); +/** + * Parse the input stream and produce the next YAML document. + * + * Call this function subsequently to produce a sequence of documents + * constituting the input stream. + * + * If the produced document has no root node, it means that the document + * end has been reached. 
+ * + * An application is responsible for freeing any data associated with the + * produced document object using the yaml_document_delete() function. + * + * An application must not alternate the calls of yaml_parser_load() with the + * calls of yaml_parser_scan() or yaml_parser_parse(). Doing this will break + * the parser. + * + * @param[in,out] parser A parser object. + * @param[out] document An empty document object. + * + * @return @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_parser_load(yaml_parser_t *parser, yaml_document_t *document); + /** @} */ /** @@ -1316,7 +1374,7 @@ yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event); typedef int yaml_write_handler_t(void *data, unsigned char *buffer, size_t size); /** The emitter states. */ -typedef enum { +typedef enum yaml_emitter_state_e { YAML_EMIT_STREAM_START_STATE, YAML_EMIT_FIRST_DOCUMENT_START_STATE, YAML_EMIT_DOCUMENT_START_STATE, @@ -1344,7 +1402,7 @@ typedef enum { * family of functions. */ -typedef struct { +typedef struct yaml_emitter_s { /** * @name Error handling @@ -1549,6 +1607,36 @@ typedef struct { * @} */ + /** + * @name Dumper stuff + * @{ + */ + + /** If the stream was already opened? */ + int opened; + /** If the stream was already closed? */ + int closed; + + /** The information associated with the document nodes. */ + struct { + /** The number of references. */ + int references; + /** The anchor id. */ + int anchor; + /** If the node has been emitted? */ + int serialized; + } *anchors; + + /** The last assigned anchor id. */ + int last_anchor_id; + + /** The currently emitted document. */ + yaml_document_t *document; + + /** + * @} + */ + } yaml_emitter_t; /** @@ -1697,6 +1785,49 @@ yaml_emitter_set_break(yaml_emitter_t *emitter, yaml_break_t line_break); YAML_DECLARE(int) yaml_emitter_emit(yaml_emitter_t *emitter, yaml_event_t *event); +/* + * Start a YAML stream. + * + * This function should be used before yaml_emitter_dump() is called. 
+ * + * @param[in,out] emitter An emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_open(yaml_emitter_t *emitter); + +/* + * Finish a YAML stream. + * + * This function should be used after yaml_emitter_dump() is called. + * + * @param[in,out] emitter An emitter object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_close(yaml_emitter_t *emitter); + +/* + * Emit a YAML document. + * + * The document object may be generated using the yaml_parser_load() function + * or the yaml_document_initialize() function. The emitter takes the + * responsibility for the document object and destroys its content after + * it is emitted. The document object is destroyed even if the function fails. + * + * @param[in,out] emitter An emitter object. + * @param[in,out] document A document object. + * + * @returns @c 1 if the function succeeded, @c 0 on error. + */ + +YAML_DECLARE(int) +yaml_emitter_dump(yaml_emitter_t *emitter, yaml_document_t *document); + /** * Flush the accumulated characters to the output.
* diff --git a/src/Makefile.am b/src/Makefile.am index f9cb7a28..724a1b2d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ AM_CPPFLAGS = -I$(top_srcdir)/include lib_LTLIBRARIES = libyaml.la -libyaml_la_SOURCES = yaml_private.h api.c reader.c scanner.c parser.c writer.c emitter.c +libyaml_la_SOURCES = yaml_private.h api.c reader.c scanner.c parser.c loader.c writer.c emitter.c dumper.c libyaml_la_LDFLAGS = -release $(YAML_LT_RELEASE) -version-info $(YAML_LT_CURRENT):$(YAML_LT_REVISION):$(YAML_LT_AGE) diff --git a/src/api.c b/src/api.c index a2d55435..a1fdf6bf 100644 --- a/src/api.c +++ b/src/api.c @@ -399,6 +399,7 @@ yaml_emitter_delete(yaml_emitter_t *emitter) yaml_free(tag_directive.prefix); } STACK_DEL(emitter, emitter->tag_directives); + yaml_free(emitter->anchors); memset(emitter, 0, sizeof(yaml_emitter_t)); } @@ -1019,23 +1020,187 @@ yaml_event_delete(yaml_event_t *event) memset(event, 0, sizeof(yaml_event_t)); } -#if 0 +/* + * Create a document object. + */ + +YAML_DECLARE(int) +yaml_document_initialize(yaml_document_t *document, + yaml_version_directive_t *version_directive, + yaml_tag_directive_t *tag_directives_start, + yaml_tag_directive_t *tag_directives_end, + int start_implicit, int end_implicit) +{ + struct { + yaml_error_type_t error; + } context; + struct { + yaml_node_t *start; + yaml_node_t *end; + yaml_node_t *top; + } nodes = { NULL, NULL, NULL }; + yaml_version_directive_t *version_directive_copy = NULL; + struct { + yaml_tag_directive_t *start; + yaml_tag_directive_t *end; + yaml_tag_directive_t *top; + } tag_directives_copy = { NULL, NULL, NULL }; + yaml_tag_directive_t value = { NULL, NULL }; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(document); /* Non-NULL document object is expected. */ + assert((tag_directives_start && tag_directives_end) || + (tag_directives_start == tag_directives_end)); + /* Valid tag directives are expected. 
*/ + + if (!STACK_INIT(&context, nodes, INITIAL_STACK_SIZE)) goto error; + + if (version_directive) { + version_directive_copy = yaml_malloc(sizeof(yaml_version_directive_t)); + if (!version_directive_copy) goto error; + version_directive_copy->major = version_directive->major; + version_directive_copy->minor = version_directive->minor; + } + + if (tag_directives_start != tag_directives_end) { + yaml_tag_directive_t *tag_directive; + if (!STACK_INIT(&context, tag_directives_copy, INITIAL_STACK_SIZE)) + goto error; + for (tag_directive = tag_directives_start; + tag_directive != tag_directives_end; tag_directive ++) { + assert(tag_directive->handle); + assert(tag_directive->prefix); + if (!yaml_check_utf8(tag_directive->handle, + strlen((char *)tag_directive->handle))) + goto error; + if (!yaml_check_utf8(tag_directive->prefix, + strlen((char *)tag_directive->prefix))) + goto error; + value.handle = yaml_strdup(tag_directive->handle); + value.prefix = yaml_strdup(tag_directive->prefix); + if (!value.handle || !value.prefix) goto error; + if (!PUSH(&context, tag_directives_copy, value)) + goto error; + value.handle = NULL; + value.prefix = NULL; + } + } + + DOCUMENT_INIT(*document, nodes.start, nodes.end, version_directive_copy, + tag_directives_copy.start, tag_directives_copy.top, + start_implicit, end_implicit, mark, mark); + + return 1; + +error: + STACK_DEL(&context, nodes); + yaml_free(version_directive_copy); + while (!STACK_EMPTY(&context, tag_directives_copy)) { + yaml_tag_directive_t value = POP(&context, tag_directives_copy); + yaml_free(value.handle); + yaml_free(value.prefix); + } + STACK_DEL(&context, tag_directives_copy); + yaml_free(value.handle); + yaml_free(value.prefix); + + return 0; +} + +/* + * Destroy a document object. 
+ */ + +YAML_DECLARE(void) +yaml_document_delete(yaml_document_t *document) +{ + struct { + yaml_error_type_t error; + } context; + yaml_tag_directive_t *tag_directive; + + assert(document); /* Non-NULL document object is expected. */ + + while (!STACK_EMPTY(&context, document->nodes)) { + yaml_node_t node = POP(&context, document->nodes); + yaml_free(node.tag); + switch (node.type) { + case YAML_SCALAR_NODE: + yaml_free(node.data.scalar.value); + break; + case YAML_SEQUENCE_NODE: + STACK_DEL(&context, node.data.sequence.items); + break; + case YAML_MAPPING_NODE: + STACK_DEL(&context, node.data.mapping.pairs); + break; + default: + assert(0); /* Should not happen. */ + } + } + STACK_DEL(&context, document->nodes); + + yaml_free(document->version_directive); + for (tag_directive = document->tag_directives.start; + tag_directive != document->tag_directives.end; + tag_directive++) { + yaml_free(tag_directive->handle); + yaml_free(tag_directive->prefix); + } + yaml_free(document->tag_directives.start); + + memset(document, 0, sizeof(yaml_document_t)); +} + +/** + * Get a document node. + */ + +YAML_DECLARE(yaml_node_t *) +yaml_document_get_node(yaml_document_t *document, int node) +{ + assert(document); /* Non-NULL document object is expected. */ + + if (node > 0 && document->nodes.start + node <= document->nodes.top) { + return document->nodes.start + node - 1; + } + return NULL; +} + +/** + * Get the root object. + */ + +YAML_DECLARE(yaml_node_t *) +yaml_document_get_root_node(yaml_document_t *document) +{ + assert(document); /* Non-NULL document object is expected. */ + + if (document->nodes.top != document->nodes.start) { + return document->nodes.start; + } + return NULL; +} /* - * Create a SCALAR node. + * Add a scalar node to a document. 
*/ YAML_DECLARE(int) -yaml_scalar_node_initialize(yaml_node_t *node, +yaml_document_add_scalar(yaml_document_t *document, yaml_char_t *tag, yaml_char_t *value, int length, yaml_scalar_style_t style) { + struct { + yaml_error_type_t error; + } context; yaml_mark_t mark = { 0, 0, 0 }; yaml_char_t *tag_copy = NULL; yaml_char_t *value_copy = NULL; + yaml_node_t node; - assert(node); /* Non-NULL node object is expected. */ - assert(value); /* Non-NULL anchor is expected. */ + assert(document); /* Non-NULL document object is expected. */ + assert(value); /* Non-NULL value is expected. */ if (!tag) { tag = YAML_DEFAULT_SCALAR_TAG; @@ -1055,9 +1220,10 @@ yaml_scalar_node_initialize(yaml_node_t *node, memcpy(value_copy, value, length); value_copy[length] = '\0'; - SCALAR_NODE_INIT(*node, tag_copy, value_copy, length, style, mark, mark); + SCALAR_NODE_INIT(node, tag_copy, value_copy, length, style, mark, mark); + if (!PUSH(&context, document->nodes, node)) goto error; - return 1; + return document->nodes.top - document->nodes.start; error: yaml_free(tag_copy); @@ -1067,11 +1233,11 @@ yaml_scalar_node_initialize(yaml_node_t *node, } /* - * Create a SEQUENCE node. + * Add a sequence node to a document. */ YAML_DECLARE(int) -yaml_sequence_node_initialize(yaml_node_t *node, +yaml_document_add_sequence(yaml_document_t *document, yaml_char_t *tag, yaml_sequence_style_t style) { struct { @@ -1084,39 +1250,39 @@ yaml_sequence_node_initialize(yaml_node_t *node, yaml_node_item_t *end; yaml_node_item_t *top; } items = { NULL, NULL, NULL }; + yaml_node_t node; - assert(node); /* Non-NULL node object is expected. */ + assert(document); /* Non-NULL document object is expected. 
*/ if (!tag) { tag = YAML_DEFAULT_SEQUENCE_TAG; } - if (tag) { - if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; - tag_copy = yaml_strdup(tag); - if (!tag_copy) goto error; - } + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; - if (!STACK_INIT(context, items, INITIAL_STACK_SIZE)) goto error; + if (!STACK_INIT(&context, items, INITIAL_STACK_SIZE)) goto error; - SEQUENCE_NODE_INIT(*node, tag_copy, items.start, item.end, style, - mark, mark); + SEQUENCE_NODE_INIT(node, tag_copy, items.start, items.end, + style, mark, mark); + if (!PUSH(&context, document->nodes, node)) goto error; - return 1; + return document->nodes.top - document->nodes.start; error: + STACK_DEL(&context, items); yaml_free(tag_copy); - STACK_DEL(context, items); return 0; } /* - * Create a MAPPING node. + * Add a mapping node to a document. */ YAML_DECLARE(int) -yaml_mapping_node_initialize(yaml_node_t *node, +yaml_document_add_mapping(yaml_document_t *document, yaml_char_t *tag, yaml_mapping_style_t style) { struct { @@ -1129,74 +1295,89 @@ yaml_mapping_node_initialize(yaml_node_t *node, yaml_node_pair_t *end; yaml_node_pair_t *top; } pairs = { NULL, NULL, NULL }; + yaml_node_t node; - assert(node); /* Non-NULL node object is expected. */ + assert(document); /* Non-NULL document object is expected. 
*/ if (!tag) { tag = YAML_DEFAULT_MAPPING_TAG; } - if (tag) { - if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; - tag_copy = yaml_strdup(tag); - if (!tag_copy) goto error; - } + if (!yaml_check_utf8(tag, strlen((char *)tag))) goto error; + tag_copy = yaml_strdup(tag); + if (!tag_copy) goto error; - if (!STACK_INIT(context, pairs, INITIAL_STACK_SIZE)) goto error; + if (!STACK_INIT(&context, pairs, INITIAL_STACK_SIZE)) goto error; - MAPPING_NODE_INIT(*node, tag_copy, pairs.start, pairs.end, style, - mark, mark); + MAPPING_NODE_INIT(node, tag_copy, pairs.start, pairs.end, + style, mark, mark); + if (!PUSH(&context, document->nodes, node)) goto error; - return 1; + return document->nodes.top - document->nodes.start; error: + STACK_DEL(&context, pairs); yaml_free(tag_copy); - STACK_DEL(context, pairs); return 0; } /* - * Delete a node and its subnodes. + * Append an item to a sequence node. */ -YAML_DECLARE(void) -yaml_node_delete(yaml_node_t *node) +YAML_DECLARE(int) +yaml_document_append_sequence_item(yaml_document_t *document, + int sequence, int item) { struct { yaml_error_type_t error; } context; - struct { - yaml_node_item_t *start; - yaml_node_item_t *end; - yaml_node_item_t *head; - yaml_node_item_t *tail; - } queue = { NULL, NULL, NULL, NULL }; - assert(node); /* Non-NULL node object is expected. */ + assert(document); /* Non-NULL document is required. */ + assert(sequence > 0 + && document->nodes.start + sequence <= document->nodes.top); + /* Valid sequence id is required. */ + assert(document->nodes.start[sequence-1].type == YAML_SEQUENCE_NODE); + /* A sequence node is required. */ + assert(item > 0 && document->nodes.start + item <= document->nodes.top); + /* Valid item id is required. 
*/ + + if (!PUSH(&context, + document->nodes.start[sequence-1].data.sequence.items, item)) + return 0; - if (node->type == YAML_SCALAR_NODE) { - yaml_free(node->data.scalar.tag); - yaml_free(node->data.scalar.value); - memset(node, 0, sizeof(yaml_node_t)); - return; - } + return 1; +} - if (!QUEUE_INIT(context, queue, INITIAL_QUEUE_SIZE)) goto error; - if (!ENQUEUE(context, queue, node)) goto error; +/* + * Append a pair of a key and a value to a mapping node. + */ - while (!QUEUE_EMPTY(context, queue)) { - yaml_node_t node = DEQUEUE(context, queue); - if (node.type == YAML_SCALAR_NODE) { - if (!node->reference) - } - if (node->type == YAML_SEQUENCE_NODE) { - while (!STACK_EMPTY(context, node->data.sequence.items)) { - yaml_node_t *item = - } - } - } -} +YAML_DECLARE(int) +yaml_document_append_mapping_pair(yaml_document_t *document, + int mapping, int key, int value) +{ + struct { + yaml_error_type_t error; + } context; + yaml_node_pair_t pair = { key, value }; + + assert(document); /* Non-NULL document is required. */ + assert(mapping > 0 + && document->nodes.start + mapping <= document->nodes.top); + /* Valid mapping id is required. */ + assert(document->nodes.start[mapping-1].type == YAML_MAPPING_NODE); + /* A mapping node is required. */ + assert(key > 0 && document->nodes.start + key <= document->nodes.top); + /* Valid key id is required. */ + assert(value > 0 && document->nodes.start + value <= document->nodes.top); + /* Valid value id is required. */ + + if (!PUSH(&context, + document->nodes.start[mapping-1].data.mapping.pairs, pair)) + return 0; -#endif + return 1; +} diff --git a/src/dumper.c b/src/dumper.c new file mode 100644 index 00000000..203c6a70 --- /dev/null +++ b/src/dumper.c @@ -0,0 +1,394 @@ + +#include "yaml_private.h" + +/* + * API functions. 
+ */ + +YAML_DECLARE(int) +yaml_emitter_open(yaml_emitter_t *emitter); + +YAML_DECLARE(int) +yaml_emitter_close(yaml_emitter_t *emitter); + +YAML_DECLARE(int) +yaml_emitter_dump(yaml_emitter_t *emitter, yaml_document_t *document); + +/* + * Clean up functions. + */ + +static void +yaml_emitter_delete_document_and_anchors(yaml_emitter_t *emitter); + +/* + * Anchor functions. + */ + +static void +yaml_emitter_anchor_node(yaml_emitter_t *emitter, int index); + +static yaml_char_t * +yaml_emitter_generate_anchor(yaml_emitter_t *emitter, int anchor_id); + + +/* + * Serialize functions. + */ + +static int +yaml_emitter_dump_node(yaml_emitter_t *emitter, int index); + +static int +yaml_emitter_dump_alias(yaml_emitter_t *emitter, yaml_char_t *anchor); + +static int +yaml_emitter_dump_scalar(yaml_emitter_t *emitter, yaml_node_t *node, + yaml_char_t *anchor); + +static int +yaml_emitter_dump_sequence(yaml_emitter_t *emitter, yaml_node_t *node, + yaml_char_t *anchor); + +static int +yaml_emitter_dump_mapping(yaml_emitter_t *emitter, yaml_node_t *node, + yaml_char_t *anchor); + +/* + * Issue a STREAM-START event. + */ + +YAML_DECLARE(int) +yaml_emitter_open(yaml_emitter_t *emitter) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is required. */ + assert(!emitter->opened); /* Emitter should not be opened yet. */ + + STREAM_START_EVENT_INIT(event, YAML_ANY_ENCODING, mark, mark); + + if (!yaml_emitter_emit(emitter, &event)) { + return 0; + } + + emitter->opened = 1; + + return 1; +} + +/* + * Issue a STREAM-END event. + */ + +YAML_DECLARE(int) +yaml_emitter_close(yaml_emitter_t *emitter) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is required. */ + assert(emitter->opened); /* Emitter should be opened. 
+ */ + + if (emitter->closed) return 1; + + STREAM_END_EVENT_INIT(event, mark, mark); + + if (!yaml_emitter_emit(emitter, &event)) { + return 0; + } + + emitter->closed = 1; + + return 1; +} + +/* + * Dump a YAML document. + */ + +YAML_DECLARE(int) +yaml_emitter_dump(yaml_emitter_t *emitter, yaml_document_t *document) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + assert(emitter); /* Non-NULL emitter object is required. */ + assert(document); /* Non-NULL document object is expected. */ + + emitter->document = document; + + if (!emitter->opened) { + if (!yaml_emitter_open(emitter)) goto error; + } + + if (STACK_EMPTY(emitter, document->nodes)) { + if (!yaml_emitter_close(emitter)) goto error; + yaml_emitter_delete_document_and_anchors(emitter); + return 1; + } + + assert(emitter->opened); /* Emitter should be opened. */ + + emitter->anchors = yaml_malloc(sizeof(*(emitter->anchors)) + * (document->nodes.top - document->nodes.start)); + if (!emitter->anchors) goto error; + memset(emitter->anchors, 0, sizeof(*(emitter->anchors)) + * (document->nodes.top - document->nodes.start)); + + DOCUMENT_START_EVENT_INIT(event, document->version_directive, + document->tag_directives.start, document->tag_directives.end, + document->start_implicit, mark, mark); + if (!yaml_emitter_emit(emitter, &event)) goto error; + + yaml_emitter_anchor_node(emitter, 1); + if (!yaml_emitter_dump_node(emitter, 1)) goto error; + + DOCUMENT_END_EVENT_INIT(event, document->end_implicit, mark, mark); + if (!yaml_emitter_emit(emitter, &event)) goto error; + + yaml_emitter_delete_document_and_anchors(emitter); + + return 1; + +error: + + yaml_emitter_delete_document_and_anchors(emitter); + + return 0; +} + +/* + * Clean up the emitter object after a document is dumped.
+ */ + +static void +yaml_emitter_delete_document_and_anchors(yaml_emitter_t *emitter) +{ + int index; + + if (!emitter->anchors) { + yaml_document_delete(emitter->document); + emitter->document = NULL; + return; + } + + for (index = 0; emitter->document->nodes.start + index + < emitter->document->nodes.top; index ++) { + yaml_node_t node = emitter->document->nodes.start[index]; + if (!emitter->anchors[index].serialized) { + yaml_free(node.tag); + if (node.type == YAML_SCALAR_NODE) { + yaml_free(node.data.scalar.value); + } + } + if (node.type == YAML_SEQUENCE_NODE) { + STACK_DEL(emitter, node.data.sequence.items); + } + if (node.type == YAML_MAPPING_NODE) { + STACK_DEL(emitter, node.data.mapping.pairs); + } + } + + STACK_DEL(emitter, emitter->document->nodes); + yaml_free(emitter->anchors); + + emitter->anchors = NULL; + emitter->last_anchor_id = 0; + emitter->document = NULL; +} + +/* + * Check the references of a node and assign the anchor id if needed. + */ + +static void +yaml_emitter_anchor_node(yaml_emitter_t *emitter, int index) +{ + yaml_node_t *node = emitter->document->nodes.start + index - 1; + yaml_node_item_t *item; + yaml_node_pair_t *pair; + + emitter->anchors[index-1].references ++; + + if (emitter->anchors[index-1].references == 1) { + switch (node->type) { + case YAML_SEQUENCE_NODE: + for (item = node->data.sequence.items.start; + item < node->data.sequence.items.top; item ++) { + yaml_emitter_anchor_node(emitter, *item); + } + break; + case YAML_MAPPING_NODE: + for (pair = node->data.mapping.pairs.start; + pair < node->data.mapping.pairs.top; pair ++) { + yaml_emitter_anchor_node(emitter, pair->key); + yaml_emitter_anchor_node(emitter, pair->value); + } + break; + default: + break; + } + } + + else if (emitter->anchors[index-1].references == 2) { + emitter->anchors[index-1].anchor = (++ emitter->last_anchor_id); + } +} + +/* + * Generate a textual representation for an anchor. 
+ */ + +#define ANCHOR_TEMPLATE "id%03d" +#define ANCHOR_TEMPLATE_LENGTH 16 + +static yaml_char_t * +yaml_emitter_generate_anchor(yaml_emitter_t *emitter, int anchor_id) +{ + yaml_char_t *anchor = yaml_malloc(ANCHOR_TEMPLATE_LENGTH); + + if (!anchor) return NULL; + + sprintf((char *)anchor, ANCHOR_TEMPLATE, anchor_id); + + return anchor; +} + +/* + * Serialize a node. + */ + +static int +yaml_emitter_dump_node(yaml_emitter_t *emitter, int index) +{ + yaml_node_t *node = emitter->document->nodes.start + index - 1; + int anchor_id = emitter->anchors[index-1].anchor; + yaml_char_t *anchor = NULL; + + if (anchor_id) { + anchor = yaml_emitter_generate_anchor(emitter, anchor_id); + if (!anchor) return 0; + } + + if (emitter->anchors[index-1].serialized) { + return yaml_emitter_dump_alias(emitter, anchor); + } + + emitter->anchors[index-1].serialized = 1; + + switch (node->type) { + case YAML_SCALAR_NODE: + return yaml_emitter_dump_scalar(emitter, node, anchor); + case YAML_SEQUENCE_NODE: + return yaml_emitter_dump_sequence(emitter, node, anchor); + case YAML_MAPPING_NODE: + return yaml_emitter_dump_mapping(emitter, node, anchor); + default: + assert(0); /* Could not happen. */ + break; + } + + return 0; /* Could not happen. */ +} + +/* + * Serialize an alias. + */ + +static int +yaml_emitter_dump_alias(yaml_emitter_t *emitter, yaml_char_t *anchor) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + ALIAS_EVENT_INIT(event, anchor, mark, mark); + + return yaml_emitter_emit(emitter, &event); +} + +/* + * Serialize a scalar. 
+ */
+
+static int
+yaml_emitter_dump_scalar(yaml_emitter_t *emitter, yaml_node_t *node,
+        yaml_char_t *anchor)
+{
+    yaml_mark_t mark = { 0, 0, 0 };
+    yaml_event_t event;
+
+    /* Both implicit flags are set exactly when the node carries the
+     * default scalar tag. */
+
+    int has_default_tag = (strcmp((char *)node->tag,
+                YAML_DEFAULT_SCALAR_TAG) == 0);
+    int plain_implicit = has_default_tag;
+    int quoted_implicit = has_default_tag;
+
+    SCALAR_EVENT_INIT(event, anchor, node->tag, node->data.scalar.value,
+            node->data.scalar.length, plain_implicit, quoted_implicit,
+            node->data.scalar.style, mark, mark);
+
+    return yaml_emitter_emit(emitter, &event);
+}
+
+/*
+ * Serialize a sequence: SEQUENCE-START, every item in order, SEQUENCE-END.
+ *
+ * Returns 1 on success and 0 as soon as any event or item fails.
+ */
+
+static int
+yaml_emitter_dump_sequence(yaml_emitter_t *emitter, yaml_node_t *node,
+        yaml_char_t *anchor)
+{
+    yaml_mark_t mark = { 0, 0, 0 };
+    yaml_event_t event;
+    yaml_node_item_t *item;
+
+    /* The tag is implicit when it equals the default sequence tag. */
+
+    int implicit = (strcmp((char *)node->tag, YAML_DEFAULT_SEQUENCE_TAG) == 0);
+
+    SEQUENCE_START_EVENT_INIT(event, anchor, node->tag, implicit,
+            node->data.sequence.style, mark, mark);
+    if (!yaml_emitter_emit(emitter, &event)) return 0;
+
+    item = node->data.sequence.items.start;
+    while (item < node->data.sequence.items.top) {
+        if (!yaml_emitter_dump_node(emitter, *item)) return 0;
+        item ++;
+    }
+
+    SEQUENCE_END_EVENT_INIT(event, mark, mark);
+
+    return yaml_emitter_emit(emitter, &event) ? 1 : 0;
+}
+
+/*
+ * Serialize a mapping.
+ */ + +static int +yaml_emitter_dump_mapping(yaml_emitter_t *emitter, yaml_node_t *node, + yaml_char_t *anchor) +{ + yaml_event_t event; + yaml_mark_t mark = { 0, 0, 0 }; + + int implicit = (strcmp((char *)node->tag, YAML_DEFAULT_MAPPING_TAG) == 0); + + yaml_node_pair_t *pair; + + MAPPING_START_EVENT_INIT(event, anchor, node->tag, implicit, + node->data.mapping.style, mark, mark); + if (!yaml_emitter_emit(emitter, &event)) return 0; + + for (pair = node->data.mapping.pairs.start; + pair < node->data.mapping.pairs.top; pair ++) { + if (!yaml_emitter_dump_node(emitter, pair->key)) return 0; + if (!yaml_emitter_dump_node(emitter, pair->value)) return 0; + } + + MAPPING_END_EVENT_INIT(event, mark, mark); + if (!yaml_emitter_emit(emitter, &event)) return 0; + + return 1; +} + diff --git a/src/emitter.c b/src/emitter.c index 28eadcc6..0affaab0 100644 --- a/src/emitter.c +++ b/src/emitter.c @@ -1160,6 +1160,13 @@ static int yaml_emitter_select_scalar_style(yaml_emitter_t *emitter, yaml_event_t *event) { yaml_scalar_style_t style = event->data.scalar.style; + int no_tag = (!emitter->tag_data.handle && !emitter->tag_data.suffix); + + if (no_tag && !event->data.scalar.plain_implicit + && !event->data.scalar.quoted_implicit) { + return yaml_emitter_set_emitter_error(emitter, + "neither tag nor implicit flags are specified"); + } if (style == YAML_ANY_SCALAR_STYLE) style = YAML_PLAIN_SCALAR_STYLE; @@ -1178,8 +1185,7 @@ yaml_emitter_select_scalar_style(yaml_emitter_t *emitter, yaml_event_t *event) if (!emitter->scalar_data.length && (emitter->flow_level || emitter->simple_key_context)) style = YAML_SINGLE_QUOTED_SCALAR_STYLE; - if (!event->data.scalar.plain_implicit - && !emitter->tag_data.handle && !emitter->tag_data.suffix) + if (no_tag && !event->data.scalar.plain_implicit) style = YAML_SINGLE_QUOTED_SCALAR_STYLE; } @@ -1196,19 +1202,11 @@ yaml_emitter_select_scalar_style(yaml_emitter_t *emitter, yaml_event_t *event) style = YAML_DOUBLE_QUOTED_SCALAR_STYLE; } - if 
(!emitter->tag_data.handle && !emitter->tag_data.suffix) + if (no_tag && !event->data.scalar.quoted_implicit + && style != YAML_PLAIN_SCALAR_STYLE) { - if (!event->data.scalar.plain_implicit - && !event->data.scalar.quoted_implicit) { - return yaml_emitter_set_emitter_error(emitter, - "neither tag nor implicit flags are specified"); - } - - if (event->data.scalar.plain_implicit - && style != YAML_PLAIN_SCALAR_STYLE) { - emitter->tag_data.handle = (yaml_char_t *)"!"; - emitter->tag_data.handle_length = 1; - } + emitter->tag_data.handle = (yaml_char_t *)"!"; + emitter->tag_data.handle_length = 1; } emitter->scalar_data.style = style; diff --git a/src/loader.c b/src/loader.c new file mode 100644 index 00000000..7ee0dbdf --- /dev/null +++ b/src/loader.c @@ -0,0 +1,429 @@ + +#include "yaml_private.h" + +/* + * API functions. + */ + +YAML_DECLARE(int) +yaml_parser_load(yaml_parser_t *parser, yaml_document_t *document); + +/* + * Error handling. + */ + +static int +yaml_parser_set_parser_error(yaml_parser_t *parser, + const char *problem, yaml_mark_t problem_mark); + +static int +yaml_parser_set_parser_error_context(yaml_parser_t *parser, + const char *context, yaml_mark_t context_mark, + const char *problem, yaml_mark_t problem_mark); + + +/* + * Alias handling. + */ + +static int +yaml_parser_register_anchor(yaml_parser_t *parser, + int index, yaml_char_t *anchor); + +/* + * Clean up functions. + */ + +static void +yaml_parser_delete_aliases(yaml_parser_t *parser); + +/* + * Composer functions. 
+ */ + +static int +yaml_parser_load_document(yaml_parser_t *parser, yaml_event_t *first_event); + +static int +yaml_parser_load_node(yaml_parser_t *parser, yaml_event_t *first_event); + +static int +yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *first_event); + +static int +yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *first_event); + +static int +yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *first_event); + +static int +yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *first_event); + +/* + * Load the next document of the stream. + */ + +YAML_DECLARE(int) +yaml_parser_load(yaml_parser_t *parser, yaml_document_t *document) +{ + yaml_event_t event; + + assert(parser); /* Non-NULL parser object is expected. */ + assert(document); /* Non-NULL document object is expected. */ + + memset(document, 0, sizeof(yaml_document_t)); + if (!STACK_INIT(parser, document->nodes, INITIAL_STACK_SIZE)) + goto error; + + if (!parser->stream_start_produced) { + if (!yaml_parser_parse(parser, &event)) goto error; + assert(event.type == YAML_STREAM_START_EVENT); + /* STREAM-START is expected. */ + } + + if (parser->stream_end_produced) { + return 1; + } + + if (!yaml_parser_parse(parser, &event)) goto error; + if (event.type == YAML_STREAM_END_EVENT) { + return 1; + } + + if (!STACK_INIT(parser, parser->aliases, INITIAL_STACK_SIZE)) + goto error; + + parser->document = document; + + if (!yaml_parser_load_document(parser, &event)) goto error; + + yaml_parser_delete_aliases(parser); + parser->document = NULL; + + return 1; + +error: + + yaml_parser_delete_aliases(parser); + yaml_document_delete(document); + parser->document = NULL; + + return 0; +} + +/* + * Set composer error. 
+ */ + +static int +yaml_parser_set_composer_error(yaml_parser_t *parser, + const char *problem, yaml_mark_t problem_mark) +{ + parser->error = YAML_COMPOSER_ERROR; + parser->problem = problem; + parser->problem_mark = problem_mark; + + return 0; +} + +/* + * Set composer error with context. + */ + +static int +yaml_parser_set_composer_error_context(yaml_parser_t *parser, + const char *context, yaml_mark_t context_mark, + const char *problem, yaml_mark_t problem_mark) +{ + parser->error = YAML_COMPOSER_ERROR; + parser->context = context; + parser->context_mark = context_mark; + parser->problem = problem; + parser->problem_mark = problem_mark; + + return 0; +} + +/* + * Delete the stack of aliases. + */ + +static void +yaml_parser_delete_aliases(yaml_parser_t *parser) +{ + while (!STACK_EMPTY(parser, parser->aliases)) { + yaml_free(POP(parser, parser->aliases).anchor); + } + STACK_DEL(parser, parser->aliases); +} + +/* + * Compose a document object. + */ + +static int +yaml_parser_load_document(yaml_parser_t *parser, yaml_event_t *first_event) +{ + yaml_event_t event; + + assert(first_event->type == YAML_DOCUMENT_START_EVENT); + /* DOCUMENT-START is expected. */ + + parser->document->version_directive + = first_event->data.document_start.version_directive; + parser->document->tag_directives.start + = first_event->data.document_start.tag_directives.start; + parser->document->tag_directives.end + = first_event->data.document_start.tag_directives.end; + parser->document->start_implicit + = first_event->data.document_start.implicit; + parser->document->start_mark = first_event->start_mark; + + if (!yaml_parser_parse(parser, &event)) return 0; + + if (!yaml_parser_load_node(parser, &event)) return 0; + + if (!yaml_parser_parse(parser, &event)) return 0; + assert(event.type == YAML_DOCUMENT_END_EVENT); + /* DOCUMENT-END is expected. 
*/ + + parser->document->end_implicit = event.data.document_end.implicit; + parser->document->end_mark = event.end_mark; + + return 1; +} + +/* + * Compose a node. + */ + +static int +yaml_parser_load_node(yaml_parser_t *parser, yaml_event_t *first_event) +{ + switch (first_event->type) { + case YAML_ALIAS_EVENT: + return yaml_parser_load_alias(parser, first_event); + case YAML_SCALAR_EVENT: + return yaml_parser_load_scalar(parser, first_event); + case YAML_SEQUENCE_START_EVENT: + return yaml_parser_load_sequence(parser, first_event); + case YAML_MAPPING_START_EVENT: + return yaml_parser_load_mapping(parser, first_event); + default: + assert(0); /* Could not happen. */ + return 0; + } + + return 0; +} + +/* + * Add an anchor. + */ + +static int +yaml_parser_register_anchor(yaml_parser_t *parser, + int index, yaml_char_t *anchor) +{ + yaml_alias_data_t data = { anchor, index, + parser->document->nodes.start[index-1].start_mark }; + yaml_alias_data_t *alias_data; + + if (!anchor) return 1; + + for (alias_data = parser->aliases.start; + alias_data != parser->aliases.top; alias_data ++) { + if (strcmp((char *)alias_data->anchor, (char *)anchor) == 0) { + yaml_free(anchor); + return yaml_parser_set_composer_error_context(parser, + "found duplicate anchor; first occurence", + alias_data->mark, "second occurence", data.mark); + } + } + + if (!PUSH(parser, parser->aliases, data)) { + yaml_free(anchor); + return 0; + } + + return 1; +} + +/* + * Compose a node corresponding to an alias. 
+ */
+
+static int
+yaml_parser_load_alias(yaml_parser_t *parser, yaml_event_t *first_event)
+{
+    yaml_char_t *anchor = first_event->data.alias.anchor;
+    yaml_alias_data_t *found = NULL;
+    yaml_alias_data_t *entry;
+
+    /* Look the anchor name up among the anchors registered so far. */
+
+    for (entry = parser->aliases.start;
+            entry != parser->aliases.top; entry ++) {
+        if (strcmp((char *)entry->anchor, (char *)anchor) == 0) {
+            found = entry;
+            break;
+        }
+    }
+
+    /* The name is not needed any more, whatever the outcome. */
+
+    yaml_free(anchor);
+
+    if (found) {
+        return found->index;
+    }
+
+    return yaml_parser_set_composer_error(parser, "found undefined alias",
+            first_event->start_mark);
+}
+
+/*
+ * Compose a scalar node.
+ *
+ * A missing tag or the tag "!" is replaced with a copy of the default
+ * scalar tag.  Returns the 1-based id of the new node, or 0 on failure.
+ */
+
+static int
+yaml_parser_load_scalar(yaml_parser_t *parser, yaml_event_t *first_event)
+{
+    yaml_char_t *tag = first_event->data.scalar.tag;
+    yaml_node_t node;
+    int index;
+
+    if (!tag || strcmp((char *)tag, "!") == 0) {
+        yaml_free(tag);
+        tag = yaml_strdup(YAML_DEFAULT_SCALAR_TAG);
+        if (!tag) goto error;
+    }
+
+    SCALAR_NODE_INIT(node, tag, first_event->data.scalar.value,
+            first_event->data.scalar.length, first_event->data.scalar.style,
+            first_event->start_mark, first_event->end_mark);
+
+    if (!PUSH(parser, parser->document->nodes, node)) goto error;
+
+    /* The id of a node is its 1-based position in the node stack. */
+
+    index = parser->document->nodes.top - parser->document->nodes.start;
+
+    return yaml_parser_register_anchor(parser, index,
+            first_event->data.scalar.anchor) ? index : 0;
+
+error:
+
+    /* The node was not stored, so the event strings are released here. */
+
+    yaml_free(tag);
+    yaml_free(first_event->data.scalar.anchor);
+    yaml_free(first_event->data.scalar.value);
+
+    return 0;
+}
+
+/*
+ * Compose a sequence node.
+ */
+
+/*
+ * The SEQUENCE-START event is turned into a sequence node which is pushed
+ * onto the document node stack; the following nodes are then composed and
+ * appended as items until SEQUENCE-END is seen.  A missing tag or the tag
+ * "!" is replaced with a copy of the default sequence tag.
+ *
+ * Returns the 1-based id of the new node, or 0 on failure.
+ */
+
+static int
+yaml_parser_load_sequence(yaml_parser_t *parser, yaml_event_t *first_event)
+{
+    yaml_event_t event;
+    yaml_node_t node;
+    struct {
+        yaml_node_item_t *start;
+        yaml_node_item_t *end;
+        yaml_node_item_t *top;
+    } items = { NULL, NULL, NULL };
+    int index, item_index;
+    yaml_char_t *tag = first_event->data.sequence_start.tag;
+
+    if (!tag || strcmp((char *)tag, "!") == 0) {
+        yaml_free(tag);
+        tag = yaml_strdup(YAML_DEFAULT_SEQUENCE_TAG);
+        if (!tag) goto error;
+    }
+
+    if (!STACK_INIT(parser, items, INITIAL_STACK_SIZE)) goto error;
+
+    SEQUENCE_NODE_INIT(node, tag, items.start, items.end,
+            first_event->data.sequence_start.style,
+            first_event->start_mark, first_event->end_mark);
+
+    if (!PUSH(parser, parser->document->nodes, node)) goto error;
+
+    /* The id of a node is its 1-based position in the node stack. */
+
+    index = parser->document->nodes.top - parser->document->nodes.start;
+
+    if (!yaml_parser_register_anchor(parser, index,
+                first_event->data.sequence_start.anchor)) return 0;
+
+    if (!yaml_parser_parse(parser, &event)) return 0;
+
+    /* The node is addressed through the stack on every access rather than
+     * through a saved pointer, since composing items pushes further nodes
+     * onto the same stack. */
+
+    while (event.type != YAML_SEQUENCE_END_EVENT) {
+        item_index = yaml_parser_load_node(parser, &event);
+        if (!item_index) return 0;
+        if (!PUSH(parser,
+                    parser->document->nodes.start[index-1].data.sequence.items,
+                    item_index)) return 0;
+        if (!yaml_parser_parse(parser, &event)) return 0;
+    }
+
+    parser->document->nodes.start[index-1].end_mark = event.end_mark;
+
+    return index;
+
+error:
+
+    /* This label is reached only before the node was stored in the
+     * document, so the item stack (if it was allocated at all) is still
+     * owned by this function and must be released together with the
+     * strings. */
+
+    STACK_DEL(parser, items);
+    yaml_free(tag);
+    yaml_free(first_event->data.sequence_start.anchor);
+
+    return 0;
+}
+
+/*
+ * Compose a mapping node.
+ */ + +static int +yaml_parser_load_mapping(yaml_parser_t *parser, yaml_event_t *first_event) +{ + yaml_event_t event; + yaml_node_t node; + struct { + yaml_node_pair_t *start; + yaml_node_pair_t *end; + yaml_node_pair_t *top; + } pairs = { NULL, NULL, NULL }; + int index; + yaml_node_pair_t pair; + yaml_char_t *tag = first_event->data.mapping_start.tag; + + if (!tag || strcmp((char *)tag, "!") == 0) { + yaml_free(tag); + tag = yaml_strdup(YAML_DEFAULT_MAPPING_TAG); + if (!tag) goto error; + } + + if (!STACK_INIT(parser, pairs, INITIAL_STACK_SIZE)) goto error; + + MAPPING_NODE_INIT(node, tag, pairs.start, pairs.end, + first_event->data.mapping_start.style, + first_event->start_mark, first_event->end_mark); + + if (!PUSH(parser, parser->document->nodes, node)) goto error; + + index = parser->document->nodes.top - parser->document->nodes.start; + + if (!yaml_parser_register_anchor(parser, index, + first_event->data.mapping_start.anchor)) return 0; + + if (!yaml_parser_parse(parser, &event)) return 0; + + while (event.type != YAML_MAPPING_END_EVENT) { + pair.key = yaml_parser_load_node(parser, &event); + if (!pair.key) return 0; + if (!yaml_parser_parse(parser, &event)) return 0; + pair.value = yaml_parser_load_node(parser, &event); + if (!pair.value) return 0; + if (!PUSH(parser, + parser->document->nodes.start[index-1].data.mapping.pairs, + pair)) return 0; + if (!yaml_parser_parse(parser, &event)) return 0; + } + + parser->document->nodes.start[index-1].end_mark = event.end_mark; + + return index; + +error: + yaml_free(tag); + yaml_free(first_event->data.mapping_start.anchor); + return 0; +} + diff --git a/src/yaml_private.h b/src/yaml_private.h index 33787357..10c4219e 100644 --- a/src/yaml_private.h +++ b/src/yaml_private.h @@ -579,28 +579,45 @@ yaml_queue_extend(void **start, void **head, void **tail, void **end); #define MAPPING_END_EVENT_INIT(event,start_mark,end_mark) \ (EVENT_INIT((event),YAML_MAPPING_END_EVENT,(start_mark),(end_mark))) +/* + * Document 
initializer. + */ + +#define DOCUMENT_INIT(document,document_nodes_start,document_nodes_end, \ + document_version_directive,document_tag_directives_start, \ + document_tag_directives_end,document_start_implicit, \ + document_end_implicit,start_mark,end_mark) \ + (memset(&(document), 0, sizeof(yaml_document_t)), \ + (document).nodes.start = (document_nodes_start), \ + (document).nodes.end = (document_nodes_end), \ + (document).nodes.top = (document_nodes_start), \ + (document).version_directive = (document_version_directive), \ + (document).tag_directives.start = (document_tag_directives_start), \ + (document).tag_directives.end = (document_tag_directives_end), \ + (document).start_implicit = (document_start_implicit), \ + (document).end_implicit = (document_end_implicit)) + /* * Node initializers. */ -#define NODE_INIT(node,node_type,node_start_mark,node_end_mark) \ +#define NODE_INIT(node,node_type,node_tag,node_start_mark,node_end_mark) \ (memset(&(node), 0, sizeof(yaml_node_t)), \ (node).type = (node_type), \ + (node).tag = (node_tag), \ (node).start_mark = (node_start_mark), \ (node).end_mark = (node_end_mark)) #define SCALAR_NODE_INIT(node,node_tag,node_value,node_length, \ node_style,start_mark,end_mark) \ - (EVENT_INIT((node),YAML_SCALAR_NODE,(start_mark),(end_mark)), \ - (node).data.scalar.tag = (node_tag), \ + (NODE_INIT((node),YAML_SCALAR_NODE,(node_tag),(start_mark),(end_mark)), \ (node).data.scalar.value = (node_value), \ (node).data.scalar.length = (node_length), \ (node).data.scalar.style = (node_style)) #define SEQUENCE_NODE_INIT(node,node_tag,node_items_start,node_items_end, \ node_style,start_mark,end_mark) \ - (NODE_INIT((node),YAML_SEQUENCE_NODE,(start_mark),(end_mark)), \ - (node).data.sequence.tag = (node_tag), \ + (NODE_INIT((node),YAML_SEQUENCE_NODE,(node_tag),(start_mark),(end_mark)), \ (node).data.sequence.items.start = (node_items_start), \ (node).data.sequence.items.end = (node_items_end), \ (node).data.sequence.items.top = 
(node_items_start), \ @@ -608,8 +625,7 @@ yaml_queue_extend(void **start, void **head, void **tail, void **end); #define MAPPING_NODE_INIT(node,node_tag,node_pairs_start,node_pairs_end, \ node_style,start_mark,end_mark) \ - (NODE_INIT((node),YAML_MAPPING_NODE,(start_mark),(end_mark)), \ - (node).data.mapping.tag = (node_tag), \ + (NODE_INIT((node),YAML_MAPPING_NODE,(node_tag),(start_mark),(end_mark)), \ (node).data.mapping.pairs.start = (node_pairs_start), \ (node).data.mapping.pairs.end = (node_pairs_end), \ (node).data.mapping.pairs.top = (node_pairs_start), \ diff --git a/tests/Makefile.am b/tests/Makefile.am index bfc01d7d..72e84d2a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -3,4 +3,6 @@ AM_CPPFLAGS = -I$(top_srcdir)/include LDADD = $(top_builddir)/src/libyaml.la TESTS = test-version test-reader check_PROGRAMS = test-version test-reader -noinst_PROGRAMS = run-scanner run-parser run-emitter example-reformatter example-deconstructor +noinst_PROGRAMS = run-scanner run-parser run-loader run-emitter run-dumper \ + example-reformatter example-reformatter-alt \ + example-deconstructor example-deconstructor-alt diff --git a/tests/example-deconstructor-alt.c b/tests/example-deconstructor-alt.c new file mode 100644 index 00000000..7da194a6 --- /dev/null +++ b/tests/example-deconstructor-alt.c @@ -0,0 +1,800 @@ + +#include + +#include +#include + +int +main(int argc, char *argv[]) +{ + int help = 0; + int canonical = 0; + int unicode = 0; + int k; + int done = 0; + + yaml_parser_t parser; + yaml_emitter_t emitter; + yaml_event_t input_event; + yaml_document_t output_document; + + int root; + + /* Clear the objects. */ + + memset(&parser, 0, sizeof(parser)); + memset(&emitter, 0, sizeof(emitter)); + memset(&input_event, 0, sizeof(input_event)); + memset(&output_document, 0, sizeof(output_document)); + + /* Analyze command line options. 
*/ + + for (k = 1; k < argc; k ++) + { + if (strcmp(argv[k], "-h") == 0 + || strcmp(argv[k], "--help") == 0) { + help = 1; + } + + else if (strcmp(argv[k], "-c") == 0 + || strcmp(argv[k], "--canonical") == 0) { + canonical = 1; + } + + else if (strcmp(argv[k], "-u") == 0 + || strcmp(argv[k], "--unicode") == 0) { + unicode = 1; + } + + else { + fprintf(stderr, "Unrecognized option: %s\n" + "Try `%s --help` for more information.\n", + argv[k], argv[0]); + return 1; + } + } + + /* Display the help string. */ + + if (help) + { + printf("%s . */ + + if (input_event.data.stream_start.encoding) + { + yaml_encoding_t encoding + = input_event.data.stream_start.encoding; + + key = yaml_document_add_scalar(&output_document, NULL, + "encoding", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + (encoding == YAML_UTF8_ENCODING ? "utf-8" : + encoding == YAML_UTF16LE_ENCODING ? "utf-16-le" : + encoding == YAML_UTF16BE_ENCODING ? "utf-16-be" : + "unknown"), -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + } + + break; + + case YAML_STREAM_END_EVENT: + + /* Add 'type': 'STREAM-END'. */ + + key = yaml_document_add_scalar(&output_document, NULL, + "type", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + "STREAM-END", -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + break; + + case YAML_DOCUMENT_START_EVENT: + + /* Add 'type': 'DOCUMENT-START'. 
*/ + + key = yaml_document_add_scalar(&output_document, NULL, + "type", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + "DOCUMENT-START", -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + /* Display the output_document version numbers. */ + + if (input_event.data.document_start.version_directive) + { + yaml_version_directive_t *version + = input_event.data.document_start.version_directive; + char number[64]; + + /* Add 'version': {}. */ + + key = yaml_document_add_scalar(&output_document, NULL, + "version", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + map = yaml_document_add_mapping(&output_document, NULL, + YAML_FLOW_MAPPING_STYLE); + if (!map) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, map)) goto document_error; + + /* Add 'major': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "major", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + sprintf(number, "%d", version->major); + value = yaml_document_add_scalar(&output_document, YAML_INT_TAG, + number, -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + map, key, value)) goto document_error; + + /* Add 'minor': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "minor", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + sprintf(number, "%d", version->minor); + value = yaml_document_add_scalar(&output_document, YAML_INT_TAG, + number, -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + map, key, value)) goto document_error; + } + + /* Display the output_document tag directives. 
*/ + + if (input_event.data.document_start.tag_directives.start + != input_event.data.document_start.tag_directives.end) + { + yaml_tag_directive_t *tag; + + /* Add 'tags': []. */ + + key = yaml_document_add_scalar(&output_document, NULL, + "tags", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + seq = yaml_document_add_sequence(&output_document, NULL, + YAML_BLOCK_SEQUENCE_STYLE); + if (!seq) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, seq)) goto document_error; + + for (tag = input_event.data.document_start.tag_directives.start; + tag != input_event.data.document_start.tag_directives.end; + tag ++) + { + /* Add {}. */ + + map = yaml_document_add_mapping(&output_document, NULL, + YAML_FLOW_MAPPING_STYLE); + if (!map) goto document_error; + if (!yaml_document_append_sequence_item(&output_document, + seq, map)) goto document_error; + + /* Add 'handle': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "handle", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + tag->handle, -1, YAML_DOUBLE_QUOTED_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + map, key, value)) goto document_error; + + /* Add 'prefix': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "prefix", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + tag->prefix, -1, YAML_DOUBLE_QUOTED_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + map, key, value)) goto document_error; + } + } + + /* Add 'implicit': . 
*/ + + key = yaml_document_add_scalar(&output_document, NULL, + "implicit", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, YAML_BOOL_TAG, + (input_event.data.document_start.implicit ? + "true" : "false"), -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + break; + + case YAML_DOCUMENT_END_EVENT: + + /* Add 'type': 'DOCUMENT-END'. */ + + key = yaml_document_add_scalar(&output_document, NULL, + "type", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + "DOCUMENT-END", -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + /* Add 'implicit': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "implicit", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, YAML_BOOL_TAG, + (input_event.data.document_end.implicit ? + "true" : "false"), -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + break; + + case YAML_ALIAS_EVENT: + + /* Add 'type': 'ALIAS'. */ + + key = yaml_document_add_scalar(&output_document, NULL, + "type", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + "ALIAS", -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + /* Add 'anchor': . 
*/ + + key = yaml_document_add_scalar(&output_document, NULL, + "anchor", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + input_event.data.alias.anchor, -1, + YAML_DOUBLE_QUOTED_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + break; + + case YAML_SCALAR_EVENT: + + /* Add 'type': 'SCALAR'. */ + + key = yaml_document_add_scalar(&output_document, NULL, + "type", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + "SCALAR", -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + /* Add 'anchor': . */ + + if (input_event.data.scalar.anchor) + { + key = yaml_document_add_scalar(&output_document, NULL, + "anchor", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + input_event.data.scalar.anchor, -1, + YAML_DOUBLE_QUOTED_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + } + + /* Add 'tag': . */ + + if (input_event.data.scalar.tag) + { + key = yaml_document_add_scalar(&output_document, NULL, + "tag", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + input_event.data.scalar.tag, -1, + YAML_DOUBLE_QUOTED_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + } + + /* Add 'value': . 
*/ + + key = yaml_document_add_scalar(&output_document, NULL, + "value", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, NULL, + input_event.data.scalar.value, + input_event.data.scalar.length, + YAML_DOUBLE_QUOTED_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, value)) goto document_error; + + /* Display if the scalar tag is implicit. */ + + /* Add 'implicit': {} */ + + key = yaml_document_add_scalar(&output_document, NULL, + "version", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + map = yaml_document_add_mapping(&output_document, NULL, + YAML_FLOW_MAPPING_STYLE); + if (!map) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + properties, key, map)) goto document_error; + + /* Add 'plain': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "plain", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, YAML_BOOL_TAG, + (input_event.data.scalar.plain_implicit ? + "true" : "false"), -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + map, key, value)) goto document_error; + + /* Add 'quoted': . */ + + key = yaml_document_add_scalar(&output_document, NULL, + "quoted", -1, YAML_PLAIN_SCALAR_STYLE); + if (!key) goto document_error; + value = yaml_document_add_scalar(&output_document, YAML_BOOL_TAG, + (input_event.data.scalar.quoted_implicit ? + "true" : "false"), -1, YAML_PLAIN_SCALAR_STYLE); + if (!value) goto document_error; + if (!yaml_document_append_mapping_pair(&output_document, + map, key, value)) goto document_error; + + /* Display the style information. */ + + if (input_event.data.scalar.style) + { + yaml_scalar_style_t style = input_event.data.scalar.style; + + /* Add 'style':