diff --git a/src/bin/readstat.c b/src/bin/readstat.c index f55da6a4..10a6b216 100644 --- a/src/bin/readstat.c +++ b/src/bin/readstat.c @@ -424,6 +424,7 @@ size_t readstat_strftime(char *s, size_t maxsize, const char *format, time_t tim static int dump_metadata(readstat_metadata_t *metadata, void *ctx) { printf("Columns: %d\n", readstat_get_var_count(metadata)); printf("Rows: %d\n", readstat_get_row_count(metadata)); + const char *table_name = readstat_get_table_name(metadata); const char *file_label = readstat_get_file_label(metadata); const char *orig_encoding = readstat_get_file_encoding(metadata); long version = readstat_get_file_format_version(metadata); @@ -431,8 +432,11 @@ static int dump_metadata(readstat_metadata_t *metadata, void *ctx) { readstat_compress_t compression = readstat_get_compression(metadata); readstat_endian_t endianness = readstat_get_endianness(metadata); + if (table_name && table_name[0]) { + printf("Table name: %s\n", table_name); + } if (file_label && file_label[0]) { - printf("File label: %s\n", file_label); + printf("Table label: %s\n", file_label); } if (version) { printf("Format version: %ld\n", version); diff --git a/src/readstat.h b/src/readstat.h index 04a7b3fb..ece0d379 100644 --- a/src/readstat.h +++ b/src/readstat.h @@ -483,7 +483,7 @@ typedef struct readstat_writer_s { int row_count; int current_row; - char file_label[100]; + char file_label[257]; char table_name[33]; const readstat_variable_t *fweight_variable; diff --git a/src/sas/readstat_sas.c b/src/sas/readstat_sas.c index 7e3ffc7c..3c43e0bc 100644 --- a/src/sas/readstat_sas.c +++ b/src/sas/readstat_sas.c @@ -189,7 +189,7 @@ readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } - memcpy(hinfo->file_label, header_start.file_label, sizeof(header_start.file_label)); + memcpy(hinfo->table_name, header_start.table_name, sizeof(header_start.table_name)); if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; @@ -317,16 +317,16 @@ readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t * struct tm epoch_tm = { .tm_year = 60, .tm_mday = 1 }; time_t epoch = mktime(&epoch_tm); - memset(header_start.file_label, ' ', sizeof(header_start.file_label)); + memset(header_start.table_name, ' ', sizeof(header_start.table_name)); - size_t file_label_len = strlen(writer->file_label); - if (file_label_len > sizeof(header_start.file_label)) - file_label_len = sizeof(header_start.file_label); + size_t table_name_len = strlen(writer->table_name); + if (table_name_len > sizeof(header_start.table_name)) + table_name_len = sizeof(header_start.table_name); - if (file_label_len) { - memcpy(header_start.file_label, writer->file_label, file_label_len); + if (table_name_len) { + memcpy(header_start.table_name, writer->table_name, table_name_len); } else { - memcpy(header_start.file_label, "DATASET", sizeof("DATASET")-1); + memcpy(header_start.table_name, "DATASET", sizeof("DATASET")-1); } retval = readstat_write_bytes(writer, &header_start, sizeof(sas_header_start_t)); @@ -379,7 +379,7 @@ readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t * goto cleanup; sas_header_end_t header_end = { - .host = "W32_VSPRO" + .host = "9.0401M6Linux" }; char release[sizeof(header_end.release)+1] = { 0 }; diff --git a/src/sas/readstat_sas.h b/src/sas/readstat_sas.h index 4d707689..5fce15c3 100644 --- a/src/sas/readstat_sas.h +++ b/src/sas/readstat_sas.h @@ -17,7 +17,8 @@ typedef struct sas_header_start_s { unsigned char encoding; unsigned char mystery5[13]; char file_type[8]; - char file_label[64]; + char table_name[32]; + unsigned char mystery6[32]; char file_info[8]; } sas_header_start_t; @@ -47,7 +48,8 @@ typedef struct sas_header_info_s { int64_t header_size; time_t creation_time; time_t modification_time; - char file_label[64]; + char table_name[32]; + char file_label[256]; char *encoding; } sas_header_info_t; diff --git a/src/sas/readstat_sas7bcat_read.c b/src/sas/readstat_sas7bcat_read.c index 6c16622a..968d968b 100644 --- a/src/sas/readstat_sas7bcat_read.c +++ b/src/sas/readstat_sas7bcat_read.c @@ -415,7 +415,7 @@ readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char * } if (ctx->metadata_handler) { - char file_label[4*64+1]; + char table_name[4*32+1]; readstat_metadata_t metadata = { .file_encoding = ctx->input_encoding, /* orig encoding? */ .modified_time = hinfo->modification_time, @@ -424,12 +424,12 @@ readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char * .endianness = hinfo->little_endian ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG, .is64bit = ctx->u64 }; - retval = readstat_convert(file_label, sizeof(file_label), - hinfo->file_label, sizeof(hinfo->file_label), ctx->converter); + retval = readstat_convert(table_name, sizeof(table_name), + hinfo->table_name, sizeof(hinfo->table_name), ctx->converter); if (retval != READSTAT_OK) goto cleanup; - metadata.file_label = file_label; + metadata.table_name = table_name; if (ctx->metadata_handler(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; diff --git a/src/sas/readstat_sas7bdat_read.c b/src/sas/readstat_sas7bdat_read.c index bf88bfa4..44e13135 100644 --- a/src/sas/readstat_sas7bdat_read.c +++ b/src/sas/readstat_sas7bdat_read.c @@ -85,7 +85,8 @@ typedef struct sas7bdat_ctx_s { time_t ctime; time_t mtime; int version; - char file_label[4*64+1]; + char table_name[4*32+1]; + char file_label[4*256+1]; char error_buf[2048]; unsigned int rdc_compression:1; @@ -285,6 +286,7 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead int i; const char *cnp = &subheader[signature_len+8]; uint16_t remainder = sas_read2(&subheader[signature_len], ctx->bswap); + int off; if (remainder != sas_subheader_remainder(len, signature_len)) { retval = READSTAT_ERROR_PARSE; @@ -298,6 +300,39 @@ static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subhead for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); + if (i == 0) { + if (ctx->text_blobs == NULL || ctx->text_blob_lengths == NULL) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + if (ctx->version < 9) { + off = 36; + } else { + if (ctx->text_blob_lengths[0] < 19) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + if (!memcmp(&ctx->text_blobs[0][12], "SASYZCR", 7)) { + off = 44; + } else { + off = ctx->u64 ? 36 : 12; + } + } + if (ctx->col_info[0].name_ref.offset >= + ctx->text_blob_lengths[0] || + ctx->col_info[0].name_ref.offset < off) { + retval = READSTAT_ERROR_PARSE; + goto cleanup; + } + retval = readstat_convert(ctx->file_label, + sizeof(ctx->file_label), + &ctx->text_blobs[0][off], + ctx->col_info[0].name_ref.offset - off, + ctx->converter + ); + if (retval != READSTAT_OK) + goto cleanup; + } cnp += 8; } @@ -712,6 +747,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres readstat_metadata_t metadata = { .row_count = ctx->row_limit, .var_count = ctx->column_count, + .table_name = ctx->table_name, .file_label = ctx->file_label, .file_encoding = ctx->input_encoding, /* orig encoding? */ .creation_time = ctx->ctime, @@ -1219,8 +1255,8 @@ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char * ctx->converter = converter; } - if ((retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), - hinfo->file_label, sizeof(hinfo->file_label), ctx->converter)) != READSTAT_OK) { + if ((retval = readstat_convert(ctx->table_name, sizeof(ctx->table_name), + hinfo->table_name, sizeof(hinfo->table_name), ctx->converter)) != READSTAT_OK) { goto cleanup; } diff --git a/src/sas/readstat_sas7bdat_write.c b/src/sas/readstat_sas7bdat_write.c index 37e3276b..acb8a9e0 100644 --- a/src/sas/readstat_sas7bdat_write.c +++ b/src/sas/readstat_sas7bdat_write.c @@ -140,7 +140,7 @@ static readstat_error_t sas7bdat_emit_header(readstat_writer_t *writer, sas_head .file_format = SAS_FILE_FORMAT_UNIX, .encoding = 20, /* UTF-8 */ .file_type = "SAS FILE", - .file_info = "DATA ~ ~" + .file_info = "DATA " }; memcpy(&header_start.magic, sas7bdat_magic_number, sizeof(header_start.magic)); @@ -218,13 +218,16 @@ static sas7bdat_subheader_t *sas7bdat_col_name_subheader_init(readstat_writer_t sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_NAME, len); memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t)); + + sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, "READSTAT"); + text_ref = sas7bdat_make_text_ref(column_text_array, writer->file_label); int i; char *ptrs = &subheader->data[signature_len+8]; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); const char *name = readstat_variable_get_name(variable); - sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, name); + text_ref = sas7bdat_make_text_ref(column_text_array, name); memcpy(&ptrs[0], &text_ref.index, sizeof(uint16_t)); memcpy(&ptrs[2], &text_ref.offset, sizeof(uint16_t)); memcpy(&ptrs[4], &text_ref.length, sizeof(uint16_t)); diff --git a/src/sas/readstat_xport_read.c b/src/sas/readstat_xport_read.c index 079865e8..f9349f23 100644 --- a/src/sas/readstat_xport_read.c +++ b/src/sas/readstat_xport_read.c @@ -32,7 +32,7 @@ typedef struct xport_ctx_s { int row_offset; size_t row_length; int parsed_row_count; - char file_label[40*4+1]; + char file_label[256*4+1]; char table_name[32*4+1]; readstat_variable_t **variables; diff --git a/src/test/test_read.c b/src/test/test_read.c index 82c0bf57..a3ffa744 100644 --- a/src/test/test_read.c +++ b/src/test/test_read.c @@ -77,7 +77,8 @@ void parse_ctx_reset(rt_parse_ctx_t *parse_ctx, long file_format) { } else if ((file_format & RT_FORMAT_SAV)) { parse_ctx->max_file_label_len = 64; } else if ((file_format & RT_FORMAT_SAS7BDAT)) { - parse_ctx->max_file_label_len = 64; + parse_ctx->max_table_name_len = 32; + parse_ctx->max_file_label_len = 256; } else { parse_ctx->max_file_label_len = 20; } @@ -85,6 +86,7 @@ void parse_ctx_reset(rt_parse_ctx_t *parse_ctx, long file_format) { parse_ctx->max_table_name_len = 8; } else if ((file_format & RT_FORMAT_XPORT_8)) { parse_ctx->max_table_name_len = 32; + parse_ctx->max_file_label_len = 256; } parse_ctx->var_index = -1; parse_ctx->obs_index = -1; diff --git a/src/test/test_write.c b/src/test/test_write.c index f7831398..46e14306 100644 --- a/src/test/test_write.c +++ b/src/test/test_write.c @@ -31,7 +31,9 @@ readstat_error_t write_file_to_buffer(rt_test_file_t *file, rt_buffer_t *buffer, readstat_writer_t *writer = readstat_writer_init(); readstat_set_data_writer(writer, &write_data); - readstat_writer_set_file_label(writer, file->label); + if ((format & RT_FORMAT_SAS7BCAT)) { + strncpy(file->label, "", 1); + } else readstat_writer_set_file_label(writer, file->label); readstat_writer_set_table_name(writer, file->table_name); readstat_writer_set_error_handler(writer, &handle_error); if (file->timestamp.tm_year) {