diff --git a/src/cbortojson.c b/src/cbortojson.c
index 1f74e32f..a617db2d 100644
--- a/src/cbortojson.c
+++ b/src/cbortojson.c
@@ -170,9 +170,17 @@ typedef struct ConversionStatus {
 static CborError value_to_json(FILE *out, CborValue *it, int flags, CborType type,
                                int nestingLevel, ConversionStatus *status);
 
-static CborError dump_bytestring_base16(char **result, CborValue *it)
+/* Writes the two lowercase hex digits of byte to buffer[0] and buffer[1]. */
+static void append_hex(void *buffer, uint8_t byte)
 {
     static const char characters[] = "0123456789abcdef";
+    char *str = buffer;
+    str[0] = characters[byte >> 4];
+    str[1] = characters[byte & 0xf];
+}
+
+static CborError dump_bytestring_base16(char **result, CborValue *it)
+{
     size_t i;
     size_t n = 0;
     uint8_t *buffer;
@@ -195,8 +203,7 @@ static CborError dump_bytestring_base16(char **result, CborValue *it)
 
     for (i = 0; i < n; ++i) {
         uint8_t byte = buffer[n + i];
-        buffer[2*i] = characters[byte >> 4];
-        buffer[2*i + 1] = characters[byte & 0xf];
+        append_hex(buffer + 2 * i, byte);
     }
     return CborNoError;
 }
@@ -293,6 +300,111 @@ static CborError dump_bytestring_base64url(char **result, CborValue *it)
     return generic_dump_base64(result, it, alphabet);
 }
 
+/*
+ * Appends the JSON-escaped form of input[0..len) to *str at *offsetp and
+ * NUL-terminates it, growing *str with cbor_realloc() when needed.
+ * alloc and offsetp may be NULL, meaning "always reallocate" and "start at
+ * offset 0". On failure *str, *alloc and *offsetp are left untouched.
+ */
+static CborError escape_text_string(char **str, size_t *alloc, size_t *offsetp, const char *input, size_t len)
+{
+    /* JSON requires escaping some characters in strings, so we iterate and
+     * escape as necessary
+     * https://www.rfc-editor.org/rfc/rfc8259#section-7:
+     *   All Unicode characters may be placed within the
+     *   quotation marks, except for the characters that MUST be escaped:
+     *   quotation mark, reverse solidus, and the control characters (U+0000
+     *   through U+001F).
+     * We additionally choose to escape BS, HT, CR, LF and FF.
+     */
+    char *buf = *str;
+
+    /* Ensure we have enough space for this chunk. In the worst case, we
+     * have 6 escaped characters per input character.
+     *
+     * The overflow checking here is only practically useful for 32-bit
+     * machines, as SIZE_MAX/6 for a 64-bit machine is 2.6667 exabytes.
+     * That is much more than any current architecture can even address and
+     * cbor_value_get_text_string_chunk() only works for data already
+     * loaded into memory.
+     */
+    size_t needed;
+    size_t offset = offsetp ? *offsetp : 0;
+    if (mul_check_overflow(len, 6, &needed) || add_check_overflow(needed, offset, &needed)
+            || add_check_overflow(needed, 1, &needed)) {
+        return CborErrorDataTooLarge;
+    }
+    if (!alloc || needed > *alloc) {
+        buf = cbor_realloc(buf, needed);
+        if (!buf)
+            return CborErrorOutOfMemory;
+        if (alloc)
+            *alloc = needed;
+    }
+
+    for (size_t i = 0; i < len; ++i) {
+        static const char escapeChars[] = "\b\t\n\r\f\"\\";
+        static const char escapedChars[] = "btnrf\"\\";
+        unsigned char c = input[i];
+
+        char *esc = c > 0 ? strchr(escapeChars, c) : NULL;
+        if (esc) {
+            buf[offset++] = '\\';
+            buf[offset++] = escapedChars[esc - escapeChars];
+        } else if (c <= 0x1F) {
+            buf[offset++] = '\\';
+            buf[offset++] = 'u';
+            buf[offset++] = '0';
+            buf[offset++] = '0';
+            append_hex(buf + offset, c);
+            offset += 2;
+        } else {
+            buf[offset++] = c;
+        }
+    }
+    buf[offset] = '\0';
+    *str = buf;
+    if (offsetp)
+        *offsetp = offset;
+    return CborNoError;
+}
+
+/*
+ * Collects every chunk of the text string at it into one newly allocated,
+ * JSON-escaped, NUL-terminated string stored in *str (release it with
+ * cbor_free()). On any error nothing is leaked and *str is set to NULL.
+ */
+static CborError text_string_to_escaped(char **str, CborValue *it)
+{
+    size_t alloc = 0, offset = 0;
+    CborError err;
+
+    *str = NULL;
+    err = cbor_value_begin_string_iteration(it);
+    while (err == CborNoError) {
+        const char *chunk;
+        size_t len;
+        err = cbor_value_get_text_string_chunk(it, &chunk, &len, it);
+        if (err == CborNoError)
+            err = escape_text_string(str, &alloc, &offset, chunk, len);
+    }
+
+    if (likely(err == CborErrorNoMoreStringChunks)) {
+        /* no chunk seen: still hand back an empty string from cbor_realloc() */
+        err = CborNoError;
+        if (!*str)
+            err = escape_text_string(str, &alloc, &offset, "", 0);
+        if (err == CborNoError)
+            err = cbor_value_finish_string_iteration(it);
+        if (err == CborNoError)
+            return CborNoError;
+    }
+
+    cbor_free(*str);
+    *str = NULL;
+    return err;
+}
+
 static CborError add_value_metadata(FILE *out, CborType type, const ConversionStatus *status)
 {
     int flags = status->flags;
@@ -420,14 +532,20 @@ static CborError stringify_map_key(char **key, CborValue *it, int flags, CborTyp
     return CborErrorJsonNotImplemented;
 #else
     size_t size;
+    char *stringified;
 
-    FILE *memstream = open_memstream(key, &size);
+    FILE *memstream = open_memstream(&stringified, &size);
     if (memstream == NULL)
         return CborErrorOutOfMemory;        /* could also be EMFILE, but it's unlikely */
     CborError err = cbor_value_to_pretty_advance(memstream, it);
 
-    if (unlikely(fclose(memstream) < 0 || *key == NULL))
+    if (unlikely(fclose(memstream) < 0 || stringified == NULL))
         return CborErrorInternalError;
+    if (err == CborNoError) {
+        /* escape the stringified CBOR stream */
+        err = escape_text_string(key, NULL, NULL, stringified, size);
+    }
+    cbor_free(stringified);
     return err;
 #endif
 }
@@ -452,15 +570,14 @@ static CborError map_to_json(FILE *out, CborValue *it, int flags, int nestingLev
     const char *comma = "";
     CborError err;
     while (!cbor_value_at_end(it)) {
-        char *key;
+        char *key = NULL;
         if (fprintf(out, "%s", comma) < 0)
             return CborErrorIO;
         comma = ",";
 
         CborType keyType = cbor_value_get_type(it);
         if (likely(keyType == CborTextStringType)) {
-            size_t n = 0;
-            err = cbor_value_dup_text_string(it, &key, &n, it);
+            err = text_string_to_escaped(&key, it);
         } else if (flags & CborConvertStringifyMapKeys) {
             err = stringify_map_key(&key, it, flags, keyType);
         } else {
@@ -570,8 +687,7 @@ static CborError value_to_json(FILE *out, CborValue *it, int flags, CborType typ
             err = dump_bytestring_base64url(&str, it);
             status->flags = TypeWasNotNative;
         } else {
-            size_t n = 0;
-            err = cbor_value_dup_text_string(it, &str, &n, it);
+            err = text_string_to_escaped(&str, it);
         }
         if (err)
             return err;
diff --git a/src/compilersupport_p.h b/src/compilersupport_p.h
index 55a26e4a..c91ea933 100644
--- a/src/compilersupport_p.h
+++ b/src/compilersupport_p.h
@@ -234,4 +234,19 @@ static inline bool add_check_overflow(size_t v1, size_t v2, size_t *r)
 #endif
 }
 
+/*
+ * Computes v1 * v2 into *r and returns true if the product overflowed
+ * size_t (in which case *r holds the wrapped result).
+ */
+static inline bool mul_check_overflow(size_t v1, size_t v2, size_t *r)
+{
+#if ((defined(__GNUC__) && (__GNUC__ >= 5)) && !defined(__INTEL_COMPILER)) || __has_builtin(__builtin_mul_overflow)
+    return __builtin_mul_overflow(v1, v2, r);
+#else
+    /* unsigned multiplication wraps (no UB); dividing back exposes a wrap */
+    *r = v1 * v2;
+    return v1 != 0 && *r / v1 != v2;
+#endif
+}
+
 #endif /* COMPILERSUPPORT_H */
diff --git a/src/memory.h b/src/memory.h
index 0032b93b..6686eb26 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -26,6 +26,7 @@
 #  include CBOR_CUSTOM_ALLOC_INCLUDE
 #else
 #  include <stdlib.h>
-#  define cbor_malloc malloc
-#  define cbor_free free
+#  define cbor_malloc   malloc
+#  define cbor_realloc  realloc
+#  define cbor_free     free
 #endif
diff --git a/tests/tojson/tst_tojson.cpp b/tests/tojson/tst_tojson.cpp
index 27603ea6..a1b03663 100644
--- a/tests/tojson/tst_tojson.cpp
+++ b/tests/tojson/tst_tojson.cpp
@@ -160,6 +160,18 @@ void addTextStringsData()
     QTest::newRow("_textstring5*2") << raw("\x7f\x63Hel\x62lo\xff") << "\"Hello\"";
     QTest::newRow("_textstring5*5") << raw("\x7f\x61H\x61""e\x61l\x61l\x61o\xff") << "\"Hello\"";
     QTest::newRow("_textstring5*6") << raw("\x7f\x61H\x61""e\x61l\x60\x61l\x61o\xff") << "\"Hello\"";
+
+    // strings containing characters that are escaped in JSON
+    QTest::newRow("null") << raw("\x61\0") << R"("\u0000")";
+    QTest::newRow("bell") << raw("\x61\7") << R"("\u0007")"; // not \\a
+    QTest::newRow("backspace") << raw("\x61\b") << R"("\b")";
+    QTest::newRow("tab") << raw("\x61\t") << R"("\t")";
+    QTest::newRow("carriage-return") << raw("\x61\r") << R"("\r")";
+    QTest::newRow("line-feed") << raw("\x61\n") << R"("\n")";
+    QTest::newRow("form-feed") << raw("\x61\f") << R"("\f")";
+    QTest::newRow("esc") << raw("\x61\x1f") << R"("\u001f")";
+    QTest::newRow("quote") << raw("\x61\"") << R"("\"")";
+    QTest::newRow("backslash") << raw("\x61\\") << R"("\\")";
 }
 
 void addNonJsonData()
@@ -412,6 +424,15 @@ void tst_ToJson::nonStringKeyMaps_data()
     QTest::newRow("map-24-0") << raw("\xa1\x18\x18\0") << "{24: 0}";
     QTest::newRow("_map-0-24") << raw("\xbf\0\x18\x18\xff") << "{_ 0: 24}";
     QTest::newRow("_map-24-0") << raw("\xbf\x18\x18\0\xff") << "{_ 24: 0}";
+
+    // nested strings ought to be escaped
+    QTest::newRow("array-emptystring") << raw("\x81\x60") << R"([\"\"])";
+    QTest::newRow("array-string1") << raw("\x81\x61 ") << R"([\" \"])";
+
+    // and escaped characters in strings end up doubly escaped
+    QTest::newRow("array-string-null") << raw("\x81\x61\0") << R"([\"\\u0000\"])";
+    QTest::newRow("array-string-quote") << raw("\x81\x61\"") << R"([\"\\\"\"])";
+    QTest::newRow("array-string-backslash") << raw("\x81\x61\\") << R"([\"\\\\\"])";
 }
 
 void tst_ToJson::nonStringKeyMaps()