From e86f577dd9042ea4365201c8251b439f9f2d8e0f Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Tue, 5 Dec 2017 13:23:31 +0100 Subject: [PATCH 1/4] src: minor cleanups to node_url.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove pointless pointers - Make `WriteHost` take a const argument so that it’s functionality is clear from the signature - Make `FindLongestZeroSequence` templated to accommodate the constness in `WriteHost` and because using `uint16_t` is an articifial, unnecessary restriction - Remove string copying when no copies are needed - Make `PercentDecode` just return its return value - Make `ParseHost` (string-only version) take its constant argument as a constant reference --- src/node_url.cc | 61 +++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index e1ef9273ae927e..b4b16399aab87f 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -506,12 +506,11 @@ static inline unsigned hex2bin(const T ch) { return static_cast(-1); } -static inline void PercentDecode(const char* input, - size_t len, - std::string* dest) { +inline std::string PercentDecode(const char* input, size_t len) { + std::string dest; if (len == 0) - return; - dest->reserve(len); + return dest; + dest.reserve(len); const char* pointer = input; const char* end = input + len; @@ -522,17 +521,18 @@ static inline void PercentDecode(const char* input, (ch == '%' && (!IsASCIIHexDigit(pointer[1]) || !IsASCIIHexDigit(pointer[2])))) { - *dest += ch; + dest += ch; pointer++; continue; } else { unsigned a = hex2bin(pointer[1]); unsigned b = hex2bin(pointer[2]); char c = static_cast(a * 16 + b); - *dest += c; + dest += c; pointer += 3; } } + return dest; } #define SPECIALS(XX) \ @@ -860,7 +860,7 @@ static url_host_type ParseHost(url_host* host, return ParseOpaqueHost(host, input, length); // First, we have to percent decode - PercentDecode(input, length, &decoded); + decoded = PercentDecode(input, length); // Then we have to punycode toASCII if (!ToASCII(decoded, &decoded)) @@ -894,13 +894,13 @@ static url_host_type ParseHost(url_host* host, // Locates the longest sequence of 0 segments in an IPv6 address // in order to use the :: compression when serializing -static inline uint16_t* FindLongestZeroSequence(uint16_t* values, - size_t len) { - uint16_t* start = values; - uint16_t* end = start + len; - uint16_t* result = nullptr; +template +static inline T* FindLongestZeroSequence(T* values, size_t len) { + T* start = values; + T* end = start + len; + T* result = nullptr; - uint16_t* current = nullptr; + T* current = nullptr; unsigned counter = 0, longest = 1; while (start < end) { @@ -923,7 +923,7 @@ static inline uint16_t* FindLongestZeroSequence(uint16_t* values, return result; } -static url_host_type WriteHost(url_host* host, std::string* dest) { +static url_host_type WriteHost(const url_host* host, std::string* dest) { dest->clear(); switch (host->type) { case HOST_TYPE_DOMAIN: @@ -934,8 +934,7 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { uint32_t value = host->value.ipv4; for (int n = 0; n < 4; n++) { char buf[4]; - char* buffer = buf; - snprintf(buffer, sizeof(buf), "%d", value % 256); + snprintf(buf, sizeof(buf), "%d", value % 256); dest->insert(0, buf); if (n < 3) dest->insert(0, 1, '.'); @@ -946,12 +945,12 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { case HOST_TYPE_IPV6: { dest->reserve(41); *dest+= '['; - uint16_t* start = &host->value.ipv6[0]; - uint16_t* compress_pointer = + const uint16_t* start = &host->value.ipv6[0]; + const uint16_t* compress_pointer = FindLongestZeroSequence(start, 8); bool ignore0 = false; for (int n = 0; n <= 7; n++) { - uint16_t* piece = &host->value.ipv6[n]; + const uint16_t* piece = &host->value.ipv6[n]; if (ignore0 && *piece == 0) continue; else if (ignore0) @@ -962,8 +961,7 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { continue; } char buf[5]; - char* buffer = buf; - snprintf(buffer, sizeof(buf), "%x", *piece); + snprintf(buf, sizeof(buf), "%x", *piece); *dest += buf; if (n < 7) *dest += ':'; @@ -980,16 +978,16 @@ static url_host_type WriteHost(url_host* host, std::string* dest) { return host->type; } -static bool ParseHost(std::string* input, +static bool ParseHost(const std::string& input, std::string* output, bool is_special, bool unicode = false) { - if (input->length() == 0) { + if (input.length() == 0) { output->clear(); return true; } url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, input->c_str(), input->length(), is_special, unicode); + ParseHost(&host, input.c_str(), input.length(), is_special, unicode); if (host.type == HOST_TYPE_FAILED) return false; WriteHost(&host, output); @@ -1092,7 +1090,7 @@ static inline void HarvestContext(Environment* env, } // Single dot segment can be ".", "%2e", or "%2E" -static inline bool IsSingleDotSegment(std::string str) { +static inline bool IsSingleDotSegment(const std::string& str) { switch (str.size()) { case 1: return str == "."; @@ -1108,7 +1106,7 @@ static inline bool IsSingleDotSegment(std::string str) { // Double dot segment can be: // "..", ".%2e", ".%2E", "%2e.", "%2E.", // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" -static inline bool IsDoubleDotSegment(std::string str) { +static inline bool IsDoubleDotSegment(const std::string& str) { switch (str.size()) { case 2: return str == ".."; @@ -1542,7 +1540,7 @@ void URL::Parse(const char* input, return; } url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host, special)) { + if (!ParseHost(buffer, &url->host, special)) { url->flags |= URL_FLAGS_FAILED; return; } @@ -1569,7 +1567,7 @@ void URL::Parse(const char* input, return; } url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host, special)) { + if (!ParseHost(buffer, &url->host, special)) { url->flags |= URL_FLAGS_FAILED; return; } @@ -1741,7 +1739,7 @@ void URL::Parse(const char* input, state = kPathStart; } else { std::string host; - if (!ParseHost(&buffer, &host, special)) { + if (!ParseHost(buffer, &host, special)) { url->flags |= URL_FLAGS_FAILED; return; } @@ -2104,8 +2102,7 @@ std::string URL::ToFilePath() const { #endif std::string decoded_path; for (const std::string& part : context_.path) { - std::string decoded; - PercentDecode(part.c_str(), part.length(), &decoded); + std::string decoded = PercentDecode(part.c_str(), part.length()); for (char& ch : decoded) { if (is_slash(ch)) { return ""; From c28a3a4d8edc1faf570d6a01949d2cda0bfef0d7 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Tue, 5 Dec 2017 13:37:38 +0100 Subject: [PATCH 2/4] src: move url internals into anonymous namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps because `static` doesn’t work for C++ classes, but refactoring `url_host` into a proper C++ class seems the most reasonable soluation for the memory leak fixed by the next commit. --- src/node_url.cc | 117 ++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index b4b16399aab87f..21d8c810cb8216 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -46,11 +46,13 @@ using v8::Value; namespace url { +namespace { + // https://url.spec.whatwg.org/#eof-code-point -static const char kEOL = -1; +const char kEOL = -1; // Used in ToUSVString(). -static const char16_t kUnicodeReplacementCharacter = 0xFFFD; +const char16_t kUnicodeReplacementCharacter = 0xFFFD; // https://url.spec.whatwg.org/#concept-host union url_host_value { @@ -103,7 +105,7 @@ enum url_error_cb_args { #define CHAR_TEST(bits, name, expr) \ template \ - static inline bool name(const T ch) { \ + inline bool name(const T ch) { \ static_assert(sizeof(ch) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return (expr); \ @@ -111,13 +113,13 @@ enum url_error_cb_args { #define TWO_CHAR_STRING_TEST(bits, name, expr) \ template \ - static inline bool name(const T ch1, const T ch2) { \ + inline bool name(const T ch1, const T ch2) { \ static_assert(sizeof(ch1) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return (expr); \ } \ template \ - static inline bool name(const std::basic_string& str) { \ + inline bool name(const std::basic_string& str) { \ static_assert(sizeof(str[0]) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return str.length() >= 2 && name(str[0], str[1]); \ @@ -146,7 +148,7 @@ CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) // https://infra.spec.whatwg.org/#ascii-lowercase template -static inline T ASCIILowercase(T ch) { +inline T ASCIILowercase(T ch) { return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; } @@ -177,7 +179,7 @@ CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0) #undef CHAR_TEST #undef TWO_CHAR_STRING_TEST -static const char* hex[256] = { +const char* hex[256] = { "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", @@ -212,7 +214,7 @@ static const char* hex[256] = { "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" }; -static const uint8_t C0_CONTROL_ENCODE_SET[32] = { +const uint8_t C0_CONTROL_ENCODE_SET[32] = { // 00 01 02 03 04 05 06 07 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, // 08 09 0A 0B 0C 0D 0E 0F @@ -279,7 +281,7 @@ static const uint8_t C0_CONTROL_ENCODE_SET[32] = { 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 }; -static const uint8_t PATH_ENCODE_SET[32] = { +const uint8_t PATH_ENCODE_SET[32] = { // 00 01 02 03 04 05 06 07 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, // 08 09 0A 0B 0C 0D 0E 0F @@ -346,7 +348,7 @@ static const uint8_t PATH_ENCODE_SET[32] = { 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 }; -static const uint8_t USERINFO_ENCODE_SET[32] = { +const uint8_t USERINFO_ENCODE_SET[32] = { // 00 01 02 03 04 05 06 07 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, // 08 09 0A 0B 0C 0D 0E 0F @@ -413,7 +415,7 @@ static const uint8_t USERINFO_ENCODE_SET[32] = { 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 }; -static const uint8_t QUERY_ENCODE_SET[32] = { +const uint8_t QUERY_ENCODE_SET[32] = { // 00 01 02 03 04 05 06 07 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, // 08 09 0A 0B 0C 0D 0E 0F @@ -480,15 +482,15 @@ static const uint8_t QUERY_ENCODE_SET[32] = { 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 }; -static inline bool BitAt(const uint8_t a[], const uint8_t i) { +inline bool BitAt(const uint8_t a[], const uint8_t i) { return !!(a[i >> 3] & (1 << (i & 7))); } // Appends ch to str. If ch position in encode_set is set, the ch will // be percent-encoded then appended. -static inline void AppendOrEscape(std::string* str, - const unsigned char ch, - const uint8_t encode_set[]) { +inline void AppendOrEscape(std::string* str, + const unsigned char ch, + const uint8_t encode_set[]) { if (BitAt(encode_set, ch)) *str += hex[ch]; else @@ -496,7 +498,7 @@ static inline void AppendOrEscape(std::string* str, } template -static inline unsigned hex2bin(const T ch) { +inline unsigned hex2bin(const T ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'A' && ch <= 'F') @@ -544,7 +546,7 @@ inline std::string PercentDecode(const char* input, size_t len) { XX("ws:", 80) \ XX("wss:", 443) -static inline bool IsSpecial(std::string scheme) { +inline bool IsSpecial(std::string scheme) { #define XX(name, _) if (scheme == name) return true; SPECIALS(XX); #undef XX @@ -552,8 +554,7 @@ static inline bool IsSpecial(std::string scheme) { } // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter -static inline bool StartsWithWindowsDriveLetter(const char* p, - const char* end) { +inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) { const size_t length = end - p; return length >= 2 && IsWindowsDriveLetter(p[0], p[1]) && @@ -564,7 +565,7 @@ static inline bool StartsWithWindowsDriveLetter(const char* p, p[2] == '#'); } -static inline int NormalizePort(std::string scheme, int p) { +inline int NormalizePort(std::string scheme, int p) { #define XX(name, port) if (scheme == name && p == port) return -1; SPECIALS(XX); #undef XX @@ -572,7 +573,7 @@ static inline int NormalizePort(std::string scheme, int p) { } #if defined(NODE_HAVE_I18N_SUPPORT) -static inline bool ToUnicode(const std::string& input, std::string* output) { +inline bool ToUnicode(const std::string& input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) return false; @@ -580,7 +581,7 @@ static inline bool ToUnicode(const std::string& input, std::string* output) { return true; } -static inline bool ToASCII(const std::string& input, std::string* output) { +inline bool ToASCII(const std::string& input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) return false; @@ -589,20 +590,18 @@ static inline bool ToASCII(const std::string& input, std::string* output) { } #else // Intentional non-ops if ICU is not present. -static inline bool ToUnicode(const std::string& input, std::string* output) { +inline bool ToUnicode(const std::string& input, std::string* output) { *output = input; return true; } -static inline bool ToASCII(const std::string& input, std::string* output) { +inline bool ToASCII(const std::string& input, std::string* output) { *output = input; return true; } #endif -static url_host_type ParseIPv6Host(url_host* host, - const char* input, - size_t length) { +url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { url_host_type type = HOST_TYPE_FAILED; for (unsigned n = 0; n < 8; n++) host->value.ipv6[n] = 0; @@ -720,7 +719,7 @@ static url_host_type ParseIPv6Host(url_host* host, return type; } -static inline int64_t ParseNumber(const char* start, const char* end) { +inline int64_t ParseNumber(const char* start, const char* end) { unsigned R = 10; if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { start += 2; @@ -755,9 +754,7 @@ static inline int64_t ParseNumber(const char* start, const char* end) { return strtoll(start, nullptr, R); } -static url_host_type ParseIPv4Host(url_host* host, - const char* input, - size_t length) { +url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) { url_host_type type = HOST_TYPE_DOMAIN; const char* pointer = input; const char* mark = input; @@ -816,9 +813,9 @@ static url_host_type ParseIPv4Host(url_host* host, return type; } -static url_host_type ParseOpaqueHost(url_host* host, - const char* input, - size_t length) { +url_host_type ParseOpaqueHost(url_host* host, + const char* input, + size_t length) { url_host_type type = HOST_TYPE_OPAQUE; std::string output; output.reserve(length * 3); @@ -838,11 +835,11 @@ static url_host_type ParseOpaqueHost(url_host* host, return type; } -static url_host_type ParseHost(url_host* host, - const char* input, - size_t length, - bool is_special, - bool unicode = false) { +url_host_type ParseHost(url_host* host, + const char* input, + size_t length, + bool is_special, + bool unicode = false) { url_host_type type = HOST_TYPE_FAILED; const char* pointer = input; std::string decoded; @@ -895,7 +892,7 @@ static url_host_type ParseHost(url_host* host, // Locates the longest sequence of 0 segments in an IPv6 address // in order to use the :: compression when serializing template -static inline T* FindLongestZeroSequence(T* values, size_t len) { +inline T* FindLongestZeroSequence(T* values, size_t len) { T* start = values; T* end = start + len; T* result = nullptr; @@ -923,7 +920,7 @@ static inline T* FindLongestZeroSequence(T* values, size_t len) { return result; } -static url_host_type WriteHost(const url_host* host, std::string* dest) { +url_host_type WriteHost(const url_host* host, std::string* dest) { dest->clear(); switch (host->type) { case HOST_TYPE_DOMAIN: @@ -978,10 +975,10 @@ static url_host_type WriteHost(const url_host* host, std::string* dest) { return host->type; } -static bool ParseHost(const std::string& input, - std::string* output, - bool is_special, - bool unicode = false) { +bool ParseHost(const std::string& input, + std::string* output, + bool is_special, + bool unicode = false) { if (input.length() == 0) { output->clear(); return true; @@ -994,9 +991,9 @@ static bool ParseHost(const std::string& input, return true; } -static inline void Copy(Environment* env, - Local ary, - std::vector* vec) { +inline void Copy(Environment* env, + Local ary, + std::vector* vec) { const int32_t len = ary->Length(); if (len == 0) return; // nothing to copy @@ -1010,8 +1007,8 @@ static inline void Copy(Environment* env, } } -static inline Local Copy(Environment* env, - const std::vector& vec) { +inline Local Copy(Environment* env, + const std::vector& vec) { Isolate* isolate = env->isolate(); Local ary = Array::New(isolate, vec.size()); for (size_t n = 0; n < vec.size(); n++) @@ -1019,9 +1016,9 @@ static inline Local Copy(Environment* env, return ary; } -static inline void HarvestBase(Environment* env, - struct url_data* base, - Local base_obj) { +inline void HarvestBase(Environment* env, + struct url_data* base, + Local base_obj) { Local context = env->context(); Local flags = GET(env, base_obj, "flags"); if (flags->IsInt32()) @@ -1045,9 +1042,9 @@ static inline void HarvestBase(Environment* env, } } -static inline void HarvestContext(Environment* env, - struct url_data* context, - Local context_obj) { +inline void HarvestContext(Environment* env, + struct url_data* context, + Local context_obj) { Local flags = GET(env, context_obj, "flags"); if (flags->IsInt32()) { int32_t _flags = flags->Int32Value(env->context()).FromJust(); @@ -1090,7 +1087,7 @@ static inline void HarvestContext(Environment* env, } // Single dot segment can be ".", "%2e", or "%2E" -static inline bool IsSingleDotSegment(const std::string& str) { +inline bool IsSingleDotSegment(const std::string& str) { switch (str.size()) { case 1: return str == "."; @@ -1106,7 +1103,7 @@ static inline bool IsSingleDotSegment(const std::string& str) { // Double dot segment can be: // "..", ".%2e", ".%2E", "%2e.", "%2E.", // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" -static inline bool IsDoubleDotSegment(const std::string& str) { +inline bool IsDoubleDotSegment(const std::string& str) { switch (str.size()) { case 2: return str == ".."; @@ -1133,13 +1130,15 @@ static inline bool IsDoubleDotSegment(const std::string& str) { } } -static inline void ShortenUrlPath(struct url_data* url) { +inline void ShortenUrlPath(struct url_data* url) { if (url->path.empty()) return; if (url->path.size() == 1 && url->scheme == "file:" && IsNormalizedWindowsDriveLetter(url->path[0])) return; url->path.pop_back(); } +} // anonymous namespace + void URL::Parse(const char* input, size_t len, enum url_parse_state state_override, From 363ad6aeb319fbb23137f143fdb9483c74ab0ab0 Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Tue, 5 Dec 2017 14:23:38 +0100 Subject: [PATCH 3/4] src: make url host a proper C++ class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Gives `URLHost` a proper destructor that clears memory depending on the type of the host (This fixes a memory leak) - Hide the host type enums and class layout as implementation details - Make the `Parse` methods members of `URLHost` - Turn `WriteHost` into a `ToString()` method on the `URLHost` class - Verify that at the beginning of a parse attempt, the type is set to “failed” - Remove a lot of `goto`s from the source code 🐢🚀 Fixes: https://github.com/nodejs/node/issues/17448 --- src/node_url.cc | 269 ++++++++++++++++++++++++++---------------------- 1 file changed, 147 insertions(+), 122 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 21d8c810cb8216..1b90df3b92a98a 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -55,27 +55,71 @@ const char kEOL = -1; const char16_t kUnicodeReplacementCharacter = 0xFFFD; // https://url.spec.whatwg.org/#concept-host -union url_host_value { - std::string domain; - uint32_t ipv4; - uint16_t ipv6[8]; - std::string opaque; - ~url_host_value() {} -}; +class URLHost { + public: + ~URLHost(); + + void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4); + void ParseIPv6Host(const char* input, size_t length); + void ParseOpaqueHost(const char* input, size_t length); + void ParseHost(const char* input, + size_t length, + bool is_special, + bool unicode = false); + + inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; } + std::string ToString() const; + + private: + enum class HostType { + H_FAILED, + H_DOMAIN, + H_IPV4, + H_IPV6, + H_OPAQUE, + }; -enum url_host_type { - HOST_TYPE_FAILED = -1, - HOST_TYPE_DOMAIN = 0, - HOST_TYPE_IPV4 = 1, - HOST_TYPE_IPV6 = 2, - HOST_TYPE_OPAQUE = 3, -}; + union Value { + std::string domain; + uint32_t ipv4; + uint16_t ipv6[8]; + std::string opaque; + + ~Value() {} + Value() : ipv4(0) {} + }; -struct url_host { - url_host_value value; - enum url_host_type type; + Value value_; + HostType type_ = HostType::H_FAILED; + + // Setting the string members of the union with = is brittle because + // it relies on them being initialized to a state that requires no + // destruction of old data. + // For a long time, that worked well enough because ParseIPv6Host() happens + // to zero-fill `value_`, but that really is relying on standard library + // internals too much. + // These helpers are the easiest solution but we might want to consider + // just not forcing strings into an union. + inline void SetOpaque(std::string&& string) { + type_ = HostType::H_OPAQUE; + new(&value_.opaque) std::string(std::move(string)); + } + + inline void SetDomain(std::string&& string) { + type_ = HostType::H_DOMAIN; + new(&value_.domain) std::string(std::move(string)); + } }; +URLHost::~URLHost() { + using string = std::string; + switch (type_) { + case HostType::H_DOMAIN: value_.domain.~string(); break; + case HostType::H_OPAQUE: value_.opaque.~string(); break; + default: break; + } +} + #define ARGS(XX) \ XX(ARG_FLAGS) \ XX(ARG_PROTOCOL) \ @@ -601,11 +645,11 @@ inline bool ToASCII(const std::string& input, std::string* output) { } #endif -url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { - url_host_type type = HOST_TYPE_FAILED; +void URLHost::ParseIPv6Host(const char* input, size_t length) { + CHECK_EQ(type_, HostType::H_FAILED); for (unsigned n = 0; n < 8; n++) - host->value.ipv6[n] = 0; - uint16_t* piece_pointer = &host->value.ipv6[0]; + value_.ipv6[n] = 0; + uint16_t* piece_pointer = &value_.ipv6[0]; uint16_t* last_piece = piece_pointer + 8; uint16_t* compress_pointer = nullptr; const char* pointer = input; @@ -614,7 +658,7 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { char ch = pointer < end ? pointer[0] : kEOL; if (ch == ':') { if (length < 2 || pointer[1] != ':') - goto end; + return; pointer += 2; ch = pointer < end ? pointer[0] : kEOL; piece_pointer++; @@ -622,10 +666,10 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { } while (ch != kEOL) { if (piece_pointer > last_piece) - goto end; + return; if (ch == ':') { if (compress_pointer != nullptr) - goto end; + return; pointer++; ch = pointer < end ? pointer[0] : kEOL; piece_pointer++; @@ -643,11 +687,11 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { switch (ch) { case '.': if (len == 0) - goto end; + return; pointer -= len; ch = pointer < end ? pointer[0] : kEOL; if (piece_pointer > last_piece - 2) - goto end; + return; numbers_seen = 0; while (ch != kEOL) { value = 0xffffffff; @@ -656,22 +700,22 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { pointer++; ch = pointer < end ? pointer[0] : kEOL; } else { - goto end; + return; } } if (!IsASCIIDigit(ch)) - goto end; + return; while (IsASCIIDigit(ch)) { unsigned number = ch - '0'; if (value == 0xffffffff) { value = number; } else if (value == 0) { - goto end; + return; } else { value = value * 10 + number; } if (value > 255) - goto end; + return; pointer++; ch = pointer < end ? pointer[0] : kEOL; } @@ -681,18 +725,18 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { piece_pointer++; } if (numbers_seen != 4) - goto end; + return; continue; case ':': pointer++; ch = pointer < end ? pointer[0] : kEOL; if (ch == kEOL) - goto end; + return; break; case kEOL: break; default: - goto end; + return; } *piece_pointer = value; piece_pointer++; @@ -701,7 +745,7 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { if (compress_pointer != nullptr) { swaps = piece_pointer - compress_pointer; piece_pointer = last_piece - 1; - while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { + while (piece_pointer != &value_.ipv6[0] && swaps > 0) { uint16_t temp = *piece_pointer; uint16_t* swap_piece = compress_pointer + swaps - 1; *piece_pointer = *swap_piece; @@ -711,12 +755,9 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { } } else if (compress_pointer == nullptr && piece_pointer != last_piece) { - goto end; + return; } - type = HOST_TYPE_IPV6; - end: - host->type = type; - return type; + type_ = HostType::H_IPV6; } inline int64_t ParseNumber(const char* start, const char* end) { @@ -754,8 +795,9 @@ inline int64_t ParseNumber(const char* start, const char* end) { return strtoll(start, nullptr, R); } -url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) { - url_host_type type = HOST_TYPE_DOMAIN; +void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) { + CHECK_EQ(type_, HostType::H_FAILED); + *is_ipv4 = false; const char* pointer = input; const char* mark = input; const char* end = pointer + length; @@ -764,19 +806,19 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) { uint64_t numbers[4]; int tooBigNumbers = 0; if (length == 0) - goto end; + return; while (pointer <= end) { const char ch = pointer < end ? pointer[0] : kEOL; const int remaining = end - pointer - 1; if (ch == '.' || ch == kEOL) { if (++parts > 4) - goto end; + return; if (pointer == mark) - goto end; + return; int64_t n = ParseNumber(mark, pointer); if (n < 0) - goto end; + return; if (n > 255) { tooBigNumbers++; @@ -789,6 +831,7 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) { pointer++; } CHECK_GT(parts, 0); + *is_ipv4 = true; // If any but the last item in numbers is greater than 255, return failure. // If the last item in numbers is greater than or equal to @@ -796,97 +839,81 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) { if (tooBigNumbers > 1 || (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { - type = HOST_TYPE_FAILED; - goto end; + return; } - type = HOST_TYPE_IPV4; + type_ = HostType::H_IPV4; val = numbers[parts - 1]; for (int n = 0; n < parts - 1; n++) { double b = 3 - n; val += numbers[n] * pow(256, b); } - host->value.ipv4 = val; - end: - host->type = type; - return type; + value_.ipv4 = val; } -url_host_type ParseOpaqueHost(url_host* host, - const char* input, - size_t length) { - url_host_type type = HOST_TYPE_OPAQUE; +void URLHost::ParseOpaqueHost(const char* input, size_t length) { + CHECK_EQ(type_, HostType::H_FAILED); std::string output; output.reserve(length * 3); for (size_t i = 0; i < length; i++) { const char ch = input[i]; if (ch != '%' && IsForbiddenHostCodePoint(ch)) { - type = HOST_TYPE_FAILED; - goto end; + return; } else { AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET); } } - host->value.opaque = output; - end: - host->type = type; - return type; + SetOpaque(std::move(output)); } -url_host_type ParseHost(url_host* host, - const char* input, +void URLHost::ParseHost(const char* input, size_t length, bool is_special, - bool unicode = false) { - url_host_type type = HOST_TYPE_FAILED; + bool unicode) { + CHECK_EQ(type_, HostType::H_FAILED); const char* pointer = input; - std::string decoded; if (length == 0) - goto end; + return; if (pointer[0] == '[') { if (pointer[length - 1] != ']') - goto end; - return ParseIPv6Host(host, ++pointer, length - 2); + return; + return ParseIPv6Host(++pointer, length - 2); } if (!is_special) - return ParseOpaqueHost(host, input, length); + return ParseOpaqueHost(input, length); // First, we have to percent decode - decoded = PercentDecode(input, length); + std::string decoded = PercentDecode(input, length); // Then we have to punycode toASCII if (!ToASCII(decoded, &decoded)) - goto end; + return; // If any of the following characters are still present, we have to fail for (size_t n = 0; n < decoded.size(); n++) { const char ch = decoded[n]; if (IsForbiddenHostCodePoint(ch)) { - goto end; + return; } } // Check to see if it's an IPv4 IP address - type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); - if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) - goto end; + bool is_ipv4; + ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4); + if (is_ipv4) + return; // If the unicode flag is set, run the result through punycode ToUnicode if (unicode && !ToUnicode(decoded, &decoded)) - goto end; + return; // It's not an IPv4 or IPv6 address, it must be a domain - type = HOST_TYPE_DOMAIN; - host->value.domain = decoded; - - end: - host->type = type; - return type; + SetDomain(std::move(decoded)); } // Locates the longest sequence of 0 segments in an IPv6 address @@ -920,59 +947,59 @@ inline T* FindLongestZeroSequence(T* values, size_t len) { return result; } -url_host_type WriteHost(const url_host* host, std::string* dest) { - dest->clear(); - switch (host->type) { - case HOST_TYPE_DOMAIN: - *dest = host->value.domain; +std::string URLHost::ToString() const { + std::string dest; + switch (type_) { + case HostType::H_DOMAIN: + return value_.domain; + break; + case HostType::H_OPAQUE: + return value_.opaque; break; - case HOST_TYPE_IPV4: { - dest->reserve(15); - uint32_t value = host->value.ipv4; + case HostType::H_IPV4: { + dest.reserve(15); + uint32_t value = value_.ipv4; for (int n = 0; n < 4; n++) { char buf[4]; snprintf(buf, sizeof(buf), "%d", value % 256); - dest->insert(0, buf); + dest.insert(0, buf); if (n < 3) - dest->insert(0, 1, '.'); + dest.insert(0, 1, '.'); value /= 256; } break; } - case HOST_TYPE_IPV6: { - dest->reserve(41); - *dest+= '['; - const uint16_t* start = &host->value.ipv6[0]; + case HostType::H_IPV6: { + dest.reserve(41); + dest += '['; + const uint16_t* start = &value_.ipv6[0]; const uint16_t* compress_pointer = FindLongestZeroSequence(start, 8); bool ignore0 = false; for (int n = 0; n <= 7; n++) { - const uint16_t* piece = &host->value.ipv6[n]; + const uint16_t* piece = &value_.ipv6[n]; if (ignore0 && *piece == 0) continue; else if (ignore0) ignore0 = false; if (compress_pointer == piece) { - *dest += n == 0 ? "::" : ":"; + dest += n == 0 ? "::" : ":"; ignore0 = true; continue; } char buf[5]; snprintf(buf, sizeof(buf), "%x", *piece); - *dest += buf; + dest += buf; if (n < 7) - *dest += ':'; + dest += ':'; } - *dest += ']'; + dest += ']'; break; } - case HOST_TYPE_OPAQUE: - *dest = host->value.opaque; - break; - case HOST_TYPE_FAILED: + case HostType::H_FAILED: break; } - return host->type; + return dest; } bool ParseHost(const std::string& input, @@ -983,11 +1010,11 @@ bool ParseHost(const std::string& input, output->clear(); return true; } - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, input.c_str(), input.length(), is_special, unicode); - if (host.type == HOST_TYPE_FAILED) + URLHost host; + host.ParseHost(input.c_str(), input.length(), is_special, unicode); + if (host.ParsingFailed()) return false; - WriteHost(&host, output); + *output = host.ToString(); return true; } @@ -2043,15 +2070,14 @@ static void DomainToASCII(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); Utf8Value value(env->isolate(), args[0]); - url_host host{{""}, HOST_TYPE_DOMAIN}; + URLHost host; // Assuming the host is used for a special scheme. - ParseHost(&host, *value, value.length(), true); - if (host.type == HOST_TYPE_FAILED) { + host.ParseHost(*value, value.length(), true); + if (host.ParsingFailed()) { args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); return; } - std::string out; - WriteHost(&host, &out); + std::string out = host.ToString(); args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), out.c_str(), @@ -2064,15 +2090,14 @@ static void DomainToUnicode(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); Utf8Value value(env->isolate(), args[0]); - url_host host{{""}, HOST_TYPE_DOMAIN}; + URLHost host; // Assuming the host is used for a special scheme. - ParseHost(&host, *value, value.length(), true, true); - if (host.type == HOST_TYPE_FAILED) { + host.ParseHost(*value, value.length(), true, true); + if (host.ParsingFailed()) { args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); return; } - std::string out; - WriteHost(&host, &out); + std::string out = host.ToString(); args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), out.c_str(), From c8bef2df4ba5a10e4ee7659352a9b50e72d1773a Mon Sep 17 00:00:00 2001 From: Anna Henningsen Date: Tue, 5 Dec 2017 19:40:16 +0100 Subject: [PATCH 4/4] src: use correct OOB check for IPv6 parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `last_piece` pointed to the end of the 8×16 bit array, so `piece_pointer == last_piece` already means that the pointer is not writable any longer. Previously, this still worked most of the time but could result in an out-of-bounds-write. Also, rename `last_piece` to `buffer_end` to avoid this pitfall. --- src/node_url.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 1b90df3b92a98a..578c73c7f03cf6 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -650,7 +650,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { for (unsigned n = 0; n < 8; n++) value_.ipv6[n] = 0; uint16_t* piece_pointer = &value_.ipv6[0]; - uint16_t* last_piece = piece_pointer + 8; + uint16_t* const buffer_end = piece_pointer + 8; uint16_t* compress_pointer = nullptr; const char* pointer = input; const char* end = pointer + length; @@ -665,7 +665,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { compress_pointer = piece_pointer; } while (ch != kEOL) { - if (piece_pointer > last_piece) + if (piece_pointer >= buffer_end) return; if (ch == ':') { if (compress_pointer != nullptr) @@ -690,7 +690,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { return; pointer -= len; ch = pointer < end ? pointer[0] : kEOL; - if (piece_pointer > last_piece - 2) + if (piece_pointer > buffer_end - 2) return; numbers_seen = 0; while (ch != kEOL) { @@ -744,7 +744,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { if (compress_pointer != nullptr) { swaps = piece_pointer - compress_pointer; - piece_pointer = last_piece - 1; + piece_pointer = buffer_end - 1; while (piece_pointer != &value_.ipv6[0] && swaps > 0) { uint16_t temp = *piece_pointer; uint16_t* swap_piece = compress_pointer + swaps - 1; @@ -754,7 +754,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { swaps--; } } else if (compress_pointer == nullptr && - piece_pointer != last_piece) { + piece_pointer != buffer_end) { return; } type_ = HostType::H_IPV6;