From e86f577dd9042ea4365201c8251b439f9f2d8e0f Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Tue, 5 Dec 2017 13:23:31 +0100
Subject: [PATCH 1/4] src: minor cleanups to node_url.cc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove pointless pointers
- Make `WriteHost` take a const argument so that it’s functionality
  is clear from the signature
- Make `FindLongestZeroSequence` templated to accommodate the constness
  in `WriteHost` and because using `uint16_t` is an articifial,
  unnecessary restriction
- Remove string copying when no copies are needed
- Make `PercentDecode` just return its return value
- Make `ParseHost` (string-only version) take its constant argument
  as a constant reference
---
 src/node_url.cc | 61 +++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 32 deletions(-)
diff --git a/src/node_url.cc b/src/node_url.cc
index e1ef9273ae927e..b4b16399aab87f 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -506,12 +506,11 @@ static inline unsigned hex2bin(const T ch) {
   return static_cast<unsigned>(-1);
 }
 
-static inline void PercentDecode(const char* input,
-                                 size_t len,
-                                 std::string* dest) {
+inline std::string PercentDecode(const char* input, size_t len) {
+  std::string dest;
   if (len == 0)
-    return;
-  dest->reserve(len);
+    return dest;
+  dest.reserve(len);
   const char* pointer = input;
   const char* end = input + len;
 
@@ -522,17 +521,18 @@ static inline void PercentDecode(const char* input,
         (ch == '%' &&
          (!IsASCIIHexDigit(pointer[1]) ||
           !IsASCIIHexDigit(pointer[2])))) {
-      *dest += ch;
+      dest += ch;
       pointer++;
       continue;
     } else {
       unsigned a = hex2bin(pointer[1]);
       unsigned b = hex2bin(pointer[2]);
       char c = static_cast<char>(a * 16 + b);
-      *dest += c;
+      dest += c;
       pointer += 3;
     }
   }
+  return dest;
 }
 
 #define SPECIALS(XX)                                                          \
@@ -860,7 +860,7 @@ static url_host_type ParseHost(url_host* host,
     return ParseOpaqueHost(host, input, length);
 
   // First, we have to percent decode
-  PercentDecode(input, length, &decoded);
+  decoded = PercentDecode(input, length);
 
   // Then we have to punycode toASCII
   if (!ToASCII(decoded, &decoded))
@@ -894,13 +894,13 @@ static url_host_type ParseHost(url_host* host,
 
 // Locates the longest sequence of 0 segments in an IPv6 address
 // in order to use the :: compression when serializing
-static inline uint16_t* FindLongestZeroSequence(uint16_t* values,
-                                                size_t len) {
-  uint16_t* start = values;
-  uint16_t* end = start + len;
-  uint16_t* result = nullptr;
+template<typename T>
+static inline T* FindLongestZeroSequence(T* values, size_t len) {
+  T* start = values;
+  T* end = start + len;
+  T* result = nullptr;
 
-  uint16_t* current = nullptr;
+  T* current = nullptr;
   unsigned counter = 0, longest = 1;
 
   while (start < end) {
@@ -923,7 +923,7 @@ static inline uint16_t* FindLongestZeroSequence(uint16_t* values,
   return result;
 }
 
-static url_host_type WriteHost(url_host* host, std::string* dest) {
+static url_host_type WriteHost(const url_host* host, std::string* dest) {
   dest->clear();
   switch (host->type) {
     case HOST_TYPE_DOMAIN:
@@ -934,8 +934,7 @@ static url_host_type WriteHost(url_host* host, std::string* dest) {
       uint32_t value = host->value.ipv4;
       for (int n = 0; n < 4; n++) {
         char buf[4];
-        char* buffer = buf;
-        snprintf(buffer, sizeof(buf), "%d", value % 256);
+        snprintf(buf, sizeof(buf), "%d", value % 256);
         dest->insert(0, buf);
         if (n < 3)
           dest->insert(0, 1, '.');
@@ -946,12 +945,12 @@ static url_host_type WriteHost(url_host* host, std::string* dest) {
     case HOST_TYPE_IPV6: {
       dest->reserve(41);
       *dest+= '[';
-      uint16_t* start = &host->value.ipv6[0];
-      uint16_t* compress_pointer =
+      const uint16_t* start = &host->value.ipv6[0];
+      const uint16_t* compress_pointer =
           FindLongestZeroSequence(start, 8);
       bool ignore0 = false;
       for (int n = 0; n <= 7; n++) {
-        uint16_t* piece = &host->value.ipv6[n];
+        const uint16_t* piece = &host->value.ipv6[n];
         if (ignore0 && *piece == 0)
           continue;
         else if (ignore0)
@@ -962,8 +961,7 @@ static url_host_type WriteHost(url_host* host, std::string* dest) {
           continue;
         }
         char buf[5];
-        char* buffer = buf;
-        snprintf(buffer, sizeof(buf), "%x", *piece);
+        snprintf(buf, sizeof(buf), "%x", *piece);
         *dest += buf;
         if (n < 7)
           *dest += ':';
@@ -980,16 +978,16 @@ static url_host_type WriteHost(url_host* host, std::string* dest) {
   return host->type;
 }
 
-static bool ParseHost(std::string* input,
+static bool ParseHost(const std::string& input,
                       std::string* output,
                       bool is_special,
                       bool unicode = false) {
-  if (input->length() == 0) {
+  if (input.length() == 0) {
     output->clear();
     return true;
   }
   url_host host{{""}, HOST_TYPE_DOMAIN};
-  ParseHost(&host, input->c_str(), input->length(), is_special, unicode);
+  ParseHost(&host, input.c_str(), input.length(), is_special, unicode);
   if (host.type == HOST_TYPE_FAILED)
     return false;
   WriteHost(&host, output);
@@ -1092,7 +1090,7 @@ static inline void HarvestContext(Environment* env,
 }
 
 // Single dot segment can be ".", "%2e", or "%2E"
-static inline bool IsSingleDotSegment(std::string str) {
+static inline bool IsSingleDotSegment(const std::string& str) {
   switch (str.size()) {
     case 1:
       return str == ".";
@@ -1108,7 +1106,7 @@ static inline bool IsSingleDotSegment(std::string str) {
 // Double dot segment can be:
 //   "..", ".%2e", ".%2E", "%2e.", "%2E.",
 //   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
-static inline bool IsDoubleDotSegment(std::string str) {
+static inline bool IsDoubleDotSegment(const std::string& str) {
   switch (str.size()) {
     case 2:
       return str == "..";
@@ -1542,7 +1540,7 @@ void URL::Parse(const char* input,
             return;
           }
           url->flags |= URL_FLAGS_HAS_HOST;
-          if (!ParseHost(&buffer, &url->host, special)) {
+          if (!ParseHost(buffer, &url->host, special)) {
             url->flags |= URL_FLAGS_FAILED;
             return;
           }
@@ -1569,7 +1567,7 @@ void URL::Parse(const char* input,
             return;
           }
           url->flags |= URL_FLAGS_HAS_HOST;
-          if (!ParseHost(&buffer, &url->host, special)) {
+          if (!ParseHost(buffer, &url->host, special)) {
             url->flags |= URL_FLAGS_FAILED;
             return;
           }
@@ -1741,7 +1739,7 @@ void URL::Parse(const char* input,
             state = kPathStart;
           } else {
             std::string host;
-            if (!ParseHost(&buffer, &host, special)) {
+            if (!ParseHost(buffer, &host, special)) {
               url->flags |= URL_FLAGS_FAILED;
               return;
             }
@@ -2104,8 +2102,7 @@ std::string URL::ToFilePath() const {
 #endif
   std::string decoded_path;
   for (const std::string& part : context_.path) {
-    std::string decoded;
-    PercentDecode(part.c_str(), part.length(), &decoded);
+    std::string decoded = PercentDecode(part.c_str(), part.length());
     for (char& ch : decoded) {
       if (is_slash(ch)) {
         return "";

From c28a3a4d8edc1faf570d6a01949d2cda0bfef0d7 Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Tue, 5 Dec 2017 13:37:38 +0100
Subject: [PATCH 2/4] src: move url internals into anonymous namespace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This helps because `static` doesn’t work for C++ classes,
but refactoring `url_host` into a proper C++ class seems the
most reasonable soluation for the memory leak fixed by the next commit.
---
 src/node_url.cc | 117 ++++++++++++++++++++++++------------------------
 1 file changed, 58 insertions(+), 59 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index b4b16399aab87f..21d8c810cb8216 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -46,11 +46,13 @@ using v8::Value;
 
 namespace url {
 
+namespace {
+
 // https://url.spec.whatwg.org/#eof-code-point
-static const char kEOL = -1;
+const char kEOL = -1;
 
 // Used in ToUSVString().
-static const char16_t kUnicodeReplacementCharacter = 0xFFFD;
+const char16_t kUnicodeReplacementCharacter = 0xFFFD;
 
 // https://url.spec.whatwg.org/#concept-host
 union url_host_value {
@@ -103,7 +105,7 @@ enum url_error_cb_args {
 
 #define CHAR_TEST(bits, name, expr)                                           \
   template <typename T>                                                       \
-  static inline bool name(const T ch) {                                       \
+  inline bool name(const T ch) {                                              \
     static_assert(sizeof(ch) >= (bits) / 8,                                   \
                   "Character must be wider than " #bits " bits");             \
     return (expr);                                                            \
@@ -111,13 +113,13 @@ enum url_error_cb_args {
 
 #define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
   template <typename T>                                                       \
-  static inline bool name(const T ch1, const T ch2) {                         \
+  inline bool name(const T ch1, const T ch2) {                                \
     static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                   "Character must be wider than " #bits " bits");             \
     return (expr);                                                            \
   }                                                                           \
   template <typename T>                                                       \
-  static inline bool name(const std::basic_string<T>& str) {                  \
+  inline bool name(const std::basic_string<T>& str) {                         \
     static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                   "Character must be wider than " #bits " bits");             \
     return str.length() >= 2 && name(str[0], str[1]);                         \
@@ -146,7 +148,7 @@ CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))
 
 // https://infra.spec.whatwg.org/#ascii-lowercase
 template <typename T>
-static inline T ASCIILowercase(T ch) {
+inline T ASCIILowercase(T ch) {
   return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
 }
 
@@ -177,7 +179,7 @@ CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)
 #undef CHAR_TEST
 #undef TWO_CHAR_STRING_TEST
 
-static const char* hex[256] = {
+const char* hex[256] = {
   "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
   "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
   "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
@@ -212,7 +214,7 @@ static const char* hex[256] = {
   "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
 };
 
-static const uint8_t C0_CONTROL_ENCODE_SET[32] = {
+const uint8_t C0_CONTROL_ENCODE_SET[32] = {
   // 00     01     02     03     04     05     06     07
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
   // 08     09     0A     0B     0C     0D     0E     0F
@@ -279,7 +281,7 @@ static const uint8_t C0_CONTROL_ENCODE_SET[32] = {
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
 };
 
-static const uint8_t PATH_ENCODE_SET[32] = {
+const uint8_t PATH_ENCODE_SET[32] = {
   // 00     01     02     03     04     05     06     07
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
   // 08     09     0A     0B     0C     0D     0E     0F
@@ -346,7 +348,7 @@ static const uint8_t PATH_ENCODE_SET[32] = {
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
 };
 
-static const uint8_t USERINFO_ENCODE_SET[32] = {
+const uint8_t USERINFO_ENCODE_SET[32] = {
   // 00     01     02     03     04     05     06     07
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
   // 08     09     0A     0B     0C     0D     0E     0F
@@ -413,7 +415,7 @@ static const uint8_t USERINFO_ENCODE_SET[32] = {
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
 };
 
-static const uint8_t QUERY_ENCODE_SET[32] = {
+const uint8_t QUERY_ENCODE_SET[32] = {
   // 00     01     02     03     04     05     06     07
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
   // 08     09     0A     0B     0C     0D     0E     0F
@@ -480,15 +482,15 @@ static const uint8_t QUERY_ENCODE_SET[32] = {
     0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
 };
 
-static inline bool BitAt(const uint8_t a[], const uint8_t i) {
+inline bool BitAt(const uint8_t a[], const uint8_t i) {
   return !!(a[i >> 3] & (1 << (i & 7)));
 }
 
 // Appends ch to str. If ch position in encode_set is set, the ch will
 // be percent-encoded then appended.
-static inline void AppendOrEscape(std::string* str,
-                                  const unsigned char ch,
-                                  const uint8_t encode_set[]) {
+inline void AppendOrEscape(std::string* str,
+                           const unsigned char ch,
+                           const uint8_t encode_set[]) {
   if (BitAt(encode_set, ch))
     *str += hex[ch];
   else
@@ -496,7 +498,7 @@ static inline void AppendOrEscape(std::string* str,
 }
 
 template <typename T>
-static inline unsigned hex2bin(const T ch) {
+inline unsigned hex2bin(const T ch) {
   if (ch >= '0' && ch <= '9')
     return ch - '0';
   if (ch >= 'A' && ch <= 'F')
@@ -544,7 +546,7 @@ inline std::string PercentDecode(const char* input, size_t len) {
   XX("ws:", 80)                                                               \
   XX("wss:", 443)
 
-static inline bool IsSpecial(std::string scheme) {
+inline bool IsSpecial(std::string scheme) {
 #define XX(name, _) if (scheme == name) return true;
   SPECIALS(XX);
 #undef XX
@@ -552,8 +554,7 @@ static inline bool IsSpecial(std::string scheme) {
 }
 
 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
-static inline bool StartsWithWindowsDriveLetter(const char* p,
-                                                const char* end) {
+inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
   const size_t length = end - p;
   return length >= 2 &&
     IsWindowsDriveLetter(p[0], p[1]) &&
@@ -564,7 +565,7 @@ static inline bool StartsWithWindowsDriveLetter(const char* p,
       p[2] == '#');
 }
 
-static inline int NormalizePort(std::string scheme, int p) {
+inline int NormalizePort(std::string scheme, int p) {
 #define XX(name, port) if (scheme == name && p == port) return -1;
   SPECIALS(XX);
 #undef XX
@@ -572,7 +573,7 @@ static inline int NormalizePort(std::string scheme, int p) {
 }
 
 #if defined(NODE_HAVE_I18N_SUPPORT)
-static inline bool ToUnicode(const std::string& input, std::string* output) {
+inline bool ToUnicode(const std::string& input, std::string* output) {
   MaybeStackBuffer<char> buf;
   if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
     return false;
@@ -580,7 +581,7 @@ static inline bool ToUnicode(const std::string& input, std::string* output) {
   return true;
 }
 
-static inline bool ToASCII(const std::string& input, std::string* output) {
+inline bool ToASCII(const std::string& input, std::string* output) {
   MaybeStackBuffer<char> buf;
   if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
     return false;
@@ -589,20 +590,18 @@ static inline bool ToASCII(const std::string& input, std::string* output) {
 }
 #else
 // Intentional non-ops if ICU is not present.
-static inline bool ToUnicode(const std::string& input, std::string* output) {
+inline bool ToUnicode(const std::string& input, std::string* output) {
   *output = input;
   return true;
 }
 
-static inline bool ToASCII(const std::string& input, std::string* output) {
+inline bool ToASCII(const std::string& input, std::string* output) {
   *output = input;
   return true;
 }
 #endif
 
-static url_host_type ParseIPv6Host(url_host* host,
-                                   const char* input,
-                                   size_t length) {
+url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
   url_host_type type = HOST_TYPE_FAILED;
   for (unsigned n = 0; n < 8; n++)
     host->value.ipv6[n] = 0;
@@ -720,7 +719,7 @@ static url_host_type ParseIPv6Host(url_host* host,
   return type;
 }
 
-static inline int64_t ParseNumber(const char* start, const char* end) {
+inline int64_t ParseNumber(const char* start, const char* end) {
   unsigned R = 10;
   if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
     start += 2;
@@ -755,9 +754,7 @@ static inline int64_t ParseNumber(const char* start, const char* end) {
   return strtoll(start, nullptr, R);
 }
 
-static url_host_type ParseIPv4Host(url_host* host,
-                                   const char* input,
-                                   size_t length) {
+url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
   url_host_type type = HOST_TYPE_DOMAIN;
   const char* pointer = input;
   const char* mark = input;
@@ -816,9 +813,9 @@ static url_host_type ParseIPv4Host(url_host* host,
   return type;
 }
 
-static url_host_type ParseOpaqueHost(url_host* host,
-                                     const char* input,
-                                     size_t length) {
+url_host_type ParseOpaqueHost(url_host* host,
+                              const char* input,
+                              size_t length) {
   url_host_type type = HOST_TYPE_OPAQUE;
   std::string output;
   output.reserve(length * 3);
@@ -838,11 +835,11 @@ static url_host_type ParseOpaqueHost(url_host* host,
   return type;
 }
 
-static url_host_type ParseHost(url_host* host,
-                               const char* input,
-                               size_t length,
-                               bool is_special,
-                               bool unicode = false) {
+url_host_type ParseHost(url_host* host,
+                        const char* input,
+                        size_t length,
+                        bool is_special,
+                        bool unicode = false) {
   url_host_type type = HOST_TYPE_FAILED;
   const char* pointer = input;
   std::string decoded;
@@ -895,7 +892,7 @@ static url_host_type ParseHost(url_host* host,
 // Locates the longest sequence of 0 segments in an IPv6 address
 // in order to use the :: compression when serializing
 template<typename T>
-static inline T* FindLongestZeroSequence(T* values, size_t len) {
+inline T* FindLongestZeroSequence(T* values, size_t len) {
   T* start = values;
   T* end = start + len;
   T* result = nullptr;
@@ -923,7 +920,7 @@ static inline T* FindLongestZeroSequence(T* values, size_t len) {
   return result;
 }
 
-static url_host_type WriteHost(const url_host* host, std::string* dest) {
+url_host_type WriteHost(const url_host* host, std::string* dest) {
   dest->clear();
   switch (host->type) {
     case HOST_TYPE_DOMAIN:
@@ -978,10 +975,10 @@ static url_host_type WriteHost(const url_host* host, std::string* dest) {
   return host->type;
 }
 
-static bool ParseHost(const std::string& input,
-                      std::string* output,
-                      bool is_special,
-                      bool unicode = false) {
+bool ParseHost(const std::string& input,
+               std::string* output,
+               bool is_special,
+               bool unicode = false) {
   if (input.length() == 0) {
     output->clear();
     return true;
@@ -994,9 +991,9 @@ static bool ParseHost(const std::string& input,
   return true;
 }
 
-static inline void Copy(Environment* env,
-                        Local<Array> ary,
-                        std::vector<std::string>* vec) {
+inline void Copy(Environment* env,
+                 Local<Array> ary,
+                 std::vector<std::string>* vec) {
   const int32_t len = ary->Length();
   if (len == 0)
     return;  // nothing to copy
@@ -1010,8 +1007,8 @@ static inline void Copy(Environment* env,
   }
 }
 
-static inline Local<Array> Copy(Environment* env,
-                                const std::vector<std::string>& vec) {
+inline Local<Array> Copy(Environment* env,
+                         const std::vector<std::string>& vec) {
   Isolate* isolate = env->isolate();
   Local<Array> ary = Array::New(isolate, vec.size());
   for (size_t n = 0; n < vec.size(); n++)
@@ -1019,9 +1016,9 @@ static inline Local<Array> Copy(Environment* env,
   return ary;
 }
 
-static inline void HarvestBase(Environment* env,
-                               struct url_data* base,
-                               Local<Object> base_obj) {
+inline void HarvestBase(Environment* env,
+                        struct url_data* base,
+                        Local<Object> base_obj) {
   Local<Context> context = env->context();
   Local<Value> flags = GET(env, base_obj, "flags");
   if (flags->IsInt32())
@@ -1045,9 +1042,9 @@ static inline void HarvestBase(Environment* env,
   }
 }
 
-static inline void HarvestContext(Environment* env,
-                                  struct url_data* context,
-                                  Local<Object> context_obj) {
+inline void HarvestContext(Environment* env,
+                           struct url_data* context,
+                           Local<Object> context_obj) {
   Local<Value> flags = GET(env, context_obj, "flags");
   if (flags->IsInt32()) {
     int32_t _flags = flags->Int32Value(env->context()).FromJust();
@@ -1090,7 +1087,7 @@ static inline void HarvestContext(Environment* env,
 }
 
 // Single dot segment can be ".", "%2e", or "%2E"
-static inline bool IsSingleDotSegment(const std::string& str) {
+inline bool IsSingleDotSegment(const std::string& str) {
   switch (str.size()) {
     case 1:
       return str == ".";
@@ -1106,7 +1103,7 @@ static inline bool IsSingleDotSegment(const std::string& str) {
 // Double dot segment can be:
 //   "..", ".%2e", ".%2E", "%2e.", "%2E.",
 //   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
-static inline bool IsDoubleDotSegment(const std::string& str) {
+inline bool IsDoubleDotSegment(const std::string& str) {
   switch (str.size()) {
     case 2:
       return str == "..";
@@ -1133,13 +1130,15 @@ static inline bool IsDoubleDotSegment(const std::string& str) {
   }
 }
 
-static inline void ShortenUrlPath(struct url_data* url) {
+inline void ShortenUrlPath(struct url_data* url) {
   if (url->path.empty()) return;
   if (url->path.size() == 1 && url->scheme == "file:" &&
       IsNormalizedWindowsDriveLetter(url->path[0])) return;
   url->path.pop_back();
 }
 
+}  // anonymous namespace
+
 void URL::Parse(const char* input,
                 size_t len,
                 enum url_parse_state state_override,

From 363ad6aeb319fbb23137f143fdb9483c74ab0ab0 Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Tue, 5 Dec 2017 14:23:38 +0100
Subject: [PATCH 3/4] src: make url host a proper C++ class
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Gives `URLHost` a proper destructor that clears memory
  depending on the type of the host (This fixes a memory leak)
- Hide the host type enums and class layout as implementation details
- Make the `Parse` methods members of `URLHost`
- Turn `WriteHost` into a `ToString()` method on the `URLHost` class
- Verify that at the beginning of a parse attempt, the type is set
  to “failed”
- Remove a lot of `goto`s from the source code 🐢🚀

Fixes: https://github.com/nodejs/node/issues/17448
---
 src/node_url.cc | 269 ++++++++++++++++++++++++++----------------------
 1 file changed, 147 insertions(+), 122 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index 21d8c810cb8216..1b90df3b92a98a 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -55,27 +55,71 @@ const char kEOL = -1;
 const char16_t kUnicodeReplacementCharacter = 0xFFFD;
 
 // https://url.spec.whatwg.org/#concept-host
-union url_host_value {
-  std::string domain;
-  uint32_t ipv4;
-  uint16_t ipv6[8];
-  std::string opaque;
-  ~url_host_value() {}
-};
+class URLHost {
+ public:
+  ~URLHost();
+
+  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
+  void ParseIPv6Host(const char* input, size_t length);
+  void ParseOpaqueHost(const char* input, size_t length);
+  void ParseHost(const char* input,
+                 size_t length,
+                 bool is_special,
+                 bool unicode = false);
+
+  inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
+  std::string ToString() const;
+
+ private:
+  enum class HostType {
+    H_FAILED,
+    H_DOMAIN,
+    H_IPV4,
+    H_IPV6,
+    H_OPAQUE,
+  };
 
-enum url_host_type {
-  HOST_TYPE_FAILED = -1,
-  HOST_TYPE_DOMAIN = 0,
-  HOST_TYPE_IPV4 = 1,
-  HOST_TYPE_IPV6 = 2,
-  HOST_TYPE_OPAQUE = 3,
-};
+  union Value {
+    std::string domain;
+    uint32_t ipv4;
+    uint16_t ipv6[8];
+    std::string opaque;
+
+    ~Value() {}
+    Value() : ipv4(0) {}
+  };
 
-struct url_host {
-  url_host_value value;
-  enum url_host_type type;
+  Value value_;
+  HostType type_ = HostType::H_FAILED;
+
+  // Setting the string members of the union with = is brittle because
+  // it relies on them being initialized to a state that requires no
+  // destruction of old data.
+  // For a long time, that worked well enough because ParseIPv6Host() happens
+  // to zero-fill `value_`, but that really is relying on standard library
+  // internals too much.
+  // These helpers are the easiest solution but we might want to consider
+  // just not forcing strings into an union.
+  inline void SetOpaque(std::string&& string) {
+    type_ = HostType::H_OPAQUE;
+    new(&value_.opaque) std::string(std::move(string));
+  }
+
+  inline void SetDomain(std::string&& string) {
+    type_ = HostType::H_DOMAIN;
+    new(&value_.domain) std::string(std::move(string));
+  }
 };
 
+URLHost::~URLHost() {
+  using string = std::string;
+  switch (type_) {
+    case HostType::H_DOMAIN: value_.domain.~string(); break;
+    case HostType::H_OPAQUE: value_.opaque.~string(); break;
+    default: break;
+  }
+}
+
 #define ARGS(XX)                                                              \
   XX(ARG_FLAGS)                                                               \
   XX(ARG_PROTOCOL)                                                            \
@@ -601,11 +645,11 @@ inline bool ToASCII(const std::string& input, std::string* output) {
 }
 #endif
 
-url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
-  url_host_type type = HOST_TYPE_FAILED;
+void URLHost::ParseIPv6Host(const char* input, size_t length) {
+  CHECK_EQ(type_, HostType::H_FAILED);
   for (unsigned n = 0; n < 8; n++)
-    host->value.ipv6[n] = 0;
-  uint16_t* piece_pointer = &host->value.ipv6[0];
+    value_.ipv6[n] = 0;
+  uint16_t* piece_pointer = &value_.ipv6[0];
   uint16_t* last_piece = piece_pointer + 8;
   uint16_t* compress_pointer = nullptr;
   const char* pointer = input;
@@ -614,7 +658,7 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
   char ch = pointer < end ? pointer[0] : kEOL;
   if (ch == ':') {
     if (length < 2 || pointer[1] != ':')
-      goto end;
+      return;
     pointer += 2;
     ch = pointer < end ? pointer[0] : kEOL;
     piece_pointer++;
@@ -622,10 +666,10 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
   }
   while (ch != kEOL) {
     if (piece_pointer > last_piece)
-      goto end;
+      return;
     if (ch == ':') {
       if (compress_pointer != nullptr)
-        goto end;
+        return;
       pointer++;
       ch = pointer < end ? pointer[0] : kEOL;
       piece_pointer++;
@@ -643,11 +687,11 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
     switch (ch) {
       case '.':
         if (len == 0)
-          goto end;
+          return;
         pointer -= len;
         ch = pointer < end ? pointer[0] : kEOL;
         if (piece_pointer > last_piece - 2)
-          goto end;
+          return;
         numbers_seen = 0;
         while (ch != kEOL) {
           value = 0xffffffff;
@@ -656,22 +700,22 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
               pointer++;
               ch = pointer < end ? pointer[0] : kEOL;
             } else {
-              goto end;
+              return;
             }
           }
           if (!IsASCIIDigit(ch))
-            goto end;
+            return;
           while (IsASCIIDigit(ch)) {
             unsigned number = ch - '0';
             if (value == 0xffffffff) {
               value = number;
             } else if (value == 0) {
-              goto end;
+              return;
             } else {
               value = value * 10 + number;
             }
             if (value > 255)
-              goto end;
+              return;
             pointer++;
             ch = pointer < end ? pointer[0] : kEOL;
           }
@@ -681,18 +725,18 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
             piece_pointer++;
         }
         if (numbers_seen != 4)
-          goto end;
+          return;
         continue;
       case ':':
         pointer++;
         ch = pointer < end ? pointer[0] : kEOL;
         if (ch == kEOL)
-          goto end;
+          return;
         break;
       case kEOL:
         break;
       default:
-        goto end;
+        return;
     }
     *piece_pointer = value;
     piece_pointer++;
@@ -701,7 +745,7 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
   if (compress_pointer != nullptr) {
     swaps = piece_pointer - compress_pointer;
     piece_pointer = last_piece - 1;
-    while (piece_pointer != &host->value.ipv6[0] && swaps > 0) {
+    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
       uint16_t temp = *piece_pointer;
       uint16_t* swap_piece = compress_pointer + swaps - 1;
       *piece_pointer = *swap_piece;
@@ -711,12 +755,9 @@ url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) {
     }
   } else if (compress_pointer == nullptr &&
              piece_pointer != last_piece) {
-    goto end;
+    return;
   }
-  type = HOST_TYPE_IPV6;
- end:
-  host->type = type;
-  return type;
+  type_ = HostType::H_IPV6;
 }
 
 inline int64_t ParseNumber(const char* start, const char* end) {
@@ -754,8 +795,9 @@ inline int64_t ParseNumber(const char* start, const char* end) {
   return strtoll(start, nullptr, R);
 }
 
-url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
-  url_host_type type = HOST_TYPE_DOMAIN;
+void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
+  CHECK_EQ(type_, HostType::H_FAILED);
+  *is_ipv4 = false;
   const char* pointer = input;
   const char* mark = input;
   const char* end = pointer + length;
@@ -764,19 +806,19 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
   uint64_t numbers[4];
   int tooBigNumbers = 0;
   if (length == 0)
-    goto end;
+    return;
 
   while (pointer <= end) {
     const char ch = pointer < end ? pointer[0] : kEOL;
     const int remaining = end - pointer - 1;
     if (ch == '.' || ch == kEOL) {
       if (++parts > 4)
-        goto end;
+        return;
       if (pointer == mark)
-        goto end;
+        return;
       int64_t n = ParseNumber(mark, pointer);
       if (n < 0)
-        goto end;
+        return;
 
       if (n > 255) {
         tooBigNumbers++;
@@ -789,6 +831,7 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
     pointer++;
   }
   CHECK_GT(parts, 0);
+  *is_ipv4 = true;
 
   // If any but the last item in numbers is greater than 255, return failure.
   // If the last item in numbers is greater than or equal to
@@ -796,97 +839,81 @@ url_host_type ParseIPv4Host(url_host* host, const char* input, size_t length) {
   if (tooBigNumbers > 1 ||
       (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
       numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
-    type = HOST_TYPE_FAILED;
-    goto end;
+    return;
   }
 
-  type = HOST_TYPE_IPV4;
+  type_ = HostType::H_IPV4;
   val = numbers[parts - 1];
   for (int n = 0; n < parts - 1; n++) {
     double b = 3 - n;
     val += numbers[n] * pow(256, b);
   }
 
-  host->value.ipv4 = val;
- end:
-  host->type = type;
-  return type;
+  value_.ipv4 = val;
 }
 
-url_host_type ParseOpaqueHost(url_host* host,
-                              const char* input,
-                              size_t length) {
-  url_host_type type = HOST_TYPE_OPAQUE;
+void URLHost::ParseOpaqueHost(const char* input, size_t length) {
+  CHECK_EQ(type_, HostType::H_FAILED);
   std::string output;
   output.reserve(length * 3);
   for (size_t i = 0; i < length; i++) {
     const char ch = input[i];
     if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
-      type = HOST_TYPE_FAILED;
-      goto end;
+      return;
     } else {
       AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
     }
   }
 
-  host->value.opaque = output;
- end:
-  host->type = type;
-  return type;
+  SetOpaque(std::move(output));
 }
 
-url_host_type ParseHost(url_host* host,
-                        const char* input,
+void URLHost::ParseHost(const char* input,
                         size_t length,
                         bool is_special,
-                        bool unicode = false) {
-  url_host_type type = HOST_TYPE_FAILED;
+                        bool unicode) {
+  CHECK_EQ(type_, HostType::H_FAILED);
   const char* pointer = input;
-  std::string decoded;
 
   if (length == 0)
-    goto end;
+    return;
 
   if (pointer[0] == '[') {
     if (pointer[length - 1] != ']')
-      goto end;
-    return ParseIPv6Host(host, ++pointer, length - 2);
+      return;
+    return ParseIPv6Host(++pointer, length - 2);
   }
 
   if (!is_special)
-    return ParseOpaqueHost(host, input, length);
+    return ParseOpaqueHost(input, length);
 
   // First, we have to percent decode
-  decoded = PercentDecode(input, length);
+  std::string decoded = PercentDecode(input, length);
 
   // Then we have to punycode toASCII
   if (!ToASCII(decoded, &decoded))
-    goto end;
+    return;
 
   // If any of the following characters are still present, we have to fail
   for (size_t n = 0; n < decoded.size(); n++) {
     const char ch = decoded[n];
     if (IsForbiddenHostCodePoint(ch)) {
-      goto end;
+      return;
     }
   }
 
   // Check to see if it's an IPv4 IP address
-  type = ParseIPv4Host(host, decoded.c_str(), decoded.length());
-  if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED)
-    goto end;
+  bool is_ipv4;
+  ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
+  if (is_ipv4)
+    return;
 
   // If the unicode flag is set, run the result through punycode ToUnicode
   if (unicode && !ToUnicode(decoded, &decoded))
-    goto end;
+    return;
 
   // It's not an IPv4 or IPv6 address, it must be a domain
-  type = HOST_TYPE_DOMAIN;
-  host->value.domain = decoded;
-
- end:
-  host->type = type;
-  return type;
+  SetDomain(std::move(decoded));
 }
 
 // Locates the longest sequence of 0 segments in an IPv6 address
@@ -920,59 +947,59 @@ inline T* FindLongestZeroSequence(T* values, size_t len) {
   return result;
 }
 
-url_host_type WriteHost(const url_host* host, std::string* dest) {
-  dest->clear();
-  switch (host->type) {
-    case HOST_TYPE_DOMAIN:
-      *dest = host->value.domain;
+std::string URLHost::ToString() const {
+  std::string dest;
+  switch (type_) {
+    case HostType::H_DOMAIN:
+      return value_.domain;
+      break;
+    case HostType::H_OPAQUE:
+      return value_.opaque;
       break;
-    case HOST_TYPE_IPV4: {
-      dest->reserve(15);
-      uint32_t value = host->value.ipv4;
+    case HostType::H_IPV4: {
+      dest.reserve(15);
+      uint32_t value = value_.ipv4;
       for (int n = 0; n < 4; n++) {
         char buf[4];
         snprintf(buf, sizeof(buf), "%d", value % 256);
-        dest->insert(0, buf);
+        dest.insert(0, buf);
         if (n < 3)
-          dest->insert(0, 1, '.');
+          dest.insert(0, 1, '.');
         value /= 256;
       }
       break;
     }
-    case HOST_TYPE_IPV6: {
-      dest->reserve(41);
-      *dest+= '[';
-      const uint16_t* start = &host->value.ipv6[0];
+    case HostType::H_IPV6: {
+      dest.reserve(41);
+      dest += '[';
+      const uint16_t* start = &value_.ipv6[0];
       const uint16_t* compress_pointer =
           FindLongestZeroSequence(start, 8);
       bool ignore0 = false;
       for (int n = 0; n <= 7; n++) {
-        const uint16_t* piece = &host->value.ipv6[n];
+        const uint16_t* piece = &value_.ipv6[n];
         if (ignore0 && *piece == 0)
           continue;
         else if (ignore0)
           ignore0 = false;
         if (compress_pointer == piece) {
-          *dest += n == 0 ? "::" : ":";
+          dest += n == 0 ? "::" : ":";
           ignore0 = true;
           continue;
         }
         char buf[5];
         snprintf(buf, sizeof(buf), "%x", *piece);
-        *dest += buf;
+        dest += buf;
         if (n < 7)
-          *dest += ':';
+          dest += ':';
       }
-      *dest += ']';
+      dest += ']';
       break;
     }
-    case HOST_TYPE_OPAQUE:
-      *dest = host->value.opaque;
-      break;
-    case HOST_TYPE_FAILED:
+    case HostType::H_FAILED:
       break;
   }
-  return host->type;
+  return dest;
 }
 
 bool ParseHost(const std::string& input,
@@ -983,11 +1010,11 @@ bool ParseHost(const std::string& input,
     output->clear();
     return true;
   }
-  url_host host{{""}, HOST_TYPE_DOMAIN};
-  ParseHost(&host, input.c_str(), input.length(), is_special, unicode);
-  if (host.type == HOST_TYPE_FAILED)
+  URLHost host;
+  host.ParseHost(input.c_str(), input.length(), is_special, unicode);
+  if (host.ParsingFailed())
     return false;
-  WriteHost(&host, output);
+  *output = host.ToString();
   return true;
 }
 
@@ -2043,15 +2070,14 @@ static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
   CHECK(args[0]->IsString());
   Utf8Value value(env->isolate(), args[0]);
 
-  url_host host{{""}, HOST_TYPE_DOMAIN};
+  URLHost host;
   // Assuming the host is used for a special scheme.
-  ParseHost(&host, *value, value.length(), true);
-  if (host.type == HOST_TYPE_FAILED) {
+  host.ParseHost(*value, value.length(), true);
+  if (host.ParsingFailed()) {
     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
     return;
   }
-  std::string out;
-  WriteHost(&host, &out);
+  std::string out = host.ToString();
   args.GetReturnValue().Set(
       String::NewFromUtf8(env->isolate(),
                           out.c_str(),
@@ -2064,15 +2090,14 @@ static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
   CHECK(args[0]->IsString());
   Utf8Value value(env->isolate(), args[0]);
 
-  url_host host{{""}, HOST_TYPE_DOMAIN};
+  URLHost host;
   // Assuming the host is used for a special scheme.
-  ParseHost(&host, *value, value.length(), true, true);
-  if (host.type == HOST_TYPE_FAILED) {
+  host.ParseHost(*value, value.length(), true, true);
+  if (host.ParsingFailed()) {
     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
     return;
   }
-  std::string out;
-  WriteHost(&host, &out);
+  std::string out = host.ToString();
   args.GetReturnValue().Set(
       String::NewFromUtf8(env->isolate(),
                           out.c_str(),

From c8bef2df4ba5a10e4ee7659352a9b50e72d1773a Mon Sep 17 00:00:00 2001
From: Anna Henningsen <anna@addaleax.net>
Date: Tue, 5 Dec 2017 19:40:16 +0100
Subject: [PATCH 4/4] src: use correct OOB check for IPv6 parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`last_piece` pointed to the end of the 8×16 bit array,
so `piece_pointer == last_piece` already means that the pointer
is not writable any longer.

Previously, this still worked most of the time but could
result in an out-of-bounds-write.

Also, rename `last_piece` to `buffer_end` to avoid this pitfall.
---
 src/node_url.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/node_url.cc b/src/node_url.cc
index 1b90df3b92a98a..578c73c7f03cf6 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -650,7 +650,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
   for (unsigned n = 0; n < 8; n++)
     value_.ipv6[n] = 0;
   uint16_t* piece_pointer = &value_.ipv6[0];
-  uint16_t* last_piece = piece_pointer + 8;
+  uint16_t* const buffer_end = piece_pointer + 8;
   uint16_t* compress_pointer = nullptr;
   const char* pointer = input;
   const char* end = pointer + length;
@@ -665,7 +665,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
     compress_pointer = piece_pointer;
   }
   while (ch != kEOL) {
-    if (piece_pointer > last_piece)
+    if (piece_pointer >= buffer_end)
       return;
     if (ch == ':') {
       if (compress_pointer != nullptr)
@@ -690,7 +690,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
           return;
         pointer -= len;
         ch = pointer < end ? pointer[0] : kEOL;
-        if (piece_pointer > last_piece - 2)
+        if (piece_pointer > buffer_end - 2)
           return;
         numbers_seen = 0;
         while (ch != kEOL) {
@@ -744,7 +744,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
 
   if (compress_pointer != nullptr) {
     swaps = piece_pointer - compress_pointer;
-    piece_pointer = last_piece - 1;
+    piece_pointer = buffer_end - 1;
     while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
       uint16_t temp = *piece_pointer;
       uint16_t* swap_piece = compress_pointer + swaps - 1;
@@ -754,7 +754,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) {
        swaps--;
     }
   } else if (compress_pointer == nullptr &&
-             piece_pointer != last_piece) {
+             piece_pointer != buffer_end) {
     return;
   }
   type_ = HostType::H_IPV6;