// genDelims holds the general delimiters defined in RFC-3986 §2.2.
// See: https://www.rfc-editor.org/rfc/rfc3986#section-2.2
const genDelims = ":/?#[]@"

// normalizeHostPort returns the canonical textual form of host, joined with
// port when port is non-empty.
//
// If host parses as an IP address it is re-rendered via net.IP.String. An
// IPv6 host is always wrapped in square brackets, whether or not a port is
// present. A host that is not an IP address but contains ":" is rejected, as
// is an empty host.
func normalizeHostPort(host string, port string) (string, error) {
	if host == "" {
		return "", fmt.Errorf("empty hostname")
	}

	ip := net.ParseIP(host)
	switch {
	case ip != nil:
		if ip.To4() == nil && ip.To16() != nil && port == "" {
			// this is a unique case, host is ipv6 and requires brackets due
			// to being part of a url, but they won't be added by
			// net.JoinHostPort as there is no port
			// See: https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2
			return "[" + ip.String() + "]", nil
		}
		host = ip.String()

	case strings.Contains(host, ":"):
		// host is an invalid ipv6 literal.
		// hosts cannot contain certain reserved characters, including ":"
		// See: https://www.rfc-editor.org/rfc/rfc3986#section-2.2,
		// https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2
		return "", fmt.Errorf("host contains an invalid IPv6 literal")
	}

	if port == "" {
		return host, nil
	}
	// net.JoinHostPort adds the brackets around ipv6 hosts for us
	return net.JoinHostPort(host, port), nil
}

// parseUrl parses addr with url.Parse, normalizes its host component with
// normalizeHostPort and returns the re-serialized URL.
//
// It returns an error if addr cannot be parsed, if the host ends in a colon
// with no port after it, or if the host fails normalization.
func parseUrl(addr string) (string, error) {
	u, err := url.Parse(addr)
	if err != nil {
		// NOTE(review): the underlying error is not wrapped; url.Parse errors
		// echo the raw input, so this looks deliberate - confirm before
		// changing.
		return "", fmt.Errorf("failed to parse address")
	}

	// url.Parse accepts an empty port ("host:"), which we treat as malformed
	if strings.HasSuffix(u.Host, ":") {
		return "", fmt.Errorf("url has malformed host: missing port value after colon")
	}

	host, err := normalizeHostPort(u.Hostname(), u.Port())
	if err != nil {
		return "", err
	}
	u.Host = host

	return u.String(), nil
}

// NormalizeAddr takes an address as a string and returns a normalized copy.
// If the address is a URL, IP Address, or host:port address that includes an
// IPv6 address, the normalized copy will be conformant with RFC-5952 §4. If
// the address cannot be parsed, an error will be returned.
//
// There are two valid formats:
//
//   - hosts: "host"
//   - may be any of: IPv6 literal, IPv4 literal, dns name, or [sub]domain name
//   - IPv6 literals cannot be encapsulated within square brackets in this format
//
//   - URIs: "[scheme://] [user@] host [:port] [/path] [?query] [#frag]"
//   - format should conform with RFC-3986 §3 or else the returned address may
//     be parsed and formatted incorrectly
//   - hosts containing IPv6 literals MUST be encapsulated within square brackets,
//     as defined in RFC-3986 §3.2.2 and RFC-5952 §6
//   - all non-host components are optional
//
// See:
//   - https://www.rfc-editor.org/rfc/rfc5952
//   - https://www.rfc-editor.org/rfc/rfc3986
func NormalizeAddr(address string) (string, error) {
	if address == "" {
		return "", fmt.Errorf("empty address")
	}

	if strings.HasPrefix(address, "[") && strings.HasSuffix(address, "]") {
		return "", fmt.Errorf("address cannot be encapsulated by brackets")
	}

	// bare IP literals need no URL handling at all
	if ip := net.ParseIP(address); ip != nil {
		return ip.String(), nil
	}

	// url.Parse will parse ANYTHING, but puts the whole input into u.Path (or
	// u.Opaque) when there is no scheme, so we must decide up front whether
	// the address carries one. We look at the first general delimiter:
	//
	//   - none at all, or anything other than ':'  -> no scheme ("host",
	//     "host/path", "user@host", "[v6]:port", ...)
	//   - ':' that is not the start of "://" and is not the very first
	//     character -> no scheme, the colon separates host and port
	//     ("host:443"). This includes single-character hosts such as "a:80".
	//   - ':' at index 0, or "://" -> parse as-is. A leading colon is not
	//     part of the scheme spec, but url.Parse reads certain invalid ipv6
	//     addresses as valid urls once a scheme is prepended, and we want
	//     those to fail instead.
	//
	// See: https://www.rfc-editor.org/rfc/rfc3986#section-3
	idx := strings.IndexAny(address, genDelims)
	needsScheme := idx < 0 ||
		address[idx] != ':' ||
		(idx > 0 && !strings.HasPrefix(address[idx:], "://"))

	if !needsScheme {
		return parseUrl(address)
	}

	// temporarily provide a scheme so that url.Parse populates u.Host, then
	// strip it again from the normalized result
	const scheme = "default://"
	normalized, err := parseUrl(scheme + address)
	if err != nil {
		return "", err
	}
	return strings.TrimPrefix(normalized, scheme), nil
}
+// SPDX-License-Identifier: MPL-2.0 + +package parseutil + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_NormalizeAddr(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + address string + expected string + err string + }{ + { + name: "valid ipv4 address", + address: "127.0.0.1", + expected: "127.0.0.1", + }, + { + name: "valid ipv4 address with port", + address: "127.0.0.1:80", + expected: "127.0.0.1:80", + }, + { + name: "valid ipv4 address with port and path", + address: "127.0.0.1:80/test/path", + expected: "127.0.0.1:80/test/path", + }, + { + name: "valid ipv4 address with path", + address: "127.0.0.1/test/path", + expected: "127.0.0.1/test/path", + }, + { + name: "valid ipv4 uri with path", + address: "http://127.0.0.1/test/path", + expected: "http://127.0.0.1/test/path", + }, + { + name: "valid ipv4 uri with port and path", + address: "http://127.0.0.1:80/test/path", + expected: "http://127.0.0.1:80/test/path", + }, + { + name: "valid double colon address", + address: "::", + expected: "::", + }, + { + name: "valid ipv6 localhost address", + address: "::1", + expected: "::1", + }, + { + name: "valid ipv6 literal", + address: "2001:BEEF:0:0:0:1:0:0001", + expected: "2001:beef::1:0:1", + }, + { + name: "valid ipv6 literal with brackets", + address: "[2001:BEEF:0:0:0:1:0:0001]", + err: "address cannot be encapsulated by brackets", + }, + { + name: "valid ipv6 host:port", + address: "[2001:BEEF:0:0:0:1:0:0001]:80", + expected: "[2001:beef::1:0:1]:80", + }, + { + name: "valid ipv6 uri", + address: "https://[2001:BEEF:0:0:0:1:0:0001]", + expected: "https://[2001:beef::1:0:1]", + }, + { + name: "valid ipv6 uri with path", + address: "https://[2001:BEEF:0:0:0:1:0:0001]/test/path", + expected: "https://[2001:beef::1:0:1]/test/path", + }, + { + name: "valid ipv6 uri with port", + address: "https://[2001:BEEF:0:0:0:1:0:0001]:80", + expected: "https://[2001:beef::1:0:1]:80", + }, 
+ { + name: "invalid ipv6 uri missing closing bracket", + address: "https://[2001:BEEF:0:0:0:1:0:0001", + err: "failed to parse address", + }, + { + name: "invalid ipv6 uri missing brackets", + address: "https://2001:BEEF:0:0:0:1:0:0001", + err: "host contains an invalid IPv6 literal", + }, + { + name: "invalid ipv6 literal", + address: ":0:", + err: "failed to parse address", + }, + { + name: "invalid ipv6 literal", + address: "::0:", + err: "failed to parse address", + }, + { + name: "invalid ipv6, not enough segments", + address: "2001:BEEF:0:0:1:0:0001", + err: "host contains an invalid IPv6 literal", + }, + { + name: "invalid ipv6 host:port, not enough segments", + address: "[2001:BEEF:0:0:1:0:0001]:80", + err: "host contains an invalid IPv6 literal", + }, + { + name: "invalid ipv6 literal with brackets, not enough segments", + address: "[2001:BEEF:0:0:1:0:0001]", + err: "address cannot be encapsulated by brackets", + }, + { + name: "invalid ipv6 uri, not enough segments", + address: "https://[2001:BEEF:0:0:1:0:0001]:80", + err: "host contains an invalid IPv6 literal", + }, + { + name: "invalid ipv6 uri withut port, not enough segments", + address: "https://[2001:BEEF:0:0:1:0:0001]", + err: "host contains an invalid IPv6 literal", + }, + { + name: "invalid ipv6, it's just brackets", + address: "[]", + err: "address cannot be encapsulated by brackets", + }, + { + name: "invalid address, empty", + address: "", + err: "empty address", + }, + { + name: "valid url with domain", + address: "https://www.google.com", + expected: "https://www.google.com", + }, + { + name: "valid url with domain and port", + address: "https://www.google.com:443", + expected: "https://www.google.com:443", + }, + { + name: "valid host with only sub domain", + address: "www.google.com", + expected: "www.google.com", + }, + { + name: "valid host:port with sub domain and port", + address: "www.google.com:443", + expected: "www.google.com:443", + }, + { + name: "valid host with only domain", 
+ address: "google.com", + expected: "google.com", + }, + { + name: "valid host:port with domain and port", + address: "google.com:443", + expected: "google.com:443", + }, + { + name: "valid host with only dns name", + address: "hashicorp", + expected: "hashicorp", + }, + { + name: "valid host:port with dns name and port", + address: "hashicorp:443", + expected: "hashicorp:443", + }, + { + name: "invalid host with only dns name", + address: "hashi corp", + err: "failed to parse address", + }, + { + name: "valid url with path, schema, and subdomain", + address: "https://www.google.com/search?client=firefox-b-1-d&q=hey#section-1.2.3", + expected: "https://www.google.com/search?client=firefox-b-1-d&q=hey#section-1.2.3", + }, + { + name: "valid url with path but without schema or subdomain", + address: "google.com/search?client=firefox-b-1-d&q=hey#section-1.2.3", + expected: "google.com/search?client=firefox-b-1-d&q=hey#section-1.2.3", + }, + { + name: "valid uri with hostname and path", + address: "hashicorp/test/path?query=some&extra=data#section-1.2.3", + expected: "hashicorp/test/path?query=some&extra=data#section-1.2.3", + }, + { + name: "valid uri with crazy chars in query", + address: "hashicorp/test/path?I think actually anything can be past here !@#$^&*()[:]{;}", + expected: "hashicorp/test/path?I think actually anything can be past here !@#$%5E&*()%5B:%5D%7B;%7D", + }, + { + name: "valid uri with pre-encoded components", + address: "hashicorp/test/path?!@#$%5E&*()%5B:%5D%7B;%7D", + expected: "hashicorp/test/path?!@#$%5E&*()%5B:%5D%7B;%7D", + }, + { + name: "valid uri with crazy chars in path", + // note the lack of % as that would need to be encoded already + address: "hashicorp/test/path/ !@$^&*()[:]{;}", + expected: "hashicorp/test/path/%20%21@$%5E&%2A%28%29%5B:%5D%7B;%7D", + }, + { + name: "invalid uri with invalid percent encoding", + address: "hashicorp/test/path?!@#$%^&*()[:]{;}", + err: "failed to parse address", + }, + { + name: "invalid uri with 
invalid percent encoding", + address: "hashicorp/test/path?%^&", + // since there is nothing that needs to be encoded in this url, + // url.Parse does not detect that `%^&` in an invalid url encoding + expected: "hashicorp/test/path?%^&", // sad + }, + { + name: "valid uri without schema, with ipv6", + address: "[2001:BEEF:0:0:0:1:0:0001]/test/path", + expected: "[2001:beef::1:0:1]/test/path", + }, + { + name: "valid host with user", + address: "dani@localhost", + expected: "dani@localhost", + }, + { + name: "valid uri no closing slash with frag", + address: "[2001:BEEF:0:0:0:1:0:0001]#test", + expected: "[2001:beef::1:0:1]#test", + }, + { + name: "valid uri with scheme, no closing slash, with frag", + address: "https://[2001:BEEF:0:0:0:1:0:0001]#test", + expected: "https://[2001:beef::1:0:1]#test", + }, + { + name: "valid ldap url with a bunch of data", + address: "ldap://ds.example.com:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + expected: "ldap://ds.example.com:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + }, + { + name: "valid ldap url with IPv6 address, port, and data", + address: "ldap://[2001:BEEF:0:0:0:1:0:0001]:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + expected: "ldap://[2001:beef::1:0:1]:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + }, + { + name: "valid ldap url with IPv6 address and data", + address: "ldap://[2001:BEEF:0:0:0:1:0:0001]/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + expected: "ldap://[2001:beef::1:0:1]/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + }, + { + name: "valid ldap url with IPv4 address, port, and data", + address: "ldap://127.0.0.1:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + expected: "ldap://127.0.0.1:389/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + }, + { + name: "valid ldap url with IPv4 address and data", + address: 
"ldap://127.0.0.1/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + expected: "ldap://127.0.0.1/dc=example,dc=com?givenName,sn,cn?sub?(uid=john.doe)#extra", + }, + { + name: "valid ldap url with IPv6 address, no slash, and query data", + address: "ldap://[2001:BEEF:0:0:0:1:0:0001]:389?givenName,sn,cn?sub?(uid=john.doe)#extra", + expected: "ldap://[2001:beef::1:0:1]:389?givenName,sn,cn?sub?(uid=john.doe)#extra", + }, + { + name: "valid ldap url with IPv6 address, no slash, and frag data", + address: "ldap://[2001:BEEF:0:0:0:1:0:0001]:389#extra", + expected: "ldap://[2001:beef::1:0:1]:389#extra", + }, + { + name: "valid url with no scheme, ipv4 host, port address, and colon after port", + address: "127.0.0.1:80/test/path:123", + expected: "127.0.0.1:80/test/path:123", + }, + { + name: "valid url with no scheme, ipv6 host, port address, and colon after port", + address: "[2001:BEEF:0:0:0:1:0:0001]:80/test/path:123", + expected: "[2001:beef::1:0:1]:80/test/path:123", + }, + { + name: "anything other than numbers in port", + address: "abc:gh", + err: "failed to parse address", + }, + { + name: "invalid ipv4 host:port, host contains colon but no port", + address: "127.0.0.1:", + err: "url has malformed host: missing port value after colon", + }, + { + name: "invalid ipv6 host:port, host contains colon but no port", + address: "[2001:4860:4860::8888]:", + err: "url has malformed host: missing port value after colon", + }, + + // imported from vault + { + name: "hostname", + address: "vaultproject.io", + expected: "vaultproject.io", + }, + { + name: "hostname port", + address: "vaultproject.io:8200", + expected: "vaultproject.io:8200", + }, + { + name: "hostname URL", + address: "https://vaultproject.io", + expected: "https://vaultproject.io", + }, + { + name: "hostname port URL", + address: "https://vaultproject.io:8200", + expected: "https://vaultproject.io:8200", + }, + { + name: "hostname destination address", + address: "user@vaultproject.io", + expected: 
"user@vaultproject.io", + }, + { + name: "hostname destination address URL", + address: "http://user@vaultproject.io", + expected: "http://user@vaultproject.io", + }, + { + name: "hostname destination address URL port", + address: "http://user@vaultproject.io:8200", + expected: "http://user@vaultproject.io:8200", + }, + { + name: "ipv4", + address: "10.10.1.10", + expected: "10.10.1.10", + }, + { + name: "ipv4 IP:Port addr", + address: "10.10.1.10:8500", + expected: "10.10.1.10:8500", + }, + { + name: "ipv4 invalid IP:Port addr", + address: "[10.10.1.10]:8500", + expected: "10.10.1.10:8500", + }, + { + name: "ipv4 URL", + address: "https://10.10.1.10:8200", + expected: "https://10.10.1.10:8200", + }, + { + name: "ipv4 invalid URL", + address: "https://[10.10.1.10]:8200", + expected: "https://10.10.1.10:8200", + }, + { + name: "ipv4 destination address", + address: "username@10.10.1.10", + expected: "username@10.10.1.10", + }, + { + name: "ipv4 invalid destination address", + address: "username@10.10.1.10", + expected: "username@10.10.1.10", + }, + { + name: "ipv4 destination address port", + address: "username@10.10.1.10:8200", + expected: "username@10.10.1.10:8200", + }, + { + name: "ipv4 invalid destination address port", + address: "username@[10.10.1.10]:8200", + expected: "username@10.10.1.10:8200", + }, + { + name: "ipv4 destination address URL", + address: "https://username@10.10.1.10", + expected: "https://username@10.10.1.10", + }, + { + name: "ipv4 destination address URL port", + address: "https://username@10.10.1.10:8200", + expected: "https://username@10.10.1.10:8200", + }, + { + name: "ipv6 IP:Port RFC-5952 4.1 conformance leading zeroes", + address: "[2001:0db8::0001]:8500", + expected: "[2001:db8::1]:8500", + }, + { + name: "ipv6 RFC-5952 4.1 conformance leading zeroes", + address: "2001:0db8::0001", + expected: "2001:db8::1", + }, + { + name: "ipv6 URL RFC-5952 4.1 conformance leading zeroes", + address: "https://[2001:0db8::0001]:8200", + expected: 
"https://[2001:db8::1]:8200", + }, + { + name: "ipv6 bracketed destination address with port RFC-5952 4.1 conformance leading zeroes", + address: "username@[2001:0db8::0001]:8200", + expected: "username@[2001:db8::1]:8200", + }, + { + name: "ipv6 RFC-5952 4.2.2 conformance one 16-bit 0 field", + address: "2001:db8:0:1:1:1:1:1", + expected: "2001:db8:0:1:1:1:1:1", + }, + { + name: "ipv6 URL RFC-5952 4.2.2 conformance one 16-bit 0 field", + address: "https://[2001:db8:0:1:1:1:1:1]:8200", + expected: "https://[2001:db8:0:1:1:1:1:1]:8200", + }, + { + name: "ipv6 destination address with port RFC-5952 4.2.2 conformance one 16-bit 0 field", + address: "username@[2001:db8:0:1:1:1:1:1]:8200", + expected: "username@[2001:db8:0:1:1:1:1:1]:8200", + }, + { + name: "ipv6 RFC-5952 4.2.3 conformance longest run of 0 bits shortened", + address: "2001:0:0:1:0:0:0:1", + expected: "2001:0:0:1::1", + }, + { + name: "ipv6 URL RFC-5952 4.2.3 conformance longest run of 0 bits shortened", + address: "https://[2001:0:0:1:0:0:0:1]:8200", + expected: "https://[2001:0:0:1::1]:8200", + }, + { + name: "ipv6 destination address with port RFC-5952 4.2.3 conformance longest run of 0 bits shortened", + address: "username@[2001:0:0:1:0:0:0:1]:8200", + expected: "username@[2001:0:0:1::1]:8200", + }, + { + name: "ipv6 RFC-5952 4.2.3 conformance equal runs of 0 bits shortened", + address: "2001:db8:0:0:1:0:0:1", + expected: "2001:db8::1:0:0:1", + }, + { + name: "ipv6 URL no port RFC-5952 4.2.3 conformance equal runs of 0 bits shortened", + address: "https://[2001:db8:0:0:1:0:0:1]", + expected: "https://[2001:db8::1:0:0:1]", + }, + { + name: "ipv6 URL with port RFC-5952 4.2.3 conformance equal runs of 0 bits shortened", + address: "https://[2001:db8:0:0:1:0:0:1]:8200", + expected: "https://[2001:db8::1:0:0:1]:8200", + }, + { + name: "ipv6 destination address with port RFC-5952 4.2.3 conformance equal runs of 0 bits shortened", + address: "username@[2001:db8:0:0:1:0:0:1]:8200", + expected: 
"username@[2001:db8::1:0:0:1]:8200", + }, + { + name: "ipv6 RFC-5952 4.3 conformance downcase hex letters", + address: "2001:DB8:AC3:FE4::1", + expected: "2001:db8:ac3:fe4::1", + }, + { + name: "ipv6 URL RFC-5952 4.3 conformance downcase hex letters", + address: "https://[2001:DB8:AC3:FE4::1]:8200", + expected: "https://[2001:db8:ac3:fe4::1]:8200", + }, + { + name: "ipv6 destination address with port RFC-5952 4.3 conformance downcase hex letters", + address: "username@[2001:DB8:AC3:FE4::1]:8200", + expected: "username@[2001:db8:ac3:fe4::1]:8200", + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + assert := assert.New(t) + actual, err := NormalizeAddr(tt.address) + assert.Equal(tt.expected, actual) + if tt.err != "" { + require.Error(t, err) + assert.ErrorContains(err, tt.err) + } else { + assert.Nil(err) + } + }) + } + +}