diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc index 579d4c7f243..706b6dff4c6 100644 --- a/cpp/src/arrow/util/uri.cc +++ b/cpp/src/arrow/util/uri.cc @@ -120,6 +120,36 @@ std::string Uri::path() const { return ss.str(); } +std::string Uri::query_string() const { return TextRangeToString(impl_->uri_.query); } + +Result>> Uri::query_items() const { + const auto& query = impl_->uri_.query; + UriQueryListA* query_list; + int item_count; + std::vector> items; + + if (query.first == nullptr) { + return items; + } + if (uriDissectQueryMallocA(&query_list, &item_count, query.first, query.afterLast) != + URI_SUCCESS) { + return Status::Invalid("Cannot parse query string: '", query_string(), "'"); + } + std::unique_ptr query_guard( + query_list, uriFreeQueryListA); + + items.reserve(item_count); + while (query_list != nullptr) { + if (query_list->value != nullptr) { + items.emplace_back(query_list->key, query_list->value); + } else { + items.emplace_back(query_list->key, ""); + } + query_list = query_list->next; + } + return items; +} + const std::string& Uri::ToString() const { return impl_->string_rep_; } Status Uri::Parse(const std::string& uri_string) { diff --git a/cpp/src/arrow/util/uri.h b/cpp/src/arrow/util/uri.h index ce082ccc8e6..c7a8079c49f 100644 --- a/cpp/src/arrow/util/uri.h +++ b/cpp/src/arrow/util/uri.h @@ -20,7 +20,10 @@ #include #include #include +#include +#include +#include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/visibility.h" @@ -54,6 +57,13 @@ class ARROW_EXPORT Uri { int32_t port() const; /// The URI path component. std::string path() const; + /// The URI query string + std::string query_string() const; + /// The URI query items + /// + /// Note this API doesn't allow differentiating between an empty value + /// and a missing value, such in "a&b=1" vs. "a=&b=1". + Result>> query_items() const; /// Get the string representation of this URI. const std::string& ToString() const; diff --git a/cpp/src/arrow/util/uri_test.cc b/cpp/src/arrow/util/uri_test.cc index 34a7d24cf99..d06e3265867 100644 --- a/cpp/src/arrow/util/uri_test.cc +++ b/cpp/src/arrow/util/uri_test.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -52,75 +53,71 @@ TEST(Uri, ParsePath) { Uri uri; + auto check_case = [&](std::string uri_string, std::string scheme, bool has_host, + std::string host, std::string path) -> void { + ASSERT_OK(uri.Parse(uri_string)); + ASSERT_EQ(uri.scheme(), scheme); + ASSERT_EQ(uri.has_host(), has_host); + ASSERT_EQ(uri.host(), host); + ASSERT_EQ(uri.path(), path); + }; + // Relative path - ASSERT_OK(uri.Parse("unix:tmp/flight.sock")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_FALSE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "tmp/flight.sock"); + check_case("unix:tmp/flight.sock", "unix", false, "", "tmp/flight.sock"); // Absolute path - ASSERT_OK(uri.Parse("unix:/tmp/flight.sock")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_FALSE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "/tmp/flight.sock"); - - ASSERT_OK(uri.Parse("unix://localhost/tmp/flight.sock")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_TRUE(uri.has_host()); - ASSERT_EQ(uri.host(), "localhost"); - ASSERT_EQ(uri.path(), "/tmp/flight.sock"); - - ASSERT_OK(uri.Parse("unix:///tmp/flight.sock")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_TRUE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "/tmp/flight.sock"); + check_case("unix:/tmp/flight.sock", "unix", false, "", "/tmp/flight.sock"); + check_case("unix://localhost/tmp/flight.sock", "unix", true, "localhost", + "/tmp/flight.sock"); + check_case("unix:///tmp/flight.sock", "unix", true, "", "/tmp/flight.sock"); // Empty path - ASSERT_OK(uri.Parse("unix:")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_FALSE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), ""); - - ASSERT_OK(uri.Parse("unix://localhost")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_TRUE(uri.has_host()); - ASSERT_EQ(uri.host(), "localhost"); - ASSERT_EQ(uri.path(), ""); + check_case("unix:", "unix", false, "", ""); + check_case("unix://localhost", "unix", true, "localhost", ""); // With trailing slash - ASSERT_OK(uri.Parse("unix:/")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_FALSE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "/"); - - ASSERT_OK(uri.Parse("unix:tmp/")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_FALSE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "tmp/"); - - ASSERT_OK(uri.Parse("unix://localhost/")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_TRUE(uri.has_host()); - ASSERT_EQ(uri.host(), "localhost"); - ASSERT_EQ(uri.path(), "/"); - - ASSERT_OK(uri.Parse("unix:/tmp/flight/")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_FALSE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "/tmp/flight/"); - - ASSERT_OK(uri.Parse("unix:///tmp/flight/")); - ASSERT_EQ(uri.scheme(), "unix"); - ASSERT_TRUE(uri.has_host()); - ASSERT_EQ(uri.host(), ""); - ASSERT_EQ(uri.path(), "/tmp/flight/"); + check_case("unix:/", "unix", false, "", "/"); + check_case("unix:tmp/", "unix", false, "", "tmp/"); + check_case("unix://localhost/", "unix", true, "localhost", "/"); + check_case("unix:/tmp/flight/", "unix", false, "", "/tmp/flight/"); + check_case("unix://localhost/tmp/flight/", "unix", true, "localhost", "/tmp/flight/"); + check_case("unix:///tmp/flight/", "unix", true, "", "/tmp/flight/"); + + // With query string + check_case("unix:?", "unix", false, "", ""); + check_case("unix:?foo", "unix", false, "", ""); + check_case("unix:?foo=bar", "unix", false, "", ""); + check_case("unix:/?", "unix", false, "", "/"); + check_case("unix:/?foo", "unix", false, "", "/"); + check_case("unix:/?foo=bar", "unix", false, "", "/"); + check_case("unix://localhost/tmp?", "unix", true, "localhost", "/tmp"); + check_case("unix://localhost/tmp?foo", "unix", true, "localhost", "/tmp"); + check_case("unix://localhost/tmp?foo=bar", "unix", true, "localhost", "/tmp"); +} + +TEST(Uri, ParseQuery) { + Uri uri; + + auto check_case = [&](std::string uri_string, std::string query_string, + std::vector> items) -> void { + ASSERT_OK(uri.Parse(uri_string)); + ASSERT_EQ(uri.query_string(), query_string); + auto result = uri.query_items(); + ASSERT_OK(result); + ASSERT_EQ(*result, items); + }; + + check_case("unix://localhost/tmp", "", {}); + check_case("unix://localhost/tmp?", "", {}); + check_case("unix://localhost/tmp?foo=bar", "foo=bar", {{"foo", "bar"}}); + check_case("unix:?foo=bar", "foo=bar", {{"foo", "bar"}}); + check_case("unix:?a=b&c=d", "a=b&c=d", {{"a", "b"}, {"c", "d"}}); + + // With escaped values + check_case("unix:?a=some+value&b=c", "a=some+value&b=c", + {{"a", "some value"}, {"b", "c"}}); + check_case("unix:?a=some%20value%2Fanother&b=c", "a=some%20value%2Fanother&b=c", + {{"a", "some value/another"}, {"b", "c"}}); } TEST(Uri, ParseHostPort) {