Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions cpp/src/arrow/util/uri.cc
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,36 @@ std::string Uri::path() const {
return ss.str();
}

// Return the query component of the parsed URI, converted to a std::string
// from the parser's text range.
std::string Uri::query_string() const {
  const auto& query_range = impl_->uri_.query;
  return TextRangeToString(query_range);
}

// Split the query component of the parsed URI into (key, value) pairs.
//
// Returns an empty vector when the URI has no query component, and
// Status::Invalid when uriDissectQueryMallocA does not report success.
// An item that has no value is returned with an empty string as its value.
Result<std::vector<std::pair<std::string, std::string>>> Uri::query_items() const {
  using QueryListGuard = std::unique_ptr<UriQueryListA, decltype(&uriFreeQueryListA)>;

  std::vector<std::pair<std::string, std::string>> items;
  const auto& query = impl_->uri_.query;
  if (query.first == nullptr) {
    // No query component at all: nothing to dissect.
    return items;
  }

  UriQueryListA* head = nullptr;
  int item_count = 0;
  const auto rc =
      uriDissectQueryMallocA(&head, &item_count, query.first, query.afterLast);
  if (rc != URI_SUCCESS) {
    return Status::Invalid("Cannot parse query string: '", query_string(), "'");
  }
  // The guard keeps the list head so the whole list is freed on scope exit,
  // independently of the cursor used for the traversal below.
  QueryListGuard query_guard(head, uriFreeQueryListA);

  items.reserve(item_count);
  for (const UriQueryListA* node = head; node != nullptr; node = node->next) {
    // A null value (key without a value) is mapped to the empty string.
    const char* value = (node->value != nullptr) ? node->value : "";
    items.emplace_back(node->key, value);
  }
  return items;
}

// Return a reference to the string representation stored for this URI.
const std::string& Uri::ToString() const {
  const std::string& repr = impl_->string_rep_;
  return repr;
}

Status Uri::Parse(const std::string& uri_string) {
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/util/uri.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/visibility.h"

Expand Down Expand Up @@ -54,6 +57,13 @@ class ARROW_EXPORT Uri {
int32_t port() const;
/// The URI path component.
///
/// The query string, if any, is not part of the returned path
/// (e.g. "/tmp" for "unix://localhost/tmp?foo=bar").
std::string path() const;
/// The URI query string
///
/// This is the raw query component, without the leading '?' and without
/// any unescaping applied (e.g. "a=some+value&b=c").  It is empty if the
/// URI has no query or an empty one.
std::string query_string() const;
/// The URI query items
///
/// Returns the (key, value) pairs of the query string, in order of
/// appearance and with escapes decoded (e.g. "a=some+value" yields
/// {"a", "some value"}).  The result is empty if the URI has no query.
/// An error is returned if the query string cannot be parsed.
///
/// Note this API doesn't allow differentiating between an empty value
/// and a missing value, such as in "a&b=1" vs. "a=&b=1".
Result<std::vector<std::pair<std::string, std::string>>> query_items() const;

/// Get the string representation of this URI.
///
/// The result is returned by const reference rather than by value.
const std::string& ToString() const;
Expand Down
121 changes: 59 additions & 62 deletions cpp/src/arrow/util/uri_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <gtest/gtest.h>
Expand Down Expand Up @@ -52,75 +53,71 @@ TEST(Uri, ParsePath) {

Uri uri;

// Helper: parse `uri_string` into the shared `uri` object and assert that the
// scheme, host presence, host and path all match the expected values.
auto check_case = [&](std::string uri_string, std::string scheme, bool has_host,
std::string host, std::string path) -> void {
ASSERT_OK(uri.Parse(uri_string));
ASSERT_EQ(uri.scheme(), scheme);
ASSERT_EQ(uri.has_host(), has_host);
ASSERT_EQ(uri.host(), host);
ASSERT_EQ(uri.path(), path);
};

// Relative path
ASSERT_OK(uri.Parse("unix:tmp/flight.sock"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_FALSE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "tmp/flight.sock");
check_case("unix:tmp/flight.sock", "unix", false, "", "tmp/flight.sock");

// Absolute path
ASSERT_OK(uri.Parse("unix:/tmp/flight.sock"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_FALSE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "/tmp/flight.sock");

ASSERT_OK(uri.Parse("unix://localhost/tmp/flight.sock"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_TRUE(uri.has_host());
ASSERT_EQ(uri.host(), "localhost");
ASSERT_EQ(uri.path(), "/tmp/flight.sock");

ASSERT_OK(uri.Parse("unix:///tmp/flight.sock"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_TRUE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "/tmp/flight.sock");
check_case("unix:/tmp/flight.sock", "unix", false, "", "/tmp/flight.sock");
check_case("unix://localhost/tmp/flight.sock", "unix", true, "localhost",
"/tmp/flight.sock");
check_case("unix:///tmp/flight.sock", "unix", true, "", "/tmp/flight.sock");

// Empty path
ASSERT_OK(uri.Parse("unix:"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_FALSE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "");

ASSERT_OK(uri.Parse("unix://localhost"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_TRUE(uri.has_host());
ASSERT_EQ(uri.host(), "localhost");
ASSERT_EQ(uri.path(), "");
check_case("unix:", "unix", false, "", "");
check_case("unix://localhost", "unix", true, "localhost", "");

// With trailing slash
ASSERT_OK(uri.Parse("unix:/"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_FALSE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "/");

ASSERT_OK(uri.Parse("unix:tmp/"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_FALSE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "tmp/");

ASSERT_OK(uri.Parse("unix://localhost/"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_TRUE(uri.has_host());
ASSERT_EQ(uri.host(), "localhost");
ASSERT_EQ(uri.path(), "/");

ASSERT_OK(uri.Parse("unix:/tmp/flight/"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_FALSE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "/tmp/flight/");

ASSERT_OK(uri.Parse("unix:///tmp/flight/"));
ASSERT_EQ(uri.scheme(), "unix");
ASSERT_TRUE(uri.has_host());
ASSERT_EQ(uri.host(), "");
ASSERT_EQ(uri.path(), "/tmp/flight/");
check_case("unix:/", "unix", false, "", "/");
check_case("unix:tmp/", "unix", false, "", "tmp/");
check_case("unix://localhost/", "unix", true, "localhost", "/");
check_case("unix:/tmp/flight/", "unix", false, "", "/tmp/flight/");
check_case("unix://localhost/tmp/flight/", "unix", true, "localhost", "/tmp/flight/");
check_case("unix:///tmp/flight/", "unix", true, "", "/tmp/flight/");

// With query string
check_case("unix:?", "unix", false, "", "");
check_case("unix:?foo", "unix", false, "", "");
check_case("unix:?foo=bar", "unix", false, "", "");
check_case("unix:/?", "unix", false, "", "/");
check_case("unix:/?foo", "unix", false, "", "/");
check_case("unix:/?foo=bar", "unix", false, "", "/");
check_case("unix://localhost/tmp?", "unix", true, "localhost", "/tmp");
check_case("unix://localhost/tmp?foo", "unix", true, "localhost", "/tmp");
check_case("unix://localhost/tmp?foo=bar", "unix", true, "localhost", "/tmp");
}

// Checks Uri::query_string() and Uri::query_items() against a set of URIs,
// with and without a query component, including escaped values.
TEST(Uri, ParseQuery) {
  using QueryItems = std::vector<std::pair<std::string, std::string>>;

  Uri uri;

  // Parse `uri_string`, then compare both the raw query string and the
  // dissected (key, value) items with the expected ones.
  auto expect_query = [&uri](std::string uri_string, std::string expected_query,
                             QueryItems expected_items) -> void {
    ASSERT_OK(uri.Parse(uri_string));
    ASSERT_EQ(uri.query_string(), expected_query);
    auto maybe_items = uri.query_items();
    ASSERT_OK(maybe_items);
    ASSERT_EQ(*maybe_items, expected_items);
  };

  // Missing or empty query
  expect_query("unix://localhost/tmp", "", {});
  expect_query("unix://localhost/tmp?", "", {});

  // Plain key/value pairs
  expect_query("unix://localhost/tmp?foo=bar", "foo=bar", {{"foo", "bar"}});
  expect_query("unix:?foo=bar", "foo=bar", {{"foo", "bar"}});
  expect_query("unix:?a=b&c=d", "a=b&c=d", {{"a", "b"}, {"c", "d"}});

  // With escaped values
  expect_query("unix:?a=some+value&b=c", "a=some+value&b=c",
               {{"a", "some value"}, {"b", "c"}});
  expect_query("unix:?a=some%20value%2Fanother&b=c", "a=some%20value%2Fanother&b=c",
               {{"a", "some value/another"}, {"b", "c"}});
}

TEST(Uri, ParseHostPort) {
Expand Down