Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ if "%JOB%" == "Build_Debug" (
exit /B 0
)

set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14 thrift-cpp=0.11 boost-cpp
set CONDA_PACKAGES=--file=ci\conda_env_python.yml python=%PYTHON% numpy=1.14 boost-cpp

if "%ARROW_BUILD_GANDIVA%" == "ON" (
@rem Install llvmdev in the toolchain if building gandiva.dll
Expand Down
3 changes: 3 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,9 @@ set(ARROW_LINK_LIBS ${double-conversion_LIBRARIES})
set(ARROW_STATIC_LINK_LIBS ${double-conversion_LIBRARIES})
set(ARROW_STATIC_INSTALL_INTERFACE_LIBS ${double-conversion_LIBRARIES})

# uriparser is added to the static link list and to the static install
# interface list; these two lines do not touch ARROW_LINK_LIBS.
list(APPEND ARROW_STATIC_LINK_LIBS uriparser::uriparser)
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS uriparser::uriparser)

if(ARROW_WITH_BROTLI)
# Order is important for static linking
list(APPEND ARROW_LINK_LIBS Brotli::brotlienc Brotli::brotlidec Brotli::brotlicommon)
Expand Down
75 changes: 75 additions & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ macro(build_dependency DEPENDENCY_NAME)
build_grpc()
elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
build_bzip2()
elseif("${DEPENDENCY_NAME}" STREQUAL "uriparser")
build_uriparser()
else()
message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}")
endif()
Expand Down Expand Up @@ -341,6 +343,13 @@ else()
)
endif()

# Location of the uriparser source archive. The ARROW_URIPARSER_URL environment
# variable, when defined at configure time, takes precedence over the upstream
# GitHub archive for ${URIPARSER_VERSION}.
if(NOT DEFINED ENV{ARROW_URIPARSER_URL})
  set(URIPARSER_SOURCE_URL
      "https://github.com/uriparser/uriparser/archive/${URIPARSER_VERSION}.tar.gz")
else()
  set(URIPARSER_SOURCE_URL "$ENV{ARROW_URIPARSER_URL}")
endif()

if(DEFINED ENV{ARROW_ZLIB_URL})
set(ZLIB_SOURCE_URL "$ENV{ARROW_ZLIB_URL}")
else()
Expand Down Expand Up @@ -515,6 +524,72 @@ include_directories(SYSTEM ${double-conversion_INCLUDE_DIRS})

double_conversion_compability()

# ----------------------------------------------------------------------
# uriparser library

# Build uriparser from source through ExternalProject and expose the result as
# the imported static library target uriparser::uriparser.
#
# Reads: URIPARSER_SOURCE_URL, EP_COMMON_CMAKE_ARGS, EP_LOG_OPTIONS,
#        CMAKE_BUILD_TYPE, ARROW_USE_STATIC_CRT.
# Sets (in the caller's scope, because this is a macro): URIPARSER_PREFIX,
#        URIPARSER_STATIC_LIB, URIPARSER_INCLUDE_DIRS, URIPARSER_CMAKE_ARGS and,
#        on MSVC with a static CRT, URIPARSER_BUILD_TYPE_UPPER.
# Also makes the existing `toolchain` target depend on the external project.
macro(build_uriparser)
  message(STATUS "Building uriparser from source")

  # Install tree of the external project and the artifacts expected inside it.
  set(URIPARSER_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/uriparser_ep-install")
  set(
    URIPARSER_STATIC_LIB
    "${URIPARSER_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}uriparser${CMAKE_STATIC_LIBRARY_SUFFIX}"
    )
  set(URIPARSER_INCLUDE_DIRS "${URIPARSER_PREFIX}/include")

  # Static, position-independent library only: docs, tests, tools and the
  # wchar_t API are switched off. CMAKE_INSTALL_LIBDIR is pinned to "lib" so
  # that the archive ends up where URIPARSER_STATIC_LIB points.
  set(URIPARSER_CMAKE_ARGS
      ${EP_COMMON_CMAKE_ARGS}
      "-DURIPARSER_BUILD_DOCS=off"
      "-DURIPARSER_BUILD_TESTS=off"
      "-DURIPARSER_BUILD_TOOLS=off"
      "-DURIPARSER_BUILD_WCHAR_T=off"
      "-DBUILD_SHARED_LIBS=off"
      "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
      "-DCMAKE_INSTALL_LIBDIR=lib"
      "-DCMAKE_POSITION_INDEPENDENT_CODE=on"
      "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>")

  if(MSVC AND ARROW_USE_STATIC_CRT)
    # Select the static CRT flavour matching the build type. The comparison is
    # done on an upper-cased copy so that "Debug", "debug" and "DEBUG" all pick
    # the debug runtime, whether or not the caller normalised CMAKE_BUILD_TYPE.
    string(TOUPPER "${CMAKE_BUILD_TYPE}" URIPARSER_BUILD_TYPE_UPPER)
    if("${URIPARSER_BUILD_TYPE_UPPER}" STREQUAL "DEBUG")
      list(APPEND URIPARSER_CMAKE_ARGS "-DURIPARSER_MSVC_RUNTIME=/MTd")
    else()
      list(APPEND URIPARSER_CMAKE_ARGS "-DURIPARSER_MSVC_RUNTIME=/MT")
    endif()
  endif()

  externalproject_add(uriparser_ep
                      URL
                      "${URIPARSER_SOURCE_URL}"
                      CMAKE_ARGS
                      ${URIPARSER_CMAKE_ARGS}
                      BUILD_BYPRODUCTS
                      "${URIPARSER_STATIC_LIB}"
                      INSTALL_DIR
                      "${URIPARSER_PREFIX}"
                      ${EP_LOG_OPTIONS})

  add_library(uriparser::uriparser STATIC IMPORTED)
  # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052
  # (the include directory of an imported target must exist at configure time,
  # before the external project has installed anything into it).
  file(MAKE_DIRECTORY "${URIPARSER_INCLUDE_DIRS}")
  set_target_properties(uriparser::uriparser
                        PROPERTIES IMPORTED_LOCATION
                                   "${URIPARSER_STATIC_LIB}"
                                   INTERFACE_INCLUDE_DIRECTORIES
                                   "${URIPARSER_INCLUDE_DIRS}")

  # Build ordering only: the external project must finish before anything
  # consuming the imported target (or the aggregate toolchain target) builds.
  add_dependencies(toolchain uriparser_ep)
  add_dependencies(uriparser::uriparser uriparser_ep)
endmacro()

# Unless the user overrides uriparser_SOURCE, build uriparser ourselves:
# an empty (or unset) uriparser_SOURCE is turned into "BUNDLED".
if("${uriparser_SOURCE}" STREQUAL "")
set(uriparser_SOURCE "BUNDLED")
endif()

# NOTE(review): resolve_dependency is defined outside this view; presumably it
# ends up calling build_uriparser() for the "BUNDLED" source and guarantees
# that the uriparser::uriparser target exists afterwards -- confirm.
resolve_dependency(uriparser)

# Read the include directories back from the target and add them as SYSTEM
# include directories at directory scope.
get_target_property(URIPARSER_INCLUDE_DIRS uriparser::uriparser
INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${URIPARSER_INCLUDE_DIRS})

# ----------------------------------------------------------------------
# Snappy

Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ set(ARROW_SRCS
util/task-group.cc
util/thread-pool.cc
util/trie.cc
util/uri.cc
util/utf8.cc
vendored/datetime/tz.cpp)

Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/symbols.map
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@
ERR_getErrorString;
# jemalloc
je_arrow_*;
# uriparser
uri*;
# ORC destructors
_ZThn8_N3orc*;
# Protobuf symbols that aren't hidden by the C++ section below
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ add_arrow_test(stl-util-test)
add_arrow_test(task-group-test)
add_arrow_test(thread-pool-test)
add_arrow_test(trie-test)
add_arrow_test(uri-test)
add_arrow_test(utf8-util-test)

add_arrow_benchmark(bit-util-benchmark)
Expand Down
182 changes: 182 additions & 0 deletions cpp/src/arrow/util/uri-test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include "arrow/testing/gtest_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/uri.h"

namespace arrow {
namespace internal {

// A default-constructed Uri, on which Parse() was never called, reports an
// empty scheme.
TEST(Uri, Empty) {
  Uri unparsed;
  ASSERT_EQ(unparsed.scheme(), "");
}

// Parses a plain scheme://host URI and checks that the parsed components
// remain valid after the input string has been overwritten and destroyed.
TEST(Uri, ParseSimple) {
  Uri parsed;
  {
    // Parse from a short-lived string, then scribble over its contents before
    // it goes out of scope: the accessors below must not depend on the
    // storage of the input string.
    std::string ephemeral = "https://arrow.apache.org";
    ASSERT_OK(parsed.Parse(ephemeral));
    ephemeral.assign(ephemeral.size(), 'X');  // overwrite contents
  }
  ASSERT_EQ(parsed.scheme(), "https");
  ASSERT_EQ(parsed.host(), "arrow.apache.org");
  ASSERT_EQ(parsed.port_text(), "");
}

// Checks scheme / host / path decomposition for "unix:" URIs across the
// relative, absolute, empty-path and trailing-slash edge cases.
TEST(Uri, ParsePath) {
  // The various edge cases below (leading and trailing slashes) have been
  // checked against several Python URI parsing modules: `uri`, `rfc3986`, `rfc3987`

  // One row per URI: the input and the components expected after parsing it.
  // Every input uses the "unix" scheme.
  struct PathCase {
    const char* input;
    bool has_host;
    const char* host;
    const char* path;
  };

  const std::vector<PathCase> cases = {
      // Relative path
      {"unix:tmp/flight.sock", false, "", "tmp/flight.sock"},

      // Absolute path
      {"unix:/tmp/flight.sock", false, "", "/tmp/flight.sock"},
      {"unix://localhost/tmp/flight.sock", true, "localhost", "/tmp/flight.sock"},
      // An empty authority still counts as "has a host"
      {"unix:///tmp/flight.sock", true, "", "/tmp/flight.sock"},

      // Empty path
      {"unix:", false, "", ""},
      {"unix://localhost", true, "localhost", ""},

      // With trailing slash
      {"unix:/", false, "", "/"},
      {"unix:tmp/", false, "", "tmp/"},
      {"unix://localhost/", true, "localhost", "/"},
      {"unix:/tmp/flight/", false, "", "/tmp/flight/"},
      {"unix:///tmp/flight/", true, "", "/tmp/flight/"},
  };

  // A single Uri object is reused for all inputs, so this also exercises
  // re-parsing into an already populated object.
  Uri uri;
  for (const auto& c : cases) {
    // Make any failure below report which URI was being parsed.
    SCOPED_TRACE(c.input);
    ASSERT_OK(uri.Parse(c.input));
    ASSERT_EQ(uri.scheme(), "unix");
    ASSERT_EQ(uri.has_host(), c.has_host);
    ASSERT_EQ(uri.host(), c.host);
    ASSERT_EQ(uri.path(), c.path);
  }
}

// Checks host and port extraction for host names, IPv4 literals and bracketed
// IPv6 literals, with a port, with an empty port (trailing ':') and without.
TEST(Uri, ParseHostPort) {
  // One row per URI: the input and the components expected after parsing it.
  // Every input uses the "http" scheme. A missing or empty port is expected
  // to yield an empty port_text() and a port() of -1.
  struct HostPortCase {
    const char* input;
    const char* host;
    const char* port_text;
    int port;
  };

  const std::vector<HostPortCase> cases = {
      // Host name
      {"http://localhost:80", "localhost", "80", 80},

      // IPv4 literal
      {"http://1.2.3.4", "1.2.3.4", "", -1},
      {"http://1.2.3.4:", "1.2.3.4", "", -1},
      {"http://1.2.3.4:80", "1.2.3.4", "80", 80},

      // IPv6 literal: host() is reported without the enclosing brackets
      {"http://[::1]", "::1", "", -1},
      {"http://[::1]:", "::1", "", -1},
      {"http://[::1]:80", "::1", "80", 80},
  };

  // A single Uri object is reused for all inputs.
  Uri uri;
  for (const auto& c : cases) {
    // Make any failure below report which URI was being parsed.
    SCOPED_TRACE(c.input);
    ASSERT_OK(uri.Parse(c.input));
    ASSERT_EQ(uri.scheme(), "http");
    ASSERT_EQ(uri.host(), c.host);
    ASSERT_EQ(uri.port_text(), c.port_text);
    ASSERT_EQ(uri.port(), c.port);
  }
}

// Malformed authorities and ports must be rejected with Status::Invalid.
TEST(Uri, ParseError) {
  Uri rejected;

  // More than one ':' in an unbracketed authority
  ASSERT_RAISES(Invalid, rejected.Parse("http://a:b:c:d"));
  // Port is not a number
  ASSERT_RAISES(Invalid, rejected.Parse("http://localhost:z"));
  // Port carries a sign
  ASSERT_RAISES(Invalid, rejected.Parse("http://localhost:-1"));
  // Port is numeric but larger than 65535
  ASSERT_RAISES(Invalid, rejected.Parse("http://localhost:99999"));
}

} // namespace internal
} // namespace arrow
Loading