Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1a4e031
local file system
kszucs Sep 3, 2019
a6a6243
pathlib
kszucs Sep 3, 2019
cd21741
remove comment
kszucs Sep 3, 2019
fd0b4ca
flake8
kszucs Sep 3, 2019
5de8881
mtime
kszucs Sep 4, 2019
cc25721
inlining
kszucs Sep 4, 2019
e812dca
docstrings
kszucs Sep 4, 2019
74598eb
python2
kszucs Sep 4, 2019
b73300c
write bytes in the test case
kszucs Sep 4, 2019
545a2c6
move to internal namespace
kszucs Sep 5, 2019
e24a036
raise on invalid path
kszucs Sep 5, 2019
cd8b387
ARROW_PYTHON_EXPORT
kszucs Sep 5, 2019
8ba3c60
don't export
kszucs Sep 6, 2019
299c702
use internal namespace
kszucs Sep 9, 2019
76993d2
review comments
kszucs Sep 10, 2019
43c12bc
review comments
kszucs Sep 10, 2019
962c346
revert FindClangTools
kszucs Sep 10, 2019
0baf5ee
convert string paths to posix too; don't accept bytes paths
kszucs Sep 10, 2019
88409ed
remove comment
kszucs Sep 10, 2019
d898b06
io.pxi docstring
kszucs Sep 10, 2019
abc51ab
clang-format; use posix-style path as basepath for SubTreeFileSystem
kszucs Sep 11, 2019
0939d97
int enum
kszucs Sep 11, 2019
de75cb5
remove pathlib dependency
kszucs Sep 11, 2019
4c794a3
explain Selector.base_dir; a couple of additional tests
kszucs Sep 11, 2019
4ae9951
remove default value from selector; test relative paths
kszucs Sep 12, 2019
fadf3a4
typo
kszucs Sep 12, 2019
19a44ad
include symlinks and hidden files in the test glob
kszucs Sep 12, 2019
787eaf4
use PurePosixPath
kszucs Sep 12, 2019
a391016
move out PyDateTime_IMPORT
kszucs Sep 12, 2019
9c90de5
use slash delimited paths everywhere in the tests
kszucs Sep 13, 2019
ab803e3
windows
kszucs Sep 16, 2019
58ce6d1
repr
kszucs Sep 16, 2019
cf066da
try to fix appveyor test
kszucs Sep 16, 2019
ea3a488
flake8
kszucs Sep 17, 2019
a7798f5
don't accept pathlib.Paths objects
kszucs Sep 17, 2019
9b66277
flake8
kszucs Sep 17, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cpp/src/arrow/filesystem/api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef ARROW_FILESYSTEM_API_H
#define ARROW_FILESYSTEM_API_H

#include "arrow/filesystem/filesystem.h" // IWYU pragma: export
#include "arrow/filesystem/localfs.h" // IWYU pragma: export
#include "arrow/filesystem/mockfs.h" // IWYU pragma: export
#include "arrow/filesystem/s3fs.h" // IWYU pragma: export

#endif // ARROW_FILESYSTEM_API_H
2 changes: 1 addition & 1 deletion cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ using TimePoint =
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

/// \brief EXPERIMENTAL: FileSystem entry type
enum class ARROW_EXPORT FileType {
enum class ARROW_EXPORT FileType : int8_t {
// Target does not exist
NonExistent,
// Target exists but its type is unknown (could be a special file such
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ set(ARROW_PYTHON_SRCS
benchmark.cc
common.cc
config.cc
datetime.cc
decimal.cc
deserialize.cc
extension_type.cc
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/python/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include "arrow/python/arrow_to_pandas.h"
#include "arrow/python/common.h"
#include "arrow/python/datetime.h"
#include "arrow/python/deserialize.h"
#include "arrow/python/helpers.h"
#include "arrow/python/inference.h"
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@

#include "arrow/python/common.h"
#include "arrow/python/config.h"
#include "arrow/python/datetime.h"
#include "arrow/python/decimal.h"
#include "arrow/python/helpers.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/numpy_internal.h"
#include "arrow/python/python_to_arrow.h"
#include "arrow/python/type_traits.h"
#include "arrow/python/util/datetime.h"

namespace arrow {

Expand Down Expand Up @@ -654,7 +654,7 @@ static Status ConvertDates(const PandasOptions& options, const ChunkedArray& dat
PyDateTime_IMPORT;
}
auto WrapValue = [](typename Type::c_type value, PyObject** out) {
RETURN_NOT_OK(PyDate_from_int(value, Type::UNIT, out));
RETURN_NOT_OK(internal::PyDate_from_int(value, Type::UNIT, out));
RETURN_IF_PYERROR();
return Status::OK();
};
Expand All @@ -672,7 +672,7 @@ static Status ConvertTimes(const PandasOptions& options, const ChunkedArray& dat
const TimeUnit::type unit = checked_cast<const Type&>(*data.type()).unit();

auto WrapValue = [unit](typename Type::c_type value, PyObject** out) {
RETURN_NOT_OK(PyTime_from_int(value, unit, out));
RETURN_NOT_OK(internal::PyTime_from_int(value, unit, out));
RETURN_IF_PYERROR();
return Status::OK();
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,19 @@
// specific language governing permissions and limitations
// under the License.

#ifndef PYARROW_UTIL_DATETIME_H
#define PYARROW_UTIL_DATETIME_H

#include <algorithm>
#include <chrono>
#include <iostream>

#include <datetime.h>
#include "arrow/python/datetime.h"
#include "arrow/python/platform.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/logging.h"

namespace arrow {
namespace py {
namespace internal {

// The following code is adapted from
// https://github.com/numpy/numpy/blob/master/numpy/core/src/multiarray/datetime.c
Expand Down Expand Up @@ -155,25 +156,6 @@ static void get_date_from_days(int64_t days, int64_t* date_year, int64_t* date_m
return;
}

static inline int64_t PyTime_to_us(PyObject* pytime) {
return (static_cast<int64_t>(PyDateTime_TIME_GET_HOUR(pytime)) * 3600000000LL +
static_cast<int64_t>(PyDateTime_TIME_GET_MINUTE(pytime)) * 60000000LL +
static_cast<int64_t>(PyDateTime_TIME_GET_SECOND(pytime)) * 1000000LL +
PyDateTime_TIME_GET_MICROSECOND(pytime));
}

static inline int64_t PyTime_to_s(PyObject* pytime) {
return PyTime_to_us(pytime) / 1000000;
}

static inline int64_t PyTime_to_ms(PyObject* pytime) {
return PyTime_to_us(pytime) / 1000;
}

static inline int64_t PyTime_to_ns(PyObject* pytime) {
return PyTime_to_us(pytime) * 1000;
}

// Splitting time quantities, for example splitting total seconds into
// minutes and remaining seconds. After we run
// int64_t remaining = split_time(total, quotient, &next)
Expand Down Expand Up @@ -233,25 +215,23 @@ static inline Status PyDate_convert_int(int64_t val, const DateUnit unit, int64_
return Status::OK();
}

static inline Status PyTime_from_int(int64_t val, const TimeUnit::type unit,
PyObject** out) {
Status PyTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out) {
int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
*out = PyTime_FromTime(static_cast<int32_t>(hour), static_cast<int32_t>(minute),
static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
return Status::OK();
}

static inline Status PyDate_from_int(int64_t val, const DateUnit unit, PyObject** out) {
Status PyDate_from_int(int64_t val, const DateUnit unit, PyObject** out) {
int64_t year = 0, month = 0, day = 0;
RETURN_NOT_OK(PyDate_convert_int(val, unit, &year, &month, &day));
*out = PyDate_FromDate(static_cast<int32_t>(year), static_cast<int32_t>(month),
static_cast<int32_t>(day));
return Status::OK();
}

static inline Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit,
PyObject** out) {
Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out) {
int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
int64_t total_days = 0;
Expand All @@ -265,47 +245,19 @@ static inline Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit,
return Status::OK();
}

static inline int64_t PyDate_to_days(PyDateTime_Date* pydate) {
return get_days_from_date(PyDateTime_GET_YEAR(pydate), PyDateTime_GET_MONTH(pydate),
PyDateTime_GET_DAY(pydate));
}

static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
int64_t total_seconds = 0;
int64_t days =
get_days_from_date(PyDateTime_GET_YEAR(pydate), PyDateTime_GET_MONTH(pydate),
PyDateTime_GET_DAY(pydate));
total_seconds += days * 24 * 3600;
return total_seconds * 1000;
}

static inline int64_t PyDateTime_to_s(PyDateTime_DateTime* pydatetime) {
int64_t total_seconds = 0;
total_seconds += PyDateTime_DATE_GET_SECOND(pydatetime);
total_seconds += PyDateTime_DATE_GET_MINUTE(pydatetime) * 60;
total_seconds += PyDateTime_DATE_GET_HOUR(pydatetime) * 3600;

return total_seconds +
(PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL);
}

static inline int64_t PyDateTime_to_ms(PyDateTime_DateTime* pydatetime) {
int64_t date_ms = PyDateTime_to_s(pydatetime) * 1000;
int ms = PyDateTime_DATE_GET_MICROSECOND(pydatetime) / 1000;
return date_ms + ms;
}

static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
int64_t ms = PyDateTime_to_s(pydatetime) * 1000;
int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
return ms * 1000 + us;
Status PyDateTime_from_TimePoint(TimePoint val, PyObject** out) {
PyDateTime_IMPORT;
Copy link
Member Author

@kszucs kszucs Sep 12, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pitrou this is the outermost scope I can move PyDateTime_IMPORT to; otherwise its call in FileStats.mtime segfaults. I removed the import from the other functions, and since PyDateTime_from_TimePoint is only used from _fs.pyx, it should not be that expensive.

auto nanos = val.time_since_epoch();
auto micros = std::chrono::duration_cast<std::chrono::microseconds>(nanos);
RETURN_NOT_OK(PyDateTime_from_int(micros.count(), TimeUnit::MICRO, out));
return Status::OK();
}

static inline int64_t PyDateTime_to_ns(PyDateTime_DateTime* pydatetime) {
return PyDateTime_to_us(pydatetime) * 1000;
/// \brief Convert the calendar fields of a Python date object to a day count.
///
/// Reads the year, month and day of `pydate` through the PyDateTime_GET_*
/// accessors and forwards them to get_days_from_date().
/// NOTE(review): presumably the result is days relative to the UNIX epoch --
/// confirm against get_days_from_date().
int64_t PyDate_to_days(PyDateTime_Date* pydate) {
  const auto year = PyDateTime_GET_YEAR(pydate);
  const auto month = PyDateTime_GET_MONTH(pydate);
  const auto day = PyDateTime_GET_DAY(pydate);
  return get_days_from_date(year, month, day);
}

} // namespace internal
} // namespace py
} // namespace arrow

#endif // PYARROW_UTIL_DATETIME_H
108 changes: 108 additions & 0 deletions cpp/src/arrow/python/datetime.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef PYARROW_UTIL_DATETIME_H
#define PYARROW_UTIL_DATETIME_H

#include <algorithm>
#include <chrono>

#include "arrow/python/platform.h"
#include "arrow/python/visibility.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/logging.h"

namespace arrow {
namespace py {
namespace internal {

/// \brief Microseconds represented by the hour/minute/second/microsecond
/// fields of a Python time object.
///
/// The fields are read with the PyDateTime_TIME_GET_* accessors; no type
/// check is performed on `pytime` here.
ARROW_PYTHON_EXPORT
inline int64_t PyTime_to_us(PyObject* pytime) {
  // Widen every scaled field to 64 bits before multiplying.
  const int64_t hours = static_cast<int64_t>(PyDateTime_TIME_GET_HOUR(pytime));
  const int64_t minutes = static_cast<int64_t>(PyDateTime_TIME_GET_MINUTE(pytime));
  const int64_t seconds = static_cast<int64_t>(PyDateTime_TIME_GET_SECOND(pytime));
  const int64_t micros = PyDateTime_TIME_GET_MICROSECOND(pytime);
  return hours * 3600000000LL + minutes * 60000000LL + seconds * 1000000LL + micros;
}

/// \brief Whole seconds represented by a Python time object.
///
/// Computed as PyTime_to_us() divided by 1,000,000 using integer division,
/// so the sub-second part is discarded.
ARROW_PYTHON_EXPORT
inline int64_t PyTime_to_s(PyObject* pytime) {
  const int64_t total_us = PyTime_to_us(pytime);
  return total_us / 1000000;
}

/// \brief Whole milliseconds represented by a Python time object.
///
/// Computed as PyTime_to_us() divided by 1,000 using integer division,
/// so the sub-millisecond part is discarded.
ARROW_PYTHON_EXPORT
inline int64_t PyTime_to_ms(PyObject* pytime) {
  const int64_t total_us = PyTime_to_us(pytime);
  return total_us / 1000;
}

/// \brief Nanoseconds represented by a Python time object.
///
/// Computed as PyTime_to_us() multiplied by 1,000.
ARROW_PYTHON_EXPORT
inline int64_t PyTime_to_ns(PyObject* pytime) {
  const int64_t total_us = PyTime_to_us(pytime);
  return total_us * 1000;
}

/// \brief Create a Python time object from an integer expressed in `unit`.
///
/// The implementation splits `val` into hour/minute/second/microsecond with
/// PyTime_convert_int() and stores the result of PyTime_FromTime() in `*out`.
/// A failing conversion is returned as a non-OK Status; the function does not
/// itself check for a pending Python error after constructing the object.
ARROW_PYTHON_EXPORT
Status PyTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out);

/// \brief Create a Python date object from an integer expressed in `unit`.
///
/// The implementation derives year/month/day with PyDate_convert_int() and
/// stores the result of PyDate_FromDate() in `*out`.
ARROW_PYTHON_EXPORT
Status PyDate_from_int(int64_t val, const DateUnit unit, PyObject** out);

/// \brief Create a Python datetime object from an integer expressed in `unit`.
///
/// NOTE(review): presumably `val` is an offset from the UNIX epoch -- confirm
/// against the implementation in datetime.cc.
ARROW_PYTHON_EXPORT
Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out);

// Nanosecond-resolution time point on std::chrono::system_clock.
using TimePoint =
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

/// \brief Create a Python datetime object from a TimePoint.
///
/// The implementation runs PyDateTime_IMPORT, converts the time since epoch
/// to microseconds with std::chrono::duration_cast, and delegates to
/// PyDateTime_from_int() with TimeUnit::MICRO.
ARROW_PYTHON_EXPORT
Status PyDateTime_from_TimePoint(TimePoint val, PyObject** out);

/// \brief Convert the year/month/day of a Python date object to a day count.
///
/// NOTE(review): presumably days relative to the UNIX epoch -- confirm
/// against get_days_from_date() in datetime.cc.
ARROW_PYTHON_EXPORT
int64_t PyDate_to_days(PyDateTime_Date* pydate);

/// \brief Milliseconds corresponding to the day count of a Python date object.
///
/// Scales PyDate_to_days() by 24 hours * 3600 seconds * 1000 milliseconds.
ARROW_PYTHON_EXPORT
inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
  const int64_t day_count = PyDate_to_days(pydate);
  // Same left-to-right scaling as hours -> seconds -> milliseconds.
  return day_count * 24 * 3600 * 1000;
}

/// \brief Whole seconds represented by a Python datetime object.
///
/// Adds the hour/minute/second fields (read with PyDateTime_DATE_GET_*) to the
/// date portion, which is obtained from PyDate_to_ms() and scaled back down to
/// seconds. The microsecond field is not read here.
ARROW_PYTHON_EXPORT
inline int64_t PyDateTime_to_s(PyDateTime_DateTime* pydatetime) {
  // Seconds elapsed within the day.
  const int64_t time_of_day_s = PyDateTime_DATE_GET_HOUR(pydatetime) * 3600 +
                                PyDateTime_DATE_GET_MINUTE(pydatetime) * 60 +
                                PyDateTime_DATE_GET_SECOND(pydatetime);
  // Seconds contributed by the calendar date.
  const int64_t date_s =
      PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL;
  return time_of_day_s + date_s;
}

/// \brief Whole milliseconds represented by a Python datetime object.
///
/// Combines PyDateTime_to_s() scaled to milliseconds with the microsecond
/// field divided by 1,000 (integer division).
ARROW_PYTHON_EXPORT
inline int64_t PyDateTime_to_ms(PyDateTime_DateTime* pydatetime) {
  const int64_t whole_seconds_ms = PyDateTime_to_s(pydatetime) * 1000;
  const int sub_second_ms = PyDateTime_DATE_GET_MICROSECOND(pydatetime) / 1000;
  return whole_seconds_ms + sub_second_ms;
}

/// \brief Microseconds represented by a Python datetime object.
///
/// Scales PyDateTime_to_s() to microseconds (in two steps of 1,000) and adds
/// the microsecond field read with PyDateTime_DATE_GET_MICROSECOND.
ARROW_PYTHON_EXPORT
inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
  const int64_t whole_seconds = PyDateTime_to_s(pydatetime);
  const int sub_second_us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
  return whole_seconds * 1000 * 1000 + sub_second_us;
}

/// \brief Nanoseconds represented by a Python datetime object.
///
/// Computed as PyDateTime_to_us() multiplied by 1,000.
ARROW_PYTHON_EXPORT
inline int64_t PyDateTime_to_ns(PyDateTime_DateTime* pydatetime) {
  const int64_t total_us = PyDateTime_to_us(pydatetime);
  return total_us * 1000;
}

} // namespace internal
} // namespace py
} // namespace arrow

#endif // PYARROW_UTIL_DATETIME_H
4 changes: 2 additions & 2 deletions cpp/src/arrow/python/deserialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@
#include "arrow/util/parsing.h"

#include "arrow/python/common.h"
#include "arrow/python/datetime.h"
#include "arrow/python/helpers.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/pyarrow.h"
#include "arrow/python/serialize.h"
#include "arrow/python/util/datetime.h"

namespace arrow {

Expand Down Expand Up @@ -154,7 +154,7 @@ Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t type,
*result = PyFloat_FromDouble(checked_cast<const DoubleArray&>(arr).Value(index));
return Status::OK();
case PythonType::DATE64: {
RETURN_NOT_OK(PyDateTime_from_int(
RETURN_NOT_OK(internal::PyDateTime_from_int(
checked_cast<const Date64Array&>(arr).Value(index), TimeUnit::MICRO, result));
RETURN_IF_PYERROR();
return Status::OK();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/inference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"

#include "arrow/python/datetime.h"
#include "arrow/python/decimal.h"
#include "arrow/python/helpers.h"
#include "arrow/python/iterators.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/util/datetime.h"

namespace arrow {
namespace py {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@

#include "arrow/python/common.h"
#include "arrow/python/config.h"
#include "arrow/python/datetime.h"
#include "arrow/python/helpers.h"
#include "arrow/python/iterators.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/numpy_internal.h"
#include "arrow/python/python_to_arrow.h"
#include "arrow/python/type_traits.h"
#include "arrow/python/util/datetime.h"

namespace arrow {

Expand Down
Loading