diff --git a/upath/_flavour.py b/upath/_flavour.py index e6b41054..ed107c60 100644 --- a/upath/_flavour.py +++ b/upath/_flavour.py @@ -17,6 +17,7 @@ from fsspec.registry import registry as _class_registry from fsspec.spec import AbstractFileSystem +import upath from upath._flavour_sources import FileSystemFlavourBase from upath._flavour_sources import flavour_registry from upath._protocol import get_upath_protocol @@ -239,12 +240,14 @@ def local_file(self) -> bool: def stringify_path(pth: PathOrStr) -> str: if isinstance(pth, str): out = pth + elif isinstance(pth, upath.UPath) and not pth.is_absolute(): + out = str(pth) elif getattr(pth, "__fspath__", None) is not None: assert hasattr(pth, "__fspath__") out = pth.__fspath__() elif isinstance(pth, os.PathLike): out = str(pth) - elif hasattr(pth, "path"): # type: ignore[unreachable] + elif isinstance(pth, upath.UPath) and pth.is_absolute(): out = pth.path else: out = str(pth) @@ -288,7 +291,7 @@ def join(self, path: PathOrStr, *paths: PathOrStr) -> str: if self.local_file: return os.path.join( self.strip_protocol(path), - *paths, # type: ignore[arg-type] + *map(self.stringify_path, paths), ) if self.netloc_is_anchor: drv, p0 = self.splitdrive(path) diff --git a/upath/_protocol.py b/upath/_protocol.py index db1fead1..51588c1d 100644 --- a/upath/_protocol.py +++ b/upath/_protocol.py @@ -105,7 +105,12 @@ def compatible_protocol( *args: str | os.PathLike[str] | PurePath | JoinablePath, ) -> bool: """check if UPath protocols are compatible""" + from upath.core import UPath + for arg in args: + if isinstance(arg, UPath) and not arg.is_absolute(): + # relative UPath are always compatible + continue other_protocol = get_upath_protocol(arg) # consider protocols equivalent if they match up to the first "+" other_protocol = other_protocol.partition("+")[0] diff --git a/upath/core.py b/upath/core.py index 3ab82c38..cc3fd465 100644 --- a/upath/core.py +++ b/upath/core.py @@ -15,6 +15,7 @@ from typing import Any from typing import BinaryIO from typing import Literal +from typing import NoReturn from typing import TextIO from typing import overload from urllib.parse import SplitResult @@ -71,7 +72,12 @@ def _check_fsspec_has_working_glob(): def _make_instance(cls, args, kwargs): """helper for pickling UPath instances""" - return cls(*args, **kwargs) + # Extract _relative_base if present + relative_base = kwargs.pop("_relative_base", None) + instance = cls(*args, **kwargs) + if relative_base is not None: + instance._relative_base = relative_base + return instance def _buffering2blocksize(mode: str, buffering: int) -> int | None: @@ -87,6 +93,11 @@ def _buffering2blocksize(mode: str, buffering: int) -> int | None: return buffering +def _raise_unsupported(cls_name: str, method: str) -> NoReturn: + "relative path does not support method(), because cls_name.cwd() is unsupported" + raise NotImplementedError(f"{cls_name}.{method}() is unsupported") + + class _UPathMeta(ABCMeta): if sys.version_info < (3, 11): # pathlib 3.9 and 3.10 supported `Path[str]` but @@ -170,6 +181,15 @@ def _raw_urlpaths(self) -> Sequence[JoinablePathLike]: def _raw_urlpaths(self, value: Sequence[JoinablePathLike]) -> None: raise NotImplementedError + @property + @abstractmethod + def _relative_base(self) -> str | None: + raise NotImplementedError + + @_relative_base.setter + def _relative_base(self, value: str | None) -> None: + raise NotImplementedError + # === upath.UPath PUBLIC ADDITIONAL API =========================== @property @@ -196,7 +216,23 @@ def fs(self) -> AbstractFileSystem: @property def path(self) -> str: """The path that a fsspec filesystem can use.""" - return self.parser.strip_protocol(self.__str__()) + if self._relative_base is not None: + try: + # For relative paths, we need to resolve to absolute path + current_dir = self.cwd() # type: ignore[attr-defined] + except NotImplementedError: + raise NotImplementedError( + f"fsspec paths can not be relative and" + f" {type(self).__name__}.cwd() is unsupported" + ) from None + # Join the current directory with the relative path + if (self_path := str(self)) == ".": + path = str(current_dir) + else: + path = current_dir.parser.join(str(self), self_path) + else: + path = str(self) + return self.parser.strip_protocol(path) def joinuri(self, uri: JoinablePathLike) -> UPath: """Join with urljoin behavior for UPath instances""" @@ -378,6 +414,7 @@ def __init__( self._chain = Chain.from_list(segments) self._chain_parser = chain_parser self._raw_urlpaths = args + self._relative_base = None # --- deprecated attributes --------------------------------------- @@ -395,6 +432,7 @@ class UPath(_UPathMixin, OpenablePath): "_chain_parser", "_fs_cached", "_raw_urlpaths", + "_relative_base", ) if TYPE_CHECKING: @@ -402,31 +440,65 @@ class UPath(_UPathMixin, OpenablePath): _chain_parser: FSSpecChainParser _fs_cached: bool _raw_urlpaths: Sequence[JoinablePathLike] + _relative_base: str | None # === JoinablePath attributes ===================================== parser: UPathParser = LazyFlavourDescriptor() # type: ignore[assignment] def with_segments(self, *pathsegments: JoinablePathLike) -> Self: - return type(self)( + # we change joinpath behavior if called from a relative path + # this is not fully ideal, but currently the best way to move forward + if is_relative := self._relative_base is not None: + pathsegments = (self._relative_base, *pathsegments) + + new_instance = type(self)( *pathsegments, protocol=self._protocol, **self._storage_options, ) + if is_relative: + new_instance._relative_base = self._relative_base + return new_instance + def __str__(self) -> str: return self.__vfspath__() def __vfspath__(self) -> str: - return self._chain_parser.chain(self._chain.to_list())[0] + if self._relative_base is not None: + active_path = self._chain.active_path + stripped_base = self.parser.strip_protocol( + self._relative_base + ).removesuffix(self.parser.sep) + if not active_path.startswith(stripped_base): + raise RuntimeError( + f"{active_path!r} is not a subpath of {stripped_base!r}" + ) + + return ( + active_path.removeprefix(stripped_base).removeprefix(self.parser.sep) + or "." + ) + else: + return self._chain_parser.chain(self._chain.to_list())[0] def __repr__(self) -> str: + if self._relative_base is not None: + return f"" return f"{type(self).__name__}({self.path!r}, protocol={self._protocol!r})" # === JoinablePath overrides ====================================== @property def parts(self) -> Sequence[str]: + # For relative paths, return parts of the relative path only + if self._relative_base is not None: + rel_str = str(self) + if rel_str == ".": + return () + return tuple(rel_str.split(self.parser.sep)) + split = self.parser.split sep = self.parser.sep @@ -463,13 +535,46 @@ def with_name(self, name) -> Self: @property def anchor(self) -> str: + if self._relative_base is not None: + return "" return self.drive + self.root + @property + def parent(self) -> Self: + if self._relative_base is not None: + if str(self) == ".": + return self + else: + # this needs to be revisited... + pth = type(self)( + self._relative_base, + str(self), + protocol=self._protocol, + **self._storage_options, + ) + parent = pth.parent + parent._relative_base = self._relative_base + return parent + return super().parent + + @property + def parents(self) -> Sequence[Self]: + if self._relative_base is not None: + parents = [] + parent = self + while True: + if str(parent) == ".": + break + parent = parent.parent + parents.append(parent) + return parents + return super().parents + # === ReadablePath attributes ===================================== @property def info(self) -> PathInfo: - raise NotImplementedError("todo") + _raise_unsupported(type(self).__name__, "info") def iterdir(self) -> Iterator[Self]: sep = self.parser.sep @@ -491,7 +596,7 @@ def __open_reader__(self) -> BinaryIO: return self.fs.open(self.path, mode="rb") def readlink(self) -> Self: - raise NotImplementedError + _raise_unsupported(type(self).__name__, "readlink") # --- WritablePath attributes ------------------------------------- @@ -500,7 +605,7 @@ def symlink_to( target: ReadablePathLike, target_is_directory: bool = False, ) -> None: - raise NotImplementedError + _raise_unsupported(type(self).__name__, "symlink_to") def mkdir( self, @@ -623,7 +728,7 @@ def lstat(self) -> UPathStatResult: return self.stat(follow_symlinks=False) def chmod(self, mode: int, *, follow_symlinks: bool = True) -> None: - raise NotImplementedError + _raise_unsupported(type(self).__name__, "chmod") def exists(self, *, follow_symlinks=True) -> bool: return self.fs.exists(self.path) @@ -686,6 +791,8 @@ def glob( UserWarning, stacklevel=2, ) + if self._relative_base is not None: + self = self.absolute() path_pattern = self.joinpath(pattern).path sep = self.parser.sep base = self.fs._strip_protocol(self.path) @@ -739,22 +846,43 @@ def rglob( yield self.joinpath(name) def owner(self) -> str: - raise NotImplementedError + _raise_unsupported(type(self).__name__, "owner") def group(self) -> str: - raise NotImplementedError + _raise_unsupported(type(self).__name__, "group") def absolute(self) -> Self: + if self._relative_base is not None: + return self.cwd().joinpath(str(self)) return self def is_absolute(self) -> bool: - return self.parser.isabs(str(self)) + if self._relative_base is not None: + return False + else: + return self.parser.isabs(str(self)) def __eq__(self, other: object) -> bool: """UPaths are considered equal if their protocol, path and storage_options are equal.""" if not isinstance(other, UPath): return NotImplemented + + # For relative paths, compare the string representation instead of path + if ( + self._relative_base is not None + or getattr(other, "_relative_base", None) is not None + ): + # If both are relative paths, compare just the relative strings + if ( + self._relative_base is not None + and getattr(other, "_relative_base", None) is not None + ): + return str(self) == str(other) + else: + # One is relative, one is not - they can't be equal + return False + return ( self.path == other.path and self.protocol == other.protocol @@ -767,6 +895,8 @@ def __hash__(self) -> int: Note: in the future, if hash collisions become an issue, we can add `fsspec.utils.tokenize(storage_options)` """ + if self._relative_base is not None: + return hash((self.protocol, str(self))) return hash((self.protocol, self.path)) def __lt__(self, other: object) -> bool: @@ -790,6 +920,8 @@ def __ge__(self, other: object) -> bool: return self.path >= other.path def resolve(self, strict: bool = False) -> Self: + if self._relative_base is not None: + self = self.absolute() _parts = self.parts # Do not attempt to normalize path if no parts are dots @@ -820,7 +952,7 @@ def touch(self, mode=0o666, exist_ok=True) -> None: pass # unsupported by filesystem def lchmod(self, mode: int) -> None: - raise NotImplementedError + _raise_unsupported(type(self).__name__, "lchmod") def unlink(self, missing_ok: bool = False) -> None: if not self.exists(): @@ -848,6 +980,8 @@ def rename( target = UPath(target, **self.storage_options) if target == self: return self + if self._relative_base is not None: + self = self.absolute() target_protocol = get_upath_protocol(target) if target_protocol: if target_protocol != self.protocol: @@ -879,14 +1013,18 @@ def rename( return self.with_segments(target_) def replace(self, target: WritablePathLike) -> Self: - raise NotImplementedError # todo + _raise_unsupported(type(self).__name__, "replace") @property def drive(self) -> str: + if self._relative_base is not None: + return "" return self.parser.splitroot(str(self))[0] @property def root(self) -> str: + if self._relative_base is not None: + return "" return self.parser.splitroot(str(self))[1] def __reduce__(self): @@ -895,9 +1033,16 @@ def __reduce__(self): "protocol": self._protocol, **self._storage_options, } + # Include _relative_base in the state if it's set + if self._relative_base is not None: + kwargs["_relative_base"] = self._relative_base return _make_instance, (type(self), args, kwargs) def as_uri(self) -> str: + if self._relative_base is not None: + raise ValueError( + f"relative path can't be expressed as a {self.protocol} URI" + ) return str(self) def as_posix(self) -> str: @@ -912,35 +1057,53 @@ def samefile(self, other_path) -> bool: return st == other_st @classmethod - def cwd(cls) -> UPath: + def cwd(cls) -> Self: if cls is UPath: - return get_upath_class("").cwd() # type: ignore[union-attr] + # default behavior for UPath.cwd() is to return local cwd + return get_upath_class("").cwd() # type: ignore[union-attr,return-value] else: - raise NotImplementedError + _raise_unsupported(cls.__name__, "cwd") @classmethod - def home(cls) -> UPath: + def home(cls) -> Self: if cls is UPath: - return get_upath_class("").home() # type: ignore[union-attr] + return get_upath_class("").home() # type: ignore[union-attr,return-value] else: - raise NotImplementedError + _raise_unsupported(cls.__name__, "home") def relative_to( # type: ignore[override] self, - other, + other: Self | str, /, *_deprecated, - walk_up=False, + walk_up: bool = False, ) -> Self: - if isinstance(other, UPath) and ( - (self.__class__ is not other.__class__) - or (self.storage_options != other.storage_options) - ): - raise ValueError( - "paths have different storage_options:" - f" {self.storage_options!r} != {other.storage_options!r}" - ) - return self # super().relative_to(other, *_deprecated, walk_up=walk_up) + if walk_up: + raise NotImplementedError("walk_up=True is not implemented yet") + + if isinstance(other, UPath): + # revisit: ... + if self.__class__ is not other.__class__: + raise ValueError( + "incompatible protocols:" + f" {self._protocol!r} != {other._protocol!r}" + ) + if self.storage_options != other.storage_options: + raise ValueError( + "incompatible storage_options:" + f" {self.storage_options!r} != {other.storage_options!r}" + ) + elif isinstance(other, str): + other = self.with_segments(other) + else: + raise TypeError(f"expected UPath or str, got {type(other).__name__}") + + if other not in self.parents and self != other: + raise ValueError(f"{self!s} is not in the subpath of {other!s}") + else: + rel = copy(self) + rel._relative_base = str(other) + return rel def is_relative_to(self, other, /, *_deprecated) -> bool: # type: ignore[override] if isinstance(other, UPath) and self.storage_options != other.storage_options: diff --git a/upath/implementations/_experimental.py b/upath/implementations/_experimental.py new file mode 100644 index 00000000..e99a02b7 --- /dev/null +++ b/upath/implementations/_experimental.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from upath.registry import get_upath_class + +if TYPE_CHECKING: + from upath import UPath + + +def __getattr__(name: str) -> type[UPath]: + if name.startswith("_") and name.endswith("Path"): + protocol = name[1:-4].lower() + cls = get_upath_class(protocol, fallback=False) + assert cls is not None + return cls + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/upath/implementations/cloud.py b/upath/implementations/cloud.py index fabb7383..3575e1fa 100644 --- a/upath/implementations/cloud.py +++ b/upath/implementations/cloud.py @@ -46,8 +46,18 @@ def _transform_init_args( @property def root(self) -> str: + if self._relative_base is not None: + return "" return self.parser.sep + def __vfspath__(self): + path = super().__vfspath__() + if self._relative_base is None: + drive = self.parser.splitdrive(path)[0] + if drive and path == f"{self.protocol}://{drive}": + return f"{path}{self.root}" + return path + def mkdir( self, mode: int = 0o777, parents: bool = False, exist_ok: bool = False ) -> None: @@ -60,11 +70,6 @@ def iterdir(self) -> Iterator[Self]: raise NotADirectoryError(str(self)) yield from super().iterdir() - def relative_to(self, other, /, *_deprecated, walk_up=False): - # use the parent implementation for the ValueError logic - super().relative_to(other, *_deprecated, walk_up=False) - return self - class GCSPath(CloudPath): __slots__ = () @@ -88,6 +93,13 @@ def mkdir( if "unexpected keyword argument 'create_parents'" in str(err): self.fs.mkdir(self.path) + def exists(self, *, follow_symlinks=True): + # required for gcsfs<2025.5.0, see: https://github.com/fsspec/gcsfs/pull/676 + path = self.path + if len(path) > 1: + path = path.removesuffix(self.root) + return self.fs.exists(path) + class S3Path(CloudPath): __slots__ = () diff --git a/upath/implementations/local.py b/upath/implementations/local.py index 8d71c7be..d11a2656 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -71,11 +71,13 @@ class LocalPath(_UPathMixin, pathlib.Path): "_chain", "_chain_parser", "_fs_cached", + "_relative_base", ) if TYPE_CHECKING: _chain: Chain _chain_parser: FSSpecChainParser _fs_cached: AbstractFileSystem + _relative_base: str | None parser = os.path # type: ignore[misc,assignment] @@ -163,17 +165,30 @@ def _url(self) -> SplitResult: def joinpath(self, *other) -> Self: if not compatible_protocol("", *other): raise ValueError("can't combine incompatible UPath protocols") - return super().joinpath(*other) + return super().joinpath( + *( + str(o) if isinstance(o, UPath) and not o.is_absolute() else o + for o in other + ) + ) def __truediv__(self, other) -> Self: if not compatible_protocol("", other): raise ValueError("can't combine incompatible UPath protocols") - return super().__truediv__(other) + return super().__truediv__( + str(other) + if isinstance(other, UPath) and not other.is_absolute() + else other + ) def __rtruediv__(self, other) -> Self: if not compatible_protocol("", other): raise ValueError("can't combine incompatible UPath protocols") - return super().__rtruediv__(other) + return super().__rtruediv__( + str(other) + if isinstance(other, UPath) and not other.is_absolute() + else other + ) UPath.register(LocalPath) @@ -230,5 +245,13 @@ def iterdir(self) -> Iterator[Self]: def _url(self) -> SplitResult: return SplitResult._make((self.protocol, "", self.path, "", "")) + @classmethod + def cwd(cls) -> Self: + return cls(os.getcwd(), protocol="file") + + @classmethod + def home(cls) -> Self: + return cls(os.path.expanduser("~"), protocol="file") + LocalPath.register(FilePath) diff --git a/upath/registry.py b/upath/registry.py index 4ef5a7f3..6d0571a7 100644 --- a/upath/registry.py +++ b/upath/registry.py @@ -215,7 +215,7 @@ def get_upath_class( if not fallback: return None try: - _ = get_filesystem_class(protocol) + get_filesystem_class(protocol) except ValueError: return None # this is an unknown protocol else: @@ -226,4 +226,13 @@ def get_upath_class( UserWarning, stacklevel=2, ) - return upath.UPath + import upath.implementations._experimental as upath_experimental + + cls_name = f"_{protocol.title()}Path" + cls = type( + cls_name, + (upath.UPath,), + {"__module__": "upath.implementations._experimental"}, + ) + setattr(upath_experimental, cls_name, cls) + return cls diff --git a/upath/tests/implementations/test_local.py b/upath/tests/implementations/test_local.py index e3f59d48..cb446bcb 100644 --- a/upath/tests/implementations/test_local.py +++ b/upath/tests/implementations/test_local.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from upath import UPath @@ -15,6 +17,16 @@ def path(self, local_testdir): def test_is_LocalPath(self): assert isinstance(self.path, LocalPath) + def test_cwd(self): + cwd = type(self.path).cwd() + assert isinstance(cwd, LocalPath) + assert cwd.path == Path.cwd().as_posix() + + def test_home(self): + cwd = type(self.path).home() + assert isinstance(cwd, LocalPath) + assert cwd.path == Path.home().as_posix() + @xfail_if_version("fsspec", lt="2023.10.0", reason="requires fsspec>=2023.10.0") class TestRayIOFSSpecLocal(BaseTests): @@ -25,3 +37,13 @@ def path(self, local_testdir): def test_is_LocalPath(self): assert isinstance(self.path, LocalPath) + + def test_cwd(self): + cwd = type(self.path).cwd() + assert isinstance(cwd, LocalPath) + assert cwd.path == Path.cwd().as_posix() + + def test_home(self): + cwd = type(self.path).home() + assert isinstance(cwd, LocalPath) + assert cwd.path == Path.home().as_posix() diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index c18f089a..2f8ac432 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -52,7 +52,7 @@ def test_rmdir(self): self.path.joinpath("file1.txt").rmdir() def test_relative_to(self): - assert "s3://test_bucket/file.txt" == str( + assert "file.txt" == str( UPath("s3://test_bucket/file.txt").relative_to(UPath("s3://test_bucket")) ) @@ -78,7 +78,7 @@ def test_no_bucket_joinpath(self, joiner): def test_creating_s3path_with_bucket(self): path = UPath("s3://", bucket="bucket", anon=self.anon, **self.s3so) - assert str(path) == "s3://bucket" + assert str(path) == "s3://bucket/" def test_iterdir_with_plus_in_name(self, s3_with_plus_chr_name): bucket, anon, s3so = s3_with_plus_chr_name diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 02183480..4033e616 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -64,16 +64,18 @@ def test_fsspec_compat(self): pass def test_cwd(self): - pth = type(self.path).cwd() - assert str(pth) == os.getcwd() - assert isinstance(pth, pathlib.Path) - assert isinstance(pth, UPath) + with pytest.raises( + NotImplementedError, + match=r".+Path[.]cwd\(\) is unsupported", + ): + type(self.path).cwd() def test_home(self): - pth = type(self.path).home() - assert str(pth) == os.path.expanduser("~") - assert isinstance(pth, pathlib.Path) - assert isinstance(pth, UPath) + with pytest.raises( + NotImplementedError, + match=r".+Path[.]home\(\) is unsupported", + ): + type(self.path).home() @xfail_if_version("fsspec", reason="", ge="2024.2.0") def test_iterdir_no_dir(self): @@ -340,7 +342,7 @@ def test_copy_path_append_kwargs(): def test_relative_to(): - assert "s3://test_bucket/file.txt" == str( + assert "file.txt" == str( UPath("s3://test_bucket/file.txt").relative_to(UPath("s3://test_bucket")) ) diff --git a/upath/tests/test_relative.py b/upath/tests/test_relative.py new file mode 100644 index 00000000..fef84a22 --- /dev/null +++ b/upath/tests/test_relative.py @@ -0,0 +1,694 @@ +"""Tests for relative path functionality.""" + +import os +import pickle +import re +import tempfile +from pathlib import Path + +import pytest + +from upath import UPath + + +@pytest.mark.parametrize( + "protocol,storage_options,path,base", + [ + ("memory", {}, "memory:///foo/bar/baz.txt", "memory:///foo"), + ("s3", {"anon": True}, "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo"), + ("gcs", {"token": "anon"}, "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo"), + ("http", {"something": 1}, "http://host/foo/bar/baz.txt", "http://host/foo"), + ( + "https", + {}, + "https://host/foo/bar/baz.txt", + "https://host/", + ), + ], +) +def test_protocol_storage_options_fs_preserved(protocol, storage_options, path, base): + """Test that protocol and storage_options are preserved in relative paths.""" + p = UPath(path, protocol=protocol, **storage_options) + root = UPath(base, protocol=protocol, **storage_options) + rel = p.relative_to(root) + + assert rel.protocol == protocol + assert dict(**rel.storage_options) == storage_options + assert isinstance(rel.fs, type(p.fs)) + + +@pytest.mark.parametrize( + "protocol,path,base", + [ + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo"), + ("ftp", "ftp://user:pass@host/foo/bar/baz.txt", "ftp://user:pass@host/foo"), + ("http", "http://host/foo/bar/baz.txt", "http://host/foo"), + ("https", "https://host/foo/bar/baz.txt", "https://host/foo"), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo"), + ], +) +def test_relative_urlpath_raises_without_cwd(protocol, path, base): + rel = UPath(path, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + with pytest.raises( + NotImplementedError, + match=re.escape(f"{type(rel).__name__}.cwd() is unsupported"), + ): + rel.cwd() + with pytest.raises( + NotImplementedError, + match=re.escape( + f"fsspec paths can not be relative and" + f" {type(rel).__name__}.cwd() is unsupported" + ), + ): + _ = rel.path + + +@pytest.mark.parametrize( + "pth,base,rel", + [ + ("/foo/bar/baz.txt", "/foo", "bar/baz.txt"), + ("/foo/bar/baz/qux.txt", "/foo/bar", "baz/qux.txt"), + ("/foo/bar/baz/qux.txt", "/foo/bar/baz", "qux.txt"), + ("/foo/bar/baz", "/foo/bar/baz", "."), + ], +) +@pytest.mark.parametrize( + "protocol", + [ + "memory", + "file", + "", + ], +) +def test_basic_relative_path_creation(protocol, pth, base, rel): + rel_pth = UPath(pth, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + + assert not rel_pth.is_absolute() + assert rel_pth.as_posix() == rel + + +def test_relative_path_validation(): + """Test validation of relative_to arguments.""" + p = UPath("memory:///foo/bar") + + # Different protocols should fail + with pytest.raises(ValueError, match="incompatible protocols"): + p.relative_to(UPath("s3://bucket")) + + # Different storage options should fail + with pytest.raises(ValueError, match="incompatible storage_options"): + UPath("s3://bucket/file", anon=True).relative_to( + UPath("s3://bucket", anon=False) + ) + + +def test_path_not_in_subpath(): + """Test relative_to with paths that don't have a parent-child relationship.""" + p = UPath("memory:///foo/bar") + other = UPath("memory:///baz") + + with pytest.raises(ValueError, match="is not in the subpath of"): + p.relative_to(other) + + +def test_filesystem_operations_fail_without_cwd(): + """Test that filesystem operations fail on relative paths when cwd()""" + p = UPath("memory:///foo/bar/baz.txt") + root = UPath("memory:///foo") + rel = p.relative_to(root) + + # Memory filesystem doesn't implement cwd(), so these should fail + with pytest.raises( + NotImplementedError, + match=re.escape( + "fsspec paths can not be relative and MemoryPath.cwd() is unsupported" + ), + ): + _ = rel.path + + with pytest.raises( + NotImplementedError, + match=re.escape( + "fsspec paths can not be relative and MemoryPath.cwd() is unsupported" + ), + ): + rel.exists() + + +def test_filesystem_operations_work_with_cwd(): + """Test that filesystem operations work on relative paths when cwd()""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create test file structure + test_dir = os.path.join(tmpdir, "testdir") + os.makedirs(test_dir, exist_ok=True) + test_file = os.path.join(test_dir, "testfile.txt") + with open(test_file, "w") as f: + f.write("test content") + + # Create paths + abs_path = UPath(test_file) + abs_dir = UPath(test_dir) + rel_path = abs_path.relative_to(abs_dir) + + assert not rel_path.is_absolute() + assert str(rel_path) == "testfile.txt" + + # Change to the test directory and try filesystem operations + old_cwd = os.getcwd() + try: + os.chdir(test_dir) + + # These should work now since we're in the right directory + full_path = rel_path.path + assert "testfile.txt" in full_path + + # Test that the file exists + assert rel_path.exists() + + finally: + os.chdir(old_cwd) + + +def test_pickling_relative_paths(): + """Test that relative paths can be pickled and unpickled.""" + p = UPath("memory:///foo/bar/baz.txt") + root = UPath("memory:///foo") + rel = p.relative_to(root) + + # Pickle and unpickle + pickled = pickle.dumps(rel) + unpickled = pickle.loads(pickled) + + assert str(rel) == str(unpickled) + assert rel.is_absolute() == unpickled.is_absolute() + assert rel._relative_base == unpickled._relative_base + + +def test_with_segments_preserves_relative_state(): + """Test that with_segments preserves the relative state.""" + p = UPath("memory:///foo/bar/baz.txt") + root = UPath("memory:///foo") + rel = p.relative_to(root) + + # Create new path with different segments + new_rel = rel.with_segments("memory:///foo/other/file.txt") + + # Should still be relative with same root + assert not new_rel.is_absolute() + assert new_rel._relative_base == rel._relative_base + + +def test_relative_path_parts(): + """Test that parts work correctly for relative paths.""" + p = UPath("memory:///foo/bar/baz/qux.txt") + root = UPath("memory:///foo") + rel = p.relative_to(root) + + assert p.parts == root.parts + rel.parts + + +def test_absolute_method_behavior(): + """Test that absolute() returns the original absolute path.""" + p = UPath("memory:///foo/bar/baz.txt") + root = UPath("memory:///foo") + rel = p.relative_to(root) + + with pytest.raises( + NotImplementedError, + match=re.escape("MemoryPath.cwd() is unsupported"), + ): + rel.absolute() + + +def test_is_absolute_method(): + """Test is_absolute() method on relative paths.""" + p = UPath("memory:///foo/bar/baz.txt") + root = UPath("memory:///foo") + rel = p.relative_to(root) + + assert not rel.is_absolute() + + +def test_relative_path_comparison(): + """Test that relative paths can be compared.""" + p1 = UPath("memory:///foo/bar/baz.txt") + p2 = UPath("memory:///foo/bar/qux.txt") + root = UPath("memory:///foo") + + rel1 = p1.relative_to(root) + rel2 = p2.relative_to(root) + + # Compare string representations since .path requires cwd() for memory:// + assert str(rel1) != str(rel2) + assert rel1 != rel2 + + # Same relative path should be equal + rel1_copy = p1.relative_to(root) + assert str(rel1) == str(rel1_copy) + assert rel1 == rel1_copy + + # Same relative path from different base should be equal + rel3 = UPath("memory:///a/b/c.txt").relative_to(UPath("memory:///a")) + rel4 = UPath("file:///x/b/c.txt").relative_to(UPath("file:///x")) + + assert str(rel3) == str(rel4) + assert rel3 == rel4 + + +def test_nonrelative_path_is_absolute(): + """Test that normal (non-relative) paths return True for is_absolute().""" + p = UPath("memory:///foo/bar/baz.txt") + assert p.is_absolute() + + +def test_s3_relative_paths(): + """Test relative paths work with S3 URLs.""" + p = UPath("s3://test_bucket/dir/file.txt") + root = UPath("s3://test_bucket") + rel = p.relative_to(root) + + assert not rel.is_absolute() + assert str(rel) == "dir/file.txt" + + +@pytest.fixture +def rel_path(): + p = UPath("memory:///foo/bar/baz.txt") + root = UPath("memory:///foo") + yield p.relative_to(root) + + +def test_relative_path_as_uri(rel_path): + with pytest.raises( + ValueError, + match=f"relative path can't be expressed as a {rel_path.protocol} URI", + ): + rel_path.as_uri() + + +@pytest.mark.parametrize( + "method_args", + [ + pytest.param(("absolute", ()), id="absolute"), + pytest.param(("chmod", (0o777,)), id="chmod"), + pytest.param(("cwd", ()), id="cwd"), + pytest.param(("exists", ()), id="exists"), + pytest.param(("glob", ("*.txt",)), id="glob"), + pytest.param(("group", ()), id="group"), + pytest.param(("is_dir", ()), id="is_dir"), + pytest.param(("is_file", ()), id="is_file"), + pytest.param(("is_symlink", ()), id="is_symlink"), + pytest.param(("iterdir", ()), id="iterdir"), + pytest.param(("lchmod", (0o777,)), id="lchmod"), + pytest.param(("lstat", ()), id="lstat"), + pytest.param(("mkdir", ()), id="mkdir"), + pytest.param(("open", ()), id="open"), + pytest.param(("owner", ()), id="owner"), + pytest.param(("read_bytes", ()), id="read_bytes"), + pytest.param(("read_text", ()), id="read_text"), + pytest.param(("readlink", ()), id="readlink"), + pytest.param(("rename", ("a/b/c",)), id="rename"), + pytest.param(("replace", ("...",)), id="replace"), + pytest.param(("resolve", ()), id="resolve"), + pytest.param(("rglob", ("*.txt",)), id="rglob"), + pytest.param(("rmdir", ()), id="rmdir"), + pytest.param(("samefile", ("...",)), id="samefile"), + pytest.param(("stat", ()), id="stat"), + pytest.param(("symlink_to", ("...",)), id="symlink_to"), + pytest.param(("touch", ()), id="touch"), + pytest.param(("unlink", ()), id="unlink"), + pytest.param(("write_bytes", (b"data",)), id="write_bytes"), + pytest.param(("write_text", ("data",)), id="write_text"), + ], +) +def test_path_operations_disabled_without_cwd(rel_path, method_args): + """UPaths without .cwd() implementation should not allow path operations.""" + method, args = method_args + + with pytest.raises(NotImplementedError): + # next only needs to be called for iterdir and glob/rglob + # but the other raise already in the getattr call + next(getattr(rel_path, method)(*args)) + + +@pytest.mark.parametrize( + "protocol,path,base", + [ + ("", "/foo/bar/baz.txt", "/foo"), + ("file", "/foo/bar/baz.txt", "/foo"), + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo"), + ("ftp", "ftp://user:pass@host/foo/bar/baz.txt", "ftp://user:pass@host/foo"), + ("http", "http://host/foo/bar/baz.txt", "http://host/foo"), + ("https", "https://host/foo/bar/baz.txt", "https://host/foo"), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo"), + ], +) +def test_drive_root_anchor_empty_for_relative_paths(protocol, path, base): + rel = UPath(path, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + assert (rel.drive, rel.root, rel.anchor) == ("", "", "") + + +@pytest.mark.parametrize( + "protocol,path,base,expected_rel", + [ + ("", "/foo/bar/baz.txt", "/foo", "bar/baz.txt"), + ("file", "/foo/bar/baz.txt", "/foo", "bar/baz.txt"), + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo", "bar/baz.txt"), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo", "bar/baz.txt"), + ( + "ftp", + "ftp://user:pass@host/foo/bar/baz.txt", + "ftp://user:pass@host/foo", + "bar/baz.txt", + ), + ("http", "http://host/foo/bar/baz.txt", "http://host/foo", "bar/baz.txt"), + ("https", "https://host/foo/bar/baz.txt", "https://host/foo", "bar/baz.txt"), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo", "bar/baz.txt"), + ], +) +def test_relative_path_properties(protocol, path, base, expected_rel): + rel = UPath(path, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + + assert not rel.is_absolute() + assert rel.as_posix() == expected_rel + assert rel.parts == tuple(expected_rel.split("/")) + + +@pytest.mark.parametrize( + "protocol,path,base,expected_parts", + [ + ("", "/foo/bar/baz.txt", "/foo", ("bar", "baz.txt")), + ("file", "/foo/bar/baz.txt", "/foo", ("bar", "baz.txt")), + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo", ("bar", "baz.txt")), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo", ("bar", "baz.txt")), + ( + "ftp", + "ftp://user:pass@host/foo/bar/baz.txt", + "ftp://user:pass@host/foo", + ("bar", "baz.txt"), + ), + ("http", "http://host/foo/bar/baz.txt", "http://host/foo", ("bar", "baz.txt")), + ( + "https", + "https://host/foo/bar/baz.txt", + "https://host/foo", + ("bar", "baz.txt"), + ), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo", ("bar", "baz.txt")), + ], +) +def test_relative_path_parts_property(protocol, path, base, expected_parts): + rel = UPath(path, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + assert rel.parts == expected_parts + + +def test_relative_path_is_something(rel_path): + assert rel_path.is_block_device() is False + assert rel_path.is_char_device() is False + assert rel_path.is_fifo() is False + assert rel_path.is_mount() is False + assert rel_path.is_reserved() is False + assert rel_path.is_socket() is False + + +def test_relative_path_hashable(): + x = UPath("memory:///a/b/c.txt") + y = x.relative_to(UPath("memory:///a")) + assert hash(y) != hash(x) + + +def test_relative_path_expanduser_noop(rel_path): + # this should be revisited if we ever add ~ support to non-file protocols + assert rel_path == rel_path.expanduser() + + +def test_relative_path_stem_suffix_name(rel_path): + assert rel_path.name == "baz.txt" + assert rel_path.stem == "baz" + assert rel_path.suffix == ".txt" + assert rel_path.suffixes == [".txt"] + assert rel_path.with_name("other.txt").name == "other.txt" + assert rel_path.with_stem("other").name == "other.txt" + assert rel_path.with_suffix(".md").name == "baz.md" + assert rel_path.with_suffix(".tar.gz").suffixes == [".tar", ".gz"] + + +@pytest.mark.parametrize( + "protocol,pth,base,expected_parent", + [ + ("", "/foo/bar/baz.txt", "/foo", "bar"), + ("", "/foo", "/foo", "."), + ("file", "/foo/bar/baz.txt", "/foo", "bar"), + ("file", "/foo/bar", "/foo/bar", "."), + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo", "bar"), + ("s3", "s3://bucket/foo/bar/", "s3://bucket/foo/bar", "."), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo", "bar"), + ("gcs", "gcs://bucket/foo/bar/", "gcs://bucket/foo", "."), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo", "bar"), + ("memory", "memory:///foo/bar", "memory:///foo", "."), + ("https", "https://host/foo/bar/baz.txt", "https://host/foo", "bar"), + ("https", "https://host/foo/bar/", "https://host/foo/bar", "."), + ], +) +def test_relative_path_parent(protocol, pth, base, expected_parent): + rel = UPath(pth, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + assert str(rel.parent) == expected_parent + + +@pytest.mark.parametrize( + "uri,base,expected_parents_parts", + [ + ("/foo/bar/baz/qux.txt", "/foo", [("bar", "baz"), ("bar",), ()]), + ("file:///foo/bar/baz/qux.txt", "file:///foo", [("bar", "baz"), ("bar",), ()]), + ("s3://bucket/foo/bar/baz/", "s3://bucket/", [("foo", "bar"), ("foo",), ()]), + ("gcs://bucket/foo/bar/baz", "gcs://bucket/", [("foo", "bar"), ("foo",), ()]), + ("az://bucket/foo/bar/baz", "az://bucket/", [("foo", "bar"), ("foo",), ()]), + ( + "memory:///foo/bar/baz/qux.txt", + "memory:///foo", + [("bar", "baz"), ("bar",), ()], + ), + ( + "https://host.com/foo/bar/baz/qux.txt", + "https://host.com/foo", + [("bar", "baz"), ("bar",), ()], + ), + ], +) +def test_relative_path_parents(uri, base, expected_parents_parts): + rel = UPath(uri).relative_to(UPath(base)) + parents = list(rel.parents) + assert [x.parts for x in parents] == expected_parents_parts + + +@pytest.mark.parametrize( + "protocol,pth,base", + [ + ("", "/foo/bar/baz.txt", "/foo"), + ("file", "/foo/bar/baz.txt", "/foo"), + ], +) +def test_home_works_for_local_paths(protocol, pth, base): + rel = UPath(pth, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + prefix = f"{protocol}://" if protocol else "" + assert rel.home().as_posix() == prefix + UPath.home().as_posix() + + +@pytest.mark.parametrize( + "protocol,pth,base", + [ + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo"), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo"), + ("https", "https://host/foo/bar/baz.txt", "https://host/foo"), + ], +) +def test_home_raises_for_non_local_paths(protocol, pth, base): + rel = UPath(pth, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + with pytest.raises( + NotImplementedError, + match=re.escape(f"{type(rel).__name__}.home() is unsupported"), + ): + rel.home() + + +@pytest.mark.parametrize( + "protocol,pth,base", + [ + ("", "/foo/bar/baz.txt", "/foo"), + ("file", "/foo/bar/baz.txt", "/foo"), + ("s3", "s3://bucket/foo/bar/baz.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz.txt", "gcs://bucket/foo"), + ("memory", "memory:///foo/bar/baz.txt", "memory:///foo"), + ("https", "https://host/foo/bar/baz.txt", "https://host/foo"), + ], +) +def test_parser_attribute_available(protocol, pth, base): + rel_path = UPath(pth, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + assert rel_path.parser is not None + + +@pytest.mark.parametrize( + "protocol", + [ + "", + "file", + ], +) +def test_relpath_path_resolve(tmp_path, protocol, monkeypatch): + """This should work for all path types that support .cwd()""" + base = UPath(tmp_path, protocol=protocol) + (base / "a" / "b").mkdir(parents=True) + (base / "a" / "b" / "file.txt").write_text("data") + monkeypatch.chdir(base) + + rel = UPath("/xyz/a/b/c/d/../../file.txt", protocol=protocol).relative_to( + UPath("/xyz", protocol=protocol) + ) + + assert rel.as_posix() == "a/b/c/d/../../file.txt" + + resolved = rel.resolve() + prefix = f"{protocol}://" if protocol else "" + assert ( + resolved.as_posix() == prefix + (tmp_path / "a" / "b" / "file.txt").as_posix() + ) + assert resolved.read_text() == "data" + assert resolved.is_absolute() + assert resolved.exists() + + +@pytest.mark.parametrize( + "protocol,path,base", + [ + ("", "/foo/bar/baz/qux.txt", "/foo"), + ("file", "/foo/bar/baz/qux.txt", "/foo"), + ("s3", "s3://bucket/foo/bar/baz/qux.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz/qux.txt", "gcs://bucket/foo"), + ("memory", "memory:///foo/bar/baz/qux.txt", "memory:///foo"), + ("https", "https://host/foo/bar/baz/qux.txt", "https://host/foo"), + ], +) +def test_relative_path_match(protocol, path, base): + """Test that match works correctly for relative paths.""" + rel = UPath(path, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + + assert rel.as_posix() == "bar/baz/qux.txt" + + # Should match patterns that match the relative path + assert rel.match("bar/baz/qux.txt") + assert rel.match("*/baz/qux.txt") + assert rel.match("bar/*/qux.txt") + assert rel.match("*/**/*.txt") # ** acts like * + + # Should not match patterns that don't match + assert not rel.match("foo/baz/qux.txt") + assert not rel.match("*.py") + assert not rel.match("other.txt") + + +@pytest.mark.parametrize( + "protocol,path,base", + [ + ("", "/foo/bar/baz/qux.txt", "/foo"), + ("file", "/foo/bar/baz/qux.txt", "/foo"), + ("s3", "s3://bucket/foo/bar/baz/qux.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz/qux.txt", "gcs://bucket/foo"), + ("memory", "memory:///foo/bar/baz/qux.txt", "memory:///foo"), + ("https", "https://host/foo/bar/baz/qux.txt", "https://host/foo"), + ], +) +def test_relative_path_joinpath(protocol, path, base): + """Test that joinpath works correctly for relative paths.""" + rel = UPath(path, protocol=protocol).relative_to(UPath(base, protocol=protocol)) + + # Test joining with a single segment + assert rel.as_posix() == "bar/baz/qux.txt" + joined = rel.joinpath("extra.txt") + assert joined.as_posix() == "bar/baz/qux.txt/extra.txt" + assert not joined.is_absolute() + + # Test joining with multiple segments + joined_multi = rel.joinpath("dir", "file.py") + assert joined_multi.as_posix() == "bar/baz/qux.txt/dir/file.py" + assert not joined_multi.is_absolute() + + # Test that the result is still relative with same base + assert joined.protocol == joined_multi.protocol == protocol + assert joined.storage_options == joined_multi.storage_options == rel.storage_options + + +@pytest.mark.parametrize( + "protocol,path,base", + [ + ("", "/foo/bar/baz/qux.txt", "/foo"), + ("file", "/foo/bar/baz/qux.txt", "/foo"), + ("s3", "s3://bucket/foo/bar/baz/qux.txt", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar/baz/qux.txt", "gcs://bucket/foo"), + ("memory", "memory:///foo/bar/baz/qux.txt", "memory:///foo"), + ("https", "https://host/foo/bar/baz/qux.txt", "https://host/foo"), + ], +) +def test_join_local_absolute_path_to_relative(protocol, path, base, tmp_path): + """Test that joining an absolute path to a relative path works correctly.""" + rel = UPath(path, protocol=protocol).relative_to(base) + + assert rel.as_posix() == "bar/baz/qux.txt" + tmp_path.joinpath("bar/baz/qux.txt").parent.mkdir(parents=True, exist_ok=True) + tmp_path.joinpath("bar/baz/qux.txt").write_text("data") + + assert UPath(tmp_path).joinpath(rel).read_text() == "data" + + +@pytest.mark.parametrize( + "protocol,path", + [ + ("", "/foo/bar"), + ("file", "/foo/bar"), + ("s3", "s3://bucket/foo/bar"), + ("gcs", "gcs://bucket/foo/bar"), + ("memory", "memory:///foo/bar"), + ("https", "https://host/foo/bar"), + ], +) +def test_join_fsspec_absolute_path_to_relative(protocol, path): + p = UPath(path, protocol=protocol) + + x = p.joinpath(Path("a/b/c").as_posix()) + assert x.path.endswith("foo/bar/a/b/c") + + +@pytest.mark.parametrize( + "proto0,path0", + [ + ("", "/foo/bar"), + ("file", "/foo/bar"), + ("s3", "s3://bucket/foo/bar"), + ("gcs", "gcs://bucket/foo/bar"), + ("memory", "memory:///foo/bar"), + ("https", "https://host/foo/bar"), + ], +) +@pytest.mark.parametrize( + "proto1,path1,base1", + [ + ("", "/foo/bar", "/foo"), + ("file", "/foo/bar", "/foo"), + ("s3", "s3://bucket/foo/bar", "s3://bucket/foo"), + ("gcs", "gcs://bucket/foo/bar", "gcs://bucket/foo"), + ("memory", "memory:///foo/bar", "memory:///foo"), + ("https", "https://host/foo/bar", "https://host/foo"), + ], +) +def test_join_fsspec_absolute_path_to_fsspec_relative( + proto0, path0, proto1, path1, base1 +): + p0 = UPath(path0, protocol=proto0) + p1 = UPath(path1, protocol=proto1).relative_to(base1) + assert str(p1) == "bar" + + x = p0.joinpath(p1) + assert x.path.endswith("foo/bar/bar")