Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dvc/fs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def walk_files(self, path_info, **kwargs):
def ls(self, path_info, detail=False):
raise RemoteActionNotImplemented("ls", self.scheme)

def find(self, path_info, detail=False):
def find(self, path_info, detail=False, prefix=None):
raise RemoteActionNotImplemented("find", self.scheme)

def is_empty(self, path_info):
Expand Down
20 changes: 15 additions & 5 deletions dvc/fs/fsspec_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def ls(self, path_info, detail=False):
files = self.fs.ls(path, detail=detail)
yield from self._strip_buckets(files, detail=detail)

def find(self, path_info, detail=False):
# pylint: disable=unused-argument
def find(self, path_info, detail=False, prefix=None):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know whether there is any scenario in which `prefix` is not `None` in this case (`TRAVERSE_PREFIX_LEN=2`, used by filesystems with real directories, etc.). If there is no such case, perhaps we could add an `assert prefix is None` as a safeguard.

path = self._with_bucket(path_info)
files = self.fs.find(path, detail=detail)
if detail:
Expand All @@ -105,7 +106,7 @@ def find(self, path_info, detail=False):
yield from self._strip_buckets(files, detail=detail)

def walk_files(self, path_info, **kwargs):
for file in self.find(path_info):
for file in self.find(path_info, **kwargs):
yield path_info.replace(path=file)

def remove(self, path_info):
Expand Down Expand Up @@ -155,6 +156,8 @@ def _download(

# pylint: disable=abstract-method
class ObjectFSWrapper(FSSpecWrapper):
TRAVERSE_PREFIX_LEN = 3

def _isdir(self, path_info):
# Directory in object storages are interpreted differently
# among different fsspec providers, so this logic is a temporary
Expand All @@ -169,9 +172,16 @@ def _isdir(self, path_info):
and entry["name"].endswith("/")
)

def find(self, path_info, detail=False):
path = self._with_bucket(path_info)
files = self.fs.find(path, detail=detail)
def find(self, path_info, detail=False, prefix=None):
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For future refactors of the remotes, it might be a good idea to pass the actual prefix (`0/`) in here and use the real path (`bucket/cache/00/`) as the `path_info`, so that the call is fully fsspec-compliant.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it would make more sense for us to do

walk("bucket/00/", prefix="0")

instead of the current

walk("bucket/00/0", prefix=True)

if prefix is not None:
path = self._with_bucket(path_info.parent)
files = self.fs.find(
path, detail=detail, prefix=path_info.parts[-1]
)
else:
path = self._with_bucket(path_info)
files = self.fs.find(path, detail=detail)

if detail:
files = files.values()

Expand Down
2 changes: 1 addition & 1 deletion dvc/fs/gdrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ def _gdrive_list_ids(self, query_ids):
query = f"({query}) and trashed=false"
return self._gdrive_list(query)

def find(self, path_info, detail=False):
def find(self, path_info, detail=False, prefix=None):
root_path = path_info.path
seen_paths = set()

Expand Down