From 19e4387cf9b2bd59fa2e2b7b6f4053287bfab23f Mon Sep 17 00:00:00 2001 From: atollk Date: Thu, 5 Nov 2020 08:45:09 +0100 Subject: [PATCH 1/9] Issue #438: Extended parsing compatibility of the FTP `LIST` command for Windows servers. The parsing process now properly supports 24-hour time format, both with and without leading zeros. --- CHANGELOG.md | 2 ++ CONTRIBUTORS.md | 1 + fs/_ftp_parse.py | 35 +++++++++++++++++++++++------- tests/test_ftp_parse.py | 48 +++++++++++++++++++++++++---------------- 4 files changed, 60 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b965c9c..9e0a7680 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Missing `mode` attribute to `_MemoryFile` objects returned by `MemoryFS.openbin`. - Missing `readinto` method for `MemoryFS` and `FTPFS` file objects. Closes [#380](https://github.com/PyFilesystem/pyfilesystem2/issues/380). +- Added compatibility if a Windows FTP server returns file information to the + `LIST` command with 24-hour times. Closes [#438](https://github.com/PyFilesystem/pyfilesystem2/issues/438). ### Changed diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index ba15fd73..31d5da55 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -13,3 +13,4 @@ Many thanks to the following developers for contributing to this project: - [Will McGugan](https://github.com/willmcgugan) - [Zmej Serow](https://github.com/zmej-serow) - [Morten Engelhardt Olsen](https://github.com/xoriath) +- [Andreas Tollkötter](https://github.com/atollk) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index b503d737..235a6c16 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -41,14 +41,17 @@ RE_WINDOWSNT = re.compile( r""" ^ - (?P.*?(AM|PM)) - \s* - (?P(|\d*)) - \s* + (?P\S+) + \s+ + (?P\S+(AM|PM)?) + \s+ + (?P(|\d+)) + \s+ (?P.*) $ """, - re.VERBOSE) + re.VERBOSE, +) def get_decoders(): @@ -104,6 +107,10 @@ def _parse_time(t, formats): return epoch_time +def _decode_linux_time(mtime): + return _parse_time(mtime, formats=["%b %d %Y", "%b %d %H:%M"]) + + def decode_linux(line, match): perms, links, uid, gid, size, mtime, name = match.groups() is_link = perms.startswith("l") @@ -114,7 +121,7 @@ def decode_linux(line, match): _link_name = _link_name.strip() permissions = Permissions.parse(perms[1:]) - mtime_epoch = _parse_time(mtime, formats=["%b %d %Y", "%b %d %H:%M"]) + mtime_epoch = _decode_linux_time(mtime) name = unicodedata.normalize("NFC", name) @@ -138,12 +145,22 @@ def decode_linux(line, match): return raw_info +def _decode_windowsnt_time(date, time): + while len(time.split(":")[0]) < 2: + time = "0" + time + return _parse_time( + date + " " + time, formats=["%d-%m-%y %I:%M%p", "%d-%m-%y %H:%M"] + ) + + def decode_windowsnt(line, match): """ - Decodes a Windows NT FTP LIST line like these two: + Decodes a Windows NT FTP LIST line like one of these: `11-02-18 02:12PM images` `11-02-18 03:33PM 9276 logo.gif` + + Alternatively, the time (02:12PM) might also be present in 24-hour format (14:12). """ is_dir = match.group("size") == "" @@ -161,7 +178,9 @@ def decode_windowsnt(line, match): if not is_dir: raw_info["details"]["size"] = int(match.group("size")) - modified = _parse_time(match.group("modified"), formats=["%d-%m-%y %I:%M%p"]) + modified = _decode_windowsnt_time( + match.group("modified_date"), match.group("modified_time") + ) if modified is not None: raw_info["details"]["modified"] = modified diff --git a/tests/test_ftp_parse.py b/tests/test_ftp_parse.py index d0abc05a..1ee3cf86 100644 --- a/tests/test_ftp_parse.py +++ b/tests/test_ftp_parse.py @@ -17,17 +17,18 @@ class TestFTPParse(unittest.TestCase): @mock.patch("time.localtime") def test_parse_time(self, mock_localtime): self.assertEqual( - ftp_parse._parse_time("JUL 05 1974", formats=["%b %d %Y"]), - 142214400.0) + ftp_parse._parse_time("JUL 05 1974", formats=["%b %d %Y"]), 142214400.0 + ) mock_localtime.return_value = time2017 self.assertEqual( - ftp_parse._parse_time("JUL 05 02:00", formats=["%b %d %H:%M"]), - 1499220000.0) + ftp_parse._parse_time("JUL 05 02:00", formats=["%b %d %H:%M"]), 1499220000.0 + ) self.assertEqual( ftp_parse._parse_time("05-07-17 02:00AM", formats=["%d-%m-%y %I:%M%p"]), - 1499220000.0) + 1499220000.0, + ) self.assertEqual(ftp_parse._parse_time("notadate", formats=["%b %d %Y"]), None) @@ -164,39 +165,50 @@ def test_decode_linux(self, mock_localtime): def test_decode_windowsnt(self, mock_localtime): mock_localtime.return_value = time2017 directory = """\ +unparsable line 11-02-17 02:00AM docs 11-02-17 02:12PM images -11-02-17 02:12PM AM to PM +11-02-17 02:12PM AM to PM 11-02-17 03:33PM 9276 logo.gif +05-11-20 22:11 src +11-02-17 01:23 1 12 +11-02-17 4:54 0 icon.bmp """ expected = [ { "basic": {"is_dir": True, "name": "docs"}, "details": {"modified": 1486778400.0, "type": 1}, - "ftp": { - "ls": "11-02-17 02:00AM docs" - }, + "ftp": {"ls": "11-02-17 02:00AM docs"}, }, { "basic": {"is_dir": True, "name": "images"}, "details": {"modified": 1486822320.0, "type": 1}, - "ftp": { - "ls": "11-02-17 02:12PM images" - }, + "ftp": {"ls": "11-02-17 02:12PM images"}, }, { "basic": {"is_dir": True, "name": "AM to PM"}, "details": {"modified": 1486822320.0, "type": 1}, - "ftp": { - "ls": "11-02-17 02:12PM AM to PM" - }, + "ftp": {"ls": "11-02-17 02:12PM AM to PM"}, }, { "basic": {"is_dir": False, "name": "logo.gif"}, "details": {"modified": 1486827180.0, "size": 9276, "type": 2}, - "ftp": { - "ls": "11-02-17 03:33PM 9276 logo.gif" - }, + "ftp": {"ls": "11-02-17 03:33PM 9276 logo.gif"}, + }, + { + "basic": {"is_dir": True, "name": "src"}, + "details": {"modified": 1604614260.0, "type": 1}, + "ftp": {"ls": "05-11-20 22:11 src"}, + }, + { + "basic": {"is_dir": False, "name": "12"}, + "details": {"modified": 1486776180.0, "size": 1, "type": 2}, + "ftp": {"ls": "11-02-17 01:23 1 12"}, + }, + { + "basic": {"is_dir": False, "name": "icon.bmp"}, + "details": {"modified": 1486788840.0, "size": 0, "type": 2}, + "ftp": {"ls": "11-02-17 4:54 0 icon.bmp"}, }, ] From 3bbbc482a3e945b587b9963177003d31e71c6b4a Mon Sep 17 00:00:00 2001 From: atollk Date: Thu, 5 Nov 2020 22:24:51 +0100 Subject: [PATCH 2/9] Adding to the previous commit, performed minor changes according to suggestions from a code review. --- fs/_ftp_parse.py | 8 ++++---- tests/test_ftp_parse.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index 235a6c16..b235159b 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -145,11 +145,11 @@ def decode_linux(line, match): return raw_info -def _decode_windowsnt_time(date, time): - while len(time.split(":")[0]) < 2: - time = "0" + time +def _decode_windowsnt_time(mdate, mtime): + if len(mtime.split(":")[0]) == 1: + mtime = "0" + mtime return _parse_time( - date + " " + time, formats=["%d-%m-%y %I:%M%p", "%d-%m-%y %H:%M"] + mdate + " " + mtime, formats=["%d-%m-%y %I:%M%p", "%d-%m-%y %H:%M"] ) diff --git a/tests/test_ftp_parse.py b/tests/test_ftp_parse.py index 1ee3cf86..b9a69cf1 100644 --- a/tests/test_ftp_parse.py +++ b/tests/test_ftp_parse.py @@ -173,6 +173,9 @@ def test_decode_windowsnt(self, mock_localtime): 05-11-20 22:11 src 11-02-17 01:23 1 12 11-02-17 4:54 0 icon.bmp +11-02-17 4:54AM 0 icon.gif +11-02-17 4:54PM 0 icon.png +11-02-17 16:54 0 icon.jpg """ expected = [ { @@ -210,6 +213,21 @@ def test_decode_windowsnt(self, mock_localtime): "details": {"modified": 1486788840.0, "size": 0, "type": 2}, "ftp": {"ls": "11-02-17 4:54 0 icon.bmp"}, }, + { + "basic": {"is_dir": False, "name": "icon.gif"}, + "details": {"modified": 1486788840.0, "size": 0, "type": 2}, + "ftp": {"ls": "11-02-17 4:54AM 0 icon.gif"}, + }, + { + "basic": {"is_dir": False, "name": "icon.png"}, + "details": {"modified": 1486832040.0, "size": 0, "type": 2}, + "ftp": {"ls": "11-02-17 4:54PM 0 icon.png"}, + }, + { + "basic": {"is_dir": False, "name": "icon.jpg"}, + "details": {"modified": 1486832040.0, "size": 0, "type": 2}, + "ftp": {"ls": "11-02-17 16:54 0 icon.jpg"}, + }, ] parsed = ftp_parse.parse(directory.splitlines()) From 7b76fb110ac7e22ae369bdfd08a5c48ad75fcd48 Mon Sep 17 00:00:00 2001 From: atollk Date: Thu, 19 Nov 2020 22:16:33 +0100 Subject: [PATCH 3/9] Sorted CONTRIBUTORS.md alphabetically. --- CONTRIBUTORS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 31d5da55..5df27157 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -2,6 +2,7 @@ Many thanks to the following developers for contributing to this project: +- [Andreas Tollkötter](https://github.com/atollk) - [C. W.](https://github.com/chfw) - [Diego Argueta](https://github.com/dargueta) - [Geoff Jukes](https://github.com/geoffjukes) @@ -9,8 +10,7 @@ Many thanks to the following developers for contributing to this project: - [Justin Charlong](https://github.com/jcharlong) - [Louis Sautier](https://github.com/sbraz) - [Martin Larralde](https://github.com/althonos) +- [Morten Engelhardt Olsen](https://github.com/xoriath) - [Nick Henderson](https://github.com/nwh) - [Will McGugan](https://github.com/willmcgugan) - [Zmej Serow](https://github.com/zmej-serow) -- [Morten Engelhardt Olsen](https://github.com/xoriath) -- [Andreas Tollkötter](https://github.com/atollk) From d1997bab5100182ac6f6fe5e2c77c69cf2dcd9f7 Mon Sep 17 00:00:00 2001 From: atollk Date: Thu, 19 Nov 2020 22:26:48 +0100 Subject: [PATCH 4/9] Cleaned up some redundant and unclear code. In fs._ftp_parse._parse_time, a noop-line was removed. On request of code review, the loop to determine the suitable time format was also improved upon. See https://github.com/PyFilesystem/pyfilesystem2/pull/439#issuecomment-728622272 for details. --- fs/_ftp_parse.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index b235159b..759a6dce 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -84,17 +84,21 @@ def parse_line(line): return None -def _parse_time(t, formats): - t = " ".join(token.strip() for token in t.lower().split(" ")) - - _t = None +def _find_suitable_format(t, formats): for frmt in formats: try: - _t = time.strptime(t, frmt) + if time.strptime(t, frmt): + return frmt except ValueError: continue - if not _t: + return None + + +def _parse_time(t, formats): + frmt = _find_suitable_format(t, formats) + if frmt is None: return None + _t = time.strptime(t, frmt) year = _t.tm_year if _t.tm_year != 1900 else time.localtime().tm_year month = _t.tm_mon From 89d09199fea18a95e136dfb38491325ba1da81da Mon Sep 17 00:00:00 2001 From: atollk Date: Fri, 20 Nov 2020 07:33:00 +0100 Subject: [PATCH 5/9] fs._ftp_parse, removed some code that was only assumed to be necessary due to incomplete documentation The standard library function `time.strptime` using the format "%H" was falsely assumed to require a two-digit number (00-23). As it turns out, one-digit numbers (0-9) are also valid, so we don't have to manually prepend a zero. --- fs/_ftp_parse.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index 759a6dce..73553f13 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -149,12 +149,8 @@ def decode_linux(line, match): return raw_info -def _decode_windowsnt_time(mdate, mtime): - if len(mtime.split(":")[0]) == 1: - mtime = "0" + mtime - return _parse_time( - mdate + " " + mtime, formats=["%d-%m-%y %I:%M%p", "%d-%m-%y %H:%M"] - ) +def _decode_windowsnt_time(mtime): + return _parse_time(mtime, formats=["%d-%m-%y %I:%M%p", "%d-%m-%y %H:%M"]) def decode_windowsnt(line, match): @@ -183,7 +179,7 @@ def decode_windowsnt(line, match): raw_info["details"]["size"] = int(match.group("size")) modified = _decode_windowsnt_time( - match.group("modified_date"), match.group("modified_time") + match.group("modified_date") + " " + match.group("modified_time") ) if modified is not None: raw_info["details"]["modified"] = modified From c04ff0a4ae43238648c2cd6b80c9b118cc1518a0 Mon Sep 17 00:00:00 2001 From: atollk Date: Mon, 23 Nov 2020 16:25:48 +0100 Subject: [PATCH 6/9] Re-merged two functions in fs._ftp_parse. See https://github.com/PyFilesystem/pyfilesystem2/pull/439#pullrequestreview-535549737 for details. The function `_find_suitable_format` was inlined into `_parse_time`. --- fs/_ftp_parse.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index 73553f13..ffbcbc65 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -95,10 +95,16 @@ def _find_suitable_format(t, formats): def _parse_time(t, formats): - frmt = _find_suitable_format(t, formats) - if frmt is None: + _t = None + for frmt in formats: + try: + _t = time.strptime(t, frmt) + if t: + break + except ValueError: + continue + if not _t: return None - _t = time.strptime(t, frmt) year = _t.tm_year if _t.tm_year != 1900 else time.localtime().tm_year month = _t.tm_mon From 4a19bb8b4cafb9324c69ebcbaa737940eb87cdbe Mon Sep 17 00:00:00 2001 From: atollk Date: Mon, 23 Nov 2020 16:27:23 +0100 Subject: [PATCH 7/9] Removed `_find_suitable_format` after its only use was deleted in the last commit. --- fs/_ftp_parse.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index ffbcbc65..9348ac5a 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -84,16 +84,6 @@ def parse_line(line): return None -def _find_suitable_format(t, formats): - for frmt in formats: - try: - if time.strptime(t, frmt): - return frmt - except ValueError: - continue - return None - - def _parse_time(t, formats): _t = None for frmt in formats: From d39b9852ab9f49c681db30fee257fe6d8bd3caa7 Mon Sep 17 00:00:00 2001 From: atollk Date: Sat, 12 Dec 2020 08:49:08 +0100 Subject: [PATCH 8/9] Fixed a typo in a variable name in fs._ftp_parse by completely removing the reference in that line. --- fs/_ftp_parse.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index 9348ac5a..5d892fdb 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -89,11 +89,10 @@ def _parse_time(t, formats): for frmt in formats: try: _t = time.strptime(t, frmt) - if t: - break + break except ValueError: continue - if not _t: + else: return None year = _t.tm_year if _t.tm_year != 1900 else time.localtime().tm_year From 1fa1f27e4e59ac69052cedc9bb7fece1fb2f57e1 Mon Sep 17 00:00:00 2001 From: atollk Date: Sat, 12 Dec 2020 16:11:32 +0100 Subject: [PATCH 9/9] Removed redundant line in fs._ftp_parse. --- fs/_ftp_parse.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/_ftp_parse.py b/fs/_ftp_parse.py index 5d892fdb..9e15a265 100644 --- a/fs/_ftp_parse.py +++ b/fs/_ftp_parse.py @@ -85,7 +85,6 @@ def parse_line(line): def _parse_time(t, formats): - _t = None for frmt in formats: try: _t = time.strptime(t, frmt)