From 9b086edd1e78e1bcadf91abcb53bc13b18a8c127 Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Sat, 7 Dec 2024 09:10:45 +1300
Subject: [PATCH 1/3] Add _missing_ to FileFormat Enum to make it case insensitive

---
Reviewer notes (text in this area is not part of the commit message):
* Line breaks and indentation of this series were restored from a
  whitespace-collapsed copy. Verify the context lines against the target
  tree before applying; `git apply --ignore-whitespace` may be needed.
* `_missing_` hands back an enum member or None, so `Union[None, str]` is
  looser than necessary. Consider narrowing the annotation to the enum type
  in a follow-up.

 pyiceberg/manifest.py  |  8 ++++++++
 tests/test_manifest.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 tests/test_manifest.py

diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index 6774499f2e..e2352abca6 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -30,6 +30,7 @@
     Optional,
     Tuple,
     Type,
+    Union
 )
 
 from cachetools import LRUCache, cached
@@ -97,6 +98,13 @@ class FileFormat(str, Enum):
     PARQUET = "PARQUET"
     ORC = "ORC"
 
+    @classmethod
+    def _missing_(cls, value: object) -> Union[None, str]:
+        for member in cls:
+            if getattr(member, "value") == str(value).upper():
+                return member
+        return None
+
     def __repr__(self) -> str:
         """Return the string representation of the FileFormat class."""
         return f"FileFormat.{self.name}"
diff --git a/tests/test_manifest.py b/tests/test_manifest.py
new file mode 100644
index 0000000000..8a45fbc747
--- /dev/null
+++ b/tests/test_manifest.py
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from pyiceberg.manifest import FileFormat
+
+
+@pytest.mark.parametrize(
+    "raw_file_format,expected_file_format",
+    [
+        ("avro", FileFormat("AVRO")),
+        ("AVRO", FileFormat("AVRO")),
+        ("parquet", FileFormat("PARQUET")),
+        ("PARQUET", FileFormat("PARQUET")),
+        ("orc", FileFormat("ORC")),
+        ("ORC", FileFormat("ORC")),
+        ("NOT_EXISTS", None),
+    ],
+)
+def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
+    if expected_file_format:
+        parsed_file_format = FileFormat(raw_file_format)
+        assert parsed_file_format == expected_file_format, (
+            f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
+        )
+    else:
+        with pytest.raises(ValueError):
+            _ = FileFormat(raw_file_format)

From 751c24b18ec961711c793a6b465b38fa529e5122 Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Sat, 7 Dec 2024 11:21:03 +1300
Subject: [PATCH 2/3] Combine the manifest test to existing test_manifest.py file

---
 tests/test_manifest.py       | 42 ------------------------------------
 tests/utils/test_manifest.py | 23 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 42 deletions(-)
 delete mode 100644 tests/test_manifest.py

diff --git a/tests/test_manifest.py b/tests/test_manifest.py
deleted file mode 100644
index 8a45fbc747..0000000000
--- a/tests/test_manifest.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import pytest
-
-from pyiceberg.manifest import FileFormat
-
-
-@pytest.mark.parametrize(
-    "raw_file_format,expected_file_format",
-    [
-        ("avro", FileFormat("AVRO")),
-        ("AVRO", FileFormat("AVRO")),
-        ("parquet", FileFormat("PARQUET")),
-        ("PARQUET", FileFormat("PARQUET")),
-        ("orc", FileFormat("ORC")),
-        ("ORC", FileFormat("ORC")),
-        ("NOT_EXISTS", None),
-    ],
-)
-def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
-    if expected_file_format:
-        parsed_file_format = FileFormat(raw_file_format)
-        assert parsed_file_format == expected_file_format, (
-            f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
-        )
-    else:
-        with pytest.raises(ValueError):
-            _ = FileFormat(raw_file_format)
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index 97c88a99ee..154671c92e 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -604,3 +604,26 @@ def test_write_manifest_list(
         assert entry.file_sequence_number == 0 if format_version == 1 else 3
         assert entry.snapshot_id == 8744736658442914487
         assert entry.status == ManifestEntryStatus.ADDED
+
+
+@pytest.mark.parametrize(
+    "raw_file_format,expected_file_format",
+    [
+        ("avro", FileFormat("AVRO")),
+        ("AVRO", FileFormat("AVRO")),
+        ("parquet", FileFormat("PARQUET")),
+        ("PARQUET", FileFormat("PARQUET")),
+        ("orc", FileFormat("ORC")),
+        ("ORC", FileFormat("ORC")),
+        ("NOT_EXISTS", None),
+    ],
+)
+def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
+    if expected_file_format:
+        parsed_file_format = FileFormat(raw_file_format)
+        assert parsed_file_format == expected_file_format, (
+            f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
+        )
+    else:
+        with pytest.raises(ValueError):
+            _ = FileFormat(raw_file_format)

From 4e3af1eb6e30d33b214daade30830335524cb4ef Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Sat, 7 Dec 2024 11:31:46 +1300
Subject: [PATCH 3/3] Fix linting

---
 pyiceberg/manifest.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index e2352abca6..a56da5fc05 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -30,7 +30,7 @@
     Optional,
     Tuple,
     Type,
-    Union
+    Union,
 )
 
 from cachetools import LRUCache, cached
@@ -101,7 +101,7 @@ class FileFormat(str, Enum):
     @classmethod
     def _missing_(cls, value: object) -> Union[None, str]:
         for member in cls:
-            if getattr(member, "value") == str(value).upper():
+            if member.value == str(value).upper():
                 return member
         return None
 