From 9b086edd1e78e1bcadf91abcb53bc13b18a8c127 Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Sat, 7 Dec 2024 09:10:45 +1300
Subject: [PATCH 1/3] Add _missing_ to FileFormat Enum to make value lookup case-insensitive
---
pyiceberg/manifest.py | 8 ++++++++
tests/test_manifest.py | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)
create mode 100644 tests/test_manifest.py
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index 6774499f2e..e2352abca6 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -30,6 +30,7 @@
Optional,
Tuple,
Type,
+ Union
)
from cachetools import LRUCache, cached
@@ -97,6 +98,13 @@ class FileFormat(str, Enum):
PARQUET = "PARQUET"
ORC = "ORC"
+ @classmethod
+ def _missing_(cls, value: object) -> Union[None, str]:
+ for member in cls:
+ if getattr(member, "value") == str(value).upper():
+ return member
+ return None
+
def __repr__(self) -> str:
"""Return the string representation of the FileFormat class."""
return f"FileFormat.{self.name}"
diff --git a/tests/test_manifest.py b/tests/test_manifest.py
new file mode 100644
index 0000000000..8a45fbc747
--- /dev/null
+++ b/tests/test_manifest.py
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from pyiceberg.manifest import FileFormat
+
+
+@pytest.mark.parametrize(
+ "raw_file_format,expected_file_format",
+ [
+ ("avro", FileFormat("AVRO")),
+ ("AVRO", FileFormat("AVRO")),
+ ("parquet", FileFormat("PARQUET")),
+ ("PARQUET", FileFormat("PARQUET")),
+ ("orc", FileFormat("ORC")),
+ ("ORC", FileFormat("ORC")),
+ ("NOT_EXISTS", None),
+ ],
+)
+def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
+ if expected_file_format:
+ parsed_file_format = FileFormat(raw_file_format)
+ assert parsed_file_format == expected_file_format, (
+ f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
+ )
+ else:
+ with pytest.raises(ValueError):
+ _ = FileFormat(raw_file_format)
From 751c24b18ec961711c793a6b465b38fa529e5122 Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Sat, 7 Dec 2024 11:21:03 +1300
Subject: [PATCH 2/3] Move the FileFormat test into the existing tests/utils/test_manifest.py file
---
tests/test_manifest.py | 42 ------------------------------------
tests/utils/test_manifest.py | 23 ++++++++++++++++++++
2 files changed, 23 insertions(+), 42 deletions(-)
delete mode 100644 tests/test_manifest.py
diff --git a/tests/test_manifest.py b/tests/test_manifest.py
deleted file mode 100644
index 8a45fbc747..0000000000
--- a/tests/test_manifest.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import pytest
-
-from pyiceberg.manifest import FileFormat
-
-
-@pytest.mark.parametrize(
- "raw_file_format,expected_file_format",
- [
- ("avro", FileFormat("AVRO")),
- ("AVRO", FileFormat("AVRO")),
- ("parquet", FileFormat("PARQUET")),
- ("PARQUET", FileFormat("PARQUET")),
- ("orc", FileFormat("ORC")),
- ("ORC", FileFormat("ORC")),
- ("NOT_EXISTS", None),
- ],
-)
-def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
- if expected_file_format:
- parsed_file_format = FileFormat(raw_file_format)
- assert parsed_file_format == expected_file_format, (
- f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
- )
- else:
- with pytest.raises(ValueError):
- _ = FileFormat(raw_file_format)
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index 97c88a99ee..154671c92e 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -604,3 +604,26 @@ def test_write_manifest_list(
assert entry.file_sequence_number == 0 if format_version == 1 else 3
assert entry.snapshot_id == 8744736658442914487
assert entry.status == ManifestEntryStatus.ADDED
+
+
+@pytest.mark.parametrize(
+ "raw_file_format,expected_file_format",
+ [
+ ("avro", FileFormat("AVRO")),
+ ("AVRO", FileFormat("AVRO")),
+ ("parquet", FileFormat("PARQUET")),
+ ("PARQUET", FileFormat("PARQUET")),
+ ("orc", FileFormat("ORC")),
+ ("ORC", FileFormat("ORC")),
+ ("NOT_EXISTS", None),
+ ],
+)
+def test_file_format_case_insensitive(raw_file_format: str, expected_file_format: FileFormat) -> None:
+ if expected_file_format:
+ parsed_file_format = FileFormat(raw_file_format)
+ assert parsed_file_format == expected_file_format, (
+ f"File format {raw_file_format}: {parsed_file_format} != {expected_file_format}"
+ )
+ else:
+ with pytest.raises(ValueError):
+ _ = FileFormat(raw_file_format)
From 4e3af1eb6e30d33b214daade30830335524cb4ef Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Sat, 7 Dec 2024 11:31:46 +1300
Subject: [PATCH 3/3] Fix linting
---
pyiceberg/manifest.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index e2352abca6..a56da5fc05 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -30,7 +30,7 @@
Optional,
Tuple,
Type,
- Union
+ Union,
)
from cachetools import LRUCache, cached
@@ -101,7 +101,7 @@ class FileFormat(str, Enum):
@classmethod
def _missing_(cls, value: object) -> Union[None, str]:
for member in cls:
- if getattr(member, "value") == str(value).upper():
+ if member.value == str(value).upper():
return member
return None