From 24f0939c62a0c07b6953aec78cdf2821ca4538a4 Mon Sep 17 00:00:00 2001 From: Sean Kelly <10122262+skellys@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:09:51 -0400 Subject: [PATCH] fix: partition evaluator thread safety --- pyiceberg/table/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index ae35b34384..21a3b7fc23 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -828,8 +828,12 @@ def _build_partition_evaluator(self, spec_id: int) -> Callable[[DataFile], bool] partition_schema = Schema(*partition_type.fields) partition_expr = self.partition_filters[spec_id] - evaluator = visitors.expression_evaluator(partition_schema, partition_expr, self.case_sensitive) - return lambda data_file: evaluator(data_file.partition) + # The lambda created here is run in multiple threads. + # So we avoid creating _EvaluatorExpression methods bound to a single + # shared instance across multiple threads. + return lambda data_file: visitors.expression_evaluator(partition_schema, partition_expr, self.case_sensitive)( + data_file.partition + ) def _check_sequence_number(self, min_data_sequence_number: int, manifest: ManifestFile) -> bool: """Ensure that no manifests are loaded that contain deletes that are older than the data.