From 62b52140962fec746a6d651a80a06b7d1945e16e Mon Sep 17 00:00:00 2001 From: Matthew Suozzo Date: Sat, 15 Feb 2025 17:55:45 +0000 Subject: [PATCH] Add support for collections.abc.Sequence Work towards #29135 Notes: - The naming used for the TypeConstraint is necessary to avoid colliding with the existing base class used for other collections.abc generics. - The change in transforms/stats.py is required because of a test failure that appears to require that `int`, rather than `Sequence[int]`, be a valid param. --- sdks/python/apache_beam/transforms/stats.py | 2 +- .../typehints/native_type_compatibility.py | 11 +++++ .../native_type_compatibility_test.py | 8 ++++ .../python/apache_beam/typehints/typehints.py | 40 +++++++++++++++++++ .../apache_beam/typehints/typehints_test.py | 25 ++++++++++++ 5 files changed, 85 insertions(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/transforms/stats.py b/sdks/python/apache_beam/transforms/stats.py index 6c45fe56a988..fb38a883dd39 100644 --- a/sdks/python/apache_beam/transforms/stats.py +++ b/sdks/python/apache_beam/transforms/stats.py @@ -321,7 +321,7 @@ def _display_data(num_quantiles, key, reverse, weighted, input_batched): } @typehints.with_input_types( - typehints.Union[typing.Sequence[T], Tuple[T, float]]) + typing.Union[T, typing.Sequence[T], Tuple[T, float]]) @typehints.with_output_types(List[T]) class Globally(PTransform): """ diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py index e9332635f255..da5bd6b0c0c4 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py @@ -65,6 +65,7 @@ collections.abc.Set, collections.abc.MutableSet, collections.abc.Collection, + collections.abc.Sequence, ] @@ -144,6 +145,10 @@ def _match_is_exactly_collection(user_type): return getattr(user_type, '__origin__', None) is collections.abc.Collection +def 
_match_is_exactly_sequence(user_type): + return getattr(user_type, '__origin__', None) is collections.abc.Sequence + + def match_is_named_tuple(user_type): return ( _safe_issubclass(user_type, typing.Tuple) and @@ -405,6 +410,10 @@ def convert_to_beam_type(typ): match=_match_issubclass(TypedWindowedValue), arity=1, beam_type=typehints.WindowedValue), + _TypeMapEntry( + match=_match_is_exactly_sequence, + arity=1, + beam_type=typehints.Sequence), ] # Find the first matching entry. @@ -521,6 +530,8 @@ def convert_to_python_type(typ): return tuple[tuple(convert_to_python_types(typ.tuple_types))] if isinstance(typ, typehints.TupleSequenceConstraint): return tuple[convert_to_python_type(typ.inner_type), ...] + if isinstance(typ, typehints.ABCSequenceTypeConstraint): + return collections.abc.Sequence[convert_to_python_type(typ.inner_type)] if isinstance(typ, typehints.IteratorTypeConstraint): return collections.abc.Iterator[convert_to_python_type(typ.yielded_type)] diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py index 3f3603c2c978..e5366260c88e 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py @@ -245,6 +245,14 @@ def test_convert_to_beam_type_with_collections_types(self): 'collection of tuples', collections.abc.Collection[tuple[str, int]], typehints.Collection[typehints.Tuple[str, int]]), + ( + 'nested sequence', + tuple[collections.abc.Sequence[str], int], + typehints.Tuple[typehints.Sequence[str], int]), + ( + 'sequence of tuples', + collections.abc.Sequence[tuple[str, int]], + typehints.Sequence[typehints.Tuple[str, int]]), ] for test_case in test_cases: diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index a65a0f753826..67f9bb71c015 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ 
b/sdks/python/apache_beam/typehints/typehints.py @@ -83,6 +83,7 @@ 'Set', 'FrozenSet', 'Collection', + 'Sequence', 'Iterable', 'Iterator', 'Generator', @@ -1035,6 +1036,7 @@ def _is_subclass_constraint(sub): sub, ( CollectionTypeConstraint, + ABCSequenceTypeConstraint, FrozenSetTypeConstraint, SetTypeConstraint, ListConstraint)) @@ -1072,6 +1074,43 @@ def __getitem__(self, type_param): CollectionTypeConstraint = CollectionHint.CollectionTypeConstraint +class SequenceHint(CompositeTypeHint): + """A Sequence type-hint. + + Sequence[X] defines a type-hint for a sequence of homogeneous types. 'X' may + be either a built-in Python type or another nested TypeConstraint. + + This represents collections.abc.Sequence type, which implements __getitem__, + __len__, and __contains__. This is more specific than Iterable but less + restrictive than List, providing a good middle ground for sequence-like types. + """ + class ABCSequenceTypeConstraint(SequenceTypeConstraint): + def __init__(self, type_param): + super().__init__(type_param, abc.Sequence) + + def __repr__(self): + return 'Sequence[%s]' % repr(self.inner_type) + + def _consistent_with_check_(self, sub): + if isinstance(sub, (ListConstraint, TupleConstraint)): + # Lists and Tuples are Sequences + if isinstance(sub, TupleConstraint): + # For tuples, all elements must be consistent with the sequence type + return all( + is_consistent_with(elem, self.inner_type) + for elem in sub.tuple_types) + return is_consistent_with(sub.inner_type, self.inner_type) + return super()._consistent_with_check_(sub) + + def __getitem__(self, type_param): + validate_composite_type_param( + type_param, error_msg_prefix='Parameter to a Sequence hint') + return self.ABCSequenceTypeConstraint(type_param) + + +ABCSequenceTypeConstraint = SequenceHint.ABCSequenceTypeConstraint + + class IterableHint(CompositeTypeHint): """An Iterable type-hint. 
@@ -1252,6 +1291,7 @@ def __getitem__(self, type_params): Set = SetHint() FrozenSet = FrozenSetHint() Collection = CollectionHint() +Sequence = SequenceHint() Iterable = IterableHint() Iterator = IteratorHint() Generator = GeneratorHint() diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index 6611dcecab01..175f8621b6cf 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -875,6 +875,30 @@ def test_getitem_invalid_composite_type_param(self): e.exception.args[0]) +class SequenceHintTestCase(TypeHintTestCase): + def test_type_constraint_compatibility(self): + self.assertCompatible(typehints.Sequence[str], typehints.List[str]) + self.assertCompatible(typehints.Sequence[str], typehints.Tuple[str]) + self.assertCompatible( + typehints.Sequence[typehints.Any], typehints.Sequence[str]) + self.assertCompatible( + typehints.Sequence[str], typehints.Sequence[typehints.Any]) + self.assertCompatible(typehints.Any, typehints.Sequence[str]) + + def test_one_way_compatibility(self): + self.assertNotCompatible(typehints.List[str], typehints.Sequence[str]) + self.assertNotCompatible(typehints.Tuple[str], typehints.Sequence[str]) + + def test_getitem_invalid_composite_type_param(self): + with self.assertRaises(TypeError) as e: + typehints.Sequence[5] + self.assertEqual( + 'Parameter to a Sequence hint must be a ' + 'non-sequence, a type, or a TypeConstraint. 
5 is ' + 'an instance of int.', + e.exception.args[0]) + + class IterableHintTestCase(TypeHintTestCase): def test_getitem_invalid_composite_type_param(self): with self.assertRaises(TypeError) as e: @@ -891,6 +915,7 @@ def test_compatibility(self): self.assertCompatible( typehints.Iterable[typehints.Any], typehints.List[typehints.Tuple[int, bool]]) + self.assertCompatible(typehints.Iterable[str], typehints.Sequence[str]) self.assertCompatible(typehints.Iterable[int], typehints.Iterable[int]) self.assertCompatible(