From 7e379c655b06c33766f39ef9a70ef19d1d4e8222 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 17 Mar 2021 18:00:04 -0400 Subject: [PATCH 1/3] Add kw_only to dataclass() and field(), and add KW_ONLY marker. --- Lib/dataclasses.py | 106 ++++++++++++++++++----- Lib/test/test_dataclasses.py | 159 +++++++++++++++++++++++++++++++++++ 2 files changed, 242 insertions(+), 23 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 422a95cebe8534..e33712266adff0 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -16,6 +16,7 @@ 'Field', 'FrozenInstanceError', 'InitVar', + 'KW_ONLY', 'MISSING', # Helper functions. @@ -179,6 +180,12 @@ class _MISSING_TYPE: pass MISSING = _MISSING_TYPE() +# A sentinel object to indicate that following fields are keyword-only by +# default. Use a class to give it a better repr. +class _KW_ONLY_TYPE: + pass +KW_ONLY = _KW_ONLY_TYPE() + # Since most per-field metadata will be unused, create an empty # read-only proxy that can be shared among all fields. _EMPTY_METADATA = types.MappingProxyType({}) @@ -227,7 +234,6 @@ def __repr__(self): def __class_getitem__(cls, type): return InitVar(type) - # Instances of Field are only ever created from within this module, # and only from the field() function, although Field instances are # exposed externally as (conceptually) read-only objects. @@ -248,11 +254,12 @@ class Field: 'init', 'compare', 'metadata', + 'kw_only', '_field_type', # Private: not to be used by user code. 
) def __init__(self, default, default_factory, init, repr, hash, compare, - metadata): + metadata, kw_only): self.name = None self.type = None self.default = default @@ -264,6 +271,7 @@ def __init__(self, default, default_factory, init, repr, hash, compare, self.metadata = (_EMPTY_METADATA if metadata is None else types.MappingProxyType(metadata)) + self.kw_only = kw_only self._field_type = None def __repr__(self): @@ -277,6 +285,7 @@ def __repr__(self): f'hash={self.hash!r},' f'compare={self.compare!r},' f'metadata={self.metadata!r},' + f'kw_only={self.kw_only!r},' f'_field_type={self._field_type}' ')') @@ -330,17 +339,19 @@ def __repr__(self): # so that a type checker can be told (via overloads) that this is a # function whose type depends on its parameters. def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True, - hash=None, compare=True, metadata=None): + hash=None, compare=True, metadata=None, kw_only=MISSING): """Return an object to identify dataclass fields. default is the default value of the field. default_factory is a 0-argument function called to initialize a field's value. If init - is True, the field will be a parameter to the class's __init__() - function. If repr is True, the field will be included in the - object's repr(). If hash is True, the field will be included in - the object's hash(). If compare is True, the field will be used - in comparison functions. metadata, if specified, must be a - mapping which is stored but not otherwise examined by dataclass. + is true, the field will be a parameter to the class's __init__() + function. If repr is true, the field will be included in the + object's repr(). If hash is true, the field will be included in the + object's hash(). If compare is true, the field will be used in + comparison functions. metadata, if specified, must be a mapping + which is stored but not otherwise examined by dataclass. If kw_only + is true, the field will become a keyword-only parameter to + __init__(). 
It is an error to specify both default and default_factory. """ @@ -348,7 +359,15 @@ def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True, if default is not MISSING and default_factory is not MISSING: raise ValueError('cannot specify both default and default_factory') return Field(default, default_factory, init, repr, hash, compare, - metadata) + metadata, kw_only) + + +def _fields_in_init_order(fields): + # Returns the fields as __init__ will output them. It returns 2 tuples: + # the first for normal args, and the second for keyword args. + return (tuple(f for f in fields if f.init and not f.kw_only), + tuple(f for f in fields if f.init and f.kw_only) + ) def _tuple_str(obj_name, fields): @@ -405,7 +424,6 @@ def _create_fn(name, args, body, *, globals=None, locals=None, local_vars = ', '.join(locals.keys()) txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}" - ns = {} exec(txt, globals, ns) func = ns['__create_fn__'](**locals) @@ -540,8 +558,16 @@ def _init_fn(fields, frozen, has_post_init, self_name, globals): if not body_lines: body_lines = ['pass'] + arg_fields, kw_arg_fields = _fields_in_init_order(fields) + _init_params = [_init_param(f) for f in arg_fields] + if kw_arg_fields: + # Add the keyword-only args. Because the * can only be added if + # there's at least one keyword-only arg, there needs to be a test here + # (instead of just concatenating the lists together).
+ _init_params += ['*'] + _init_params += [_init_param(f) for f in kw_arg_fields] return _create_fn('__init__', - [self_name] + [_init_param(f) for f in fields if f.init], + [self_name] + _init_params, body_lines, locals=locals, globals=globals, @@ -620,6 +646,9 @@ def _is_initvar(a_type, dataclasses): return (a_type is dataclasses.InitVar or type(a_type) is dataclasses.InitVar) +def _is_kw_only(a_type, dataclasses): + return a_type is dataclasses.KW_ONLY + def _is_type(annotation, cls, a_module, a_type, is_type_predicate): # Given a type annotation string, does it refer to a_type in @@ -685,10 +714,11 @@ def _is_type(annotation, cls, a_module, a_type, is_type_predicate): return False -def _get_field(cls, a_name, a_type): - # Return a Field object for this field name and type. ClassVars - # and InitVars are also returned, but marked as such (see - # f._field_type). +def _get_field(cls, a_name, a_type, default_kw_only): + # Return a Field object for this field name and type. ClassVars and + # InitVars are also returned, but marked as such (see f._field_type). + # default_kw_only is the value of kw_only to use if there isn't a field() + # that defines it. # If the default value isn't derived from Field, then it's only a # normal default value. Convert it to a Field(). @@ -759,6 +789,19 @@ def _get_field(cls, a_name, a_type): # init=)? It makes no sense for # ClassVar and InitVar to specify init=. + # kw_only validation and assignment. + if f._field_type in (_FIELD, _FIELD_INITVAR): + # For real and InitVar fields, if kw_only wasn't specified use the + # default value. + if f.kw_only is MISSING: + f.kw_only = default_kw_only + else: + # Make sure kw_only isn't set for ClassVars + assert f._field_type is _FIELD_CLASSVAR + if f.kw_only is not MISSING: + raise TypeError(f'field {f.name} is a ClassVar but specifies ' + 'kw_only') + # For real fields, disallow mutable defaults for known types. 
if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)): raise ValueError(f'mutable default {type(f.default)} for field ' @@ -830,7 +873,7 @@ def _hash_exception(cls, fields, globals): # version of this table. -def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen): +def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, kw_only): # Now that dicts retain insertion order, there's no reason to use # an ordered dict. I am leveraging that ordering here, because # derived class fields overwrite base class fields, but the order @@ -884,8 +927,22 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen): # Now find fields in our class. While doing so, validate some # things, and set the default values (as class attributes) where # we can. - cls_fields = [_get_field(cls, name, type) - for name, type in cls_annotations.items()] + cls_fields = [] + # Get a reference to this module for the _is_kw_only() test. + dataclasses = sys.modules[__name__] + for name, type in cls_annotations.items(): + # See if this is a marker to change the value of kw_only. + if (_is_kw_only(type, dataclasses) + or (isinstance(type, str) + and _is_type(type, cls, dataclasses, dataclasses.KW_ONLY, + _is_kw_only))): + # Switch the default to kw_only=True, and ignore this + # annotation: it's not a real field. + kw_only = True + else: + # Otherwise it's a field of some type. 
+ cls_fields.append(_get_field(cls, name, type, kw_only)) + for f in cls_fields: fields[f.name] = f @@ -1017,7 +1074,8 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen): str(inspect.signature(cls)).replace(' -> NoneType', '')) if '__match_args__' not in cls.__dict__: - cls.__match_args__ = tuple(f.name for f in flds if f.init) + args, kw_args = _fields_in_init_order(flds) + cls.__match_args__ = tuple(f.name for f in args) abc.update_abstractmethods(cls) @@ -1025,7 +1083,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen): def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False, - unsafe_hash=False, frozen=False): + unsafe_hash=False, frozen=False, kw_only=False): """Returns the same class as was passed in, with dunder methods added based on the fields defined in the class. @@ -1035,11 +1093,13 @@ def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False, repr is true, a __repr__() method is added. If order is true, rich comparison dunder methods are added. If unsafe_hash is true, a __hash__() method function is added. If frozen is true, fields may - not be assigned to after instance creation. + not be assigned to after instance creation. If kw_only is true, + then by default all fields are keyword-only. """ def wrap(cls): - return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen) + return _process_class(cls, init, repr, eq, order, unsafe_hash, + frozen, kw_only) # See if we're being called as @dataclass or @dataclass(). 
if cls is None: diff --git a/Lib/test/test_dataclasses.py b/Lib/test/test_dataclasses.py index 0bfed41b369d19..0aaf3278d317ba 100644 --- a/Lib/test/test_dataclasses.py +++ b/Lib/test/test_dataclasses.py @@ -62,6 +62,7 @@ def test_field_repr(self): f"default=1,default_factory={MISSING!r}," \ "init=True,repr=False,hash=None," \ "compare=True,metadata=mappingproxy({})," \ + f"kw_only={MISSING!r}," \ "_field_type=None)" self.assertEqual(repr_output, expected_output) @@ -3391,5 +3392,163 @@ class C: self.assertIs(C(42).__match_args__, ma) +class TestKwArgs(unittest.TestCase): + def test_no_classvar_kwarg(self): + msg = 'field a is a ClassVar but specifies kw_only' + with self.assertRaisesRegex(TypeError, msg): + @dataclass + class A: + a: ClassVar[int] = field(kw_only=True) + + with self.assertRaisesRegex(TypeError, msg): + @dataclass + class A: + a: ClassVar[int] = field(kw_only=False) + + with self.assertRaisesRegex(TypeError, msg): + @dataclass(kw_only=True) + class A: + a: ClassVar[int] = field(kw_only=False) + + def test_field_marked_as_kwonly(self): + ####################### + # Using dataclass(kw_only=True) + @dataclass(kw_only=True) + class A: + a: int + self.assertTrue(fields(A)[0].kw_only) + + @dataclass(kw_only=True) + class A: + a: int = field(kw_only=True) + self.assertTrue(fields(A)[0].kw_only) + + @dataclass(kw_only=True) + class A: + a: int = field(kw_only=False) + self.assertFalse(fields(A)[0].kw_only) + + ####################### + # Using dataclass(kw_only=False) + @dataclass(kw_only=False) + class A: + a: int + self.assertFalse(fields(A)[0].kw_only) + + @dataclass(kw_only=False) + class A: + a: int = field(kw_only=True) + self.assertTrue(fields(A)[0].kw_only) + + @dataclass(kw_only=False) + class A: + a: int = field(kw_only=False) + self.assertFalse(fields(A)[0].kw_only) + + ####################### + # Not specifying dataclass(kw_only) + @dataclass + class A: + a: int + self.assertFalse(fields(A)[0].kw_only) + + @dataclass + class A: + a: int = 
field(kw_only=True) + self.assertTrue(fields(A)[0].kw_only) + + @dataclass + class A: + a: int = field(kw_only=False) + self.assertFalse(fields(A)[0].kw_only) + + def test_match_args(self): + # kw fields don't show up in __match_args__. + @dataclass(kw_only=True) + class C: + a: int + self.assertEqual(C(a=42).__match_args__, ()) + + @dataclass + class C: + a: int + b: int = field(kw_only=True) + self.assertEqual(C(42, b=10).__match_args__, ('a',)) + + def test_KW_ONLY(self): + @dataclass + class A: + a: int + _: KW_ONLY + b: int + c: int + A(3, c=5, b=4) + msg = "takes 2 positional arguments but 4 were given" + with self.assertRaisesRegex(TypeError, msg): + A(3, 4, 5) + + + @dataclass(kw_only=True) + class B: + a: int + _: KW_ONLY + b: int + c: int + B(a=3, b=4, c=5) + msg = "takes 1 positional argument but 4 were given" + with self.assertRaisesRegex(TypeError, msg): + B(3, 4, 5) + + # Explicitly make a field that follows KW_ONLY be non-keyword-only. + @dataclass + class C: + a: int + _: KW_ONLY + b: int + c: int = field(kw_only=False) + c = C(1, 2, b=3) + self.assertEqual(c.a, 1) + self.assertEqual(c.b, 3) + self.assertEqual(c.c, 2) + c = C(1, b=3, c=2) + self.assertEqual(c.a, 1) + self.assertEqual(c.b, 3) + self.assertEqual(c.c, 2) + c = C(1, b=3, c=2) + self.assertEqual(c.a, 1) + self.assertEqual(c.b, 3) + self.assertEqual(c.c, 2) + c = C(c=2, b=3, a=1) + self.assertEqual(c.a, 1) + self.assertEqual(c.b, 3) + self.assertEqual(c.c, 2) + + def test_post_init(self): + @dataclass + class A: + a: int + _: KW_ONLY + b: InitVar[int] + c: int + d: InitVar[int] + def __post_init__(self, b, d): + raise CustomError(f'{b=} {d=}') + with self.assertRaisesRegex(CustomError, 'b=3 d=4'): + A(1, c=2, b=3, d=4) + + @dataclass + class B: + a: int + _: KW_ONLY + b: InitVar[int] + c: int + d: InitVar[int] + def __post_init__(self, b, d): + self.a = b + self.c = d + b = B(1, c=2, b=3, d=4) + self.assertEqual(asdict(b), {'a': 3, 'c': 4}) + + if __name__ == '__main__': unittest.main()
From c58e8e4aae2c5d2ee90509a6765688977a5eadcb Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 17 Mar 2021 19:06:57 -0400 Subject: [PATCH 2/3] Added blurb. --- .../next/Library/2021-03-17-19-06-45.bpo-43532.W2Ntnm.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-03-17-19-06-45.bpo-43532.W2Ntnm.rst diff --git a/Misc/NEWS.d/next/Library/2021-03-17-19-06-45.bpo-43532.W2Ntnm.rst b/Misc/NEWS.d/next/Library/2021-03-17-19-06-45.bpo-43532.W2Ntnm.rst new file mode 100644 index 00000000000000..11ea5f916d173f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-03-17-19-06-45.bpo-43532.W2Ntnm.rst @@ -0,0 +1,2 @@ +Add the ability to specify keyword-only fields to dataclasses. These fields +will become keyword-only arguments to the generated __init__. From ea4c149e1150c1f6371bf537c26ff184c6e203c8 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Sun, 25 Apr 2021 12:35:04 -0400 Subject: [PATCH 3/3] Started the documentation. --- Doc/library/dataclasses.rst | 41 ++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 133cc0a065cace..2ed7b0b1e4d23e 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -46,7 +46,7 @@ directly specified in the ``InventoryItem`` definition shown above. Module-level decorators, classes, and functions ----------------------------------------------- -.. decorator:: dataclass(*, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False) +.. decorator:: dataclass(*, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, kw_only=False) This function is a :term:`decorator` that is used to add generated :term:`special method`\s to classes, as described below. @@ -79,7 +79,7 @@ Module-level decorators, classes, and functions class C: ... 
- @dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False) + @dataclass(init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, kw_only=False) class C: ... @@ -161,6 +161,11 @@ Module-level decorators, classes, and functions :meth:`__setattr__` or :meth:`__delattr__` is defined in the class, then :exc:`TypeError` is raised. See the discussion below. + - ``kw_only``: If true (the default value is ``False``), then all + fields will be defined in the generated :meth:`__init__` method as + keyword-only by default. See the :term:`parameter` glossary entry for + details. Also see the ``dataclasses.KW_ONLY`` section. + ``field``\s may optionally specify a default value, using normal Python syntax:: @@ -325,7 +330,7 @@ Module-level decorators, classes, and functions Raises :exc:`TypeError` if ``instance`` is not a dataclass instance. -.. function:: make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False) +.. function:: make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, kw_only=False) Creates a new dataclass with name ``cls_name``, fields as defined in ``fields``, base classes as given in ``bases``, and initialized @@ -333,8 +338,8 @@ Module-level decorators, classes, and functions iterable whose elements are each either ``name``, ``(name, type)``, or ``(name, type, Field)``. If just ``name`` is supplied, ``typing.Any`` is used for ``type``. The values of ``init``, - ``repr``, ``eq``, ``order``, ``unsafe_hash``, and ``frozen`` have - the same meaning as they do in :func:`dataclass`. + ``repr``, ``eq``, ``order``, ``unsafe_hash``, ``frozen``, and + ``kw_only`` have the same meaning as they do in :func:`dataclass`.
This function is not strictly required, because any Python mechanism for creating a new class with ``__annotations__`` can @@ -511,6 +516,32 @@ The generated :meth:`__init__` method for ``C`` will look like:: def __init__(self, x: int = 15, y: int = 0, z: int = 10): +Re-ordering of keyword-only parameters in __init__ +-------------------------------------------------- + +After the fields needed for :meth:`__init__` are computed, any +keyword-only fields are put after regular fields. In this example, +``Base.y`` and ``D.t`` are keyword-only fields:: + + @dataclass + class Base: + x: Any = 15.0 + _: KW_ONLY + y: int = 0 + + @dataclass + class D(Base): + z: int = 10 + t: int = field(kw_only=True, default=0) + +The generated :meth:`__init__` method for ``D`` will look like:: + + def __init__(self, x: Any = 15.0, z: int = 10, *, y: int = 0, t: int = 0): + +The relative ordering of keyword-only parameters is maintained in the +re-ordered :meth:`__init__` parameter list. + + Default factory functions -------------------------