From 1a7bf3c7ea1874d15bb090000e0a60c1ed5d4770 Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Thu, 11 Jan 2024 22:33:36 -0800 Subject: [PATCH 01/11] replace appends with list comprehensions for speed --- Lib/dataclasses.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 2fba32b5ffbc1e..634f7b9c376afe 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -591,13 +591,13 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, '__dataclass_builtins_object__': object, }) - body_lines = [] - for f in fields: - line = _field_init(f, frozen, locals, self_name, slots) + body_lines = [ + line + for f in fields # line is None means that this field doesn't require # initialization (it's a pseudo-field). Just skip it. - if line: - body_lines.append(line) + if (line := _field_init(f, frozen, locals, self_name, slots)) + ] # Does this class have a post-init function? if has_post_init: @@ -1342,11 +1342,10 @@ def _asdict_inner(obj, dict_factory): for f in fields(obj) } else: - result = [] - for f in fields(obj): - value = _asdict_inner(getattr(obj, f.name), dict_factory) - result.append((f.name, value)) - return dict_factory(result) + return dict_factory([ + (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) + for f in fields(obj) + ]) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is @@ -1416,11 +1415,10 @@ def _astuple_inner(obj, tuple_factory): if type(obj) in _ATOMIC_TYPES: return obj elif _is_dataclass_instance(obj): - result = [] - for f in fields(obj): - value = _astuple_inner(getattr(obj, f.name), tuple_factory) - result.append(value) - return tuple_factory(result) + return tuple_factory([ + _astuple_inner(getattr(obj, f.name), tuple_factory) + for f in fields(obj) + ]) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is From ffc6e603b15dbc563e21a16ba60a668e0ed88946 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:22:56 +0000 Subject: [PATCH 02/11] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst b/Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst new file mode 100644 index 00000000000000..e9bc75445f0ab2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst @@ -0,0 +1 @@ +Convert several for loops in the dataclasses module to list comprehensions. From 98965fa798ccce8bede6d1a020d823121f6df136 Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Sat, 13 Jan 2024 21:02:49 -0800 Subject: [PATCH 03/11] focus on _asdict_inner --- Lib/dataclasses.py | 63 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 634f7b9c376afe..6f2b123a42b925 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1,3 +1,5 @@ +import time + import re import sys import copy @@ -1332,7 +1334,8 @@ class C: def _asdict_inner(obj, dict_factory): - if type(obj) in _ATOMIC_TYPES: + obj_type = type(obj) + if obj_type in _ATOMIC_TYPES: return obj elif _is_dataclass_instance(obj): # fast path for the common case @@ -1346,33 +1349,41 @@ def _asdict_inner(obj, dict_factory): (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) for f in fields(obj) ]) - elif isinstance(obj, tuple) and hasattr(obj, '_fields'): - # obj is a namedtuple. Recurse into it, but the returned - # object is another namedtuple of the same type. This is - # similar to how other list- or tuple-derived classes are - # treated (see below), but we just need to create them - # differently because a namedtuple's __init__ needs to be - # called differently (see bpo-34363). - - # I'm not using namedtuple's _asdict() - # method, because: - # - it does not recurse in to the namedtuple fields and - # convert them to dicts (using dict_factory). - # - I don't actually want to return a dict here. The main - # use case here is json.dumps, and it handles converting - # namedtuples to lists. Admittedly we're losing some - # information here when we produce a json list instead of a - # dict. Note that if we returned dicts here instead of - # namedtuples, we could no longer call asdict() on a data - # structure where a namedtuple was used as a dict key. - - return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) - elif isinstance(obj, (list, tuple)): + elif obj_type is list: + return [_asdict_inner(v, dict_factory) for v in obj] + elif obj_type is dict: + return { + _asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory) + for k, v in obj.items() + } + elif isinstance(obj, tuple): + if hasattr(obj, '_fields'): + # obj is a namedtuple. Recurse into it, but the returned + # object is another namedtuple of the same type. This is + # similar to how other list- or tuple-derived classes are + # treated (see below), but we just need to create them + # differently because a namedtuple's __init__ needs to be + # called differently (see bpo-34363). + + # I'm not using namedtuple's _asdict() + # method, because: + # - it does not recurse in to the namedtuple fields and + # convert them to dicts (using dict_factory). + # - I don't actually want to return a dict here. The main + # use case here is json.dumps, and it handles converting + # namedtuples to lists. Admittedly we're losing some + # information here when we produce a json list instead of a + # dict. Note that if we returned dicts here instead of + # namedtuples, we could no longer call asdict() on a data + # structure where a namedtuple was used as a dict key. + return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) + else: + return tuple([_asdict_inner(v, dict_factory) for v in obj]) + elif issubclass(obj_type, list): # Assume we can create an object of this type by passing in a - # generator (which is not true for namedtuples, handled - # above). + # generator return type(obj)(_asdict_inner(v, dict_factory) for v in obj) - elif isinstance(obj, dict): + elif issubclass(obj_type, dict): if hasattr(type(obj), 'default_factory'): # obj is a defaultdict, which has a different constructor from # dict as it requires the default_factory as its first arg. From d593af4c0c68517dfe37ccc81cd26f0f7a1a2cfa Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Sat, 13 Jan 2024 21:13:34 -0800 Subject: [PATCH 04/11] subclasses --- Lib/dataclasses.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 6f2b123a42b925..4fd8c08c122ed8 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1356,7 +1356,20 @@ def _asdict_inner(obj, dict_factory): _asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory) for k, v in obj.items() } - elif isinstance(obj, tuple): + elif obj_type is tuple: + return tuple([_asdict_inner(v, dict_factory) for v in obj]) + elif issubclass(obj_type, dict): + if hasattr(obj_type, 'default_factory'): + # obj is a defaultdict, which has a different constructor from + # dict as it requires the default_factory as its first arg. + result = obj_type(getattr(obj, 'default_factory')) + for k, v in obj.items(): + result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) + return result + return obj_type((_asdict_inner(k, dict_factory), + _asdict_inner(v, dict_factory)) + for k, v in obj.items()) + elif issubclass(obj_type, tuple): if hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is @@ -1376,24 +1389,13 @@ def _asdict_inner(obj, dict_factory): # dict. Note that if we returned dicts here instead of # namedtuples, we could no longer call asdict() on a data # structure where a namedtuple was used as a dict key. - return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) + return obj_type(*[_asdict_inner(v, dict_factory) for v in obj]) else: - return tuple([_asdict_inner(v, dict_factory) for v in obj]) + return obj_type(_asdict_inner(v, dict_factory) for v in obj) elif issubclass(obj_type, list): # Assume we can create an object of this type by passing in a # generator - return type(obj)(_asdict_inner(v, dict_factory) for v in obj) - elif issubclass(obj_type, dict): - if hasattr(type(obj), 'default_factory'): - # obj is a defaultdict, which has a different constructor from - # dict as it requires the default_factory as its first arg. - result = type(obj)(getattr(obj, 'default_factory')) - for k, v in obj.items(): - result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) - return result - return type(obj)((_asdict_inner(k, dict_factory), - _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + return obj_type(_asdict_inner(v, dict_factory) for v in obj) else: return copy.deepcopy(obj) From 9f81e16d373bc1989fc03ca4d466299e3c62998f Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Sat, 13 Jan 2024 21:18:56 -0800 Subject: [PATCH 05/11] no function call for dataclass check --- Lib/dataclasses.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 4fd8c08c122ed8..b556f17062d990 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1,5 +1,3 @@ -import time - import re import sys import copy @@ -1337,7 +1335,8 @@ def _asdict_inner(obj, dict_factory): obj_type = type(obj) if obj_type in _ATOMIC_TYPES: return obj - elif _is_dataclass_instance(obj): + # dataclass instance + elif hasattr(obj_type, _FIELDS): # fast path for the common case if dict_factory is dict: return { @@ -1362,13 +1361,13 @@ def _asdict_inner(obj, dict_factory): if hasattr(obj_type, 'default_factory'): # obj is a defaultdict, which has a different constructor from # dict as it requires the default_factory as its first arg. - result = obj_type(getattr(obj, 'default_factory')) + result = obj_type(obj.default_factory) for k, v in obj.items(): result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) return result return obj_type((_asdict_inner(k, dict_factory), _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + for k, v in obj.items()) elif issubclass(obj_type, tuple): if hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned From 5ca7a6fd14396e4aede76bfaef55bad158922ba0 Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Sun, 14 Jan 2024 07:51:39 -0800 Subject: [PATCH 06/11] revert order --- Lib/dataclasses.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index b556f17062d990..dee5c216dfe842 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1357,17 +1357,6 @@ def _asdict_inner(obj, dict_factory): } elif obj_type is tuple: return tuple([_asdict_inner(v, dict_factory) for v in obj]) - elif issubclass(obj_type, dict): - if hasattr(obj_type, 'default_factory'): - # obj is a defaultdict, which has a different constructor from - # dict as it requires the default_factory as its first arg. - result = obj_type(obj.default_factory) - for k, v in obj.items(): - result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) - return result - return obj_type((_asdict_inner(k, dict_factory), - _asdict_inner(v, dict_factory)) - for k, v in obj.items()) elif issubclass(obj_type, tuple): if hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned @@ -1395,6 +1384,17 @@ def _asdict_inner(obj, dict_factory): # Assume we can create an object of this type by passing in a # generator return obj_type(_asdict_inner(v, dict_factory) for v in obj) + elif issubclass(obj_type, dict): + if hasattr(obj_type, 'default_factory'): + # obj is a defaultdict, which has a different constructor from + # dict as it requires the default_factory as its first arg. + result = obj_type(obj.default_factory) + for k, v in obj.items(): + result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) + return result + return obj_type((_asdict_inner(k, dict_factory), + _asdict_inner(v, dict_factory)) + for k, v in obj.items()) else: return copy.deepcopy(obj) From 67eeeec22d7b487f704b8e575334faebcea88420 Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Mon, 15 Jan 2024 11:19:43 -0800 Subject: [PATCH 07/11] comments and cleanup --- Lib/dataclasses.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index dee5c216dfe842..b8c16947b540cc 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1335,9 +1335,8 @@ def _asdict_inner(obj, dict_factory): obj_type = type(obj) if obj_type in _ATOMIC_TYPES: return obj - # dataclass instance elif hasattr(obj_type, _FIELDS): - # fast path for the common case + # dataclass instance: fast path for the common case if dict_factory is dict: return { f.name: _asdict_inner(getattr(obj, f.name), dict) @@ -1348,6 +1347,7 @@ def _asdict_inner(obj, dict_factory): (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) for f in fields(obj) ]) + # handle the builtin types first for speed; subclasses handled below elif obj_type is list: return [_asdict_inner(v, dict_factory) for v in obj] elif obj_type is dict: @@ -1380,10 +1380,6 @@ def _asdict_inner(obj, dict_factory): return obj_type(*[_asdict_inner(v, dict_factory) for v in obj]) else: return obj_type(_asdict_inner(v, dict_factory) for v in obj) - elif issubclass(obj_type, list): - # Assume we can create an object of this type by passing in a - # generator - return obj_type(_asdict_inner(v, dict_factory) for v in obj) elif issubclass(obj_type, dict): if hasattr(obj_type, 'default_factory'): # obj is a defaultdict, which has a different constructor from @@ -1395,6 +1391,10 @@ def _asdict_inner(obj, dict_factory): return obj_type((_asdict_inner(k, dict_factory), _asdict_inner(v, dict_factory)) for k, v in obj.items()) + elif issubclass(obj_type, list): + # Assume we can create an object of this type by passing in a + # generator + return obj_type(_asdict_inner(v, dict_factory) for v in obj) else: return copy.deepcopy(obj) From 1290c658e610303a2c0bedca511bea1a31de6bda Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Mon, 15 Jan 2024 11:29:08 -0800 Subject: [PATCH 08/11] remove news entry --- .../next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst b/Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst deleted file mode 100644 index e9bc75445f0ab2..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-01-13-00-22-55.gh-issue-114011.Om2h3p.rst +++ /dev/null @@ -1 +0,0 @@ -Convert several for loops in the dataclasses module to list comprehensions. From 916715b024ad6e482feae5643ffb9741310b76f4 Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Mon, 15 Jan 2024 11:33:03 -0800 Subject: [PATCH 09/11] undo init change --- Lib/dataclasses.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index b8c16947b540cc..dddec333c6c7e6 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -591,13 +591,13 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, '__dataclass_builtins_object__': object, }) - body_lines = [ - line - for f in fields + body_lines = [] + for f in fields: + line = _field_init(f, frozen, locals, self_name, slots) # line is None means that this field doesn't require # initialization (it's a pseudo-field). Just skip it. - if (line := _field_init(f, frozen, locals, self_name, slots)) - ] + if line: + body_lines.append(line) # Does this class have a post-init function? if has_post_init: From 14abed89a347a762f4afe64283875b275dc70794 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 15 Jan 2024 19:54:42 +0000 Subject: [PATCH 10/11] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst b/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst new file mode 100644 index 00000000000000..a5d8de8b66b569 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst @@ -0,0 +1 @@ +Speed up dataclasses.asdict by up to 1.35x. From 75d70529f6eaaf2911c6e7410dcc0c1f5d6862c2 Mon Sep 17 00:00:00 2001 From: Keith Philpott Date: Tue, 16 Jan 2024 21:14:58 -0800 Subject: [PATCH 11/11] update news --- .../next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst b/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst index a5d8de8b66b569..68b27a7b0c9f6c 100644 --- a/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst +++ b/Misc/NEWS.d/next/Library/2024-01-15-19-54-41.gh-issue-114087.Xic5vY.rst @@ -1 +1 @@ -Speed up dataclasses.asdict by up to 1.35x. +Speed up ``dataclasses.asdict`` up to 1.35x.