From 69df59096281d0006b0861db79957e6d306c5301 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Thu, 23 Jun 2022 00:45:49 -0700 Subject: [PATCH 1/5] Implement late-binding loop check --- README.rst | 3 ++ bugbear.py | 91 +++++++++++++++++++++++++++++++++++++++++++ tests/b023.py | 56 ++++++++++++++++++++++++++ tests/test_bugbear.py | 22 +++++++++++ 4 files changed, 172 insertions(+) create mode 100644 tests/b023.py diff --git a/README.rst b/README.rst index bde5850..600e96c 100644 --- a/README.rst +++ b/README.rst @@ -150,6 +150,9 @@ No exceptions will be suppressed and therefore this context manager is redundant N.B. this rule currently does not flag `suppress` calls to avoid potential false positives due to similarly named user-defined functions. +**B023**: Functions defined inside a loop must not use variables redefined in +the loop, because `late-binding closures are a classic gotcha +`__. Opinionated warnings ~~~~~~~~~~~~~~~~~~~~ diff --git a/bugbear.py b/bugbear.py index f1dcf9e..8e7ae0c 100644 --- a/bugbear.py +++ b/bugbear.py @@ -26,6 +26,7 @@ ast.DictComp, ast.GeneratorExp, ) +FUNCTION_NODES = (ast.AsyncFunctionDef, ast.FunctionDef, ast.Lambda) Context = namedtuple("Context", ["node", "stack"]) @@ -198,6 +199,21 @@ def _to_name_str(node): return _to_name_str(node.value) +def names_from_assignments(node): + for name in ast.walk(node): + if isinstance(name, ast.Name): + yield name.id + elif isinstance(name, ast.arg): + yield name.arg + + +def children_in_scope(node): + yield node + if not isinstance(node, FUNCTION_NODES): + for child in ast.iter_child_nodes(node): + yield from children_in_scope(child) + + def _typesafe_issubclass(cls, class_or_tuple): try: return issubclass(cls, class_or_tuple) @@ -348,6 +364,31 @@ def visit_Assign(self, node): def visit_For(self, node): self.check_for_b007(node) self.check_for_b020(node) + self.check_for_b023(node) + self.generic_visit(node) + + def visit_AsyncFor(self, node): + self.check_for_b023(node) + self.generic_visit(node) + + def visit_While(self, node): + self.check_for_b023(node) + self.generic_visit(node) + + def visit_ListComp(self, node): + self.check_for_b023(node) + self.generic_visit(node) + + def visit_SetComp(self, node): + self.check_for_b023(node) + self.generic_visit(node) + + def visit_DictComp(self, node): + self.check_for_b023(node) + self.generic_visit(node) + + def visit_GeneratorExp(self, node): + self.check_for_b023(node) self.generic_visit(node) def visit_Assert(self, node): @@ -520,6 +561,47 @@ def check_for_b020(self, node): n = targets.names[name][0] self.errors.append(B020(n.lineno, n.col_offset, vars=(name,))) + def check_for_b023(self, loop_node): + """Check that functions (including lambdas) do not use loop variables. + + https://docs.python-guide.org/writing/gotchas/#late-binding-closures from + functions - usually but not always lambdas - defined inside a loop are a + classic source of bugs. + + For each use of a variable inside a function defined inside a loop, we + emit a warning if that variable is reassigned on each loop iteration + (outside the function). This includes but is not limited to explicit + loop variables like the `x` in `for x in range(3):`. + """ + # Because most loops don't contain functions, it's most efficient to + # implement this "backwards": first we find all the candidate variable + # uses, and then if there are any we check for assignment of those names + # inside the loop body. + suspicious_variables = [] + for node in ast.walk(loop_node): + if isinstance(node, FUNCTION_NODES): + argnames = set(names_from_assignments(node.args)) + for name in ast.walk(node): + if isinstance(name, ast.Name) and name.id not in argnames: + err = B023(name.lineno, name.col_offset, vars=(name.id,)) + suspicious_variables.append(err) + + if suspicious_variables: + reassigned_in_loop = set(self._get_assigned_names(loop_node)) + + for err in sorted(suspicious_variables): + if reassigned_in_loop.issuperset(err.vars): + self.errors.append(err) + + def _get_assigned_names(self, loop_node): + loop_targets = (ast.For, ast.AsyncFor, ast.comprehension) + for node in children_in_scope(loop_node): + if isinstance(node, (ast.Assign)): + for child in node.targets: + yield from names_from_assignments(child) + if isinstance(node, loop_targets + (ast.AnnAssign, ast.AugAssign)): + yield from names_from_assignments(node.target) + def check_for_b904(self, node): """Checks `raise` without `from` inside an `except` clause. @@ -1041,6 +1123,15 @@ def visit_Lambda(self, node): ) ) +B023 = Error( + message=( + "B023 Function definition does not bind loop variable {!r}. " + "This means that invoking functions defined inside the loop will " + "always use the value of this variable from the last iteration. See " + "https://docs.python-guide.org/writing/gotchas/#late-binding-closures" + ) +) + # Warnings disabled by default. B901 = Error( message=( diff --git a/tests/b023.py b/tests/b023.py new file mode 100644 index 0000000..fe78e4e --- /dev/null +++ b/tests/b023.py @@ -0,0 +1,56 @@ +""" +Should emit: +B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53. +""" + +functions = [] +z = 0 + +for x in range(3): + y = x + 1 + # Subject to late-binding problems + functions.append(lambda: x) + functions.append(lambda: y) # not just the loop var + + def f_bad_1(): + return x + + # Actually OK + functions.append(lambda x: x * 2) + functions.append(lambda x=x: x) + functions.append(lambda: z) # OK because not assigned in the loop + + def f_ok_1(x): + return x * 2 + + +def check_inside_functions_too(): + ls = [lambda: x for x in range(2)] + st = {lambda: x for x in range(2)} + gn = (lambda: x for x in range(2)) + dt = {x: lambda: x for x in range(2)} + + +async def pointless_async_iterable(): + yield 1 + + +async def container_for_problems(): + async for x in pointless_async_iterable(): + functions.append(lambda: x) + + [lambda: x async for x in pointless_async_iterable()] + + +a = 10 +b = 0 +while True: + a = a_ = a - 1 + b += 1 + functions.append(lambda: a) + functions.append(lambda: a_) + functions.append(lambda: b) + functions.append(lambda: c) # not a name error because of late binding! + c: bool = a > 3 + if not c: + break diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index 12469e6..31686c6 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -33,6 +33,7 @@ B020, B021, B022, + B023, B901, B902, B903, @@ -325,6 +326,27 @@ def test_b022(self): errors = list(bbc.run()) self.assertEqual(errors, self.errors(B022(8, 0))) + def test_b023(self): + filename = Path(__file__).absolute().parent / "b023.py" + bbc = BugBearChecker(filename=str(filename)) + errors = list(bbc.run()) + expected = self.errors( + B023(12, 29, vars=("x",)), + B023(13, 29, vars=("y",)), + B023(16, 15, vars=("x",)), + B023(28, 18, vars=("x",)), + B023(29, 18, vars=("x",)), + B023(30, 18, vars=("x",)), + B023(31, 21, vars=("x",)), + B023(40, 33, vars=("x",)), + B023(42, 13, vars=("x",)), + B023(50, 29, vars=("a",)), + B023(51, 29, vars=("a_",)), + B023(52, 29, vars=("b",)), + B023(53, 29, vars=("c",)), + ) + self.assertEqual(errors, expected) + def test_b901(self): filename = Path(__file__).absolute().parent / "b901.py" bbc = BugBearChecker(filename=str(filename)) From 765d2068320ab8bcbfc0260a2db551642b26e45d Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Thu, 23 Jun 2022 17:36:25 -0700 Subject: [PATCH 2/5] Dedupe warning with nested loops --- bugbear.py | 14 +++++--------- tests/b023.py | 7 ++++++- tests/test_bugbear.py | 2 ++ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/bugbear.py b/bugbear.py index 8e7ae0c..0c3b15c 100644 --- a/bugbear.py +++ b/bugbear.py @@ -236,6 +236,7 @@ class BugBearVisitor(ast.NodeVisitor): contexts = attr.ib(default=attr.Factory(list)) NODE_WINDOW_SIZE = 4 + _b023_seen = attr.ib(factory=set, init=False) if False: # Useful for tracing what the hell is going on. @@ -584,7 +585,9 @@ def check_for_b023(self, loop_node): for name in ast.walk(node): if isinstance(name, ast.Name) and name.id not in argnames: err = B023(name.lineno, name.col_offset, vars=(name.id,)) - suspicious_variables.append(err) + if err not in self._b023_seen: + self._b023_seen.add(err) # dedupe across nested loops + suspicious_variables.append(err) if suspicious_variables: reassigned_in_loop = set(self._get_assigned_names(loop_node)) @@ -1123,14 +1126,7 @@ def visit_Lambda(self, node): ) ) -B023 = Error( - message=( - "B023 Function definition does not bind loop variable {!r}. " - "This means that invoking functions defined inside the loop will " - "always use the value of this variable from the last iteration. See " - "https://docs.python-guide.org/writing/gotchas/#late-binding-closures" - ) -) +B023 = Error(message="B023 Function definition does not bind loop variable {!r}.") # Warnings disabled by default. B901 = Error( diff --git a/tests/b023.py b/tests/b023.py index fe78e4e..9426691 100644 --- a/tests/b023.py +++ b/tests/b023.py @@ -1,6 +1,6 @@ """ Should emit: -B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53. +B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53, 61. """ functions = [] @@ -54,3 +54,8 @@ async def container_for_problems(): c: bool = a > 3 if not c: break + +# Nested loops should not duplicate reports +for j in range(2): + for k in range(3): + lambda: j * k diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index 31686c6..e73fbf6 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -344,6 +344,8 @@ def test_b023(self): B023(51, 29, vars=("a_",)), B023(52, 29, vars=("b",)), B023(53, 29, vars=("c",)), + B023(61, 16, vars=("j",)), + B023(61, 20, vars=("k",)), ) self.assertEqual(errors, expected) From b764b3761c6501d126c41f5a142b2e0afb62aa87 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Thu, 23 Jun 2022 22:33:17 -0700 Subject: [PATCH 3/5] Only trigger for Load names --- bugbear.py | 6 +++++- tests/b023.py | 9 ++++++++- tests/test_bugbear.py | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bugbear.py b/bugbear.py index 0c3b15c..b2b25bf 100644 --- a/bugbear.py +++ b/bugbear.py @@ -583,7 +583,11 @@ def check_for_b023(self, loop_node): if isinstance(node, FUNCTION_NODES): argnames = set(names_from_assignments(node.args)) for name in ast.walk(node): - if isinstance(name, ast.Name) and name.id not in argnames: + if ( + isinstance(name, ast.Name) + and name.id not in argnames + and isinstance(name.ctx, ast.Load) + ): err = B023(name.lineno, name.col_offset, vars=(name.id,)) if err not in self._b023_seen: self._b023_seen.add(err) # dedupe across nested loops diff --git a/tests/b023.py b/tests/b023.py index 9426691..7d6f479 100644 --- a/tests/b023.py +++ b/tests/b023.py @@ -1,6 +1,6 @@ """ Should emit: -B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53, 61. +B023 - on lines 12, 13, 16, 28, 29, 30, 31, 40, 42, 50, 51, 52, 53, 61, 68. """ functions = [] @@ -59,3 +59,10 @@ async def container_for_problems(): for j in range(2): for k in range(3): lambda: j * k + + +for j, k, l in [(1, 2, 3)]: + + def f(): + j = None # OK because it's an assignment + [l for k in range(2)] # error for l, not for k diff --git a/tests/test_bugbear.py b/tests/test_bugbear.py index e73fbf6..750e96f 100644 --- a/tests/test_bugbear.py +++ b/tests/test_bugbear.py @@ -346,6 +346,7 @@ def test_b023(self): B023(53, 29, vars=("c",)), B023(61, 16, vars=("j",)), B023(61, 20, vars=("k",)), + B023(68, 9, vars=("l",)), ) self.assertEqual(errors, expected) From b3dd9721fb3dd1aadd8a2950422199bdaad8c995 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Fri, 24 Jun 2022 00:04:13 -0700 Subject: [PATCH 4/5] Handle attribute/item assignment --- bugbear.py | 18 +++++++++++------- tests/b023.py | 5 +++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/bugbear.py b/bugbear.py index b2b25bf..4a26319 100644 --- a/bugbear.py +++ b/bugbear.py @@ -199,12 +199,14 @@ def _to_name_str(node): return _to_name_str(node.value) -def names_from_assignments(node): - for name in ast.walk(node): - if isinstance(name, ast.Name): - yield name.id - elif isinstance(name, ast.arg): - yield name.arg +def names_from_assignments(assign_target): + if isinstance(assign_target, ast.Name): + yield assign_target.id + elif isinstance(assign_target, ast.Starred): + yield from names_from_assignments(assign_target.value) + elif isinstance(assign_target, (ast.List, ast.Tuple)): + for child in assign_target.elts: + yield from names_from_assignments(child) def children_in_scope(node): @@ -581,7 +583,9 @@ def check_for_b023(self, loop_node): suspicious_variables = [] for node in ast.walk(loop_node): if isinstance(node, FUNCTION_NODES): - argnames = set(names_from_assignments(node.args)) + argnames = { + arg.arg for arg in ast.walk(node.args) if isinstance(arg, ast.arg) + } for name in ast.walk(node): if ( isinstance(name, ast.Name) diff --git a/tests/b023.py b/tests/b023.py index 7d6f479..e5dc3ad 100644 --- a/tests/b023.py +++ b/tests/b023.py @@ -66,3 +66,8 @@ async def container_for_problems(): def f(): j = None # OK because it's an assignment [l for k in range(2)] # error for l, not for k + + assert a and functions + + a.attribute = 1 # modifying an attribute doesn't make it a loop variable + functions[0] = lambda: None # same for an element From 2c8a06d3ab6d17b827a86e76cd272a86098739c9 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Fri, 24 Jun 2022 00:39:41 -0700 Subject: [PATCH 5/5] Allow explictly-captured vars i.e. don't complain about the default value of arguments, since that's an explicit solution to late binding! --- bugbear.py | 6 +++++- tests/b023.py | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/bugbear.py b/bugbear.py index 4a26319..58c3432 100644 --- a/bugbear.py +++ b/bugbear.py @@ -586,7 +586,11 @@ def check_for_b023(self, loop_node): argnames = { arg.arg for arg in ast.walk(node.args) if isinstance(arg, ast.arg) } - for name in ast.walk(node): + if isinstance(node, ast.Lambda): + body_nodes = ast.walk(node.body) + else: + body_nodes = itertools.chain.from_iterable(map(ast.walk, node.body)) + for name in body_nodes: if ( isinstance(name, ast.Name) and name.id not in argnames diff --git a/tests/b023.py b/tests/b023.py index e5dc3ad..542e46e 100644 --- a/tests/b023.py +++ b/tests/b023.py @@ -71,3 +71,8 @@ def f(): a.attribute = 1 # modifying an attribute doesn't make it a loop variable functions[0] = lambda: None # same for an element + +for var in range(2): + + def explicit_capture(captured=var): + return captured