Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions apps/unused_code/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,26 @@ To run from CLI with `--exclude-files`
```bash
pyutils-unusedcode --exclude-files 'my_exclude_file1.py,my_exclude_file2.py'
```

## Excluding single functions in your code
To skip individual functions in your target repository, add an inline comment to the function definition. The comment must be `# skip-unused-code`.

### Example:

Given a target file `main.py`:
```python
def tmp1(): # skip-unused-code
    pass

def tmp2(
    x,
    y,
    z
): # skip-unused-code
    pass
```
Running the tool skips both functions, `tmp1` and `tmp2`:
```bash
pyutils-unusedcode -v
2025-01-01T00:00:00.1 apps.unused_code.unused_code DEBUG Skipping function due to comment: tmp1
2025-01-01T00:00:00.2 apps.unused_code.unused_code DEBUG Skipping function due to comment: tmp2
```
79 changes: 77 additions & 2 deletions apps/unused_code/unused_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import os
import subprocess
import sys
import tokenize
Comment thread
myakove marked this conversation as resolved.
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
from io import StringIO
from typing import Any, Iterable

import click
Expand All @@ -14,6 +16,60 @@
from apps.utils import ListParamType, all_python_files, get_util_config

LOGGER = get_logger(name=__name__)
# Inline marker: a function whose `def` statement carries this exact comment is excluded from the unused-code report.
SKIP_COMMENT = "# skip-unused-code"
Comment thread
lugi0 marked this conversation as resolved.


def extract_inline_function_comments(source_code: str) -> dict[str, list[str]]:
    """
    Collect the functions whose definition carries an inline `SKIP_COMMENT` marker.

    The source is tokenized rather than parsed, because comments are not part of the AST. A marker
    only counts when it appears between a function's name and the first *logical* newline after it,
    i.e. on the `def` statement itself. Multi-line signatures are covered because the line breaks
    inside the parentheses are `tokenize.NL` tokens, not `tokenize.NEWLINE`.

    NOTE(review): a collaborator suggested the third-party `ast-comments` package
    (https://pypi.org/project/ast-comments/) as a simpler alternative to this hand-rolled token walk.

    Args:
        source_code: Full text of a Python source file.

    Returns:
        Mapping of function name to a single-element list holding the matched comment text.
        Functions without the marker are absent. Entries are keyed by the bare function name, so
        same-named functions within one source text share a single entry.

    Raises:
        Errors raised by `tokenize` for malformed source are not caught here and propagate to the caller.
    """
    comments: dict[str, list[str]] = {}
    # Token holding the name of the function whose `def` statement is currently being scanned.
    name_token: tokenize.TokenInfo | None = None
    # True right after a `def` keyword, until the function name is seen.
    expecting_name = False

    for token in tokenize.generate_tokens(StringIO(source_code).readline):
        if token.type == tokenize.NAME and token.string == "def":
            expecting_name = True

        elif token.type == tokenize.NAME and expecting_name:
            # First NAME token after `def` is the function name
            name_token = token
            expecting_name = False

        elif name_token is None:
            # Not inside a `def` statement: nothing below applies
            continue

        elif token.type == tokenize.NEWLINE:
            # First logical newline after the function name ends the `def` statement. Anything later is
            # either inside the body or outside the function, so stop tracking. Without this reset a
            # comment placed on its own line before the *next* function, e.g.
            # ------------
            # def foo():
            #     pass
            #
            # # my-comment
            # def bar():
            # ------------
            # would be attributed to foo().
            name_token = None

        elif token.type == tokenize.COMMENT and token.string.strip() == SKIP_COMMENT:
            # A COMMENT token runs to the end of the physical line, so strip it before comparing:
            # a marker followed by trailing whitespace must still be honoured.
            marker = token.string.strip()
            LOGGER.debug(f"found comment for function def: {name_token.line.strip()}")
            LOGGER.debug(f"comment is: {marker}")
            comments[name_token.string] = [marker]
            name_token = None

    return comments


def is_fixture_autouse(func: ast.FunctionDef) -> bool:
Expand Down Expand Up @@ -53,14 +109,22 @@ def is_ignore_function_list(ignore_prefix_list: list[str], function: ast.Functio


def process_file(py_file: str, func_ignore_prefix: list[str], file_ignore_list: list[str]) -> str:
if os.path.basename(py_file) in file_ignore_list:
if os.path.relpath(py_file) in file_ignore_list:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may break existing usage. Expectation is, you would be running unused code check from root directory of your repo and if you want to ignore some file, you would be passing their names. Using relative file path can be problematic here.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is IMHO incorrect.
basename would return the tail of https://docs.python.org/3/library/os.path.html#os.path.split, which is assumed to be the file name of a given path.
This means that passing foo/xyz.py would return xyz.py, which might not be what the user has intended to exclude, e.g. in a scenario like

project/
├─ bar/
│  ├─ xyz.py
├─ foo/
│  ├─ xyz.py
├─ xyz.py

passing foo/xyz.py when running from the project root would actually exclude all xyz.py files (project/xyz.py, project/bar/xyz.py and project/foo/xyz.py).
relpath returns the relative path from the current directory - assuming you are excluding files based on their paths from the project root as is the current assumption, you would end up excluding the specific file rather than all of them

LOGGER.debug(f"Skipping file: {py_file}")
return ""

with open(py_file) as fd:
tree = ast.parse(source=fd.read())
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from ast_comments import parse

Suggested change
tree = ast.parse(source=fd.read())
tree = parse(source=fd.read())


with open(py_file) as fd:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file was already opened in line 116, can you collect the comments there?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, file.read() returns the entire file contents and any subsequent read in the same context would return ''; the file has to be closed and reopened before its contents can be read again

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Save the result of `fd.read()` in a variable and reuse it.

comments = extract_inline_function_comments(source_code=fd.read())

found = []
for func in _iter_functions(tree=tree):
if func.name in comments.keys():
LOGGER.debug(f"Skipping function due to comment: {func.name}")
continue

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if any(getattr(item, "value", None) == "# skip-unused-code" for item in func.body):
            continue

if func_ignore_prefix and is_ignore_function_list(ignore_prefix_list=func_ignore_prefix, function=func):
LOGGER.debug(f"Skipping function: {func.name}")
continue
Expand All @@ -81,12 +145,23 @@ def process_file(py_file: str, func_ignore_prefix: list[str], file_ignore_list:
if _line.strip().startswith("#"):
continue

if _line.strip().startswith("assert"):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please elaborate? Are we currently counting such calls?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently the tool would count a function as being used even if its name was only referenced in an assert statement, e.g. see the test cases of this PR and imagine a scenario in which you are doing
assert "check_me" in output.
In this case "check_me" would suddenly stop being in the output, because the name of the function is now found, and even though the function itself is never actually used the tool incorrectly reports otherwise.
You can try running poetry run pytest with this line commented out to see it in action, but with this a function is only reported as used if it is actually called from some other place other than the assert statement(s).
As I wrote in the earlier comment it is an opinionated call, but I think it is fair to assume that the function name being found does not equal the function itself being used.
One could be more fancy with trying to figure out if the function name is encased in a string delimiter to rule out other such instances, but it would require a fair bit of work to get right

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch, I think the proper way to solve it is to use grep -E

git grep -wE '{func.name}(.*)' | grep -v 'def'

this will also replace
if f"def {func.name}" in _line: line

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The grep will not work, as it will still match the function name in any given context (e.g. print("my_function")). You would need to do negative lookbehind to ensure you're not in a string, but as the wise sage once said:

Some people, when confronted with a problem, think “I know, I’ll use regular expressions.” Now they have two problems.

The only way to properly handle it would be to special case it in the ast parse by matching the instance type with ast.Call, but even then I am not 100% sure of its correctness in all cases. Feel free to improve this as you see fit!

# if the function is only called from a test assert statement do not count it
continue

if func.name in _line:
used = True
break

if not used:
return f"{os.path.relpath(py_file)}:{func.name}:{func.lineno}:{func.col_offset} Is not used anywhere in the code."
# store all unused functions in the file
found.append(
f"{os.path.relpath(py_file)}:{func.name}:{func.lineno}:{func.col_offset} Is not used anywhere in the code.\n"
)

# return all unused functions if any
if len(found) > 0:
return "".join(found)

return ""

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ output-format = "grouped"
lint.extend-select = ["I"]

[tool.ruff.format]
exclude = [".git", ".venv", ".mypy_cache", ".tox", "__pycache__"]
exclude = [".git", ".venv", ".mypy_cache", ".tox", "__pycache__", "unused_code_file_for_test.py"]

[tool.poetry]
name = "python-utility-scripts"
Expand Down
33 changes: 30 additions & 3 deletions tests/unused_code/test_unused_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,48 @@ def test_unused_code():


def test_unused_code_file_list():
    """
    Exclude the fixture file through `--exclude-files` and expect a clean run with nothing reported.
    """
    # The fixture is addressed by its path, not its bare file name
    # (presumably relative to the repository root the tests run from - TODO confirm).
    result = get_cli_runner().invoke(
        get_unused_functions, '--exclude-files "tests/unused_code/unused_code_file_for_test.py"'
    )
    LOGGER.info(f"Result output: {result.output}, exit code: {result.exit_code}, exceptions: {result.exception}")
    assert result.exit_code == 0
    assert "Is not used anywhere in the code" not in result.output


def test_unused_code_wrong_file_list():
    """
    Pass only the fixture's bare file name to `--exclude-files` and expect unused functions to still be reported.
    """
    cli_args = '--exclude-files "unused_code_file_for_test.py"'
    outcome = get_cli_runner().invoke(get_unused_functions, cli_args)
    LOGGER.info(f"Result output: {outcome.output}, exit code: {outcome.exit_code}, exceptions: {outcome.exception}")
    assert outcome.exit_code == 1
    assert "Is not used anywhere in the code" in outcome.output


def test_unused_code_function_list_exclude_all():
    """
    Exclude every function prefixed with "unused_code_" and check what is left in the report.

    The prefix filter removes the "unused_code_*" fixtures, the inline skip marker removes `foo` and `bar`,
    and the two fixtures whose marker is not inline (`check_me`, `check_me_too`) must still be reported.
    """
    result = get_cli_runner().invoke(get_unused_functions, '--exclude-function-prefixes "unused_code_"')
    LOGGER.info(f"Result output: {result.output}, exit code: {result.exit_code}, exceptions: {result.exception}")
    # Functions are still reported, so the run cannot be clean: the sibling tests pair a non-empty
    # report with exit code 1.
    assert result.exit_code == 1
    # No function def that starts with "unused_code_"
    assert ":unused_code_" not in result.output
    assert "check_me" in result.output
    assert "check_me_too" in result.output
    assert "foo" not in result.output
    assert "bar" not in result.output


def test_unused_code_function_list_exclude():
    """
    Exclude a function prefix that matches none of the fixtures and expect both "unused_code_*" fixtures reported.
    """
    outcome = get_cli_runner().invoke(get_unused_functions, '--exclude-function-prefixes "unused_code_check_function"')
    LOGGER.info(f"Result output: {outcome.output}, exit code: {outcome.exit_code}, exceptions: {outcome.exception}")
    assert outcome.exit_code == 1

    report = outcome.output
    assert "Is not used anywhere in the code" in report
    for expected_name in ("unused_code_check_fail", "unused_code_check_file"):
        assert expected_name in report


def test_skip_comment():
    """
    Run the tool without options and verify that only functions with an inline skip marker are left out.
    """
    outcome = get_cli_runner().invoke(get_unused_functions)
    LOGGER.info(f"Result output: {outcome.output}, exit code: {outcome.exit_code}, exceptions: {outcome.exception}")
    assert outcome.exit_code == 1

    report = outcome.output
    # Fixtures without an inline marker must be reported
    for reported_name in ("unused_code_check_fail", "unused_code_check_file", "check_me", "check_me_too"):
        assert reported_name in report
    # Fixtures with an inline marker on the def statement must be skipped
    for skipped_name in ("foo", "bar"):
        assert skipped_name not in report
25 changes: 25 additions & 0 deletions tests/unused_code/unused_code_file_for_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
from typing import Any


# Fixture: no skip marker; the test suite expects this definition to be reported.
def unused_code_check_fail():
    pass


# Fixture: no skip marker; the test suite expects this definition to be reported.
def unused_code_check_file():
    pass


# Fixture: inline marker on a single-line definition; the test suite expects it to be skipped.
def foo(): # skip-unused-code
    pass


# Fixture: inline marker on the closing line of a multi-line signature; the test suite expects it to be skipped.
def bar(
    x: Any,
    y: Any,
    z: Any
) -> None: # skip-unused-code
    pass


# Fixture: the marker sits on its own line inside the body, not on the definition line;
# the test suite expects this definition to still be reported.
def check_me():
    # skip-unused-code
    pass


# Fixture: the marker sits on its own line above the definition, not inline with it;
# the test suite expects this definition to still be reported.
# skip-unused-code
def check_me_too():
    pass