diff --git a/CHANGES.md b/CHANGES.md index ddf026a481b..0742a2d7562 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,7 @@ - Fix bug where module docstrings would be treated as normal strings if preceeded by comments (#4764) - Fix bug where python 3.12 generics syntax split line happens weirdly (#4777) +- Standardize type comments to form `# type: (value)` (#4645) ### Configuration diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md index f5651c20d4a..97af09de389 100644 --- a/docs/the_black_code_style/future_style.md +++ b/docs/the_black_code_style/future_style.md @@ -29,6 +29,8 @@ Currently, the following features are included in the preview style: - `fix_fmt_skip_in_one_liners`: Fix `# fmt: skip` behaviour on one-liner declarations, such as `def foo(): return "mock" # fmt: skip`, where previously the declaration would have been incorrectly collapsed. +- `standardize_type_comments`: Format type comments which have zero or more spaces + between `#` and `type:` or between `type:` and value to `# type: (value)` - `wrap_comprehension_in`: Wrap the `in` clause of list and dictionary comprehensions across lines if it would otherwise exceed the maximum line length. - `remove_parens_around_except_types`: Remove parentheses around multiple exception diff --git a/src/black/comments.py b/src/black/comments.py index 8c8866860d2..25c20a8e677 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -11,6 +11,7 @@ WHITESPACE, container_of, first_leaf_of, + is_type_comment_string, make_simple_prefix, preceding_leaf, syms, @@ -50,7 +51,7 @@ class ProtoComment: leading_whitespace: str # leading whitespace before the comment, if any -def generate_comments(leaf: LN) -> Iterator[Leaf]: +def generate_comments(leaf: LN, mode: Mode) -> Iterator[Leaf]: """Clean the prefix of the `leaf` and generate comments from it, if any. Comments in lib2to3 are shoved into the whitespace prefix. 
This happens @@ -70,7 +71,9 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]: are emitted with a fake STANDALONE_COMMENT token identifier. """ total_consumed = 0 - for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER): + for pc in list_comments( + leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, mode=mode + ): total_consumed = pc.consumed prefix = make_simple_prefix(pc.newlines, pc.form_feed) yield Leaf(pc.type, pc.value, prefix=prefix) @@ -78,7 +81,7 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]: @lru_cache(maxsize=4096) -def list_comments(prefix: str, *, is_endmarker: bool) -> list[ProtoComment]: +def list_comments(prefix: str, *, is_endmarker: bool, mode: Mode) -> list[ProtoComment]: """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.""" result: list[ProtoComment] = [] if not prefix or "#" not in prefix: @@ -109,7 +112,7 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> list[ProtoComment]: comment_type = token.COMMENT # simple trailing comment else: comment_type = STANDALONE_COMMENT - comment = make_comment(line) + comment = make_comment(line, mode=mode) result.append( ProtoComment( type=comment_type, @@ -140,7 +143,7 @@ def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None: leaf.prefix = "" -def make_comment(content: str) -> str: +def make_comment(content: str, mode: Mode) -> str: """Return a consistently formatted comment from the given `content` string. 
All comments (except for "##", "#!", "#:", '#'") should have a single @@ -157,9 +160,18 @@ def make_comment(content: str) -> str: if ( content and content[0] == "\N{NO-BREAK SPACE}" - and not content.lstrip().startswith("type:") + and not is_type_comment_string("# " + content.lstrip(), mode=mode) ): content = " " + content[1:] # Replace NBSP by a simple space + if ( + Preview.standardize_type_comments in mode + and content + and "\N{NO-BREAK SPACE}" not in content + and is_type_comment_string("#" + content, mode=mode) + ): + type_part, value_part = content.split(":", 1) + content = type_part.strip() + ": " + value_part.strip() + if content and content[0] not in COMMENT_EXCEPTIONS: content = " " + content return "#" + content @@ -183,7 +195,7 @@ def convert_one_fmt_off_pair( """ for leaf in node.leaves(): previous_consumed = 0 - for comment in list_comments(leaf.prefix, is_endmarker=False): + for comment in list_comments(leaf.prefix, is_endmarker=False, mode=mode): is_fmt_off = comment.value in FMT_OFF is_fmt_skip = _contains_fmt_skip_comment(comment.value, mode) if (not is_fmt_off and not is_fmt_skip) or ( @@ -273,13 +285,13 @@ def generate_ignored_nodes( return container: Optional[LN] = container_of(leaf) while container is not None and container.type != token.ENDMARKER: - if is_fmt_on(container): + if is_fmt_on(container, mode=mode): return # fix for fmt: on in children - if children_contains_fmt_on(container): + if children_contains_fmt_on(container, mode=mode): for index, child in enumerate(container.children): - if isinstance(child, Leaf) and is_fmt_on(child): + if isinstance(child, Leaf) and is_fmt_on(child, mode=mode): if child.type in CLOSING_BRACKETS: # This means `# fmt: on` is placed at a different bracket level # than `# fmt: off`. 
This is an invalid use, but as a courtesy, @@ -290,12 +302,14 @@ def generate_ignored_nodes( if ( child.type == token.INDENT and index < len(container.children) - 1 - and children_contains_fmt_on(container.children[index + 1]) + and children_contains_fmt_on( + container.children[index + 1], mode=mode + ) ): # This means `# fmt: on` is placed right after an indentation # level, and we shouldn't swallow the previous INDENT token. return - if children_contains_fmt_on(child): + if children_contains_fmt_on(child, mode=mode): return yield child else: @@ -316,7 +330,7 @@ def _generate_ignored_nodes_from_fmt_skip( ignored_nodes: list[LN] = [] # Need to properly format the leaf prefix to compare it to comment.value, # which is also formatted - comments = list_comments(leaf.prefix, is_endmarker=False) + comments = list_comments(leaf.prefix, is_endmarker=False, mode=mode) if not comments or comment.value != comments[0].value: return if prev_sibling is not None: @@ -392,12 +406,12 @@ def _generate_ignored_nodes_from_fmt_skip( yield from iter(ignored_nodes) -def is_fmt_on(container: LN) -> bool: +def is_fmt_on(container: LN, mode: Mode) -> bool: """Determine whether formatting is switched on within a container. Determined by whether the last `# fmt:` comment is `on` or `off`. 
""" fmt_on = False - for comment in list_comments(container.prefix, is_endmarker=False): + for comment in list_comments(container.prefix, is_endmarker=False, mode=mode): if comment.value in FMT_ON: fmt_on = True elif comment.value in FMT_OFF: @@ -405,11 +419,11 @@ def is_fmt_on(container: LN) -> bool: return fmt_on -def children_contains_fmt_on(container: LN) -> bool: +def children_contains_fmt_on(container: LN, mode: Mode) -> bool: """Determine if children have formatting switched on.""" for child in container.children: leaf = first_leaf_of(child) - if leaf is not None and is_fmt_on(leaf): + if leaf is not None and is_fmt_on(leaf, mode=mode): return True return False diff --git a/src/black/linegen.py b/src/black/linegen.py index 09197e674e7..27601262604 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -140,7 +140,7 @@ def visit_default(self, node: LN) -> Iterator[Line]: """Default `visit_*()` implementation. Recurses to children of `node`.""" if isinstance(node, Leaf): any_open_brackets = self.current_line.bracket_tracker.any_open_brackets() - for comment in generate_comments(node): + for comment in generate_comments(node, mode=self.mode): if any_open_brackets: # any comment within brackets is subject to splitting self.current_line.append(comment) @@ -1420,7 +1420,7 @@ def normalize_invisible_parens( # noqa: C901 Standardizes on visible parentheses for single-element tuples, and keeps existing visible parentheses for other tuples and generator expressions. """ - for pc in list_comments(node.prefix, is_endmarker=False): + for pc in list_comments(node.prefix, is_endmarker=False, mode=mode): if pc.value in FMT_OFF: # This `node` has a prefix with `# fmt: off`, don't mess with parens. 
return @@ -1748,7 +1748,7 @@ def maybe_make_parens_invisible_in_atom( if ( # If the prefix of `middle` includes a type comment with # ignore annotation, then we do not remove the parentheses - not is_type_ignore_comment_string(middle.prefix.strip()) + not is_type_ignore_comment_string(middle.prefix.strip(), mode=mode) ): first.value = "" last.value = "" diff --git a/src/black/lines.py b/src/black/lines.py index b3fdd4ae3a3..436f5ded64d 100644 --- a/src/black/lines.py +++ b/src/black/lines.py @@ -286,9 +286,9 @@ def contains_uncollapsable_type_comments(self) -> bool: comment_seen = False for leaf_id, comments in self.comments.items(): for comment in comments: - if is_type_comment(comment): + if is_type_comment(comment, mode=self.mode): if comment_seen or ( - not is_type_ignore_comment(comment) + not is_type_ignore_comment(comment, mode=self.mode) and leaf_id not in ignored_ids ): return True @@ -325,7 +325,7 @@ def contains_unsplittable_type_ignore(self) -> bool: # line. for node in self.leaves[-2:]: for comment in self.comments.get(id(node), []): - if is_type_ignore_comment(comment): + if is_type_ignore_comment(comment, mode=self.mode): return True return False @@ -400,7 +400,7 @@ def append_comment(self, comment: Leaf) -> bool: and not last_leaf.value and last_leaf.parent and len(list(last_leaf.parent.leaves())) <= 3 - and not is_type_comment(comment) + and not is_type_comment(comment, mode=self.mode) ): # Comments on an optional parens wrapping a single leaf should belong to # the wrapped node except if it's a type comment. 
Pinning the comment like diff --git a/src/black/mode.py b/src/black/mode.py index 79dfed41047..560e7c6923b 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -231,6 +231,7 @@ class Preview(Enum): multiline_string_handling = auto() always_one_newline_after_import = auto() fix_fmt_skip_in_one_liners = auto() + standardize_type_comments = auto() wrap_comprehension_in = auto() # Remove parentheses around multiple exception types in except and # except* without as. See PEP 758 for details. @@ -319,3 +320,18 @@ def get_cache_key(self) -> str: features_and_magics, ] return ".".join(parts) + + def __hash__(self) -> int: + return hash(( + frozenset(self.target_versions), + self.line_length, + self.string_normalization, + self.is_pyi, + self.is_ipynb, + self.skip_source_first_line, + self.magic_trailing_comma, + frozenset(self.python_cell_magics), + self.preview, + self.unstable, + frozenset(self.enabled_features), + )) diff --git a/src/black/nodes.py b/src/black/nodes.py index ac346f2411b..c6e618acadb 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -14,7 +14,7 @@ from mypy_extensions import mypyc_attr from black.cache import CACHE_DIR -from black.mode import Mode +from black.mode import Mode, Preview from black.strings import get_string_prefix, has_triple_quotes from blib2to3 import pygram from blib2to3.pgen2 import token @@ -931,27 +931,44 @@ def is_async_stmt_or_funcdef(leaf: Leaf) -> bool: ) -def is_type_comment(leaf: Leaf) -> bool: +def is_type_comment(leaf: Leaf, mode: Mode) -> bool: """Return True if the given leaf is a type comment. This function should only be used for general type comments (excluding ignore annotations, which should use `is_type_ignore_comment`). 
Note that general type comments are no longer used in modern version of Python, this function may be deprecated in the future.""" t = leaf.type v = leaf.value - return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:") + return t in {token.COMMENT, STANDALONE_COMMENT} and is_type_comment_string(v, mode) -def is_type_ignore_comment(leaf: Leaf) -> bool: +def is_type_comment_string(value: str, mode: Mode) -> bool: + if Preview.standardize_type_comments in mode: + is_valid = value.startswith("#") and value[1:].lstrip().startswith("type:") + else: + is_valid = value.startswith("# type:") + return is_valid + + +def is_type_ignore_comment(leaf: Leaf, mode: Mode) -> bool: """Return True if the given leaf is a type comment with ignore annotation.""" t = leaf.type v = leaf.value - return t in {token.COMMENT, STANDALONE_COMMENT} and is_type_ignore_comment_string(v) + return t in {token.COMMENT, STANDALONE_COMMENT} and is_type_ignore_comment_string( + v, mode + ) -def is_type_ignore_comment_string(value: str) -> bool: +def is_type_ignore_comment_string(value: str, mode: Mode) -> bool: """Return True if the given string match with type comment with ignore annotation.""" - return value.startswith("# type: ignore") + if Preview.standardize_type_comments in mode: + is_valid = is_type_comment_string(value, mode) and value.split(":", 1)[ + 1 + ].lstrip().startswith("ignore") + else: + is_valid = value.startswith("# type: ignore") + + return is_valid def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None: diff --git a/src/black/resources/black.schema.json b/src/black/resources/black.schema.json index 9e60db33ebf..ff517b5bbca 100644 --- a/src/black/resources/black.schema.json +++ b/src/black/resources/black.schema.json @@ -86,6 +86,7 @@ "multiline_string_handling", "always_one_newline_after_import", "fix_fmt_skip_in_one_liners", + "standardize_type_comments", "wrap_comprehension_in", "remove_parens_around_except_types", 
"normalize_cr_newlines", diff --git a/tests/data/cases/preview_standardize_type_comments.py b/tests/data/cases/preview_standardize_type_comments.py new file mode 100644 index 00000000000..2ab45533cf9 --- /dev/null +++ b/tests/data/cases/preview_standardize_type_comments.py @@ -0,0 +1,23 @@ +# flags: --preview +def foo( + a, #type:int + b, #type: str + c, # type: List[int] + d, # type: Dict[int, str] + e, # type: ignore + f, # type : ignore + g, # type : ignore +): + pass + +# output +def foo( + a, # type: int + b, # type: str + c, # type: List[int] + d, # type: Dict[int, str] + e, # type: ignore + f, # type : ignore + g, # type : ignore +): + pass