Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ Change Log

.. There should always be an "Unreleased" section for changes pending release.

[0.6.0] - 2020-08-27
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Add support for multiline annotations for lines prefixed with single-line comment signs ("#")

[0.5.1] - 2020-08-25
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion code_annotations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Extensible tools for parsing annotations in codebases.
"""

__version__ = '0.5.1'
__version__ = '0.6.0'
80 changes: 57 additions & 23 deletions code_annotations/extensions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,27 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
# Javascript and Python extensions for examples.
lang_comment_definition = None

r"""
This format string/regex finds all comments in the file. The format tokens will be replaced with the
language-specific comment definitions defined in the sub-classes.

{multi_start} - start of the language-specific multi-line comment (ex. /*)
([\d\D]*?) - capture all of the characters...
{multi_end} - until you find the end of the language-specific multi-line comment (ex. */)
| - If you don't find any of those...
{single} - start by finding the single-line comment token (ex. //)
(.*) - and capture all characters until the end of the line

Returns a 2-tuple of:
- ("Comment text", None) in the case of a multi-line comment OR
- (None, "Comment text") in the case of a single-line comment
# This format string/regex finds all comments in the file. The format tokens ({multi_start}, {multi_end}
# and {single}) are replaced, via str.format(), with the language-specific comment definitions defined in
# the sub-classes (see lang_comment_definition). Because the string goes through str.format(), any literal
# brace added to it would have to be doubled.
#
# The pattern is compiled with re.VERBOSE, so whitespace and "#" notes inside it are ignored by the regex
# engine; this is why the literal space near the end is written as "\ ".
#
# Match groupdict will contain two named subgroups: 'comment' (the body of a multi-line comment) and
# 'prefixed_comment' (a run of consecutive single-line comments, comment tokens included), of which at
# most one will be non-None.
#
# Notes for readers of the pattern below:
#   - "\n?" matches an optional newline (line feed), although the inline note calls it a carriage return.
#   - the inner group of 'prefixed_comment' is repeated zero or more times, so that alternative can also
#     match an empty string.
comment_regex_fmt = r"""
{multi_start} # start of the language-specific multi-line comment (ex. /*)
(?P<comment> # Look for a multiline comment
[\d\D]*? # capture all of the characters...
)
{multi_end} # until you find the end of the language-specific multi-line comment (ex. */)
| # If you don't find any of those...
(?P<prefixed_comment> # Look for a group of single-line comments
(?: # Non-capture mode
{single} # start by finding the single-line comment token (ex. //)
.* # and capture all characters until the end of the line
\n? # followed by an optional carriage return
\ * # and some empty space
)* # multiple times
)
"""
comment_regex_fmt = r'{multi_start}([\d\D]*?){multi_end}|{single}(.*)'

def __init__(self, config, echo):
"""
Expand All @@ -74,7 +79,12 @@ def __init__(self, config, echo):

# pylint: disable=not-a-mapping
self.comment_regex = re.compile(
self.comment_regex_fmt.format(**self.lang_comment_definition)
self.comment_regex_fmt.format(**self.lang_comment_definition),
flags=re.VERBOSE
)
self.prefixed_comment_regex = re.compile(
r"^ *{single}".format(**self.lang_comment_definition),
flags=re.MULTILINE
)

# Parent class will allow this class to populate self.strings_to_search via
Expand Down Expand Up @@ -102,15 +112,15 @@ def search(self, file_handle):
if any(anno in txt for anno in self.config.annotation_tokens):
fname = clean_abs_path(file_handle.name, self.config.source_path)

# Iterate on all comments: both prefixed- and non-prefixed.
for match in self.comment_regex.finditer(txt):
# Should only be one match
comment_content = [item for item in match.groups() if item is not None][0]
for inner_match in self.query.finditer(comment_content):
# Get the line number by counting newlines + 1 (for the first line).
# Note that this is the line number of the beginning of the comment, not the
# annotation token itself.
line = txt.count('\n', 0, match.start()) + 1
# Get the line number by counting newlines + 1 (for the first line).
# Note that this is the line number of the beginning of the comment, not the
# annotation token itself.
line = txt.count('\n', 0, match.start()) + 1

comment_content = self._find_comment_content(match)
for inner_match in self.query.finditer(comment_content):
try:
annotation_token = inner_match.group('token')
annotation_data = inner_match.group('data')
Expand All @@ -131,3 +141,27 @@ def search(self, file_handle):
})

return found_annotations

def _find_comment_content(self, match):
    """
    Return the comment content as text.

    Only one alternative of ``self.comment_regex`` takes part in a given match, so the
    named group belonging to the other alternative is ``None``.

    Args:
        match (re.Match): one of the matches of the self.comment_regex regular expression.

    Returns:
        str: the body of a multi-line comment, unchanged, or the text of a run of
        single-line comments with the leading comment tokens stripped.
    """
    groups = match.groupdict()

    # Compare against None instead of relying on truthiness: an empty multi-line comment
    # captures "" in 'comment' while 'prefixed_comment' is None, and handing None to the
    # token-stripping regex substitution would raise a TypeError.
    multi_line_content = groups["comment"]
    if multi_line_content is not None:
        return multi_line_content

    # Otherwise the match is a run of single-line comments: strip the comment tokens.
    return self._strip_single_line_comment_tokens(groups["prefixed_comment"])

def _strip_single_line_comment_tokens(self, content):
    """
    Remove the single-line comment token found at the start of each line of a comment text.

    Args:
        content (str): token-prefixed multi-line comment string.

    Returns:
        str: ``content`` with every match of ``self.prefixed_comment_regex`` removed.
    """
    token_at_line_start = self.prefixed_comment_regex
    stripped = token_at_line_start.sub("", content)
    return stripped
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Docstring
#.. pii: A long description that
# spans multiple
# lines
# A comment that is not indented and not part of the above multi-line annotation
#.. pii_types: id, name
# Some comment that comes after the multiple-line annotation
16 changes: 16 additions & 0 deletions tests/extensions/test_base_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,19 @@ def test_nothing_found():
r = FakeExtension(config, VerboseEcho())
with open('tests/extensions/base_test_files/empty.foo') as f:
r.search(f)


# Checks that _strip_single_line_comment_tokens removes the comment token from the start of
# every line of a multi-line string.
# Presumably FakeExtension declares "baz" as its single-line comment token -- verify against
# the FakeExtension definition, which is not visible here.
def test_strip_single_line_comment_tokens():
config = FakeConfig()

extension = FakeExtension(config, VerboseEcho())
# NOTE(review): the leading indentation inside the two literals below appears to have been
# lost in this rendering -- confirm the exact whitespace against the repository.
text = """baz line1
baz line2
bazline3
baz line4"""
expected_result = """ line1
line2
line3
line4"""
# pylint: disable=protected-access
assert expected_result == extension._strip_single_line_comment_tokens(text)
9 changes: 9 additions & 0 deletions tests/extensions/test_extension_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ def test_grouping_and_choice_failures(test_file, expected_exit_code, expected_me
Multi-line and multi-paragraph.""")
]
),
(
'multiline_singlelinecomment.pyt',
[
('.. pii:', """A long description that
spans multiple
lines"""),
('.. pii_types:', 'id, name'),
]
),
])
def test_multi_line_annotations(test_file, annotations):
config = AnnotationConfig('tests/test_configurations/.annotations_test')
Expand Down