From a309d10aefe9615abda03962e65babef203c54a6 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 18 Apr 2021 16:29:09 +0530 Subject: [PATCH 1/6] helper: split_markdown_front_matter helper for istio and mozilla importers Signed-off-by: Hritik Vijay --- vulnerabilities/helpers.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index 37fa7d1cb..c0139091e 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -26,6 +26,7 @@ import re from typing import Optional from typing import List +from typing import Tuple import requests import saneyaml @@ -164,3 +165,30 @@ def __lt__(self, other): ) return affected_package_with_patched_package_objects + + +def split_markdown_front_matter(text: str) -> Tuple[str, str]: + r""" + Split text into markdown front matter and the markdown body + Returns ("", text) for text with non existing front matter + + >>> text='''--- + ... title: DUMMY-SECURITY-2019-001 + ... description: Incorrect access control. + ... cves: [CVE-2042-1337] + ... --- + ... # Markdown starts here + ... 
''' + >>> split_markdown_front_matter(text) + ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]\n', '\n# Markdown starts here\n') + """ + + front_matter = "" + body = text + text = text.replace("\r\n", "\n") + linezero,_, text = text.partition("---\n") + + if not linezero: # nothing before first --- + front_matter,_, body = text.partition("---") + + return front_matter, body From 7a3a97956cda7880d1fb10726e58392c5bbac188 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 18 Apr 2021 17:13:43 +0530 Subject: [PATCH 2/6] black -l 100 Signed-off-by: Hritik Vijay --- vulnerabilities/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index c0139091e..958507abb 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -186,9 +186,9 @@ def split_markdown_front_matter(text: str) -> Tuple[str, str]: front_matter = "" body = text text = text.replace("\r\n", "\n") - linezero,_, text = text.partition("---\n") + linezero, _, text = text.partition("---\n") - if not linezero: # nothing before first --- - front_matter,_, body = text.partition("---") + if not linezero: # nothing before first --- + front_matter, _, body = text.partition("---") return front_matter, body From e5143c5e76d5a16867686e01477efb0bad89a34b Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 25 Apr 2021 21:22:16 +0530 Subject: [PATCH 3/6] Use split_markdown_front_matter helper in istio also, sort imports Signed-off-by: Hritik Vijay --- vulnerabilities/helpers.py | 2 +- vulnerabilities/importers/istio.py | 48 ++++-------------------------- 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index 958507abb..11f6f238c 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -170,7 +170,7 @@ def __lt__(self, other): def split_markdown_front_matter(text: str) -> Tuple[str, 
str]: r""" Split text into markdown front matter and the markdown body - Returns ("", text) for text with non existing front matter + Return ("", text) for text with non existing front matter >>> text='''--- ... title: DUMMY-SECURITY-2019-001 diff --git a/vulnerabilities/importers/istio.py b/vulnerabilities/importers/istio.py index 509bde08d..00be91c76 100644 --- a/vulnerabilities/importers/istio.py +++ b/vulnerabilities/importers/istio.py @@ -32,6 +32,7 @@ from vulnerabilities.data_source import Advisory from vulnerabilities.data_source import GitDataSource from vulnerabilities.data_source import Reference +from vulnerabilities.helpers import split_markdown_front_matter from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.package_managers import GitHubTagsAPI @@ -80,45 +81,6 @@ def get_pkg_versions_from_ranges(self, version_range_list): safe_pkg_versions = set(all_version) - set(vuln_pkg_versions) return safe_pkg_versions, vuln_pkg_versions - def get_data_from_yaml_lines(self, yaml_lines): - """Return a mapping of data from a iterable of yaml_lines - for example : - ['title: ISTIO-SECURITY-2019-001', - 'description: Incorrect access control.','cves: [CVE-2019-12243]'] - - would give {'title':'ISTIO-SECURITY-2019-001', - 'description': 'Incorrect access control.', - 'cves': '[CVE-2019-12243]'} - """ - - return saneyaml.load("\n".join(yaml_lines)) - - def get_yaml_lines(self, lines): - """The istio advisory file contains lines similar to yaml format . - This function extracts those lines and return an iterable of lines - - for example : - lines = - --- - title: ISTIO-SECURITY-2019-001 - description: Incorrect access control. - cves: [CVE-2019-12243] - --- - - get_yaml_lines(lines) would return - ['title: ISTIO-SECURITY-2019-001','description: Incorrect access control.' 
- ,'cves: [CVE-2019-12243]'] - """ - - for index, line in enumerate(lines): - line = line.strip() - if line.startswith("---") and index == 0: - continue - elif line.endswith("---"): - break - else: - yield line - def process_file(self, path): advisories = [] @@ -212,10 +174,10 @@ def process_file(self, path): return advisories def get_data_from_md(self, path): - """Return a mapping of vulnerability data from istio . The data is - in the form of yaml_lines inside a .md file. + """Return a mapping of vulnerability data from istio. The data is + in the form of yaml objects found inside front matter of the .md file. """ with open(path) as f: - yaml_lines = self.get_yaml_lines(f) - return self.get_data_from_yaml_lines(yaml_lines) + yaml_lines, _ = split_markdown_front_matter(f.read()) + return saneyaml.load(yaml_lines) From 075cdb23193046ed61c3078e2a9fb2af313500f8 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Mon, 26 Apr 2021 20:29:40 +0530 Subject: [PATCH 4/6] Use a more obvious version and reorder imports Signed-off-by: Hritik Vijay --- vulnerabilities/helpers.py | 28 ++++++++++++++++------------ vulnerabilities/importers/istio.py | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index 11f6f238c..d46aa02ea 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -167,7 +167,7 @@ def __lt__(self, other): return affected_package_with_patched_package_objects -def split_markdown_front_matter(text: str) -> Tuple[str, str]: +def split_markdown_front_matter(lines: str) -> Tuple[str, str]: r""" Split text into markdown front matter and the markdown body Return ("", text) for text with non existing front matter @@ -180,15 +180,19 @@ def split_markdown_front_matter(text: str) -> Tuple[str, str]: ... # Markdown starts here ... 
''' >>> split_markdown_front_matter(text) - ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]\n', '\n# Markdown starts here\n') + ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]', '# Markdown starts here\n') """ - - front_matter = "" - body = text - text = text.replace("\r\n", "\n") - linezero, _, text = text.partition("---\n") - - if not linezero: # nothing before first --- - front_matter, _, body = text.partition("---") - - return front_matter, body + fmlines = [] + mdlines = [] + splitter = mdlines + + lines = lines.replace("\r\n", "\n") + for index, line in enumerate(lines.split("\n")): + if index == 0 and line.strip().startswith("---"): + splitter = fmlines + elif line.strip().startswith("---"): + splitter = mdlines + else: + splitter.append(line) + + return "\n".join(fmlines), "\n".join(mdlines) diff --git a/vulnerabilities/importers/istio.py b/vulnerabilities/importers/istio.py index 00be91c76..ec131896b 100644 --- a/vulnerabilities/importers/istio.py +++ b/vulnerabilities/importers/istio.py @@ -32,8 +32,8 @@ from vulnerabilities.data_source import Advisory from vulnerabilities.data_source import GitDataSource from vulnerabilities.data_source import Reference -from vulnerabilities.helpers import split_markdown_front_matter from vulnerabilities.helpers import nearest_patched_package +from vulnerabilities.helpers import split_markdown_front_matter from vulnerabilities.package_managers import GitHubTagsAPI is_release = re.compile(r"^[\d.]+$", re.IGNORECASE).match From 5b79c9c3900bcd2fa554c2cc3cf04abaf07f66f8 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 9 May 2021 14:03:08 +0530 Subject: [PATCH 5/6] Update according to first review Better documentation and more readable function structure review: https://github.com/nexB/vulnerablecode/pull/443#pullrequestreview-650928076 Signed-off-by: Hritik Vijay --- vulnerabilities/helpers.py | 30 
++++++++++++------------------ vulnerabilities/importers/istio.py | 8 +++----- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index d46aa02ea..f296aef5e 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -167,10 +167,10 @@ def __lt__(self, other): return affected_package_with_patched_package_objects -def split_markdown_front_matter(lines: str) -> Tuple[str, str]: +def split_markdown_front_matter(text: str) -> Tuple[str, str]: r""" - Split text into markdown front matter and the markdown body - Return ("", text) for text with non existing front matter + Return a tuple of (front matter, markdown body) strings split from ``text``. + Each can be an empty string. >>> text='''--- ... title: DUMMY-SECURITY-2019-001 @@ -180,19 +180,13 @@ def split_markdown_front_matter(lines: str) -> Tuple[str, str]: ... # Markdown starts here ... ''' >>> split_markdown_front_matter(text) - ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]', '# Markdown starts here\n') + ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]', '# Markdown starts here') """ - fmlines = [] - mdlines = [] - splitter = mdlines - - lines = lines.replace("\r\n", "\n") - for index, line in enumerate(lines.split("\n")): - if index == 0 and line.strip().startswith("---"): - splitter = fmlines - elif line.strip().startswith("---"): - splitter = mdlines - else: - splitter.append(line) - - return "\n".join(fmlines), "\n".join(mdlines) + lines = text.splitlines() + if lines[0] == "---": + lines = lines[1:] + text = "\n".join(lines) + frontmatter, _, markdown = text.partition("\n---\n") + return frontmatter, markdown + + return "", text diff --git a/vulnerabilities/importers/istio.py b/vulnerabilities/importers/istio.py index ec131896b..103044bc2 100644 --- a/vulnerabilities/importers/istio.py +++ b/vulnerabilities/importers/istio.py @@ 
-174,10 +174,8 @@ def process_file(self, path): return advisories def get_data_from_md(self, path): - """Return a mapping of vulnerability data from istio. The data is - in the form of yaml objects found inside front matter of the .md file. - """ + """Return a mapping of vulnerability data extracted from an advisory.""" with open(path) as f: - yaml_lines, _ = split_markdown_front_matter(f.read()) - return saneyaml.load(yaml_lines) + front_matter, _ = split_markdown_front_matter(f.read()) + return saneyaml.load(front_matter) From 9f3e1ef755ff27e3d42e941d4e3a48fca6ce9665 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 20 Jun 2021 14:54:46 +0530 Subject: [PATCH 6/6] Comment regarding raw docstring and sort imports Signed-off-by: Hritik Vijay --- vulnerabilities/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index f296aef5e..95dc2d801 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -24,8 +24,8 @@ import dataclasses import json import re -from typing import Optional from typing import List +from typing import Optional from typing import Tuple import requests @@ -182,6 +182,7 @@ def split_markdown_front_matter(text: str) -> Tuple[str, str]: >>> split_markdown_front_matter(text) ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]', '# Markdown starts here') """ + # The doctest contains \n and for the sake of clarity I chose raw strings than escaping those. lines = text.splitlines() if lines[0] == "---": lines = lines[1:]