From b263b479f719d1e36c4c2fd784a2b0161ba4f482 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Thu, 28 Jan 2021 10:58:41 +0100 Subject: [PATCH 1/4] Make RedHat CVE import more robust This importer failed at times toward the end of the fetch step when reaching (and trying repeatedly) to re-fetch the last page. The fix consists in fetching larger batches at once (10K instead of 1k) which means fewer API calls and less risk of being throttled and properly handling exceptions and HTTP response codes and breaking rather than retrying. This also adds some minimal logging. Signed-off-by: Philippe Ombredanne --- vulnerabilities/importers/redhat.py | 55 +++++++++++++++++++---------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/vulnerabilities/importers/redhat.py b/vulnerabilities/importers/redhat.py index 3b08dffec..79ec92419 100644 --- a/vulnerabilities/importers/redhat.py +++ b/vulnerabilities/importers/redhat.py @@ -38,40 +38,57 @@ class RedhatDataSource(DataSource): CONFIG_CLASS = DataSourceConfiguration def __enter__(self): - self.redhat_response = fetch() - def updated_advisories(self): - processed_advisories = [] - for advisory_data in self.redhat_response: - processed_advisories.append(to_advisory(advisory_data)) + self.redhat_cves = fetch() + def updated_advisories(self): + processed_advisories = list(map(to_advisory, self.redhat_cves)) return self.batch_advisories(processed_advisories) def fetch(): - - response = [] + """ + Return a list of CVE data mappings fetched from the RedHat API. 
+ See: + https://access.redhat.com/documentation/en-us/red_hat_security_data_api/1.0/html/red_hat_security_data_api/index + """ + cves = [] page_no = 1 - url = "https://access.redhat.com/hydra/rest/securitydata/cve.json?page={}" + url_template = "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10000&page={}" + cve_data = None while True: - resp_json = requests.get(url.format(page_no)).json() - page_no += 1 - if not resp_json: + current_url = url_template.format(page_no) + try: + print(f'Fetching: {current_url}') + response = requests.get(current_url) + if response.status_code != requests.codes.ok: + # TODO: log me + print(f'Failed to fetch results from {current_url}') + break + cve_data = response.json() + except Exception as e: + # TODO: log me + msg = f'Failed to fetch results from {current_url}:\n{e}' + print(msg) break - for advisory in resp_json: - response.append(advisory) + if not cve_data: + break + cves.extend(cve_data) + page_no += 1 - return response + print(f'Fetched {len(cves)} CVEs from: {current_url}') + return cves def to_advisory(advisory_data): affected_purls = [] if advisory_data.get("affected_packages"): for rpm in advisory_data["affected_packages"]: - if rpm_to_purl(rpm): - affected_purls.append(rpm_to_purl(rpm)) + purl = rpm_to_purl(rpm) + if purl: + affected_purls.append(purl) references = [] bugzilla = advisory_data.get("bugzilla") @@ -137,14 +154,16 @@ def to_advisory(advisory_data): references.append(Reference(severities=redhat_scores, url=advisory_data["resource_url"])) return Advisory( + cve_id=advisory_data["CVE"], summary=advisory_data["bugzilla_description"], - vulnerability_id=advisory_data["CVE"], impacted_package_urls=affected_purls, vuln_references=references, ) def rpm_to_purl(rpm_string): + # FIXME: there is code in scancode to handle RPM conversion AND this should + # be all be part of the pcakageurl library # Red Hat uses `-:0` instead of just `-` to separate # package name and version @@ -155,4 +174,4 @@ 
def rpm_to_purl(rpm_string): name, version = components if version[0].isdigit(): - return PackageURL(name=name, type="rpm", version=version, namespace="redhat") + return PackageURL(namespace="redhat", name=name, type="rpm", version=version) From 62f7348a72789228c814badbf95b042212afa30d Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 9 Feb 2021 10:11:34 +0100 Subject: [PATCH 2/4] Apply black -l 100 Signed-off-by: Philippe Ombredanne --- vulnerabilities/importers/redhat.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/importers/redhat.py b/vulnerabilities/importers/redhat.py index 79ec92419..fb9f8b7b7 100644 --- a/vulnerabilities/importers/redhat.py +++ b/vulnerabilities/importers/redhat.py @@ -54,22 +54,24 @@ def fetch(): """ cves = [] page_no = 1 - url_template = "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10000&page={}" + url_template = ( + "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10000&page={}" + ) cve_data = None while True: current_url = url_template.format(page_no) try: - print(f'Fetching: {current_url}') + print(f"Fetching: {current_url}") response = requests.get(current_url) if response.status_code != requests.codes.ok: # TODO: log me - print(f'Failed to fetch results from {current_url}') + print(f"Failed to fetch results from {current_url}") break cve_data = response.json() except Exception as e: # TODO: log me - msg = f'Failed to fetch results from {current_url}:\n{e}' + msg = f"Failed to fetch results from {current_url}:\n{e}" print(msg) break @@ -78,7 +80,7 @@ def fetch(): cves.extend(cve_data) page_no += 1 - print(f'Fetched {len(cves)} CVEs from: {current_url}') + print(f"Fetched {len(cves)} CVEs from: {current_url}") return cves From 89a5b01380bec7cd709efa5707a7a13ab968570c Mon Sep 17 00:00:00 2001 From: Shivam Sandbhor Date: Sat, 13 Feb 2021 11:34:21 +0530 Subject: [PATCH 3/4] Rebase with latest main Signed-off-by: Shivam Sandbhor --- 
vulnerabilities/importers/redhat.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vulnerabilities/importers/redhat.py b/vulnerabilities/importers/redhat.py index fb9f8b7b7..d8948a133 100644 --- a/vulnerabilities/importers/redhat.py +++ b/vulnerabilities/importers/redhat.py @@ -54,9 +54,7 @@ def fetch(): """ cves = [] page_no = 1 - url_template = ( - "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10000&page={}" - ) + url_template = "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10&page={}" cve_data = None while True: From 9fbfe503e5680a69e09c6adf39a7a6b832be6321 Mon Sep 17 00:00:00 2001 From: Shivam Sandbhor Date: Wed, 24 Feb 2021 15:13:42 +0530 Subject: [PATCH 4/4] Fix codestyle Signed-off-by: Shivam Sandbhor --- vulnerabilities/importers/redhat.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/importers/redhat.py b/vulnerabilities/importers/redhat.py index d8948a133..fc0ac8204 100644 --- a/vulnerabilities/importers/redhat.py +++ b/vulnerabilities/importers/redhat.py @@ -20,9 +20,6 @@ # VulnerableCode is a free software code from nexB Inc. and others. # Visit https://github.com/nexB/vulnerablecode/ for support and download. 
- -import json - from packageurl import PackageURL import requests @@ -54,7 +51,7 @@ def fetch(): """ cves = [] page_no = 1 - url_template = "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10&page={}" + url_template = "https://access.redhat.com/hydra/rest/securitydata/cve.json?per_page=10000&page={}" # nopep8 cve_data = None while True: @@ -78,7 +75,6 @@ def fetch(): cves.extend(cve_data) page_no += 1 - print(f"Fetched {len(cves)} CVEs from: {current_url}") return cves @@ -152,9 +148,8 @@ def to_advisory(advisory_data): ) references.append(Reference(severities=redhat_scores, url=advisory_data["resource_url"])) - return Advisory( - cve_id=advisory_data["CVE"], + vulnerability_id=advisory_data["CVE"], summary=advisory_data["bugzilla_description"], impacted_package_urls=affected_purls, vuln_references=references, @@ -163,7 +158,7 @@ def to_advisory(advisory_data): def rpm_to_purl(rpm_string): # FIXME: there is code in scancode to handle RPM conversion AND this should - # be all be part of the pcakageurl library + # be all be part of the packageurl library # Red Hat uses `-:0` instead of just `-` to separate # package name and version