From de518f77851f965620e84921c5b2fa117405e68d Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Tue, 10 Aug 2021 18:30:43 +0530 Subject: [PATCH 01/40] Get rid of added_advisory and batches useless after a530627 Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 30 +------- vulnerabilities/import_runner.py | 72 +++++++++---------- vulnerabilities/importers/nginx.py | 2 +- vulnerabilities/management/commands/import.py | 10 +-- vulnerabilities/models.py | 4 +- 5 files changed, 38 insertions(+), 80 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index ff48a374a..41ec168f2 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -78,8 +78,6 @@ def normalized(self): class Advisory: """ This data class expresses the contract between data sources and the import runner. - Data sources are expected to be usable as context managers and generators, yielding batches of - Advisory sequences. NB: There are two representations for package URLs that are commonly used by code consuming this data class; PackageURL objects and strings. As a convention, the former is referred to in @@ -131,7 +129,6 @@ class DataSource(ContextManager): def __init__( self, - batch_size: int, last_run_date: Optional[datetime] = None, cutoff_date: Optional[datetime] = None, config: Optional[Mapping[str, Any]] = None, @@ -139,14 +136,11 @@ def __init__( """ Create a DataSource instance. - :param batch_size: Maximum number of records to return from added_advisories() and - updated_advisories() :param last_run_date: Optional timestamp when this data source was last inspected :param cutoff_date: Optional timestamp, records older than this will be ignored :param config: Optional dictionary with subclass-specific configuration """ config = config or {} - self.batch_size = batch_size try: self.config = self.__class__.CONFIG_CLASS(**config) # These really should be declared in DataSourceConfiguration above but that would @@ -194,16 +188,9 @@ def validate_configuration(self) -> None: """ pass - def added_advisories(self) -> Set[Advisory]: - """ - Subclasses yield batch_size sized batches of Advisory objects that have been added to the - data source since the last run or self.cutoff_date. - """ - return set() - def updated_advisories(self) -> Set[Advisory]: """ - Subclasses yield batch_size sized batches of Advisory objects that have been modified since + Subclasses return Advisory objects that have been modified since the last run or self.cutoff_date. NOTE: Data sources that do not enable detection of changes to existing records vs added @@ -218,21 +205,6 @@ def error(self, msg: str) -> None: """ raise InvalidConfigurationError(f"{type(self).__name__}: {msg}") - def batch_advisories(self, advisories: List[Advisory]) -> Set[Advisory]: - """ - Yield batches of the passed in list of advisories. 
- """ - - # TODO make this less cryptic and efficient - - advisories = advisories[:] - # copy the list as we are mutating it in the loop below - - while advisories: - b, advisories = advisories[: self.batch_size], advisories[self.batch_size :] - yield b - - @dataclasses.dataclass class GitDataSourceConfiguration(DataSourceConfiguration): repository_url: str diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 5b9a08d91..9b932b007 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -26,6 +26,7 @@ import logging from itertools import chain from typing import Tuple +from typing import Set from django.db import transaction @@ -68,9 +69,8 @@ class ImportRunner: - All update and select operations must use indexed columns. """ - def __init__(self, importer: models.Importer, batch_size: int): + def __init__(self, importer: models.Importer): self.importer = importer - self.batch_size = batch_size def run(self, cutoff_date: datetime.datetime = None) -> None: """ @@ -84,9 +84,10 @@ def run(self, cutoff_date: datetime.datetime = None) -> None: from all Linux distributions that package this kernel version. """ logger.info(f"Starting import for {self.importer.name}.") - data_source = self.importer.make_data_source(self.batch_size, cutoff_date=cutoff_date) + data_source = self.importer.make_data_source(cutoff_date=cutoff_date) with data_source: - process_advisories(data_source) + advisories = data_source.updated_advisories() + process_advisories(advisories) self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc) self.importer.data_source_cfg = dataclasses.asdict(data_source.config) self.importer.save() @@ -108,46 +109,41 @@ def get_vuln_pkg_refs(vulnerability, package): @transaction.atomic -def process_advisories(data_source: DataSource) -> None: +def process_advisories(advisories: Set[Advisory]) -> None: bulk_create_vuln_pkg_refs = set() - # Treat updated_advisories and added_advisories as same. Eventually - # we want to refactor all data sources to provide advisories via a - # single method. 
- advisory_batches = chain(data_source.updated_advisories(), data_source.added_advisories()) - for batch in advisory_batches: - for advisory in batch: - vuln, vuln_created = _get_or_create_vulnerability(advisory) - for vuln_ref in advisory.references: - ref, _ = models.VulnerabilityReference.objects.get_or_create( - vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url + for advisory in advisories: + vuln, vuln_created = _get_or_create_vulnerability(advisory) + for vuln_ref in advisory.references: + ref, _ = models.VulnerabilityReference.objects.get_or_create( + vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url + ) + + for score in vuln_ref.severities: + models.VulnerabilitySeverity.objects.update_or_create( + vulnerability=vuln, + scoring_system=score.system.identifier, + reference=ref, + defaults={"value": str(score.value)}, ) - for score in vuln_ref.severities: - models.VulnerabilitySeverity.objects.update_or_create( - vulnerability=vuln, - scoring_system=score.system.identifier, - reference=ref, - defaults={"value": str(score.value)}, - ) - - for aff_pkg_with_patched_pkg in advisory.affected_packages: - vulnerable_package, _ = _get_or_create_package( - aff_pkg_with_patched_pkg.vulnerable_package + for aff_pkg_with_patched_pkg in advisory.affected_packages: + vulnerable_package, _ = _get_or_create_package( + aff_pkg_with_patched_pkg.vulnerable_package + ) + patched_package = None + if aff_pkg_with_patched_pkg.patched_package: + patched_package, _ = _get_or_create_package( + aff_pkg_with_patched_pkg.patched_package ) - patched_package = None - if aff_pkg_with_patched_pkg.patched_package: - patched_package, _ = _get_or_create_package( - aff_pkg_with_patched_pkg.patched_package - ) - prv, _ = models.PackageRelatedVulnerability.objects.get_or_create( - vulnerability=vuln, - package=vulnerable_package, - ) + prv, _ = models.PackageRelatedVulnerability.objects.get_or_create( + vulnerability=vuln, + package=vulnerable_package, + ) - if patched_package: - prv.patched_package = patched_package - prv.save() + if patched_package: + prv.patched_package = patched_package + prv.save() models.PackageRelatedVulnerability.objects.bulk_create( [i.to_model_object() for i in bulk_create_vuln_pkg_refs] diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index c5372647d..20118efd0 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -68,7 +68,7 @@ def updated_advisories(self): self.set_api() data = requests.get(self.url).content advisories.extend(self.to_advisories(data)) - return self.batch_advisories(advisories) + return advisories def to_advisories(self, data): advisories = [] diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 3a76fce09..da333dcdd 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -53,10 +53,6 @@ def add_arguments(self, parser): ) parser.add_argument("sources", nargs="*", help="Data sources from which to import") - parser.add_argument( - "--batch_size", help="The batch size to be used for bulk inserting data" - ) - def handle(self, *args, **options): # load_importers() seeds the DB with Importers load_importers() @@ -64,9 +60,6 @@ def handle(self, *args, **options): self.list_sources() return - if options["batch_size"]: - self.batch_size = options["batch_size"] - if options["all"]: self._import_data(Importer.objects.all(), options["cutoff_date"]) return 
@@ -105,9 +98,8 @@ def _import_data(self, importers, cutoff_date): for importer in importers: self.stdout.write(f"Importing data from {importer.name}") - batch_size = int(getattr(self, "batch_size", 10)) try: - ImportRunner(importer, batch_size).run(cutoff_date=cutoff_date) + ImportRunner(importer).run(cutoff_date=cutoff_date) self.stdout.write( self.style.SUCCESS(f"Successfully imported data from {importer.name}") ) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e76c2a1c3..ba5feb1c2 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -235,18 +235,16 @@ class Importer(models.Model): help_text="Implementation-specific configuration for the data source", ) - def make_data_source(self, batch_size: int, cutoff_date: datetime = None) -> DataSource: + def make_data_source(self, cutoff_date: datetime = None) -> DataSource: """ Return a configured and ready to use instance of this importers data source implementation. - batch_size - max. number of records to return on each iteration cutoff_date - optional timestamp of the oldest data to include in the import """ importers_module = importlib.import_module("vulnerabilities.importers") klass = getattr(importers_module, self.data_source) ds = klass( - batch_size, last_run_date=self.last_run, cutoff_date=cutoff_date, config=self.data_source_cfg, From f78391541793ef0344642f3788ee8b15110cdbed Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Thu, 12 Aug 2021 07:37:35 +0530 Subject: [PATCH 02/40] Implement improver registry and management command Improver look strikingly similar to importers with some enhancements. (Eg: It doesn't use an importer_yielder alternative, see issue 501). Overview: Improvers maintain a contract (like Advisory) with improve_runner which is named Inference. Inference class embeds an advisory and a confidence score for that advisory. It is the job of an improver to fetch data to improve from the database (probably using some helper functions) then use whatever means necessary to improve that data sample and return with Inferences. Do note, that Inferences which have already been "imported" by importers would be totally discarded as redundant. Also, in case of two inferences on same data point, the one with highest confidence will be taken into the database. Food for thought: Pssst... Probably Inference class is useless and Advisory class can itself have that confidence score, but then the importers would have to mention that whatever they import have 100% confidence which might be susceptible to typo errors making some importers not mention their confidence thus zeroing on confidence. Anyway, importer and improvers should be different and separated. If not, then we could totally discard the idea of improvers and embed everything in an importer with a confidence score. Well, then, where goes the idea of modularity and keeping things simple ? Also, data coming from an "import"er should always be absolutely correct. This will also ensure that if downstream doesn't want any "improved" data then they don't get our guesses. The whole point of separating importers and improvers is that running improvers could be totally optional and based on downstream taste. 
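To make the contract concrete, a minimal improver could look like the sketch below. It relies only on the Inference class added in data_inference.py by this commit and on the existing Advisory dataclass; the class name, the "ExampleImprover" source label, the CVE id and the confidence value are made-up placeholders, not part of the patch.

from vulnerabilities.data_inference import Inference
from vulnerabilities.data_source import Advisory

class ExampleImprover:
    """An improver only needs to expose updated_inferences() to ImproveRunner."""

    def updated_inferences(self):
        # A real improver would query the database for data worth improving;
        # this stub hand-builds a single advisory to show the return shape.
        advisory = Advisory(summary="example summary", vulnerability_id="CVE-2021-12345")
        # A guessed relationship carries a confidence below MAX_CONFIDENCE (100),
        # while data imported verbatim would carry full confidence.
        return [Inference(advisory=advisory, source="ExampleImprover", confidence=50)]
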
Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 29 +++++ vulnerabilities/improve_runner.py | 25 ++++ vulnerabilities/improvers/__init__.py | 8 ++ .../management/commands/improve.py | 109 ++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 vulnerabilities/data_inference.py create mode 100644 vulnerabilities/improve_runner.py create mode 100644 vulnerabilities/improvers/__init__.py create mode 100644 vulnerabilities/management/commands/improve.py diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py new file mode 100644 index 000000000..eb0accce2 --- /dev/null +++ b/vulnerabilities/data_inference.py @@ -0,0 +1,29 @@ +import dataclasses +import logging +from vulnerabilities.data_source import Advisory + +logger = logging.getLogger(__name__) + +class OverConfidenceError(ValueError): + pass + +class UnderConfidenceError(ValueError): + pass + +MAX_CONFIDENCE = 100 + +@dataclasses.dataclass(order=True) +class Inference: + """ + This data class expresses the contract between data improvers and the improve runner. + """ + advisory: Advisory + source: str + confidence: int + + def __post_init__(self): + if self.confidence > MAX_CONFIDENCE: + raise OverConfidenceError + + if self.confidence < 0: + raise UnderConfidenceError diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py new file mode 100644 index 000000000..0e4656c81 --- /dev/null +++ b/vulnerabilities/improve_runner.py @@ -0,0 +1,25 @@ +from datetime import datetime +import dataclasses +import logging + +logger = logging.getLogger(__name__) + +class ImproveRunner: + """ + The ImproveRunner is responsible to improve the already imported data by a datasource. + Inferences regarding the data could be generated based on multiple factors. + All the inferences consist of a confidence score whose threshold could be tuned in user + settings (.env file) + """ + def __init__(self, improver): + self.improver = improver + + def run(self) -> None: + logger.info("Improving using %s.", self.improver.__module__) + inferences = self.improver.updated_inferences() + process_inferences(inferences) + logger.info("Finished improving using %s.", self.improver.__module__) + + +def process_inferences(inferences): + ... diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py new file mode 100644 index 000000000..eb643e353 --- /dev/null +++ b/vulnerabilities/improvers/__init__.py @@ -0,0 +1,8 @@ +IMPROVER_REGISTRY = [] + +def class_name(module_name: str): + for improver in IMPROVER_REGISTRY: + if improver.__module__ == module_name: + return improver + + raise AttributeError diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py new file mode 100644 index 000000000..9eebb53ac --- /dev/null +++ b/vulnerabilities/management/commands/improve.py @@ -0,0 +1,109 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# http://nexb.com and https://github.com/nexB/vulnerablecode/ +# The VulnerableCode software is licensed under the Apache License version 2.0. +# Data generated with VulnerableCode require an acknowledgment. +# +# You may not use this software except in compliance with the License. 
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode +# derivative work, you must accompany this data with the following acknowledgment: +# +# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# VulnerableCode should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# VulnerableCode is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/vulnerablecode/ for support and download. + +from datetime import datetime +import traceback + +from django.core.management.base import BaseCommand +from django.core.management.base import CommandError + +from vulnerabilities.models import Importer +from vulnerabilities.import_runner import ImportRunner +from vulnerabilities.importer_yielder import load_importers +from vulnerabilities.improvers import IMPROVER_REGISTRY +from vulnerabilities.improvers import class_name +from vulnerabilities.improve_runner import ImproveRunner + + +class Command(BaseCommand): + help = "Improve imported vulnerability data" + + def add_arguments(self, parser): + parser.add_argument( + "--list", + action="store_true", + help="List available data inferences", + ) + parser.add_argument( + "--all", action="store_true", help="Improve data from all available inferences" + ) + + parser.add_argument("sources", nargs="*", help="Data sources from which to import") + + def handle(self, *args, **options): + if options["list"]: + self.list_sources() + return + + if options["all"]: + self.improve_data(IMPROVER_REGISTRY) + return + + sources = options["sources"] + if not sources: + raise CommandError( + 'Please provide at least one data inference to improve from or use "--all".' + ) + + self.improve_data(valid_sources(sources)) + + def list_sources(self): + improvers = [ improver.__module__ for improver in IMPROVER_REGISTRY ] + self.stdout.write("Vulnerability data can be improved from the following sources:") + self.stdout.write(", ".join(improvers)) + + def improve_data(self, improvers): + failed_improvers = [] + + for improver in improvers: + self.stdout.write(f"Improving data using {improver.__module__}") + try: + ImproveRunner(improver).run() + self.stdout.write( + self.style.SUCCESS(f"Successfully improved data using {improver.__module__}") + ) + except Exception: + failed_improvers.append(improver.__module__) + traceback.print_exc() + self.stdout.write( + self.style.ERROR(f"Failed to run improver {improver.__module__}. 
Continuing...") + ) + + if failed_improvers: + raise CommandError(f"{len(failed_improvers)} failed!: {','.join(failed_improvers)}") + + +def valid_sources(sources): + improvers = [] + unknown_sources = [] + for source in sources: + try: + improvers.append(class_name(source)) + except AttributeError: + unknown_sources.append(source) + if unknown_sources: + raise CommandError(f"Unknown sources: {unknown_sources}") + + return improvers + + From fa740257925dff85b43f468217b515c3ede6abde Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 13 Aug 2021 06:02:10 +0530 Subject: [PATCH 03/40] Implement improver This is work in progress, there are a few bugs and a few fixmes as well. Everything will be replaced before the final commit Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 8 ++ vulnerabilities/data_source.py | 8 +- vulnerabilities/import_runner.py | 92 ++------------ vulnerabilities/importers/nginx.py | 4 +- vulnerabilities/improve_runner.py | 115 +++++++++++++++++- vulnerabilities/improvers/__init__.py | 9 +- vulnerabilities/improvers/nginx.py | 44 +++++++ .../management/commands/improve.py | 15 +-- vulnerabilities/models.py | 29 ++++- 9 files changed, 221 insertions(+), 103 deletions(-) create mode 100644 vulnerabilities/improvers/nginx.py diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index eb0accce2..4ed7554bd 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -27,3 +27,11 @@ def __post_init__(self): if self.confidence < 0: raise UnderConfidenceError + +class Improver: + """ + All improvers should inherit this class and implement updated_inferences method to return + new inferences for a package or vulnerability + """ + def updated_inferences(self): + raise NotImplementedError diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 41ec168f2..750784603 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -86,7 +86,8 @@ class Advisory: summary: str vulnerability_id: Optional[str] = None - affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) + affected_package_urls: Iterable[PackageURL] = dataclasses.field(default_factory=list) + fixed_package_urls: Iterable[PackageURL] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) def __post_init__(self): @@ -94,6 +95,8 @@ def __post_init__(self): raise ValueError("CVE expected, found: {}".format(self.vulnerability_id)) def normalized(self): + affected_package_urls = set(self.affected_package_urls) + fixed_package_urls = set(self.fixed_package_urls) references = sorted( self.references, key=lambda reference: (reference.reference_id, reference.url) ) @@ -103,7 +106,8 @@ def normalized(self): return Advisory( summary=self.summary, vulnerability_id=self.vulnerability_id, - affected_packages=sorted(self.affected_packages), + affected_package_urls=affected_package_urls, + fixed_package_urls=fixed_package_urls, references=references, ) diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 9b932b007..9475c887c 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -24,15 +24,16 @@ import dataclasses import datetime import logging -from itertools import chain from typing import Tuple from typing import Set -from django.db import transaction from vulnerabilities import models -from vulnerabilities.data_source import Advisory, DataSource +from 
vulnerabilities.data_source import Advisory from vulnerabilities.data_source import PackageURL +from vulnerabilities.data_inference import Inference +from vulnerabilities.data_inference import MAX_CONFIDENCE +from vulnerabilities.improve_runner import process_inferences logger = logging.getLogger(__name__) @@ -87,7 +88,7 @@ def run(self, cutoff_date: datetime.datetime = None) -> None: data_source = self.importer.make_data_source(cutoff_date=cutoff_date) with data_source: advisories = data_source.updated_advisories() - process_advisories(advisories) + process_advisories("importer", advisories) self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc) self.importer.data_source_cfg = dataclasses.asdict(data_source.config) self.importer.save() @@ -108,79 +109,10 @@ def get_vuln_pkg_refs(vulnerability, package): ) -@transaction.atomic -def process_advisories(advisories: Set[Advisory]) -> None: - bulk_create_vuln_pkg_refs = set() - for advisory in advisories: - vuln, vuln_created = _get_or_create_vulnerability(advisory) - for vuln_ref in advisory.references: - ref, _ = models.VulnerabilityReference.objects.get_or_create( - vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url - ) - - for score in vuln_ref.severities: - models.VulnerabilitySeverity.objects.update_or_create( - vulnerability=vuln, - scoring_system=score.system.identifier, - reference=ref, - defaults={"value": str(score.value)}, - ) - - for aff_pkg_with_patched_pkg in advisory.affected_packages: - vulnerable_package, _ = _get_or_create_package( - aff_pkg_with_patched_pkg.vulnerable_package - ) - patched_package = None - if aff_pkg_with_patched_pkg.patched_package: - patched_package, _ = _get_or_create_package( - aff_pkg_with_patched_pkg.patched_package - ) - - prv, _ = models.PackageRelatedVulnerability.objects.get_or_create( - vulnerability=vuln, - package=vulnerable_package, - ) - - if patched_package: - prv.patched_package = patched_package - prv.save() - - models.PackageRelatedVulnerability.objects.bulk_create( - [i.to_model_object() for i in bulk_create_vuln_pkg_refs] - ) - - -def _get_or_create_vulnerability( - advisory: Advisory, -) -> Tuple[models.Vulnerability, bool]: - - vuln, created = models.Vulnerability.objects.get_or_create( - vulnerability_id=advisory.vulnerability_id - ) # nopep8 - # Eventually we only want to keep summary from NVD and ignore other descriptions. - if advisory.summary and vuln.summary != advisory.summary: - vuln.summary = advisory.summary - vuln.save() - - return vuln, created - - -def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: - - query_kwargs = {} - for key, val in p.to_dict().items(): - if not val: - if key == "qualifiers": - query_kwargs[key] = {} - else: - query_kwargs[key] = "" - else: - query_kwargs[key] = val - - return models.Package.objects.get_or_create(**query_kwargs) - - -def _package_url_to_package(purl: PackageURL) -> models.Package: - p = models.Package() - p.set_package_url(purl) - return p +def process_advisories(source: str, advisories: Set[Advisory]) -> None: + """ + Insert advisories into the database + Advisories are treated as full confidence infererences. 
+ """ + inferences = [ Inference(advisory, source, MAX_CONFIDENCE) for advisory in advisories ] + process_inferences(inferences) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 20118efd0..9bc76c899 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -36,6 +36,7 @@ from vulnerabilities.package_managers import GitHubTagsAPI from vulnerabilities.package_managers import Version from vulnerabilities.helpers import nearest_patched_package +from vulnerabilities.helpers import AffectedPackage @dataclasses.dataclass @@ -117,7 +118,8 @@ def to_advisories(self, data): Advisory( vulnerability_id=cve_id, summary=summary, - affected_packages=nearest_patched_package(vulnerable_packages, fixed_packages), + affected_package_urls=vulnerable_packages, + fixed_package_urls=fixed_packages, ) ) diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 0e4656c81..530eb3c26 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -1,6 +1,13 @@ from datetime import datetime import dataclasses import logging +from typing import Tuple + +from django.db import transaction + +from vulnerabilities import models +from vulnerabilities.data_source import PackageURL +from vulnerabilities.data_source import Advisory logger = logging.getLogger(__name__) @@ -15,11 +22,111 @@ def __init__(self, improver): self.improver = improver def run(self) -> None: - logger.info("Improving using %s.", self.improver.__module__) - inferences = self.improver.updated_inferences() + logger.info("Improving using %s.", self.improver.__name__) + inferences = self.improver().updated_inferences() process_inferences(inferences) - logger.info("Finished improving using %s.", self.improver.__module__) + logger.info("Finished improving using %s.", self.improver.__name__) +@transaction.atomic def process_inferences(inferences): - ... + bulk_create_vuln_pkg_refs = set() + for inference in inferences: + advisory = inference.advisory + vuln, vuln_created = _get_or_create_vulnerability(advisory) + for vuln_ref in advisory.references: + ref, _ = models.VulnerabilityReference.objects.get_or_create( + vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url + ) + + for score in vuln_ref.severities: + models.VulnerabilitySeverity.objects.update_or_create( + vulnerability=vuln, + scoring_system=score.system.identifier, + reference=ref, + defaults={"value": str(score.value)}, + ) + + for aff_pkg in advisory.affected_package_urls: + vulnerable_package, _ = _get_or_create_package( + aff_pkg + ) + create_or_update_relation( + relation=models.PackageRelatedVulnerability, + vulnerability=vuln, + source=inference.source, + package=vulnerable_package, + confidence=inference.confidence) + + for fixed_pkg in advisory.fixed_package_urls: + patched_package, _ = _get_or_create_package( + fixed_pkg + ) + create_or_update_relation( + relation=models.PackageRelatedVulnerabilityFix, + vulnerability=vuln, + source=inference.source, + package=vulnerable_package, + confidence=inference.confidence) + + + models.PackageRelatedVulnerability.objects.bulk_create( + [i.to_model_object() for i in bulk_create_vuln_pkg_refs] + ) + + +def _get_or_create_vulnerability( + advisory: Advisory, +) -> Tuple[models.Vulnerability, bool]: + + vuln, created = models.Vulnerability.objects.get_or_create( + vulnerability_id=advisory.vulnerability_id + ) # nopep8 + # Eventually we only want to keep summary from NVD and ignore other descriptions. 
+ if advisory.summary and vuln.summary != advisory.summary: + vuln.summary = advisory.summary + vuln.save() + + return vuln, created + + +def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: + + query_kwargs = {} + for key, val in p.to_dict().items(): + if not val: + if key == "qualifiers": + query_kwargs[key] = {} + else: + query_kwargs[key] = "" + else: + query_kwargs[key] = val + + return models.Package.objects.get_or_create(**query_kwargs) + + +def _package_url_to_package(purl: PackageURL) -> models.Package: + p = models.Package() + p.set_package_url(purl) + return p + +def create_or_update_relation(relation, vulnerability, source, package, confidence): + try: + entry = relation.objects.get( + vulnerability=vulnerability, + package=package + ) + if confidence > entry.confidence: + entry.source = source + entry.confidence = confidence + entry.save() + logger.debug("%s: Confidence improved for %s R %s, new confidence: %d", relation, package, vulnerability, confidence) + + except relation.DoesNotExist: + relation.objects.create( + vulnerability=vulnerability, + source=source, + package=package, + confidence=confidence + ) + diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index eb643e353..6150e2650 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -1,8 +1,11 @@ -IMPROVER_REGISTRY = [] +from . import nginx -def class_name(module_name: str): +IMPROVER_REGISTRY = [nginx.NginxTimeTravel] + +def find_class(class_name: str): + # FIXME: this might cause problems when there are two modules containing same class name, think of a better approach for improver in IMPROVER_REGISTRY: - if improver.__module__ == module_name: + if class_name == improver.__name__: return improver raise AttributeError diff --git a/vulnerabilities/improvers/nginx.py b/vulnerabilities/improvers/nginx.py new file mode 100644 index 000000000..e97e30953 --- /dev/null +++ b/vulnerabilities/improvers/nginx.py @@ -0,0 +1,44 @@ +from packageurl import PackageURL + +from vulnerabilities.data_inference import Improver +from vulnerabilities.data_inference import Advisory +from vulnerabilities.data_inference import Inference +from vulnerabilities.helpers import nearest_patched_package +from vulnerabilities.models import Vulnerability +from vulnerabilities.models import Package + +class NginxTimeTravel(Improver): + def updated_inferences(self): + inferences = [] + + vulnerabilities = set(Vulnerability.objects.filter(vulnerable_packages__name="nginx")) + vulnerabilities.union(Vulnerability.objects.filter(patched_packages__name="nginx")) + + for vulnerability in vulnerabilities: + affected_packages = map(package_url, Package.objects.filter(vulnerable_package__package__name="nginx", vulnerabilities = vulnerability)) + fixed_packages = map(package_url, Package.objects.filter(patched_package__package__name="nginx", vulnerabilities = vulnerability)) + + time_traveller = nearest_patched_package(affected_packages, fixed_packages) + affected_packages = [ affected_package.vulnerable_package for affected_package in time_traveller] + fixed_packages = [ affected_package.patched_package for affected_package in time_traveller if affected_package.patched_package is not None] + + inference = Inference(advisory = Advisory( + vulnerability_id=vulnerability.vulnerability_id, + summary=vulnerability.summary, + affected_package_urls=fixed_packages, + ), source="time travel", confidence=30) + inferences.append(inference) + + return inferences + + 
+def package_url(package): + return PackageURL( + type=package.type, + namespace=package.namespace, + name=package.name, + version=package.version, + subpath=package.subpath, + qualifiers=package.qualifiers + ) + diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index 9eebb53ac..4205e3bcd 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -31,7 +31,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer_yielder import load_importers from vulnerabilities.improvers import IMPROVER_REGISTRY -from vulnerabilities.improvers import class_name +from vulnerabilities.improvers import find_class from vulnerabilities.improve_runner import ImproveRunner @@ -68,7 +68,7 @@ def handle(self, *args, **options): self.improve_data(valid_sources(sources)) def list_sources(self): - improvers = [ improver.__module__ for improver in IMPROVER_REGISTRY ] + improvers = [ improver.__name__ for improver in IMPROVER_REGISTRY ] self.stdout.write("Vulnerability data can be improved from the following sources:") self.stdout.write(", ".join(improvers)) @@ -76,17 +76,17 @@ def improve_data(self, improvers): failed_improvers = [] for improver in improvers: - self.stdout.write(f"Improving data using {improver.__module__}") + self.stdout.write(f"Improving data using {improver.__name__}") try: ImproveRunner(improver).run() self.stdout.write( - self.style.SUCCESS(f"Successfully improved data using {improver.__module__}") + self.style.SUCCESS(f"Successfully improved data using {improver.__name__}") ) except Exception: - failed_improvers.append(improver.__module__) + failed_improvers.append(improver.__name__) traceback.print_exc() self.stdout.write( - self.style.ERROR(f"Failed to run improver {improver.__module__}. Continuing...") + self.style.ERROR(f"Failed to run improver {improver.__name__}. 
Continuing...") ) if failed_improvers: @@ -94,11 +94,12 @@ def improve_data(self, improvers): def valid_sources(sources): + # FIXME: Need better approach, see definition of find_class improvers = [] unknown_sources = [] for source in sources: try: - improvers.append(class_name(source)) + improvers.append(find_class(source)) except AttributeError: unknown_sources.append(source) if unknown_sources: diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ba5feb1c2..952f2cb15 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -25,12 +25,15 @@ from django.db import models from django.core.exceptions import ValidationError -from django.utils.translation import gettext_lazy as _ +from django.utils.translation import ugettext_lazy as _ +from django.core.validators import MinValueValidator +from django.core.validators import MaxValueValidator from packageurl.contrib.django.models import PackageURLMixin from packageurl import PackageURL from vulnerabilities.data_source import DataSource from vulnerabilities.severity_systems import scoring_systems +from vulnerabilities.data_inference import MAX_CONFIDENCE class Vulnerability(models.Model): @@ -131,8 +134,8 @@ class Package(PackageURLMixin): resolved_vulnerabilities = models.ManyToManyField( to="Vulnerability", - through="PackageRelatedVulnerability", - through_fields=("patched_package", "vulnerability"), + through="PackageRelatedVulnerabilityFix", + through_fields=("package", "vulnerability"), related_name="patched_packages", ) @@ -192,9 +195,8 @@ class PackageRelatedVulnerability(models.Model): Package, on_delete=models.CASCADE, related_name="vulnerable_package" ) vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) - patched_package = models.ForeignKey( - Package, on_delete=models.CASCADE, null=True, blank=True, related_name="patched_package" - ) + source = models.TextField(null=True) + confidence = models.PositiveIntegerField(default=1, validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)]) def __str__(self): return f"{self.package.package_url} {self.vulnerability.vulnerability_id}" @@ -203,6 +205,21 @@ class Meta: unique_together = ("package", "vulnerability") verbose_name_plural = "PackageRelatedVulnerabilities" +class PackageRelatedVulnerabilityFix(models.Model): + + package = models.ForeignKey( + Package, on_delete=models.CASCADE, related_name="patched_package" + ) + vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) + source = models.TextField(null=True) + confidence = models.PositiveIntegerField(default=1, validators=[MinValueValidator(1), MaxValueValidator(MAX_CONFIDENCE)]) + + def __str__(self): + return f"{self.package.package_url} {self.vulnerability.vulnerability_id}" + + class Meta: + unique_together = ("package", "vulnerability") + verbose_name_plural = "PackageRelatedVulnerabilitiyFixes" class ImportProblem(models.Model): From 5fa3cd5d3acbf861b67f85245a286a10805b2743 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 29 Aug 2021 02:13:41 +0530 Subject: [PATCH 04/40] Use new Advisory model for importers Importing Advisory data is now 2 step process. 1) Import Advisory 2) Create relations (default improver) importers are now required to return AdvisoryData instead of Advisory. The data contained inside AdvisoryData is converted into a model named Advisory and then saved into the database. This will be useful later for improvers to work on existing Advisories. 
All the relationships for the Advisories are generated using a default improver at improvers/default.py. NOTE: There are errors in the UI due to models.py # TODO: Cannot resolve keyword 'resolved_vulnerabilities' into field # make vulnerabilities and resolved_vulnerabilities use the `fix` flag of PackageRelatedVulnerability Knows defects (in current PR): -[ ] UI break (see above) -[ ] might crash in multiple imports / improves -[ ] No improver than default improver is implemented yet -[ ] normalized function of ``AdvisoryData`` has no body -[ ] nginx importer still has remains of set_api etc -[ ] Inference -> AdvisoryData encapsulation -[ ] Duplicated data in database -[ ] ??? Knows defects (to be solved in different PR): -[ ] inconsistent naming - will be resolved in a different PR -[ ] unordered imports Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 20 +- vulnerabilities/data_source.py | 230 ++++++++++-------- vulnerabilities/import_runner.py | 27 +- vulnerabilities/importers/nginx.py | 62 ++--- vulnerabilities/improve_runner.py | 82 +++---- vulnerabilities/improvers/__init__.py | 11 +- vulnerabilities/improvers/default.py | 22 ++ .../management/commands/improve.py | 28 +-- vulnerabilities/models.py | 81 ++++-- 9 files changed, 330 insertions(+), 233 deletions(-) create mode 100644 vulnerabilities/improvers/default.py diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index 4ed7554bd..6f02c8188 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -1,23 +1,33 @@ import dataclasses import logging -from vulnerabilities.data_source import Advisory +from typing import List + +from vulnerabilities.data_source import AdvisoryData logger = logging.getLogger(__name__) + class OverConfidenceError(ValueError): pass + class UnderConfidenceError(ValueError): pass + MAX_CONFIDENCE = 100 + @dataclasses.dataclass(order=True) class Inference: """ This data class expresses the contract between data improvers and the improve runner. + + Source and confidence correspond to the improver, only inferences with highest confidence + for one vulnerability <-> package relationship is to be inserted into the database """ - advisory: Advisory + + advisory_data: AdvisoryData source: str confidence: int @@ -28,10 +38,12 @@ def __post_init__(self): if self.confidence < 0: raise UnderConfidenceError + class Improver: """ - All improvers should inherit this class and implement updated_inferences method to return + All improvers should inherit this class and implement inferences method to return new inferences for a package or vulnerability """ - def updated_inferences(self): + + def inferences(self): raise NotImplementedError diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 750784603..e56c610a4 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -21,6 +21,7 @@ # Visit https://github.com/nexB/vulnerablecode/ for support and download. 
import dataclasses +import json import logging import os import shutil @@ -47,7 +48,6 @@ from vulnerabilities.severity_systems import ScoringSystem from vulnerabilities.helpers import is_cve from vulnerabilities.helpers import nearest_patched_package -from vulnerabilities.helpers import AffectedPackage logger = logging.getLogger(__name__) @@ -74,8 +74,36 @@ def normalized(self): return Reference(reference_id=self.reference_id, url=self.url, severities=severities) +@dataclasses.dataclass(order=True, frozen=True) +class AffectedPackage: + # this package MUST NOT have a version + package: PackageURL + # the version specifier contains the version scheme as is: semver:>=1,3,4 + version_specifier: VersionSpecifier + + def toJson(self): + # TODO: VersionSpecifier.__str__ is not working + # https://github.com/nexB/univers/issues/7 + # Adjust following code when it is fixed + scheme = self.version_specifier.scheme + ranges = ",".join( + [f"{rng.operator}{rng.version.version_string}" for rng in self.version_specifier.ranges] + ) + return json.dumps({"package": self.package, "version_specifier": f"{scheme}:{ranges}"}) + + @staticmethod + def fromJson(affected_package_json): + obj = json.loads(affected_package_json) + affected_package = AffectedPackage(**obj) + package = PackageURL(*affected_package.package) + version_specifier = VersionSpecifier.from_version_spec_string( + affected_package.version_specifier + ) + return AffectedPackage(package=package, version_specifier=version_specifier) + + @dataclasses.dataclass(order=True) -class Advisory: +class AdvisoryData: """ This data class expresses the contract between data sources and the import runner. @@ -86,30 +114,44 @@ class Advisory: summary: str vulnerability_id: Optional[str] = None - affected_package_urls: Iterable[PackageURL] = dataclasses.field(default_factory=list) - fixed_package_urls: Iterable[PackageURL] = dataclasses.field(default_factory=list) + affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) + fixed_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) - - def __post_init__(self): - if self.vulnerability_id and not is_cve(self.vulnerability_id): - raise ValueError("CVE expected, found: {}".format(self.vulnerability_id)) + date_published: Optional[str] = None def normalized(self): - affected_package_urls = set(self.affected_package_urls) - fixed_package_urls = set(self.fixed_package_urls) - references = sorted( - self.references, key=lambda reference: (reference.reference_id, reference.url) - ) - for index, _ in enumerate(self.references): - references[index] = references[index].normalized() - - return Advisory( - summary=self.summary, - vulnerability_id=self.vulnerability_id, - affected_package_urls=affected_package_urls, - fixed_package_urls=fixed_package_urls, - references=references, - ) + ... 
+ + def serializable(self, o): + if isinstance(o, AffectedPackage): + return o.toJson() + if isinstance(o, Reference): + return vars(o) + if isinstance(o, datetime): + return o.isoformat() + + return json.JSONEncoder.default(self, o) + + def toJson(self): + return json.dumps(vars(self), default=self.serializable) + + @staticmethod + def fromJson(advisory_data_json: str): + obj = json.loads(advisory_data_json) + advisory_data = AdvisoryData(**obj) + advisory_data.affected_packages = [ + AffectedPackage.fromJson(p) for p in advisory_data.affected_packages + ] + advisory_data.fixed_packages = [ + AffectedPackage.fromJson(p) for p in advisory_data.fixed_packages + ] + advisory_data.references = [Reference(**ref) for ref in advisory_data.references] + return advisory_data + + +class Advisory: + # TODO: Get rid of this after migration + ... class InvalidConfigurationError(Exception): @@ -192,16 +234,11 @@ def validate_configuration(self) -> None: """ pass - def updated_advisories(self) -> Set[Advisory]: + def advisory_data(self) -> Set[AdvisoryData]: """ - Subclasses return Advisory objects that have been modified since - the last run or self.cutoff_date. - - NOTE: Data sources that do not enable detection of changes to existing records vs added - records must only implement this method, not added_advisories(). The ImportRunner - relies on this contract to decide between insert and update operations. + Subclasses return AdvisoryData objects """ - return set() + raise NotImplementedError def error(self, msg: str) -> None: """ @@ -209,6 +246,7 @@ def error(self, msg: str) -> None: """ raise InvalidConfigurationError(f"{type(self).__name__}: {msg}") + @dataclasses.dataclass class GitDataSourceConfiguration(DataSourceConfiguration): repository_url: str @@ -451,7 +489,7 @@ def _fetch(self) -> Tuple[Mapping, Iterable[ET.ElementTree]]: # TODO: enforce that we receive the proper data here raise NotImplementedError - def updated_advisories(self) -> List[Advisory]: + def advisory_data(self) -> List[AdvisoryData]: for metadata, oval_file in self._fetch(): try: oval_data = self.get_data_from_xml_doc(oval_file, metadata) @@ -476,7 +514,7 @@ def set_api(self, all_pkgs: Iterable[str]): """ raise NotImplementedError - def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[Advisory]: + def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[AdvisoryData]: """ The orchestration method of the OvalDataSource. This method breaks an OVAL xml ElementTree into a list of `Advisory`. 
@@ -488,65 +526,67 @@ def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> Lis {"type":"deb","qualifiers":{"distro":"buster"} } """ - all_adv = [] - oval_doc = OvalParser(self.translations, xml_doc) - raw_data = oval_doc.get_data() - all_pkgs = self._collect_pkgs(raw_data) - self.set_api(all_pkgs) - - # convert definition_data to Advisory objects - for definition_data in raw_data: - # These fields are definition level, i.e common for all elements - # connected/linked to an OvalDefinition - vuln_id = definition_data["vuln_id"] - description = definition_data["description"] - references = [Reference(url=url) for url in definition_data["reference_urls"]] - affected_packages = [] - for test_data in definition_data["test_data"]: - for package_name in test_data["package_list"]: - if package_name and len(package_name) >= 50: - continue - - affected_version_range = test_data["version_ranges"] or set() - version_class = version_class_by_package_type[pkg_metadata["type"]] - version_scheme = version_class.scheme - - affected_version_range = VersionSpecifier.from_scheme_version_spec_string( - version_scheme, affected_version_range - ) - all_versions = self.pkg_manager_api.get(package_name).valid_versions - - # FIXME: what is this 50 DB limit? that's too small for versions - # FIXME: we should not drop data this way - # This filter is for filtering out long versions. - # 50 is limit because that's what db permits atm. - all_versions = [version for version in all_versions if len(version) < 50] - if not all_versions: - continue - - affected_purls = [] - safe_purls = [] - for version in all_versions: - purl = self.create_purl( - pkg_name=package_name, - pkg_version=version, - pkg_data=pkg_metadata, - ) - if version_class(version) in affected_version_range: - affected_purls.append(purl) - else: - safe_purls.append(purl) - - affected_packages.extend( - nearest_patched_package(affected_purls, safe_purls), - ) - - all_adv.append( - Advisory( - summary=description, - affected_packages=affected_packages, - vulnerability_id=vuln_id, - references=references, - ) - ) - return all_adv + # TODO: Make this compatible to new model + + # all_adv = [] + # oval_doc = OvalParser(self.translations, xml_doc) + # raw_data = oval_doc.get_data() + # all_pkgs = self._collect_pkgs(raw_data) + # self.set_api(all_pkgs) + + # # convert definition_data to Advisory objects + # for definition_data in raw_data: + # # These fields are definition level, i.e common for all elements + # # connected/linked to an OvalDefinition + # vuln_id = definition_data["vuln_id"] + # description = definition_data["description"] + # references = [Reference(url=url) for url in definition_data["reference_urls"]] + # affected_packages = [] + # for test_data in definition_data["test_data"]: + # for package_name in test_data["package_list"]: + # if package_name and len(package_name) >= 50: + # continue + + # affected_version_range = test_data["version_ranges"] or set() + # version_class = version_class_by_package_type[pkg_metadata["type"]] + # version_scheme = version_class.scheme + + # affected_version_range = VersionSpecifier.from_scheme_version_spec_string( + # version_scheme, affected_version_range + # ) + # all_versions = self.pkg_manager_api.get(package_name).valid_versions + + # # FIXME: what is this 50 DB limit? that's too small for versions + # # FIXME: we should not drop data this way + # # This filter is for filtering out long versions. + # # 50 is limit because that's what db permits atm. 
+ # all_versions = [version for version in all_versions if len(version) < 50] + # if not all_versions: + # continue + + # affected_purls = [] + # safe_purls = [] + # for version in all_versions: + # purl = self.create_purl( + # pkg_name=package_name, + # pkg_version=version, + # pkg_data=pkg_metadata, + # ) + # if version_class(version) in affected_version_range: + # affected_purls.append(purl) + # else: + # safe_purls.append(purl) + + # affected_packages.extend( + # nearest_patched_package(affected_purls, safe_purls), + # ) + + # all_adv.append( + # Advisory( + # summary=description, + # affected_packages=affected_packages, + # vulnerability_id=vuln_id, + # references=references, + # ) + # ) + # return all_adv diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 9475c887c..09b937633 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -23,13 +23,15 @@ import dataclasses import datetime +import json import logging from typing import Tuple from typing import Set from vulnerabilities import models -from vulnerabilities.data_source import Advisory +from vulnerabilities.models import Advisory +from vulnerabilities.data_source import AdvisoryData from vulnerabilities.data_source import PackageURL from vulnerabilities.data_inference import Inference from vulnerabilities.data_inference import MAX_CONFIDENCE @@ -87,8 +89,9 @@ def run(self, cutoff_date: datetime.datetime = None) -> None: logger.info(f"Starting import for {self.importer.name}.") data_source = self.importer.make_data_source(cutoff_date=cutoff_date) with data_source: - advisories = data_source.updated_advisories() - process_advisories("importer", advisories) + advisory_data = data_source.advisory_data() + source = f"{data_source.__module__}.{data_source.__class__.__qualname__}" + process_advisories(source, advisory_data) self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc) self.importer.data_source_cfg = dataclasses.asdict(data_source.config) self.importer.save() @@ -109,10 +112,20 @@ def get_vuln_pkg_refs(vulnerability, package): ) -def process_advisories(source: str, advisories: Set[Advisory]) -> None: +def process_advisories(source: str, advisory_data: Set[AdvisoryData]) -> None: """ Insert advisories into the database - Advisories are treated as full confidence infererences. """ - inferences = [ Inference(advisory, source, MAX_CONFIDENCE) for advisory in advisories ] - process_inferences(inferences) + + advisories = [] + for data in advisory_data: + advisories.append( + Advisory( + date_published=data.date_published, + date_collected=datetime.datetime.now(tz=datetime.timezone.utc), + source=source, + data=data.toJson(), + ) + ) + + Advisory.objects.bulk_create(advisories) # TODO: handle conflicts, duplicates (update? ignore?) 
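For reference, a sketch of how the rows written above are meant to be read back, mirroring the DefaultImprover added in improvers/default.py (the query and loop body are abbreviated and illustrative only):

from vulnerabilities.data_source import AdvisoryData
from vulnerabilities.models import Advisory

# advisory.data holds the JSON string produced by AdvisoryData.toJson() above,
# so fromJson() reconstructs the dataclass for an improver to work on.
for advisory in Advisory.objects.filter(
    source="vulnerabilities.importers.nginx.NginxDataSource"
):
    advisory_data = AdvisoryData.fromJson(advisory.data)
    # ... wrap advisory_data in Inference objects with an appropriate confidence
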
diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 9bc76c899..e603c9ecc 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -22,6 +22,8 @@ import asyncio import dataclasses +from datetime import datetime +from typing import List import requests from packageurl import PackageURL @@ -29,14 +31,14 @@ from univers.version_specifier import VersionSpecifier from univers.versions import SemverVersion -from vulnerabilities.data_source import Advisory +from vulnerabilities.data_source import AdvisoryData +from vulnerabilities.data_source import AffectedPackage from vulnerabilities.data_source import DataSource from vulnerabilities.data_source import DataSourceConfiguration from vulnerabilities.data_source import Reference from vulnerabilities.package_managers import GitHubTagsAPI from vulnerabilities.package_managers import Version from vulnerabilities.helpers import nearest_patched_package -from vulnerabilities.helpers import AffectedPackage @dataclasses.dataclass @@ -64,15 +66,15 @@ def set_api(self): normalized_versions.add(normalized_version) self.version_api.cache["nginx/nginx"] = normalized_versions - def updated_advisories(self): - advisories = [] - self.set_api() + def advisory_data(self) -> List[AdvisoryData]: + adv_data = [] + # self.set_api() data = requests.get(self.url).content - advisories.extend(self.to_advisories(data)) - return advisories + adv_data.extend(self.to_advisories(data)) + return adv_data def to_advisories(self, data): - advisories = [] + advisory_data = [] soup = BeautifulSoup(data, features="lxml") vuln_list = soup.select("li p") @@ -111,19 +113,21 @@ def to_advisories(self, data): continue if "Vulnerable" in child: - vulnerable_packages = self.extract_vuln_pkgs(child) + affected_packages = self.extract_vuln_pkgs(child) continue - advisories.append( - Advisory( - vulnerability_id=cve_id, + advisory_data.append( + AdvisoryData( summary=summary, - affected_package_urls=vulnerable_packages, - fixed_package_urls=fixed_packages, + vulnerability_id=cve_id, + affected_packages=affected_packages, + fixed_packages=fixed_packages, + references=references, + date_published=datetime.now(), # TODO: put real date here ) ) - return advisories + return advisory_data def extract_fixed_pkgs(self, vuln_info): vuln_status, version_info = vuln_info.split(": ") @@ -131,7 +135,8 @@ def extract_fixed_pkgs(self, vuln_info): return {} raw_ranges = version_info.split(",") - version_ranges = [] + purl = PackageURL(type="generic", name="nginx") + packages = [] for rng in raw_ranges: # Eg. "1.7.3+" gets converted to VersionSpecifier.from_scheme_version_spec_string("semver","^1.7.3") # The advisory in this case uses `+` in the sense that any version @@ -139,17 +144,16 @@ def extract_fixed_pkgs(self, vuln_info): # "1.7.4" satisifes "1.7.3+", but "1.8.4" does not. 
"1.7.3+" has same # semantics as that of "^1.7.3" - version_ranges.append( - VersionSpecifier.from_scheme_version_spec_string("semver", "^" + rng[:-1]) + packages.append( + AffectedPackage( + package=purl, + version_specifier=VersionSpecifier.from_scheme_version_spec_string( + "semver", "^" + rng[:-1] + ), + ) ) - valid_versions = find_valid_versions( - self.version_api.get("nginx/nginx").valid_versions, version_ranges - ) - - return [ - PackageURL(type="generic", name="nginx", version=version) for version in valid_versions - ] + return packages def extract_vuln_pkgs(self, vuln_info): vuln_status, version_infos = vuln_info.split(": ") @@ -180,16 +184,14 @@ def extract_vuln_pkgs(self, vuln_info): ) ) - valid_versions = find_valid_versions( - self.version_api.get("nginx/nginx").valid_versions, version_ranges - ) qualifiers = {} if windows_only: qualifiers["os"] = "windows" + purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) return [ - PackageURL(type="generic", name="nginx", version=version, qualifiers=qualifiers) - for version in valid_versions + AffectedPackage(package=purl, version_specifier=version_range) + for version_range in version_ranges ] diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 530eb3c26..0ad3cc1ef 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -2,15 +2,19 @@ import dataclasses import logging from typing import Tuple +from typing import Set from django.db import transaction from vulnerabilities import models from vulnerabilities.data_source import PackageURL -from vulnerabilities.data_source import Advisory +from vulnerabilities.data_source import AdvisoryData +from vulnerabilities.data_inference import Inference + logger = logging.getLogger(__name__) + class ImproveRunner: """ The ImproveRunner is responsible to improve the already imported data by a datasource. 
@@ -18,21 +22,23 @@ class ImproveRunner: All the inferences consist of a confidence score whose threshold could be tuned in user settings (.env file) """ + def __init__(self, improver): self.improver = improver def run(self) -> None: logger.info("Improving using %s.", self.improver.__name__) - inferences = self.improver().updated_inferences() + inferences = self.improver().inferences() process_inferences(inferences) logger.info("Finished improving using %s.", self.improver.__name__) @transaction.atomic -def process_inferences(inferences): +def process_inferences(inferences: Set[Inference]): bulk_create_vuln_pkg_refs = set() for inference in inferences: - advisory = inference.advisory + advisory = inference.advisory_data + print(advisory) vuln, vuln_created = _get_or_create_vulnerability(advisory) for vuln_ref in advisory.references: ref, _ = models.VulnerabilityReference.objects.get_or_create( @@ -47,28 +53,29 @@ def process_inferences(inferences): defaults={"value": str(score.value)}, ) - for aff_pkg in advisory.affected_package_urls: - vulnerable_package, _ = _get_or_create_package( - aff_pkg - ) - create_or_update_relation( - relation=models.PackageRelatedVulnerability, - vulnerability=vuln, - source=inference.source, - package=vulnerable_package, - confidence=inference.confidence) - - for fixed_pkg in advisory.fixed_package_urls: - patched_package, _ = _get_or_create_package( - fixed_pkg - ) - create_or_update_relation( - relation=models.PackageRelatedVulnerabilityFix, - vulnerability=vuln, - source=inference.source, - package=vulnerable_package, - confidence=inference.confidence) - + for pkg in advisory.affected_packages: + aff_pkg = pkg.package + vulnerable_package, _ = _get_or_create_package(aff_pkg) + + models.PackageRelatedVulnerability( + package=vulnerable_package, + vulnerability=vuln, + source=inference.source, + confidence=inference.confidence, + fix=False, + ).update_or_create() + + for pkg in advisory.fixed_packages: + fixed_pkg = pkg.package + patched_package, _ = _get_or_create_package(fixed_pkg) + + models.PackageRelatedVulnerability( + package=patched_package, + vulnerability=vuln, + source=inference.source, + confidence=inference.confidence, + fix=True, + ).update_or_create() models.PackageRelatedVulnerability.objects.bulk_create( [i.to_model_object() for i in bulk_create_vuln_pkg_refs] @@ -76,7 +83,7 @@ def process_inferences(inferences): def _get_or_create_vulnerability( - advisory: Advisory, + advisory: AdvisoryData, ) -> Tuple[models.Vulnerability, bool]: vuln, created = models.Vulnerability.objects.get_or_create( @@ -109,24 +116,3 @@ def _package_url_to_package(purl: PackageURL) -> models.Package: p = models.Package() p.set_package_url(purl) return p - -def create_or_update_relation(relation, vulnerability, source, package, confidence): - try: - entry = relation.objects.get( - vulnerability=vulnerability, - package=package - ) - if confidence > entry.confidence: - entry.source = source - entry.confidence = confidence - entry.save() - logger.debug("%s: Confidence improved for %s R %s, new confidence: %d", relation, package, vulnerability, confidence) - - except relation.DoesNotExist: - relation.objects.create( - vulnerability=vulnerability, - source=source, - package=package, - confidence=confidence - ) - diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 6150e2650..4350f8612 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -1,11 +1,6 @@ from . import nginx +from . 
import default -IMPROVER_REGISTRY = [nginx.NginxTimeTravel] +IMPROVER_REGISTRY = [default.DefaultImprover] -def find_class(class_name: str): - # FIXME: this might cause problems when there are two modules containing same class name, think of a better approach - for improver in IMPROVER_REGISTRY: - if class_name == improver.__name__: - return improver - - raise AttributeError +improver_mapping = {f"{x.__module__}.{x.__name__}": x for x in IMPROVER_REGISTRY} diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py new file mode 100644 index 000000000..9b970fc1a --- /dev/null +++ b/vulnerabilities/improvers/default.py @@ -0,0 +1,22 @@ +from typing import List + +from vulnerabilities.data_source import AdvisoryData +from vulnerabilities.data_inference import Inference +from vulnerabilities.data_inference import Improver +from vulnerabilities.data_inference import MAX_CONFIDENCE +from vulnerabilities.models import Advisory + + +class DefaultImprover(Improver): + def inferences(self) -> List[Inference]: + advisories = Advisory.objects.filter( + source="vulnerabilities.importers.nginx.NginxDataSource" + ) + return [ + Inference( + advisory_data=AdvisoryData.fromJson(advisory.data), + source=advisory.source, + confidence=MAX_CONFIDENCE, + ) + for advisory in advisories + ] diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index 4205e3bcd..562af23f9 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -31,24 +31,23 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer_yielder import load_importers from vulnerabilities.improvers import IMPROVER_REGISTRY -from vulnerabilities.improvers import find_class +from vulnerabilities.improvers import improver_mapping from vulnerabilities.improve_runner import ImproveRunner class Command(BaseCommand): - help = "Improve imported vulnerability data" + help = "Improve vulnerability data" def add_arguments(self, parser): parser.add_argument( "--list", action="store_true", - help="List available data inferences", + help="List available data improvers", ) parser.add_argument( - "--all", action="store_true", help="Improve data from all available inferences" + "--all", action="store_true", help="Improve data from all available improvers" ) - - parser.add_argument("sources", nargs="*", help="Data sources from which to import") + parser.add_argument("sources", nargs="*", help="Fully qualified improver name to run") def handle(self, *args, **options): if options["list"]: @@ -61,16 +60,16 @@ def handle(self, *args, **options): sources = options["sources"] if not sources: - raise CommandError( - 'Please provide at least one data inference to improve from or use "--all".' 
- ) + raise CommandError('Please provide at least one improver to run use "--all".') self.improve_data(valid_sources(sources)) def list_sources(self): - improvers = [ improver.__name__ for improver in IMPROVER_REGISTRY ] + improvers = [ + f"{improver.__module__}.{improver.__qualname__}" for improver in IMPROVER_REGISTRY + ] self.stdout.write("Vulnerability data can be improved from the following sources:") - self.stdout.write(", ".join(improvers)) + self.stdout.write("\n".join(improvers)) def improve_data(self, improvers): failed_improvers = [] @@ -94,17 +93,14 @@ def improve_data(self, improvers): def valid_sources(sources): - # FIXME: Need better approach, see definition of find_class improvers = [] unknown_sources = [] for source in sources: try: - improvers.append(find_class(source)) - except AttributeError: + improvers.append(improver_mapping[source]) + except KeyError: unknown_sources.append(source) if unknown_sources: raise CommandError(f"Unknown sources: {unknown_sources}") return improvers - - diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 952f2cb15..9d879da95 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -22,6 +22,11 @@ import importlib from datetime import datetime +import dataclasses +import json +from typing import Optional +from typing import List +import logging from django.db import models from django.core.exceptions import ValidationError @@ -30,11 +35,15 @@ from django.core.validators import MaxValueValidator from packageurl.contrib.django.models import PackageURLMixin from packageurl import PackageURL +from univers.version_specifier import VersionSpecifier from vulnerabilities.data_source import DataSource +from vulnerabilities.data_source import AdvisoryData from vulnerabilities.severity_systems import scoring_systems from vulnerabilities.data_inference import MAX_CONFIDENCE +logger = logging.getLogger(__name__) + class Vulnerability(models.Model): """ @@ -125,18 +134,14 @@ class Package(PackageURLMixin): A software package with links to relevant vulnerabilities. 
""" + # TODO: Cannot resolve keyword 'resolved_vulnerabilities' into field + # make vulnerabilities and resolved_vulnerabilities use the `fix` flag of PackageRelatedVulnerability + vulnerabilities = models.ManyToManyField( to="Vulnerability", through="PackageRelatedVulnerability", through_fields=("package", "vulnerability"), - related_name="vulnerable_packages", - ) - - resolved_vulnerabilities = models.ManyToManyField( - to="Vulnerability", - through="PackageRelatedVulnerabilityFix", - through_fields=("package", "vulnerability"), - related_name="patched_packages", + related_name="packages", ) @property @@ -191,12 +196,13 @@ def __str__(self): class PackageRelatedVulnerability(models.Model): - package = models.ForeignKey( - Package, on_delete=models.CASCADE, related_name="vulnerable_package" - ) + package = models.ForeignKey(Package, on_delete=models.CASCADE, related_name="package") vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) source = models.TextField(null=True) - confidence = models.PositiveIntegerField(default=1, validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)]) + confidence = models.PositiveIntegerField( + default=1, validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)] + ) + fix = models.BooleanField(default=False) def __str__(self): return f"{self.package.package_url} {self.vulnerability.vulnerability_id}" @@ -205,21 +211,36 @@ class Meta: unique_together = ("package", "vulnerability") verbose_name_plural = "PackageRelatedVulnerabilities" -class PackageRelatedVulnerabilityFix(models.Model): - - package = models.ForeignKey( - Package, on_delete=models.CASCADE, related_name="patched_package" - ) - vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) - source = models.TextField(null=True) - confidence = models.PositiveIntegerField(default=1, validators=[MinValueValidator(1), MaxValueValidator(MAX_CONFIDENCE)]) + def update_or_create(self): + """ + Update if supplied record has more confidence than existing record + Create if doesn't exist + """ + try: + existing = self.__class__.objects.get( + vulnerability=self.vulnerability, package=self.package + ) + if self.confidence > existing.confidence: + existing.source = self.source + existing.confidence = self.confidence + existing.fix = self.fix + existing.save() + logger.debug( + "Confidence improved for %s R %s, new confidence: %d", + self.package, + self.vulnerability, + self.confidence, + ) - def __str__(self): - return f"{self.package.package_url} {self.vulnerability.vulnerability_id}" + except self.DoesNotExist: + self.__class__.objects.create( + vulnerability=self.vulnerability, + source=self.source, + package=self.package, + confidence=self.confidence, + fix=self.fix, + ) - class Meta: - unique_together = ("package", "vulnerability") - verbose_name_plural = "PackageRelatedVulnerabilitiyFixes" class ImportProblem(models.Model): @@ -296,3 +317,13 @@ class VulnerabilitySeverity(models.Model): class Meta: unique_together = ("vulnerability", "reference", "scoring_system") + + +class Advisory(models.Model): + date_published = models.DateField() + date_collected = models.DateField() + source = models.CharField(max_length=100) + improved_on = models.DateTimeField(null=True) + improved_times = models.IntegerField(default=0) + # data would contain a data_source.Advisory + data = models.JSONField() From 08605cd06430d590e7169f988caea6a41416a0f1 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 29 Aug 2021 16:16:16 +0530 Subject: [PATCH 05/40] Remove 
Inference->Advisory encapsulation

Earlier, Inference used to nest the Advisory class, which doesn't make sense
logically. Now, the Inference class is on its own and contains some
attributes that could be present in an advisory but are not required.

The technical reason behind this decoupling is that the PackageURL in
AffectedPackage is NOT supposed to contain version information. This
information must be present inside an Inference (that's the whole point).
Improvers are required to read AdvisoryData, modify the relevant fields, add
inference specific fields (confidence, source) and expand the
VersionSpecifier contained inside an AffectedPackage into PackageURLs as they
deem fit.

Known defects (in current PR):
-[ ] UI break (see above)
-[ ] might crash in multiple imports / improves
-[ ] No improver other than the default improver is implemented yet
-[ ] normalized function of ``AdvisoryData`` has no body
-[ ] nginx importer still has remains of set_api etc
-[x] Inference -> AdvisoryData encapsulation
-[ ] Duplicated data in database
-[ ] ???

Known defects (to be solved in a different PR):
-[ ] inconsistent naming - will be resolved in a different PR
-[ ] unordered imports

Signed-off-by: Hritik Vijay
---
 vulnerabilities/data_inference.py    | 20 +++++----
 vulnerabilities/data_source.py       | 16 ++++----
 vulnerabilities/import_runner.py     |  2 +-
 vulnerabilities/improve_runner.py    | 36 +++++++---------
 vulnerabilities/improvers/default.py | 61 +++++++++++++++++++++++-----
 vulnerabilities/models.py            |  2 +-
 6 files changed, 88 insertions(+), 49 deletions(-)

diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py
index 6f02c8188..6e54fcdd5 100644
--- a/vulnerabilities/data_inference.py
+++ b/vulnerabilities/data_inference.py
@@ -1,8 +1,11 @@
 import dataclasses
 import logging
 from typing import List
+from typing import Optional
 
-from vulnerabilities.data_source import AdvisoryData
+from packageurl import PackageURL
+
+from vulnerabilities.data_source import Reference
 
 logger = logging.getLogger(__name__)
 
@@ -23,13 +26,16 @@ class Inference:
     """
     This data class expresses the contract between data improvers and the improve runner.
- Source and confidence correspond to the improver, only inferences with highest confidence - for one vulnerability <-> package relationship is to be inserted into the database + Only inferences with highest confidence for one vulnerability <-> package + relationship is to be inserted into the database """ - advisory_data: AdvisoryData - source: str confidence: int + summary: Optional[str] = None + vulnerability_id: Optional[str] = None + affected_packages: List[PackageURL] = dataclasses.field(default_factory=list) + fixed_packages: List[PackageURL] = dataclasses.field(default_factory=list) + references: List[Reference] = dataclasses.field(default_factory=list) def __post_init__(self): if self.confidence > MAX_CONFIDENCE: @@ -42,8 +48,8 @@ def __post_init__(self): class Improver: """ All improvers should inherit this class and implement inferences method to return - new inferences for a package or vulnerability + new inferences for packages or vulnerabilities """ - def inferences(self): + def inferences(self) -> List[Inference]: raise NotImplementedError diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index e56c610a4..52f59ef02 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -81,18 +81,18 @@ class AffectedPackage: # the version specifier contains the version scheme as is: semver:>=1,3,4 version_specifier: VersionSpecifier - def toJson(self): + def json(self): # TODO: VersionSpecifier.__str__ is not working # https://github.com/nexB/univers/issues/7 # Adjust following code when it is fixed scheme = self.version_specifier.scheme ranges = ",".join( - [f"{rng.operator}{rng.version.version_string}" for rng in self.version_specifier.ranges] + [f"{rng.operator}{rng.version.value}" for rng in self.version_specifier.ranges] ) return json.dumps({"package": self.package, "version_specifier": f"{scheme}:{ranges}"}) @staticmethod - def fromJson(affected_package_json): + def from_json(affected_package_json): obj = json.loads(affected_package_json) affected_package = AffectedPackage(**obj) package = PackageURL(*affected_package.package) @@ -124,7 +124,7 @@ def normalized(self): def serializable(self, o): if isinstance(o, AffectedPackage): - return o.toJson() + return o.json() if isinstance(o, Reference): return vars(o) if isinstance(o, datetime): @@ -132,18 +132,18 @@ def serializable(self, o): return json.JSONEncoder.default(self, o) - def toJson(self): + def json(self): return json.dumps(vars(self), default=self.serializable) @staticmethod - def fromJson(advisory_data_json: str): + def from_json(advisory_data_json: str): obj = json.loads(advisory_data_json) advisory_data = AdvisoryData(**obj) advisory_data.affected_packages = [ - AffectedPackage.fromJson(p) for p in advisory_data.affected_packages + AffectedPackage.from_json(p) for p in advisory_data.affected_packages ] advisory_data.fixed_packages = [ - AffectedPackage.fromJson(p) for p in advisory_data.fixed_packages + AffectedPackage.from_json(p) for p in advisory_data.fixed_packages ] advisory_data.references = [Reference(**ref) for ref in advisory_data.references] return advisory_data diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 09b937633..50e93a55b 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -124,7 +124,7 @@ def process_advisories(source: str, advisory_data: Set[AdvisoryData]) -> None: date_published=data.date_published, date_collected=datetime.datetime.now(tz=datetime.timezone.utc), source=source, 
- data=data.toJson(), + data=data.json(), ) ) diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 0ad3cc1ef..6f76d25f9 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -28,19 +28,18 @@ def __init__(self, improver): def run(self) -> None: logger.info("Improving using %s.", self.improver.__name__) + source = f"{self.improver.__module__}.{self.improver.__qualname__}" inferences = self.improver().inferences() - process_inferences(inferences) + process_inferences(source=source, inferences=inferences) logger.info("Finished improving using %s.", self.improver.__name__) @transaction.atomic -def process_inferences(inferences: Set[Inference]): +def process_inferences(source: str, inferences: Set[Inference]): bulk_create_vuln_pkg_refs = set() for inference in inferences: - advisory = inference.advisory_data - print(advisory) - vuln, vuln_created = _get_or_create_vulnerability(advisory) - for vuln_ref in advisory.references: + vuln, vuln_created = _get_or_create_vulnerability(inference.vulnerability_id, inference.summary) + for vuln_ref in inference.references: ref, _ = models.VulnerabilityReference.objects.get_or_create( vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url ) @@ -53,26 +52,22 @@ def process_inferences(inferences: Set[Inference]): defaults={"value": str(score.value)}, ) - for pkg in advisory.affected_packages: - aff_pkg = pkg.package - vulnerable_package, _ = _get_or_create_package(aff_pkg) - + for pkg in inference.affected_packages: + vulnerable_package, _ = _get_or_create_package(pkg) models.PackageRelatedVulnerability( package=vulnerable_package, vulnerability=vuln, - source=inference.source, + source=source, confidence=inference.confidence, fix=False, ).update_or_create() - for pkg in advisory.fixed_packages: - fixed_pkg = pkg.package - patched_package, _ = _get_or_create_package(fixed_pkg) - + for pkg in inference.fixed_packages: + patched_package, _ = _get_or_create_package(pkg) models.PackageRelatedVulnerability( package=patched_package, vulnerability=vuln, - source=inference.source, + source=source, confidence=inference.confidence, fix=True, ).update_or_create() @@ -83,22 +78,21 @@ def process_inferences(inferences: Set[Inference]): def _get_or_create_vulnerability( - advisory: AdvisoryData, + vulnerability_id, summary ) -> Tuple[models.Vulnerability, bool]: vuln, created = models.Vulnerability.objects.get_or_create( - vulnerability_id=advisory.vulnerability_id + vulnerability_id=vulnerability_id ) # nopep8 # Eventually we only want to keep summary from NVD and ignore other descriptions. 
- if advisory.summary and vuln.summary != advisory.summary: - vuln.summary = advisory.summary + if summary and vuln.summary != summary: + vuln.summary = summary vuln.save() return vuln, created def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: - query_kwargs = {} for key, val in p.to_dict().items(): if not val: diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 9b970fc1a..52d6deaa6 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -1,22 +1,61 @@ from typing import List +from itertools import chain + +from packageurl import PackageURL +from univers.version_range import VersionRange from vulnerabilities.data_source import AdvisoryData +from vulnerabilities.data_source import AffectedPackage from vulnerabilities.data_inference import Inference from vulnerabilities.data_inference import Improver from vulnerabilities.data_inference import MAX_CONFIDENCE from vulnerabilities.models import Advisory - class DefaultImprover(Improver): def inferences(self) -> List[Inference]: - advisories = Advisory.objects.filter( - source="vulnerabilities.importers.nginx.NginxDataSource" - ) - return [ - Inference( - advisory_data=AdvisoryData.fromJson(advisory.data), - source=advisory.source, - confidence=MAX_CONFIDENCE, + advisories = Advisory.objects.all() + + inferences = [] + + for advisory in advisories: + advisory_data = AdvisoryData.from_json(advisory.data) + + affected_packages = chain.from_iterable( + [exact_purls(pkg) for pkg in advisory_data.affected_packages] + ) + fixed_packages = chain.from_iterable( + [exact_purls(pkg) for pkg in advisory_data.fixed_packages] ) - for advisory in advisories - ] + + inferences.append( + Inference( + confidence=MAX_CONFIDENCE, + summary=advisory_data.summary, + vulnerability_id=advisory_data.vulnerability_id, + affected_packages=affected_packages, + fixed_packages=fixed_packages, + references=advisory_data.references, + ) + ) + + return inferences + + +def exact_purls(pkg: AffectedPackage) -> List[PackageURL]: + """ + Only AffectedPackages with an equality in their VersionSpecifier are + considered as exact purls. + + For eg: + AffectedPackage with version_specifier as scheme:<=2.0 is treated as + version 2 but the same with scheme:<2.0 is not considered at all as there + is no info about what comes before the supplied version + """ + vs = pkg.version_specifier + purls = [] + for rng in vs.ranges: + if "=" in rng.operator and not "!" in rng.operator: + purl = pkg.package._replace(version = rng.version.value) + purls.append(purl) + + return purls diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 9d879da95..72fc05c58 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -325,5 +325,5 @@ class Advisory(models.Model): source = models.CharField(max_length=100) improved_on = models.DateTimeField(null=True) improved_times = models.IntegerField(default=0) - # data would contain a data_source.Advisory + # data would contain a data_source.AdvisoryData data = models.JSONField() From 43c8ab975ab9e1d5259f8ed471885327d15d8669 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 29 Aug 2021 16:24:15 +0530 Subject: [PATCH 06/40] Fix UI break Recent model changes break the UI as now the PackageRelatedVulnerability contains a ``fix`` flag to mark the relationship as a fix. This is leveraged to eliminate multiple columns like patched_package or vulnerable_package. 
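For illustration only, a minimal sketch of how the single ``fix`` flag now
answers both questions that previously needed two separate relations. The
``package`` and ``vulnerability`` variables below are hypothetical; the query
shapes mirror the properties changed in this patch:

    # vulnerabilities a package is affected by vs. patched against
    package.vulnerabilities.filter(packagerelatedvulnerability__fix=False)
    package.vulnerabilities.filter(packagerelatedvulnerability__fix=True)

    # packages vulnerable to vs. fixed against a given vulnerability
    vulnerability.packages.filter(vulnerabilities__packagerelatedvulnerability__fix=False)
    vulnerability.packages.filter(vulnerabilities__packagerelatedvulnerability__fix=True)
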
Known defects (in current PR): -[x] UI break -[ ] might crash in multiple imports / improves -[ ] No improver than default improver is implemented yet -[ ] normalized function of ``AdvisoryData`` has no body -[ ] nginx importer still has remains of set_api etc -[x] Inference -> AdvisoryData encapsulation -[ ] Duplicated data in database -[ ] ??? Knows defects (to be solved in different PR): -[ ] inconsistent naming - will be resolved in a different PR -[ ] unordered imports Signed-off-by: Hritik Vijay --- vulnerabilities/models.py | 8 ++++---- vulnerabilities/views.py | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 72fc05c58..e7bd4ac4f 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -89,7 +89,7 @@ def vulnerable_to(self): """ Returns packages which are vulnerable to this vulnerability. """ - return self.vulnerable_packages.all() + return self.packages.filter(vulnerabilities__packagerelatedvulnerability__fix=False) @property def resolved_to(self): @@ -97,7 +97,7 @@ def resolved_to(self): Returns packages, which first received patch against this vulnerability in their particular version history. """ - return self.patched_packages.all().distinct() + return self.packages.filter(vulnerabilities__packagerelatedvulnerability__fix=True) def __str__(self): return self.vulnerability_id or self.summary @@ -149,14 +149,14 @@ def vulnerable_to(self): """ Returns vulnerabilities which are affecting this package. """ - return self.vulnerabilities.all() + return self.vulnerabilities.filter(packagerelatedvulnerability__fix=False) @property def resolved_to(self): """ Returns the vulnerabilities which this package is patched against. """ - return self.resolved_vulnerabilities.all().distinct() + return self.vulnerabilities.filter(packagerelatedvulnerability__fix=True) class Meta: unique_together = ("name", "namespace", "type", "version", "qualifiers", "subpath") diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 1066ab744..bff55e93c 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -24,6 +24,7 @@ from django.core.paginator import Paginator from django.db.models import Count +from django.db.models import Q from django.http import HttpResponse from django.http.response import HttpResponseNotAllowed from django.shortcuts import render, redirect @@ -74,8 +75,8 @@ def request_to_queryset(request): models.Package.objects.all() .filter(name__icontains=package_name, type__icontains=package_type) .annotate( - vulnerability_count=Count("vulnerabilities"), - patched_vulnerability_count=Count("resolved_vulnerabilities"), + vulnerability_count=Count("vulnerabilities", filter=Q(vulnerabilities__packagerelatedvulnerability__fix=False)), + patched_vulnerability_count=Count("vulnerabilities",filter=Q(vulnerabilities__packagerelatedvulnerability__fix=True)), ) .prefetch_related() ) @@ -102,8 +103,8 @@ def request_to_vulnerabilities(request): vuln_id = request.GET["vuln_id"] return list( models.Vulnerability.objects.filter(vulnerability_id__icontains=vuln_id).annotate( - vulnerable_package_count=Count("vulnerable_packages"), - patched_package_count=Count("patched_packages"), + vulnerable_package_count=Count("packages", filter=Q(packagerelatedvulnerability__fix=False)), + patched_package_count=Count("packages", filter=Q(packagerelatedvulnerability__fix=True)), ) ) From 96a8cb351b9525261c00002466410baa2add1f6f Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Mon, 13 Sep 2021 
01:38:26 +0530 Subject: [PATCH 07/40] Use to_dict, add docs, infer as function name The refactors are based on https://github.com/nexB/vulnerablecode/pull/525#pullrequestreview-745189344 - class Inference: order should be the logic order from most important to least important fields - class Improver: docs - data_source.py: Use to_dict() than json() - models: Advisory: docs - improvers/nginx.py: Removed, relevant improvers will now reside inside importer module. In this case, importers/nginx.py - black formatting Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 11 ++++--- vulnerabilities/data_source.py | 41 ++++++++++++-------------- vulnerabilities/import_runner.py | 2 +- vulnerabilities/improve_runner.py | 8 ++--- vulnerabilities/improvers/default.py | 6 ++-- vulnerabilities/improvers/nginx.py | 44 ---------------------------- vulnerabilities/models.py | 23 ++++++++++----- vulnerabilities/views.py | 18 +++++++++--- 8 files changed, 64 insertions(+), 89 deletions(-) delete mode 100644 vulnerabilities/improvers/nginx.py diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index 6e54fcdd5..bace8dba2 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -30,9 +30,9 @@ class Inference: relationship is to be inserted into the database """ + vulnerability_id: str confidence: int summary: Optional[str] = None - vulnerability_id: Optional[str] = None affected_packages: List[PackageURL] = dataclasses.field(default_factory=list) fixed_packages: List[PackageURL] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) @@ -47,9 +47,12 @@ def __post_init__(self): class Improver: """ - All improvers should inherit this class and implement inferences method to return - new inferences for packages or vulnerabilities + All improvers must inherit this class and implement the infer method to + return new inferences for packages or vulnerabilities """ - def inferences(self) -> List[Inference]: + def infer(self) -> List[Inference]: + """ + Implement this method to generate and return Inferences + """ raise NotImplementedError diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 52f59ef02..a124a2d0f 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -81,7 +81,7 @@ class AffectedPackage: # the version specifier contains the version scheme as is: semver:>=1,3,4 version_specifier: VersionSpecifier - def json(self): + def to_dict(self): # TODO: VersionSpecifier.__str__ is not working # https://github.com/nexB/univers/issues/7 # Adjust following code when it is fixed @@ -89,12 +89,11 @@ def json(self): ranges = ",".join( [f"{rng.operator}{rng.version.value}" for rng in self.version_specifier.ranges] ) - return json.dumps({"package": self.package, "version_specifier": f"{scheme}:{ranges}"}) + return {"package": self.package, "version_specifier": f"{scheme}:{ranges}"} @staticmethod - def from_json(affected_package_json): - obj = json.loads(affected_package_json) - affected_package = AffectedPackage(**obj) + def from_dict(affected_package_dict): + affected_package = AffectedPackage(**affected_package_dict) package = PackageURL(*affected_package.package) version_specifier = VersionSpecifier.from_version_spec_string( affected_package.version_specifier @@ -117,33 +116,29 @@ class AdvisoryData: affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) fixed_packages: List[AffectedPackage] = 
dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) - date_published: Optional[str] = None + date_published: Optional[datetime.date] = None def normalized(self): ... - def serializable(self, o): - if isinstance(o, AffectedPackage): - return o.json() - if isinstance(o, Reference): - return vars(o) - if isinstance(o, datetime): - return o.isoformat() - - return json.JSONEncoder.default(self, o) - - def json(self): - return json.dumps(vars(self), default=self.serializable) + def to_dict(self): + return { + "summary": self.summary, + "vulnerability_id": self.vulnerability_id, + "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], + "fixed_packages": [pkg.to_dict() for pkg in self.fixed_packages], + "references": [vars(ref) for ref in self.references], + "date_published": self.date_published.isoformat(), + } @staticmethod - def from_json(advisory_data_json: str): - obj = json.loads(advisory_data_json) - advisory_data = AdvisoryData(**obj) + def from_dict(advisory_data: dict): + advisory_data = AdvisoryData(**advisory_data) advisory_data.affected_packages = [ - AffectedPackage.from_json(p) for p in advisory_data.affected_packages + AffectedPackage.from_dict(p) for p in advisory_data.affected_packages ] advisory_data.fixed_packages = [ - AffectedPackage.from_json(p) for p in advisory_data.fixed_packages + AffectedPackage.from_dict(p) for p in advisory_data.fixed_packages ] advisory_data.references = [Reference(**ref) for ref in advisory_data.references] return advisory_data diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 50e93a55b..d8a9cfd80 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -124,7 +124,7 @@ def process_advisories(source: str, advisory_data: Set[AdvisoryData]) -> None: date_published=data.date_published, date_collected=datetime.datetime.now(tz=datetime.timezone.utc), source=source, - data=data.json(), + data=json.dumps(data.to_dict()), ) ) diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 6f76d25f9..717faa18b 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -38,7 +38,9 @@ def run(self) -> None: def process_inferences(source: str, inferences: Set[Inference]): bulk_create_vuln_pkg_refs = set() for inference in inferences: - vuln, vuln_created = _get_or_create_vulnerability(inference.vulnerability_id, inference.summary) + vuln, vuln_created = _get_or_create_vulnerability( + inference.vulnerability_id, inference.summary + ) for vuln_ref in inference.references: ref, _ = models.VulnerabilityReference.objects.get_or_create( vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url @@ -77,9 +79,7 @@ def process_inferences(source: str, inferences: Set[Inference]): ) -def _get_or_create_vulnerability( - vulnerability_id, summary -) -> Tuple[models.Vulnerability, bool]: +def _get_or_create_vulnerability(vulnerability_id, summary) -> Tuple[models.Vulnerability, bool]: vuln, created = models.Vulnerability.objects.get_or_create( vulnerability_id=vulnerability_id diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 52d6deaa6..4eb2c41e0 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -1,3 +1,4 @@ +import json from typing import List from itertools import chain @@ -11,6 +12,7 @@ from vulnerabilities.data_inference import MAX_CONFIDENCE from vulnerabilities.models 
import Advisory + class DefaultImprover(Improver): def inferences(self) -> List[Inference]: advisories = Advisory.objects.all() @@ -18,7 +20,7 @@ def inferences(self) -> List[Inference]: inferences = [] for advisory in advisories: - advisory_data = AdvisoryData.from_json(advisory.data) + advisory_data = AdvisoryData.from_dict(json.loads(advisory.data)) affected_packages = chain.from_iterable( [exact_purls(pkg) for pkg in advisory_data.affected_packages] @@ -55,7 +57,7 @@ def exact_purls(pkg: AffectedPackage) -> List[PackageURL]: purls = [] for rng in vs.ranges: if "=" in rng.operator and not "!" in rng.operator: - purl = pkg.package._replace(version = rng.version.value) + purl = pkg.package._replace(version=rng.version.value) purls.append(purl) return purls diff --git a/vulnerabilities/improvers/nginx.py b/vulnerabilities/improvers/nginx.py deleted file mode 100644 index e97e30953..000000000 --- a/vulnerabilities/improvers/nginx.py +++ /dev/null @@ -1,44 +0,0 @@ -from packageurl import PackageURL - -from vulnerabilities.data_inference import Improver -from vulnerabilities.data_inference import Advisory -from vulnerabilities.data_inference import Inference -from vulnerabilities.helpers import nearest_patched_package -from vulnerabilities.models import Vulnerability -from vulnerabilities.models import Package - -class NginxTimeTravel(Improver): - def updated_inferences(self): - inferences = [] - - vulnerabilities = set(Vulnerability.objects.filter(vulnerable_packages__name="nginx")) - vulnerabilities.union(Vulnerability.objects.filter(patched_packages__name="nginx")) - - for vulnerability in vulnerabilities: - affected_packages = map(package_url, Package.objects.filter(vulnerable_package__package__name="nginx", vulnerabilities = vulnerability)) - fixed_packages = map(package_url, Package.objects.filter(patched_package__package__name="nginx", vulnerabilities = vulnerability)) - - time_traveller = nearest_patched_package(affected_packages, fixed_packages) - affected_packages = [ affected_package.vulnerable_package for affected_package in time_traveller] - fixed_packages = [ affected_package.patched_package for affected_package in time_traveller if affected_package.patched_package is not None] - - inference = Inference(advisory = Advisory( - vulnerability_id=vulnerability.vulnerability_id, - summary=vulnerability.summary, - affected_package_urls=fixed_packages, - ), source="time travel", confidence=30) - inferences.append(inference) - - return inferences - - -def package_url(package): - return PackageURL( - type=package.type, - namespace=package.namespace, - name=package.name, - version=package.version, - subpath=package.subpath, - qualifiers=package.qualifiers - ) - diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e7bd4ac4f..8a6c4f06c 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -320,10 +320,19 @@ class Meta: class Advisory(models.Model): - date_published = models.DateField() - date_collected = models.DateField() - source = models.CharField(max_length=100) - improved_on = models.DateTimeField(null=True) - improved_times = models.IntegerField(default=0) - # data would contain a data_source.AdvisoryData - data = models.JSONField() + """ + An advisory directly obtained from upstream without any modifications. 
+ """ + + date_published = models.DateField(help_text="Date of publication of the advisory") + date_collected = models.DateField(help_text="Date on which the advisory was collected") + source = models.CharField( + max_length=100, + help_text="Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource", + ) + date_improved = models.DateTimeField( + null=True, help_text="Latest date on which the advisory was improved by an improver" + ) + data = models.JSONField( + help_text="Contents of data_source.AdvisoryData serialized as a JSON object" + ) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index bff55e93c..2e241ad2a 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -75,8 +75,14 @@ def request_to_queryset(request): models.Package.objects.all() .filter(name__icontains=package_name, type__icontains=package_type) .annotate( - vulnerability_count=Count("vulnerabilities", filter=Q(vulnerabilities__packagerelatedvulnerability__fix=False)), - patched_vulnerability_count=Count("vulnerabilities",filter=Q(vulnerabilities__packagerelatedvulnerability__fix=True)), + vulnerability_count=Count( + "vulnerabilities", + filter=Q(vulnerabilities__packagerelatedvulnerability__fix=False), + ), + patched_vulnerability_count=Count( + "vulnerabilities", + filter=Q(vulnerabilities__packagerelatedvulnerability__fix=True), + ), ) .prefetch_related() ) @@ -103,8 +109,12 @@ def request_to_vulnerabilities(request): vuln_id = request.GET["vuln_id"] return list( models.Vulnerability.objects.filter(vulnerability_id__icontains=vuln_id).annotate( - vulnerable_package_count=Count("packages", filter=Q(packagerelatedvulnerability__fix=False)), - patched_package_count=Count("packages", filter=Q(packagerelatedvulnerability__fix=True)), + vulnerable_package_count=Count( + "packages", filter=Q(packagerelatedvulnerability__fix=False) + ), + patched_package_count=Count( + "packages", filter=Q(packagerelatedvulnerability__fix=True) + ), ) ) From c3f34e9381a03107a0153adbee9929aad87b9b08 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Mon, 13 Sep 2021 03:42:18 +0530 Subject: [PATCH 08/40] Use OSV design for AffectedPackages AffectedPackges now contains all affected versions and one fix version. 
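As a rough, hypothetical example (not taken from a real advisory), an affected
package under this design could be expressed roughly as:

    AffectedPackage(
        package=PackageURL(type="generic", name="nginx"),
        affected_version_specifier=VersionSpecifier.from_scheme_version_spec_string(
            "semver", "<1.17.3"
        ),
        fixed_version=SemverVersion("1.17.3"),
    )
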
This is inspired from the design documented at https://docs.google.com/document/d/1sylBGNooKtf220RHQn1I8pZRmqXZQADDQ_TOABrKTpA Under "Format Overview", along the lines of: "affects": { "ranges": [ { "type": string, "repo": string, "introduced": string, "fixed": string } ], "versions": [ string ] }, Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 59 ++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index a124a2d0f..ed07d76a4 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -42,6 +42,8 @@ from git import Repo, DiffIndex from packageurl import PackageURL from univers.version_specifier import VersionSpecifier +from univers.versions import BaseVersion +from univers.versions import parse_version from univers.versions import version_class_by_package_type from vulnerabilities.oval_parser import OvalParser @@ -76,29 +78,57 @@ def normalized(self): @dataclasses.dataclass(order=True, frozen=True) class AffectedPackage: - # this package MUST NOT have a version + # TODO: Tweak after https://github.com/nexB/univers/issues/8 + """ + Contains a range of affected versions and a fixed verison of a given package + The PackageURL supplied must *not* have a version + """ package: PackageURL - # the version specifier contains the version scheme as is: semver:>=1,3,4 - version_specifier: VersionSpecifier + affected_version_specifier: VersionSpecifier + fixed_version: Optional[BaseVersion] = None + + def __post_init__(self): + if self.package.version: + raise ValueError + + if ( + self.fixed_version + and self.affected_version_specifier.scheme != self.fixed_version.scheme + ): + raise ValueError def to_dict(self): # TODO: VersionSpecifier.__str__ is not working # https://github.com/nexB/univers/issues/7 # Adjust following code when it is fixed - scheme = self.version_specifier.scheme + scheme = self.affected_version_specifier.scheme ranges = ",".join( - [f"{rng.operator}{rng.version.value}" for rng in self.version_specifier.ranges] + [f"{rng.operator}{rng.version.value}" for rng in self.affected_version_specifier.ranges] ) - return {"package": self.package, "version_specifier": f"{scheme}:{ranges}"} + dct = { + "package": self.package, + "affected_version_specifier": f"{scheme}:{ranges}", + } + if self.fixed_version: + dct["fixed_version"] = str(self.fixed_version) + return dct @staticmethod - def from_dict(affected_package_dict): - affected_package = AffectedPackage(**affected_package_dict) - package = PackageURL(*affected_package.package) - version_specifier = VersionSpecifier.from_version_spec_string( - affected_package.version_specifier + def from_dict(aff_pkg: dict): + package = PackageURL(*aff_pkg["package"]) + affected_version_specifier = VersionSpecifier.from_version_spec_string( + aff_pkg["affected_version_specifier"] + ) + if "fixed_version" in aff_pkg: + fixed_version = parse_version(aff_pkg["fixed_version"]) + else: + fixed_version = None + + return AffectedPackage( + package=package, + affected_version_specifier=affected_version_specifier, + fixed_version=fixed_version, ) - return AffectedPackage(package=package, version_specifier=version_specifier) @dataclasses.dataclass(order=True) @@ -114,7 +144,6 @@ class AdvisoryData: summary: str vulnerability_id: Optional[str] = None affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) - fixed_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) 
references: List[Reference] = dataclasses.field(default_factory=list) date_published: Optional[datetime.date] = None @@ -126,7 +155,6 @@ def to_dict(self): "summary": self.summary, "vulnerability_id": self.vulnerability_id, "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], - "fixed_packages": [pkg.to_dict() for pkg in self.fixed_packages], "references": [vars(ref) for ref in self.references], "date_published": self.date_published.isoformat(), } @@ -137,9 +165,6 @@ def from_dict(advisory_data: dict): advisory_data.affected_packages = [ AffectedPackage.from_dict(p) for p in advisory_data.affected_packages ] - advisory_data.fixed_packages = [ - AffectedPackage.from_dict(p) for p in advisory_data.fixed_packages - ] advisory_data.references = [Reference(**ref) for ref in advisory_data.references] return advisory_data From c2ef79c9d8cf448c88cea559f12b7cf806937ed9 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Wed, 13 Oct 2021 01:03:59 +0530 Subject: [PATCH 09/40] Modify nginx importer to adopt OSV design This is still not perfect because univers is not stable yet. The uncertainty about the structure of version_specifier needs to be resolved. As of now, there are many redundant AffectedPackage objects which would be gone after https://github.com/nexB/univers/issues/8 is fixed. Signed-off-by: Hritik Vijay --- vulnerabilities/importers/nginx.py | 49 ++++++++++++++++-------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index e603c9ecc..ef22d5095 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -109,19 +109,31 @@ def to_advisories(self, data): continue if "Not vulnerable" in child: - fixed_packages = self.extract_fixed_pkgs(child) + fixed_package_versions = self.extract_fixed_pkg_versions(child) continue if "Vulnerable" in child: - affected_packages = self.extract_vuln_pkgs(child) + aff_pkgs = self.extract_vuln_pkgs(child) continue + # TODO: Change this after https://github.com/nexB/univers/issues/8 is fixed + purl = PackageURL(type="generic", name="nginx") + affected_packages = [] + for pkg in aff_pkgs: + for fixed_version in fixed_package_versions: + affected_packages.append( + AffectedPackage( + package=purl, + affected_version_specifier=pkg.affected_version_specifier, + fixed_version=fixed_version, + ) + ) + advisory_data.append( AdvisoryData( summary=summary, vulnerability_id=cve_id, affected_packages=affected_packages, - fixed_packages=fixed_packages, references=references, date_published=datetime.now(), # TODO: put real date here ) @@ -129,33 +141,26 @@ def to_advisories(self, data): return advisory_data - def extract_fixed_pkgs(self, vuln_info): + def extract_fixed_pkg_versions(self, vuln_info): vuln_status, version_info = vuln_info.split(": ") if "none" in version_info: return {} raw_ranges = version_info.split(",") - purl = PackageURL(type="generic", name="nginx") - packages = [] + versions = [] for rng in raw_ranges: - # Eg. "1.7.3+" gets converted to VersionSpecifier.from_scheme_version_spec_string("semver","^1.7.3") - # The advisory in this case uses `+` in the sense that any version - # with greater or equal `minor` version satisfies the range. - # "1.7.4" satisifes "1.7.3+", but "1.8.4" does not. "1.7.3+" has same - # semantics as that of "^1.7.3" - - packages.append( - AffectedPackage( - package=purl, - version_specifier=VersionSpecifier.from_scheme_version_spec_string( - "semver", "^" + rng[:-1] - ), - ) - ) + # Eg. 
"1.7.3+" gets converted to SemVersion(1.7.3) + # The way this needs to be interpreted is unique for nginx advisories + # More: https://github.com/nexB/vulnerablecode/issues/553 - return packages + versions.append(SemverVersion(rng[:-1].strip())) + + return versions def extract_vuln_pkgs(self, vuln_info): + # TODO: This method needs to be modified accordingy after + # https://github.com/nexB/univers/issues/8 is fixed + vuln_status, version_infos = vuln_info.split(": ") if "none" in version_infos: return {} @@ -190,7 +195,7 @@ def extract_vuln_pkgs(self, vuln_info): purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) return [ - AffectedPackage(package=purl, version_specifier=version_range) + AffectedPackage(package=purl, affected_version_specifier=version_range) for version_range in version_ranges ] From 7c1e24c258a2828307631e1b3d94988bf7acef3e Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Thu, 23 Sep 2021 18:48:10 +0530 Subject: [PATCH 10/40] Assert Inference fields There must be an affected or fixed purls if there's no vulnerability id. Otherwise, when vulnerability id is present, any (except summery for now) could be present. It does not make sense to neither have vulnerability id nor affected or fixed purls, such relation would be meaningless. Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 36 +++++++++++++++------------- vulnerabilities/improvers/default.py | 12 +++++----- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index bace8dba2..d9b185bb3 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -9,18 +9,8 @@ logger = logging.getLogger(__name__) - -class OverConfidenceError(ValueError): - pass - - -class UnderConfidenceError(ValueError): - pass - - MAX_CONFIDENCE = 100 - @dataclasses.dataclass(order=True) class Inference: """ @@ -33,16 +23,28 @@ class Inference: vulnerability_id: str confidence: int summary: Optional[str] = None - affected_packages: List[PackageURL] = dataclasses.field(default_factory=list) - fixed_packages: List[PackageURL] = dataclasses.field(default_factory=list) + affected_purls: List[PackageURL] = dataclasses.field(default_factory=list) + fixed_purls: List[PackageURL] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) def __post_init__(self): - if self.confidence > MAX_CONFIDENCE: - raise OverConfidenceError - - if self.confidence < 0: - raise UnderConfidenceError + if self.confidence > MAX_CONFIDENCE or self.confidence < 0: + raise ValueError + + if self.vulnerability_id: + assert self.summary or self.affected_purls or self.fixed_purls or self.references + else: + # TODO: Maybe only having summary + assert self.affected_purls or self.fixed_purls or self.references + + versionless_purls = [] + for purl in self.affected_purls + self.fixed_purls: + if not purl.version: + versionless_purls.append(purl) + + assert ( + not versionless_purls + ), f"Version-less purls are not supported in an Inference: {versionless_purls}" class Improver: diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 4eb2c41e0..018ae5951 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -22,20 +22,20 @@ def inferences(self) -> List[Inference]: for advisory in advisories: advisory_data = AdvisoryData.from_dict(json.loads(advisory.data)) - affected_packages = chain.from_iterable( + 
affected_purls = chain.from_iterable( [exact_purls(pkg) for pkg in advisory_data.affected_packages] ) - fixed_packages = chain.from_iterable( + fixed_purls = chain.from_iterable( [exact_purls(pkg) for pkg in advisory_data.fixed_packages] ) inferences.append( Inference( + vulnerability_id=advisory_data.vulnerability_id, confidence=MAX_CONFIDENCE, summary=advisory_data.summary, - vulnerability_id=advisory_data.vulnerability_id, - affected_packages=affected_packages, - fixed_packages=fixed_packages, + affected_purls=affected_purls, + fixed_purls=fixed_purls, references=advisory_data.references, ) ) @@ -50,7 +50,7 @@ def exact_purls(pkg: AffectedPackage) -> List[PackageURL]: For eg: AffectedPackage with version_specifier as scheme:<=2.0 is treated as - version 2 but the same with scheme:<2.0 is not considered at all as there + version 2.0 but the same with scheme:<2.0 is not considered at all as there is no info about what comes before the supplied version """ vs = pkg.version_specifier From 31c1054dbf42ce674203d1e1b73f61dd330781b8 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Mon, 11 Oct 2021 18:41:51 +0530 Subject: [PATCH 11/40] Process one advisory in one transaction Updated way: - Each Improver has a method to process a single Advisory model instance such as Improver.get_inferences(self, advisory): -> Inference - The framework then iterates on an Improver-provided QuerySet such as Improver.interesting_advisories that has the Advisories it is interested in. - In the framework, there is an atomic transaction that updates both the Advisory (e.g. date and later a log of improvements with select for update) and whatever is updated or create from the Inference This avoids failing the entire improver when only a single inference is erroneous. Also, the atomic transaction for every advisory and its inferences makes sure that improved_on date of advisory is consistent. 
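As a rough illustration of this contract (the ``ExampleImprover`` name and its
query below are hypothetical and not part of this patch):

    from typing import List

    from django.db.models.query import QuerySet

    from vulnerabilities.data_inference import Improver
    from vulnerabilities.data_inference import Inference
    from vulnerabilities.data_source import AdvisoryData
    from vulnerabilities.models import Advisory

    class ExampleImprover(Improver):
        @property
        def interesting_advisories(self) -> QuerySet:
            # narrow down to the advisories this improver knows how to handle
            return Advisory.objects.filter(source__endswith="NginxDataSource")

        def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]:
            # derive zero or more Inferences from one advisory; the framework
            # wraps each advisory and its inferences in a single transaction
            return []
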
Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 14 +++++++-- vulnerabilities/improve_runner.py | 33 +++++++++++++-------- vulnerabilities/improvers/__init__.py | 1 - vulnerabilities/improvers/default.py | 41 +++++++++++++-------------- vulnerabilities/models.py | 4 +-- 5 files changed, 54 insertions(+), 39 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index d9b185bb3..257490833 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -4,13 +4,16 @@ from typing import Optional from packageurl import PackageURL +from django.db.models.query import QuerySet from vulnerabilities.data_source import Reference +from vulnerabilities.data_source import AdvisoryData logger = logging.getLogger(__name__) MAX_CONFIDENCE = 100 + @dataclasses.dataclass(order=True) class Inference: """ @@ -53,8 +56,15 @@ class Improver: return new inferences for packages or vulnerabilities """ - def infer(self) -> List[Inference]: + @property + def interesting_advisories(self) -> QuerySet: + """ + Return QuerySet for the advisories this improver is interested in + """ + raise NotImplementedError + + def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]: """ - Implement this method to generate and return Inferences + Generate and return Inferences for the given advisory data """ raise NotImplementedError diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 717faa18b..937809dd4 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -1,8 +1,9 @@ from datetime import datetime -import dataclasses +from datetime import timezone +import json import logging from typing import Tuple -from typing import Set +from typing import List from django.db import transaction @@ -10,6 +11,7 @@ from vulnerabilities.data_source import PackageURL from vulnerabilities.data_source import AdvisoryData from vulnerabilities.data_inference import Inference +from vulnerabilities.models import Advisory logger = logging.getLogger(__name__) @@ -29,14 +31,25 @@ def __init__(self, improver): def run(self) -> None: logger.info("Improving using %s.", self.improver.__name__) source = f"{self.improver.__module__}.{self.improver.__qualname__}" - inferences = self.improver().inferences() - process_inferences(source=source, inferences=inferences) + improver = self.improver() + for advisory in improver.interesting_advisories: + inferences = improver.get_inferences( + advisory_data=AdvisoryData.from_dict(json.loads(advisory.data)) + ) + process_inferences(source=source, advisory=advisory, inferences=inferences) logger.info("Finished improving using %s.", self.improver.__name__) @transaction.atomic -def process_inferences(source: str, inferences: Set[Inference]): - bulk_create_vuln_pkg_refs = set() +def process_inferences(source: str, advisory: Advisory, inferences: List[Inference]): + + if not inferences: + logger.debug("Nothing to improve") + return + + advisory.date_improved = datetime.now(timezone.utc) + advisory.save() + for inference in inferences: vuln, vuln_created = _get_or_create_vulnerability( inference.vulnerability_id, inference.summary @@ -54,7 +67,7 @@ def process_inferences(source: str, inferences: Set[Inference]): defaults={"value": str(score.value)}, ) - for pkg in inference.affected_packages: + for pkg in inference.affected_purls: vulnerable_package, _ = _get_or_create_package(pkg) models.PackageRelatedVulnerability( package=vulnerable_package, @@ -64,7 +77,7 
@@ def process_inferences(source: str, inferences: Set[Inference]): fix=False, ).update_or_create() - for pkg in inference.fixed_packages: + for pkg in inference.fixed_purls: patched_package, _ = _get_or_create_package(pkg) models.PackageRelatedVulnerability( package=patched_package, @@ -74,10 +87,6 @@ def process_inferences(source: str, inferences: Set[Inference]): fix=True, ).update_or_create() - models.PackageRelatedVulnerability.objects.bulk_create( - [i.to_model_object() for i in bulk_create_vuln_pkg_refs] - ) - def _get_or_create_vulnerability(vulnerability_id, summary) -> Tuple[models.Vulnerability, bool]: diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 4350f8612..a2ad6e35e 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -1,4 +1,3 @@ -from . import nginx from . import default IMPROVER_REGISTRY = [default.DefaultImprover] diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 018ae5951..8c8f58d40 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -3,7 +3,7 @@ from itertools import chain from packageurl import PackageURL -from univers.version_range import VersionRange +from django.db.models.query import QuerySet from vulnerabilities.data_source import AdvisoryData from vulnerabilities.data_source import AffectedPackage @@ -14,36 +14,28 @@ class DefaultImprover(Improver): - def inferences(self) -> List[Inference]: - advisories = Advisory.objects.all() + @property + def interesting_advisories(self) -> QuerySet: + return Advisory.objects.all() + def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]: inferences = [] - - for advisory in advisories: - advisory_data = AdvisoryData.from_dict(json.loads(advisory.data)) - - affected_purls = chain.from_iterable( - [exact_purls(pkg) for pkg in advisory_data.affected_packages] - ) - fixed_purls = chain.from_iterable( - [exact_purls(pkg) for pkg in advisory_data.fixed_packages] - ) - + for aff_pkg in advisory_data.affected_packages: + affected_purls, fixed_purl = exact_purls(aff_pkg) inferences.append( Inference( vulnerability_id=advisory_data.vulnerability_id, confidence=MAX_CONFIDENCE, summary=advisory_data.summary, affected_purls=affected_purls, - fixed_purls=fixed_purls, + fixed_purls=[fixed_purl], references=advisory_data.references, ) ) - return inferences -def exact_purls(pkg: AffectedPackage) -> List[PackageURL]: +def exact_purls(aff_pkg: AffectedPackage) -> (List[PackageURL], PackageURL): """ Only AffectedPackages with an equality in their VersionSpecifier are considered as exact purls. @@ -52,12 +44,17 @@ def exact_purls(pkg: AffectedPackage) -> List[PackageURL]: AffectedPackage with version_specifier as scheme:<=2.0 is treated as version 2.0 but the same with scheme:<2.0 is not considered at all as there is no info about what comes before the supplied version + + Return a list of affected PackageURL and corresponding fixed PackageURL """ - vs = pkg.version_specifier - purls = [] + vs = aff_pkg.affected_version_specifier + aff_purls = [] for rng in vs.ranges: if "=" in rng.operator and not "!" 
in rng.operator: - purl = pkg.package._replace(version=rng.version.value) - purls.append(purl) + aff_purl = aff_pkg.package._replace(version=rng.version.value) + aff_purls.append(aff_purl) + + fixed_version = aff_pkg.fixed_version.version_string + fixed_purl = aff_pkg.package._replace(version=fixed_version) - return purls + return aff_purls, fixed_purl diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 8a6c4f06c..6b2857c86 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -324,8 +324,8 @@ class Advisory(models.Model): An advisory directly obtained from upstream without any modifications. """ - date_published = models.DateField(help_text="Date of publication of the advisory") - date_collected = models.DateField(help_text="Date on which the advisory was collected") + date_published = models.DateField(help_text="UTC Date of publication of the advisory") + date_collected = models.DateField(help_text="UTC Date on which the advisory was collected") source = models.CharField( max_length=100, help_text="Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource", From 2694c4ed97f9ea855db6f69cc946d09f5525873e Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Wed, 13 Oct 2021 02:40:58 +0530 Subject: [PATCH 12/40] Improve docs and cleanup code Git rid of extraneous methods with empty body and commented code. Now as AdvisoryData has all the parameters optional, make sure there's a __post_init__ to assert the constraints. Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 6 +- vulnerabilities/data_source.py | 155 +++++++++++++-------------- vulnerabilities/import_runner.py | 20 ---- vulnerabilities/improve_runner.py | 8 +- vulnerabilities/improvers/default.py | 6 ++ 5 files changed, 88 insertions(+), 107 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index 257490833..d41175c11 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -52,8 +52,10 @@ def __post_init__(self): class Improver: """ - All improvers must inherit this class and implement the infer method to - return new inferences for packages or vulnerabilities + Improvers are responsible to improve the already imported data by a datasource. + Inferences regarding the data could be generated based on multiple factors. + All the inferences consist of a confidence score whose threshold could be tuned in user + settings (.env file) """ @property diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index ed07d76a4..7f42ea545 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -19,16 +19,13 @@ # for any legal advice. # VulnerableCode is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/vulnerablecode/ for support and download. 
- import dataclasses -import json import logging import os import shutil import tempfile import traceback import xml.etree.ElementTree as ET -from binaryornot.helpers import is_binary_string from datetime import datetime from pathlib import Path from typing import Any @@ -39,13 +36,16 @@ from typing import Optional from typing import Set from typing import Tuple -from git import Repo, DiffIndex + +from binaryornot.helpers import is_binary_string +from git import DiffIndex +from git import Repo from packageurl import PackageURL from univers.version_specifier import VersionSpecifier from univers.versions import BaseVersion from univers.versions import parse_version from univers.versions import version_class_by_package_type - +from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.oval_parser import OvalParser from vulnerabilities.severity_systems import ScoringSystem from vulnerabilities.helpers import is_cve @@ -141,22 +141,25 @@ class AdvisoryData: variable names, etc. as "package_urls" and the latter as "purls". """ - summary: str vulnerability_id: Optional[str] = None + summary: str = None affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) date_published: Optional[datetime.date] = None - def normalized(self): - ... + def __post_init__(self): + if self.vulnerability_id: + assert self.summary or self.affected_packages or self.references + else: + assert self.affected_packages or self.references def to_dict(self): return { - "summary": self.summary, "vulnerability_id": self.vulnerability_id, + "summary": self.summary, "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], "references": [vars(ref) for ref in self.references], - "date_published": self.date_published.isoformat(), + "date_published": self.date_published.isoformat() if self.date_published else None, } @staticmethod @@ -169,11 +172,6 @@ def from_dict(advisory_data: dict): return advisory_data -class Advisory: - # TODO: Get rid of this after migration - ... - - class InvalidConfigurationError(Exception): pass @@ -252,7 +250,6 @@ def validate_configuration(self) -> None: This method is called in the constructor. It should raise InvalidConfigurationError with a human-readable message. 
""" - pass def advisory_data(self) -> Set[AdvisoryData]: """ @@ -546,67 +543,65 @@ def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> Lis {"type":"deb","qualifiers":{"distro":"buster"} } """ - # TODO: Make this compatible to new model - - # all_adv = [] - # oval_doc = OvalParser(self.translations, xml_doc) - # raw_data = oval_doc.get_data() - # all_pkgs = self._collect_pkgs(raw_data) - # self.set_api(all_pkgs) - - # # convert definition_data to Advisory objects - # for definition_data in raw_data: - # # These fields are definition level, i.e common for all elements - # # connected/linked to an OvalDefinition - # vuln_id = definition_data["vuln_id"] - # description = definition_data["description"] - # references = [Reference(url=url) for url in definition_data["reference_urls"]] - # affected_packages = [] - # for test_data in definition_data["test_data"]: - # for package_name in test_data["package_list"]: - # if package_name and len(package_name) >= 50: - # continue - - # affected_version_range = test_data["version_ranges"] or set() - # version_class = version_class_by_package_type[pkg_metadata["type"]] - # version_scheme = version_class.scheme - - # affected_version_range = VersionSpecifier.from_scheme_version_spec_string( - # version_scheme, affected_version_range - # ) - # all_versions = self.pkg_manager_api.get(package_name).valid_versions - - # # FIXME: what is this 50 DB limit? that's too small for versions - # # FIXME: we should not drop data this way - # # This filter is for filtering out long versions. - # # 50 is limit because that's what db permits atm. - # all_versions = [version for version in all_versions if len(version) < 50] - # if not all_versions: - # continue - - # affected_purls = [] - # safe_purls = [] - # for version in all_versions: - # purl = self.create_purl( - # pkg_name=package_name, - # pkg_version=version, - # pkg_data=pkg_metadata, - # ) - # if version_class(version) in affected_version_range: - # affected_purls.append(purl) - # else: - # safe_purls.append(purl) - - # affected_packages.extend( - # nearest_patched_package(affected_purls, safe_purls), - # ) - - # all_adv.append( - # Advisory( - # summary=description, - # affected_packages=affected_packages, - # vulnerability_id=vuln_id, - # references=references, - # ) - # ) - # return all_adv + all_adv = [] + oval_doc = OvalParser(self.translations, xml_doc) + raw_data = oval_doc.get_data() + all_pkgs = self._collect_pkgs(raw_data) + self.set_api(all_pkgs) + + # convert definition_data to Advisory objects + for definition_data in raw_data: + # These fields are definition level, i.e common for all elements + # connected/linked to an OvalDefinition + vuln_id = definition_data["vuln_id"] + description = definition_data["description"] + references = [Reference(url=url) for url in definition_data["reference_urls"]] + affected_packages = [] + for test_data in definition_data["test_data"]: + for package_name in test_data["package_list"]: + if package_name and len(package_name) >= 50: + continue + + affected_version_range = test_data["version_ranges"] or set() + version_class = version_class_by_package_type[pkg_metadata["type"]] + version_scheme = version_class.scheme + + affected_version_range = VersionSpecifier.from_scheme_version_spec_string( + version_scheme, affected_version_range + ) + all_versions = self.pkg_manager_api.get(package_name).valid_versions + + # FIXME: what is this 50 DB limit? 
that's too small for versions + # FIXME: we should not drop data this way + # This filter is for filtering out long versions. + # 50 is limit because that's what db permits atm. + all_versions = [version for version in all_versions if len(version) < 50] + if not all_versions: + continue + + affected_purls = [] + safe_purls = [] + for version in all_versions: + purl = self.create_purl( + pkg_name=package_name, + pkg_version=version, + pkg_data=pkg_metadata, + ) + if version_class(version) in affected_version_range: + affected_purls.append(purl) + else: + safe_purls.append(purl) + + affected_packages.extend( + nearest_patched_package(affected_purls, safe_purls), + ) + + all_adv.append( + Advisory( + summary=description, + affected_packages=affected_packages, + vulnerability_id=vuln_id, + references=references, + ) + ) + return all_adv diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index d8a9cfd80..f4a1c1832 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -25,35 +25,15 @@ import datetime import json import logging -from typing import Tuple from typing import Set from vulnerabilities import models from vulnerabilities.models import Advisory from vulnerabilities.data_source import AdvisoryData -from vulnerabilities.data_source import PackageURL -from vulnerabilities.data_inference import Inference -from vulnerabilities.data_inference import MAX_CONFIDENCE -from vulnerabilities.improve_runner import process_inferences logger = logging.getLogger(__name__) -# This *Inserter class is used to instantiate model objects. -# Frozen dataclass store args required to store instantiate -# model objects, this way model objects can be hashed indirectly which -# is required in this implementation. - - -@dataclasses.dataclass(frozen=True) -class PackageRelatedVulnerabilityInserter: - vulnerability: models.Vulnerability - is_vulnerable: bool - package: models.Package - - def to_model_object(self): - return models.PackageRelatedVulnerability(**dataclasses.asdict(self)) - class ImportRunner: """ diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 937809dd4..861466847 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -18,11 +18,9 @@ class ImproveRunner: - """ - The ImproveRunner is responsible to improve the already imported data by a datasource. - Inferences regarding the data could be generated based on multiple factors. - All the inferences consist of a confidence score whose threshold could be tuned in user - settings (.env file) + """ImproveRunner is responsible for populating the database with any + consumable data. It does so in its ``run`` method by invoking the given + improver and parsing the returned Inferences into proper database fields """ def __init__(self, improver): diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 8c8f58d40..603d90d52 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -14,6 +14,12 @@ class DefaultImprover(Improver): + """ + This is the first step after running any importer. 
The inferences generated + are only a translation of Advisory data returned by the importers into + full confidence inferences + """ + @property def interesting_advisories(self) -> QuerySet: return Advisory.objects.all() From c60e030bb33bae906a351fba7368014bc15696e4 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Mon, 18 Oct 2021 20:32:08 +0530 Subject: [PATCH 13/40] Hotfix parse_version from univers https://github.com/nexB/univers/issues/10 - parse_version() should accept None and empty strings Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 12 ++++-------- vulnerabilities/helpers.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 7f42ea545..6167e0ec3 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -43,13 +43,13 @@ from packageurl import PackageURL from univers.version_specifier import VersionSpecifier from univers.versions import BaseVersion -from univers.versions import parse_version from univers.versions import version_class_by_package_type from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.oval_parser import OvalParser from vulnerabilities.severity_systems import ScoringSystem -from vulnerabilities.helpers import is_cve -from vulnerabilities.helpers import nearest_patched_package + +# TODO: remove after https://github.com/nexB/univers/issues/10 is fixed +from vulnerabilities.helpers import parse_version logger = logging.getLogger(__name__) @@ -119,15 +119,11 @@ def from_dict(aff_pkg: dict): affected_version_specifier = VersionSpecifier.from_version_spec_string( aff_pkg["affected_version_specifier"] ) - if "fixed_version" in aff_pkg: - fixed_version = parse_version(aff_pkg["fixed_version"]) - else: - fixed_version = None return AffectedPackage( package=package, affected_version_specifier=affected_version_specifier, - fixed_version=fixed_version, + fixed_version=parse_version(aff_pkg["fixed_version"]), ) diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index 95dc2d801..da515c40d 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -191,3 +191,13 @@ def split_markdown_front_matter(text: str) -> Tuple[str, str]: return frontmatter, markdown return "", text + + +# TODO: Remove after https://github.com/nexB/univers/issues/10 +from univers.versions import parse_version as univers_parse_version + + +def parse_version(version): + if not version: + return None + return univers_parse_version(version) From fd0f909c7bb28427ed9bec9ff7cd42a17a8ddc34 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sat, 23 Oct 2021 03:35:09 +0530 Subject: [PATCH 14/40] Disable failing importers, add docs and refactor This is based on PR review done at https://github.com/nexB/vulnerablecode/pull/525#pullrequestreview-779776745 Majorly removes commented code, adds some doctest Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 2 - vulnerabilities/data_source.py | 6 +- vulnerabilities/importers/__init__.py | 51 ++++++------ vulnerabilities/importers/nginx.py | 114 ++++++++++++++------------ vulnerabilities/improve_runner.py | 11 ++- 5 files changed, 99 insertions(+), 85 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index d41175c11..5c33f153b 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -54,8 +54,6 @@ class Improver: """ Improvers are responsible to improve the already imported data by 
a datasource. Inferences regarding the data could be generated based on multiple factors. - All the inferences consist of a confidence score whose threshold could be tuned in user - settings (.env file) """ @property diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 6167e0ec3..48c25ab82 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -105,13 +105,11 @@ def to_dict(self): ranges = ",".join( [f"{rng.operator}{rng.version.value}" for rng in self.affected_version_specifier.ranges] ) - dct = { + return { "package": self.package, "affected_version_specifier": f"{scheme}:{ranges}", + "fixed_version": str(self.fixed_version) if self.fixed_version else None, } - if self.fixed_version: - dct["fixed_version"] = str(self.fixed_version) - return dct @staticmethod def from_dict(aff_pkg: dict): diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index f7387df61..6d44dab21 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -21,31 +21,32 @@ # Visit https://github.com/nexB/vulnerablecode/ for support and download. -from vulnerabilities.importers.alpine_linux import AlpineDataSource -from vulnerabilities.importers.apache_httpd import ApacheHTTPDDataSource -from vulnerabilities.importers.apache_kafka import ApacheKafkaDataSource -from vulnerabilities.importers.apache_tomcat import ApacheTomcatDataSource -from vulnerabilities.importers.archlinux import ArchlinuxDataSource -from vulnerabilities.importers.debian import DebianDataSource -from vulnerabilities.importers.debian_oval import DebianOvalDataSource -from vulnerabilities.importers.elixir_security import ElixirSecurityDataSource -from vulnerabilities.importers.gentoo import GentooDataSource -from vulnerabilities.importers.github import GitHubAPIDataSource -from vulnerabilities.importers.kaybee import KaybeeDataSource +# from vulnerabilities.importers.alpine_linux import AlpineDataSource +# from vulnerabilities.importers.apache_httpd import ApacheHTTPDDataSource +# from vulnerabilities.importers.apache_kafka import ApacheKafkaDataSource +# from vulnerabilities.importers.apache_tomcat import ApacheTomcatDataSource +# from vulnerabilities.importers.archlinux import ArchlinuxDataSource +# from vulnerabilities.importers.debian import DebianDataSource +# from vulnerabilities.importers.debian_oval import DebianOvalDataSource +# from vulnerabilities.importers.elixir_security import ElixirSecurityDataSource +# from vulnerabilities.importers.gentoo import GentooDataSource +# from vulnerabilities.importers.github import GitHubAPIDataSource +# from vulnerabilities.importers.kaybee import KaybeeDataSource from vulnerabilities.importers.nginx import NginxDataSource -from vulnerabilities.importers.npm import NpmDataSource -from vulnerabilities.importers.nvd import NVDDataSource -from vulnerabilities.importers.openssl import OpenSSLDataSource -from vulnerabilities.importers.postgresql import PostgreSQLDataSource -from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource -from vulnerabilities.importers.redhat import RedhatDataSource -from vulnerabilities.importers.retiredotnet import RetireDotnetDataSource -from vulnerabilities.importers.ruby import RubyDataSource -from vulnerabilities.importers.rust import RustDataSource -from vulnerabilities.importers.safety_db import SafetyDbDataSource -from vulnerabilities.importers.suse_scores import SUSESeverityScoreDataSource -from 
vulnerabilities.importers.ubuntu import UbuntuDataSource -from vulnerabilities.importers.ubuntu_usn import UbuntuUSNDataSource -from vulnerabilities.importers.istio import IstioDataSource + +# from vulnerabilities.importers.npm import NpmDataSource +# from vulnerabilities.importers.nvd import NVDDataSource +# from vulnerabilities.importers.openssl import OpenSSLDataSource +# from vulnerabilities.importers.postgresql import PostgreSQLDataSource +# from vulnerabilities.importers.project_kb_msr2019 import ProjectKBMSRDataSource +# from vulnerabilities.importers.redhat import RedhatDataSource +# from vulnerabilities.importers.retiredotnet import RetireDotnetDataSource +# from vulnerabilities.importers.ruby import RubyDataSource +# from vulnerabilities.importers.rust import RustDataSource +# from vulnerabilities.importers.safety_db import SafetyDbDataSource +# from vulnerabilities.importers.suse_scores import SUSESeverityScoreDataSource +# from vulnerabilities.importers.ubuntu import UbuntuDataSource +# from vulnerabilities.importers.ubuntu_usn import UbuntuUSNDataSource +# from vulnerabilities.importers.istio import IstioDataSource # from vulnerabilities.importers.suse_backports import SUSEBackportsDataSource diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index ef22d5095..8747172a3 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -109,11 +109,11 @@ def to_advisories(self, data): continue if "Not vulnerable" in child: - fixed_package_versions = self.extract_fixed_pkg_versions(child) + fixed_package_versions = extract_fixed_pkg_versions(child) continue if "Vulnerable" in child: - aff_pkgs = self.extract_vuln_pkgs(child) + aff_pkgs = extract_vuln_pkgs(child) continue # TODO: Change this after https://github.com/nexB/univers/issues/8 is fixed @@ -141,63 +141,71 @@ def to_advisories(self, data): return advisory_data - def extract_fixed_pkg_versions(self, vuln_info): - vuln_status, version_info = vuln_info.split(": ") - if "none" in version_info: - return {} - - raw_ranges = version_info.split(",") - versions = [] - for rng in raw_ranges: - # Eg. "1.7.3+" gets converted to SemVersion(1.7.3) - # The way this needs to be interpreted is unique for nginx advisories - # More: https://github.com/nexB/vulnerablecode/issues/553 - - versions.append(SemverVersion(rng[:-1].strip())) - - return versions - - def extract_vuln_pkgs(self, vuln_info): - # TODO: This method needs to be modified accordingy after - # https://github.com/nexB/univers/issues/8 is fixed - - vuln_status, version_infos = vuln_info.split(": ") - if "none" in version_infos: - return {} - - version_ranges = [] - windows_only = False - for version_info in version_infos.split(", "): - if version_info == "all": - # This is misleading since eventually some version get fixed. - continue - - if "-" not in version_info: - # These are discrete versions - version_ranges.append( - VersionSpecifier.from_scheme_version_spec_string("semver", version_info[0]) - ) - continue - windows_only = "nginx/Windows" in version_info - version_info = version_info.replace("nginx/Windows", "") - lower_bound, upper_bound = version_info.split("-") +def extract_fixed_pkg_versions(vuln_info): + vuln_status, version_info = vuln_info.split(": ") + if "none" in version_info: + return {} + + raw_ranges = version_info.split(",") + versions = [] + for rng in raw_ranges: + # Eg. 
"1.7.3+" gets converted to SemVersion(1.7.3) + # The way this needs to be interpreted is unique for nginx advisories + # More: https://github.com/nexB/vulnerablecode/issues/553 + + versions.append(SemverVersion(rng.partition("+")[0].strip())) + + return versions + + +def extract_vuln_pkgs(vuln_info) -> List[AffectedPackage]: + """ + >>> vuln_info = "Vulnerable: nginx/Windows 0.7.52-1.3.0" + >>> vuln_info = "Vulnerable: 1.1.3-1.15.5, 1.0.7-1.0.15" + >>> vuln_info = "Vulnerable: 0.5.6-1.13.2" + >>> vuln_info = "Vulnerable: all" + """ + # TODO: This method needs to be modified accordingy after + # https://github.com/nexB/univers/issues/8 is fixed + vuln_status, version_infos = vuln_info.split(": ") + if "none" in version_infos: + return {} + + version_ranges = [] + windows_only = False + for version_info in version_infos.split(", "): + if version_info == "all": + # This is misleading since eventually some version get fixed. + continue + + if "-" not in version_info: + # These are discrete versions version_ranges.append( - VersionSpecifier.from_scheme_version_spec_string( - "semver", f">={lower_bound},<={upper_bound}" - ) + VersionSpecifier.from_scheme_version_spec_string("semver", version_info[0]) + ) + continue + + windows_only = "nginx/Windows" in version_info + version_info = version_info.replace("nginx/Windows", "") + lower_bound, upper_bound = version_info.split("-") + + version_ranges.append( + VersionSpecifier.from_scheme_version_spec_string( + "semver", f">={lower_bound},<={upper_bound}" ) + ) - qualifiers = {} - if windows_only: - qualifiers["os"] = "windows" + qualifiers = {} + if windows_only: + qualifiers["os"] = "windows" - purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) - return [ - AffectedPackage(package=purl, affected_version_specifier=version_range) - for version_range in version_ranges - ] + purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) + return [ + AffectedPackage(package=purl, affected_version_specifier=version_range) + for version_range in version_ranges + ] def find_valid_versions(versions, version_ranges): diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 861466847..e50aa6fce 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -40,9 +40,18 @@ def run(self) -> None: @transaction.atomic def process_inferences(source: str, advisory: Advisory, inferences: List[Inference]): + """ + An atomic transaction that updates both the Advisory (e.g. date_improved) + and processes the given inferences to create or update corresponding + database fields. + + This avoids failing the entire improver when only a single inference is + erroneous. Also, the atomic transaction for every advisory and its + inferences makes sure that date_improved of advisory is consistent. + """ if not inferences: - logger.debug("Nothing to improve") + logger.warn(f"Nothing to improve. Source: {source} Advisory id: {advisory.pk}") return advisory.date_improved = datetime.now(timezone.utc) From be31a0ef05fb25d46300a2b3619c1717eea505fc Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 23 Nov 2021 11:44:40 +0100 Subject: [PATCH 15/40] Use latest univers branch This adds univer from the current branch that support "vers". 
See https://github.com/nexB/univers/pull/12 Signed-off-by: Philippe Ombredanne --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7d23f3b96..08f27b42b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,8 @@ django-widget-tweaks>=1.4.8 packageurl-python>=0.9.4 binaryornot>=0.4.4 GitPython>=3.1.17 -univers>=21.4.16.6 +#univers>=21.4.16.6 +git+https://github.com/nexB/univers@adopt-vers-spec#univers saneyaml>=0.5.2 beautifulsoup4>=4.9.3 python-dateutil>=2.8.1 From 1971e5bb258c54f24748bbd4def70f5e0ea390f9 Mon Sep 17 00:00:00 2001 From: Hritik Vijay <7457065+Hritik14@users.noreply.github.com> Date: Tue, 30 Nov 2021 15:56:47 +0530 Subject: [PATCH 16/40] Update TODOs and docstrings All of this comes from the review done at https://github.com/nexB/vulnerablecode/pull/525#pullrequestreview-798711307 Co-authored-by: Philippe Ombredanne Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 2 +- vulnerabilities/importers/nginx.py | 1 + vulnerabilities/improve_runner.py | 4 ++++ vulnerabilities/improvers/default.py | 2 +- vulnerabilities/management/commands/improve.py | 2 +- vulnerabilities/models.py | 8 +++++++- vulnerabilities/views.py | 1 + 7 files changed, 16 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 48c25ab82..aad758d0f 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -80,7 +80,7 @@ def normalized(self): class AffectedPackage: # TODO: Tweak after https://github.com/nexB/univers/issues/8 """ - Contains a range of affected versions and a fixed verison of a given package + Contains a range of affected versions and a fixed version of a given package The PackageURL supplied must *not* have a version """ package: PackageURL diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 8747172a3..2cb77ec69 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -154,6 +154,7 @@ def extract_fixed_pkg_versions(vuln_info): # The way this needs to be interpreted is unique for nginx advisories # More: https://github.com/nexB/vulnerablecode/issues/553 + # TODO: create a version scheme that's specific to nginx... because this is not exactly semver versions.append(SemverVersion(rng.partition("+")[0].strip())) return versions diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index e50aa6fce..f22b0bbc5 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -95,12 +95,14 @@ def process_inferences(source: str, advisory: Advisory, inferences: List[Inferen ).update_or_create() +# TODO: This likely may be best as a model or manager method. def _get_or_create_vulnerability(vulnerability_id, summary) -> Tuple[models.Vulnerability, bool]: vuln, created = models.Vulnerability.objects.get_or_create( vulnerability_id=vulnerability_id ) # nopep8 # Eventually we only want to keep summary from NVD and ignore other descriptions. + # FIXME: it is really weird to update in a get or create function if summary and vuln.summary != summary: vuln.summary = summary vuln.save() @@ -110,6 +112,7 @@ def _get_or_create_vulnerability(vulnerability_id, summary) -> Tuple[models.Vuln def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: query_kwargs = {} + # TODO: this should be revisited as this should best be a model or manager method... 
and possibly streamlined for key, val in p.to_dict().items(): if not val: if key == "qualifiers": @@ -123,6 +126,7 @@ def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: def _package_url_to_package(purl: PackageURL) -> models.Package: + # FIXME: this is is likely creating a package from a purl? p = models.Package() p.set_package_url(purl) return p diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 603d90d52..779f479eb 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -56,7 +56,7 @@ def exact_purls(aff_pkg: AffectedPackage) -> (List[PackageURL], PackageURL): vs = aff_pkg.affected_version_specifier aff_purls = [] for rng in vs.ranges: - if "=" in rng.operator and not "!" in rng.operator: + if rng.operator in ("=", ">=", "<="): aff_purl = aff_pkg.package._replace(version=rng.version.value) aff_purls.append(aff_purl) diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index 562af23f9..1c61f26b6 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -68,7 +68,7 @@ def list_sources(self): improvers = [ f"{improver.__module__}.{improver.__qualname__}" for improver in IMPROVER_REGISTRY ] - self.stdout.write("Vulnerability data can be improved from the following sources:") + self.stdout.write("Vulnerability data can be processed by these available improvers:") self.stdout.write("\n".join(improvers)) def improve_data(self, improvers): diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6b2857c86..d5a932ce3 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -87,7 +87,7 @@ def generate_vulcoid(timestamp=None): @property def vulnerable_to(self): """ - Returns packages which are vulnerable to this vulnerability. + Return packages that are vulnerable to this vulnerability. """ return self.packages.filter(vulnerabilities__packagerelatedvulnerability__fix=False) @@ -145,6 +145,7 @@ class Package(PackageURLMixin): ) @property + # TODO: consider renaming to "affected_by" def vulnerable_to(self): """ Returns vulnerabilities which are affecting this package. @@ -152,6 +153,7 @@ def vulnerable_to(self): return self.vulnerabilities.filter(packagerelatedvulnerability__fix=False) @property + # TODO: consider renaming to "fixes" or "fixing" ? (TBD) and updating the docstring def resolved_to(self): """ Returns the vulnerabilities which this package is patched against. 
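A rough usage sketch (hypothetical package values, not part of the patch) of how these two Package properties expose the opposite sides of the fix flag on PackageRelatedVulnerability:

    # Illustrative only: query one package and read both relation sides.
    pkg = Package.objects.get(type="generic", name="nginx", version="1.20.0")
    affected_by = pkg.vulnerable_to   # vulnerabilities related with fix=False
    fixes = pkg.resolved_to           # vulnerabilities this package is patched against (the fix=True side)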
@@ -225,6 +227,7 @@ def update_or_create(self): existing.confidence = self.confidence existing.fix = self.fix existing.save() + # TODO: later we want these to be part of a log field in the DB logger.debug( "Confidence improved for %s R %s, new confidence: %d", self.package, @@ -333,6 +336,9 @@ class Advisory(models.Model): date_improved = models.DateTimeField( null=True, help_text="Latest date on which the advisory was improved by an improver" ) + # we use a JSON field here to avoid creating a complete relational model for data that + # is never queried directly; instead it is only retrieved and processed as a whole by + # an improver data = models.JSONField( help_text="Contents of data_source.AdvisoryData serialized as a JSON object" ) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 2e241ad2a..cc5dc5e33 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -79,6 +79,7 @@ def request_to_queryset(request): "vulnerabilities", filter=Q(vulnerabilities__packagerelatedvulnerability__fix=False), ), + #TODO: consider renaming to fixed in the future patched_vulnerability_count=Count( "vulnerabilities", filter=Q(vulnerabilities__packagerelatedvulnerability__fix=True), From 12162445a77af2101a54c887494917b947cfd527 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 3 Dec 2021 05:37:53 +0530 Subject: [PATCH 17/40] Adopt new univers for importers and tiny refactor This commit adopts the new univers, this also requires to reset all the migrations as migration 0003 was using a univers method which is no longer available. There are also a few refactors based on https://github.com/nexB/vulnerablecode/pull/525#pullrequestreview-798711307 Signed-off-by: Hritik Vijay --- requirements.txt | 2 +- vulnerabilities/data_inference.py | 2 +- vulnerabilities/data_source.py | 68 ++--- vulnerabilities/helpers.py | 11 - vulnerabilities/import_runner.py | 8 +- vulnerabilities/migrations/0001_initial.py | 285 ++++-------------- .../migrations/0002_add_patched_package.py | 64 ---- .../0003_populate_patched_package.py | 107 ------- ...ckagerelatedvulnerability_is_vulnerable.py | 17 -- ...05_remove_vulnerabilityreference_source.py | 60 ---- vulnerabilities/models.py | 31 +- 11 files changed, 116 insertions(+), 539 deletions(-) delete mode 100644 vulnerabilities/migrations/0002_add_patched_package.py delete mode 100644 vulnerabilities/migrations/0003_populate_patched_package.py delete mode 100644 vulnerabilities/migrations/0004_remove_packagerelatedvulnerability_is_vulnerable.py delete mode 100644 vulnerabilities/migrations/0005_remove_vulnerabilityreference_source.py diff --git a/requirements.txt b/requirements.txt index 08f27b42b..a44b70b80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ packageurl-python>=0.9.4 binaryornot>=0.4.4 GitPython>=3.1.17 #univers>=21.4.16.6 -git+https://github.com/nexB/univers@adopt-vers-spec#univers +git+https://github.com/nexB/univers saneyaml>=0.5.2 beautifulsoup4>=4.9.3 python-dateutil>=2.8.1 diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index 5c33f153b..c16413b54 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -24,7 +24,7 @@ class Inference: """ vulnerability_id: str - confidence: int + confidence: int = MAX_CONFIDENCE summary: Optional[str] = None affected_purls: List[PackageURL] = dataclasses.field(default_factory=list) fixed_purls: List[PackageURL] = dataclasses.field(default_factory=list) diff --git a/vulnerabilities/data_source.py 
b/vulnerabilities/data_source.py index aad758d0f..b9c84b948 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -26,7 +26,7 @@ import tempfile import traceback import xml.etree.ElementTree as ET -from datetime import datetime +import datetime from pathlib import Path from typing import Any from typing import ContextManager @@ -41,9 +41,8 @@ from git import DiffIndex from git import Repo from packageurl import PackageURL -from univers.version_specifier import VersionSpecifier -from univers.versions import BaseVersion -from univers.versions import version_class_by_package_type +from univers.version_range import VersionRange +from univers.versions import Version from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.oval_parser import OvalParser from vulnerabilities.severity_systems import ScoringSystem @@ -59,6 +58,12 @@ class VulnerabilitySeverity: system: ScoringSystem value: str + def to_dict(self): + return { + "system": self.system.identifier, + "value": self.value, + } + @dataclasses.dataclass(order=True) class Reference: @@ -75,53 +80,49 @@ def normalized(self): severities = sorted(self.severities) return Reference(reference_id=self.reference_id, url=self.url, severities=severities) + def to_dict(self): + return { + "reference_id": self.reference_id, + "url": self.url, + "severities": [severity.to_dict() for severity in self.severities], + } + @dataclasses.dataclass(order=True, frozen=True) class AffectedPackage: - # TODO: Tweak after https://github.com/nexB/univers/issues/8 """ Contains a range of affected versions and a fixed version of a given package The PackageURL supplied must *not* have a version """ + package: PackageURL - affected_version_specifier: VersionSpecifier - fixed_version: Optional[BaseVersion] = None + affected_versions: VersionRange + fixed_version: Optional[Version] = None def __post_init__(self): if self.package.version: raise ValueError - if ( - self.fixed_version - and self.affected_version_specifier.scheme != self.fixed_version.scheme - ): - raise ValueError - def to_dict(self): - # TODO: VersionSpecifier.__str__ is not working - # https://github.com/nexB/univers/issues/7 - # Adjust following code when it is fixed - scheme = self.affected_version_specifier.scheme - ranges = ",".join( - [f"{rng.operator}{rng.version.value}" for rng in self.affected_version_specifier.ranges] - ) return { "package": self.package, - "affected_version_specifier": f"{scheme}:{ranges}", + "affected_versions": str(self.affected_versions), "fixed_version": str(self.fixed_version) if self.fixed_version else None, } @staticmethod def from_dict(aff_pkg: dict): package = PackageURL(*aff_pkg["package"]) - affected_version_specifier = VersionSpecifier.from_version_spec_string( - aff_pkg["affected_version_specifier"] - ) + affected_versions = VersionRange.from_string(aff_pkg["affected_versions"]) + fixed_version = Version.aff_pkg["fixed_version"] + if fixed_version: + # TODO: revisit after https://github.com/nexB/univers/issues/10 + fixed_version = Version(fixed_version) return AffectedPackage( package=package, - affected_version_specifier=affected_version_specifier, - fixed_version=parse_version(aff_pkg["fixed_version"]), + affected_versions=affected_versions, + fixed_version=fixed_version, ) @@ -129,17 +130,13 @@ def from_dict(aff_pkg: dict): class AdvisoryData: """ This data class expresses the contract between data sources and the import runner. 
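A minimal sketch (illustrative values and vers string, not part of the patch) of the contract after this change: an importer builds an AdvisoryData, and the import runner stores its to_dict() serialization as JSON on the Advisory model:

    import datetime
    import json

    from packageurl import PackageURL
    from univers.version_range import VersionRange

    from vulnerabilities.data_source import AdvisoryData, AffectedPackage
    from vulnerabilities.models import Advisory

    advisory_data = AdvisoryData(
        vulnerability_id="CVE-2021-23017",
        summary="Example summary",
        affected_packages=[
            AffectedPackage(
                # the PackageURL must not carry a version; the range and fixed
                # version are tracked separately
                package=PackageURL(type="generic", name="nginx"),
                # illustrative vers range string parsed by the new univers API
                affected_versions=VersionRange.from_string("vers:semver/>=0.6.18|<=1.20.0"),
            )
        ],
        # __post_init__ requires a UTC-aware datetime
        date_published=datetime.datetime(2021, 5, 25, tzinfo=datetime.timezone.utc),
    )

    # what the import runner does with it
    Advisory.objects.create(
        data=json.dumps(advisory_data.to_dict()),
        date_published=advisory_data.date_published,
        date_collected=datetime.datetime.now(tz=datetime.timezone.utc),
        created_by="vulnerabilities.importers.nginx.NginxDataSource",
    )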
- - NB: There are two representations for package URLs that are commonly used by code consuming this - data class; PackageURL objects and strings. As a convention, the former is referred to in - variable names, etc. as "package_urls" and the latter as "purls". """ vulnerability_id: Optional[str] = None summary: str = None affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) - date_published: Optional[datetime.date] = None + date_published: Optional[datetime.datetime] = None def __post_init__(self): if self.vulnerability_id: @@ -147,12 +144,15 @@ def __post_init__(self): else: assert self.affected_packages or self.references + if self.date_published: + assert self.date_published.tzinfo == datetime.timezone.utc + def to_dict(self): return { "vulnerability_id": self.vulnerability_id, "summary": self.summary, "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], - "references": [vars(ref) for ref in self.references], + "references": [ref.to_dict() for ref in self.references], "date_published": self.date_published.isoformat() if self.date_published else None, } @@ -187,8 +187,8 @@ class DataSource(ContextManager): def __init__( self, - last_run_date: Optional[datetime] = None, - cutoff_date: Optional[datetime] = None, + last_run_date: Optional[datetime.datetime] = None, + cutoff_date: Optional[datetime.datetime] = None, config: Optional[Mapping[str, Any]] = None, ): """ diff --git a/vulnerabilities/helpers.py b/vulnerabilities/helpers.py index da515c40d..6767f1f3b 100644 --- a/vulnerabilities/helpers.py +++ b/vulnerabilities/helpers.py @@ -33,7 +33,6 @@ import toml import urllib3 from packageurl import PackageURL -from univers.versions import version_class_by_package_type # TODO add logging here @@ -191,13 +190,3 @@ def split_markdown_front_matter(text: str) -> Tuple[str, str]: return frontmatter, markdown return "", text - - -# TODO: Remove after https://github.com/nexB/univers/issues/10 -from univers.versions import parse_version as univers_parse_version - - -def parse_version(version): - if not version: - return None - return univers_parse_version(version) diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index f4a1c1832..c5bc99188 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -70,8 +70,8 @@ def run(self, cutoff_date: datetime.datetime = None) -> None: data_source = self.importer.make_data_source(cutoff_date=cutoff_date) with data_source: advisory_data = data_source.advisory_data() - source = f"{data_source.__module__}.{data_source.__class__.__qualname__}" - process_advisories(source, advisory_data) + importer_name = f"{data_source.__module__}.{data_source.__class__.__qualname__}" + process_advisories(importer_name, advisory_data) self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc) self.importer.data_source_cfg = dataclasses.asdict(data_source.config) self.importer.save() @@ -92,7 +92,7 @@ def get_vuln_pkg_refs(vulnerability, package): ) -def process_advisories(source: str, advisory_data: Set[AdvisoryData]) -> None: +def process_advisories(importer_name: str, advisory_data: Set[AdvisoryData]) -> None: """ Insert advisories into the database """ @@ -103,7 +103,7 @@ def process_advisories(source: str, advisory_data: Set[AdvisoryData]) -> None: Advisory( date_published=data.date_published, date_collected=datetime.datetime.now(tz=datetime.timezone.utc), - source=source, + 
created_by=importer_name, data=json.dumps(data.to_dict()), ) ) diff --git a/vulnerabilities/migrations/0001_initial.py b/vulnerabilities/migrations/0001_initial.py index cb18d9680..993391b67 100644 --- a/vulnerabilities/migrations/0001_initial.py +++ b/vulnerabilities/migrations/0001_initial.py @@ -1,6 +1,6 @@ -# Generated by Django 3.0.7 on 2021-02-18 06:13 +# Generated by Django 3.2.9 on 2021-12-02 18:22 -import django.contrib.postgres.fields.jsonb +import django.core.validators from django.db import migrations, models import django.db.models.deletion @@ -9,270 +9,103 @@ class Migration(migrations.Migration): initial = True - dependencies = [] + dependencies = [ + ] operations = [ migrations.CreateModel( - name="Importer", + name='Advisory', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "name", - models.CharField(help_text="Name of the importer", max_length=100, unique=True), - ), - ( - "license", - models.CharField( - blank=True, help_text="License of the vulnerability data", max_length=100 - ), - ), - ( - "last_run", - models.DateTimeField(help_text="UTC Timestamp of the last run", null=True), - ), - ( - "data_source", - models.CharField( - help_text="Name of the data source implementation importable from vulnerabilities.importers", - max_length=100, - ), - ), - ( - "data_source_cfg", - django.contrib.postgres.fields.jsonb.JSONField( - default=dict, - help_text="Implementation-specific configuration for the data source", - ), - ), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('date_published', models.DateField(help_text='UTC Date of publication of the advisory')), + ('date_collected', models.DateField(help_text='UTC Date on which the advisory was collected')), + ('created_by', models.CharField(help_text='Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource', max_length=100)), + ('date_improved', models.DateTimeField(help_text='Latest date on which the advisory was improved by an improver', null=True)), + ('data', models.JSONField(help_text='Contents of data_source.AdvisoryData serialized as a JSON object')), ], ), migrations.CreateModel( - name="ImportProblem", + name='Importer', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ("conflicting_model", django.contrib.postgres.fields.jsonb.JSONField()), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(help_text='Name of the importer', max_length=100, unique=True)), + ('license', models.CharField(blank=True, help_text='License of the vulnerability data', max_length=100)), + ('last_run', models.DateTimeField(help_text='UTC Timestamp of the last run', null=True)), + ('data_source', models.CharField(help_text='Name of the data source implementation importable from vulnerabilities.importers', max_length=100)), + ('data_source_cfg', models.JSONField(default=dict, help_text='Implementation-specific configuration for the data source')), ], ), migrations.CreateModel( - name="Package", + name='Package', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "type", - models.CharField( - blank=True, - help_text="A short code to identify the type of this package. 
For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", - max_length=16, - ), - ), - ( - "namespace", - models.CharField( - blank=True, - help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", - max_length=255, - ), - ), - ( - "name", - models.CharField(blank=True, help_text="Name of the package.", max_length=100), - ), - ( - "version", - models.CharField( - blank=True, help_text="Version of the package.", max_length=100 - ), - ), - ( - "subpath", - models.CharField( - blank=True, - help_text="Extra subpath within a package, relative to the package root.", - max_length=200, - ), - ), - ( - "qualifiers", - django.contrib.postgres.fields.jsonb.JSONField( - default=dict, - help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", - ), - ), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('type', models.CharField(blank=True, help_text='A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.', max_length=16)), + ('namespace', models.CharField(blank=True, help_text='Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.', max_length=255)), + ('name', models.CharField(blank=True, help_text='Name of the package.', max_length=100)), + ('version', models.CharField(blank=True, help_text='Version of the package.', max_length=100)), + ('subpath', models.CharField(blank=True, help_text='Extra subpath within a package, relative to the package root.', max_length=200)), + ('qualifiers', models.JSONField(blank=True, default=dict, help_text='Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.')), ], ), migrations.CreateModel( - name="Vulnerability", + name='Vulnerability', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "vulnerability_id", - models.CharField( - help_text="Unique vulnerability_id for a vulnerability: this is either a published CVE id (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID (as in VULCOID-2021-01-23-15-12). When a vulnerability CVE is assigned later we replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id' field to support redirection to the CVE id.", - max_length=50, - unique=True, - ), - ), - ( - "old_vulnerability_id", - models.CharField( - help_text="empty if no CVE else VC id", - max_length=50, - null=True, - unique=True, - ), - ), - ("summary", models.TextField(blank=True, help_text="Summary of the vulnerability")), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('vulnerability_id', models.CharField(help_text="Unique identifier for a vulnerability: this is either a published CVE id (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID (as in VULCOID-20210222-1315-16461541). 
When a vulnerability CVE is assigned later we replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id' field to support redirection to the CVE id.", max_length=50, unique=True)), + ('old_vulnerability_id', models.CharField(blank=True, help_text='empty if no CVE else VC id', max_length=50, null=True, unique=True)), + ('summary', models.TextField(blank=True, help_text='Summary of the vulnerability')), ], options={ - "verbose_name_plural": "Vulnerabilities", + 'verbose_name_plural': 'Vulnerabilities', }, ), migrations.CreateModel( - name="VulnerabilityReference", + name='VulnerabilityReference', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "source", - models.CharField(blank=True, help_text="Source(s) name eg:NVD", max_length=50), - ), - ( - "reference_id", - models.CharField( - blank=True, help_text="Reference ID, eg:DSA-4465-1", max_length=50 - ), - ), - ( - "url", - models.URLField( - blank=True, help_text="URL of Vulnerability data", max_length=1024 - ), - ), - ( - "vulnerability", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="vulnerabilities.Vulnerability", - ), - ), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('reference_id', models.CharField(blank=True, help_text='Reference ID, eg:DSA-4465-1', max_length=50)), + ('url', models.URLField(blank=True, help_text='URL of Vulnerability data', max_length=1024)), + ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), ], options={ - "unique_together": {("vulnerability", "source", "reference_id", "url")}, + 'unique_together': {('vulnerability', 'reference_id', 'url')}, }, ), migrations.CreateModel( - name="PackageRelatedVulnerability", + name='PackageRelatedVulnerability', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ("is_vulnerable", models.BooleanField()), - ( - "package", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, to="vulnerabilities.Package" - ), - ), - ( - "vulnerability", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="vulnerabilities.Vulnerability", - ), - ), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_by', models.CharField(blank=True, help_text='Fully qualified name of the improver prefixed with the module name responsible for creating this relation. 
Eg: vulnerabilities.importers.nginx.NginxTimeTravel', max_length=100)), + ('confidence', models.PositiveIntegerField(default=100, help_text='Confidence score for this relation', validators=[django.core.validators.MinValueValidator(0), django.core.validators.MaxValueValidator(100)])), + ('fix', models.BooleanField(default=False, help_text='Does this relation fix the specified vulnerability ?')), + ('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='package', to='vulnerabilities.package')), + ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), ], options={ - "verbose_name_plural": "PackageRelatedVulnerabilities", - "unique_together": {("package", "vulnerability")}, + 'verbose_name_plural': 'PackageRelatedVulnerabilities', + 'unique_together': {('package', 'vulnerability')}, }, ), migrations.AddField( - model_name="package", - name="vulnerabilities", - field=models.ManyToManyField( - through="vulnerabilities.PackageRelatedVulnerability", - to="vulnerabilities.Vulnerability", - ), + model_name='package', + name='vulnerabilities', + field=models.ManyToManyField(related_name='packages', through='vulnerabilities.PackageRelatedVulnerability', to='vulnerabilities.Vulnerability'), ), migrations.CreateModel( - name="VulnerabilitySeverity", + name='VulnerabilitySeverity', fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "value", - models.CharField(help_text="Example: 9.0, Important, High", max_length=50), - ), - ( - "scoring_system", - models.CharField( - choices=[ - ("cvssv2", "CVSSv2 Base Score"), - ("cvssv2_vector", "CVSSv2 Vector"), - ("cvssv3", "CVSSv3 Base Score"), - ("cvssv3_vector", "CVSSv3 Vector"), - ("cvssv3.1", "CVSSv3.1 Base Score"), - ("cvssv3.1_vector", "CVSSv3.1 Vector"), - ("rhbs", "RedHat Bugzilla severity"), - ("rhas", "RedHat Aggregate severity"), - ("avgs", "Archlinux Vulnerability Group Severity"), - ], - help_text="vulnerability_id for the scoring system used. 
Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system ", - max_length=50, - ), - ), - ( - "reference", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="vulnerabilities.VulnerabilityReference", - ), - ), - ( - "vulnerability", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="vulnerabilities.Vulnerability", - ), - ), + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('value', models.CharField(help_text='Example: 9.0, Important, High', max_length=50)), + ('scoring_system', models.CharField(choices=[('cvssv2', 'CVSSv2 Base Score'), ('cvssv2_vector', 'CVSSv2 Vector'), ('cvssv3', 'CVSSv3 Base Score'), ('cvssv3_vector', 'CVSSv3 Vector'), ('cvssv3.1', 'CVSSv3.1 Base Score'), ('cvssv3.1_vector', 'CVSSv3.1 Vector'), ('rhbs', 'RedHat Bugzilla severity'), ('rhas', 'RedHat Aggregate severity'), ('avgs', 'Archlinux Vulnerability Group Severity'), ('cvssv3.1_qr', 'CVSSv3.1 Qualitative Severity Rating'), ('generic_textual', 'Generic textual severity rating'), ('apache_httpd', 'Apache Httpd Severity')], help_text='identifier for the scoring system used. Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system, cvssv3.1_qr is vulnerability_id for CVSSv3.1 Qualitative Severity Rating system, generic_textual is vulnerability_id for Generic textual severity rating system, apache_httpd is vulnerability_id for Apache Httpd Severity system ', max_length=50)), + ('reference', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerabilityreference')), + ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), ], options={ - "unique_together": {("vulnerability", "reference", "scoring_system")}, + 'unique_together': {('vulnerability', 'reference', 'scoring_system')}, }, ), migrations.AlterUniqueTogether( - name="package", - unique_together={("name", "namespace", "type", "version", "qualifiers", "subpath")}, + name='package', + unique_together={('name', 'namespace', 'type', 'version', 'qualifiers', 'subpath')}, ), ] diff --git a/vulnerabilities/migrations/0002_add_patched_package.py b/vulnerabilities/migrations/0002_add_patched_package.py deleted file mode 100644 index d2024a2f7..000000000 --- a/vulnerabilities/migrations/0002_add_patched_package.py +++ /dev/null @@ -1,64 +0,0 @@ -# Generated by Django 3.0.13 on 2021-04-04 06:32 - -from django.db import migrations, models -import 
django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0001_initial"), - ] - - operations = [ - migrations.AddField( - model_name="packagerelatedvulnerability", - name="patched_package", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="patched_package", - to="vulnerabilities.Package", - ), - ), - migrations.AlterField( - model_name="packagerelatedvulnerability", - name="package", - field=models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="vulnerable_package", - to="vulnerabilities.Package", - ), - ), - migrations.AlterField( - model_name="vulnerability", - name="vulnerability_id", - field=models.CharField( - help_text="Unique identifier for a vulnerability: this is either a published CVE id (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID (as in VULCOID-20210222-1315-16461541). When a vulnerability CVE is assigned later we replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id' field to support redirection to the CVE id.", - max_length=50, - unique=True, - ), - ), - migrations.AlterField( - model_name="vulnerabilityseverity", - name="scoring_system", - field=models.CharField( - choices=[ - ("cvssv2", "CVSSv2 Base Score"), - ("cvssv2_vector", "CVSSv2 Vector"), - ("cvssv3", "CVSSv3 Base Score"), - ("cvssv3_vector", "CVSSv3 Vector"), - ("cvssv3.1", "CVSSv3.1 Base Score"), - ("cvssv3.1_vector", "CVSSv3.1 Vector"), - ("rhbs", "RedHat Bugzilla severity"), - ("rhas", "RedHat Aggregate severity"), - ("avgs", "Archlinux Vulnerability Group Severity"), - ("cvssv3.1_qr", "CVSSv3.1 Qualitative Severity Rating"), - ("generic_textual", "Generic textual severity rating"), - ], - help_text="identifier for the scoring system used. 
Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system, cvssv3.1_qr is vulnerability_id for CVSSv3.1 Qualitative Severity Rating system, generic_textual is vulnerability_id for Generic textual severity rating system ", - max_length=50, - ), - ), - ] diff --git a/vulnerabilities/migrations/0003_populate_patched_package.py b/vulnerabilities/migrations/0003_populate_patched_package.py deleted file mode 100644 index 4c852a0e5..000000000 --- a/vulnerabilities/migrations/0003_populate_patched_package.py +++ /dev/null @@ -1,107 +0,0 @@ -# Generated by Django 3.0.13 on 2021-04-04 06:40 - -import bisect -from sys import stdout - -from django.db import migrations -from univers.versions import version_class_by_package_type - - -def nearest_patched_versions(vulnerable_versions, resolved_versions): - """ - Returns a mapping of vulnerable_version -> nearest_safe_version - """ - - vulnerable_versions = sorted(vulnerable_versions) - resolved_versions = sorted(resolved_versions) - resolved_version_count = len(resolved_versions) - nearest_patch_for_version = {} - for vulnerable_version in vulnerable_versions: - nearest_patch_for_version[vulnerable_version] = None - if not resolved_versions: - continue - - patched_version_index = bisect.bisect_right(resolved_versions, vulnerable_version) - if patched_version_index >= resolved_version_count: - continue - nearest_patch_for_version[vulnerable_version] = resolved_versions[patched_version_index] - - return nearest_patch_for_version - - -def _get_tuple_key(packagerelatedvulnerability): - return ( - packagerelatedvulnerability.vulnerability, - packagerelatedvulnerability.package.type, - packagerelatedvulnerability.package.name, - ) - - -def create_patched_candidates_by_tuple_key(packagerelatedvulnerability_class): - """ - Creates and returns a mapping of form - (models.Vulnerability, models.Package.type, models.Package.name) -> List[models.PackageRelatedVulnerability] - It's used to prevent multiple hits to db in firther processing. 
- """ - patched_candidates = {} - for prv in packagerelatedvulnerability_class.objects.filter( - is_vulnerable=False - ).select_related(): - key_tuple = _get_tuple_key(prv) - if key_tuple in patched_candidates: - patched_candidates[key_tuple].append(prv.package) - else: - patched_candidates[key_tuple] = [prv.package] - - return patched_candidates - - -def populate_patched_packages(apps, schema_editor): - PackageRelatedVulnerability = apps.get_model("vulnerabilities", "PackageRelatedVulnerability") - patched_candidates = create_patched_candidates_by_tuple_key(PackageRelatedVulnerability) - tot = PackageRelatedVulnerability.objects.filter(is_vulnerable=True).count() - n = 0 - for prv in PackageRelatedVulnerability.objects.filter(is_vulnerable=True).select_related(): - stdout.write(f"{n}/{tot}\n") - n += 1 - key_tuple = _get_tuple_key(prv) - if key_tuple not in patched_candidates: - continue - - possible_patches = patched_candidates[key_tuple] - version_class = version_class_by_package_type[prv.package.type] - patched_package_by_version_obj = {} - - for patch in possible_patches: - try: - patched_package_by_version_obj[version_class(patch.version)] = patch - except Exception as e: - stdout.write(f"{e} {patch.version}") - continue - patched_version_objects = list(patched_package_by_version_obj.keys()) - - try: - target_version_object = version_class(prv.package.version) - nearest_patched_version = nearest_patched_versions( - [target_version_object], patched_version_objects - )[target_version_object] - except Exception as e: - stdout.write(f"{e}, {prv.package.version}") - continue - - if not nearest_patched_version: - continue - - prv.patched_package = patched_package_by_version_obj[nearest_patched_version] - prv.save() - - PackageRelatedVulnerability.objects.filter(is_vulnerable=False).delete() - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0002_add_patched_package"), - ] - - operations = [migrations.RunPython(populate_patched_packages)] diff --git a/vulnerabilities/migrations/0004_remove_packagerelatedvulnerability_is_vulnerable.py b/vulnerabilities/migrations/0004_remove_packagerelatedvulnerability_is_vulnerable.py deleted file mode 100644 index ae7b9dffe..000000000 --- a/vulnerabilities/migrations/0004_remove_packagerelatedvulnerability_is_vulnerable.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 3.0.13 on 2021-04-08 11:05 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0003_populate_patched_package"), - ] - - operations = [ - migrations.RemoveField( - model_name="packagerelatedvulnerability", - name="is_vulnerable", - ), - ] diff --git a/vulnerabilities/migrations/0005_remove_vulnerabilityreference_source.py b/vulnerabilities/migrations/0005_remove_vulnerabilityreference_source.py deleted file mode 100644 index ae27b7ade..000000000 --- a/vulnerabilities/migrations/0005_remove_vulnerabilityreference_source.py +++ /dev/null @@ -1,60 +0,0 @@ -# Generated by Django 3.0.14 on 2021-04-26 06:57 - -import django.contrib.postgres.fields.jsonb -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0004_remove_packagerelatedvulnerability_is_vulnerable"), - ] - - operations = [ - migrations.AddField( - model_name="package", - name="resolved_vulnerabilities", - field=models.ManyToManyField( - related_name="patched_packages", - through="vulnerabilities.PackageRelatedVulnerability", - 
to="vulnerabilities.Vulnerability", - ), - ), - migrations.AlterField( - model_name="package", - name="qualifiers", - field=django.contrib.postgres.fields.jsonb.JSONField( - blank=True, - default=dict, - help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", - ), - ), - migrations.AlterField( - model_name="package", - name="vulnerabilities", - field=models.ManyToManyField( - related_name="vulnerable_packages", - through="vulnerabilities.PackageRelatedVulnerability", - to="vulnerabilities.Vulnerability", - ), - ), - migrations.AlterField( - model_name="vulnerability", - name="old_vulnerability_id", - field=models.CharField( - blank=True, - help_text="empty if no CVE else VC id", - max_length=50, - null=True, - unique=True, - ), - ), - migrations.AlterUniqueTogether( - name="vulnerabilityreference", - unique_together={("vulnerability", "reference_id", "url")}, - ), - migrations.RemoveField( - model_name="vulnerabilityreference", - name="source", - ), - ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index d5a932ce3..41460876f 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -35,7 +35,6 @@ from django.core.validators import MaxValueValidator from packageurl.contrib.django.models import PackageURLMixin from packageurl import PackageURL -from univers.version_specifier import VersionSpecifier from vulnerabilities.data_source import DataSource from vulnerabilities.data_source import AdvisoryData @@ -126,7 +125,7 @@ class Meta: unique_together = ("vulnerability", "reference_id", "url") def __str__(self): - return f"{self.source} {self.reference_id} {self.url}" + return f"{self.reference_id} {self.url}" class Package(PackageURLMixin): @@ -200,11 +199,20 @@ class PackageRelatedVulnerability(models.Model): package = models.ForeignKey(Package, on_delete=models.CASCADE, related_name="package") vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) - source = models.TextField(null=True) + created_by = models.CharField( + max_length=100, + blank=True, + help_text="Fully qualified name of the improver prefixed with the module name responsible for creating this relation. Eg: vulnerabilities.importers.nginx.NginxTimeTravel", + ) + confidence = models.PositiveIntegerField( - default=1, validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)] + default=MAX_CONFIDENCE, + validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)], + help_text="Confidence score for this relation", + ) + fix = models.BooleanField( + default=False, help_text="Does this relation fix the specified vulnerability ?" 
) - fix = models.BooleanField(default=False) def __str__(self): return f"{self.package.package_url} {self.vulnerability.vulnerability_id}" @@ -223,7 +231,7 @@ def update_or_create(self): vulnerability=self.vulnerability, package=self.package ) if self.confidence > existing.confidence: - existing.source = self.source + existing.created_by = self.created_by existing.confidence = self.confidence existing.fix = self.fix existing.save() @@ -238,18 +246,13 @@ def update_or_create(self): except self.DoesNotExist: self.__class__.objects.create( vulnerability=self.vulnerability, - source=self.source, + created_by=self.created_by, package=self.package, confidence=self.confidence, fix=self.fix, ) -class ImportProblem(models.Model): - - conflicting_model = models.JSONField() - - class Importer(models.Model): """ Metadata and pointer to the implementation for a source of vulnerability data (aka security @@ -329,7 +332,7 @@ class Advisory(models.Model): date_published = models.DateField(help_text="UTC Date of publication of the advisory") date_collected = models.DateField(help_text="UTC Date on which the advisory was collected") - source = models.CharField( + created_by = models.CharField( max_length=100, help_text="Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource", ) @@ -338,7 +341,7 @@ class Advisory(models.Model): ) # we use a JSON field here to avoid creating a complete relational model for data that # is never queried directly; instead it is only retrieved and processed as a whole by - # an improver + # an improver data = models.JSONField( help_text="Contents of data_source.AdvisoryData serialized as a JSON object" ) From 8065e3c89c57c3f6d073a8d178888dbc203d6f70 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 3 Dec 2021 05:38:13 +0530 Subject: [PATCH 18/40] Enable doctest in pytest because why not ? 
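For context, `--doctest-modules` (added to pytest.ini below) makes pytest also collect and run doctests found in importable modules. A minimal, hypothetical module-level doctest that this option would pick up looks like:

    def add_one(number):
        """
        Return ``number`` incremented by one.

        >>> add_one(41)
        42
        """
        return number + 1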
Signed-off-by: Hritik Vijay --- pytest.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index d64e30334..2de27327a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] DJANGO_SETTINGS_MODULE = vulnerablecode.settings markers = - webtest \ No newline at end of file + webtest +addopts = --doctest-modules From 5ec989dbc86c14335bf0cbffceb8dbe407c8fbae Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 3 Dec 2021 05:39:17 +0530 Subject: [PATCH 19/40] Rewrite nginx importer to use new univers design Signed-off-by: Hritik Vijay --- vulnerabilities/importers/nginx.py | 233 +++++++++++++---------------- 1 file changed, 101 insertions(+), 132 deletions(-) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 2cb77ec69..03971e059 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -22,13 +22,13 @@ import asyncio import dataclasses -from datetime import datetime +import datetime from typing import List import requests -from packageurl import PackageURL from bs4 import BeautifulSoup -from univers.version_specifier import VersionSpecifier +from packageurl import PackageURL +from univers.version_range import NginxVersionRange from univers.versions import SemverVersion from vulnerabilities.data_source import AdvisoryData @@ -36,9 +36,11 @@ from vulnerabilities.data_source import DataSource from vulnerabilities.data_source import DataSourceConfiguration from vulnerabilities.data_source import Reference +from vulnerabilities.data_source import VulnerabilitySeverity +from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.package_managers import GitHubTagsAPI from vulnerabilities.package_managers import Version -from vulnerabilities.helpers import nearest_patched_package +from vulnerabilities.severity_systems import scoring_systems @dataclasses.dataclass @@ -68,152 +70,119 @@ def set_api(self): def advisory_data(self) -> List[AdvisoryData]: adv_data = [] - # self.set_api() data = requests.get(self.url).content - adv_data.extend(self.to_advisories(data)) - return adv_data - - def to_advisories(self, data): - advisory_data = [] soup = BeautifulSoup(data, features="lxml") vuln_list = soup.select("li p") - - # Example value of `vuln_list` : - # ['Excessive CPU usage in HTTP/2 with small window updates', - #
, - # 'Severity: medium', - #
, - # Advisory, # nopep8 - #
, - # CVE-2019-9511, - #
, - # 'Not vulnerable: 1.17.3+, 1.16.1+', - #
, - # 'Vulnerable: 1.9.5-1.17.2'] - for vuln_info in vuln_list: - references = [] - for index, child in enumerate(vuln_info.children): - if index == 0: - # type of this child is bs4.element.NavigableString. - # Hence cast it into standard string - summary = str(child) - continue - - # hasattr(child, "attrs") == False for bs4.element.NavigableString - if hasattr(child, "attrs") and child.attrs.get("href"): - link = child.attrs["href"] - references.append(Reference(url=link)) - if "cve.mitre.org" in link: - cve_id = child.text - continue - - if "Not vulnerable" in child: - fixed_package_versions = extract_fixed_pkg_versions(child) - continue - - if "Vulnerable" in child: - aff_pkgs = extract_vuln_pkgs(child) - continue - - # TODO: Change this after https://github.com/nexB/univers/issues/8 is fixed - purl = PackageURL(type="generic", name="nginx") - affected_packages = [] - for pkg in aff_pkgs: - for fixed_version in fixed_package_versions: - affected_packages.append( - AffectedPackage( - package=purl, - affected_version_specifier=pkg.affected_version_specifier, - fixed_version=fixed_version, - ) - ) + adv_data.append(to_advisory_data(*parse_advisory_data_from_paragraph(vuln_info))) - advisory_data.append( - AdvisoryData( - summary=summary, - vulnerability_id=cve_id, - affected_packages=affected_packages, - references=references, - date_published=datetime.now(), # TODO: put real date here - ) - ) - - return advisory_data + return adv_data -def extract_fixed_pkg_versions(vuln_info): - vuln_status, version_info = vuln_info.split(": ") - if "none" in version_info: - return {} +def to_advisory_data( + cve, summary, advisory_severity, not_vulnerable, vulnerable, references +) -> AdvisoryData: + """ + Return AdvisoryData formed by given parameters + """ - raw_ranges = version_info.split(",") - versions = [] - for rng in raw_ranges: - # Eg. "1.7.3+" gets converted to SemVersion(1.7.3) - # The way this needs to be interpreted is unique for nginx advisories - # More: https://github.com/nexB/vulnerablecode/issues/553 + qualifiers = {} - # TODO: create a version scheme that's specific to nginx... 
because this is not exactly semver - versions.append(SemverVersion(rng.partition("+")[0].strip())) + affected_versions = vulnerable.partition(":")[2] + if "nginx/Windows" in affected_versions: + qualifiers["os"] = "windows" + affected_versions = affected_versions.replace("nginx/Windows", "") + affected_versions = NginxVersionRange.from_native(affected_versions) + + affected_packages = [] + branch = ["stable", "mainline"] + fixed_versions = not_vulnerable.partition(":")[2] + for fixed_version in fixed_versions.split(","): + fixed_version = fixed_version.rstrip("+") + + # TODO: Mail nginx for this anomaly + if "none" in fixed_version: + affected_packages.append( + AffectedPackage( + package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers), + affected_versions=affected_versions, + fixed_version=fixed_version, + ) + ) + break + + fixed_version = SemverVersion(fixed_version) + # Even number minors are stable, see https://www.nginx.com/blog/nginx-1-18-1-19-released/ + qualifiers["branch"] = branch[fixed_version.value.minor % 2] + purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) + affected_packages.append( + AffectedPackage( + package=purl, + affected_versions=affected_versions, + fixed_version=fixed_version, + ) + ) - return versions + return AdvisoryData( + vulnerability_id=cve, + summary=summary, + affected_packages=affected_packages, + references=references, + date_published=datetime.datetime.now(tz=datetime.timezone.utc), + ) -def extract_vuln_pkgs(vuln_info) -> List[AffectedPackage]: +def parse_advisory_data_from_paragraph(vuln_info): """ - >>> vuln_info = "Vulnerable: nginx/Windows 0.7.52-1.3.0" - >>> vuln_info = "Vulnerable: 1.1.3-1.15.5, 1.0.7-1.0.15" - >>> vuln_info = "Vulnerable: 0.5.6-1.13.2" - >>> vuln_info = "Vulnerable: all" + Return (summary, advisory_severity, not_vulnerable, vulnerable, references) + from bs4 paragraph + + For example: + >>> paragraph = '
<p>1-byte memory overwrite in resolver<br/>Severity: medium<br/><a href="http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html">Advisory</a><br/><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-23017">CVE-2021-23017</a><br/>Not vulnerable: 1.21.0+, 1.20.1+<br/>Vulnerable: 0.6.18-1.20.0<br/><a href="/download/patch.2021.resolver.txt">The patch</a>  <a href="/download/patch.2021.resolver.txt.asc">pgp</a></p>
' + >>> vuln_info = BeautifulSoup(paragraph).p + >>> parse_advisory_data_from_paragraph(vuln_info) + ('CVE-2021-23017', '1-byte memory overwrite in resolver', 'Severity: medium', 'Not vulnerable: 1.21.0+, 1.20.1+', 'Vulnerable: 0.6.18-1.20.0', [Reference(reference_id='', url='http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html', severities=[]), Reference(reference_id='CVE-2021-23017', url='http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-23017', severities=[VulnerabilitySeverity(system=ScoringSystem(identifier='generic_textual', name='Generic textual severity rating', url='', notes='Severity for unknown scoring systems. Contains generic textual values like High, Low etc'), value='Severity: medium')]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt', severities=[]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt.asc', severities=[])]) """ - # TODO: This method needs to be modified accordingy after - # https://github.com/nexB/univers/issues/8 is fixed - - vuln_status, version_infos = vuln_info.split(": ") - if "none" in version_infos: - return {} - - version_ranges = [] - windows_only = False - for version_info in version_infos.split(", "): - if version_info == "all": - # This is misleading since eventually some version get fixed. + cve = summary = advisory_severity = not_vulnerable = vulnerable = None + references = [] + for index, child in enumerate(vuln_info.children): + if index == 0: + summary = child continue - if "-" not in version_info: - # These are discrete versions - version_ranges.append( - VersionSpecifier.from_scheme_version_spec_string("semver", version_info[0]) - ) + if "Severity" in child: + advisory_severity = child continue - windows_only = "nginx/Windows" in version_info - version_info = version_info.replace("nginx/Windows", "") - lower_bound, upper_bound = version_info.split("-") - - version_ranges.append( - VersionSpecifier.from_scheme_version_spec_string( - "semver", f">={lower_bound},<={upper_bound}" - ) - ) - - qualifiers = {} - if windows_only: - qualifiers["os"] = "windows" - - purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) - return [ - AffectedPackage(package=purl, affected_version_specifier=version_range) - for version_range in version_ranges - ] + # hasattr(child, "attrs") == False for bs4.element.NavigableString + if hasattr(child, "attrs") and child.attrs.get("href"): + link = child.attrs["href"] + # Take care of relative urls + link = requests.compat.urljoin("https://nginx.org", link) + if "cve.mitre.org" in link: + cve = child.text + references.append( + Reference( + reference_id=cve, + url=link, + severities=[ + VulnerabilitySeverity( + system=scoring_systems["generic_textual"], + value=advisory_severity, + ) + ], + ) + ) + else: + references.append(Reference(url=link)) + continue + if "Not vulnerable" in child: + not_vulnerable = child + continue -def find_valid_versions(versions, version_ranges): - valid_versions = set() - for version in versions: - version_obj = SemverVersion(version) - if any([version_obj in ver_range for ver_range in version_ranges]): - valid_versions.add(version) + if "Vulnerable" in child: + vulnerable = child + continue - return valid_versions + return cve, summary, advisory_severity, not_vulnerable, vulnerable, references From 18e3d4b92712574b57a6159e9aa46d401edf234b Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Wed, 8 Dec 2021 14:34:30 +0530 Subject: [PATCH 20/40] Refactor according to code review on 2021-12-04 
The following suggestions were acception plus some code refactor https://github.com/nexB/vulnerablecode/blob/main/vulnerabilities/migrations/0003_populate_patched_package.py Is this hand written migration ? Why ? I'm resetting migrations, this is breaks on changes in univers - > move to init migration, provide data dump https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/import_runner.py#L73 - [x] name should be inside the importer class https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/import_runner.py#L108 - [ ] Advisory. get or create in the loop itself https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/import_runner.py#L95 - [x] Make it a list (or iterable is better), not set class AdvisoryData: """ This data class expresses the contract between data sources and the import runner. """ vulnerability_id: Optional[str] = None summary: str = None affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) date_published: Optional[datetime.datetime] = None - [x] Use this to create an Advisory model and store List objects as json https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/data_source.py#L99 - [x] affected_version_range - [x] https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/data_source.py#L120 VersionRange.version_class to get the Version subclass https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L71 - [ ] docify this - [x] advisory_data should return an iterable as a contract https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L77 - [x] yield better https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L158 - [ ] use getattr https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L162 - [x] only for nginx advisory https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L188 - [x] return a dict and use ** on 77 https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L99 - [x] _,_,fixed_versions = https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L86 - [x] how does a paragraph look https://github.com/Hritik14/vulnerablecode/blob/ee0dba45f1d5b6680e121d91ce59b050325a5e67/vulnerabilities/importers/nginx.py#L116 - [x] remove branch qualifier Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 60 ++++++++-------- vulnerabilities/import_runner.py | 34 ++++++---- vulnerabilities/importers/nginx.py | 79 +++++++++++----------- vulnerabilities/migrations/0001_initial.py | 17 +++-- vulnerabilities/models.py | 64 +++++++++++++----- 5 files changed, 148 insertions(+), 106 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index b9c84b948..1a7335452 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -35,6 +35,7 @@ from typing import Mapping from typing import Optional from 
typing import Set +from typing import Iterable from typing import Tuple from binaryornot.helpers import is_binary_string @@ -47,9 +48,6 @@ from vulnerabilities.oval_parser import OvalParser from vulnerabilities.severity_systems import ScoringSystem -# TODO: remove after https://github.com/nexB/univers/issues/10 is fixed -from vulnerabilities.helpers import parse_version - logger = logging.getLogger(__name__) @@ -96,7 +94,7 @@ class AffectedPackage: """ package: PackageURL - affected_versions: VersionRange + affected_version_range: VersionRange fixed_version: Optional[Version] = None def __post_init__(self): @@ -104,17 +102,25 @@ def __post_init__(self): raise ValueError def to_dict(self): + """ + Return a serializable dict that can be converted back using self.from_dict + """ return { "package": self.package, - "affected_versions": str(self.affected_versions), + "affected_version_range": str(self.affected_version_range), "fixed_version": str(self.fixed_version) if self.fixed_version else None, } @staticmethod - def from_dict(aff_pkg: dict): - package = PackageURL(*aff_pkg["package"]) - affected_versions = VersionRange.from_string(aff_pkg["affected_versions"]) - fixed_version = Version.aff_pkg["fixed_version"] + def from_dict(affected_pkg: dict): + # TODO: REWRITE + """ + Return AffectedPackage object from a dict generated by self.to_dict + """ + package = PackageURL(*affected_pkg["package"]) + affected_versions = VersionRange.from_string(affected_pkg["affected_versions"]) + # VersionRange.version_class to get the Version subclass + fixed_version = Version.affected_pkg["fixed_version"] if fixed_version: # TODO: revisit after https://github.com/nexB/univers/issues/10 fixed_version = Version(fixed_version) @@ -130,6 +136,13 @@ def from_dict(aff_pkg: dict): class AdvisoryData: """ This data class expresses the contract between data sources and the import runner. + + If a vulnerability_id is present then: + summary or affected_packages or references must be present + otherwise + either affected_package or references should be present + + date_published must be aware datetime """ vulnerability_id: Optional[str] = None @@ -145,25 +158,7 @@ def __post_init__(self): assert self.affected_packages or self.references if self.date_published: - assert self.date_published.tzinfo == datetime.timezone.utc - - def to_dict(self): - return { - "vulnerability_id": self.vulnerability_id, - "summary": self.summary, - "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], - "references": [ref.to_dict() for ref in self.references], - "date_published": self.date_published.isoformat() if self.date_published else None, - } - - @staticmethod - def from_dict(advisory_data: dict): - advisory_data = AdvisoryData(**advisory_data) - advisory_data.affected_packages = [ - AffectedPackage.from_dict(p) for p in advisory_data.affected_packages - ] - advisory_data.references = [Reference(**ref) for ref in advisory_data.references] - return advisory_data + assert self.date_published.tzinfo class InvalidConfigurationError(Exception): @@ -217,6 +212,13 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): pass + def __repr__(self): + """ + Fully qualified name prefixed with the module name of the data source + used in logging. + """ + return f"{self.__module__}.{self.__class__.__qualname__}" + @property def cutoff_timestamp(self) -> int: """ @@ -245,7 +247,7 @@ def validate_configuration(self) -> None: human-readable message. 
""" - def advisory_data(self) -> Set[AdvisoryData]: + def advisory_data(self) -> Iterable[AdvisoryData]: """ Subclasses return AdvisoryData objects """ diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index c5bc99188..0bc52e81b 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -26,6 +26,7 @@ import json import logging from typing import Set +from typing import Iterable from vulnerabilities import models @@ -70,8 +71,8 @@ def run(self, cutoff_date: datetime.datetime = None) -> None: data_source = self.importer.make_data_source(cutoff_date=cutoff_date) with data_source: advisory_data = data_source.advisory_data() - importer_name = f"{data_source.__module__}.{data_source.__class__.__qualname__}" - process_advisories(importer_name, advisory_data) + importer_name = repr(data_source) + process_advisories(advisory_datas=advisory_data, importer_name=importer_name) self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc) self.importer.data_source_cfg = dataclasses.asdict(data_source.config) self.importer.save() @@ -92,20 +93,25 @@ def get_vuln_pkg_refs(vulnerability, package): ) -def process_advisories(importer_name: str, advisory_data: Set[AdvisoryData]) -> None: +def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: str) -> None: """ Insert advisories into the database """ - advisories = [] - for data in advisory_data: - advisories.append( - Advisory( - date_published=data.date_published, - date_collected=datetime.datetime.now(tz=datetime.timezone.utc), - created_by=importer_name, - data=json.dumps(data.to_dict()), - ) + for data in advisory_datas: + obj, created = Advisory.objects.get_or_create( + vulnerability_id=data.vulnerability_id, + summary=data.summary, + affected_packages=[pkg.to_dict() for pkg in data.affected_packages], + references=[ref.to_dict() for ref in data.references], + created_by=importer_name, + defaults={ + "date_published": data.date_published, + "date_collected": datetime.datetime.now(tz=datetime.timezone.utc), + }, ) - - Advisory.objects.bulk_create(advisories) # TODO: handle conflicts, duplicates (update? ignore?) + if not created: + logger.warn( + f"Advisory with vulnerability_id: {obj.vulnerability_id}," + f"created_by: {obj.created_by} already exists" + ) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 03971e059..b2e19e0a5 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -20,10 +20,9 @@ # VulnerableCode is a free software tool from nexB Inc. and others. # Visit https://github.com/nexB/vulnerablecode/ for support and download. -import asyncio import dataclasses import datetime -from typing import List +from typing import Iterable import requests from bs4 import BeautifulSoup @@ -53,30 +52,12 @@ class NginxDataSource(DataSource): url = "http://nginx.org/en/security_advisories.html" - def set_api(self): - self.version_api = GitHubTagsAPI() - asyncio.run(self.version_api.load_api(["nginx/nginx"])) - - # For some reason nginx tags it's releases are in the form of `release-1.2.3` - # Chop off the `release-` part here. 
- normalized_versions = set() - while self.version_api.cache["nginx/nginx"]: - version = self.version_api.cache["nginx/nginx"].pop() - normalized_version = Version( - version.value.replace("release-", ""), version.release_date - ) - normalized_versions.add(normalized_version) - self.version_api.cache["nginx/nginx"] = normalized_versions - - def advisory_data(self) -> List[AdvisoryData]: - adv_data = [] + def advisory_data(self) -> Iterable[AdvisoryData]: data = requests.get(self.url).content soup = BeautifulSoup(data, features="lxml") vuln_list = soup.select("li p") for vuln_info in vuln_list: - adv_data.append(to_advisory_data(*parse_advisory_data_from_paragraph(vuln_info))) - - return adv_data + yield to_advisory_data(**parse_advisory_data_from_paragraph(vuln_info)) def to_advisory_data( @@ -84,41 +65,48 @@ def to_advisory_data( ) -> AdvisoryData: """ Return AdvisoryData formed by given parameters + An advisory paragraph, without html markup, looks like: + + 1-byte memory overwrite in resolver + Severity: medium + Advisory + CVE-2021-23017 + Not vulnerable: 1.21.0+, 1.20.1+ + Vulnerable: 0.6.18-1.20.0 + The patch pgp """ qualifiers = {} - affected_versions = vulnerable.partition(":")[2] - if "nginx/Windows" in affected_versions: + affected_version_range = vulnerable.partition(":")[2] + if "nginx/Windows" in affected_version_range: qualifiers["os"] = "windows" - affected_versions = affected_versions.replace("nginx/Windows", "") - affected_versions = NginxVersionRange.from_native(affected_versions) + affected_version_range = affected_version_range.replace("nginx/Windows", "") + affected_version_range = NginxVersionRange.from_native(affected_version_range) affected_packages = [] - branch = ["stable", "mainline"] - fixed_versions = not_vulnerable.partition(":")[2] + _, _, fixed_versions = not_vulnerable.partition(":") for fixed_version in fixed_versions.split(","): fixed_version = fixed_version.rstrip("+") # TODO: Mail nginx for this anomaly if "none" in fixed_version: - affected_packages.append( - AffectedPackage( - package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers), - affected_versions=affected_versions, - fixed_version=fixed_version, - ) - ) + # FIXME: This breaks because https://github.com/nexB/univers/issues/10 break + # affected_packages.append( + # AffectedPackage( + # package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers), + # affected_version_range=affected_version_range, + # ) + # ) + # break fixed_version = SemverVersion(fixed_version) - # Even number minors are stable, see https://www.nginx.com/blog/nginx-1-18-1-19-released/ - qualifiers["branch"] = branch[fixed_version.value.minor % 2] purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) affected_packages.append( AffectedPackage( package=purl, - affected_versions=affected_versions, + affected_version_range=affected_version_range, fixed_version=fixed_version, ) ) @@ -165,6 +153,12 @@ def parse_advisory_data_from_paragraph(vuln_info): Reference( reference_id=cve, url=link, + ) + ) + elif "http://mailman.nginx.org" in link: + references.append( + Reference( + url=link, severities=[ VulnerabilitySeverity( system=scoring_systems["generic_textual"], @@ -185,4 +179,11 @@ def parse_advisory_data_from_paragraph(vuln_info): vulnerable = child continue - return cve, summary, advisory_severity, not_vulnerable, vulnerable, references + return { + "cve": cve, + "summary": summary, + "advisory_severity": advisory_severity, + "not_vulnerable": not_vulnerable, + "vulnerable": vulnerable, + 
"references": references, + } diff --git a/vulnerabilities/migrations/0001_initial.py b/vulnerabilities/migrations/0001_initial.py index 993391b67..6551edc64 100644 --- a/vulnerabilities/migrations/0001_initial.py +++ b/vulnerabilities/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.9 on 2021-12-02 18:22 +# Generated by Django 3.2.9 on 2021-12-08 09:02 import django.core.validators from django.db import migrations, models @@ -17,11 +17,14 @@ class Migration(migrations.Migration): name='Advisory', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('date_published', models.DateField(help_text='UTC Date of publication of the advisory')), + ('vulnerability_id', models.CharField(blank=True, max_length=50, null=True)), + ('summary', models.TextField(blank=True, null=True)), + ('affected_packages', models.TextField()), + ('references', models.TextField()), + ('date_published', models.DateField(blank=True, help_text='UTC Date of publication of the advisory', null=True)), ('date_collected', models.DateField(help_text='UTC Date on which the advisory was collected')), + ('date_improved', models.DateTimeField(blank=True, help_text='Latest date on which the advisory was improved by an improver', null=True)), ('created_by', models.CharField(help_text='Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource', max_length=100)), - ('date_improved', models.DateTimeField(help_text='Latest date on which the advisory was improved by an improver', null=True)), - ('data', models.JSONField(help_text='Contents of data_source.AdvisoryData serialized as a JSON object')), ], ), migrations.CreateModel( @@ -63,12 +66,12 @@ class Migration(migrations.Migration): name='VulnerabilityReference', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('reference_id', models.CharField(blank=True, help_text='Reference ID, eg:DSA-4465-1', max_length=50)), - ('url', models.URLField(blank=True, help_text='URL of Vulnerability data', max_length=1024)), + ('url', models.URLField(blank=True, help_text='URL to the vulnerability reference', max_length=1024)), + ('reference_id', models.CharField(blank=True, help_text='An optional reference ID, such as DSA-4465-1 when available', max_length=50, null=True)), ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), ], options={ - 'unique_together': {('vulnerability', 'reference_id', 'url')}, + 'unique_together': {('vulnerability', 'url', 'reference_id')}, }, ), migrations.CreateModel( diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 41460876f..40964accf 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -38,6 +38,8 @@ from vulnerabilities.data_source import DataSource from vulnerabilities.data_source import AdvisoryData +from vulnerabilities.data_source import AffectedPackage +from vulnerabilities.data_source import Reference from vulnerabilities.severity_systems import scoring_systems from vulnerabilities.data_inference import MAX_CONFIDENCE @@ -112,20 +114,25 @@ class VulnerabilityReference(models.Model): """ vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) + url = models.URLField( + max_length=1024, help_text="URL to the vulnerability reference", blank=True + ) reference_id = models.CharField( - max_length=50, help_text="Reference 
ID, eg:DSA-4465-1", blank=True + max_length=50, + help_text="An optional reference ID, such as DSA-4465-1 when available", + blank=True, + null=True, ) - url = models.URLField(max_length=1024, help_text="URL of Vulnerability data", blank=True) @property def scores(self): return VulnerabilitySeverity.objects.filter(reference=self.id) class Meta: - unique_together = ("vulnerability", "reference_id", "url") + unique_together = ("vulnerability", "url", "reference_id") def __str__(self): - return f"{self.reference_id} {self.url}" + return f"{self.url} {self.reference_id}" class Package(PackageURLMixin): @@ -202,7 +209,9 @@ class PackageRelatedVulnerability(models.Model): created_by = models.CharField( max_length=100, blank=True, - help_text="Fully qualified name of the improver prefixed with the module name responsible for creating this relation. Eg: vulnerabilities.importers.nginx.NginxTimeTravel", + help_text="Fully qualified name of the improver prefixed with the" + "module name responsible for creating this relation. Eg:" + "vulnerabilities.importers.nginx.NginxTimeTravel", ) confidence = models.PositiveIntegerField( @@ -327,21 +336,42 @@ class Meta: class Advisory(models.Model): """ - An advisory directly obtained from upstream without any modifications. + An advisory represents data directly obtained from upstream transformed + into structured data """ - date_published = models.DateField(help_text="UTC Date of publication of the advisory") - date_collected = models.DateField(help_text="UTC Date on which the advisory was collected") - created_by = models.CharField( - max_length=100, - help_text="Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource", - ) - date_improved = models.DateTimeField( - null=True, help_text="Latest date on which the advisory was improved by an improver" - ) + vulnerability_id = models.CharField(max_length=50, null=True, blank=True) + summary = models.TextField(blank=True, null=True) # we use a JSON field here to avoid creating a complete relational model for data that # is never queried directly; instead it is only retrieved and processed as a whole by # an improver - data = models.JSONField( - help_text="Contents of data_source.AdvisoryData serialized as a JSON object" + affected_packages = models.JSONField( + blank=True, null=True, help_text="A list of serializabale AffectedPackage objects" + ) + references = models.JSONField( + blank=True, null=True, help_text="A list of serializabale Reference objects" ) + date_published = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the advisory" + ) + date_collected = models.DateTimeField(help_text="UTC Date on which the advisory was collected") + date_improved = models.DateTimeField( + blank=True, + null=True, + help_text="Latest date on which the advisory was improved by an improver", + ) + created_by = models.CharField( + max_length=100, + help_text="Fully qualified name of the importer prefixed with the" + "module name importing the advisory. 
Eg:" + "vulnerabilities.importers.nginx.NginxDataSource", + ) + + def to_advisory_data(self) -> AdvisoryData: + return AdvisoryData( + vulnerability_id=self.vulnerability_id, + summary=self.summary, + affected_packages=[AffectedPackage.from_dict(pkg) for pkg in self.affected_packages], + references=[Reference.from_dict(ref) for ref in self.references], + date_published=self.date_published, + ) From 22d713200a746a3118666f21421d60f95b846fd0 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Wed, 15 Dec 2021 04:38:31 +0530 Subject: [PATCH 21/40] Update improvers to accept new univers design Rewrite following improvers: DefaultImprover Plus adjust the improver framework Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 27 ++++++++-- vulnerabilities/data_source.py | 45 ++++++++++++---- vulnerabilities/improve_runner.py | 48 ++++++++--------- vulnerabilities/improvers/default.py | 81 ++++++++++++++-------------- vulnerabilities/models.py | 28 ++++------ 5 files changed, 133 insertions(+), 96 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index c16413b54..3fee3343b 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -2,6 +2,7 @@ import logging from typing import List from typing import Optional +from uuid import uuid4 from packageurl import PackageURL from django.db.models.query import QuerySet @@ -19,6 +20,12 @@ class Inference: """ This data class expresses the contract between data improvers and the improve runner. + If a vulnerability_id is present then: + summary or affected_purls or fixed_purl or references must be present + otherwise + either affected_purls or fixed_purl or references should be present and + a VULCOID will be assigned as the vulnerability_id + Only inferences with highest confidence for one vulnerability <-> package relationship is to be inserted into the database """ @@ -27,7 +34,7 @@ class Inference: confidence: int = MAX_CONFIDENCE summary: Optional[str] = None affected_purls: List[PackageURL] = dataclasses.field(default_factory=list) - fixed_purls: List[PackageURL] = dataclasses.field(default_factory=list) + fixed_purl: PackageURL = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) def __post_init__(self): @@ -35,13 +42,14 @@ def __post_init__(self): raise ValueError if self.vulnerability_id: - assert self.summary or self.affected_purls or self.fixed_purls or self.references + assert self.summary or self.affected_purls or self.fixed_purl or self.references else: # TODO: Maybe only having summary - assert self.affected_purls or self.fixed_purls or self.references + assert self.affected_purls or self.fixed_purl or self.references + self.vulnerability_id = self.generate_vulcoid() versionless_purls = [] - for purl in self.affected_purls + self.fixed_purls: + for purl in self.affected_purls + [self.fixed_purl]: if not purl.version: versionless_purls.append(purl) @@ -49,6 +57,10 @@ def __post_init__(self): not versionless_purls ), f"Version-less purls are not supported in an Inference: {versionless_purls}" + @staticmethod + def generate_vulcoid(): + return f"VULCOID-{uuid4()}" + class Improver: """ @@ -68,3 +80,10 @@ def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]: Generate and return Inferences for the given advisory data """ raise NotImplementedError + + def __repr__(self): + """ + Fully qualified name prefixed with the module name of the improver + used in logging. 
+ """ + return f"{self.__module__}.{self.__class__.__qualname__}" diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 1a7335452..c5be944cd 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -47,6 +47,7 @@ from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.oval_parser import OvalParser from vulnerabilities.severity_systems import ScoringSystem +from vulnerabilities.severity_systems import scoring_systems logger = logging.getLogger(__name__) @@ -57,11 +58,23 @@ class VulnerabilitySeverity: value: str def to_dict(self): + """ + Return a serializable dict that can be converted back using self.from_dict + """ return { "system": self.system.identifier, "value": self.value, } + @staticmethod + def from_dict(severity: dict): + """ + Return a VulnerabilitySeverity object from dict generated by self.to_dict + """ + return VulnerabilitySeverity( + system=scoring_systems[severity["system"]], value=severity["value"] + ) + @dataclasses.dataclass(order=True) class Reference: @@ -79,12 +92,28 @@ def normalized(self): return Reference(reference_id=self.reference_id, url=self.url, severities=severities) def to_dict(self): + """ + Return a serializable dict that can be converted back using self.from_dict + """ return { "reference_id": self.reference_id, "url": self.url, "severities": [severity.to_dict() for severity in self.severities], } + @staticmethod + def from_dict(ref: dict): + """ + Return a Reference object from dict generated by self.to_dict + """ + return Reference( + reference_id=ref["reference_id"], + url=ref["url"], + severities=[ + VulnerabilitySeverity.from_dict(severity) for severity in ref["severities"] + ], + ) + @dataclasses.dataclass(order=True, frozen=True) class AffectedPackage: @@ -106,28 +135,26 @@ def to_dict(self): Return a serializable dict that can be converted back using self.from_dict """ return { - "package": self.package, + "package": self.package.to_dict(), "affected_version_range": str(self.affected_version_range), "fixed_version": str(self.fixed_version) if self.fixed_version else None, } @staticmethod def from_dict(affected_pkg: dict): - # TODO: REWRITE """ - Return AffectedPackage object from a dict generated by self.to_dict + Return an AffectedPackage object from dict generated by self.to_dict """ - package = PackageURL(*affected_pkg["package"]) - affected_versions = VersionRange.from_string(affected_pkg["affected_versions"]) - # VersionRange.version_class to get the Version subclass - fixed_version = Version.affected_pkg["fixed_version"] + package = PackageURL(**affected_pkg["package"]) + affected_version_range = VersionRange.from_string(affected_pkg["affected_version_range"]) + fixed_version = affected_pkg["fixed_version"] if fixed_version: # TODO: revisit after https://github.com/nexB/univers/issues/10 - fixed_version = Version(fixed_version) + fixed_version = affected_version_range.version_class(fixed_version) return AffectedPackage( package=package, - affected_versions=affected_versions, + affected_version_range=affected_version_range, fixed_version=fixed_version, ) diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index f22b0bbc5..ba64bfe65 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -1,16 +1,14 @@ +import logging from datetime import datetime from datetime import timezone -import json -import logging -from typing import Tuple from typing import List +from typing import Tuple from django.db 
import transaction from vulnerabilities import models -from vulnerabilities.data_source import PackageURL -from vulnerabilities.data_source import AdvisoryData from vulnerabilities.data_inference import Inference +from vulnerabilities.data_source import PackageURL from vulnerabilities.models import Advisory @@ -27,19 +25,18 @@ def __init__(self, improver): self.improver = improver def run(self) -> None: - logger.info("Improving using %s.", self.improver.__name__) - source = f"{self.improver.__module__}.{self.improver.__qualname__}" improver = self.improver() + logger.info(f"Running improver: {improver!r}") for advisory in improver.interesting_advisories: - inferences = improver.get_inferences( - advisory_data=AdvisoryData.from_dict(json.loads(advisory.data)) + inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data()) + process_inferences( + inferences=inferences, advisory=advisory, improver_name=repr(improver) ) - process_inferences(source=source, advisory=advisory, inferences=inferences) logger.info("Finished improving using %s.", self.improver.__name__) @transaction.atomic -def process_inferences(source: str, advisory: Advisory, inferences: List[Inference]): +def process_inferences(inferences: List[Inference], advisory: Advisory, improver_name: str): """ An atomic transaction that updates both the Advisory (e.g. date_improved) and processes the given inferences to create or update corresponding @@ -51,11 +48,10 @@ def process_inferences(source: str, advisory: Advisory, inferences: List[Inferen """ if not inferences: - logger.warn(f"Nothing to improve. Source: {source} Advisory id: {advisory.pk}") + logger.warn(f"Nothing to improve. Source: {improver_name} Advisory id: {advisory.id}") return - advisory.date_improved = datetime.now(timezone.utc) - advisory.save() + logger.info(f"Improving advisory id: {advisory.id}") for inference in inferences: vuln, vuln_created = _get_or_create_vulnerability( @@ -77,22 +73,24 @@ def process_inferences(source: str, advisory: Advisory, inferences: List[Inferen for pkg in inference.affected_purls: vulnerable_package, _ = _get_or_create_package(pkg) models.PackageRelatedVulnerability( - package=vulnerable_package, vulnerability=vuln, - source=source, + package=vulnerable_package, + created_by=improver_name, confidence=inference.confidence, fix=False, ).update_or_create() - for pkg in inference.fixed_purls: - patched_package, _ = _get_or_create_package(pkg) - models.PackageRelatedVulnerability( - package=patched_package, - vulnerability=vuln, - source=source, - confidence=inference.confidence, - fix=True, - ).update_or_create() + fixed_package, _ = _get_or_create_package(inference.fixed_purl) + models.PackageRelatedVulnerability( + vulnerability=vuln, + package=fixed_package, + created_by=improver_name, + confidence=inference.confidence, + fix=True, + ).update_or_create() + + advisory.date_improved = datetime.now(timezone.utc) + advisory.save() # TODO: This likely may be best as a model or manager method. 
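Taken together, these changes mean an improver is passed to the runner as a class and instantiated inside run(). A minimal usage sketch, assuming the module paths shown in this patch:

    from vulnerabilities.improve_runner import ImproveRunner
    from vulnerabilities.improvers.default import DefaultImprover

    # The runner instantiates the improver class, iterates over its
    # interesting_advisories queryset and stores the resulting inferences.
    ImproveRunner(DefaultImprover).run()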
diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 779f479eb..c9fda7ce9 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -1,66 +1,69 @@ -import json +from typing import Iterable from typing import List from itertools import chain -from packageurl import PackageURL from django.db.models.query import QuerySet +from packageurl import PackageURL -from vulnerabilities.data_source import AdvisoryData -from vulnerabilities.data_source import AffectedPackage -from vulnerabilities.data_inference import Inference from vulnerabilities.data_inference import Improver +from vulnerabilities.data_inference import Inference from vulnerabilities.data_inference import MAX_CONFIDENCE +from vulnerabilities.data_source import AdvisoryData +from vulnerabilities.data_source import AffectedPackage from vulnerabilities.models import Advisory class DefaultImprover(Improver): """ - This is the first step after running any importer. The inferences generated - are only a translation of Advisory data returned by the importers into - full confidence inferences + Generate a translation of Advisory data - returned by the importers - into + full confidence inferences. These are basic database relationships for + unstructured data present in the Advisory model without any other + information source. """ @property def interesting_advisories(self) -> QuerySet: return Advisory.objects.all() - def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]: - inferences = [] - for aff_pkg in advisory_data.affected_packages: - affected_purls, fixed_purl = exact_purls(aff_pkg) - inferences.append( - Inference( - vulnerability_id=advisory_data.vulnerability_id, - confidence=MAX_CONFIDENCE, - summary=advisory_data.summary, - affected_purls=affected_purls, - fixed_purls=[fixed_purl], - references=advisory_data.references, - ) + def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: + for affected_package in advisory_data.affected_packages: + affected_purls, fixed_purl = get_exact_purls(affected_package) + yield Inference( + vulnerability_id=advisory_data.vulnerability_id, + confidence=MAX_CONFIDENCE, + summary=advisory_data.summary, + affected_purls=affected_purls, + fixed_purl=fixed_purl, + references=advisory_data.references, ) - return inferences -def exact_purls(aff_pkg: AffectedPackage) -> (List[PackageURL], PackageURL): +def get_exact_purls(affected_package: AffectedPackage) -> (List[PackageURL], PackageURL): """ - Only AffectedPackages with an equality in their VersionSpecifier are - considered as exact purls. + Return purls for fixed and affected packages contained in the given + AffectedPackage disregarding any ranges. + Only exact version constraints (ie with an equality) are considered For eg: - AffectedPackage with version_specifier as scheme:<=2.0 is treated as - version 2.0 but the same with scheme:<2.0 is not considered at all as there - is no info about what comes before the supplied version - - Return a list of affected PackageURL and corresponding fixed PackageURL + >>> purl = {"type": "turtle", "name": "green"} + >>> vers = "vers:npm/>=2.0.0,<3.0.0 | <1.0.0" + >>> affected_package = AffectedPackage.from_dict({ + ... "package": purl, + ... "affected_version_range": vers, + ... "fixed_version": "5.0.0" + ... 
}) + >>> get_exact_purls(affected_package) + ({PackageURL(type='turtle', namespace=None, name='green', version='2.0.0', qualifiers={}, subpath=None)}, PackageURL(type='turtle', namespace=None, name='green', version='5.0.0', qualifiers={}, subpath=None)) """ - vs = aff_pkg.affected_version_specifier - aff_purls = [] - for rng in vs.ranges: - if rng.operator in ("=", ">=", "<="): - aff_purl = aff_pkg.package._replace(version=rng.version.value) - aff_purls.append(aff_purl) + affected_purls = set() + all_constraints = set(chain.from_iterable(affected_package.affected_version_range.constraints)) + for constraint in all_constraints: + if constraint.comparator in ["=", "<=", ">="]: + affected_purl = affected_package.package._replace(version=str(constraint.version)) + affected_purls.add(affected_purl) + affected_purls = list(affected_purls) - fixed_version = aff_pkg.fixed_version.version_string - fixed_purl = aff_pkg.package._replace(version=fixed_version) + fixed_version = affected_package.fixed_version + fixed_purl = affected_package.package._replace(version=str(fixed_version)) - return aff_purls, fixed_purl + return affected_purls, fixed_purl diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 40964accf..ef94f7c22 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -73,18 +73,6 @@ class Vulnerability(models.Model): blank=True, ) - def save(self, *args, **kwargs): - if not self.vulnerability_id: - self.vulnerability_id = self.generate_vulcoid() - return super().save(*args, **kwargs) - - @staticmethod - def generate_vulcoid(timestamp=None): - if not timestamp: - timestamp = datetime.now() - timestamp = timestamp.strftime("%Y%m%d-%H%M-%S%f") - return f"VULCOID-{timestamp}" - @property def vulnerable_to(self): """ @@ -244,13 +232,11 @@ def update_or_create(self): existing.confidence = self.confidence existing.fix = self.fix existing.save() - # TODO: later we want these to be part of a log field in the DB - logger.debug( - "Confidence improved for %s R %s, new confidence: %d", - self.package, - self.vulnerability, - self.confidence, - ) + # TODO: later we want these to be part of a log field in the DB + logger.info( + f"Confidence improved for {self.package} R {self.vulnerability}, " + f"new confidence: {self.confidence}" + ) except self.DoesNotExist: self.__class__.objects.create( @@ -260,6 +246,10 @@ def update_or_create(self): confidence=self.confidence, fix=self.fix, ) + logger.info( + f"New relationship {self.package} R {self.vulnerability}, " + f"fix: {self.fix}, confidence: {self.confidence}" + ) class Importer(models.Model): From 7cd40445fa00997dc2f2bbed750f5cccf1de8a34 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 14 Jan 2022 18:20:11 +0530 Subject: [PATCH 22/40] Add AdvisoryData.to_inference and AffectedPackage.merge Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index c5be944cd..7d73c25fc 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -130,6 +130,31 @@ def __post_init__(self): if self.package.version: raise ValueError + @staticmethod + def merge(affected_packages: Iterable): + """ + Return a tuple with all attributes of AffectedPackage as a set + for all values in the given iterable of AffectedPackage + + This is useful where an iterable of AffectedPackage needs to be + converted into one tuple of structure similar to AffectedPackage + 
but with multiple fixed_versions, ie + package: PackageURL + affected_version_range: VersionRange + fixed_versions: [Version] + """ + affected_version_ranges = set() + fixed_versions = set() + purls = set() + for pkg in affected_packages: + affected_version_ranges.add(pkg.affected_version_range) + fixed_versions.add(pkg.fixed_version) + purls.add(pkg.package) + if len(purls) > 1: + print(affected_packages) + raise TypeError("Cannot merge with different purls", purls) + return purls.pop(), affected_version_ranges, fixed_versions + def to_dict(self): """ Return a serializable dict that can be converted back using self.from_dict @@ -187,6 +212,22 @@ def __post_init__(self): if self.date_published: assert self.date_published.tzinfo + def to_inference(self, confidence, affected_purls, fixed_purl): + """ + Convert to an Inference object while keeping the same values + for vulnerability_id, summary and references + """ + from vulnerabilities.data_inference import Inference + + return Inference( + vulnerability_id=self.vulnerability_id, + confidence=confidence, + summary=self.summary, + affected_purls=affected_purls, + fixed_purl=fixed_purl, + references=self.references, + ) + class InvalidConfigurationError(Exception): pass From c7bc550bfe0edc845eae0bdca53084573e5d551a Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 14 Jan 2022 18:21:52 +0530 Subject: [PATCH 23/40] Add to_reper to DataSource Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 7d73c25fc..522f415cf 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -281,11 +281,15 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass def __repr__(self): + return self.to_repr() + + @classmethod + def to_repr(cls): """ Fully qualified name prefixed with the module name of the data source used in logging. 
""" - return f"{self.__module__}.{self.__class__.__qualname__}" + return f"{cls.__module__}.{cls.__qualname__}" @property def cutoff_timestamp(self) -> int: From f399af65d51c469389a35ca7bf146c2cb8c1c7e1 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 14 Jan 2022 18:22:45 +0530 Subject: [PATCH 24/40] Apply formatting changes Signed-off-by: Hritik Vijay --- vulnerabilities/import_runner.py | 11 +- vulnerabilities/importers/nginx.py | 8 + vulnerabilities/migrations/0001_initial.py | 344 +++++++++++++++++---- vulnerabilities/models.py | 4 +- vulnerabilities/views.py | 2 +- 5 files changed, 303 insertions(+), 66 deletions(-) diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 0bc52e81b..1875e4be5 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -110,8 +110,13 @@ def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: st "date_collected": datetime.datetime.now(tz=datetime.timezone.utc), }, ) - if not created: - logger.warn( - f"Advisory with vulnerability_id: {obj.vulnerability_id}," + if created: + logger.info( + f"[*] New Advisory with vulnerability_id: {obj.vulnerability_id}, " + f"created_by: {obj.created_by}" + ) + else: + logger.debug( + f"Advisory with vulnerability_id: {obj.vulnerability_id}, " f"created_by: {obj.created_by} already exists" ) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index b2e19e0a5..68684da7f 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -23,12 +23,15 @@ import dataclasses import datetime from typing import Iterable +import logging +import asyncio import requests from bs4 import BeautifulSoup from packageurl import PackageURL from univers.version_range import NginxVersionRange from univers.versions import SemverVersion +from django.db.models.query import QuerySet from vulnerabilities.data_source import AdvisoryData from vulnerabilities.data_source import AffectedPackage @@ -36,11 +39,16 @@ from vulnerabilities.data_source import DataSourceConfiguration from vulnerabilities.data_source import Reference from vulnerabilities.data_source import VulnerabilitySeverity +from vulnerabilities.data_inference import Inference +from vulnerabilities.data_inference import Improver from vulnerabilities.helpers import nearest_patched_package +from vulnerabilities.models import Advisory from vulnerabilities.package_managers import GitHubTagsAPI from vulnerabilities.package_managers import Version from vulnerabilities.severity_systems import scoring_systems +logger = logging.getLogger(__name__) + @dataclasses.dataclass class NginxDataSourceConfiguration(DataSourceConfiguration): diff --git a/vulnerabilities/migrations/0001_initial.py b/vulnerabilities/migrations/0001_initial.py index 6551edc64..8aa6b9bf1 100644 --- a/vulnerabilities/migrations/0001_initial.py +++ b/vulnerabilities/migrations/0001_initial.py @@ -9,106 +9,330 @@ class Migration(migrations.Migration): initial = True - dependencies = [ - ] + dependencies = [] operations = [ migrations.CreateModel( - name='Advisory', + name="Advisory", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('vulnerability_id', models.CharField(blank=True, max_length=50, null=True)), - ('summary', models.TextField(blank=True, null=True)), - ('affected_packages', models.TextField()), - ('references', models.TextField()), - ('date_published', models.DateField(blank=True, help_text='UTC Date of 
publication of the advisory', null=True)), - ('date_collected', models.DateField(help_text='UTC Date on which the advisory was collected')), - ('date_improved', models.DateTimeField(blank=True, help_text='Latest date on which the advisory was improved by an improver', null=True)), - ('created_by', models.CharField(help_text='Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource', max_length=100)), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("vulnerability_id", models.CharField(blank=True, max_length=50, null=True)), + ("summary", models.TextField(blank=True, null=True)), + ("affected_packages", models.TextField()), + ("references", models.TextField()), + ( + "date_published", + models.DateField( + blank=True, help_text="UTC Date of publication of the advisory", null=True + ), + ), + ( + "date_collected", + models.DateField(help_text="UTC Date on which the advisory was collected"), + ), + ( + "date_improved", + models.DateTimeField( + blank=True, + help_text="Latest date on which the advisory was improved by an improver", + null=True, + ), + ), + ( + "created_by", + models.CharField( + help_text="Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource", + max_length=100, + ), + ), ], ), migrations.CreateModel( - name='Importer', + name="Importer", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', models.CharField(help_text='Name of the importer', max_length=100, unique=True)), - ('license', models.CharField(blank=True, help_text='License of the vulnerability data', max_length=100)), - ('last_run', models.DateTimeField(help_text='UTC Timestamp of the last run', null=True)), - ('data_source', models.CharField(help_text='Name of the data source implementation importable from vulnerabilities.importers', max_length=100)), - ('data_source_cfg', models.JSONField(default=dict, help_text='Implementation-specific configuration for the data source')), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "name", + models.CharField(help_text="Name of the importer", max_length=100, unique=True), + ), + ( + "license", + models.CharField( + blank=True, help_text="License of the vulnerability data", max_length=100 + ), + ), + ( + "last_run", + models.DateTimeField(help_text="UTC Timestamp of the last run", null=True), + ), + ( + "data_source", + models.CharField( + help_text="Name of the data source implementation importable from vulnerabilities.importers", + max_length=100, + ), + ), + ( + "data_source_cfg", + models.JSONField( + default=dict, + help_text="Implementation-specific configuration for the data source", + ), + ), ], ), migrations.CreateModel( - name='Package', + name="Package", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('type', models.CharField(blank=True, help_text='A short code to identify the type of this package. 
For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.', max_length=16)), - ('namespace', models.CharField(blank=True, help_text='Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.', max_length=255)), - ('name', models.CharField(blank=True, help_text='Name of the package.', max_length=100)), - ('version', models.CharField(blank=True, help_text='Version of the package.', max_length=100)), - ('subpath', models.CharField(blank=True, help_text='Extra subpath within a package, relative to the package root.', max_length=200)), - ('qualifiers', models.JSONField(blank=True, default=dict, help_text='Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.')), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "type", + models.CharField( + blank=True, + help_text="A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", + max_length=16, + ), + ), + ( + "namespace", + models.CharField( + blank=True, + help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", + max_length=255, + ), + ), + ( + "name", + models.CharField(blank=True, help_text="Name of the package.", max_length=100), + ), + ( + "version", + models.CharField( + blank=True, help_text="Version of the package.", max_length=100 + ), + ), + ( + "subpath", + models.CharField( + blank=True, + help_text="Extra subpath within a package, relative to the package root.", + max_length=200, + ), + ), + ( + "qualifiers", + models.JSONField( + blank=True, + default=dict, + help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", + ), + ), ], ), migrations.CreateModel( - name='Vulnerability', + name="Vulnerability", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('vulnerability_id', models.CharField(help_text="Unique identifier for a vulnerability: this is either a published CVE id (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID (as in VULCOID-20210222-1315-16461541). When a vulnerability CVE is assigned later we replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id' field to support redirection to the CVE id.", max_length=50, unique=True)), - ('old_vulnerability_id', models.CharField(blank=True, help_text='empty if no CVE else VC id', max_length=50, null=True, unique=True)), - ('summary', models.TextField(blank=True, help_text='Summary of the vulnerability')), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "vulnerability_id", + models.CharField( + help_text="Unique identifier for a vulnerability: this is either a published CVE id (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID (as in VULCOID-20210222-1315-16461541). 
When a vulnerability CVE is assigned later we replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id' field to support redirection to the CVE id.", + max_length=50, + unique=True, + ), + ), + ( + "old_vulnerability_id", + models.CharField( + blank=True, + help_text="empty if no CVE else VC id", + max_length=50, + null=True, + unique=True, + ), + ), + ("summary", models.TextField(blank=True, help_text="Summary of the vulnerability")), ], options={ - 'verbose_name_plural': 'Vulnerabilities', + "verbose_name_plural": "Vulnerabilities", }, ), migrations.CreateModel( - name='VulnerabilityReference', + name="VulnerabilityReference", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('url', models.URLField(blank=True, help_text='URL to the vulnerability reference', max_length=1024)), - ('reference_id', models.CharField(blank=True, help_text='An optional reference ID, such as DSA-4465-1 when available', max_length=50, null=True)), - ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + blank=True, help_text="URL to the vulnerability reference", max_length=1024 + ), + ), + ( + "reference_id", + models.CharField( + blank=True, + help_text="An optional reference ID, such as DSA-4465-1 when available", + max_length=50, + null=True, + ), + ), + ( + "vulnerability", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="vulnerabilities.vulnerability", + ), + ), ], options={ - 'unique_together': {('vulnerability', 'url', 'reference_id')}, + "unique_together": {("vulnerability", "url", "reference_id")}, }, ), migrations.CreateModel( - name='PackageRelatedVulnerability', + name="PackageRelatedVulnerability", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('created_by', models.CharField(blank=True, help_text='Fully qualified name of the improver prefixed with the module name responsible for creating this relation. Eg: vulnerabilities.importers.nginx.NginxTimeTravel', max_length=100)), - ('confidence', models.PositiveIntegerField(default=100, help_text='Confidence score for this relation', validators=[django.core.validators.MinValueValidator(0), django.core.validators.MaxValueValidator(100)])), - ('fix', models.BooleanField(default=False, help_text='Does this relation fix the specified vulnerability ?')), - ('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='package', to='vulnerabilities.package')), - ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "created_by", + models.CharField( + blank=True, + help_text="Fully qualified name of the improver prefixed with the module name responsible for creating this relation. 
Eg: vulnerabilities.importers.nginx.NginxTimeTravel", + max_length=100, + ), + ), + ( + "confidence", + models.PositiveIntegerField( + default=100, + help_text="Confidence score for this relation", + validators=[ + django.core.validators.MinValueValidator(0), + django.core.validators.MaxValueValidator(100), + ], + ), + ), + ( + "fix", + models.BooleanField( + default=False, + help_text="Does this relation fix the specified vulnerability ?", + ), + ), + ( + "package", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="package", + to="vulnerabilities.package", + ), + ), + ( + "vulnerability", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="vulnerabilities.vulnerability", + ), + ), ], options={ - 'verbose_name_plural': 'PackageRelatedVulnerabilities', - 'unique_together': {('package', 'vulnerability')}, + "verbose_name_plural": "PackageRelatedVulnerabilities", + "unique_together": {("package", "vulnerability")}, }, ), migrations.AddField( - model_name='package', - name='vulnerabilities', - field=models.ManyToManyField(related_name='packages', through='vulnerabilities.PackageRelatedVulnerability', to='vulnerabilities.Vulnerability'), + model_name="package", + name="vulnerabilities", + field=models.ManyToManyField( + related_name="packages", + through="vulnerabilities.PackageRelatedVulnerability", + to="vulnerabilities.Vulnerability", + ), ), migrations.CreateModel( - name='VulnerabilitySeverity', + name="VulnerabilitySeverity", fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('value', models.CharField(help_text='Example: 9.0, Important, High', max_length=50)), - ('scoring_system', models.CharField(choices=[('cvssv2', 'CVSSv2 Base Score'), ('cvssv2_vector', 'CVSSv2 Vector'), ('cvssv3', 'CVSSv3 Base Score'), ('cvssv3_vector', 'CVSSv3 Vector'), ('cvssv3.1', 'CVSSv3.1 Base Score'), ('cvssv3.1_vector', 'CVSSv3.1 Vector'), ('rhbs', 'RedHat Bugzilla severity'), ('rhas', 'RedHat Aggregate severity'), ('avgs', 'Archlinux Vulnerability Group Severity'), ('cvssv3.1_qr', 'CVSSv3.1 Qualitative Severity Rating'), ('generic_textual', 'Generic textual severity rating'), ('apache_httpd', 'Apache Httpd Severity')], help_text='identifier for the scoring system used. 
Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system, cvssv3.1_qr is vulnerability_id for CVSSv3.1 Qualitative Severity Rating system, generic_textual is vulnerability_id for Generic textual severity rating system, apache_httpd is vulnerability_id for Apache Httpd Severity system ', max_length=50)), - ('reference', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerabilityreference')), - ('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.vulnerability')), + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "value", + models.CharField(help_text="Example: 9.0, Important, High", max_length=50), + ), + ( + "scoring_system", + models.CharField( + choices=[ + ("cvssv2", "CVSSv2 Base Score"), + ("cvssv2_vector", "CVSSv2 Vector"), + ("cvssv3", "CVSSv3 Base Score"), + ("cvssv3_vector", "CVSSv3 Vector"), + ("cvssv3.1", "CVSSv3.1 Base Score"), + ("cvssv3.1_vector", "CVSSv3.1 Vector"), + ("rhbs", "RedHat Bugzilla severity"), + ("rhas", "RedHat Aggregate severity"), + ("avgs", "Archlinux Vulnerability Group Severity"), + ("cvssv3.1_qr", "CVSSv3.1 Qualitative Severity Rating"), + ("generic_textual", "Generic textual severity rating"), + ("apache_httpd", "Apache Httpd Severity"), + ], + help_text="identifier for the scoring system used. 
Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system, cvssv3.1_qr is vulnerability_id for CVSSv3.1 Qualitative Severity Rating system, generic_textual is vulnerability_id for Generic textual severity rating system, apache_httpd is vulnerability_id for Apache Httpd Severity system ", + max_length=50, + ), + ), + ( + "reference", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="vulnerabilities.vulnerabilityreference", + ), + ), + ( + "vulnerability", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="vulnerabilities.vulnerability", + ), + ), ], options={ - 'unique_together': {('vulnerability', 'reference', 'scoring_system')}, + "unique_together": {("vulnerability", "reference", "scoring_system")}, }, ), migrations.AlterUniqueTogether( - name='package', - unique_together={('name', 'namespace', 'type', 'version', 'qualifiers', 'subpath')}, + name="package", + unique_together={("name", "namespace", "type", "version", "qualifiers", "subpath")}, ), ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ef94f7c22..5b914a40a 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -353,8 +353,8 @@ class Advisory(models.Model): created_by = models.CharField( max_length=100, help_text="Fully qualified name of the importer prefixed with the" - "module name importing the advisory. Eg:" - "vulnerabilities.importers.nginx.NginxDataSource", + "module name importing the advisory. 
Eg:" + "vulnerabilities.importers.nginx.NginxDataSource", ) def to_advisory_data(self) -> AdvisoryData: diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index cc5dc5e33..5d5ea686e 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -79,7 +79,7 @@ def request_to_queryset(request): "vulnerabilities", filter=Q(vulnerabilities__packagerelatedvulnerability__fix=False), ), - #TODO: consider renaming to fixed in the future + # TODO: consider renaming to fixed in the future patched_vulnerability_count=Count( "vulnerabilities", filter=Q(vulnerabilities__packagerelatedvulnerability__fix=True), From 73f868291df394b0f18f9bfeb77ce3c908c2d55f Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 14 Jan 2022 18:25:30 +0530 Subject: [PATCH 25/40] Adopt new vers spec VersionRange is always a flat list of constraints Signed-off-by: Hritik Vijay --- vulnerabilities/improvers/default.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index c9fda7ce9..8e2568c7e 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -56,7 +56,7 @@ def get_exact_purls(affected_package: AffectedPackage) -> (List[PackageURL], Pac ({PackageURL(type='turtle', namespace=None, name='green', version='2.0.0', qualifiers={}, subpath=None)}, PackageURL(type='turtle', namespace=None, name='green', version='5.0.0', qualifiers={}, subpath=None)) """ affected_purls = set() - all_constraints = set(chain.from_iterable(affected_package.affected_version_range.constraints)) + all_constraints = affected_package.affected_version_range.constraints for constraint in all_constraints: if constraint.comparator in ["=", "<=", ">="]: affected_purl = affected_package.package._replace(version=str(constraint.version)) From d89e3d3c9dff590402c6791b10a13a6c8f157f33 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Fri, 14 Jan 2022 18:26:22 +0530 Subject: [PATCH 26/40] Implement NginxBasicImprover Signed-off-by: Hritik Vijay --- vulnerabilities/importers/nginx.py | 92 ++++++++++++++++++++++++--- vulnerabilities/improvers/__init__.py | 3 +- 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 68684da7f..917b5bd29 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -99,15 +99,15 @@ def to_advisory_data( # TODO: Mail nginx for this anomaly if "none" in fixed_version: - # FIXME: This breaks because https://github.com/nexB/univers/issues/10 + # FIXME: Fix after https://github.com/nexB/univers/pull/31 + break + affected_packages.append( + AffectedPackage( + package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers), + affected_version_range=affected_version_range, + ) + ) break - # affected_packages.append( - # AffectedPackage( - # package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers), - # affected_version_range=affected_version_range, - # ) - # ) - # break fixed_version = SemverVersion(fixed_version) purl = PackageURL(type="generic", name="nginx", qualifiers=qualifiers) @@ -195,3 +195,79 @@ def parse_advisory_data_from_paragraph(vuln_info): "vulnerable": vulnerable, "references": references, } + + +class NginxBasicImprover(Improver): + def __init__(self): + self.set_api() + + @property + def interesting_advisories(self) -> QuerySet: + return Advisory.objects.filter(created_by=NginxDataSource.to_repr()) + + def get_inferences(self, 
advisory_data: AdvisoryData) -> Iterable[Inference]: + """ + Generate and return Inferences for the given advisory data + """ + try: + purl, affected_version_ranges, fixed_versions = AffectedPackage.merge( + advisory_data.affected_packages + ) + except KeyError: + return iter([]) + all_versions = self.version_api.get("nginx/nginx").valid_versions + affected_purls = [] + for affected_version_range in affected_version_ranges: + for version in all_versions: + version = SemverVersion(version) + if is_vulnerable( + version=version, + affected_version_range=affected_version_range, + fixed_versions=fixed_versions, + ): + affected_purls.append(purl._replace(version=version)) + + for fixed_version in fixed_versions: + # TODO: This also yields with a lower fixed version, maybe we should + # only yield fixes that are upgrades ? + fixed_purl = purl._replace(version=fixed_version) + yield advisory_data.to_inference( + confidence=90, # TODO: Decide properly + affected_purls=affected_purls, + fixed_purl=fixed_purl, + ) + + def set_api(self): + self.version_api = GitHubTagsAPI() + asyncio.run(self.version_api.load_api(["nginx/nginx"])) + + # Nginx tags it's releases are in the form of `release-1.2.3` + # Chop off the `release-` part here. + normalized_versions = set() + while self.version_api.cache["nginx/nginx"]: + version = self.version_api.cache["nginx/nginx"].pop() + normalized_version = Version( + version.value.replace("release-", ""), version.release_date + ) + normalized_versions.add(normalized_version) + self.version_api.cache["nginx/nginx"] = normalized_versions + + +def is_vulnerable(version, affected_version_range, fixed_versions): + # Check if the version is in "Vulnerable" range. If it's not, the + # version is not vulnerable. + # + # If it is, check if the branch is explicitly listed in the "Not + # vulnerable". If it's not, the version is vulnerable. If it + # is, check the minor number: if it's greater or equal to the + # version listed as not vulnerable, the version is not vulnerable, + # else the version is vulnerable. + # + # See: https://marc.info/?l=nginx&m=164070162912710&w=2 + + if version in NginxVersionRange.from_string(affected_version_range.to_string()): + for fixed_version in fixed_versions: + if version.value.minor == fixed_version.value.minor and version >= fixed_version: + return False + return True + return False diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index a2ad6e35e..951e6da38 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -1,5 +1,6 @@ from . import default +from .. import importers -IMPROVER_REGISTRY = [default.DefaultImprover] +IMPROVER_REGISTRY = [default.DefaultImprover, importers.nginx.NginxBasicImprover] improver_mapping = {f"{x.__module__}.{x.__name__}": x for x in IMPROVER_REGISTRY} From 4988452fab7b13a41d31027d3585ef8ed25717b9 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sat, 15 Jan 2022 18:23:25 +0530 Subject: [PATCH 27/40] Introduce aliases An alias is a unique vulnerability identifier in some database, such as the NVD, PYSEC, CVE or similar. These databases guarantee that these identifiers are unique within their namespace. An alias may also be used as a Reference. But in contrast with some Reference may not be an identifier for a single vulnerability, for instance, security advisories such as Debian security advisory reference various vulnerabilities. 
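As a rough, standalone sketch of the rule this introduces (plain dicts and made-up
identifiers such as "vuln-1" and "PYSEC-2021-0042", not the actual Django models and
queries added in improve_runner.py): all aliases attached to an inference must agree
on at most one existing vulnerability, otherwise the inference is skipped and a new
vulnerability is only created when none of the aliases are known yet.

    # alias name -> internal vulnerability id, standing in for the Alias table
    known_aliases = {
        "CVE-2021-23017": "vuln-1",   # made-up internal id
        "PYSEC-2021-0042": "vuln-1",  # made-up alias of the same vulnerability
    }

    def resolve(alias_names):
        # Return the single vulnerability shared by the known aliases, or None.
        vulns = {known_aliases[a] for a in alias_names if a in known_aliases}
        if len(vulns) > 1:
            return None  # conflicting aliases: skip, as the improve runner does
        return vulns.pop() if vulns else None

    assert resolve(["CVE-2021-23017", "PYSEC-2021-0042"]) == "vuln-1"
    assert resolve(["CVE-2000-0001"]) is None  # unknown aliases: a new vulnerability gets created
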
Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 40 ++++--- vulnerabilities/data_source.py | 38 ++----- vulnerabilities/import_runner.py | 14 +-- vulnerabilities/importers/nginx.py | 87 +++++++-------- vulnerabilities/improve_runner.py | 114 +++++++++++++++---- vulnerabilities/improvers/default.py | 2 +- vulnerabilities/models.py | 160 ++++++++++++++++++--------- vulnerabilities/severity_systems.py | 2 +- 8 files changed, 276 insertions(+), 181 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index 3fee3343b..6a83fda6f 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -20,17 +20,12 @@ class Inference: """ This data class expresses the contract between data improvers and the improve runner. - If a vulnerability_id is present then: - summary or affected_purls or fixed_purl or references must be present - otherwise - either affected_purls or fixed_purl or references should be present and - a VULCOID will be assigned as the vulnerability_id - Only inferences with highest confidence for one vulnerability <-> package relationship is to be inserted into the database """ - vulnerability_id: str + vulnerability_id: str = None + aliases: List[str] = dataclasses.field(default_factory=list) confidence: int = MAX_CONFIDENCE summary: Optional[str] = None affected_purls: List[PackageURL] = dataclasses.field(default_factory=list) @@ -41,12 +36,14 @@ def __post_init__(self): if self.confidence > MAX_CONFIDENCE or self.confidence < 0: raise ValueError - if self.vulnerability_id: - assert self.summary or self.affected_purls or self.fixed_purl or self.references - else: - # TODO: Maybe only having summary - assert self.affected_purls or self.fixed_purl or self.references - self.vulnerability_id = self.generate_vulcoid() + assert ( + self.vulnerability_id + or self.aliases + or self.summary + or self.affected_purls + or self.fixed_purl + or self.references + ) versionless_purls = [] for purl in self.affected_purls + [self.fixed_purl]: @@ -57,9 +54,20 @@ def __post_init__(self): not versionless_purls ), f"Version-less purls are not supported in an Inference: {versionless_purls}" - @staticmethod - def generate_vulcoid(): - return f"VULCOID-{uuid4()}" + @classmethod + def from_advisory_data(cls, advisory_data, confidence, affected_purls, fixed_purl): + """ + Return an Inference object while keeping the same values as of advisory_data + for vulnerability_id, summary and references + """ + return cls( + aliases=advisory_data.aliases, + confidence=confidence, + summary=advisory_data.summary, + affected_purls=affected_purls, + fixed_purl=fixed_purl, + references=advisory_data.references, + ) class Improver: diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 522f415cf..625a80df5 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -47,7 +47,7 @@ from vulnerabilities.helpers import nearest_patched_package from vulnerabilities.oval_parser import OvalParser from vulnerabilities.severity_systems import ScoringSystem -from vulnerabilities.severity_systems import scoring_systems +from vulnerabilities.severity_systems import SCORING_SYSTEMS logger = logging.getLogger(__name__) @@ -72,7 +72,7 @@ def from_dict(severity: dict): Return a VulnerabilitySeverity object from dict generated by self.to_dict """ return VulnerabilitySeverity( - system=scoring_systems[severity["system"]], value=severity["value"] + system=SCORING_SYSTEMS[severity["system"]], 
value=severity["value"] ) @@ -140,15 +140,16 @@ def merge(affected_packages: Iterable): converted into one tuple of structure similar to AffectedPackage but with multiple fixed_versions, ie package: PackageURL - affected_version_range: VersionRange - fixed_versions: [Version] + affected_version_range: set(VersionRange) + fixed_versions: set(Version) """ affected_version_ranges = set() fixed_versions = set() purls = set() for pkg in affected_packages: affected_version_ranges.add(pkg.affected_version_range) - fixed_versions.add(pkg.fixed_version) + if pkg.fixed_version: + fixed_versions.add(pkg.fixed_version) purls.add(pkg.package) if len(purls) > 1: print(affected_packages) @@ -197,36 +198,15 @@ class AdvisoryData: date_published must be aware datetime """ - vulnerability_id: Optional[str] = None + aliases: List[str] = dataclasses.field(default_factory=list) summary: str = None affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) date_published: Optional[datetime.datetime] = None def __post_init__(self): - if self.vulnerability_id: - assert self.summary or self.affected_packages or self.references - else: - assert self.affected_packages or self.references - - if self.date_published: - assert self.date_published.tzinfo - - def to_inference(self, confidence, affected_purls, fixed_purl): - """ - Convert to an Inference object while keeping the same values - for vulnerability_id, summary and references - """ - from vulnerabilities.data_inference import Inference - - return Inference( - vulnerability_id=self.vulnerability_id, - confidence=confidence, - summary=self.summary, - affected_purls=affected_purls, - fixed_purl=fixed_purl, - references=self.references, - ) + if self.date_published and not self.date_published.tzinfo: + logger.warn(f"AdvisoryData with no tzinfo: {self!r}") class InvalidConfigurationError(Exception): diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 1875e4be5..1276d6f1e 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -100,23 +100,19 @@ def process_advisories(advisory_datas: Iterable[AdvisoryData], importer_name: st for data in advisory_datas: obj, created = Advisory.objects.get_or_create( - vulnerability_id=data.vulnerability_id, + aliases=data.aliases, summary=data.summary, affected_packages=[pkg.to_dict() for pkg in data.affected_packages], references=[ref.to_dict() for ref in data.references], - created_by=importer_name, + date_published=data.date_published, defaults={ - "date_published": data.date_published, + "created_by": importer_name, "date_collected": datetime.datetime.now(tz=datetime.timezone.utc), }, ) if created: logger.info( - f"[*] New Advisory with vulnerability_id: {obj.vulnerability_id}, " - f"created_by: {obj.created_by}" + f"[*] New Advisory with aliases: {obj.aliases!r}, created_by: {obj.created_by}" ) else: - logger.debug( - f"Advisory with vulnerability_id: {obj.vulnerability_id}, " - f"created_by: {obj.created_by} already exists" - ) + logger.debug(f"Advisory with aliases: {obj.aliases!r} already exists. 
Skipped.") diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 917b5bd29..af1ff24a6 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -45,7 +45,7 @@ from vulnerabilities.models import Advisory from vulnerabilities.package_managers import GitHubTagsAPI from vulnerabilities.package_managers import Version -from vulnerabilities.severity_systems import scoring_systems +from vulnerabilities.severity_systems import SCORING_SYSTEMS logger = logging.getLogger(__name__) @@ -69,7 +69,7 @@ def advisory_data(self) -> Iterable[AdvisoryData]: def to_advisory_data( - cve, summary, advisory_severity, not_vulnerable, vulnerable, references + aliases, summary, advisory_severity, not_vulnerable, vulnerable, references ) -> AdvisoryData: """ Return AdvisoryData formed by given parameters @@ -97,10 +97,8 @@ def to_advisory_data( for fixed_version in fixed_versions.split(","): fixed_version = fixed_version.rstrip("+") - # TODO: Mail nginx for this anomaly + # TODO: Mail nginx for this anomaly (create ticket on our side) if "none" in fixed_version: - # FIXME: Fix after https://github.com/nexB/univers/pull/31 - break affected_packages.append( AffectedPackage( package=PackageURL(type="generic", name="nginx", qualifiers=qualifiers), @@ -120,18 +118,17 @@ def to_advisory_data( ) return AdvisoryData( - vulnerability_id=cve, + aliases=aliases, summary=summary, affected_packages=affected_packages, references=references, - date_published=datetime.datetime.now(tz=datetime.timezone.utc), ) def parse_advisory_data_from_paragraph(vuln_info): """ - Return (summary, advisory_severity, not_vulnerable, vulnerable, references) - from bs4 paragraph + Return a dict with keys (aliases, summary, advisory_severity, + not_vulnerable, vulnerable, references) from bs4 paragraph For example: >>> paragraph = '

<p>1-byte memory overwrite in resolver<br/>Severity: medium<br/><a href="http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html">Advisory</a><br/><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-23017">CVE-2021-23017</a><br/>Not vulnerable: 1.21.0+, 1.20.1+<br/>Vulnerable: 0.6.18-1.20.0<br/><a href="https://nginx.org/download/patch.2021.resolver.txt">The patch</a>  <a href="https://nginx.org/download/patch.2021.resolver.txt.asc">pgp</a></p>
' @@ -139,56 +136,50 @@ def parse_advisory_data_from_paragraph(vuln_info): >>> parse_advisory_data_from_paragraph(vuln_info) ('CVE-2021-23017', '1-byte memory overwrite in resolver', 'Severity: medium', 'Not vulnerable: 1.21.0+, 1.20.1+', 'Vulnerable: 0.6.18-1.20.0', [Reference(reference_id='', url='http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html', severities=[]), Reference(reference_id='CVE-2021-23017', url='http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-23017', severities=[VulnerabilitySeverity(system=ScoringSystem(identifier='generic_textual', name='Generic textual severity rating', url='', notes='Severity for unknown scoring systems. Contains generic textual values like High, Low etc'), value='Severity: medium')]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt', severities=[]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt.asc', severities=[])]) """ - cve = summary = advisory_severity = not_vulnerable = vulnerable = None + aliases = [] + summary = advisory_severity = not_vulnerable = vulnerable = None references = [] - for index, child in enumerate(vuln_info.children): - if index == 0: + is_first = True + for child in vuln_info.children: + if is_first: summary = child - continue + is_first = False + + elif child.text.startswith( + ( + "CVE-", + "CORE-", + "VU#", + ) + ): + aliases.append(child.text) + + elif "severity" in child.text.lower(): + advisory_severity = child.text + + elif "not vulnerable" in child.text.lower(): + not_vulnerable = child.text - if "Severity" in child: - advisory_severity = child - continue + elif "vulnerable" in child.text.lower(): + vulnerable = child.text - # hasattr(child, "attrs") == False for bs4.element.NavigableString - if hasattr(child, "attrs") and child.attrs.get("href"): + elif hasattr(child, "attrs") and child.attrs.get("href"): link = child.attrs["href"] # Take care of relative urls link = requests.compat.urljoin("https://nginx.org", link) if "cve.mitre.org" in link: - cve = child.text - references.append( - Reference( - reference_id=cve, - url=link, - ) - ) + cve = child.text.strip() + reference = Reference(reference_id=cve, url=link) + references.append(reference) elif "http://mailman.nginx.org" in link: - references.append( - Reference( - url=link, - severities=[ - VulnerabilitySeverity( - system=scoring_systems["generic_textual"], - value=advisory_severity, - ) - ], - ) - ) + ss = SCORING_SYSTEMS["generic_textual"] + severity = VulnerabilitySeverity(system=ss, value=advisory_severity) + references.append(Reference(url=link, severities=[severity])) else: references.append(Reference(url=link)) - continue - - if "Not vulnerable" in child: - not_vulnerable = child - continue - - if "Vulnerable" in child: - vulnerable = child - continue return { - "cve": cve, + "aliases": aliases, "summary": summary, "advisory_severity": advisory_severity, "not_vulnerable": not_vulnerable, @@ -231,7 +222,8 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: # TODO: This also yields with a lower fixed version, maybe we should # only yield fixes that are upgrades ? fixed_purl = purl._replace(version=fixed_version) - yield advisory_data.to_inference( + yield Inference.from_advisory_data( + advisory_data, confidence=90, # TODO: Decide properly affected_purls=affected_purls, fixed_purl=fixed_purl, @@ -264,7 +256,6 @@ def is_vulnerable(version, affected_version_range, fixed_versions): # else the version is vulnerable. 
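    # For example, given "Vulnerable: 0.5.6-1.13.2" and "Not vulnerable:
    # 1.13.3+, 1.12.1+" (illustrative values): 1.12.0 is vulnerable, while
    # 1.12.2 is not, since 1.12.2 >= the 1.12.1 fix on the same 1.12 branch
    # even though it falls inside the vulnerable range.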
# # See: https://marc.info/?l=nginx&m=164070162912710&w=2 - if version in NginxVersionRange.from_string(affected_version_range.to_string()): for fixed_version in fixed_versions: if version.value.minor == fixed_version.value.minor and version >= fixed_version: diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index ba64bfe65..235bfdc24 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -54,21 +54,26 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver logger.info(f"Improving advisory id: {advisory.id}") for inference in inferences: - vuln, vuln_created = _get_or_create_vulnerability( - inference.vulnerability_id, inference.summary + vuln = get_or_create_vulnerability_and_aliases( + inference.vulnerability_id, inference.aliases, inference.summary ) - for vuln_ref in inference.references: + if not vuln: + continue + + for ref in inference.references: ref, _ = models.VulnerabilityReference.objects.get_or_create( - vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url + vulnerability=vuln, reference_id=ref.reference_id, url=ref.url ) - for score in vuln_ref.severities: - models.VulnerabilitySeverity.objects.update_or_create( + for severity in ref.severities: + obj, updated = models.VulnerabilitySeverity.objects.update_or_create( vulnerability=vuln, - scoring_system=score.system.identifier, + scoring_system=severity.system.identifier, reference=ref, - defaults={"value": str(score.value)}, + defaults={"value": str(severity.value)}, ) + if updated: + logger.info("Severity updated for reference {ref!r} to {severity.value!r}") for pkg in inference.affected_purls: vulnerable_package, _ = _get_or_create_package(pkg) @@ -93,21 +98,6 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver advisory.save() -# TODO: This likely may be best as a model or manager method. -def _get_or_create_vulnerability(vulnerability_id, summary) -> Tuple[models.Vulnerability, bool]: - - vuln, created = models.Vulnerability.objects.get_or_create( - vulnerability_id=vulnerability_id - ) # nopep8 - # Eventually we only want to keep summary from NVD and ignore other descriptions. - # FIXME: it is really weird to update in a get or create function - if summary and vuln.summary != summary: - vuln.summary = summary - vuln.save() - - return vuln, created - - def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: query_kwargs = {} # TODO: this should be revisited as this should best be a model or manager method... 
and possibly streamlined @@ -128,3 +118,81 @@ def _package_url_to_package(purl: PackageURL) -> models.Package: p = models.Package() p.set_package_url(purl) return p + + +def get_or_create_vulnerability_and_aliases(vulnerability_id, aliases, summary): + """ + Get or create vulnerabilitiy and aliases such that all existing and new + aliases point to the same vulnerability + """ + existing_vulns = set() + aliases = set(aliases) + existing_aliases = set() + new_aliases = set() + for alias in aliases: + alias, created = models.Alias.objects.get_or_create(alias=alias) + if created: + new_aliases.add(alias) + else: + existing_aliases.add(alias) + if alias.vulnerability: + existing_vulns.add(alias.vulnerability) + + # If given set of aliases point to different vulnerabilities in the + # database, request is malformed + # TODO: It is possible that all those vulnerabilities are actually + # the same at data level, figure out a way to merge them + if len(existing_vulns) > 1: + logger.warn( + f"Given aliases {existing_aliases} already exist and do not point " + "to a single vulnerability. Cannot improve. Skipped." + ) + return None + + existing_alias_vuln = existing_vulns.pop() if existing_vulns else None + + # If we have been supplied with a vulnerability_id and existing aliases do + # not have vulnerability_id then create one for all aliases, otherwise use + # the vulnerability_id from the existing aliases + if vulnerability_id: + if not existing_alias_vuln: + vuln = models.Vulnerability(summary=summary) + vuln.save() + for alias in existing_aliases | new_aliases: + alias.vulnerability = vuln + alias.save() + return vuln + + if existing_alias_vuln.vulnerability_id == vulnerability_id: + # TODO: What to do with the new summary ? + for alias in new_aliases: + alias.vulnerability = existing_alias_vuln + alias.save() + return existing_alias_vuln + + logger.warn( + f"Given aliases {existing_aliases!r} already exist and point to existing" + "vulnerability {existing_alias_vuln}. Unable to create Vulnerability " + "with vulnerability_id {vulnerability_id}. Skipped" + ) + return None + + # No vulnerability_id is present, infer one from aliases + + # If all existing aliases point to one vulnerability then point new aliases + # to that vulnerbility and vulnerability is found + # TODO: What to do with the new summary ? 
+ if existing_alias_vuln: + for alias in new_aliases: + alias.vulnerability = existing_alias_vuln + alias.save() + return existing_alias_vuln + + # No vulnerability already exists for given aliases, create one and + # point all aliases to this vulnerability + vuln = models.Vulnerability(summary=summary) + vuln.save() + for alias in existing_aliases | new_aliases: + alias.vulnerability = vuln + alias.save() + return vuln diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index 8e2568c7e..db23c7df6 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -29,7 +29,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: for affected_package in advisory_data.affected_packages: affected_purls, fixed_purl = get_exact_purls(affected_package) yield Inference( - vulnerability_id=advisory_data.vulnerability_id, + aliases=advisory_data.aliases, confidence=MAX_CONFIDENCE, summary=advisory_data.summary, affected_purls=affected_purls, diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 5b914a40a..f19dececa 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -27,6 +27,7 @@ from typing import Optional from typing import List import logging +import uuid from django.db import models from django.core.exceptions import ValidationError @@ -40,7 +41,7 @@ from vulnerabilities.data_source import AdvisoryData from vulnerabilities.data_source import AffectedPackage from vulnerabilities.data_source import Reference -from vulnerabilities.severity_systems import scoring_systems +from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.data_inference import MAX_CONFIDENCE logger = logging.getLogger(__name__) @@ -52,27 +53,23 @@ class Vulnerability(models.Model): VulnerabilityReference. """ - vulnerability_id = models.CharField( - max_length=50, - help_text="Unique identifier for a vulnerability: this is either a published CVE id" - " (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID" - " (as in VULCOID-20210222-1315-16461541). When a vulnerability CVE is assigned later we" - " replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id'" - " field to support redirection to the CVE id.", - unique=True, - ) - old_vulnerability_id = models.CharField( - max_length=50, - help_text="empty if no CVE else VC id", + vulnerability_id = models.UUIDField( + default=uuid.uuid4, + editable=False, unique=True, - null=True, - blank=True, + help_text="Unique identifier for a vulnerability in this database, assigned automatically. " + "In the external representation it is prefixed with VULCOID-", ) + summary = models.TextField( help_text="Summary of the vulnerability", blank=True, ) + @property + def vulcoid(self): + return f"VULCOID-{self.vulnerability_id}" + @property def vulnerable_to(self): """ @@ -83,13 +80,13 @@ def vulnerable_to(self): @property def resolved_to(self): """ - Returns packages, which first received patch against this vulnerability + Returns packages that first received patch against this vulnerability in their particular version history. 
""" return self.packages.filter(vulnerabilities__packagerelatedvulnerability__fix=True) def __str__(self): - return self.vulnerability_id or self.summary + return self.vulcoid class Meta: verbose_name_plural = "Vulnerabilities" @@ -113,24 +110,22 @@ class VulnerabilityReference(models.Model): ) @property - def scores(self): + def severities(self): return VulnerabilitySeverity.objects.filter(reference=self.id) class Meta: unique_together = ("vulnerability", "url", "reference_id") def __str__(self): - return f"{self.url} {self.reference_id}" + reference_id = " {self.reference_id}" if self.reference_id else "" + return f"{self.url}{reference_id}" class Package(PackageURLMixin): """ - A software package with links to relevant vulnerabilities. + A software package with related vulnerabilities. """ - # TODO: Cannot resolve keyword 'resolved_vulnerabilities' into field - # make vulnerabilities and resolved_vulnerabilities use the `fix` flag of PackageRelatedVulnerability - vulnerabilities = models.ManyToManyField( to="Vulnerability", through="PackageRelatedVulnerability", @@ -138,6 +133,30 @@ class Package(PackageURLMixin): related_name="packages", ) + # Remove the `qualifers` and `set_package_url` overrides after + # https://github.com/package-url/packageurl-python/pull/35 + # https://github.com/package-url/packageurl-python/pull/67 + # gets merged + qualifiers = models.JSONField( + default=dict, + help_text=_( + "Extra qualifying data for a package such as the name of an OS, " + "architecture, distro, etc." + ), + blank=True, + null=False, + ) + + class Meta: + unique_together = ( + "type", + "namespace", + "name", + "version", + "qualifiers", + "subpath", + ) + @property # TODO: consider renaming to "affected_by" def vulnerable_to(self): @@ -154,21 +173,6 @@ def resolved_to(self): """ return self.vulnerabilities.filter(packagerelatedvulnerability__fix=True) - class Meta: - unique_together = ("name", "namespace", "type", "version", "qualifiers", "subpath") - - # Remove the `qualifers` and `set_package_url` overrides after - # https://github.com/package-url/packageurl-python/pull/35 gets merged - qualifiers = models.JSONField( - default=dict, - help_text=_( - "Extra qualifying data for a package such as the name of an OS, " - "architecture, distro, etc." - ), - blank=True, - null=False, - ) - def set_package_url(self, package_url): """ Set each field values to the values of the provided `package_url` string @@ -192,6 +196,7 @@ def __str__(self): class PackageRelatedVulnerability(models.Model): + # TODO: Fix related_name package = models.ForeignKey(Package, on_delete=models.CASCADE, related_name="package") vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) created_by = models.CharField( @@ -199,7 +204,7 @@ class PackageRelatedVulnerability(models.Model): blank=True, help_text="Fully qualified name of the improver prefixed with the" "module name responsible for creating this relation. Eg:" - "vulnerabilities.importers.nginx.NginxTimeTravel", + "vulnerabilities.importers.nginx.NginxBasicImprover", ) confidence = models.PositiveIntegerField( @@ -207,6 +212,7 @@ class PackageRelatedVulnerability(models.Model): validators=[MinValueValidator(0), MaxValueValidator(MAX_CONFIDENCE)], help_text="Confidence score for this relation", ) + fix = models.BooleanField( default=False, help_text="Does this relation fix the specified vulnerability ?" 
) @@ -217,6 +223,7 @@ def __str__(self): class Meta: unique_together = ("package", "vulnerability") verbose_name_plural = "PackageRelatedVulnerabilities" + indexes = [models.Index(fields=["fix"])] def update_or_create(self): """ @@ -301,27 +308,63 @@ def __str__(self): class VulnerabilitySeverity(models.Model): - scoring_system_choices = ( - (system.identifier, system.name) for system in scoring_systems.values() - ) # nopep8 vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE) - value = models.CharField(max_length=50, help_text="Example: 9.0, Important, High") + reference = models.ForeignKey(VulnerabilityReference, on_delete=models.CASCADE) + + scoring_system_choices = tuple( + (system.identifier, system.name) for system in SCORING_SYSTEMS.values() + ) + scoring_system = models.CharField( max_length=50, choices=scoring_system_choices, - help_text="identifier for the scoring system used. Available choices are: {} ".format( + help_text="Identifier for the scoring system used. Available choices are: {} ".format( ", ".join( - [ - f"{ss.identifier} is vulnerability_id for {ss.name} system" - for ss in scoring_systems.values() - ] + f"{sid} is vulnerability_id for {sname} system" + for sid, sname in scoring_system_choices ) ), ) - reference = models.ForeignKey(VulnerabilityReference, on_delete=models.CASCADE) + + value = models.CharField(max_length=50, help_text="Example: 9.0, Important, High") class Meta: - unique_together = ("vulnerability", "reference", "scoring_system") + unique_together = ( + "vulnerability", + "reference", + "scoring_system", + "value", + ) + + +class Alias(models.Model): + """ + An alias is a unique vulnerability identifier in some database, such as + the NVD, PYSEC, CVE or similar. These databases guarantee that these + identifiers are unique within their namespace. + An alias may also be used as a Reference. But in contrast with some + Reference may not be an identifier for a single vulnerability, for instance, + security advisories such as Debian security advisory reference various + vulnerabilities. 
+ """ + + alias = models.CharField( + max_length=50, + unique=True, + help_text="An alias is a unique vulnerability identifier in some database, " + "such as CVE-2020-2233", + ) + + vulnerability = models.ForeignKey( + Vulnerability, + on_delete=models.CASCADE, + related_name="aliases", + null=True, + blank=True, + ) + + def __str__(self): + return self.alias class Advisory(models.Model): @@ -330,16 +373,16 @@ class Advisory(models.Model): into structured data """ - vulnerability_id = models.CharField(max_length=50, null=True, blank=True) + aliases = models.JSONField(blank=True, default=list, help_text="A list of alias strings") summary = models.TextField(blank=True, null=True) # we use a JSON field here to avoid creating a complete relational model for data that # is never queried directly; instead it is only retrieved and processed as a whole by # an improver affected_packages = models.JSONField( - blank=True, null=True, help_text="A list of serializabale AffectedPackage objects" + blank=True, default=list, help_text="A list of serializable AffectedPackage objects" ) references = models.JSONField( - blank=True, null=True, help_text="A list of serializabale Reference objects" + blank=True, default=list, help_text="A list of serializable Reference objects" ) date_published = models.DateTimeField( blank=True, null=True, help_text="UTC Date of publication of the advisory" @@ -357,9 +400,18 @@ class Advisory(models.Model): "vulnerabilities.importers.nginx.NginxDataSource", ) + class Meta: + unique_together = ( + "aliases", + "summary", + "affected_packages", + "references", + "date_published", + ) + def to_advisory_data(self) -> AdvisoryData: return AdvisoryData( - vulnerability_id=self.vulnerability_id, + aliases=self.aliases, summary=self.summary, affected_packages=[AffectedPackage.from_dict(pkg) for pkg in self.affected_packages], references=[Reference.from_dict(ref) for ref in self.references], diff --git a/vulnerabilities/severity_systems.py b/vulnerabilities/severity_systems.py index f8b7f9404..262b3316a 100644 --- a/vulnerabilities/severity_systems.py +++ b/vulnerabilities/severity_systems.py @@ -23,7 +23,7 @@ def as_score(self, value): raise NotImplementedError -scoring_systems = { +SCORING_SYSTEMS = { "cvssv2": ScoringSystem( identifier="cvssv2", name="CVSSv2 Base Score", From 299d6f164b4d67dacccb0c9212a5457373fd1804 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 18:43:01 +0530 Subject: [PATCH 28/40] Reformat Alias structure and process_improver Signed-off-by: Hritik Vijay --- vulnerabilities/improve_runner.py | 100 ++++++++++++++---------------- vulnerabilities/models.py | 3 +- 2 files changed, 46 insertions(+), 57 deletions(-) diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 235bfdc24..83dc702a8 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -58,6 +58,7 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver inference.vulnerability_id, inference.aliases, inference.summary ) if not vuln: + logger.warn(f"Unable to get vulnerability for inference: {inference!r}") continue for ref in inference.references: @@ -120,23 +121,20 @@ def _package_url_to_package(purl: PackageURL) -> models.Package: return p -def get_or_create_vulnerability_and_aliases(vulnerability_id, aliases, summary): +def get_or_create_vulnerability_and_aliases(vulnerability_id, alias_names, summary): """ Get or create vulnerabilitiy and aliases such that all existing and new aliases point to 
the same vulnerability """ existing_vulns = set() - aliases = set(aliases) - existing_aliases = set() - new_aliases = set() - for alias in aliases: - alias, created = models.Alias.objects.get_or_create(alias=alias) - if created: - new_aliases.add(alias) - else: - existing_aliases.add(alias) - if alias.vulnerability: - existing_vulns.add(alias.vulnerability) + alias_names = set(alias_names) + new_alias_names = set() + for alias_name in alias_names: + try: + alias = models.Alias.objects.get(alias=alias_name) + existing_vulns.add(alias.vulnerability) + except models.Alias.DoesNotExist: + new_alias_names.add(alias_name) # If given set of aliases point to different vulnerabilities in the # database, request is malformed @@ -144,55 +142,47 @@ def get_or_create_vulnerability_and_aliases(vulnerability_id, aliases, summary): # the same at data level, figure out a way to merge them if len(existing_vulns) > 1: logger.warn( - f"Given aliases {existing_aliases} already exist and do not point " - "to a single vulnerability. Cannot improve. Skipped." + f"Given aliases {alias_names} already exist and do not point " + f"to a single vulnerability. Cannot improve. Skipped." ) - return None + return existing_alias_vuln = existing_vulns.pop() if existing_vulns else None - # If we have been supplied with a vulnerability_id and existing aliases do - # not have vulnerability_id then create one for all aliases, otherwise use - # the vulnerability_id from the existing aliases - if vulnerability_id: - if not existing_alias_vuln: - vuln = models.Vulnerability(summary=summary) - vuln.save() - for alias in existing_aliases | new_aliases: - alias.vulnerability = vuln - alias.save() - return vuln - - if existing_alias_vuln.vulnerability_id == vulnerability_id: - # TODO: What to do with the new summary ? - for alias in new_aliases: - alias.vulnerability = existing_alias_vuln - alias.save() - return existing_alias_vuln - + if ( + existing_alias_vuln + and vulnerability_id + and existing_alias_vuln.vulnerability_id != vulnerability_id + ): logger.warn( - f"Given aliases {existing_aliases!r} already exist and point to existing" - "vulnerability {existing_alias_vuln}. Unable to create Vulnerability " - "with vulnerability_id {vulnerability_id}. Skipped" + f"Given aliases {alias_names!r} already exist and point to existing" + f"vulnerability {existing_alias_vuln}. Unable to create Vulnerability " + f"with vulnerability_id {vulnerability_id}. Skipped" ) - return None - - # No vulnerability_id is present, infer one from aliases + return - # If all existing aliases point to one vulnerability then point new aliases - # to that vulnerbility and vulnerability is found - # TODO: What to do with the new summary ? 
if existing_alias_vuln: - for alias in new_aliases: - alias.vulnerability = existing_alias_vuln - alias.save() - return existing_alias_vuln - - # No vulnerability already exists for given aliases, create one and - # point all aliases to this vulnerability - vuln = models.Vulnerability(summary=summary) - vuln.save() - for alias in existing_aliases | new_aliases: - alias.vulnerability = vuln + vulnerability = existing_alias_vuln + elif vulnerability_id: + try: + vulnerability = models.Vulnerability.objects.get(vulnerability_id=vulnerability_id) + except models.Vulnerability.DoesNotExist: + logger.warn( + f"Given vulnerability_id: {vulnerability_id} does not exist in the database" + ) + return + else: + vulnerability = models.Vulnerability(summary=summary) + vulnerability.save() + + if summary and summary != vulnerability.summary: + logger.warn( + f"Inconsistent summary for {vulnerability!r}. " + f"Existing: {vulnerability.summary}, provided: {summary}" + ) + + for alias_name in new_alias_names: + alias = models.Alias(alias=alias_name, vulnerability=vulnerability) alias.save() - return vuln + + return vulnerability diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index f19dececa..63d386261 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -63,6 +63,7 @@ class Vulnerability(models.Model): summary = models.TextField( help_text="Summary of the vulnerability", + null=True, blank=True, ) @@ -359,8 +360,6 @@ class Alias(models.Model): Vulnerability, on_delete=models.CASCADE, related_name="aliases", - null=True, - blank=True, ) def __str__(self): From b9ba26a8e84eb79f7814198d14ee5d18a520b62f Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 18:42:11 +0530 Subject: [PATCH 29/40] Change __repr__ to qualified_name fn Signed-off-by: Hritik Vijay --- vulnerabilities/data_inference.py | 5 +++-- vulnerabilities/data_source.py | 5 +---- vulnerabilities/import_runner.py | 2 +- vulnerabilities/importers/nginx.py | 2 +- vulnerabilities/improve_runner.py | 4 ++-- vulnerabilities/improvers/__init__.py | 2 +- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/vulnerabilities/data_inference.py b/vulnerabilities/data_inference.py index 6a83fda6f..3d3fdc10d 100644 --- a/vulnerabilities/data_inference.py +++ b/vulnerabilities/data_inference.py @@ -89,9 +89,10 @@ def get_inferences(self, advisory_data: AdvisoryData) -> List[Inference]: """ raise NotImplementedError - def __repr__(self): + @classmethod + def qualified_name(cls): """ Fully qualified name prefixed with the module name of the improver used in logging. """ - return f"{self.__module__}.{self.__class__.__qualname__}" + return f"{cls.__module__}.{cls.__qualname__}" diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 625a80df5..9794a8663 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -260,11 +260,8 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): pass - def __repr__(self): - return self.to_repr() - @classmethod - def to_repr(cls): + def qualified_name(cls): """ Fully qualified name prefixed with the module name of the data source used in logging. 
diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 1276d6f1e..e31d0ed6d 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -71,7 +71,7 @@ def run(self, cutoff_date: datetime.datetime = None) -> None: data_source = self.importer.make_data_source(cutoff_date=cutoff_date) with data_source: advisory_data = data_source.advisory_data() - importer_name = repr(data_source) + importer_name = data_source.qualified_name() process_advisories(advisory_datas=advisory_data, importer_name=importer_name) self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc) self.importer.data_source_cfg = dataclasses.asdict(data_source.config) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index af1ff24a6..1201df104 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -194,7 +194,7 @@ def __init__(self): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=NginxDataSource.to_repr()) + return Advisory.objects.filter(created_by=NginxDataSource.qualified_name()) def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: """ diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index 83dc702a8..c43d4a646 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -26,11 +26,11 @@ def __init__(self, improver): def run(self) -> None: improver = self.improver() - logger.info(f"Running improver: {improver!r}") + logger.info(f"Running improver: {improver.qualified_name()}") for advisory in improver.interesting_advisories: inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data()) process_inferences( - inferences=inferences, advisory=advisory, improver_name=repr(improver) + inferences=inferences, advisory=advisory, improver_name=improver.qualified_name() ) logger.info("Finished improving using %s.", self.improver.__name__) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 951e6da38..b214f88e6 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -3,4 +3,4 @@ IMPROVER_REGISTRY = [default.DefaultImprover, importers.nginx.NginxBasicImprover] -improver_mapping = {f"{x.__module__}.{x.__name__}": x for x in IMPROVER_REGISTRY} +improver_mapping = {x.qualified_name(): x for x in IMPROVER_REGISTRY} From e3a1ea59337c84dfacd0fb068d64d8e90fed6b0d Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 18:44:18 +0530 Subject: [PATCH 30/40] Update .gitignore for junk files Signed-off-by: Hritik Vijay --- .gitignore | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index 545630d64..73b68d215 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,12 @@ Pipfile # VSCode .vscode + +# Various junk and temp files +.DS_Store +*~ +.*.sw[po] +.build +.ve +*.bak +/.cache/ From c3e71f4984b1be1077f9d4f613fac75d0d38ac6c Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 19:59:35 +0530 Subject: [PATCH 31/40] Fix few tests to recent structure, dump ugettext_lazy Signed-off-by: Hritik Vijay --- vulnerabilities/importers/nginx.py | 4 ++-- vulnerabilities/improvers/default.py | 4 ++-- vulnerabilities/models.py | 9 +++------ vulnerabilities/tests/test_helpers.py | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 
1201df104..8731785e8 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -132,9 +132,9 @@ def parse_advisory_data_from_paragraph(vuln_info): For example: >>> paragraph = '

<p>1-byte memory overwrite in resolver<br/>Severity: medium<br/><a href="http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html">Advisory</a><br/><a href="http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-23017">CVE-2021-23017</a><br/>Not vulnerable: 1.21.0+, 1.20.1+<br/>Vulnerable: 0.6.18-1.20.0<br/><a href="https://nginx.org/download/patch.2021.resolver.txt">The patch</a>  <a href="https://nginx.org/download/patch.2021.resolver.txt.asc">pgp</a></p>
' - >>> vuln_info = BeautifulSoup(paragraph).p + >>> vuln_info = BeautifulSoup(paragraph, features="lxml").p >>> parse_advisory_data_from_paragraph(vuln_info) - ('CVE-2021-23017', '1-byte memory overwrite in resolver', 'Severity: medium', 'Not vulnerable: 1.21.0+, 1.20.1+', 'Vulnerable: 0.6.18-1.20.0', [Reference(reference_id='', url='http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html', severities=[]), Reference(reference_id='CVE-2021-23017', url='http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-23017', severities=[VulnerabilitySeverity(system=ScoringSystem(identifier='generic_textual', name='Generic textual severity rating', url='', notes='Severity for unknown scoring systems. Contains generic textual values like High, Low etc'), value='Severity: medium')]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt', severities=[]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt.asc', severities=[])]) + {'aliases': ['CVE-2021-23017'], 'summary': '1-byte memory overwrite in resolver', 'advisory_severity': 'Severity: medium', 'not_vulnerable': 'Not vulnerable: 1.21.0+, 1.20.1+', 'vulnerable': 'Vulnerable: 0.6.18-1.20.0', 'references': [Reference(reference_id='', url='http://mailman.nginx.org/pipermail/nginx-announce/2021/000300.html', severities=[VulnerabilitySeverity(system=ScoringSystem(identifier='generic_textual', name='Generic textual severity rating', url='', notes='Severity for unknown scoring systems. Contains generic textual values like High, Low etc'), value='Severity: medium')]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt', severities=[]), Reference(reference_id='', url='https://nginx.org/download/patch.2021.resolver.txt.asc', severities=[])]} """ aliases = [] summary = advisory_severity = not_vulnerable = vulnerable = None diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index db23c7df6..f44a7d2f7 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -46,14 +46,14 @@ def get_exact_purls(affected_package: AffectedPackage) -> (List[PackageURL], Pac Only exact version constraints (ie with an equality) are considered For eg: >>> purl = {"type": "turtle", "name": "green"} - >>> vers = "vers:npm/>=2.0.0,<3.0.0 | <1.0.0" + >>> vers = "vers:npm/<1.0.0 | >=2.0.0 | <3.0.0" >>> affected_package = AffectedPackage.from_dict({ ... "package": purl, ... "affected_version_range": vers, ... "fixed_version": "5.0.0" ... 
}) >>> get_exact_purls(affected_package) - ({PackageURL(type='turtle', namespace=None, name='green', version='2.0.0', qualifiers={}, subpath=None)}, PackageURL(type='turtle', namespace=None, name='green', version='5.0.0', qualifiers={}, subpath=None)) + ([PackageURL(type='turtle', namespace=None, name='green', version='2.0.0', qualifiers={}, subpath=None)], PackageURL(type='turtle', namespace=None, name='green', version='5.0.0', qualifiers={}, subpath=None)) """ affected_purls = set() all_constraints = affected_package.affected_version_range.constraints diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 63d386261..c00bdeaf5 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -31,7 +31,6 @@ from django.db import models from django.core.exceptions import ValidationError -from django.utils.translation import ugettext_lazy as _ from django.core.validators import MinValueValidator from django.core.validators import MaxValueValidator from packageurl.contrib.django.models import PackageURLMixin @@ -140,10 +139,8 @@ class Package(PackageURLMixin): # gets merged qualifiers = models.JSONField( default=dict, - help_text=_( - "Extra qualifying data for a package such as the name of an OS, " - "architecture, distro, etc." - ), + help_text="Extra qualifying data for a package such as the name of an OS, " + "architecture, distro, etc.", blank=True, null=False, ) @@ -187,7 +184,7 @@ def set_package_url(self, package_url): model_field = self._meta.get_field(field_name) if value and len(value) > model_field.max_length: - raise ValidationError(_('Value too long for field "{}".'.format(field_name))) + raise ValidationError(f'Value too long for field "{field_name}".') setattr(self, field_name, value or None) diff --git a/vulnerabilities/tests/test_helpers.py b/vulnerabilities/tests/test_helpers.py index 994d6659e..e17d6ee2b 100644 --- a/vulnerabilities/tests/test_helpers.py +++ b/vulnerabilities/tests/test_helpers.py @@ -42,7 +42,7 @@ class TestHelpers(TestCase): @classmethod def setUpClass(cls): data_source_cfg = {"etags": {}} - cls.data_source = DummyDataSource(batch_size=1, config=data_source_cfg) + cls.data_source = DummyDataSource(config=data_source_cfg) def test_create_etag(self): assert self.data_source.config.etags == {} From e822c410b055414620eccfca51c10f1b63def644 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 20:02:13 +0530 Subject: [PATCH 32/40] Ignore outdated tests Signed-off-by: Hritik Vijay --- pytest.ini | 31 +++++++++++++++++++++- vulnerabilities/tests/conftest.py | 43 +++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 2de27327a..7a196de40 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,4 +2,33 @@ DJANGO_SETTINGS_MODULE = vulnerablecode.settings markers = webtest -addopts = --doctest-modules +addopts = + --doctest-modules +# Ignore the following doctests until these files are migrated to +# import-improve structure + --ignore=vulnerabilities/importers/alpine_linux.py + --ignore=vulnerabilities/importers/apache_httpd.py + --ignore=vulnerabilities/importers/apache_kafka.py + --ignore=vulnerabilities/importers/apache_tomcat.py + --ignore=vulnerabilities/importers/archlinux.py + --ignore=vulnerabilities/importers/debian.py + --ignore=vulnerabilities/importers/elixir_security.py + --ignore=vulnerabilities/importers/gentoo.py + --ignore=vulnerabilities/importers/github.py + --ignore=vulnerabilities/importers/istio.py + --ignore=vulnerabilities/importers/kaybee.py + 
--ignore=vulnerabilities/importers/npm.py + --ignore=vulnerabilities/importers/nvd.py + --ignore=vulnerabilities/importers/openssl.py + --ignore=vulnerabilities/importers/postgresql.py + --ignore=vulnerabilities/importers/project_kb_msr2019.py + --ignore=vulnerabilities/importers/redhat.py + --ignore=vulnerabilities/importers/retiredotnet.py + --ignore=vulnerabilities/importers/ruby.py + --ignore=vulnerabilities/importers/rust.py + --ignore=vulnerabilities/importers/safety_db.py + --ignore=vulnerabilities/importers/suse_backports.py + --ignore=vulnerabilities/importers/suse_scores.py + --ignore=vulnerabilities/importers/ubuntu_usn.py + --ignore=vulnerabilities/management/commands/create_cpe_to_purl_map.py + --ignore=vulnerabilities/lib_oval.py diff --git a/vulnerabilities/tests/conftest.py b/vulnerabilities/tests/conftest.py index 6099c16bf..dc5225168 100644 --- a/vulnerabilities/tests/conftest.py +++ b/vulnerabilities/tests/conftest.py @@ -34,3 +34,46 @@ def no_mkdir(monkeypatch): @pytest.fixture def no_rmtree(monkeypatch): monkeypatch.delattr("shutil.rmtree") + + +# TODO: Ignore these tests for now but we need to migrate each one of them to the new struture. +# Step 1: Fix importer_yielder: https://github.com/nexB/vulnerablecode/issues/501 +# Step 2: Run test for importer only if it is activated (pytestmark = pytest.mark.skipif(...)) +# Step 3: Migrate all the tests +collect_ignore = [ + "test_models.py", + "test_msr2019.py", + "test_alpine.py", + "test_nginx.py", + "test_apache_httpd.py", + "test_npm.py", + "test_apache_kafka.py", + "test_nvd.py", + "test_apache_tomcat.py", + "test_openssl.py", + "test_api.py", + "test_package_managers.py", + "test_archlinux.py", + "test_postgresql.py", + "test_redhat_importer.py", + "test_data_source.py", + "test_retiredotnet.py", + "test_debian.py", + "test_ruby.py", + "test_debian_oval.py", + "test_rust.py", + "test_elixir_security.py", + "test_safety_db.py", + "test_gentoo.py", + "test_suse.py", + "test_github.py", + "test_suse_backports.py", + "test_suse_scores.py", + "test_import_cmd.py", + "test_ubuntu.py", + "test_import_runner.py", + "test_ubuntu_usn.py", + "test_importer_yielder.py", + "test_upstream.py", + "test_istio.py", +] From e9f940d54db41ca8830a3506cd256ab0e5ec5a08 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 20:11:14 +0530 Subject: [PATCH 33/40] Disable outdated importers Signed-off-by: Hritik Vijay --- vulnerabilities/importer_yielder.py | 397 +++++++++--------- vulnerabilities/management/commands/import.py | 5 +- 2 files changed, 203 insertions(+), 199 deletions(-) diff --git a/vulnerabilities/importer_yielder.py b/vulnerabilities/importer_yielder.py index 567755c3c..8fe372646 100644 --- a/vulnerabilities/importer_yielder.py +++ b/vulnerabilities/importer_yielder.py @@ -22,218 +22,221 @@ from vulnerabilities.models import Importer +# TODO: This entire registry needs to go away in favor of a registry similar to +# improvers. 
+# See ./improvers/__init__.py IMPORTER_REGISTRY = [ { - "name": "rust", - "license": "cc0-1.0", - "last_run": None, - "data_source": "RustDataSource", - "data_source_cfg": { - "branch": None, - "repository_url": "https://github.com/RustSec/advisory-db", - }, - }, - { - "name": "alpine", + "name": "nginx", "license": "", "last_run": None, - "data_source": "AlpineDataSource", - "data_source_cfg": {}, - }, - { - "name": "archlinux", - "license": "mit", - "last_run": None, - "data_source": "ArchlinuxDataSource", - "data_source_cfg": {"archlinux_tracker_url": "https://security.archlinux.org/json"}, - }, - { - "name": "debian", - "license": "mit", - "last_run": None, - "data_source": "DebianDataSource", - "data_source_cfg": { - "debian_tracker_url": "https://security-tracker.debian.org/tracker/data/json" - }, + "data_source": "NginxDataSource", + "data_source_cfg": {"etags": {}}, }, # { - # "name": "safetydb", - # "license": "cc-by-nc-4.0", + # "name": "rust", + # "license": "cc0-1.0", + # "last_run": None, + # "data_source": "RustDataSource", + # "data_source_cfg": { + # "branch": None, + # "repository_url": "https://github.com/RustSec/advisory-db", + # }, + # }, + # { + # "name": "alpine", + # "license": "", + # "last_run": None, + # "data_source": "AlpineDataSource", + # "data_source_cfg": {}, + # }, + # { + # "name": "archlinux", + # "license": "mit", + # "last_run": None, + # "data_source": "ArchlinuxDataSource", + # "data_source_cfg": {"archlinux_tracker_url": "https://security.archlinux.org/json"}, + # }, + # { + # "name": "debian", + # "license": "mit", # "last_run": None, - # "data_source": "SafetyDbDataSource", + # "data_source": "DebianDataSource", + # "data_source_cfg": { + # "debian_tracker_url": "https://security-tracker.debian.org/tracker/data/json" + # }, + # }, + # # { + # # "name": "safetydb", + # # "license": "cc-by-nc-4.0", + # # "last_run": None, + # # "data_source": "SafetyDbDataSource", + # # "data_source_cfg": { + # # "url": "https://raw.githubusercontent.com/pyupio/safety-db/master/data/insecure_full.json", + # # "etags": {}, + # # }, + # # }, + # { + # "name": "npm", + # "license": "mit", + # "last_run": None, + # "data_source": "NpmDataSource", + # "data_source_cfg": {"repository_url": "https://github.com/nodejs/security-wg.git"}, + # }, + # { + # "name": "ruby", + # "license": "", + # "last_run": None, + # "data_source": "RubyDataSource", + # "data_source_cfg": {"repository_url": "https://github.com/rubysec/ruby-advisory-db.git"}, + # }, + # { + # "name": "ubuntu", + # "license": "gpl-2.0", + # "last_run": None, + # "data_source": "UbuntuDataSource", # "data_source_cfg": { - # "url": "https://raw.githubusercontent.com/pyupio/safety-db/master/data/insecure_full.json", # "etags": {}, + # "releases": ["bionic", "trusty", "focal", "eoan", "xenial"], # }, # }, - { - "name": "npm", - "license": "mit", - "last_run": None, - "data_source": "NpmDataSource", - "data_source_cfg": {"repository_url": "https://github.com/nodejs/security-wg.git"}, - }, - { - "name": "ruby", - "license": "", - "last_run": None, - "data_source": "RubyDataSource", - "data_source_cfg": {"repository_url": "https://github.com/rubysec/ruby-advisory-db.git"}, - }, - { - "name": "ubuntu", - "license": "gpl-2.0", - "last_run": None, - "data_source": "UbuntuDataSource", - "data_source_cfg": { - "etags": {}, - "releases": ["bionic", "trusty", "focal", "eoan", "xenial"], - }, - }, - { - "name": "retiredotnet", - "license": "mit", - "last_run": None, - "data_source": "RetireDotnetDataSource", - 
"data_source_cfg": {"repository_url": "https://github.com/RetireNet/Packages.git"}, - }, # { - # "name": "suse_backports", + # "name": "retiredotnet", + # "license": "mit", + # "last_run": None, + # "data_source": "RetireDotnetDataSource", + # "data_source_cfg": {"repository_url": "https://github.com/RetireNet/Packages.git"}, + # }, + # # { + # # "name": "suse_backports", + # # "license": "", + # # "last_run": None, + # # "data_source": "SUSEBackportsDataSource", + # # "data_source_cfg": {"url": "http://ftp.suse.com/pub/projects/security/yaml/", "etags": {}}, + # # }, + # { + # "name": "suse_scores", # "license": "", # "last_run": None, - # "data_source": "SUSEBackportsDataSource", - # "data_source_cfg": {"url": "http://ftp.suse.com/pub/projects/security/yaml/", "etags": {}}, + # "data_source": "SUSESeverityScoreDataSource", + # "data_source_cfg": {}, + # }, + # { + # "name": "debian_oval", + # "license": "", + # "last_run": None, + # "data_source": "DebianOvalDataSource", + # "data_source_cfg": {"etags": {}, "releases": ["wheezy", "stretch", "jessie", "buster"]}, + # }, + # { + # "name": "redhat", + # "license": "cc-by-4.0", + # "last_run": None, + # "data_source": "RedhatDataSource", + # "data_source_cfg": {}, + # }, + # { + # "name": "nvd", + # "license": "", + # "last_run": None, + # "data_source": "NVDDataSource", + # "data_source_cfg": {"etags": {}}, + # }, + # { + # "name": "gentoo", + # "license": "", + # "last_run": None, + # "data_source": "GentooDataSource", + # "data_source_cfg": {"repository_url": "https://anongit.gentoo.org/git/data/glsa.git"}, + # }, + # { + # "name": "openssl", + # "license": "", + # "last_run": None, + # "data_source": "OpenSSLDataSource", + # "data_source_cfg": {"etags": {}}, + # }, + # { + # "name": "ubuntu_usn", + # "license": "gpl-2.0", + # "last_run": None, + # "data_source": "UbuntuUSNDataSource", + # "data_source_cfg": { + # "etags": {}, + # "db_url": "https://usn.ubuntu.com/usn-db/database-all.json.bz2", + # }, + # }, + # { + # "name": "github", + # "license": "", + # "last_run": None, + # "data_source": "GitHubAPIDataSource", + # "data_source_cfg": { + # "endpoint": "https://api.github.com/graphql", + # "ecosystems": ["MAVEN", "NUGET", "COMPOSER", "PIP", "RUBYGEMS"], + # }, + # }, + # { + # "name": "msr2019", + # "license": "apache-2.0", + # "last_run": None, + # "data_source": "ProjectKBMSRDataSource", + # "data_source_cfg": {"etags": {}}, + # }, + # { + # "name": "apache_httpd", + # "license": "", + # "last_run": None, + # "data_source": "ApacheHTTPDDataSource", + # "data_source_cfg": {"etags": {}}, + # }, + # { + # "name": "kaybee", + # "license": "apache-2.0", + # "last_run": None, + # "data_source": "KaybeeDataSource", + # "data_source_cfg": { + # "repository_url": "https://github.com/SAP/project-kb.git", + # "branch": "vulnerability-data", + # }, + # }, + # { + # "name": "postgresql", + # "license": "", + # "last_run": None, + # "data_source": "PostgreSQLDataSource", + # "data_source_cfg": {}, + # }, + # { + # "name": "elixir_security", + # "license": "cc0-1.0", + # "last_run": None, + # "data_source": "ElixirSecurityDataSource", + # "data_source_cfg": { + # "repository_url": "https://github.com/dependabot/elixir-security-advisories" + # }, + # }, + # { + # "name": "apache_tomcat", + # "license": "", + # "last_run": None, + # "data_source": "ApacheTomcatDataSource", + # "data_source_cfg": {"etags": {}}, + # }, + # { + # "name": "apache_kafka", + # "license": "", + # "last_run": None, + # "data_source": "ApacheKafkaDataSource", + # 
"data_source_cfg": {}, + # }, + # { + # "name": "istio", + # "license": "apache-2.0", + # "last_run": None, + # "data_source": "IstioDataSource", + # "data_source_cfg": {"repository_url": "https://github.com/istio/istio.io"}, # }, - { - "name": "suse_scores", - "license": "", - "last_run": None, - "data_source": "SUSESeverityScoreDataSource", - "data_source_cfg": {}, - }, - { - "name": "debian_oval", - "license": "", - "last_run": None, - "data_source": "DebianOvalDataSource", - "data_source_cfg": {"etags": {}, "releases": ["wheezy", "stretch", "jessie", "buster"]}, - }, - { - "name": "redhat", - "license": "cc-by-4.0", - "last_run": None, - "data_source": "RedhatDataSource", - "data_source_cfg": {}, - }, - { - "name": "nvd", - "license": "", - "last_run": None, - "data_source": "NVDDataSource", - "data_source_cfg": {"etags": {}}, - }, - { - "name": "gentoo", - "license": "", - "last_run": None, - "data_source": "GentooDataSource", - "data_source_cfg": {"repository_url": "https://anongit.gentoo.org/git/data/glsa.git"}, - }, - { - "name": "openssl", - "license": "", - "last_run": None, - "data_source": "OpenSSLDataSource", - "data_source_cfg": {"etags": {}}, - }, - { - "name": "ubuntu_usn", - "license": "gpl-2.0", - "last_run": None, - "data_source": "UbuntuUSNDataSource", - "data_source_cfg": { - "etags": {}, - "db_url": "https://usn.ubuntu.com/usn-db/database-all.json.bz2", - }, - }, - { - "name": "github", - "license": "", - "last_run": None, - "data_source": "GitHubAPIDataSource", - "data_source_cfg": { - "endpoint": "https://api.github.com/graphql", - "ecosystems": ["MAVEN", "NUGET", "COMPOSER", "PIP", "RUBYGEMS"], - }, - }, - { - "name": "msr2019", - "license": "apache-2.0", - "last_run": None, - "data_source": "ProjectKBMSRDataSource", - "data_source_cfg": {"etags": {}}, - }, - { - "name": "apache_httpd", - "license": "", - "last_run": None, - "data_source": "ApacheHTTPDDataSource", - "data_source_cfg": {"etags": {}}, - }, - { - "name": "kaybee", - "license": "apache-2.0", - "last_run": None, - "data_source": "KaybeeDataSource", - "data_source_cfg": { - "repository_url": "https://github.com/SAP/project-kb.git", - "branch": "vulnerability-data", - }, - }, - { - "name": "nginx", - "license": "", - "last_run": None, - "data_source": "NginxDataSource", - "data_source_cfg": {"etags": {}}, - }, - { - "name": "postgresql", - "license": "", - "last_run": None, - "data_source": "PostgreSQLDataSource", - "data_source_cfg": {}, - }, - { - "name": "elixir_security", - "license": "cc0-1.0", - "last_run": None, - "data_source": "ElixirSecurityDataSource", - "data_source_cfg": { - "repository_url": "https://github.com/dependabot/elixir-security-advisories" - }, - }, - { - "name": "apache_tomcat", - "license": "", - "last_run": None, - "data_source": "ApacheTomcatDataSource", - "data_source_cfg": {"etags": {}}, - }, - { - "name": "apache_kafka", - "license": "", - "last_run": None, - "data_source": "ApacheKafkaDataSource", - "data_source_cfg": {}, - }, - { - "name": "istio", - "license": "apache-2.0", - "last_run": None, - "data_source": "IstioDataSource", - "data_source_cfg": {"repository_url": "https://github.com/istio/istio.io"}, - }, ] diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index da333dcdd..d45d4983e 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -28,6 +28,7 @@ from django.core.management.base import CommandError from vulnerabilities.models import Importer +from 
vulnerabilities.importer_yielder import IMPORTER_REGISTRY from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer_yielder import load_importers @@ -73,9 +74,9 @@ def handle(self, *args, **options): self.import_data(sources, options["cutoff_date"]) def list_sources(self): - importers = Importer.objects.all() + importers = IMPORTER_REGISTRY self.stdout.write("Vulnerability data can be imported from the following sources:") - self.stdout.write(", ".join([i.name for i in importers])) + self.stdout.write(", ".join([i["name"] for i in importers])) def import_data(self, names, cutoff_date): importers = [] From 318663e90017b32caa5d95d2d77016fa33a883ca Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Sun, 23 Jan 2022 20:21:37 +0530 Subject: [PATCH 34/40] Reset migrations Signed-off-by: Hritik Vijay --- vulnerabilities/migrations/0001_initial.py | 173 ++++++++++++++------- 1 file changed, 113 insertions(+), 60 deletions(-) diff --git a/vulnerabilities/migrations/0001_initial.py b/vulnerabilities/migrations/0001_initial.py index 8aa6b9bf1..0a6f624b8 100644 --- a/vulnerabilities/migrations/0001_initial.py +++ b/vulnerabilities/migrations/0001_initial.py @@ -1,8 +1,9 @@ -# Generated by Django 3.2.9 on 2021-12-08 09:02 +# Generated by Django 3.2.9 on 2022-01-23 14:44 import django.core.validators from django.db import migrations, models import django.db.models.deletion +import uuid class Migration(migrations.Migration): @@ -12,46 +13,6 @@ class Migration(migrations.Migration): dependencies = [] operations = [ - migrations.CreateModel( - name="Advisory", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ("vulnerability_id", models.CharField(blank=True, max_length=50, null=True)), - ("summary", models.TextField(blank=True, null=True)), - ("affected_packages", models.TextField()), - ("references", models.TextField()), - ( - "date_published", - models.DateField( - blank=True, help_text="UTC Date of publication of the advisory", null=True - ), - ), - ( - "date_collected", - models.DateField(help_text="UTC Date on which the advisory was collected"), - ), - ( - "date_improved", - models.DateTimeField( - blank=True, - help_text="Latest date on which the advisory was improved by an improver", - null=True, - ), - ), - ( - "created_by", - models.CharField( - help_text="Fully qualified name of the importer prefixed with the module name importing the advisory. Eg: vulnerabilities.importers.nginx.NginxDataSource", - max_length=100, - ), - ), - ], - ), migrations.CreateModel( name="Importer", fields=[ @@ -155,23 +116,19 @@ class Migration(migrations.Migration): ), ( "vulnerability_id", - models.CharField( - help_text="Unique identifier for a vulnerability: this is either a published CVE id (as in CVE-2020-7965) if it exists. Otherwise this is a VulnerableCode-assigned VULCOID (as in VULCOID-20210222-1315-16461541). When a vulnerability CVE is assigned later we replace this with the CVE and keep the 'old' VULCOID in the 'old_vulnerability_id' field to support redirection to the CVE id.", - max_length=50, + models.UUIDField( + default=uuid.uuid4, + editable=False, + help_text="Unique identifier for a vulnerability in this database, assigned automatically. 
In the external representation it is prefixed with VULCOID-", unique=True, ), ), ( - "old_vulnerability_id", - models.CharField( - blank=True, - help_text="empty if no CVE else VC id", - max_length=50, - null=True, - unique=True, + "summary", + models.TextField( + blank=True, help_text="Summary of the vulnerability", null=True ), ), - ("summary", models.TextField(blank=True, help_text="Summary of the vulnerability")), ], options={ "verbose_name_plural": "Vulnerabilities", @@ -226,7 +183,7 @@ class Migration(migrations.Migration): "created_by", models.CharField( blank=True, - help_text="Fully qualified name of the improver prefixed with the module name responsible for creating this relation. Eg: vulnerabilities.importers.nginx.NginxTimeTravel", + help_text="Fully qualified name of the improver prefixed with themodule name responsible for creating this relation. Eg:vulnerabilities.importers.nginx.NginxBasicImprover", max_length=100, ), ), @@ -266,7 +223,6 @@ class Migration(migrations.Migration): ], options={ "verbose_name_plural": "PackageRelatedVulnerabilities", - "unique_together": {("package", "vulnerability")}, }, ), migrations.AddField( @@ -279,7 +235,7 @@ class Migration(migrations.Migration): ), ), migrations.CreateModel( - name="VulnerabilitySeverity", + name="Alias", fields=[ ( "id", @@ -288,8 +244,93 @@ class Migration(migrations.Migration): ), ), ( - "value", - models.CharField(help_text="Example: 9.0, Important, High", max_length=50), + "alias", + models.CharField( + help_text="An alias is a unique vulnerability identifier in some database, such as CVE-2020-2233", + max_length=50, + unique=True, + ), + ), + ( + "vulnerability", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="aliases", + to="vulnerabilities.vulnerability", + ), + ), + ], + ), + migrations.CreateModel( + name="Advisory", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "aliases", + models.JSONField(blank=True, default=list, help_text="A list of alias strings"), + ), + ("summary", models.TextField(blank=True, null=True)), + ( + "affected_packages", + models.JSONField( + blank=True, + default=list, + help_text="A list of serializable AffectedPackage objects", + ), + ), + ( + "references", + models.JSONField( + blank=True, + default=list, + help_text="A list of serializable Reference objects", + ), + ), + ( + "date_published", + models.DateTimeField( + blank=True, help_text="UTC Date of publication of the advisory", null=True + ), + ), + ( + "date_collected", + models.DateTimeField(help_text="UTC Date on which the advisory was collected"), + ), + ( + "date_improved", + models.DateTimeField( + blank=True, + help_text="Latest date on which the advisory was improved by an improver", + null=True, + ), + ), + ( + "created_by", + models.CharField( + help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. 
Eg:vulnerabilities.importers.nginx.NginxDataSource", + max_length=100, + ), + ), + ], + options={ + "unique_together": { + ("aliases", "summary", "affected_packages", "references", "date_published") + }, + }, + ), + migrations.CreateModel( + name="VulnerabilitySeverity", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), ), ( "scoring_system", @@ -308,10 +349,14 @@ class Migration(migrations.Migration): ("generic_textual", "Generic textual severity rating"), ("apache_httpd", "Apache Httpd Severity"), ], - help_text="identifier for the scoring system used. Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system, cvssv3.1_qr is vulnerability_id for CVSSv3.1 Qualitative Severity Rating system, generic_textual is vulnerability_id for Generic textual severity rating system, apache_httpd is vulnerability_id for Apache Httpd Severity system ", + help_text="Identifier for the scoring system used. Available choices are: cvssv2 is vulnerability_id for CVSSv2 Base Score system, cvssv2_vector is vulnerability_id for CVSSv2 Vector system, cvssv3 is vulnerability_id for CVSSv3 Base Score system, cvssv3_vector is vulnerability_id for CVSSv3 Vector system, cvssv3.1 is vulnerability_id for CVSSv3.1 Base Score system, cvssv3.1_vector is vulnerability_id for CVSSv3.1 Vector system, rhbs is vulnerability_id for RedHat Bugzilla severity system, rhas is vulnerability_id for RedHat Aggregate severity system, avgs is vulnerability_id for Archlinux Vulnerability Group Severity system, cvssv3.1_qr is vulnerability_id for CVSSv3.1 Qualitative Severity Rating system, generic_textual is vulnerability_id for Generic textual severity rating system, apache_httpd is vulnerability_id for Apache Httpd Severity system ", max_length=50, ), ), + ( + "value", + models.CharField(help_text="Example: 9.0, Important, High", max_length=50), + ), ( "reference", models.ForeignKey( @@ -328,11 +373,19 @@ class Migration(migrations.Migration): ), ], options={ - "unique_together": {("vulnerability", "reference", "scoring_system")}, + "unique_together": {("vulnerability", "reference", "scoring_system", "value")}, }, ), + migrations.AddIndex( + model_name="packagerelatedvulnerability", + index=models.Index(fields=["fix"], name="vulnerabili_fix_100a33_idx"), + ), + migrations.AlterUniqueTogether( + name="packagerelatedvulnerability", + unique_together={("package", "vulnerability")}, + ), migrations.AlterUniqueTogether( name="package", - unique_together={("name", "namespace", "type", "version", "qualifiers", "subpath")}, + unique_together={("type", "namespace", "name", "version", "qualifiers", "subpath")}, ), ] From 86d175e436139114925ca34266ca68e41f11afea Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Tue, 25 Jan 2022 16:19:08 +0530 Subject: [PATCH 35/40] Partition without numerical index Signed-off-by: Hritik Vijay --- vulnerabilities/importers/nginx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/importers/nginx.py 
b/vulnerabilities/importers/nginx.py index 8731785e8..188024430 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -86,7 +86,7 @@ def to_advisory_data( qualifiers = {} - affected_version_range = vulnerable.partition(":")[2] + _, _, affected_version_range = vulnerable.partition(":") if "nginx/Windows" in affected_version_range: qualifiers["os"] = "windows" affected_version_range = affected_version_range.replace("nginx/Windows", "") From bd390d4f6eac7f438b6163b717f5a39abf88957c Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Wed, 26 Jan 2022 02:17:37 +0530 Subject: [PATCH 36/40] Add get_fixed_purl in AffectedPackage, fix from_dict from_dict is a factory, should be a classmethod Signed-off-by: Hritik Vijay --- vulnerabilities/data_source.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 9794a8663..6e4a13537 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -66,14 +66,12 @@ def to_dict(self): "value": self.value, } - @staticmethod - def from_dict(severity: dict): + @classmethod + def from_dict(cls, severity: dict): """ Return a VulnerabilitySeverity object from dict generated by self.to_dict """ - return VulnerabilitySeverity( - system=SCORING_SYSTEMS[severity["system"]], value=severity["value"] - ) + return cls(system=SCORING_SYSTEMS[severity["system"]], value=severity["value"]) @dataclasses.dataclass(order=True) @@ -101,12 +99,12 @@ def to_dict(self): "severities": [severity.to_dict() for severity in self.severities], } - @staticmethod - def from_dict(ref: dict): + @classmethod + def from_dict(cls, ref: dict): """ Return a Reference object from dict generated by self.to_dict """ - return Reference( + return cls( reference_id=ref["reference_id"], url=ref["url"], severities=[ @@ -130,8 +128,16 @@ def __post_init__(self): if self.package.version: raise ValueError - @staticmethod - def merge(affected_packages: Iterable): + def get_fixed_purl(self): + """ + Return PackageURL corresponding to object's fixed_version + """ + fixed_version = self.fixed_version + fixed_purl = self.package._replace(version=str(fixed_version)) + return fixed_purl + + @classmethod + def merge(cls, affected_packages: Iterable): """ Return a tuple with all attributes of AffectedPackage as a set for all values in the given iterable of AffectedPackage @@ -166,8 +172,8 @@ def to_dict(self): "fixed_version": str(self.fixed_version) if self.fixed_version else None, } - @staticmethod - def from_dict(affected_pkg: dict): + @classmethod + def from_dict(cls, affected_pkg: dict): """ Return an AffectedPackage object from dict generated by self.to_dict """ @@ -178,7 +184,7 @@ def from_dict(affected_pkg: dict): # TODO: revisit after https://github.com/nexB/univers/issues/10 fixed_version = affected_version_range.version_class(fixed_version) - return AffectedPackage( + return cls( package=package, affected_version_range=affected_version_range, fixed_version=fixed_version, From 6ab6f06040271f09ec3bc083f253fc3d363188d5 Mon Sep 17 00:00:00 2001 From: Hritik Vijay <7457065+Hritik14@users.noreply.github.com> Date: Wed, 26 Jan 2022 03:09:24 +0530 Subject: [PATCH 37/40] Add FIXME about wrong filters in PackageSearchView The entire view is messed up. 
Will probably need a rewrite Co-authored-by: Philippe Ombredanne Signed-off-by: Hritik Vijay --- vulnerabilities/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 5d5ea686e..98ac12fe7 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -73,6 +73,8 @@ def request_to_queryset(request): return list( models.Package.objects.all() + # FIXME: This filter is wrong and ignoring most of the fields needed for a + # proper package lookup: type/namespace/name@version?qualifiers and so on .filter(name__icontains=package_name, type__icontains=package_type) .annotate( vulnerability_count=Count( From 33d0c0d9ce25c97289013f1f4375dddc35c53559 Mon Sep 17 00:00:00 2001 From: Hritik Vijay Date: Tue, 25 Jan 2022 17:55:50 +0530 Subject: [PATCH 38/40] Teeny weeny fixes Fixes are in accordance with the final review for this PR done at : https://github.com/nexB/vulnerablecode/pull/525#pullrequestreview-862081431 Signed-off-by: Hritik Vijay Co-authored-by: Philippe Ombredanne --- vulnerabilities/data_source.py | 1 - vulnerabilities/importers/nginx.py | 4 +-- vulnerabilities/improve_runner.py | 19 +++++------ vulnerabilities/improvers/__init__.py | 4 +-- vulnerabilities/improvers/default.py | 32 ++++++++++++------- .../management/commands/improve.py | 6 ++-- vulnerabilities/models.py | 14 +++++--- 7 files changed, 45 insertions(+), 35 deletions(-) diff --git a/vulnerabilities/data_source.py b/vulnerabilities/data_source.py index 6e4a13537..02d9ff498 100644 --- a/vulnerabilities/data_source.py +++ b/vulnerabilities/data_source.py @@ -158,7 +158,6 @@ def merge(cls, affected_packages: Iterable): fixed_versions.add(pkg.fixed_version) purls.add(pkg.package) if len(purls) > 1: - print(affected_packages) raise TypeError("Cannot merge with different purls", purls) return purls.pop(), affected_version_ranges, fixed_versions diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 188024430..a24f9c457 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -58,7 +58,7 @@ class NginxDataSourceConfiguration(DataSourceConfiguration): class NginxDataSource(DataSource): CONFIG_CLASS = NginxDataSourceConfiguration - url = "http://nginx.org/en/security_advisories.html" + url = "https://nginx.org/en/security_advisories.html" def advisory_data(self) -> Iterable[AdvisoryData]: data = requests.get(self.url).content @@ -239,7 +239,7 @@ def set_api(self): while self.version_api.cache["nginx/nginx"]: version = self.version_api.cache["nginx/nginx"].pop() normalized_version = Version( - version.value.replace("release-", ""), version.release_date + value=version.value.replace("release-", ""), release_date=version.release_date ) normalized_versions.add(normalized_version) self.version_api.cache["nginx/nginx"] = normalized_versions diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py index c43d4a646..0718a1449 100644 --- a/vulnerabilities/improve_runner.py +++ b/vulnerabilities/improve_runner.py @@ -16,7 +16,8 @@ class ImproveRunner: - """ImproveRunner is responsible for populating the database with any + """ + ImproveRunner is responsible for populating the database with any consumable data. 
It does so in its ``run`` method by invoking the given improver and parsing the returned Inferences into proper database fields """ @@ -102,14 +103,14 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]: query_kwargs = {} # TODO: this should be revisited as this should best be a model or manager method... and possibly streamlined - for key, val in p.to_dict().items(): - if not val: - if key == "qualifiers": - query_kwargs[key] = {} - else: - query_kwargs[key] = "" - else: - query_kwargs[key] = val + query_kwargs = dict( + type=p.type or "", + namespace=p.namespace or "", + name=p.name or "", + version=p.version or "", + qualifiers=p.qualifiers or {}, + subpath=p.subpath or "", + ) return models.Package.objects.get_or_create(**query_kwargs) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index b214f88e6..6f036a63c 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -1,5 +1,5 @@ -from . import default -from .. import importers +from vulnerabilities.improvers import default +from vulnerabilities import importers IMPROVER_REGISTRY = [default.DefaultImprover, importers.nginx.NginxBasicImprover] diff --git a/vulnerabilities/improvers/default.py b/vulnerabilities/improvers/default.py index f44a7d2f7..b72831c7e 100644 --- a/vulnerabilities/improvers/default.py +++ b/vulnerabilities/improvers/default.py @@ -40,7 +40,7 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: def get_exact_purls(affected_package: AffectedPackage) -> (List[PackageURL], PackageURL): """ - Return purls for fixed and affected packages contained in the given + Return a list of affected purls and the fixed package found in the ``affected_package`` AffectedPackage disregarding any ranges. Only exact version constraints (ie with an equality) are considered @@ -52,18 +52,26 @@ def get_exact_purls(affected_package: AffectedPackage) -> (List[PackageURL], Pac ... "affected_version_range": vers, ... "fixed_version": "5.0.0" ... }) - >>> get_exact_purls(affected_package) - ([PackageURL(type='turtle', namespace=None, name='green', version='2.0.0', qualifiers={}, subpath=None)], PackageURL(type='turtle', namespace=None, name='green', version='5.0.0', qualifiers={}, subpath=None)) + >>> got = get_exact_purls(affected_package) + >>> expected = ( + ... [PackageURL(type='turtle', namespace=None, name='green', version='2.0.0', qualifiers={}, subpath=None)], + ... PackageURL(type='turtle', namespace=None, name='green', version='5.0.0', qualifiers={}, subpath=None) + ... 
) + >>> assert expected == got """ - affected_purls = set() - all_constraints = affected_package.affected_version_range.constraints - for constraint in all_constraints: - if constraint.comparator in ["=", "<=", ">="]: - affected_purl = affected_package.package._replace(version=str(constraint.version)) - affected_purls.add(affected_purl) - affected_purls = list(affected_purls) - fixed_version = affected_package.fixed_version - fixed_purl = affected_package.package._replace(version=str(fixed_version)) + vr = affected_package.affected_version_range + # We need ``if c`` below because univers returns None as version + # in case of vers:nginx/* + # TODO: Revisit after https://github.com/nexB/univers/issues/33 + range_versions = [c.version for c in vr.constraints if c] + resolved_versions = [v for v in range_versions if v and v in vr] + + affected_purls = [] + for version in resolved_versions: + affected_purl = affected_package.package._replace(version=str(version)) + affected_purls.append(affected_purl) + + fixed_purl = affected_package.get_fixed_purl() return affected_purls, fixed_purl diff --git a/vulnerabilities/management/commands/improve.py b/vulnerabilities/management/commands/improve.py index 1c61f26b6..66bce82e9 100644 --- a/vulnerabilities/management/commands/improve.py +++ b/vulnerabilities/management/commands/improve.py @@ -65,10 +65,8 @@ def handle(self, *args, **options): self.improve_data(valid_sources(sources)) def list_sources(self): - improvers = [ - f"{improver.__module__}.{improver.__qualname__}" for improver in IMPROVER_REGISTRY - ] - self.stdout.write("Vulnerability data can be processed by these available improvers:") + improvers = [improver.qualified_name() for improver in IMPROVER_REGISTRY] + self.stdout.write("Vulnerability data can be processed by these available improvers:\n") self.stdout.write("\n".join(improvers)) def improve_data(self, improvers): diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index c00bdeaf5..f5ebd344d 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -48,8 +48,8 @@ class Vulnerability(models.Model): """ - A software vulnerability with minimal information. Identifiers other than CVE ID are stored as - VulnerabilityReference. + A software vulnerability with minimal information. Unique identifiers are + stored as ``Alias``. 
""" vulnerability_id = models.UUIDField( @@ -114,7 +114,11 @@ def severities(self): return VulnerabilitySeverity.objects.filter(reference=self.id) class Meta: - unique_together = ("vulnerability", "url", "reference_id") + unique_together = ( + "vulnerability", + "url", + "reference_id", + ) def __str__(self): reference_id = " {self.reference_id}" if self.reference_id else "" @@ -229,7 +233,7 @@ def update_or_create(self): Create if doesn't exist """ try: - existing = self.__class__.objects.get( + existing = PackageRelatedVulnerability.objects.get( vulnerability=self.vulnerability, package=self.package ) if self.confidence > existing.confidence: @@ -244,7 +248,7 @@ def update_or_create(self): ) except self.DoesNotExist: - self.__class__.objects.create( + PackageRelatedVulnerability.objects.create( vulnerability=self.vulnerability, created_by=self.created_by, package=self.package, From 21c6178a31d04d0a6de1575971f85903c42088a2 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 25 Jan 2022 22:46:08 +0100 Subject: [PATCH 39/40] Correct file format Signed-off-by: Philippe Ombredanne --- vulnerabilities/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 98ac12fe7..0c916f9bb 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -74,7 +74,7 @@ def request_to_queryset(request): return list( models.Package.objects.all() # FIXME: This filter is wrong and ignoring most of the fields needed for a - # proper package lookup: type/namespace/name@version?qualifiers and so on + # proper package lookup: type/namespace/name@version?qualifiers and so on .filter(name__icontains=package_name, type__icontains=package_type) .annotate( vulnerability_count=Count( From 0e74bea595ae307d02bb7f0932613393e6d84e48 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Tue, 25 Jan 2022 23:06:10 +0100 Subject: [PATCH 40/40] Use univers 30+. Bump lxml Signed-off-by: Philippe Ombredanne --- requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index a44b70b80..79f88a040 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,13 +8,12 @@ django-widget-tweaks>=1.4.8 packageurl-python>=0.9.4 binaryornot>=0.4.4 GitPython>=3.1.17 -#univers>=21.4.16.6 -git+https://github.com/nexB/univers +univers>=30.0.0 saneyaml>=0.5.2 beautifulsoup4>=4.9.3 python-dateutil>=2.8.1 toml>=0.10.2 -lxml>=4.6.3 +lxml>=4.6.4 gunicorn>=20.1.0 django-environ==0.4.5 defusedxml==0.7.1