diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index fa6e7075b..9c9b79a59 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -454,7 +454,10 @@ Improver
 ^^^^^^^^^
 
 All the Improvers must inherit from ``Improver`` superclass and implement the
-``interesting_advisories`` property and the ``get_inferences`` method.
+``interesting_advisories`` property and the ``get_inferences`` method,
+unless they are not improving advisory data. In this case they should override
+the ``is_custom_improver`` property to return True and implement the ``run`` method.
+
 
 Writing an improver
 ---------------------
diff --git a/vulnerabilities/improve_runner.py b/vulnerabilities/improve_runner.py
index b6658d9ea..952991c82 100644
--- a/vulnerabilities/improve_runner.py
+++ b/vulnerabilities/improve_runner.py
@@ -39,14 +39,33 @@ class ImproveRunner:
     def __init__(self, improver_class):
         self.improver_class = improver_class
 
-    def run(self) -> None:
-        improver = self.improver_class()
-        logger.info(f"Running improver: {improver.qualified_name}")
+    def __run_advisory_improver(self, improver) -> None:
+        """
+        Process the inferences of ``improver`` for each of its interesting advisories.
+        """
         for advisory in improver.interesting_advisories:
             inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data())
             process_inferences(
                 inferences=inferences, advisory=advisory, improver_name=improver.qualified_name
             )
+
+    def __run_custom_improver(self, improver) -> None:
+        """
+        Delegate the whole improvement to the ``run`` method of ``improver``.
+        """
+        improver.run()
+
+    def run(self) -> None:
+        """
+        Run the improver, using its ``run`` method if it is a custom improver.
+        """
+        # Instantiate once and share the instance with the helpers.
+        improver = self.improver_class()
+        logger.info("Running improver: %s", improver.qualified_name)
+        if improver.is_custom_improver:
+            self.__run_custom_improver(improver)
+        else:
+            self.__run_advisory_improver(improver)
         logger.info("Finished improving using %s.", self.improver_class.qualified_name)
 
 
diff --git a/vulnerabilities/improver.py b/vulnerabilities/improver.py
index 9b46cce0a..6f8dcad0e 100644
--- a/vulnerabilities/improver.py
+++ b/vulnerabilities/improver.py
@@ -110,8 +110,19 @@ class Improver:
     required to override the ``interesting_advisories`` property method to return a QuerySet of
     ``Advisory`` objects. These advisories are then passed to ``get_inferences`` method which is
     responsible for returning an iterable of ``Inferences`` for that particular ``Advisory``
+
+    Some improvers work on already imported data that is not directly related to advisories.
+    Such improvers must override ``is_custom_improver`` to return True and implement ``run``.
+
     """
 
+    @classproperty
+    def is_custom_improver(cls):
+        """
+        Return True when the improver implements ``run`` instead of ``get_inferences``.
+        """
+        return False
+
     @classproperty
     def qualified_name(cls):
         """
@@ -135,3 +146,11 @@ def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
         Subclasses must implement.
         """
         raise NotImplementedError
+
+    def run(self) -> None:
+        """
+        Run a custom improver that does not improve advisory data and needs a custom action.
+
+        Improvers whose ``is_custom_improver`` is True must implement this method.
+        """
+        raise NotImplementedError
diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py
index 629ece67f..856af6ba6 100644
--- a/vulnerabilities/improvers/__init__.py
+++ b/vulnerabilities/improvers/__init__.py
@@ -7,6 +7,7 @@
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
 
+from vulnerabilities.improvers import add_commit_ref
 from vulnerabilities.improvers import default
 from vulnerabilities.improvers import valid_versions
 
@@ -24,6 +25,7 @@
     valid_versions.IstioImprover,
     valid_versions.DebianOvalImprover,
     valid_versions.UbuntuOvalImprover,
+    add_commit_ref.CommitRelationImprover,
 ]
 
 IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
diff --git a/vulnerabilities/improvers/add_commit_ref.py b/vulnerabilities/improvers/add_commit_ref.py
new file mode 100644
index 000000000..9ffdbc1d4
--- /dev/null
+++ b/vulnerabilities/improvers/add_commit_ref.py
@@ -0,0 +1,101 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+"""
+Improver that looks for commits related to a vulnerability.
+"""
+
+import logging
+import re
+
+from django.db import transaction
+from django.db.models.query import QuerySet
+
+from vulnerabilities.improver import Improver
+from vulnerabilities.models import Commit
+from vulnerabilities.models import VulnerabilityReference
+
+logger = logging.getLogger(__name__)
+
+# Regex base coming from: https://github.com/secureIT-project/CVEfixes/
+# Only the "hash" group is read. Matching ignores case to agree with the
+# case-insensitive ``url__iregex`` filter used to select the references.
+COMMIT_PATTERN = re.compile(
+    r"(((?P<repo>(https|http):\/\/(bitbucket|github|gitlab)\.(org|com)\/(?P<owner>[^\/]+)\/(?P<project>[^\/]*))\/(commit|commits)\/(?P<hash>\w+)#?)+)",
+    re.IGNORECASE,
+)
+
+
+class CommitRelationImprover(Improver):
+    """
+    Detect commits related to an advisory by applying a regex to reference URLs.
+    """
+
+    # ImproveRunner calls ``run`` instead of ``get_inferences`` when this is true.
+    # A plain attribute is readable from both the class and its instances.
+    is_custom_improver = True
+
+    def __init__(self):
+        # Commits are buffered and inserted in chunks for memory efficiency.
+        self.insert_chunk_size = 500
+        self.commit_instances = []
+
+    @property
+    def interesting_references(self) -> QuerySet:
+        """
+        Return the references whose URL points to a commit on a supported host.
+        """
+        # Below regex is the compatible form for Postgresql
+        # For now, we are only interested in Bitbucket, Github and Gitlab sources
+        # TODO: Add other sources such as Apache related sources, Linux kernel, etc.
+        git_url = r"((https|http)://(bitbucket|github|gitlab)\.(org|com)/([^/]+)/([^/]*))/(commit|commits)/(\w+)#?"
+        return VulnerabilityReference.objects.filter(
+            url__iregex=git_url,
+        )
+
+    def __generate_instance(self):
+        """
+        Yield an unsaved Commit for each interesting reference.
+        """
+        for ref in self.interesting_references:
+            commit_groups = COMMIT_PATTERN.search(ref.url)
+            if commit_groups is None:
+                # The database and Python regex engines may disagree on edge cases.
+                logger.warning("Cannot extract a commit hash from %r", ref.url)
+                continue
+            yield Commit(
+                reference=ref,
+                hash=commit_groups.group("hash"),
+            )
+
+    def __insert_bulk(self) -> None:
+        """
+        Insert the buffered commits in bulk, then empty the buffer.
+        """
+        if not self.commit_instances:
+            return
+
+        with transaction.atomic():
+            # ignore_conflicts makes the database skip rows that violate the
+            # (reference, hash) uniqueness instead of failing the whole batch.
+            Commit.objects.bulk_create(self.commit_instances, ignore_conflicts=True)
+
+        # Empty the buffer before further inserts
+        self.commit_instances.clear()
+
+    def run(self) -> None:
+        """
+        Create a Commit for every interesting reference, inserting in chunks.
+        """
+        for commit in self.__generate_instance():
+            self.commit_instances.append(commit)
+            if len(self.commit_instances) >= self.insert_chunk_size:
+                self.__insert_bulk()
+        # Add remaining commits
+        self.__insert_bulk()
diff --git a/vulnerabilities/migrations/0040_commit.py b/vulnerabilities/migrations/0040_commit.py
new file mode 100644
index 000000000..623220578
--- /dev/null
+++ b/vulnerabilities/migrations/0040_commit.py
@@ -0,0 +1,46 @@
+# Generated by Django 4.1.7 on 2023-07-05 17:38
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("vulnerabilities", "0039_alter_vulnerabilityseverity_scoring_system"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="Commit",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                (
+                    "hash",
+                    models.CharField(blank=True, help_text="Hash of the commit", max_length=1024),
+                ),
+                (
+                    "chain_urls",
+                    models.JSONField(
+                        blank=True, default=list, help_text="List of URLS used to reach the commit"
+                    ),
+                ),
+                (
+                    "reference",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="vulnerabilities.vulnerabilityreference",
+                    ),
+                ),
+            ],
+            options={
+                "ordering": ["reference_id"],
+                "unique_together": {("reference", "hash")},
+            },
+        ),
+    ]
diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py
index 7b6c9fcc6..867a1af47 100644
--- a/vulnerabilities/models.py
+++ b/vulnerabilities/models.py
@@ -886,3 +886,36 @@ class ApiUser(UserModel):
 
     class Meta:
         proxy = True
+
+
+class Commit(models.Model):
+    """
+    A commit related to a vulnerability, extracted from the URL of a reference.
+    """
+
+    reference = models.ForeignKey(
+        VulnerabilityReference,
+        on_delete=models.CASCADE,
+    )
+
+    hash = models.CharField(
+        max_length=1024,
+        help_text="Hash of the commit",
+        blank=True,
+    )
+
+    chain_urls = models.JSONField(
+        default=list,
+        help_text="List of URLS used to reach the commit",
+        blank=True,
+    )
+
+    class Meta:
+        ordering = ["reference_id"]
+        # Lets bulk_create(ignore_conflicts=True) skip commits that are already stored.
+        unique_together = ["reference", "hash"]
+
+    def __str__(self):
+        # Commit has no ``url`` field; identify the commit by its hash instead.
+        reference_id = f" {self.reference_id}" if self.reference_id else ""
+        return f"{self.hash}{reference_id}"