From 8519b0d2daa0d73d5d6313e30c50c269f10d27f2 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 20 Dec 2024 23:00:19 +0530 Subject: [PATCH 01/10] Add models for CodeFix Signed-off-by: Tushar Goel --- vulnerabilities/migrations/0085_codefix.py | 60 ++++++++++++++++++++++ vulnerabilities/models.py | 29 +++++++++++ 2 files changed, 89 insertions(+) create mode 100644 vulnerabilities/migrations/0085_codefix.py diff --git a/vulnerabilities/migrations/0085_codefix.py b/vulnerabilities/migrations/0085_codefix.py new file mode 100644 index 000000000..cbe162845 --- /dev/null +++ b/vulnerabilities/migrations/0085_codefix.py @@ -0,0 +1,60 @@ +# Generated by Django 4.2.16 on 2024-12-20 17:29 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0084_alter_package_options_package_version_rank"), + ] + + operations = [ + migrations.CreateModel( + name="CodeFix", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("commits", models.JSONField(blank=True, default=list)), + ("pulls", models.JSONField(blank=True, default=list)), + ("downloads", models.JSONField(blank=True, default=list)), + ("patch", models.TextField(blank=True, null=True)), + ("notes", models.TextField(blank=True, null=True)), + ("references", models.JSONField(blank=True, default=list)), + ("status_reviewed", models.BooleanField(default=False)), + ("base_commit", models.CharField(blank=True, max_length=255, null=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "applies_to_versions", + models.ManyToManyField( + blank=True, related_name="fixes", to="vulnerabilities.package" + ), + ), + ( + "base_version", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="base_version_changes", + to="vulnerabilities.package", + ), + ), + ( + "vulnerabilities", + models.ManyToManyField( + blank=True, related_name="codefixes", to="vulnerabilities.vulnerability" + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6248e1e47..610d35c5f 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1581,3 +1581,32 @@ class Exploit(models.Model): @property def get_known_ransomware_campaign_use_type(self): return "Known" if self.known_ransomware_campaign_use else "Unknown" + + +class CodeChange(models.Model): + commits = models.JSONField(blank=True, default=list) + pulls = models.JSONField(blank=True, default=list) + downloads = models.JSONField(blank=True, default=list) + patch = models.TextField(blank=True, null=True) + notes = models.TextField(blank=True, null=True) + references = models.JSONField(blank=True, default=list) + status_reviewed = models.BooleanField(default=False) + base_version = models.ForeignKey( + "Package", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="base_version_changes", + ) + base_commit = models.CharField(max_length=255, blank=True, null=True) + + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + abstract = True + + +class CodeFix(CodeChange): + vulnerabilities = models.ManyToManyField("Vulnerability", related_name="codefixes", blank=True) + applies_to_versions = models.ManyToManyField("Package", related_name="fixes", blank=True) From 6f984c3e7c17b53a0b22168692fc740686460b60 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 20 Dec 2024 23:33:10 +0530 Subject: [PATCH 02/10] Add pipeline to collect fix commit Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/collect_commits.py | 112 +++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 vulnerabilities/pipelines/collect_commits.py diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py new file mode 100644 index 000000000..61f60b2a2 --- /dev/null +++ b/vulnerabilities/pipelines/collect_commits.py @@ -0,0 +1,112 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import CodeFix +from vulnerabilities.models import Package +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.utils import normalize_purl + + +class CollectFixCommitsPipeline(VulnerableCodePipeline): + """ + Improver pipeline to scout References and create CodeFix entries. + """ + + pipeline_id = "collect_fix_commits" + license_expression = None + + @classmethod + def steps(cls): + return (cls.collect_and_store_fix_commits,) + + def collect_and_store_fix_commits(self): + references = VulnerabilityReference.objects.prefetch_related("vulnerabilities").distinct() + + self.log(f"Processing {references.count():,d} references to collect fix commits.") + + created_fix_count = 0 + progress = LoopProgress(total_iterations=references.count(), logger=self.log) + for reference in progress.iter(references.paginated(per_page=500)): + for vulnerability in reference.vulnerabilities.all(): + package_urls = self.extract_package_urls(reference) + commit_id = self.extract_commit_id(reference.url) + + if commit_id and package_urls: + for purl in package_urls: + normalized_purl = normalize_purl(purl) + package = self.get_or_create_package(normalized_purl) + codefix = self.create_codefix_entry( + vulnerability=vulnerability, + package=package, + commit_id=commit_id, + reference=reference.url, + ) + if codefix: + created_fix_count += 1 + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + + def extract_package_urls(self, reference): + """ + Extract Package URLs from a reference. + Returns a list of Package URLs inferred from the reference. + """ + urls = [] + if "github" in reference.url: + parts = reference.url.split("/") + if len(parts) >= 5: + namespace = parts[-3] + name = parts[-2] + commit = parts[-1] + if commit: + urls.append(f"pkg:github/{namespace}/{name}@{commit}") + return urls + + def extract_commit_id(self, url): + """ + Extract a commit ID from a URL, if available. + """ + if "github" in url: + parts = url.split("/") + return parts[-1] if len(parts) > 0 else None + return None + + def get_or_create_package(self, purl): + """ + Get or create a Package object from a Package URL. + """ + try: + package, _ = Package.objects.get_or_create_from_purl(purl) + return package + except Exception as e: + self.log(f"Error creating package from purl {purl}: {e}") + return None + + def create_codefix_entry(self, vulnerability, package, commit_id, reference): + """ + Create a CodeFix entry associated with the given vulnerability and package. + """ + try: + codefix, created = CodeFix.objects.get_or_create( + base_version=package, + defaults={ + "commits": [commit_id], + "references": [reference], + }, + ) + if created: + codefix.vulnerabilities.add(vulnerability) + codefix.save() + return codefix + except Exception as e: + self.log(f"Error creating CodeFix entry: {e}") + return None From bcdc572515bbe437a36415b9e5d0314559a0c66c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 24 Dec 2024 00:54:57 +0530 Subject: [PATCH 03/10] Address review comments Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 61 ++++-- vulnerabilities/pipelines/collect_commits.py | 187 ++++++++++++++----- 2 files changed, 193 insertions(+), 55 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 610d35c5f..ab93084d9 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1584,29 +1584,64 @@ def get_known_ransomware_campaign_use_type(self): class CodeChange(models.Model): - commits = models.JSONField(blank=True, default=list) - pulls = models.JSONField(blank=True, default=list) - downloads = models.JSONField(blank=True, default=list) - patch = models.TextField(blank=True, null=True) - notes = models.TextField(blank=True, null=True) - references = models.JSONField(blank=True, default=list) - status_reviewed = models.BooleanField(default=False) + """ + Abstract base model representing a change in code, either introducing or fixing a vulnerability. + This includes details about commits, patches, and related metadata. + """ + + commits = models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers associated with the code change.", + ) + pulls = models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ) + downloads = models.JSONField( + blank=True, default=list, help_text="List of download URLs for the patched code." + ) + patch = models.TextField( + blank=True, null=True, help_text="The code change in patch format (e.g., git diff)." + ) + notes = models.TextField( + blank=True, null=True, help_text="Additional notes or instructions about the code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="External references related to this code change." + ) + status_reviewed = models.BooleanField( + default=False, help_text="Indicates if the code change has been reviewed." + ) base_version = models.ForeignKey( "Package", null=True, blank=True, on_delete=models.SET_NULL, related_name="base_version_changes", + help_text="The base version of the package to which this code change applies.", + ) + base_commit = models.CharField( + max_length=255, + blank=True, + null=True, + help_text="The commit ID representing the state of the code before applying the fix or change.", + ) + created_at = models.DateTimeField( + auto_now_add=True, help_text="Timestamp indicating when the code change was created." + ) + updated_at = models.DateTimeField( + auto_now=True, help_text="Timestamp indicating when the code change was last updated." ) - base_commit = models.CharField(max_length=255, blank=True, null=True) - - created_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) class Meta: abstract = True class CodeFix(CodeChange): - vulnerabilities = models.ManyToManyField("Vulnerability", related_name="codefixes", blank=True) - applies_to_versions = models.ManyToManyField("Package", related_name="fixes", blank=True) + package_vulnerabilities = models.ManyToManyField( + "AffectedByPackageRelatedVulnerability", + related_name="code_fixes", + help_text="The vulnerabilities fixed by this code change.", + ) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 61f60b2a2..44e91be31 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -8,12 +8,36 @@ # from aboutcode.pipeline import LoopProgress +from packageurl.contrib.url2purl import url2purl from vulnerabilities.models import CodeFix from vulnerabilities.models import Package from vulnerabilities.models import VulnerabilityReference from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import normalize_purl + + +def extract_commit_id(url): + """ + Extract a commit ID from a URL, if available. + Supports different URL structures for commit references. + + >>> extract_commit_id("https://github.com/hedgedoc/hedgedoc/commit/c1789474020a6d668d616464cb2da5e90e123f65") + 'c1789474020a6d668d616464cb2da5e90e123f65' + """ + if "/commit/" in url: + parts = url.split("/") + if len(parts) > 1 and parts[-2] == "commit": + return parts[-1] + return None + + +def is_reference_already_processed(reference_url, commit_id): + """ + Check if a reference and commit ID pair already exists in a CodeFix entry. + """ + return CodeFix.objects.filter( + references__contains=[reference_url], commits__contains=[commit_id] + ).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -37,48 +61,33 @@ def collect_and_store_fix_commits(self): progress = LoopProgress(total_iterations=references.count(), logger=self.log) for reference in progress.iter(references.paginated(per_page=500)): for vulnerability in reference.vulnerabilities.all(): - package_urls = self.extract_package_urls(reference) - commit_id = self.extract_commit_id(reference.url) - - if commit_id and package_urls: - for purl in package_urls: - normalized_purl = normalize_purl(purl) - package = self.get_or_create_package(normalized_purl) - codefix = self.create_codefix_entry( - vulnerability=vulnerability, - package=package, - commit_id=commit_id, - reference=reference.url, - ) - if codefix: - created_fix_count += 1 + vcs_url = normalize_vcs_url(reference.url) + commit_id = extract_commit_id(reference.url) - self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + if not commit_id or not vcs_url: + continue - def extract_package_urls(self, reference): - """ - Extract Package URLs from a reference. - Returns a list of Package URLs inferred from the reference. - """ - urls = [] - if "github" in reference.url: - parts = reference.url.split("/") - if len(parts) >= 5: - namespace = parts[-3] - name = parts[-2] - commit = parts[-1] - if commit: - urls.append(f"pkg:github/{namespace}/{name}@{commit}") - return urls - - def extract_commit_id(self, url): - """ - Extract a commit ID from a URL, if available. - """ - if "github" in url: - parts = url.split("/") - return parts[-1] if len(parts) > 0 else None - return None + # Skip if already processed + if is_reference_already_processed(reference.url, commit_id): + self.log( + f"Skipping already processed reference: {reference.url} with commit {commit_id}" + ) + continue + purl = url2purl(vcs_url) + if not purl: + self.log(f"Could not create purl from url: {vcs_url}") + continue + package = self.get_or_create_package(purl) + codefix = self.create_codefix_entry( + vulnerability=vulnerability, + package=package, + commit_id=commit_id, + reference=reference.url, + ) + if codefix: + created_fix_count += 1 + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") def get_or_create_package(self, purl): """ @@ -109,4 +118,98 @@ def create_codefix_entry(self, vulnerability, package, commit_id, reference): return codefix except Exception as e: self.log(f"Error creating CodeFix entry: {e}") - return None + return + + +PLAIN_URLS = ( + "https://", + "http://", +) + +VCS_URLS = ( + "git://", + "git+git://", + "git+https://", + "git+http://", + "hg://", + "hg+http://", + "hg+https://", + "svn://", + "svn+https://", + "svn+http://", +) + + +def normalize_vcs_url(repo_url, vcs_tool=None): + """ + Return a normalized vcs_url version control URL given some `repo_url` and an + optional `vcs_tool` hint (such as 'git', 'hg', etc. + + Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories + and more using the same approach as npm install: + + See https://docs.npmjs.com/files/package.json#repository + or https://getcomposer.org/doc/05-repositories.md + + This is done here in npm: + https://github.com/npm/npm/blob/d3c858ce4cfb3aee515bb299eb034fe1b5e44344/node_modules/hosted-git-info/git-host-info.js + + These should be resolved: + npm/npm + gist:11081aaa281 + bitbucket:example/repo + gitlab:another/repo + expressjs/serve-static + git://github.com/angular/di.js.git + git://github.com/hapijs/boom + git@github.com:balderdashy/waterline-criteria.git + http://github.com/ariya/esprima.git + http://github.com/isaacs/nopt + https://github.com/chaijs/chai + https://github.com/christkv/kerberos.git + https://gitlab.com/foo/private.git + git@gitlab.com:foo/private.git + """ + if not repo_url or not isinstance(repo_url, str): + return + + repo_url = repo_url.strip() + if not repo_url: + return + + # TODO: If we match http and https, we may should add more check in + # case if the url is not a repo one. For example, check the domain + # name in the url... + if repo_url.startswith(VCS_URLS + PLAIN_URLS): + return repo_url + + if repo_url.startswith("git@"): + tool, _, right = repo_url.partition("@") + if ":" in repo_url: + host, _, repo = right.partition(":") + else: + # git@github.com/Filirom1/npm2aur.git + host, _, repo = right.partition("/") + + if any(r in host for r in ("bitbucket", "gitlab", "github")): + scheme = "https" + else: + scheme = "git" + + return f"{scheme}://{host}/{repo}" + + # FIXME: where these URL schemes come from?? + if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")): + hoster_urls = { + "bitbucket": f"https://bitbucket.org/{repo}", + "github": f"https://github.com/{repo}", + "gitlab": f"https://gitlab.com/{repo}", + "gist": f"https://gist.github.com/{repo}", + } + hoster, _, repo = repo_url.partition(":") + return hoster_urls[hoster] % locals() + + if len(repo_url.split("/")) == 2: + # implicit github, but that's only on NPM? + return f"https://github.com/{repo_url}" + return repo_url From b3c0ef260594caebb294e9433012792a85ce112a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 24 Dec 2024 01:02:54 +0530 Subject: [PATCH 04/10] Fix Signed-off-by: Tushar Goel --- vulnerabilities/migrations/0085_codefix.py | 60 --------- vulnerabilities/migrations/0086_codefix.py | 124 +++++++++++++++++++ vulnerabilities/models.py | 2 +- vulnerabilities/pipelines/collect_commits.py | 28 +---- 4 files changed, 131 insertions(+), 83 deletions(-) delete mode 100644 vulnerabilities/migrations/0085_codefix.py create mode 100644 vulnerabilities/migrations/0086_codefix.py diff --git a/vulnerabilities/migrations/0085_codefix.py b/vulnerabilities/migrations/0085_codefix.py deleted file mode 100644 index cbe162845..000000000 --- a/vulnerabilities/migrations/0085_codefix.py +++ /dev/null @@ -1,60 +0,0 @@ -# Generated by Django 4.2.16 on 2024-12-20 17:29 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0084_alter_package_options_package_version_rank"), - ] - - operations = [ - migrations.CreateModel( - name="CodeFix", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ("commits", models.JSONField(blank=True, default=list)), - ("pulls", models.JSONField(blank=True, default=list)), - ("downloads", models.JSONField(blank=True, default=list)), - ("patch", models.TextField(blank=True, null=True)), - ("notes", models.TextField(blank=True, null=True)), - ("references", models.JSONField(blank=True, default=list)), - ("status_reviewed", models.BooleanField(default=False)), - ("base_commit", models.CharField(blank=True, max_length=255, null=True)), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "applies_to_versions", - models.ManyToManyField( - blank=True, related_name="fixes", to="vulnerabilities.package" - ), - ), - ( - "base_version", - models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="base_version_changes", - to="vulnerabilities.package", - ), - ), - ( - "vulnerabilities", - models.ManyToManyField( - blank=True, related_name="codefixes", to="vulnerabilities.vulnerability" - ), - ), - ], - options={ - "abstract": False, - }, - ), - ] diff --git a/vulnerabilities/migrations/0086_codefix.py b/vulnerabilities/migrations/0086_codefix.py new file mode 100644 index 000000000..64ea35fe0 --- /dev/null +++ b/vulnerabilities/migrations/0086_codefix.py @@ -0,0 +1,124 @@ +# Generated by Django 4.2.16 on 2024-12-23 19:32 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0085_alter_package_is_ghost_alter_package_version_rank_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="CodeFix", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "commits", + models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers associated with the code change.", + ), + ), + ( + "pulls", + models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ), + ), + ( + "downloads", + models.JSONField( + blank=True, + default=list, + help_text="List of download URLs for the patched code.", + ), + ), + ( + "patch", + models.TextField( + blank=True, + help_text="The code change in patch format (e.g., git diff).", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes or instructions about the code change.", + null=True, + ), + ), + ( + "references", + models.JSONField( + blank=True, + default=list, + help_text="External references related to this code change.", + ), + ), + ( + "status_reviewed", + models.BooleanField( + default=False, help_text="Indicates if the code change has been reviewed." + ), + ), + ( + "base_commit", + models.CharField( + blank=True, + help_text="The commit ID representing the state of the code before applying the fix or change.", + max_length=255, + null=True, + ), + ), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when the code change was created.", + ), + ), + ( + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Timestamp indicating when the code change was last updated.", + ), + ), + ( + "base_version", + models.ForeignKey( + blank=True, + help_text="The base version of the package to which this code change applies.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="base_version_codechanges", + to="vulnerabilities.package", + ), + ), + ( + "package_vulnerabilities", + models.ManyToManyField( + help_text="The vulnerabilities fixed by this code change.", + related_name="code_fixes", + to="vulnerabilities.affectedbypackagerelatedvulnerability", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ab93084d9..7da4ec2c4 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1619,7 +1619,7 @@ class CodeChange(models.Model): null=True, blank=True, on_delete=models.SET_NULL, - related_name="base_version_changes", + related_name="base_version_codechanges", help_text="The base version of the package to which this code change applies.", ) base_commit = models.CharField( diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 44e91be31..564988d34 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -16,21 +16,6 @@ from vulnerabilities.pipelines import VulnerableCodePipeline -def extract_commit_id(url): - """ - Extract a commit ID from a URL, if available. - Supports different URL structures for commit references. - - >>> extract_commit_id("https://github.com/hedgedoc/hedgedoc/commit/c1789474020a6d668d616464cb2da5e90e123f65") - 'c1789474020a6d668d616464cb2da5e90e123f65' - """ - if "/commit/" in url: - parts = url.split("/") - if len(parts) > 1 and parts[-2] == "commit": - return parts[-1] - return None - - def is_reference_already_processed(reference_url, commit_id): """ Check if a reference and commit ID pair already exists in a CodeFix entry. @@ -62,15 +47,14 @@ def collect_and_store_fix_commits(self): for reference in progress.iter(references.paginated(per_page=500)): for vulnerability in reference.vulnerabilities.all(): vcs_url = normalize_vcs_url(reference.url) - commit_id = extract_commit_id(reference.url) - if not commit_id or not vcs_url: + if not vcs_url: continue # Skip if already processed - if is_reference_already_processed(reference.url, commit_id): + if is_reference_already_processed(reference.url, vcs_url): self.log( - f"Skipping already processed reference: {reference.url} with commit {commit_id}" + f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) continue purl = url2purl(vcs_url) @@ -81,7 +65,7 @@ def collect_and_store_fix_commits(self): codefix = self.create_codefix_entry( vulnerability=vulnerability, package=package, - commit_id=commit_id, + vcs_url=vcs_url, reference=reference.url, ) if codefix: @@ -100,7 +84,7 @@ def get_or_create_package(self, purl): self.log(f"Error creating package from purl {purl}: {e}") return None - def create_codefix_entry(self, vulnerability, package, commit_id, reference): + def create_codefix_entry(self, vulnerability, package, vcs_url, reference): """ Create a CodeFix entry associated with the given vulnerability and package. """ @@ -108,7 +92,7 @@ def create_codefix_entry(self, vulnerability, package, commit_id, reference): codefix, created = CodeFix.objects.get_or_create( base_version=package, defaults={ - "commits": [commit_id], + "commits": [vcs_url], "references": [reference], }, ) From c01f6ec81cba6c617063d2fcdbd7be3c253f2d78 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Dec 2024 17:20:42 +0530 Subject: [PATCH 05/10] Model changes Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 62 +++++---- vulnerabilities/pipelines/collect_commits.py | 22 ++- vulnerabilities/tests/test_collect_commits.py | 129 ++++++++++++++++++ 3 files changed, 185 insertions(+), 28 deletions(-) create mode 100644 vulnerabilities/tests/test_collect_commits.py diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 7da4ec2c4..6af4db6ae 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1587,12 +1587,16 @@ class CodeChange(models.Model): """ Abstract base model representing a change in code, either introducing or fixing a vulnerability. This includes details about commits, patches, and related metadata. + + We are tracking commits, pulls and downloads as references to the code change. The goal is to + keep track and store the actual code patch in the ``patch`` field. When not available the patch + will be inferred from these references using improvers. """ commits = models.JSONField( blank=True, default=list, - help_text="List of commit identifiers associated with the code change.", + help_text="List of commit identifiers using VCS URLs associated with the code change.", ) pulls = models.JSONField( blank=True, @@ -1603,36 +1607,30 @@ class CodeChange(models.Model): blank=True, default=list, help_text="List of download URLs for the patched code." ) patch = models.TextField( - blank=True, null=True, help_text="The code change in patch format (e.g., git diff)." - ) - notes = models.TextField( - blank=True, null=True, help_text="Additional notes or instructions about the code change." - ) - references = models.JSONField( - blank=True, default=list, help_text="External references related to this code change." - ) - status_reviewed = models.BooleanField( - default=False, help_text="Indicates if the code change has been reviewed." + blank=True, null=True, help_text="The code change as a patch in unified diff format." ) - base_version = models.ForeignKey( + base_package_version = models.ForeignKey( "Package", null=True, blank=True, on_delete=models.SET_NULL, - related_name="base_version_codechanges", - help_text="The base version of the package to which this code change applies.", + related_name="codechanges", + help_text="The base package version to which this code change applies.", ) - base_commit = models.CharField( - max_length=255, - blank=True, - null=True, - help_text="The commit ID representing the state of the code before applying the fix or change.", + notes = models.TextField( + blank=True, null=True, help_text="Notes or instructions about this code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="URL references related to this code change." + ) + is_reviewed = models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." ) created_at = models.DateTimeField( - auto_now_add=True, help_text="Timestamp indicating when the code change was created." + auto_now_add=True, help_text="Timestamp indicating when this code change was created." ) updated_at = models.DateTimeField( - auto_now=True, help_text="Timestamp indicating when the code change was last updated." + auto_now=True, help_text="Timestamp indicating when this code change was last updated." ) class Meta: @@ -1640,8 +1638,24 @@ class Meta: class CodeFix(CodeChange): - package_vulnerabilities = models.ManyToManyField( + """ + A code fix is a code change that addresses a vulnerability and is associated: + - with a specific affected package version + - optionally with a specific fixing package version when it is known + """ + + affected_package_vulnerability = models.ForeignKey( "AffectedByPackageRelatedVulnerability", - related_name="code_fixes", - help_text="The vulnerabilities fixed by this code change.", + on_delete=models.CASCADE, + related_name="code_fix", + help_text="The affected package version to which this code fix applies.", + ) + + fixed_package_vulnerability = models.ForeignKey( + "FixingPackageRelatedVulnerability", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="code_fix", + help_text="The fixing package version with this code fix", ) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 564988d34..690789b83 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -44,15 +44,25 @@ def collect_and_store_fix_commits(self): created_fix_count = 0 progress = LoopProgress(total_iterations=references.count(), logger=self.log) + + Reference + AffectedByPackageRelatedVulnerability + # FixingPackageRelatedVulnerability + + + for apv in AffectedByPackageRelatedVulnerability.objects.all(): + vuln = apv.vulnerability + for ref in vuln.references: + for reference in progress.iter(references.paginated(per_page=500)): for vulnerability in reference.vulnerabilities.all(): - vcs_url = normalize_vcs_url(reference.url) + vcs_url = normalize_vcs_url(repo_url=reference.url) if not vcs_url: continue # Skip if already processed - if is_reference_already_processed(reference.url, vcs_url): + if is_reference_already_processed(reference_url=reference.url, commit_id=vcs_url): self.log( f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) @@ -97,7 +107,8 @@ def create_codefix_entry(self, vulnerability, package, vcs_url, reference): }, ) if created: - codefix.vulnerabilities.add(vulnerability) + AffectedByPackageRelatedVulnerability.objects.get + codefix.package_vulnerabilities.add(vulnerability) codefix.save() return codefix except Exception as e: @@ -124,10 +135,13 @@ def create_codefix_entry(self, vulnerability, package, vcs_url, reference): ) +# TODO: This function was borrowed from scancode-toolkit. We need to create a shared library for that. def normalize_vcs_url(repo_url, vcs_tool=None): """ Return a normalized vcs_url version control URL given some `repo_url` and an - optional `vcs_tool` hint (such as 'git', 'hg', etc. + optional `vcs_tool` hint (such as 'git', 'hg', etc.) + + Return None if repo_url is not recognized as a VCS URL. Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories and more using the same approach as npm install: diff --git a/vulnerabilities/tests/test_collect_commits.py b/vulnerabilities/tests/test_collect_commits.py new file mode 100644 index 000000000..ad6aa1ba2 --- /dev/null +++ b/vulnerabilities/tests/test_collect_commits.py @@ -0,0 +1,129 @@ +from unittest.mock import patch + +from vulnerabilities.models import CodeFix +from vulnerabilities.pipelines.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.collect_commits import is_reference_already_processed +from vulnerabilities.pipelines.collect_commits import normalize_vcs_url + + +# --- Mocked Dependencies --- +class MockVulnerability: + def __init__(self, id): + self.id = id + + +class MockReference: + def __init__(self, url, vulnerabilities): + self.url = url + self.vulnerabilities = vulnerabilities + + +class MockPackage: + def __init__(self, purl): + self.purl = purl + + +# --- Tests for Utility Functions --- +@patch("vulnerabilities.models.CodeFix.objects.filter") +def test_reference_already_processed_true(mock_filter): + mock_filter.return_value.exists.return_value = True + result = is_reference_already_processed("http://example.com", "commit123") + assert result is True + mock_filter.assert_called_once_with( + references__contains=["http://example.com"], commits__contains=["commit123"] + ) + + +@patch("vulnerabilities.models.CodeFix.objects.filter") +def test_reference_already_processed_false(mock_filter): + mock_filter.return_value.exists.return_value = False + result = is_reference_already_processed("http://example.com", "commit123") + assert result is False + + +# --- Tests for normalize_vcs_url --- +def test_normalize_plain_url(): + url = normalize_vcs_url("https://github.com/user/repo.git") + assert url == "https://github.com/user/repo.git" + + +def test_normalize_git_ssh_url(): + url = normalize_vcs_url("git@github.com:user/repo.git") + assert url == "https://github.com/user/repo.git" + + +def test_normalize_implicit_github(): + url = normalize_vcs_url("user/repo") + assert url == "https://github.com/user/repo" + + +# --- Tests for CollectFixCommitsPipeline --- +@patch("vulnerabilities.models.VulnerabilityReference.objects.prefetch_related") +@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") +@patch("vulnerabilities.pipelines.collect_commits.is_reference_already_processed") +@patch("vulnerabilities.pipelines.collect_commits.url2purl") +def test_collect_and_store_fix_commits( + mock_url2purl, mock_is_processed, mock_get_package, mock_prefetch +): + mock_vuln = MockVulnerability(id=1) + mock_reference = MockReference(url="http://example.com", vulnerabilities=[mock_vuln]) + mock_prefetch.return_value.distinct.return_value.paginated.return_value = [mock_reference] + mock_url2purl.return_value = "pkg:example/package@1.0.0" + mock_is_processed.return_value = False + mock_get_package.return_value = MockPackage(purl="pkg:example/package@1.0.0") + + pipeline = CollectFixCommitsPipeline() + pipeline.log = lambda msg: None + pipeline.collect_and_store_fix_commits() + + mock_is_processed.assert_called_once_with("http://example.com", "pkg:example/package@1.0.0") + mock_get_package.assert_called_once_with("pkg:example/package@1.0.0") + + +@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") +def test_get_or_create_package_success(mock_get_or_create): + mock_get_or_create.return_value = (MockPackage(purl="pkg:example/package@1.0.0"), True) + pipeline = CollectFixCommitsPipeline() + package = pipeline.get_or_create_package("pkg:example/package@1.0.0") + assert package.purl == "pkg:example/package@1.0.0" + + +@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") +def test_get_or_create_package_failure(mock_get_or_create): + mock_get_or_create.side_effect = Exception("Error") + pipeline = CollectFixCommitsPipeline() + logs = [] + pipeline.log = lambda msg: logs.append(msg) + result = pipeline.get_or_create_package("pkg:example/package@1.0.0") + assert result is None + assert len(logs) == 1 + + +@patch("vulnerabilities.models.CodeFix.objects.get_or_create") +def test_create_codefix_entry_success(mock_get_or_create): + mock_get_or_create.return_value = (CodeFix(), True) + pipeline = CollectFixCommitsPipeline() + result = pipeline.create_codefix_entry( + MockVulnerability(1), + MockPackage("pkg:example/package@1.0.0"), + "http://example.com", + "http://reference", + ) + assert result is not None + mock_get_or_create.assert_called_once() + + +@patch("vulnerabilities.models.CodeFix.objects.get_or_create") +def test_create_codefix_entry_failure(mock_get_or_create): + mock_get_or_create.side_effect = Exception("Error") + pipeline = CollectFixCommitsPipeline() + logs = [] + pipeline.log = lambda msg: logs.append(msg) + result = pipeline.create_codefix_entry( + MockVulnerability(1), + MockPackage("pkg:example/package@1.0.0"), + "http://example.com", + "http://reference", + ) + assert result is None + assert len(logs) == 1 From 48d2144b8c0cf1145928f6c543f0604622a6144f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 7 Jan 2025 20:20:46 +0530 Subject: [PATCH 06/10] Refactor the collect fix commit pipeline Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/collect_commits.py | 164 +++++++++++------- vulnerabilities/tests/test_collect_commits.py | 6 +- 2 files changed, 106 insertions(+), 64 deletions(-) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 690789b83..93bcce205 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -7,22 +7,24 @@ # See https://aboutcode.org for more information about nexB OSS projects. # +import re + from aboutcode.pipeline import LoopProgress from packageurl.contrib.url2purl import url2purl +from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import CodeFix +from vulnerabilities.models import FixingPackageRelatedVulnerability from vulnerabilities.models import Package from vulnerabilities.models import VulnerabilityReference from vulnerabilities.pipelines import VulnerableCodePipeline -def is_reference_already_processed(reference_url, commit_id): +def is_vcs_url_already_processed(commit_id): """ - Check if a reference and commit ID pair already exists in a CodeFix entry. + Check if a VCS URL exists in a CodeFix entry. """ - return CodeFix.objects.filter( - references__contains=[reference_url], commits__contains=[commit_id] - ).exists() + return CodeFix.objects.filter(commits__contains=[commit_id]).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -38,83 +40,54 @@ def steps(cls): return (cls.collect_and_store_fix_commits,) def collect_and_store_fix_commits(self): - references = VulnerabilityReference.objects.prefetch_related("vulnerabilities").distinct() + affected_by_package_related_vulnerabilities = ( + AffectedByPackageRelatedVulnerability.objects.all().prefetch_related( + "vulnerability", "vulnerability__references" + ) + ) - self.log(f"Processing {references.count():,d} references to collect fix commits.") + self.log( + f"Processing {affected_by_package_related_vulnerabilities.count():,d} references to collect fix commits." + ) created_fix_count = 0 - progress = LoopProgress(total_iterations=references.count(), logger=self.log) - - Reference - AffectedByPackageRelatedVulnerability - # FixingPackageRelatedVulnerability + progress = LoopProgress( + total_iterations=affected_by_package_related_vulnerabilities.count(), logger=self.log + ) + for apv in progress.iter( + affected_by_package_related_vulnerabilities.paginated(per_page=500) + ): + vulnerability = apv.vulnerability + for reference in vulnerability.references: - for apv in AffectedByPackageRelatedVulnerability.objects.all(): - vuln = apv.vulnerability - for ref in vuln.references: + if not is_vcs_url(reference.url): + continue - for reference in progress.iter(references.paginated(per_page=500)): - for vulnerability in reference.vulnerabilities.all(): vcs_url = normalize_vcs_url(repo_url=reference.url) if not vcs_url: continue # Skip if already processed - if is_reference_already_processed(reference_url=reference.url, commit_id=vcs_url): + if is_vcs_url_already_processed(commit_id=vcs_url): self.log( f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) continue - purl = url2purl(vcs_url) - if not purl: - self.log(f"Could not create purl from url: {vcs_url}") - continue - package = self.get_or_create_package(purl) - codefix = self.create_codefix_entry( - vulnerability=vulnerability, - package=package, - vcs_url=vcs_url, - reference=reference.url, + code_fix, created = CodeFix.objects.get_or_create( + commits=[vcs_url], + affected_package_vulnerability=apv, ) - if codefix: + + if created: created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") - def get_or_create_package(self, purl): - """ - Get or create a Package object from a Package URL. - """ - try: - package, _ = Package.objects.get_or_create_from_purl(purl) - return package - except Exception as e: - self.log(f"Error creating package from purl {purl}: {e}") - return None - - def create_codefix_entry(self, vulnerability, package, vcs_url, reference): - """ - Create a CodeFix entry associated with the given vulnerability and package. - """ - try: - codefix, created = CodeFix.objects.get_or_create( - base_version=package, - defaults={ - "commits": [vcs_url], - "references": [reference], - }, - ) - if created: - AffectedByPackageRelatedVulnerability.objects.get - codefix.package_vulnerabilities.add(vulnerability) - codefix.save() - return codefix - except Exception as e: - self.log(f"Error creating CodeFix entry: {e}") - return - PLAIN_URLS = ( "https://", @@ -211,3 +184,72 @@ def normalize_vcs_url(repo_url, vcs_tool=None): # implicit github, but that's only on NPM? return f"https://github.com/{repo_url}" return repo_url + + +def is_vcs_url(repo_url): + """ + Check if a given URL or string matches a valid VCS (Version Control System) URL. + + Supports: + - Standard VCS URL protocols (git, http, https, ssh) + - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo) + - GitHub shortcut (e.g., user/repo) + + Args: + repo_url (str): The repository URL or shortcut to validate. + + Returns: + bool: True if the string is a valid VCS URL, False otherwise. + + Examples: + >>> is_vcs_url("git://github.com/angular/di.js.git") + True + >>> is_vcs_url("github:user/repo") + True + >>> is_vcs_url("user/repo") + True + >>> is_vcs_url("https://github.com/user/repo.git") + True + >>> is_vcs_url("git@github.com:user/repo.git") + True + >>> is_vcs_url("http://github.com/isaacs/nopt") + True + >>> is_vcs_url("https://gitlab.com/foo/private.git") + True + >>> is_vcs_url("git@gitlab.com:foo/private.git") + True + >>> is_vcs_url("bitbucket:example/repo") + True + >>> is_vcs_url("gist:11081aaa281") + True + >>> is_vcs_url("ftp://example.com/not-a-repo") + False + >>> is_vcs_url("random-string") + False + >>> is_vcs_url("https://example.com/not-a-repo") + False + """ + if not repo_url or not isinstance(repo_url, str): + return False + + repo_url = repo_url.strip() + if not repo_url: + return False + + # 1. Match URLs with standard protocols + if re.match(r"^(git|ssh|http|https)://", repo_url): + return True + + # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) + if re.match(r"^git@\w+\.\w+:[\w\-./]+$", repo_url): + return True + + # 3. Match shortcut syntax (e.g., github:user/repo) + if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url): + return True + + # 4. Match implicit GitHub shortcut (e.g., user/repo) + if re.match(r"^[\w\-]+/[\w\-]+$", repo_url): + return True + + return False diff --git a/vulnerabilities/tests/test_collect_commits.py b/vulnerabilities/tests/test_collect_commits.py index ad6aa1ba2..6749fc54e 100644 --- a/vulnerabilities/tests/test_collect_commits.py +++ b/vulnerabilities/tests/test_collect_commits.py @@ -2,7 +2,7 @@ from vulnerabilities.models import CodeFix from vulnerabilities.pipelines.collect_commits import CollectFixCommitsPipeline -from vulnerabilities.pipelines.collect_commits import is_reference_already_processed +from vulnerabilities.pipelines.collect_commits import is_vcs_url_already_processed from vulnerabilities.pipelines.collect_commits import normalize_vcs_url @@ -27,7 +27,7 @@ def __init__(self, purl): @patch("vulnerabilities.models.CodeFix.objects.filter") def test_reference_already_processed_true(mock_filter): mock_filter.return_value.exists.return_value = True - result = is_reference_already_processed("http://example.com", "commit123") + result = is_vcs_url_already_processed("http://example.com", "commit123") assert result is True mock_filter.assert_called_once_with( references__contains=["http://example.com"], commits__contains=["commit123"] @@ -37,7 +37,7 @@ def test_reference_already_processed_true(mock_filter): @patch("vulnerabilities.models.CodeFix.objects.filter") def test_reference_already_processed_false(mock_filter): mock_filter.return_value.exists.return_value = False - result = is_reference_already_processed("http://example.com", "commit123") + result = is_vcs_url_already_processed("http://example.com", "commit123") assert result is False From 991fbeb3aefdd1f2ef3f517c82860522f2edeb9f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 18:58:48 +0530 Subject: [PATCH 07/10] Add tests Signed-off-by: Tushar Goel --- vulnerabilities/migrations/0086_codefix.py | 55 ++-- vulnerabilities/pipelines/collect_commits.py | 17 +- vulnerabilities/tests/test_collect_commits.py | 281 ++++++++++-------- 3 files changed, 196 insertions(+), 157 deletions(-) diff --git a/vulnerabilities/migrations/0086_codefix.py b/vulnerabilities/migrations/0086_codefix.py index 64ea35fe0..df67c3ae8 100644 --- a/vulnerabilities/migrations/0086_codefix.py +++ b/vulnerabilities/migrations/0086_codefix.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.16 on 2024-12-23 19:32 +# Generated by Django 4.2.16 on 2025-01-08 13:28 from django.db import migrations, models import django.db.models.deletion @@ -25,7 +25,7 @@ class Migration(migrations.Migration): models.JSONField( blank=True, default=list, - help_text="List of commit identifiers associated with the code change.", + help_text="List of commit identifiers using VCS URLs associated with the code change.", ), ), ( @@ -48,7 +48,7 @@ class Migration(migrations.Migration): "patch", models.TextField( blank=True, - help_text="The code change in patch format (e.g., git diff).", + help_text="The code change as a patch in unified diff format.", null=True, ), ), @@ -56,7 +56,7 @@ class Migration(migrations.Migration): "notes", models.TextField( blank=True, - help_text="Additional notes or instructions about the code change.", + help_text="Notes or instructions about this code change.", null=True, ), ), @@ -65,55 +65,58 @@ class Migration(migrations.Migration): models.JSONField( blank=True, default=list, - help_text="External references related to this code change.", + help_text="URL references related to this code change.", ), ), ( - "status_reviewed", + "is_reviewed", models.BooleanField( - default=False, help_text="Indicates if the code change has been reviewed." - ), - ), - ( - "base_commit", - models.CharField( - blank=True, - help_text="The commit ID representing the state of the code before applying the fix or change.", - max_length=255, - null=True, + default=False, help_text="Indicates if this code change has been reviewed." ), ), ( "created_at", models.DateTimeField( auto_now_add=True, - help_text="Timestamp indicating when the code change was created.", + help_text="Timestamp indicating when this code change was created.", ), ), ( "updated_at", models.DateTimeField( auto_now=True, - help_text="Timestamp indicating when the code change was last updated.", + help_text="Timestamp indicating when this code change was last updated.", ), ), ( - "base_version", + "affected_package_vulnerability", + models.ForeignKey( + help_text="The affected package version to which this code fix applies.", + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix", + to="vulnerabilities.affectedbypackagerelatedvulnerability", + ), + ), + ( + "base_package_version", models.ForeignKey( blank=True, - help_text="The base version of the package to which this code change applies.", + help_text="The base package version to which this code change applies.", null=True, on_delete=django.db.models.deletion.SET_NULL, - related_name="base_version_codechanges", + related_name="codechanges", to="vulnerabilities.package", ), ), ( - "package_vulnerabilities", - models.ManyToManyField( - help_text="The vulnerabilities fixed by this code change.", - related_name="code_fixes", - to="vulnerabilities.affectedbypackagerelatedvulnerability", + "fixed_package_vulnerability", + models.ForeignKey( + blank=True, + help_text="The fixing package version with this code fix", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="code_fix", + to="vulnerabilities.fixingpackagerelatedvulnerability", ), ), ], diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 93bcce205..8806fb4fb 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -10,13 +10,9 @@ import re from aboutcode.pipeline import LoopProgress -from packageurl.contrib.url2purl import url2purl from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import CodeFix -from vulnerabilities.models import FixingPackageRelatedVulnerability -from vulnerabilities.models import Package -from vulnerabilities.models import VulnerabilityReference from vulnerabilities.pipelines import VulnerableCodePipeline @@ -59,8 +55,7 @@ def collect_and_store_fix_commits(self): affected_by_package_related_vulnerabilities.paginated(per_page=500) ): vulnerability = apv.vulnerability - for reference in vulnerability.references: - + for reference in vulnerability.references.all(): if not is_vcs_url(reference.url): continue @@ -171,6 +166,7 @@ def normalize_vcs_url(repo_url, vcs_tool=None): # FIXME: where these URL schemes come from?? if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")): + repo = repo_url.split(":")[1] hoster_urls = { "bitbucket": f"https://bitbucket.org/{repo}", "github": f"https://github.com/{repo}", @@ -236,12 +232,15 @@ def is_vcs_url(repo_url): if not repo_url: return False - # 1. Match URLs with standard protocols - if re.match(r"^(git|ssh|http|https)://", repo_url): + # Define valid VCS domains + vcs_domains = r"(github\.com|gitlab\.com|bitbucket\.org|gist\.github\.com)" + + # 1. Match URLs with standard protocols pointing to VCS domains + if re.match(rf"^(git|ssh|http|https)://{vcs_domains}/[\w\-.]+/[\w\-.]+", repo_url): return True # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) - if re.match(r"^git@\w+\.\w+:[\w\-./]+$", repo_url): + if re.match(rf"^git@{vcs_domains}:[\w\-.]+/[\w\-.]+(\.git)?$", repo_url): return True # 3. Match shortcut syntax (e.g., github:user/repo) diff --git a/vulnerabilities/tests/test_collect_commits.py b/vulnerabilities/tests/test_collect_commits.py index 6749fc54e..c478244e1 100644 --- a/vulnerabilities/tests/test_collect_commits.py +++ b/vulnerabilities/tests/test_collect_commits.py @@ -1,129 +1,166 @@ -from unittest.mock import patch +from django.test import TestCase +from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import CodeFix +from vulnerabilities.models import Package +from vulnerabilities.models import Vulnerability +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference from vulnerabilities.pipelines.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.collect_commits import is_vcs_url from vulnerabilities.pipelines.collect_commits import is_vcs_url_already_processed from vulnerabilities.pipelines.collect_commits import normalize_vcs_url -# --- Mocked Dependencies --- -class MockVulnerability: - def __init__(self, id): - self.id = id - - -class MockReference: - def __init__(self, url, vulnerabilities): - self.url = url - self.vulnerabilities = vulnerabilities - - -class MockPackage: - def __init__(self, purl): - self.purl = purl - - -# --- Tests for Utility Functions --- -@patch("vulnerabilities.models.CodeFix.objects.filter") -def test_reference_already_processed_true(mock_filter): - mock_filter.return_value.exists.return_value = True - result = is_vcs_url_already_processed("http://example.com", "commit123") - assert result is True - mock_filter.assert_called_once_with( - references__contains=["http://example.com"], commits__contains=["commit123"] - ) - - -@patch("vulnerabilities.models.CodeFix.objects.filter") -def test_reference_already_processed_false(mock_filter): - mock_filter.return_value.exists.return_value = False - result = is_vcs_url_already_processed("http://example.com", "commit123") - assert result is False - - -# --- Tests for normalize_vcs_url --- -def test_normalize_plain_url(): - url = normalize_vcs_url("https://github.com/user/repo.git") - assert url == "https://github.com/user/repo.git" - - -def test_normalize_git_ssh_url(): - url = normalize_vcs_url("git@github.com:user/repo.git") - assert url == "https://github.com/user/repo.git" - - -def test_normalize_implicit_github(): - url = normalize_vcs_url("user/repo") - assert url == "https://github.com/user/repo" - - -# --- Tests for CollectFixCommitsPipeline --- -@patch("vulnerabilities.models.VulnerabilityReference.objects.prefetch_related") -@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") -@patch("vulnerabilities.pipelines.collect_commits.is_reference_already_processed") -@patch("vulnerabilities.pipelines.collect_commits.url2purl") -def test_collect_and_store_fix_commits( - mock_url2purl, mock_is_processed, mock_get_package, mock_prefetch -): - mock_vuln = MockVulnerability(id=1) - mock_reference = MockReference(url="http://example.com", vulnerabilities=[mock_vuln]) - mock_prefetch.return_value.distinct.return_value.paginated.return_value = [mock_reference] - mock_url2purl.return_value = "pkg:example/package@1.0.0" - mock_is_processed.return_value = False - mock_get_package.return_value = MockPackage(purl="pkg:example/package@1.0.0") - - pipeline = CollectFixCommitsPipeline() - pipeline.log = lambda msg: None - pipeline.collect_and_store_fix_commits() - - mock_is_processed.assert_called_once_with("http://example.com", "pkg:example/package@1.0.0") - mock_get_package.assert_called_once_with("pkg:example/package@1.0.0") - - -@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") -def test_get_or_create_package_success(mock_get_or_create): - mock_get_or_create.return_value = (MockPackage(purl="pkg:example/package@1.0.0"), True) - pipeline = CollectFixCommitsPipeline() - package = pipeline.get_or_create_package("pkg:example/package@1.0.0") - assert package.purl == "pkg:example/package@1.0.0" - - -@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") -def test_get_or_create_package_failure(mock_get_or_create): - mock_get_or_create.side_effect = Exception("Error") - pipeline = CollectFixCommitsPipeline() - logs = [] - pipeline.log = lambda msg: logs.append(msg) - result = pipeline.get_or_create_package("pkg:example/package@1.0.0") - assert result is None - assert len(logs) == 1 - - -@patch("vulnerabilities.models.CodeFix.objects.get_or_create") -def test_create_codefix_entry_success(mock_get_or_create): - mock_get_or_create.return_value = (CodeFix(), True) - pipeline = CollectFixCommitsPipeline() - result = pipeline.create_codefix_entry( - MockVulnerability(1), - MockPackage("pkg:example/package@1.0.0"), - "http://example.com", - "http://reference", - ) - assert result is not None - mock_get_or_create.assert_called_once() - - -@patch("vulnerabilities.models.CodeFix.objects.get_or_create") -def test_create_codefix_entry_failure(mock_get_or_create): - mock_get_or_create.side_effect = Exception("Error") - pipeline = CollectFixCommitsPipeline() - logs = [] - pipeline.log = lambda msg: logs.append(msg) - result = pipeline.create_codefix_entry( - MockVulnerability(1), - MockPackage("pkg:example/package@1.0.0"), - "http://example.com", - "http://reference", - ) - assert result is None - assert len(logs) == 1 +class CollectFixCommitsPipelineTests(TestCase): + def setUp(self): + self.vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", summary="Test vulnerability" + ) + + package = Package.objects.create(type="npm", namespace="abc", name="def", version="1") + + self.affected_by_vuln = AffectedByPackageRelatedVulnerability.objects.create( + package=package, vulnerability=self.vulnerability + ) + + self.reference1 = VulnerabilityReference.objects.create( + url="https://github.com/example/repo/commit/abcd1234" + ) + + self.reference2 = VulnerabilityReference.objects.create( + url="https://gitlab.com/example/repo/commit/efgh5678" + ) + VulnerabilityRelatedReference.objects.create( + vulnerability=self.vulnerability, reference=self.reference2 + ) + VulnerabilityRelatedReference.objects.create( + vulnerability=self.vulnerability, reference=self.reference1 + ) + + def test_is_vcs_url(self): + valid_urls = [ + "git://github.com/angular/di.js.git", + "https://github.com/user/repo.git", + "git@gitlab.com:user/repo.git", + ] + invalid_urls = [ + "ftp://example.com/not-a-repo", + "random-string", + "https://example.com/not-a-repo", + ] + for url in valid_urls: + assert is_vcs_url(url) is True + + for url in invalid_urls: + assert is_vcs_url(url) is False + + def test_normalize_vcs_url(self): + + assert ( + normalize_vcs_url("git@github.com:user/repo.git") == "https://github.com/user/repo.git" + ) + assert normalize_vcs_url("github:user/repo") == "https://github.com/user/repo" + assert normalize_vcs_url( + "https://github.com/user/repo.git" + ), "https://github.com/user/repo.git" + + def test_is_vcs_url_already_processed(self): + CodeFix.objects.create( + commits=["https://github.com/example/repo/commit/abcd1234"], + affected_package_vulnerability=self.affected_by_vuln, + ) + assert ( + is_vcs_url_already_processed("https://github.com/example/repo/commit/abcd1234") is True + ) + assert ( + is_vcs_url_already_processed("https://github.com/example/repo/commit/unknown") is False + ) + + def test_collect_and_store_fix_commits(self): + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + assert ( + CodeFix.objects.filter( + commits__contains=["https://github.com/example/repo/commit/abcd1234"] + ).exists() + is True + ) + assert ( + CodeFix.objects.filter( + commits__contains=["https://gitlab.com/example/repo/commit/efgh5678"] + ).exists() + is True + ) + + def test_skip_already_processed_commit(self): + CodeFix.objects.create( + commits=["https://github.com/example/repo/commit/abcd1234"], + affected_package_vulnerability=self.affected_by_vuln, + ) + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + # Ensure duplicate entry was not created + self.assertEqual( + CodeFix.objects.filter( + commits__contains=["https://github.com/example/repo/commit/abcd1234"] + ).count(), + 1, + ) + + +class IsVCSURLTests(TestCase): + def test_valid_vcs_urls(self): + valid_urls = [ + "git://github.com/example/repo.git", + "https://github.com/example/repo.git", + "git@github.com:example/repo.git", + "github:user/repo", + ] + for url in valid_urls: + with self.subTest(url=url): + self.assertTrue(is_vcs_url(url)) + + def test_invalid_vcs_urls(self): + invalid_urls = ["http://example.com", "ftp://example.com/repo", "random-string"] + for url in invalid_urls: + with self.subTest(url=url): + self.assertFalse(is_vcs_url(url)) + + +class NormalizeVCSURLTests(TestCase): + def test_normalize_valid_vcs_urls(self): + self.assertEqual( + normalize_vcs_url("git@github.com:user/repo.git"), "https://github.com/user/repo.git" + ) + self.assertEqual(normalize_vcs_url("github:user/repo"), "https://github.com/user/repo") + self.assertEqual( + normalize_vcs_url("https://github.com/user/repo.git"), + "https://github.com/user/repo.git", + ) + + +class IsVCSURLAlreadyProcessedTests(TestCase): + def setUp(self): + self.vulnerability = Vulnerability.objects.create(vulnerability_id="VCID-5678") + package = Package.objects.create(type="npm", namespace="abc", name="def", version="1") + self.affected_by_vuln = AffectedByPackageRelatedVulnerability.objects.create( + package=package, vulnerability=self.vulnerability + ) + self.code_fix = CodeFix.objects.create( + commits=["https://github.com/example/repo/commit/commit1"], + affected_package_vulnerability=self.affected_by_vuln, + ) + + def test_commit_already_processed(self): + self.assertTrue( + is_vcs_url_already_processed("https://github.com/example/repo/commit/commit1") + ) + + def test_commit_not_processed(self): + self.assertFalse( + is_vcs_url_already_processed("https://github.com/example/repo/commit/commit2") + ) From 7bb44be8bd2bd5a8bc157c2ad3a5d70e64b92a53 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 20:21:54 +0530 Subject: [PATCH 08/10] Add CodeFix in API Signed-off-by: Tushar Goel --- vulnerabilities/api_v2.py | 90 ++++++++++++++++++++ vulnerabilities/improvers/__init__.py | 2 + vulnerabilities/models.py | 2 + vulnerabilities/pipelines/collect_commits.py | 23 ++--- vulnerabilities/tests/test_api_v2.py | 30 +++++-- vulnerablecode/urls.py | 3 + 6 files changed, 133 insertions(+), 17 deletions(-) diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index b570570ed..d3f1d714c 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -21,6 +21,7 @@ from rest_framework.response import Response from rest_framework.reverse import reverse +from vulnerabilities.models import CodeFix from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityReference @@ -198,14 +199,25 @@ def get_affected_by_vulnerabilities(self, obj): Return a dictionary with vulnerabilities as keys and their details, including fixed_by_packages. """ result = {} + request = self.context.get("request") for vuln in getattr(obj, "prefetched_affected_vulnerabilities", []): fixed_by_package = vuln.fixed_by_packages.first() purl = None if fixed_by_package: purl = fixed_by_package.package_url + # Get code fixed for a vulnerability + code_fixes = CodeFix.objects.filter( + affected_package_vulnerability__vulnerability=vuln + ).distinct() + code_fix_urls = [ + reverse("codefix-detail", args=[code_fix.id], request=request) + for code_fix in code_fixes + ] + result[vuln.vulnerability_id] = { "vulnerability_id": vuln.vulnerability_id, "fixed_by_packages": purl, + "code_fixes": code_fix_urls, } return result @@ -521,3 +533,81 @@ def lookup(self, request): qs = self.get_queryset().for_purls([purl]).with_is_vulnerable() return Response(PackageV2Serializer(qs, many=True, context={"request": request}).data) + + +from rest_framework import serializers + +from vulnerabilities.models import CodeFix + + +class CodeFixSerializer(serializers.ModelSerializer): + """ + Serializer for the CodeFix model. + Provides detailed information about a code fix. + """ + + affected_vulnerability_id = serializers.CharField( + source="affected_package_vulnerability.vulnerability.vulnerability_id", + read_only=True, + help_text="ID of the affected vulnerability.", + ) + affected_package_purl = serializers.CharField( + source="affected_package_vulnerability.package.package_url", + read_only=True, + help_text="PURL of the affected package.", + ) + fixed_package_purl = serializers.CharField( + source="fixed_package_vulnerability.package.package_url", + read_only=True, + help_text="PURL of the fixing package (if available).", + ) + created_at = serializers.DateTimeField( + format="%Y-%m-%dT%H:%M:%SZ", + read_only=True, + help_text="Timestamp when the code fix was created.", + ) + updated_at = serializers.DateTimeField( + format="%Y-%m-%dT%H:%M:%SZ", + read_only=True, + help_text="Timestamp when the code fix was last updated.", + ) + + class Meta: + model = CodeFix + fields = [ + "id", + "commits", + "pulls", + "downloads", + "patch", + "affected_vulnerability_id", + "affected_package_purl", + "fixed_package_purl", + "notes", + "references", + "is_reviewed", + "created_at", + "updated_at", + ] + read_only_fields = ["created_at", "updated_at"] + + +class CodeFixViewSet(viewsets.ReadOnlyModelViewSet): + """ + API endpoint that allows viewing CodeFix entries. + """ + + queryset = CodeFix.objects.all() + serializer_class = CodeFixSerializer + + def get_queryset(self): + """ + Optionally filter by vulnerability ID. + """ + queryset = super().get_queryset() + vulnerability_id = self.request.query_params.get("vulnerability_id") + if vulnerability_id: + queryset = queryset.filter( + affected_package_vulnerability__vulnerability__vulnerability_id=vulnerability_id + ) + return queryset diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index dd73eb02d..44a65df47 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,6 +10,7 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipelines import collect_commits from vulnerabilities.pipelines import compute_package_risk from vulnerabilities.pipelines import compute_package_version_rank from vulnerabilities.pipelines import enhance_with_exploitdb @@ -41,6 +42,7 @@ enhance_with_exploitdb.ExploitDBImproverPipeline, compute_package_risk.ComputePackageRiskPipeline, compute_package_version_rank.ComputeVersionRankPipeline, + collect_commits.CollectFixCommitsPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6af4db6ae..1a58ec4dc 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1101,6 +1101,8 @@ class AffectedByPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): related_name="affected_package_vulnerability_relations", ) + objects = BaseQuerySet.as_manager() + class Meta(PackageRelatedVulnerabilityBase.Meta): verbose_name_plural = "Affected By Package Related Vulnerabilities" diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 8806fb4fb..bf94b755d 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -20,7 +20,8 @@ def is_vcs_url_already_processed(commit_id): """ Check if a VCS URL exists in a CodeFix entry. """ - return CodeFix.objects.filter(commits__contains=[commit_id]).exists() + if "commit" in commit_id: + return CodeFix.objects.filter(commits__contains=[commit_id]).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -70,17 +71,19 @@ def collect_and_store_fix_commits(self): f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) continue - code_fix, created = CodeFix.objects.get_or_create( - commits=[vcs_url], - affected_package_vulnerability=apv, - ) - - if created: - created_fix_count += 1 - self.log( - f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + # check if vcs_url has commit + if "/commit/" in vcs_url: + code_fix, created = CodeFix.objects.get_or_create( + commits=[vcs_url], + affected_package_vulnerability=apv, ) + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") diff --git a/vulnerabilities/tests/test_api_v2.py b/vulnerabilities/tests/test_api_v2.py index af4dc47c8..e3434c6a9 100644 --- a/vulnerabilities/tests/test_api_v2.py +++ b/vulnerabilities/tests/test_api_v2.py @@ -216,7 +216,7 @@ def test_list_packages(self): Should return a list of packages with their details and associated vulnerabilities. """ url = reverse("package-v2-list") - with self.assertNumQueries(31): + with self.assertNumQueries(32): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("results", response.data) @@ -238,7 +238,7 @@ def test_filter_packages_by_purl(self): Test filtering packages by one or more PURLs. """ url = reverse("package-v2-list") - with self.assertNumQueries(19): + with self.assertNumQueries(20): response = self.client.get(url, {"purl": "pkg:pypi/django@3.2"}, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data["results"]["packages"]), 1) @@ -249,7 +249,7 @@ def test_filter_packages_by_affected_vulnerability(self): Test filtering packages by affected_by_vulnerability. """ url = reverse("package-v2-list") - with self.assertNumQueries(19): + with self.assertNumQueries(20): response = self.client.get( url, {"affected_by_vulnerability": "VCID-1234"}, format="json" ) @@ -308,7 +308,11 @@ def test_package_serializer_fields(self): # Verify affected_by_vulnerabilities structure expected_affected_by_vulnerabilities = { - "VCID-1234": {"vulnerability_id": "VCID-1234", "fixed_by_packages": None} + "VCID-1234": { + "code_fixes": [], + "vulnerability_id": "VCID-1234", + "fixed_by_packages": None, + } } self.assertEqual(data["affected_by_vulnerabilities"], expected_affected_by_vulnerabilities) @@ -387,7 +391,13 @@ def test_get_affected_by_vulnerabilities(self): vulnerabilities = serializer.get_affected_by_vulnerabilities(package) self.assertEqual( vulnerabilities, - {"VCID-1234": {"vulnerability_id": "VCID-1234", "fixed_by_packages": None}}, + { + "VCID-1234": { + "code_fixes": [], + "vulnerability_id": "VCID-1234", + "fixed_by_packages": None, + } + }, ) def test_get_fixing_vulnerabilities(self): @@ -591,7 +601,7 @@ def test_lookup_with_valid_purl(self): """ url = reverse("package-v2-lookup") data = {"purl": "pkg:pypi/django@3.2"} - with self.assertNumQueries(12): + with self.assertNumQueries(13): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(1, len(response.data)) @@ -603,7 +613,13 @@ def test_lookup_with_valid_purl(self): self.assertEqual(response.data[0]["purl"], "pkg:pypi/django@3.2") self.assertEqual( response.data[0]["affected_by_vulnerabilities"], - {"VCID-1234": {"vulnerability_id": "VCID-1234", "fixed_by_packages": None}}, + { + "VCID-1234": { + "code_fixes": [], + "vulnerability_id": "VCID-1234", + "fixed_by_packages": None, + } + }, ) self.assertEqual(response.data[0]["fixing_vulnerabilities"], []) diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 10f7db13f..54540a66d 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -20,6 +20,7 @@ from vulnerabilities.api import CPEViewSet from vulnerabilities.api import PackageViewSet from vulnerabilities.api import VulnerabilityViewSet +from vulnerabilities.api_v2 import CodeFixViewSet from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet from vulnerabilities.views import ApiUserCreateView @@ -48,6 +49,8 @@ def __init__(self, *args, **kwargs): api_v2_router = OptionalSlashRouter() api_v2_router.register("packages", PackageV2ViewSet, basename="package-v2") api_v2_router.register("vulnerabilities", VulnerabilityV2ViewSet, basename="vulnerability-v2") +api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix") + urlpatterns = [ path("api/v2/", include(api_v2_router.urls)), From 805590b89052a99465be5f68d9ce5cdda2b2c883 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 20:26:21 +0530 Subject: [PATCH 09/10] Fix code Signed-off-by: Tushar Goel --- vulnerabilities/api_v2.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index d3f1d714c..10ffb6d98 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -535,11 +535,6 @@ def lookup(self, request): return Response(PackageV2Serializer(qs, many=True, context={"request": request}).data) -from rest_framework import serializers - -from vulnerabilities.models import CodeFix - - class CodeFixSerializer(serializers.ModelSerializer): """ Serializer for the CodeFix model. From 3d7c209395bd99e7e33aa30bd314a18fac98f033 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 20:31:02 +0530 Subject: [PATCH 10/10] Minor Fix Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/collect_commits.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index bf94b755d..92145c051 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -20,8 +20,7 @@ def is_vcs_url_already_processed(commit_id): """ Check if a VCS URL exists in a CodeFix entry. """ - if "commit" in commit_id: - return CodeFix.objects.filter(commits__contains=[commit_id]).exists() + return CodeFix.objects.filter(commits__contains=[commit_id]).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -57,6 +56,8 @@ def collect_and_store_fix_commits(self): ): vulnerability = apv.vulnerability for reference in vulnerability.references.all(): + if not "/commit/" in reference.url: + continue if not is_vcs_url(reference.url): continue @@ -72,17 +73,16 @@ def collect_and_store_fix_commits(self): ) continue # check if vcs_url has commit - if "/commit/" in vcs_url: - code_fix, created = CodeFix.objects.get_or_create( - commits=[vcs_url], - affected_package_vulnerability=apv, - ) + code_fix, created = CodeFix.objects.get_or_create( + commits=[vcs_url], + affected_package_vulnerability=apv, + ) - if created: - created_fix_count += 1 - self.log( - f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" - ) + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.")