-
Notifications
You must be signed in to change notification settings - Fork 870
Optimize OSV vulnerability scanning #44684
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| * Optimized OSV vulnerability scanning to query distinct software per OS version rather than per host, reducing redundant database queries for many hosts sharing the same packages. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3309,6 +3309,88 @@ func (ds *Datastore) ListSoftwareForVulnDetection( | |
| return result, nil | ||
| } | ||
|
|
||
| const softwareVulnDetectionBatchSize = 10000 | ||
|
|
||
| func (ds *Datastore) ListSoftwareForVulnDetectionByOSVersion( | ||
| ctx context.Context, | ||
| osVer fleet.OSVersion, | ||
| ) ([]fleet.Software, error) { | ||
| var softwareIDs []uint | ||
| err := sqlx.SelectContext(ctx, ds.reader(ctx), &softwareIDs, ` | ||
| SELECT DISTINCT hs.software_id | ||
| FROM host_software hs | ||
| JOIN hosts h ON hs.host_id = h.id | ||
| WHERE h.platform = ? AND h.os_version = ? | ||
| `, osVer.Platform, osVer.Name) | ||
| if err != nil { | ||
| return nil, ctxerr.Wrap(ctx, err, "listing distinct software IDs for OS version") | ||
| } | ||
|
|
||
|
Comment on lines
+3313
to
+3328
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current implementation ran in 600ms locally with 14M rows — extrapolating linearly to 330M would be ~14 seconds. That seems acceptable. Especially given the cron job ran for 8+ hours. We could potentially batch it as stated without the join: |
||
| if len(softwareIDs) == 0 { | ||
| return nil, nil | ||
| } | ||
|
|
||
| var result []fleet.Software | ||
| if err := common_mysql.BatchProcessSimple(softwareIDs, softwareVulnDetectionBatchSize, func(batch []uint) error { | ||
| placeholders := strings.TrimSuffix(strings.Repeat("?,", len(batch)), ",") | ||
| query := fmt.Sprintf(` | ||
| SELECT s.id, s.name, s.version, s.release, s.arch, COALESCE(cpe.cpe, '') AS generated_cpe | ||
| FROM software s | ||
| LEFT JOIN software_cpe cpe ON s.id = cpe.software_id | ||
| WHERE s.id IN (%s) | ||
| `, placeholders) | ||
| args := make([]any, len(batch)) | ||
| for i, id := range batch { | ||
| args[i] = id | ||
| } | ||
| var batchResult []fleet.Software | ||
| if err := sqlx.SelectContext(ctx, ds.reader(ctx), &batchResult, query, args...); err != nil { | ||
| return ctxerr.Wrap(ctx, err, "fetching software details for vulnerability detection") | ||
| } | ||
| result = append(result, batchResult...) | ||
| return nil | ||
| }); err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| return result, nil | ||
| } | ||
|
|
||
| func (ds *Datastore) ListSoftwareVulnerabilitiesBySoftwareIDs( | ||
| ctx context.Context, | ||
| softwareIDs []uint, | ||
| source fleet.VulnerabilitySource, | ||
| ) ([]fleet.SoftwareVulnerability, error) { | ||
| if len(softwareIDs) == 0 { | ||
| return nil, nil | ||
| } | ||
|
|
||
| var result []fleet.SoftwareVulnerability | ||
| if err := common_mysql.BatchProcessSimple(softwareIDs, softwareVulnDetectionBatchSize, func(batch []uint) error { | ||
| placeholders := strings.TrimSuffix(strings.Repeat("?,", len(batch)), ",") | ||
| query := fmt.Sprintf(` | ||
| SELECT software_id, cve, resolved_in_version | ||
| FROM software_cve | ||
| WHERE source = ? AND software_id IN (%s) | ||
| `, placeholders) | ||
| args := make([]any, 0, len(batch)+1) | ||
| args = append(args, source) | ||
| for _, id := range batch { | ||
| args = append(args, id) | ||
| } | ||
| var batchResult []fleet.SoftwareVulnerability | ||
| if err := sqlx.SelectContext(ctx, ds.reader(ctx), &batchResult, query, args...); err != nil { | ||
| return ctxerr.Wrap(ctx, err, "fetching software vulnerabilities by software IDs") | ||
| } | ||
| result = append(result, batchResult...) | ||
| return nil | ||
| }); err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| return result, nil | ||
| } | ||
|
|
||
| // ListCVEs returns all cve_meta rows published after 'maxAge' | ||
| func (ds *Datastore) ListCVEs(ctx context.Context, maxAge time.Duration) ([]fleet.CVEMeta, error) { | ||
| var result []fleet.CVEMeta | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
curious if we'll run into memory issues here. I believe the old pattern paginated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For this method in particular, if we take 100k softwares then...
~100K uint (8 bytes each) = 800KB for the IDs
~100K fleet.Software structs - we only populate 6 fields in our query (
id,name,version,release,arch,generated_cpe)~105 bytes
Struct overhead: ~200 bytes (8 strings × 16 bytes header + uint)
Total per item: ~305 bytes
100K items: ~30MB
Seemed reasonable. Am I missing something?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm, just caught my eye