diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index 4703e1df7..163b6c50f 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -32,6 +32,7 @@ on: branches: - feat/signature-verification - feat/tamperalert + - feat/tamper-detection workflow_dispatch: inputs: build_image: @@ -203,7 +204,8 @@ jobs: Test_27_ApplicationProfileOpens, Test_28_UserDefinedNetworkNeighborhood, Test_29_SignedApplicationProfile, - Test_30_TamperedSignedProfiles + Test_30_TamperedSignedProfiles, + Test_31_TamperDetectionAlert ] steps: - name: Checkout code diff --git a/cmd/main.go b/cmd/main.go index 7a5f850a1..494e506cc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -294,10 +294,10 @@ func main() { ruleBindingNotify = make(chan rulebinding.RuleBindingNotify, 100) ruleBindingCache.AddNotifier(&ruleBindingNotify) - apc := applicationprofilecache.NewApplicationProfileCache(cfg, storageClient, k8sObjectCache) + apc := applicationprofilecache.NewApplicationProfileCache(cfg, storageClient, k8sObjectCache, exporter) apc.Start(ctx) - nnc := networkneighborhoodcache.NewNetworkNeighborhoodCache(cfg, storageClient, k8sObjectCache) + nnc := networkneighborhoodcache.NewNetworkNeighborhoodCache(cfg, storageClient, k8sObjectCache, exporter) nnc.Start(ctx) dc := dnscache.NewDnsCache(dnsResolver) diff --git a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go index 59686128c..ffcd145fb 100644 --- a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go +++ b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go @@ -2,7 +2,6 @@ package applicationprofilecache import ( "context" - "errors" "fmt" "strings" "sync" @@ -15,10 +14,13 @@ import ( "github.com/kubescape/go-logger" "github.com/kubescape/go-logger/helpers" helpersv1 "github.com/kubescape/k8s-interface/instanceidhandler/v1/helpers" + "github.com/armosec/armoapi-go/armotypes" "github.com/kubescape/node-agent/pkg/config" + "github.com/kubescape/node-agent/pkg/exporters" "github.com/kubescape/node-agent/pkg/objectcache" "github.com/kubescape/node-agent/pkg/objectcache/applicationprofilecache/callstackcache" "github.com/kubescape/node-agent/pkg/resourcelocks" + "github.com/kubescape/node-agent/pkg/rulemanager/types" "github.com/kubescape/node-agent/pkg/signature" "github.com/kubescape/node-agent/pkg/signature/profiles" "github.com/kubescape/node-agent/pkg/storage" @@ -51,6 +53,7 @@ type ApplicationProfileCacheImpl struct { containerToCallStackIndex maps.SafeMap[string, *ContainerCallStackIndex] storageClient storage.ProfileClient k8sObjectCache objectcache.K8sObjectCache + exporter exporters.Exporter // Exporter for sending tamper detection alerts updateInterval time.Duration updateInProgress bool // Flag to track if update is in progress updateMutex sync.Mutex // Mutex to protect the flag @@ -58,7 +61,7 @@ type ApplicationProfileCacheImpl struct { } // NewApplicationProfileCache creates a new application profile cache with periodic updates -func NewApplicationProfileCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache) *ApplicationProfileCacheImpl { +func NewApplicationProfileCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache, exporter exporters.Exporter) *ApplicationProfileCacheImpl { updateInterval := utils.AddJitter(cfg.ProfilesCacheRefreshRate, 10) // Add 10% jitter to avoid high 
load on the storage apc := &ApplicationProfileCacheImpl{ @@ -70,6 +73,7 @@ func NewApplicationProfileCache(cfg config.Config, storageClient storage.Profile containerToCallStackIndex: maps.SafeMap[string, *ContainerCallStackIndex]{}, storageClient: storageClient, k8sObjectCache: k8sObjectCache, + exporter: exporter, updateInterval: updateInterval, containerLocks: resourcelocks.New(), } @@ -274,42 +278,101 @@ func (apc *ApplicationProfileCacheImpl) updateAllProfiles(ctx context.Context) { } } -// verifyApplicationProfile verifies the profile signature if verification is enabled. -// Returns error if verification fails, nil otherwise (including when verification is disabled). -// Also updates profileState with error details if verification fails. +// verifyApplicationProfile verifies the profile signature. +// Always checks signed profiles for tamper (emits R1016 alert on tamper). +// When EnableSignatureVerification is true, also rejects tampered/unsigned profiles. +// Returns error if the profile should not be loaded, nil otherwise. func (apc *ApplicationProfileCacheImpl) verifyApplicationProfile(profile *v1beta1.ApplicationProfile, workloadID, context string, recordFailure bool) error { - if !apc.cfg.EnableSignatureVerification { - return nil - } profileAdapter := profiles.NewApplicationProfileAdapter(profile) - if err := signature.VerifyObject(profileAdapter); err != nil { - // Only warn if signature exists but doesn't match; missing signatures are debug - if errors.Is(err, signature.ErrObjectNotSigned) { - logger.L().Debug(context+" is not signed, skipping", - helpers.String("profile", profile.Name), - helpers.String("namespace", profile.Namespace), - helpers.String("workloadID", workloadID)) - } else { - logger.L().Warning(context+" signature verification failed, skipping", + + // Always check signed profiles for tamper, regardless of enforcement setting + if signature.IsSigned(profileAdapter) { + if err := signature.VerifyObject(profileAdapter); err != nil { + // Signed profile failed verification → tamper detected + logger.L().Warning(context+" signature verification failed (tamper detected)", helpers.String("profile", profile.Name), helpers.String("namespace", profile.Namespace), helpers.String("workloadID", workloadID), helpers.Error(err)) + + // Emit R1016 tamper alert + apc.emitTamperAlert(profile.Name, profile.Namespace, workloadID, "ApplicationProfile", err) + + if apc.cfg.EnableSignatureVerification { + if recordFailure { + apc.setVerificationFailed(workloadID, profile.Name, err) + } + return err + } + // Enforcement off: allow loading despite tamper + return nil } + logger.L().Debug(context+" verification successful", + helpers.String("profile", profile.Name), + helpers.String("namespace", profile.Namespace)) + return nil + } - // Update profile state with verification error + // Profile is not signed + if apc.cfg.EnableSignatureVerification { + logger.L().Debug(context+" is not signed, skipping", + helpers.String("profile", profile.Name), + helpers.String("namespace", profile.Namespace), + helpers.String("workloadID", workloadID)) if recordFailure { - apc.setVerificationFailed(workloadID, profile.Name, err) + apc.setVerificationFailed(workloadID, profile.Name, signature.ErrObjectNotSigned) } - - return err + return signature.ErrObjectNotSigned } - logger.L().Debug(context+" verification successful", - helpers.String("profile", profile.Name), - helpers.String("namespace", profile.Namespace)) + return nil } +// emitTamperAlert sends an R1016 "Signed profile tampered" alert via 
the exporter.
+func (apc *ApplicationProfileCacheImpl) emitTamperAlert(profileName, namespace, workloadID, objectKind string, verifyErr error) {
+ if apc.exporter == nil {
+ return
+ }
+
+ ruleFailure := &types.GenericRuleFailure{
+ BaseRuntimeAlert: armotypes.BaseRuntimeAlert{
+ AlertName: "Signed profile tampered",
+ InfectedPID: 1,
+ Severity: 10,
+ FixSuggestions: "Investigate who modified the " + objectKind + " '" + profileName + "' in namespace '" + namespace + "'. Re-sign the profile after verifying its contents.",
+ },
+ AlertType: armotypes.AlertTypeRule,
+ RuntimeProcessDetails: armotypes.ProcessTree{
+ ProcessTree: armotypes.Process{
+ PID: 1,
+ Comm: "node-agent",
+ },
+ },
+ RuleAlert: armotypes.RuleAlert{
+ RuleDescription: fmt.Sprintf("Signed %s '%s' in namespace '%s' has been tampered with: %v", objectKind, profileName, namespace, verifyErr),
+ },
+ RuntimeAlertK8sDetails: armotypes.RuntimeAlertK8sDetails{
+ Namespace: namespace,
+ },
+ RuleID: "R1016",
+ }
+
+ // Populate workload details from workloadID if available
+ ruleFailure.SetWorkloadDetails(extractWlidFromWorkloadID(workloadID))
+
+ apc.exporter.SendRuleAlert(ruleFailure)
+}
+
+// extractWlidFromWorkloadID extracts the wlid part from a "wlid/templateHash" key.
+func extractWlidFromWorkloadID(workloadID string) string {
+ if idx := strings.LastIndex(workloadID, "/"); idx > 0 {
+ // workloadID format is "wlid://<cluster>/<namespace>/<kind>/<name>/<templateHash>".
+ // The segment after the last "/" is the templateHash; keep everything before it (the wlid).
+ return workloadID[:idx]
+ }
+ return workloadID
+}
+
 func (apc *ApplicationProfileCacheImpl) setVerificationFailed(workloadID, profileName string, err error) {
 profileState := &objectcache.ProfileState{
 Completion: "failed",
@@ -557,7 +620,9 @@ func (apc *ApplicationProfileCacheImpl) addContainer(container *containercollect
 }
 }
 } else {
- apc.workloadIDToProfileState.Set(workloadID, nil)
+ apc.workloadIDToProfileState.Set(workloadID, &objectcache.ProfileState{
+ Error: fmt.Errorf("waiting for profile update"),
+ })
 }
 // Create container info
@@ -607,6 +672,12 @@ func (apc *ApplicationProfileCacheImpl) addContainer(container *containercollect
 // Update the profile in the cache
 apc.workloadIDToProfile.Set(workloadID, fullProfile)
+ profileState := &objectcache.ProfileState{
+ Completion: fullProfile.Annotations[helpersv1.CompletionMetadataKey],
+ Status: fullProfile.Annotations[helpersv1.StatusMetadataKey],
+ Name: fullProfile.Name,
+ }
+ apc.workloadIDToProfileState.Set(workloadID, profileState)
 logger.L().Debug("added user-defined profile to cache",
 helpers.String("containerID", containerID),
 helpers.String("workloadID", workloadID),
@@ -782,10 +853,9 @@ func (apc *ApplicationProfileCacheImpl) GetApplicationProfileState(containerID s
 if profileState, exists := apc.workloadIDToProfileState.Load(workloadID); exists {
 if profileState != nil {
 return profileState
- } else {
- return &objectcache.ProfileState{
- Error: fmt.Errorf("profile state not available - shouldn't happen"),
- }
+ }
+ return &objectcache.ProfileState{
+ Error: fmt.Errorf("application profile state is nil for workload %s", workloadID),
 }
 }
diff --git a/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go b/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go
index 7ce56181c..6a89edcb2 100644
--- a/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go
+++ b/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go
@@ -77,7 +77,7 @@ func TestPagination(t *testing.T) {
 spy =
&SpyProfileClient{Profiles: profiles} // mock k8s object cache is irrelevant since we inject container info directly - cache := NewApplicationProfileCache(config.Config{}, spy, nil) + cache := NewApplicationProfileCache(config.Config{}, spy, nil, nil) // Inject a container so that "default" namespace is processed. // The WorkloadID needs to match something if we want deeper logic to run, diff --git a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go index 394bb4485..6efd6042a 100644 --- a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go +++ b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go @@ -14,9 +14,12 @@ import ( "github.com/kubescape/go-logger" "github.com/kubescape/go-logger/helpers" helpersv1 "github.com/kubescape/k8s-interface/instanceidhandler/v1/helpers" + "github.com/armosec/armoapi-go/armotypes" "github.com/kubescape/node-agent/pkg/config" + "github.com/kubescape/node-agent/pkg/exporters" "github.com/kubescape/node-agent/pkg/objectcache" "github.com/kubescape/node-agent/pkg/resourcelocks" + "github.com/kubescape/node-agent/pkg/rulemanager/types" "github.com/kubescape/node-agent/pkg/signature" "github.com/kubescape/node-agent/pkg/signature/profiles" "github.com/kubescape/node-agent/pkg/storage" @@ -44,6 +47,7 @@ type NetworkNeighborhoodCacheImpl struct { networkNeighborhoodToUserManagedIdentifier maps.SafeMap[string, string] // networkNeighborhoodName -> user-managed profile unique identifier storageClient storage.ProfileClient k8sObjectCache objectcache.K8sObjectCache + exporter exporters.Exporter // Exporter for sending tamper detection alerts updateInterval time.Duration updateInProgress bool // Flag to track if update is in progress updateMutex sync.Mutex // Mutex to protect the flag @@ -51,7 +55,7 @@ type NetworkNeighborhoodCacheImpl struct { } // NewNetworkNeighborhoodCache creates a new network neighborhood cache with periodic updates -func NewNetworkNeighborhoodCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache) *NetworkNeighborhoodCacheImpl { +func NewNetworkNeighborhoodCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache, exporter exporters.Exporter) *NetworkNeighborhoodCacheImpl { updateInterval := utils.AddJitter(cfg.ProfilesCacheRefreshRate, 10) // Add 10% jitter to avoid high load on the storage nnc := &NetworkNeighborhoodCacheImpl{ @@ -62,6 +66,7 @@ func NewNetworkNeighborhoodCache(cfg config.Config, storageClient storage.Profil networkNeighborhoodToUserManagedIdentifier: maps.SafeMap[string, string]{}, storageClient: storageClient, k8sObjectCache: k8sObjectCache, + exporter: exporter, updateInterval: updateInterval, containerLocks: resourcelocks.New(), } @@ -248,19 +253,12 @@ func (nnc *NetworkNeighborhoodCacheImpl) updateAllNetworkNeighborhoods(ctx conte continue } - // Verify signature if enabled - if nnc.cfg.EnableSignatureVerification { - adapter := profiles.NewNetworkNeighborhoodAdapter(fullNN) - if err := signature.VerifyObjectStrict(adapter); err != nil { - logger.L().Warning("network neighborhood signature verification failed, skipping", - helpers.String("workloadID", workloadID), - helpers.String("namespace", namespace), - helpers.String("name", fullNN.Name), - helpers.Error(err)) - profileState.Error = fmt.Errorf("signature verification failed: %w", err) - nnc.workloadIDToProfileState.Set(workloadID, profileState) - 
continue - } + // Verify signature — always check signed NNs for tamper (R1016), + // enforcement mode only controls whether tampered NNs are loaded. + if err := nnc.verifyNetworkNeighborhood(fullNN, workloadID); err != nil { + profileState.Error = fmt.Errorf("signature verification failed: %w", err) + nnc.workloadIDToProfileState.Set(workloadID, profileState) + continue } nnc.workloadIDToNetworkNeighborhood.Set(workloadID, fullNN) @@ -335,47 +333,31 @@ func (nnc *NetworkNeighborhoodCacheImpl) handleUserManagedNetworkNeighborhood(nn } // Verify signature on the original network neighborhood before merging - if nnc.cfg.EnableSignatureVerification { - adapter := profiles.NewNetworkNeighborhoodAdapter(originalNN) - if err := signature.VerifyObjectStrict(adapter); err != nil { - logger.L().Warning("original network neighborhood signature verification failed, skipping merge", - helpers.String("workloadID", toMerge.wlid), - helpers.String("namespace", originalNN.Namespace), - helpers.String("name", originalNN.Name), - helpers.Error(err)) - profileState := &objectcache.ProfileState{ - Completion: originalNN.Annotations[helpersv1.CompletionMetadataKey], - Status: originalNN.Annotations[helpersv1.StatusMetadataKey], - Name: originalNN.Name, - Error: fmt.Errorf("signature verification failed: %w", err), - } - nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) - // Evict stale merged profile from cache on verification failure - nnc.workloadIDToNetworkNeighborhood.Delete(toMerge.wlid) - return - } + if err := nnc.verifyNetworkNeighborhood(originalNN, toMerge.wlid); err != nil { + profileState := &objectcache.ProfileState{ + Completion: originalNN.Annotations[helpersv1.CompletionMetadataKey], + Status: originalNN.Annotations[helpersv1.StatusMetadataKey], + Name: originalNN.Name, + Error: fmt.Errorf("signature verification failed: %w", err), + } + nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) + // Evict stale merged profile from cache on verification failure + nnc.workloadIDToNetworkNeighborhood.Delete(toMerge.wlid) + return } // Verify signature on the user-managed network neighborhood before merging - if nnc.cfg.EnableSignatureVerification { - adapter := profiles.NewNetworkNeighborhoodAdapter(fullUserNN) - if err := signature.VerifyObjectStrict(adapter); err != nil { - logger.L().Warning("user-managed network neighborhood signature verification failed, skipping merge", - helpers.String("workloadID", toMerge.wlid), - helpers.String("namespace", fullUserNN.Namespace), - helpers.String("name", fullUserNN.Name), - helpers.Error(err)) - profileState := &objectcache.ProfileState{ - Completion: fullUserNN.Annotations[helpersv1.CompletionMetadataKey], - Status: fullUserNN.Annotations[helpersv1.StatusMetadataKey], - Name: fullUserNN.Name, - Error: fmt.Errorf("signature verification failed: %w", err), - } - nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) - // Restore cache to originalNN on user-managed verification failure - nnc.workloadIDToNetworkNeighborhood.Set(toMerge.wlid, originalNN) - return - } + if err := nnc.verifyNetworkNeighborhood(fullUserNN, toMerge.wlid); err != nil { + profileState := &objectcache.ProfileState{ + Completion: fullUserNN.Annotations[helpersv1.CompletionMetadataKey], + Status: fullUserNN.Annotations[helpersv1.StatusMetadataKey], + Name: fullUserNN.Name, + Error: fmt.Errorf("signature verification failed: %w", err), + } + nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) + // Restore cache to originalNN on user-managed verification 
failure + nnc.workloadIDToNetworkNeighborhood.Set(toMerge.wlid, originalNN) + return } // Merge the network neighborhoods @@ -542,7 +524,9 @@ func (nnc *NetworkNeighborhoodCacheImpl) addContainer(container *containercollec // Create workload ID to state mapping if _, exists := nnc.workloadIDToProfileState.Load(workloadID); !exists { - nnc.workloadIDToProfileState.Set(workloadID, nil) + nnc.workloadIDToProfileState.Set(workloadID, &objectcache.ProfileState{ + Error: fmt.Errorf("waiting for profile update"), + }) } logger.L().Debug("container added to cache", @@ -653,10 +637,9 @@ func (nnc *NetworkNeighborhoodCacheImpl) GetNetworkNeighborhoodState(containerID if profileState, exists := nnc.workloadIDToProfileState.Load(workloadID); exists { if profileState != nil { return profileState - } else { - return &objectcache.ProfileState{ - Error: fmt.Errorf("profile state not available - shouldn't happen"), - } + } + return &objectcache.ProfileState{ + Error: fmt.Errorf("network neighborhood state is nil for workload %s", workloadID), } } @@ -848,6 +831,84 @@ func (nnc *NetworkNeighborhoodCacheImpl) workloadHasUserDefinedNetwork(workloadI return found } +// verifyNetworkNeighborhood verifies the NN signature. +// Always checks signed NNs for tamper (emits R1016 alert on tamper). +// When EnableSignatureVerification is true, also rejects tampered/unsigned NNs. +// Returns error if the NN should not be loaded, nil otherwise. +func (nnc *NetworkNeighborhoodCacheImpl) verifyNetworkNeighborhood(nn *v1beta1.NetworkNeighborhood, workloadID string) error { + adapter := profiles.NewNetworkNeighborhoodAdapter(nn) + + // Always check signed NNs for tamper, regardless of enforcement setting + if signature.IsSigned(adapter) { + if err := signature.VerifyObjectStrict(adapter); err != nil { + logger.L().Warning("network neighborhood signature verification failed (tamper detected)", + helpers.String("name", nn.Name), + helpers.String("namespace", nn.Namespace), + helpers.String("workloadID", workloadID), + helpers.Error(err)) + + // Emit R1016 tamper alert + nnc.emitTamperAlert(nn.Name, nn.Namespace, workloadID, "NetworkNeighborhood", err) + + if nnc.cfg.EnableSignatureVerification { + return err + } + // Enforcement off: allow loading despite tamper + return nil + } + return nil + } + + // Not signed + if nnc.cfg.EnableSignatureVerification { + return fmt.Errorf("network neighborhood is not signed") + } + return nil +} + +// emitTamperAlert sends an R1016 "Signed profile tampered" alert via the exporter. +func (nnc *NetworkNeighborhoodCacheImpl) emitTamperAlert(nnName, namespace, workloadID, objectKind string, verifyErr error) { + if nnc.exporter == nil { + return + } + + ruleFailure := &types.GenericRuleFailure{ + BaseRuntimeAlert: armotypes.BaseRuntimeAlert{ + AlertName: "Signed profile tampered", + InfectedPID: 1, + Severity: 10, + FixSuggestions: "Investigate who modified the " + objectKind + " '" + nnName + "' in namespace '" + namespace + "'. 
Re-sign the profile after verifying its contents.", + }, + AlertType: armotypes.AlertTypeRule, + RuntimeProcessDetails: armotypes.ProcessTree{ + ProcessTree: armotypes.Process{ + PID: 1, + Comm: "node-agent", + }, + }, + RuleAlert: armotypes.RuleAlert{ + RuleDescription: fmt.Sprintf("Signed %s '%s' in namespace '%s' has been tampered with: %v", objectKind, nnName, namespace, verifyErr), + }, + RuntimeAlertK8sDetails: armotypes.RuntimeAlertK8sDetails{ + Namespace: namespace, + }, + RuleID: "R1016", + } + + // Populate workload details from workloadID if available + ruleFailure.SetWorkloadDetails(extractWlidFromWorkloadID(workloadID)) + + nnc.exporter.SendRuleAlert(ruleFailure) +} + +// extractWlidFromWorkloadID extracts the wlid part from a "wlid/templateHash" key. +func extractWlidFromWorkloadID(workloadID string) string { + if idx := strings.LastIndex(workloadID, "/"); idx > 0 { + return workloadID[:idx] + } + return workloadID +} + func isUserManagedNN(nn *v1beta1.NetworkNeighborhood) bool { return nn.Annotations != nil && nn.Annotations[helpersv1.ManagedByMetadataKey] == helpersv1.ManagedByUserValue && diff --git a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go index f2714141c..47ea2097e 100644 --- a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go +++ b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go @@ -80,7 +80,7 @@ func TestPagination(t *testing.T) { spy := &SpyProfileClient{NetworkNeighborhoods: items} - cache := NewNetworkNeighborhoodCache(config.Config{}, spy, nil) + cache := NewNetworkNeighborhoodCache(config.Config{}, spy, nil, nil) // Inject a container so that "default" namespace is processed. cache.containerIDToInfo.Set("test-container", &ContainerInfo{ diff --git a/tests/chart/templates/node-agent/default-rules.yaml b/tests/chart/templates/node-agent/default-rules.yaml index 1a545524c..9dfd4b2ac 100644 --- a/tests/chart/templates/node-agent/default-rules.yaml +++ b/tests/chart/templates/node-agent/default-rules.yaml @@ -583,3 +583,21 @@ spec: - "syscalls" - "io_uring" - "applicationprofile" + - name: "Signed profile tampered" + enabled: true + id: "R1016" + description: "Detects when a previously signed ApplicationProfile or NetworkNeighborhood has been tampered with (signature no longer valid)." + expressions: + message: "'Signed profile tampered'" + uniqueId: "'R1016'" + ruleExpression: [] + profileDependency: 2 + severity: 10 + supportPolicy: false + isTriggerAlert: false + mitreTactic: "TA0005" + mitreTechnique: "T1565" + tags: + - "integrity" + - "signature" + - "tamper" diff --git a/tests/component_test.go b/tests/component_test.go index 794f95397..9c0f8e14a 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2379,6 +2379,252 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { require.Greater(t, countByRule(alerts, "R0011"), 0, "MITM: fusioncore.ai allowed but spoofed IP 8.8.4.4 must fire R0011") }) + + // --------------------------------------------------------------- + // 28e. MITM — real CoreDNS poisoning via template plugin. + // Poisons CoreDNS so fusioncore.ai resolves to 8.8.4.4 + // instead of the legitimate 162.0.217.171. + // + // nslookup triggers the poisoned DNS response. + // R0005 does NOT fire: fusioncore.ai is in the NN egress + // list and BusyBox nslookup does NOT do PTR reverse-lookups. 
+ // R0011 does NOT fire: no TCP egress (DNS is UDP to cluster + // DNS which is a private IP filtered by is_private_ip). + // + // This documents a detection gap: pure DNS MITM (without + // subsequent TCP to the spoofed IP) is invisible to both + // R0005 and R0011 when the domain is already whitelisted. + // + // NOTE: this subtest MUST run last — it modifies the + // cluster-wide CoreDNS configmap. + // --------------------------------------------------------------- + t.Run("mitm_coredns_poisoning", func(t *testing.T) { + wl := setup(t) + ctx := context.Background() + k8sClient := k8sinterface.NewKubernetesApi() + + // ── Back up original CoreDNS Corefile ── + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns configmap") + originalCorefile := cm.Data["Corefile"] + + restartAndWaitCoreDNS := func() { + deploy, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns deployment") + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + _, err = k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}) + require.NoError(t, err, "restart coredns") + + require.Eventually(t, func() bool { + d, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil || d.Spec.Replicas == nil { + return false + } + return d.Status.ReadyReplicas == *d.Spec.Replicas && + d.Status.UpdatedReplicas == *d.Spec.Replicas + }, 60*time.Second, 2*time.Second, "coredns must become ready") + } + + // ── Restore CoreDNS on cleanup (best-effort) ── + t.Cleanup(func() { + t.Log("cleanup: restoring CoreDNS Corefile") + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns cm: %v", err) + return + } + cm.Data["Corefile"] = originalCorefile + if _, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: update coredns cm: %v", err) + return + } + deploy, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns deploy: %v", err) + return + } + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + if _, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: restart coredns: %v", err) + } + }) + + // ── Poison CoreDNS: fusioncore.ai → 8.8.4.4 ── + poisoned := strings.Replace(originalCorefile, + "forward .", + "template IN A fusioncore.ai {\n answer \"fusioncore.ai. 60 IN A 8.8.4.4\"\n fallthrough\n }\n forward .", + 1) + require.NotEqual(t, originalCorefile, poisoned, "template injection must modify Corefile") + + cm.Data["Corefile"] = poisoned + _, err = k8sClient.KubernetesClient.CoreV1(). 
+ ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}) + require.NoError(t, err, "apply poisoned Corefile") + restartAndWaitCoreDNS() + + // Verify poisoned DNS returns the spoofed IP. + require.Eventually(t, func() bool { + stdout, _, _ := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") + return strings.Contains(stdout, "8.8.4.4") + }, 30*time.Second, 3*time.Second, "poisoned CoreDNS must return 8.8.4.4 for fusioncore.ai") + + // ── Trigger alerts ── + // nslookup does DNS only (no TCP egress). + // BusyBox nslookup does NOT do PTR reverse-lookups on result IPs. + stdout, stderr, err := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") + t.Logf("nslookup (poisoned) → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + // R0005 does NOT fire: fusioncore.ai is already in the NN + // egress list, and BusyBox nslookup does NOT perform PTR + // reverse-lookups on result IPs, so no unknown domain is queried. + assert.Equal(t, 0, countByRule(alerts, "R0005"), + "DNS MITM: domain is in NN and no PTR lookup — R0005 should not fire") + + // R0011 does NOT fire: nslookup generates only DNS (UDP) + // traffic to the cluster DNS service, which is a private IP + // excluded by is_private_ip(). + assert.Equal(t, 0, countByRule(alerts, "R0011"), + "DNS MITM: nslookup has no TCP egress — R0011 should not fire") + }) + + // --------------------------------------------------------------- + // 28f. MITM — CoreDNS poisoning with TCP egress. + // Same CoreDNS poisoning as 28e, but now fusioncore.ai + // resolves to 128.130.194.56 (a routable IP that accepts + // TCP on port 80). curl generates a real TCP connection + // to the spoofed IP. + // + // Expected: + // R0005 = 0 — domain is in NN, no PTR reverse-lookup. + // R0011 fires — TCP egress to 128.130.194.56 which is + // NOT in the NN (NN only has 162.0.217.171). + // + // NOTE: runs after 28e; modifies cluster-wide CoreDNS. + // --------------------------------------------------------------- + t.Run("mitm_coredns_poisoning_tcp", func(t *testing.T) { + wl := setup(t) + ctx := context.Background() + k8sClient := k8sinterface.NewKubernetesApi() + + // ── Back up original CoreDNS Corefile ── + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns configmap") + originalCorefile := cm.Data["Corefile"] + + restartAndWaitCoreDNS := func() { + deploy, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns deployment") + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + _, err = k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}) + require.NoError(t, err, "restart coredns") + + require.Eventually(t, func() bool { + d, err := k8sClient.KubernetesClient.AppsV1(). 
+ Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil || d.Spec.Replicas == nil { + return false + } + return d.Status.ReadyReplicas == *d.Spec.Replicas && + d.Status.UpdatedReplicas == *d.Spec.Replicas + }, 60*time.Second, 2*time.Second, "coredns must become ready") + } + + // ── Restore CoreDNS on cleanup (best-effort) ── + t.Cleanup(func() { + t.Log("cleanup: restoring CoreDNS Corefile") + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns cm: %v", err) + return + } + cm.Data["Corefile"] = originalCorefile + if _, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: update coredns cm: %v", err) + return + } + deploy, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns deploy: %v", err) + return + } + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + if _, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: restart coredns: %v", err) + } + }) + + // ── Poison CoreDNS: fusioncore.ai → 128.130.194.56 ── + poisoned := strings.Replace(originalCorefile, + "forward .", + "template IN A fusioncore.ai {\n answer \"fusioncore.ai. 60 IN A 128.130.194.56\"\n fallthrough\n }\n forward .", + 1) + require.NotEqual(t, originalCorefile, poisoned, "template injection must modify Corefile") + + cm.Data["Corefile"] = poisoned + _, err = k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}) + require.NoError(t, err, "apply poisoned Corefile") + restartAndWaitCoreDNS() + + // Verify poisoned DNS returns the spoofed IP. + require.Eventually(t, func() bool { + stdout, _, _ := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") + return strings.Contains(stdout, "128.130.194.56") + }, 30*time.Second, 3*time.Second, "poisoned CoreDNS must return 128.130.194.56 for fusioncore.ai") + + // ── Trigger alerts ── + // curl resolves fusioncore.ai → 128.130.194.56 (poisoned) + // then opens a TCP connection to 128.130.194.56:80. + stdout, stderr, err := wl.ExecIntoPod( + []string{"curl", "-sm5", "http://fusioncore.ai"}, "curl") + t.Logf("curl (poisoned DNS) → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + // R0005 does NOT fire: fusioncore.ai is already in the NN + // egress list, and curl (like BusyBox nslookup) does NOT + // perform PTR reverse-lookups on resolved IPs. + assert.Equal(t, 0, countByRule(alerts, "R0005"), + "DNS MITM: domain is in NN and no PTR lookup — R0005 should not fire") + + // R0011 fires: TCP egress to 128.130.194.56 which is NOT + // in the NN (NN only allows 162.0.217.171). 
+ require.Greater(t, countByRule(alerts, "R0011"), 0, + "DNS MITM: TCP to spoofed IP 128.130.194.56 must fire R0011") + }) } // Test_29_SignedApplicationProfile verifies that a cryptographically signed @@ -2473,12 +2719,18 @@ func Test_29_SignedApplicationProfile(t *testing.T) { t.Logf("curl (allowed) → err=%v stdout=%q stderr=%q", execErr, stdout, stderr) // ── 8. Exec an anomalous binary — should fire R0001 ── + // The user-defined profile may not be cached yet when the first exec runs. + // Re-exec nslookup on each poll so the eBPF event is generated after + // the profile is loaded (same race as the crypto miner test). stdout, stderr, execErr = wl.ExecIntoPod([]string{"nslookup", "ebpf.io"}, "curl") t.Logf("nslookup (anomalous) → err=%v stdout=%q stderr=%q", execErr, stdout, stderr) // ── 9. Wait for R0001 alert ── var alerts []testutils.Alert require.Eventually(t, func() bool { + // Re-exec on each poll to ensure the event arrives after the profile is cached. + wl.ExecIntoPod([]string{"nslookup", "ebpf.io"}, "curl") + alerts, err = testutils.GetAlerts(ns.Name) if err != nil || len(alerts) == 0 { return false @@ -2489,7 +2741,7 @@ func Test_29_SignedApplicationProfile(t *testing.T) { } } return false - }, 60*time.Second, 5*time.Second, "nslookup is not in signed AP — must fire R0001") + }, 120*time.Second, 10*time.Second, "nslookup is not in signed AP — must fire R0001") // Extra settle time. time.Sleep(10 * time.Second) @@ -2766,3 +3018,132 @@ func Test_30_TamperedSignedProfiles(t *testing.T) { t.Log(" the tampered profile would be silently rejected. No R-number fires for tampering.") }) } + +// Test_31_TamperDetectionAlert verifies that R1016 fires when a previously +// signed ApplicationProfile has been tampered with (signature is stale). +// +// This test proves the new tamper-detection alerting: +// - Sign an AP, push to storage +// - Tamper the AP in storage (modify spec, keep stale signature annotations) +// - Deploy a pod referencing the tampered profile +// - R1016 "Signed profile tampered" must fire +// +// R1016 fires regardless of enableSignatureVerification setting. +// The detection happens in the AP cache when it loads the profile. +func Test_31_TamperDetectionAlert(t *testing.T) { + start := time.Now() + defer tearDownTest(t, start) + + ns := testutils.NewRandomNamespace() + k8sClient := k8sinterface.NewKubernetesApi() + storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) + + // ── 1. Build and sign an ApplicationProfile ── + ap := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: "signed-ap", + Namespace: ns.Name, + }, + Spec: v1beta1.ApplicationProfileSpec{ + Containers: []v1beta1.ApplicationProfileContainer{ + { + Name: "curl", + Execs: []v1beta1.ExecCalls{ + {Path: "/bin/sleep"}, + {Path: "/usr/bin/curl"}, + }, + Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, + }, + }, + }, + } + + apAdapter := profiles.NewApplicationProfileAdapter(ap) + require.NoError(t, signature.SignObjectDisableKeyless(apAdapter), "sign AP") + require.True(t, signature.IsSigned(apAdapter), "AP must be signed") + require.NoError(t, signature.VerifyObjectAllowUntrusted(apAdapter), + "signature must verify immediately after signing") + t.Log("AP signed successfully") + + // ── 2. 
+ // ── 2. Tamper the AP (add unauthorized exec path) ──
+ ap.Spec.Containers[0].Execs = append(ap.Spec.Containers[0].Execs,
+ v1beta1.ExecCalls{Path: "/usr/bin/nslookup"})
+
+ // Verify the signature is now invalid
+ tamperedAdapter := profiles.NewApplicationProfileAdapter(ap)
+ require.Error(t, signature.VerifyObjectAllowUntrusted(tamperedAdapter),
+ "tampered AP must fail verification")
+ require.True(t, signature.IsSigned(tamperedAdapter),
+ "tampered AP must still have signature annotations (stale)")
+ t.Log("AP tampered — signature is stale")
+
+ // ── 3. Push tampered AP to storage ──
+ _, err := storageClient.ApplicationProfiles(ns.Name).Create(
+ context.Background(), ap, metav1.CreateOptions{})
+ require.NoError(t, err, "push tampered AP to storage")
+
+ // Verify it's stored with the stale signature
+ require.Eventually(t, func() bool {
+ stored, getErr := storageClient.ApplicationProfiles(ns.Name).Get(
+ context.Background(), "signed-ap", metav1.GetOptions{})
+ if getErr != nil {
+ return false
+ }
+ storedAdapter := profiles.NewApplicationProfileAdapter(stored)
+ return signature.IsSigned(storedAdapter) &&
+ signature.VerifyObjectAllowUntrusted(storedAdapter) != nil
+ }, 30*time.Second, 1*time.Second, "stored AP must have stale signature")
+ t.Log("Tampered AP stored with stale signature")
+
+ // ── 4. Deploy pod referencing the tampered profile ──
+ wl, err := testutils.NewTestWorkload(ns.Name,
+ path.Join(utils.CurrentDir(), "resources/curl-signed-deployment.yaml"))
+ require.NoError(t, err)
+ require.NoError(t, wl.WaitForReady(80))
+ t.Log("Pod deployed, waiting for cache to detect tamper...")
+
+ // ── 5. Wait for R1016 "Signed profile tampered" alert ──
+ // The AP cache's addContainer or periodic update will detect the tampered
+ // signature and emit R1016 via the exporter.
+ var alerts []testutils.Alert
+ require.Eventually(t, func() bool {
+ alerts, err = testutils.GetAlerts(ns.Name)
+ if err != nil || len(alerts) == 0 {
+ return false
+ }
+ for _, a := range alerts {
+ if a.Labels["rule_id"] == "R1016" {
+ return true
+ }
+ }
+ return false
+ }, 120*time.Second, 5*time.Second, "R1016 must fire for tampered signed AP")
+
+ // ── 6. Log all alerts for debugging ──
+ time.Sleep(5 * time.Second)
+ alerts, _ = testutils.GetAlerts(ns.Name)
+
+ t.Logf("=== %d alerts ===", len(alerts))
+ for i, a := range alerts {
+ t.Logf(" [%d] %s(%s) comm=%s container=%s",
+ i, a.Labels["rule_name"], a.Labels["rule_id"],
+ a.Labels["comm"], a.Labels["container_name"])
+ }
+
+ // Verify R1016 alert details
+ r1016Count := 0
+ for _, a := range alerts {
+ if a.Labels["rule_id"] == "R1016" {
+ r1016Count++
+ assert.Equal(t, "Signed profile tampered", a.Labels["rule_name"],
+ "R1016 alert must have correct rule name")
+ assert.Equal(t, ns.Name, a.Labels["namespace"],
+ "R1016 alert must have correct namespace")
+ t.Logf("R1016 alert: rule_name=%s namespace=%s severity=%s",
+ a.Labels["rule_name"], a.Labels["namespace"], a.Labels["severity"])
+ }
+ }
+ require.Greater(t, r1016Count, 0,
+ "R1016 must fire — proves tamper detection alerting works")
+ t.Log("Tamper detection alerting verified successfully")
+}
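
For reference, the decision matrix implemented by `verifyApplicationProfile` and `verifyNetworkNeighborhood` in this patch is:

    signed | verifies | EnableSignatureVerification | outcome
    yes    | yes      | on or off                   | profile loaded
    yes    | no       | off                         | R1016 emitted, profile still loaded
    yes    | no       | on                          | R1016 emitted, profile rejected
    no     | n/a      | off                         | profile loaded
    no     | n/a      | on                          | profile rejected (not signed)

`extractWlidFromWorkloadID` is duplicated in both cache packages and gets no direct unit coverage in this patch. A minimal table-driven test could look like the sketch below; the concrete workloadID strings are hypothetical examples following the `"<wlid>/<templateHash>"` key layout described in the function's comment, not values taken from the repository.

```go
package applicationprofilecache

import "testing"

// Sketch only: exercises the two branches of extractWlidFromWorkloadID.
func TestExtractWlidFromWorkloadID(t *testing.T) {
	tests := []struct {
		name       string
		workloadID string
		want       string
	}{
		{
			// Hypothetical key following the "<wlid>/<templateHash>" layout.
			name:       "strips the templateHash suffix",
			workloadID: "wlid://cluster-test/namespace-default/deployment-web/abc123",
			want:       "wlid://cluster-test/namespace-default/deployment-web",
		},
		{
			// No "/" separator: the input is returned unchanged.
			name:       "key without separator is returned as-is",
			workloadID: "plain-key-without-slash",
			want:       "plain-key-without-slash",
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			if got := extractWlidFromWorkloadID(tc.workloadID); got != tc.want {
				t.Errorf("extractWlidFromWorkloadID(%q) = %q, want %q",
					tc.workloadID, got, tc.want)
			}
		})
	}
}
```

The same table would cover the copy in the networkneighborhoodcache package; moving the helper into a shared package would avoid the duplication entirely.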