From 9681e2bcc0d28957f3afea024bcd02c83977a0b5 Mon Sep 17 00:00:00 2001 From: entlein Date: Sat, 14 Mar 2026 21:34:55 +0100 Subject: [PATCH 1/5] TEST29 minor fix, TEST28 add another dns spoof Signed-off-by: entlein --- tests/component_test.go | 134 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/tests/component_test.go b/tests/component_test.go index 794f95397..723b364ab 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2379,6 +2379,132 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { require.Greater(t, countByRule(alerts, "R0011"), 0, "MITM: fusioncore.ai allowed but spoofed IP 8.8.4.4 must fire R0011") }) + + // --------------------------------------------------------------- + // 28e. MITM — real CoreDNS poisoning via template plugin. + // Poisons CoreDNS so fusioncore.ai resolves to 8.8.4.4 + // instead of the legitimate 162.0.217.171. + // + // nslookup triggers the poisoned DNS response. BusyBox + // nslookup also does a PTR reverse-lookup on the result IP; + // the PTR domain (4.4.8.8.in-addr.arpa.) is NOT in the NN, + // so R0005 (DNS Anomalies) fires. + // + // No TCP egress occurs (nslookup is DNS-only, and the UDP + // DNS query goes to the cluster DNS service which is a + // private IP filtered by R0011), so R0011 does NOT fire. + // + // This documents a detection gap: DNS MITM is visible only + // via the PTR side-channel in R0005, not via R0011. + // + // NOTE: this subtest MUST run after every subtest that needs + // unpoisoned DNS — it modifies the cluster-wide CoreDNS configmap. + // --------------------------------------------------------------- + t.Run("mitm_coredns_poisoning", func(t *testing.T) { + wl := setup(t) + ctx := context.Background() + k8sClient := k8sinterface.NewKubernetesApi() + + // ── Back up original CoreDNS Corefile ── + cm, err := k8sClient.KubernetesClient.CoreV1(). 
+ ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns configmap") + originalCorefile := cm.Data["Corefile"] + + restartAndWaitCoreDNS := func() { + deploy, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns deployment") + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + _, err = k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}) + require.NoError(t, err, "restart coredns") + + require.Eventually(t, func() bool { + d, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil || d.Spec.Replicas == nil { + return false + } + return d.Status.ReadyReplicas == *d.Spec.Replicas && + d.Status.UpdatedReplicas == *d.Spec.Replicas + }, 60*time.Second, 2*time.Second, "coredns must become ready") + } + + // ── Restore CoreDNS on cleanup (best-effort) ── + t.Cleanup(func() { + t.Log("cleanup: restoring CoreDNS Corefile") + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns cm: %v", err) + return + } + cm.Data["Corefile"] = originalCorefile + if _, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: update coredns cm: %v", err) + return + } + deploy, err := k8sClient.KubernetesClient.AppsV1(). 
+ Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns deploy: %v", err) + return + } + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + if _, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: restart coredns: %v", err) + } + }) + + // ── Poison CoreDNS: fusioncore.ai → 8.8.4.4 ── + poisoned := strings.Replace(originalCorefile, + "forward .", + "template IN A fusioncore.ai {\n answer \"fusioncore.ai. 60 IN A 8.8.4.4\"\n fallthrough\n }\n forward .", + 1) + require.NotEqual(t, originalCorefile, poisoned, "template injection must modify Corefile") + + cm.Data["Corefile"] = poisoned + _, err = k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}) + require.NoError(t, err, "apply poisoned Corefile") + restartAndWaitCoreDNS() + + // Verify poisoned DNS returns the spoofed IP. + require.Eventually(t, func() bool { + stdout, _, _ := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") + return strings.Contains(stdout, "8.8.4.4") + }, 30*time.Second, 3*time.Second, "poisoned CoreDNS must return 8.8.4.4 for fusioncore.ai") + + // ── Trigger alerts ── + // nslookup does DNS only (no TCP egress). + // BusyBox nslookup does a PTR reverse-lookup on the result IP. + stdout, stderr, err := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") + t.Logf("nslookup (poisoned) → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + // R0005 fires: the PTR reverse-lookup on the spoofed IP + // (4.4.8.8.in-addr.arpa.) is NOT in the NN. 
+ require.Greater(t, countByRule(alerts, "R0005"), 0, + "DNS MITM: PTR reverse-lookup on spoofed IP must fire R0005") + + // R0011 does NOT fire: nslookup generates only DNS (UDP) + // traffic to the cluster DNS service, which is a private IP + // excluded by is_private_ip(). + assert.Equal(t, 0, countByRule(alerts, "R0011"), + "DNS MITM: nslookup has no TCP egress — R0011 should not fire") + }) } // Test_29_SignedApplicationProfile verifies that a cryptographically signed @@ -2473,12 +2599,18 @@ func Test_29_SignedApplicationProfile(t *testing.T) { t.Logf("curl (allowed) → err=%v stdout=%q stderr=%q", execErr, stdout, stderr) // ── 8. Exec an anomalous binary — should fire R0001 ── + // The user-defined profile may not be cached yet when the first exec runs. + // Re-exec nslookup on each poll so the eBPF event is generated after + // the profile is loaded (same race as the crypto miner test). stdout, stderr, execErr = wl.ExecIntoPod([]string{"nslookup", "ebpf.io"}, "curl") t.Logf("nslookup (anomalous) → err=%v stdout=%q stderr=%q", execErr, stdout, stderr) // ── 9. Wait for R0001 alert ── var alerts []testutils.Alert require.Eventually(t, func() bool { + // Re-exec on each poll to ensure the event arrives after the profile is cached. + wl.ExecIntoPod([]string{"nslookup", "ebpf.io"}, "curl") + alerts, err = testutils.GetAlerts(ns.Name) if err != nil || len(alerts) == 0 { return false @@ -2489,7 +2621,7 @@ func Test_29_SignedApplicationProfile(t *testing.T) { } } return false - }, 60*time.Second, 5*time.Second, "nslookup is not in signed AP — must fire R0001") + }, 120*time.Second, 10*time.Second, "nslookup is not in signed AP — must fire R0001") // Extra settle time. 
time.Sleep(10 * time.Second) From 48c3eb4b04c3d64d4d9c2d34b3343c06e035db12 Mon Sep 17 00:00:00 2001 From: entlein Date: Sat, 14 Mar 2026 21:40:04 +0100 Subject: [PATCH 2/5] TEST28 another spoof, this time TCP Signed-off-by: entlein --- tests/component_test.go | 120 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/tests/component_test.go b/tests/component_test.go index 723b364ab..78cdf1548 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2505,6 +2505,126 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { assert.Equal(t, 0, countByRule(alerts, "R0011"), "DNS MITM: nslookup has no TCP egress — R0011 should not fire") }) + + // --------------------------------------------------------------- + // 28f. MITM — CoreDNS poisoning with TCP egress. + // Same CoreDNS poisoning as 28e, but now fusioncore.ai + // resolves to 128.130.194.56 (a routable IP that accepts + // TCP on port 80). curl generates a real TCP connection + // to the spoofed IP. + // + // Expected: + // R0005 fires — PTR reverse-lookup on the spoofed IP. + // R0011 fires — TCP egress to 128.130.194.56 which is + // NOT in the NN (NN only has 162.0.217.171). + // + // NOTE: runs after 28e; modifies cluster-wide CoreDNS. + // --------------------------------------------------------------- + t.Run("mitm_coredns_poisoning_tcp", func(t *testing.T) { + wl := setup(t) + ctx := context.Background() + k8sClient := k8sinterface.NewKubernetesApi() + + // ── Back up original CoreDNS Corefile ── + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns configmap") + originalCorefile := cm.Data["Corefile"] + + restartAndWaitCoreDNS := func() { + deploy, err := k8sClient.KubernetesClient.AppsV1(). 
+ Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + require.NoError(t, err, "get coredns deployment") + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + _, err = k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}) + require.NoError(t, err, "restart coredns") + + require.Eventually(t, func() bool { + d, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil || d.Spec.Replicas == nil { + return false + } + return d.Status.ReadyReplicas == *d.Spec.Replicas && + d.Status.UpdatedReplicas == *d.Spec.Replicas + }, 60*time.Second, 2*time.Second, "coredns must become ready") + } + + // ── Restore CoreDNS on cleanup (best-effort) ── + t.Cleanup(func() { + t.Log("cleanup: restoring CoreDNS Corefile") + cm, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns cm: %v", err) + return + } + cm.Data["Corefile"] = originalCorefile + if _, err := k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: update coredns cm: %v", err) + return + } + deploy, err := k8sClient.KubernetesClient.AppsV1(). + Deployments("kube-system").Get(ctx, "coredns", metav1.GetOptions{}) + if err != nil { + t.Logf("cleanup: get coredns deploy: %v", err) + return + } + if deploy.Spec.Template.ObjectMeta.Annotations == nil { + deploy.Spec.Template.ObjectMeta.Annotations = make(map[string]string) + } + deploy.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339) + if _, err := k8sClient.KubernetesClient.AppsV1(). 
+ Deployments("kube-system").Update(ctx, deploy, metav1.UpdateOptions{}); err != nil { + t.Logf("cleanup: restart coredns: %v", err) + } + }) + + // ── Poison CoreDNS: fusioncore.ai → 128.130.194.56 ── + poisoned := strings.Replace(originalCorefile, + "forward .", + "template IN A fusioncore.ai {\n answer \"fusioncore.ai. 60 IN A 128.130.194.56\"\n fallthrough\n }\n forward .", + 1) + require.NotEqual(t, originalCorefile, poisoned, "template injection must modify Corefile") + + cm.Data["Corefile"] = poisoned + _, err = k8sClient.KubernetesClient.CoreV1(). + ConfigMaps("kube-system").Update(ctx, cm, metav1.UpdateOptions{}) + require.NoError(t, err, "apply poisoned Corefile") + restartAndWaitCoreDNS() + + // Verify poisoned DNS returns the spoofed IP. + require.Eventually(t, func() bool { + stdout, _, _ := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") + return strings.Contains(stdout, "128.130.194.56") + }, 30*time.Second, 3*time.Second, "poisoned CoreDNS must return 128.130.194.56 for fusioncore.ai") + + // ── Trigger alerts ── + // curl resolves fusioncore.ai → 128.130.194.56 (poisoned) + // then opens a TCP connection to 128.130.194.56:80. + stdout, stderr, err := wl.ExecIntoPod( + []string{"curl", "-sm5", "http://fusioncore.ai"}, "curl") + t.Logf("curl (poisoned DNS) → err=%v stdout=%q stderr=%q", err, stdout, stderr) + + alerts := waitAlerts(t, wl.Namespace) + t.Logf("=== %d alerts ===", len(alerts)) + logAlerts(t, alerts) + + // R0005 fires: fusioncore.ai is in the NN but the PTR + // reverse-lookup on the spoofed IP is not. + require.Greater(t, countByRule(alerts, "R0005"), 0, + "DNS MITM: PTR reverse-lookup on spoofed IP must fire R0005") + + // R0011 fires: TCP egress to 128.130.194.56 which is NOT + // in the NN (NN only allows 162.0.217.171). 
+ require.Greater(t, countByRule(alerts, "R0011"), 0, + "DNS MITM: TCP to spoofed IP 128.130.194.56 must fire R0011") + }) } // Test_29_SignedApplicationProfile verifies that a cryptographically signed From 047684466df55c8b8b8eb5c78c08e63a8dbd562c Mon Sep 17 00:00:00 2001 From: tanzee Date: Sat, 14 Mar 2026 21:49:38 +0100 Subject: [PATCH 3/5] Feature: Tampering Alert, Try1 --- .github/workflows/component-tests.yaml | 4 +- cmd/main.go | 4 +- .../applicationprofilecache.go | 111 +++++++++--- .../applicationprofilecache_test.go | 2 +- .../networkneighborhoodcache.go | 164 ++++++++++++------ .../networkneighborhoodcache_test.go | 2 +- .../templates/node-agent/default-rules.yaml | 18 ++ tests/component_test.go | 129 ++++++++++++++ 8 files changed, 353 insertions(+), 81 deletions(-) diff --git a/.github/workflows/component-tests.yaml b/.github/workflows/component-tests.yaml index 4703e1df7..163b6c50f 100644 --- a/.github/workflows/component-tests.yaml +++ b/.github/workflows/component-tests.yaml @@ -32,6 +32,7 @@ on: branches: - feat/signature-verification - feat/tamperalert + - feat/tamper-detection workflow_dispatch: inputs: build_image: @@ -203,7 +204,8 @@ jobs: Test_27_ApplicationProfileOpens, Test_28_UserDefinedNetworkNeighborhood, Test_29_SignedApplicationProfile, - Test_30_TamperedSignedProfiles + Test_30_TamperedSignedProfiles, + Test_31_TamperDetectionAlert ] steps: - name: Checkout code diff --git a/cmd/main.go b/cmd/main.go index 7a5f850a1..494e506cc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -294,10 +294,10 @@ func main() { ruleBindingNotify = make(chan rulebinding.RuleBindingNotify, 100) ruleBindingCache.AddNotifier(&ruleBindingNotify) - apc := applicationprofilecache.NewApplicationProfileCache(cfg, storageClient, k8sObjectCache) + apc := applicationprofilecache.NewApplicationProfileCache(cfg, storageClient, k8sObjectCache, exporter) apc.Start(ctx) - nnc := networkneighborhoodcache.NewNetworkNeighborhoodCache(cfg, storageClient, k8sObjectCache) + nnc := 
networkneighborhoodcache.NewNetworkNeighborhoodCache(cfg, storageClient, k8sObjectCache, exporter) nnc.Start(ctx) dc := dnscache.NewDnsCache(dnsResolver) diff --git a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go index 59686128c..7f59dc401 100644 --- a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go +++ b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go @@ -2,7 +2,6 @@ package applicationprofilecache import ( "context" - "errors" "fmt" "strings" "sync" @@ -15,10 +14,13 @@ import ( "github.com/kubescape/go-logger" "github.com/kubescape/go-logger/helpers" helpersv1 "github.com/kubescape/k8s-interface/instanceidhandler/v1/helpers" + "github.com/armosec/armoapi-go/armotypes" "github.com/kubescape/node-agent/pkg/config" + "github.com/kubescape/node-agent/pkg/exporters" "github.com/kubescape/node-agent/pkg/objectcache" "github.com/kubescape/node-agent/pkg/objectcache/applicationprofilecache/callstackcache" "github.com/kubescape/node-agent/pkg/resourcelocks" + "github.com/kubescape/node-agent/pkg/rulemanager/types" "github.com/kubescape/node-agent/pkg/signature" "github.com/kubescape/node-agent/pkg/signature/profiles" "github.com/kubescape/node-agent/pkg/storage" @@ -51,6 +53,7 @@ type ApplicationProfileCacheImpl struct { containerToCallStackIndex maps.SafeMap[string, *ContainerCallStackIndex] storageClient storage.ProfileClient k8sObjectCache objectcache.K8sObjectCache + exporter exporters.Exporter // Exporter for sending tamper detection alerts updateInterval time.Duration updateInProgress bool // Flag to track if update is in progress updateMutex sync.Mutex // Mutex to protect the flag @@ -58,7 +61,7 @@ type ApplicationProfileCacheImpl struct { } // NewApplicationProfileCache creates a new application profile cache with periodic updates -func NewApplicationProfileCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache 
objectcache.K8sObjectCache) *ApplicationProfileCacheImpl { +func NewApplicationProfileCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache, exporter exporters.Exporter) *ApplicationProfileCacheImpl { updateInterval := utils.AddJitter(cfg.ProfilesCacheRefreshRate, 10) // Add 10% jitter to avoid high load on the storage apc := &ApplicationProfileCacheImpl{ @@ -70,6 +73,7 @@ func NewApplicationProfileCache(cfg config.Config, storageClient storage.Profile containerToCallStackIndex: maps.SafeMap[string, *ContainerCallStackIndex]{}, storageClient: storageClient, k8sObjectCache: k8sObjectCache, + exporter: exporter, updateInterval: updateInterval, containerLocks: resourcelocks.New(), } @@ -274,42 +278,101 @@ func (apc *ApplicationProfileCacheImpl) updateAllProfiles(ctx context.Context) { } } -// verifyApplicationProfile verifies the profile signature if verification is enabled. -// Returns error if verification fails, nil otherwise (including when verification is disabled). -// Also updates profileState with error details if verification fails. +// verifyApplicationProfile verifies the profile signature. +// Always checks signed profiles for tamper (emits R1016 alert on tamper). +// When EnableSignatureVerification is true, also rejects tampered/unsigned profiles. +// Returns error if the profile should not be loaded, nil otherwise. 
func (apc *ApplicationProfileCacheImpl) verifyApplicationProfile(profile *v1beta1.ApplicationProfile, workloadID, context string, recordFailure bool) error { - if !apc.cfg.EnableSignatureVerification { - return nil - } profileAdapter := profiles.NewApplicationProfileAdapter(profile) - if err := signature.VerifyObject(profileAdapter); err != nil { - // Only warn if signature exists but doesn't match; missing signatures are debug - if errors.Is(err, signature.ErrObjectNotSigned) { - logger.L().Debug(context+" is not signed, skipping", - helpers.String("profile", profile.Name), - helpers.String("namespace", profile.Namespace), - helpers.String("workloadID", workloadID)) - } else { - logger.L().Warning(context+" signature verification failed, skipping", + + // Always check signed profiles for tamper, regardless of enforcement setting + if signature.IsSigned(profileAdapter) { + if err := signature.VerifyObject(profileAdapter); err != nil { + // Signed profile failed verification → tamper detected + logger.L().Warning(context+" signature verification failed (tamper detected)", helpers.String("profile", profile.Name), helpers.String("namespace", profile.Namespace), helpers.String("workloadID", workloadID), helpers.Error(err)) + + // Emit R1016 tamper alert + apc.emitTamperAlert(profile.Name, profile.Namespace, workloadID, "ApplicationProfile", err) + + if apc.cfg.EnableSignatureVerification { + if recordFailure { + apc.setVerificationFailed(workloadID, profile.Name, err) + } + return err + } + // Enforcement off: allow loading despite tamper + return nil } + logger.L().Debug(context+" verification successful", + helpers.String("profile", profile.Name), + helpers.String("namespace", profile.Namespace)) + return nil + } - // Update profile state with verification error + // Profile is not signed + if apc.cfg.EnableSignatureVerification { + logger.L().Debug(context+" is not signed, skipping", + helpers.String("profile", profile.Name), + helpers.String("namespace", 
profile.Namespace), + helpers.String("workloadID", workloadID)) if recordFailure { - apc.setVerificationFailed(workloadID, profile.Name, err) + apc.setVerificationFailed(workloadID, profile.Name, signature.ErrObjectNotSigned) } - - return err + return signature.ErrObjectNotSigned } - logger.L().Debug(context+" verification successful", - helpers.String("profile", profile.Name), - helpers.String("namespace", profile.Namespace)) + return nil } +// emitTamperAlert sends an R1016 "Signed profile tampered" alert via the exporter. +func (apc *ApplicationProfileCacheImpl) emitTamperAlert(profileName, namespace, workloadID, objectKind string, verifyErr error) { + if apc.exporter == nil { + return + } + + ruleFailure := &types.GenericRuleFailure{ + BaseRuntimeAlert: armotypes.BaseRuntimeAlert{ + AlertName: "Signed profile tampered", + InfectedPID: 1, + Severity: 10, + FixSuggestions: "Investigate who modified the " + objectKind + " '" + profileName + "' in namespace '" + namespace + "'. Re-sign the profile after verifying its contents.", + }, + AlertType: armotypes.AlertTypeRule, + RuntimeProcessDetails: armotypes.ProcessTree{ + ProcessTree: armotypes.Process{ + PID: 1, + Comm: "node-agent", + }, + }, + RuleAlert: armotypes.RuleAlert{ + RuleDescription: fmt.Sprintf("Signed %s '%s' in namespace '%s' has been tampered with: %v", objectKind, profileName, namespace, verifyErr), + }, + RuntimeAlertK8sDetails: armotypes.RuntimeAlertK8sDetails{ + Namespace: namespace, + }, + RuleID: "R1016", + } + + // Populate workload details from workloadID if available + ruleFailure.SetWorkloadDetails(extractWlidFromWorkloadID(workloadID)) + + apc.exporter.SendRuleAlert(ruleFailure) +} + +// extractWlidFromWorkloadID extracts the wlid part from a "wlid/templateHash" key. 
+func extractWlidFromWorkloadID(workloadID string) string { + if idx := strings.LastIndex(workloadID, "/"); idx > 0 { + // workloadID format is "<wlid>/<templateHash>" + // The templateHash follows the last "/"; keep everything before it + return workloadID[:idx] + } + return workloadID +} + func (apc *ApplicationProfileCacheImpl) setVerificationFailed(workloadID, profileName string, err error) { profileState := &objectcache.ProfileState{ Completion: "failed", diff --git a/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go b/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go index 7ce56181c..6a89edcb2 100644 --- a/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go +++ b/pkg/objectcache/applicationprofilecache/applicationprofilecache_test.go @@ -77,7 +77,7 @@ func TestPagination(t *testing.T) { spy := &SpyProfileClient{Profiles: profiles} // mock k8s object cache is irrelevant since we inject container info directly - cache := NewApplicationProfileCache(config.Config{}, spy, nil) + cache := NewApplicationProfileCache(config.Config{}, spy, nil, nil) // Inject a container so that "default" namespace is processed. 
// The WorkloadID needs to match something if we want deeper logic to run, diff --git a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go index 394bb4485..a845a549f 100644 --- a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go +++ b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go @@ -14,9 +14,12 @@ import ( "github.com/kubescape/go-logger" "github.com/kubescape/go-logger/helpers" helpersv1 "github.com/kubescape/k8s-interface/instanceidhandler/v1/helpers" + "github.com/armosec/armoapi-go/armotypes" "github.com/kubescape/node-agent/pkg/config" + "github.com/kubescape/node-agent/pkg/exporters" "github.com/kubescape/node-agent/pkg/objectcache" "github.com/kubescape/node-agent/pkg/resourcelocks" + "github.com/kubescape/node-agent/pkg/rulemanager/types" "github.com/kubescape/node-agent/pkg/signature" "github.com/kubescape/node-agent/pkg/signature/profiles" "github.com/kubescape/node-agent/pkg/storage" @@ -44,6 +47,7 @@ type NetworkNeighborhoodCacheImpl struct { networkNeighborhoodToUserManagedIdentifier maps.SafeMap[string, string] // networkNeighborhoodName -> user-managed profile unique identifier storageClient storage.ProfileClient k8sObjectCache objectcache.K8sObjectCache + exporter exporters.Exporter // Exporter for sending tamper detection alerts updateInterval time.Duration updateInProgress bool // Flag to track if update is in progress updateMutex sync.Mutex // Mutex to protect the flag @@ -51,7 +55,7 @@ type NetworkNeighborhoodCacheImpl struct { } // NewNetworkNeighborhoodCache creates a new network neighborhood cache with periodic updates -func NewNetworkNeighborhoodCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache) *NetworkNeighborhoodCacheImpl { +func NewNetworkNeighborhoodCache(cfg config.Config, storageClient storage.ProfileClient, k8sObjectCache objectcache.K8sObjectCache, 
exporter exporters.Exporter) *NetworkNeighborhoodCacheImpl { updateInterval := utils.AddJitter(cfg.ProfilesCacheRefreshRate, 10) // Add 10% jitter to avoid high load on the storage nnc := &NetworkNeighborhoodCacheImpl{ @@ -62,6 +66,7 @@ func NewNetworkNeighborhoodCache(cfg config.Config, storageClient storage.Profil networkNeighborhoodToUserManagedIdentifier: maps.SafeMap[string, string]{}, storageClient: storageClient, k8sObjectCache: k8sObjectCache, + exporter: exporter, updateInterval: updateInterval, containerLocks: resourcelocks.New(), } @@ -248,19 +253,12 @@ func (nnc *NetworkNeighborhoodCacheImpl) updateAllNetworkNeighborhoods(ctx conte continue } - // Verify signature if enabled - if nnc.cfg.EnableSignatureVerification { - adapter := profiles.NewNetworkNeighborhoodAdapter(fullNN) - if err := signature.VerifyObjectStrict(adapter); err != nil { - logger.L().Warning("network neighborhood signature verification failed, skipping", - helpers.String("workloadID", workloadID), - helpers.String("namespace", namespace), - helpers.String("name", fullNN.Name), - helpers.Error(err)) - profileState.Error = fmt.Errorf("signature verification failed: %w", err) - nnc.workloadIDToProfileState.Set(workloadID, profileState) - continue - } + // Verify signature — always check signed NNs for tamper (R1016), + // enforcement mode only controls whether tampered NNs are loaded. 
+ if err := nnc.verifyNetworkNeighborhood(fullNN, workloadID); err != nil { + profileState.Error = fmt.Errorf("signature verification failed: %w", err) + nnc.workloadIDToProfileState.Set(workloadID, profileState) + continue } nnc.workloadIDToNetworkNeighborhood.Set(workloadID, fullNN) @@ -335,47 +333,31 @@ func (nnc *NetworkNeighborhoodCacheImpl) handleUserManagedNetworkNeighborhood(nn } // Verify signature on the original network neighborhood before merging - if nnc.cfg.EnableSignatureVerification { - adapter := profiles.NewNetworkNeighborhoodAdapter(originalNN) - if err := signature.VerifyObjectStrict(adapter); err != nil { - logger.L().Warning("original network neighborhood signature verification failed, skipping merge", - helpers.String("workloadID", toMerge.wlid), - helpers.String("namespace", originalNN.Namespace), - helpers.String("name", originalNN.Name), - helpers.Error(err)) - profileState := &objectcache.ProfileState{ - Completion: originalNN.Annotations[helpersv1.CompletionMetadataKey], - Status: originalNN.Annotations[helpersv1.StatusMetadataKey], - Name: originalNN.Name, - Error: fmt.Errorf("signature verification failed: %w", err), - } - nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) - // Evict stale merged profile from cache on verification failure - nnc.workloadIDToNetworkNeighborhood.Delete(toMerge.wlid) - return - } + if err := nnc.verifyNetworkNeighborhood(originalNN, toMerge.wlid); err != nil { + profileState := &objectcache.ProfileState{ + Completion: originalNN.Annotations[helpersv1.CompletionMetadataKey], + Status: originalNN.Annotations[helpersv1.StatusMetadataKey], + Name: originalNN.Name, + Error: fmt.Errorf("signature verification failed: %w", err), + } + nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) + // Evict stale merged profile from cache on verification failure + nnc.workloadIDToNetworkNeighborhood.Delete(toMerge.wlid) + return } // Verify signature on the user-managed network neighborhood before merging 
- if nnc.cfg.EnableSignatureVerification { - adapter := profiles.NewNetworkNeighborhoodAdapter(fullUserNN) - if err := signature.VerifyObjectStrict(adapter); err != nil { - logger.L().Warning("user-managed network neighborhood signature verification failed, skipping merge", - helpers.String("workloadID", toMerge.wlid), - helpers.String("namespace", fullUserNN.Namespace), - helpers.String("name", fullUserNN.Name), - helpers.Error(err)) - profileState := &objectcache.ProfileState{ - Completion: fullUserNN.Annotations[helpersv1.CompletionMetadataKey], - Status: fullUserNN.Annotations[helpersv1.StatusMetadataKey], - Name: fullUserNN.Name, - Error: fmt.Errorf("signature verification failed: %w", err), - } - nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) - // Restore cache to originalNN on user-managed verification failure - nnc.workloadIDToNetworkNeighborhood.Set(toMerge.wlid, originalNN) - return - } + if err := nnc.verifyNetworkNeighborhood(fullUserNN, toMerge.wlid); err != nil { + profileState := &objectcache.ProfileState{ + Completion: fullUserNN.Annotations[helpersv1.CompletionMetadataKey], + Status: fullUserNN.Annotations[helpersv1.StatusMetadataKey], + Name: fullUserNN.Name, + Error: fmt.Errorf("signature verification failed: %w", err), + } + nnc.workloadIDToProfileState.Set(toMerge.wlid, profileState) + // Restore cache to originalNN on user-managed verification failure + nnc.workloadIDToNetworkNeighborhood.Set(toMerge.wlid, originalNN) + return } // Merge the network neighborhoods @@ -848,6 +830,84 @@ func (nnc *NetworkNeighborhoodCacheImpl) workloadHasUserDefinedNetwork(workloadI return found } +// verifyNetworkNeighborhood verifies the NN signature. +// Always checks signed NNs for tamper (emits R1016 alert on tamper). +// When EnableSignatureVerification is true, also rejects tampered/unsigned NNs. +// Returns error if the NN should not be loaded, nil otherwise. 
+func (nnc *NetworkNeighborhoodCacheImpl) verifyNetworkNeighborhood(nn *v1beta1.NetworkNeighborhood, workloadID string) error { + adapter := profiles.NewNetworkNeighborhoodAdapter(nn) + + // Always check signed NNs for tamper, regardless of enforcement setting + if signature.IsSigned(adapter) { + if err := signature.VerifyObjectStrict(adapter); err != nil { + logger.L().Warning("network neighborhood signature verification failed (tamper detected)", + helpers.String("name", nn.Name), + helpers.String("namespace", nn.Namespace), + helpers.String("workloadID", workloadID), + helpers.Error(err)) + + // Emit R1016 tamper alert + nnc.emitTamperAlert(nn.Name, nn.Namespace, workloadID, "NetworkNeighborhood", err) + + if nnc.cfg.EnableSignatureVerification { + return err + } + // Enforcement off: allow loading despite tamper + return nil + } + return nil + } + + // Not signed + if nnc.cfg.EnableSignatureVerification { + return fmt.Errorf("network neighborhood is not signed") + } + return nil +} + +// emitTamperAlert sends an R1016 "Signed profile tampered" alert via the exporter. +func (nnc *NetworkNeighborhoodCacheImpl) emitTamperAlert(nnName, namespace, workloadID, objectKind string, verifyErr error) { + if nnc.exporter == nil { + return + } + + ruleFailure := &types.GenericRuleFailure{ + BaseRuntimeAlert: armotypes.BaseRuntimeAlert{ + AlertName: "Signed profile tampered", + InfectedPID: 1, + Severity: 10, + FixSuggestions: "Investigate who modified the " + objectKind + " '" + nnName + "' in namespace '" + namespace + "'. 
Re-sign the profile after verifying its contents.", + }, + AlertType: armotypes.AlertTypeRule, + RuntimeProcessDetails: armotypes.ProcessTree{ + ProcessTree: armotypes.Process{ + PID: 1, + Comm: "node-agent", + }, + }, + RuleAlert: armotypes.RuleAlert{ + RuleDescription: fmt.Sprintf("Signed %s '%s' in namespace '%s' has been tampered with: %v", objectKind, nnName, namespace, verifyErr), + }, + RuntimeAlertK8sDetails: armotypes.RuntimeAlertK8sDetails{ + Namespace: namespace, + }, + RuleID: "R1016", + } + + // Populate workload details from workloadID if available + ruleFailure.SetWorkloadDetails(extractWlidFromWorkloadID(workloadID)) + + nnc.exporter.SendRuleAlert(ruleFailure) +} + +// extractWlidFromWorkloadID extracts the wlid part from a "wlid/templateHash" key. +func extractWlidFromWorkloadID(workloadID string) string { + if idx := strings.LastIndex(workloadID, "/"); idx > 0 { + return workloadID[:idx] + } + return workloadID +} + func isUserManagedNN(nn *v1beta1.NetworkNeighborhood) bool { return nn.Annotations != nil && nn.Annotations[helpersv1.ManagedByMetadataKey] == helpersv1.ManagedByUserValue && diff --git a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go index f2714141c..47ea2097e 100644 --- a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go +++ b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache_test.go @@ -80,7 +80,7 @@ func TestPagination(t *testing.T) { spy := &SpyProfileClient{NetworkNeighborhoods: items} - cache := NewNetworkNeighborhoodCache(config.Config{}, spy, nil) + cache := NewNetworkNeighborhoodCache(config.Config{}, spy, nil, nil) // Inject a container so that "default" namespace is processed. 
cache.containerIDToInfo.Set("test-container", &ContainerInfo{ diff --git a/tests/chart/templates/node-agent/default-rules.yaml b/tests/chart/templates/node-agent/default-rules.yaml index 1a545524c..9dfd4b2ac 100644 --- a/tests/chart/templates/node-agent/default-rules.yaml +++ b/tests/chart/templates/node-agent/default-rules.yaml @@ -583,3 +583,21 @@ spec: - "syscalls" - "io_uring" - "applicationprofile" + - name: "Signed profile tampered" + enabled: true + id: "R1016" + description: "Detects when a previously signed ApplicationProfile or NetworkNeighborhood has been tampered with (signature no longer valid)." + expressions: + message: "'Signed profile tampered'" + uniqueId: "'R1016'" + ruleExpression: [] + profileDependency: 2 + severity: 10 + supportPolicy: false + isTriggerAlert: false + mitreTactic: "TA0005" + mitreTechnique: "T1565" + tags: + - "integrity" + - "signature" + - "tamper" diff --git a/tests/component_test.go b/tests/component_test.go index 78cdf1548..2d1b78dc9 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -3018,3 +3018,132 @@ func Test_30_TamperedSignedProfiles(t *testing.T) { t.Log(" the tampered profile would be silently rejected. No R-number fires for tampering.") }) } + +// Test_31_TamperDetectionAlert verifies that R1016 fires when a previously +// signed ApplicationProfile has been tampered with (signature is stale). +// +// This test proves the new tamper-detection alerting: +// - Sign an AP, push to storage +// - Tamper the AP in storage (modify spec, keep stale signature annotations) +// - Deploy a pod referencing the tampered profile +// - R1016 "Signed profile tampered" must fire +// +// R1016 fires regardless of enableSignatureVerification setting. +// The detection happens in the AP cache when it loads the profile. 
+func Test_31_TamperDetectionAlert(t *testing.T) { + start := time.Now() + defer tearDownTest(t, start) + + ns := testutils.NewRandomNamespace() + k8sClient := k8sinterface.NewKubernetesApi() + storageClient := spdxv1beta1client.NewForConfigOrDie(k8sClient.K8SConfig) + + // ── 1. Build and sign an ApplicationProfile ── + ap := &v1beta1.ApplicationProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: "signed-ap", + Namespace: ns.Name, + }, + Spec: v1beta1.ApplicationProfileSpec{ + Containers: []v1beta1.ApplicationProfileContainer{ + { + Name: "curl", + Execs: []v1beta1.ExecCalls{ + {Path: "/bin/sleep"}, + {Path: "/usr/bin/curl"}, + }, + Syscalls: []string{"socket", "connect", "read", "write", "close", "openat"}, + }, + }, + }, + } + + apAdapter := profiles.NewApplicationProfileAdapter(ap) + require.NoError(t, signature.SignObjectDisableKeyless(apAdapter), "sign AP") + require.True(t, signature.IsSigned(apAdapter), "AP must be signed") + require.NoError(t, signature.VerifyObjectAllowUntrusted(apAdapter), + "signature must verify immediately after signing") + t.Log("AP signed successfully") + + // ── 2. Tamper the AP (add unauthorized exec path) ── + ap.Spec.Containers[0].Execs = append(ap.Spec.Containers[0].Execs, + v1beta1.ExecCalls{Path: "/usr/bin/nslookup"}) + + // Verify the signature is now invalid + tamperedAdapter := profiles.NewApplicationProfileAdapter(ap) + require.Error(t, signature.VerifyObjectAllowUntrusted(tamperedAdapter), + "tampered AP must fail verification") + require.True(t, signature.IsSigned(tamperedAdapter), + "tampered AP must still have signature annotations (stale)") + t.Log("AP tampered — signature is stale") + + // ── 3. 
Push tampered AP to storage ── + _, err := storageClient.ApplicationProfiles(ns.Name).Create( + context.Background(), ap, metav1.CreateOptions{}) + require.NoError(t, err, "push tampered AP to storage") + + // Verify it's stored with stale signature + require.Eventually(t, func() bool { + stored, getErr := storageClient.ApplicationProfiles(ns.Name).Get( + context.Background(), "signed-ap", v1.GetOptions{}) + if getErr != nil { + return false + } + storedAdapter := profiles.NewApplicationProfileAdapter(stored) + return signature.IsSigned(storedAdapter) && + signature.VerifyObjectAllowUntrusted(storedAdapter) != nil + }, 30*time.Second, 1*time.Second, "stored AP must have stale signature") + t.Log("Tampered AP stored with stale signature") + + // ── 4. Deploy pod referencing the tampered profile ── + wl, err := testutils.NewTestWorkload(ns.Name, + path.Join(utils.CurrentDir(), "resources/curl-signed-deployment.yaml")) + require.NoError(t, err) + require.NoError(t, wl.WaitForReady(80)) + t.Log("Pod deployed, waiting for cache to detect tamper...") + + // ── 5. Wait for R1016 "Signed profile tampered" alert ── + // The AP cache's addContainer or periodicUpdate will detect the tampered + // signature and emit R1016 via the exporter. + var alerts []testutils.Alert + require.Eventually(t, func() bool { + alerts, err = testutils.GetAlerts(ns.Name) + if err != nil || len(alerts) == 0 { + return false + } + for _, a := range alerts { + if a.Labels["rule_id"] == "R1016" { + return true + } + } + return false + }, 120*time.Second, 5*time.Second, "R1016 must fire for tampered signed AP") + + // ── 6. 
Log all alerts for debugging ── + time.Sleep(5 * time.Second) + alerts, _ = testutils.GetAlerts(ns.Name) + + t.Logf("=== %d alerts ===", len(alerts)) + for i, a := range alerts { + t.Logf(" [%d] %s(%s) comm=%s container=%s", + i, a.Labels["rule_name"], a.Labels["rule_id"], + a.Labels["comm"], a.Labels["container_name"]) + } + + // Verify R1016 alert details + r1016Count := 0 + for _, a := range alerts { + if a.Labels["rule_id"] == "R1016" { + r1016Count++ + assert.Equal(t, "Signed profile tampered", a.Labels["rule_name"], + "R1016 alert must have correct rule name") + assert.Equal(t, ns.Name, a.Labels["namespace"], + "R1016 alert must have correct namespace") + t.Logf("R1016 alert: rule_name=%s namespace=%s severity=%s", + a.Labels["rule_name"], a.Labels["namespace"], a.Labels["severity"]) + } + } + require.Greater(t, r1016Count, 0, + "R1016 must fire — proves tamper detection alerting works") + t.Log("Tamper detection alerting verified successfully") +} From 995a7bcb8209e9e43607aa5539062e4ed1650a5d Mon Sep 17 00:00:00 2001 From: entlein Date: Sat, 14 Mar 2026 22:15:31 +0100 Subject: [PATCH 4/5] We seem to have lost some profiles or not synched them into cache Signed-off-by: entlein --- .../applicationprofilecache.go | 6 +++++ pkg/rulemanager/rule_manager.go | 26 ++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go index 7f59dc401..662695e62 100644 --- a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go +++ b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go @@ -670,6 +670,12 @@ func (apc *ApplicationProfileCacheImpl) addContainer(container *containercollect // Update the profile in the cache apc.workloadIDToProfile.Set(workloadID, fullProfile) + profileState := &objectcache.ProfileState{ + Completion: fullProfile.Annotations[helpersv1.CompletionMetadataKey], + Status: 
fullProfile.Annotations[helpersv1.StatusMetadataKey], + Name: fullProfile.Name, + } + apc.workloadIDToProfileState.Set(workloadID, profileState) logger.L().Debug("added user-defined profile to cache", helpers.String("containerID", containerID), helpers.String("workloadID", workloadID), diff --git a/pkg/rulemanager/rule_manager.go b/pkg/rulemanager/rule_manager.go index f08cb8bfa..70f7464f1 100644 --- a/pkg/rulemanager/rule_manager.go +++ b/pkg/rulemanager/rule_manager.go @@ -4,6 +4,7 @@ import ( "context" "crypto/md5" "fmt" + "slices" "time" "github.com/armosec/armoapi-go/armotypes" @@ -148,7 +149,6 @@ func (rm *RuleManager) startRuleManager(container *containercollection.Container func (rm *RuleManager) ReportEnrichedEvent(enrichedEvent *events.EnrichedEvent) { rm.enrichEventWithContext(enrichedEvent) - var profileExists bool var details string namespace := enrichedEvent.Event.GetNamespace() pod := enrichedEvent.Event.GetPod() @@ -195,7 +195,9 @@ func (rm *RuleManager) ReportEnrichedEvent(enrichedEvent *events.EnrichedEvent) } _, apChecksum, err := profilehelper.GetContainerApplicationProfile(rm.objectCache, enrichedEvent.ContainerID) - profileExists = err == nil + apExists := err == nil + + nnExists := rm.objectCache.NetworkNeighborhoodCache().GetNetworkNeighborhood(enrichedEvent.ContainerID) != nil // Early exit if monitoring is disabled for this context - skip rule evaluation if !rm.isMonitoringEnabledForContext(enrichedEvent.SourceContext) { @@ -211,10 +213,22 @@ func (rm *RuleManager) ReportEnrichedEvent(enrichedEvent *events.EnrichedEvent) if !RuleAppliesToContext(&rule, enrichedEvent.SourceContext) { continue } - // Skip profile dependency checks for non-K8s contexts (profiles are K8s-specific) - // Only K8s contexts should enforce profile dependencies - if isK8sContext && !profileExists && rule.ProfileDependency == armotypes.Required { - continue + // Skip rules whose required profile is not available. 
+ // Rules tagged "networkprofile" depend on the NN; rules tagged + // "applicationprofile" depend on the AP. Check the correct one. + if isK8sContext && rule.ProfileDependency == armotypes.Required { + needsNN := slices.Contains(rule.Tags, types.NetworkProfile) + needsAP := slices.Contains(rule.Tags, types.ApplicationProfile) + if needsNN && !nnExists { + continue + } + if needsAP && !apExists { + continue + } + // Legacy fallback: rules without a profile-type tag use AP + if !needsNN && !needsAP && !apExists { + continue + } } ruleExpressions := rm.getRuleExpressions(rule, eventType) From 0cd1992a56ee19634e97832bff02e7908570520b Mon Sep 17 00:00:00 2001 From: entlein Date: Sun, 15 Mar 2026 09:53:06 +0100 Subject: [PATCH 5/5] Profile state cache: new error message Signed-off-by: entlein --- .../applicationprofilecache.go | 11 ++--- .../networkneighborhoodcache.go | 11 ++--- pkg/rulemanager/rule_manager.go | 26 +++--------- tests/component_test.go | 40 +++++++++---------- 4 files changed, 38 insertions(+), 50 deletions(-) diff --git a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go index 662695e62..ffcd145fb 100644 --- a/pkg/objectcache/applicationprofilecache/applicationprofilecache.go +++ b/pkg/objectcache/applicationprofilecache/applicationprofilecache.go @@ -620,7 +620,9 @@ func (apc *ApplicationProfileCacheImpl) addContainer(container *containercollect } } } else { - apc.workloadIDToProfileState.Set(workloadID, nil) + apc.workloadIDToProfileState.Set(workloadID, &objectcache.ProfileState{ + Error: fmt.Errorf("waiting for profile update"), + }) } // Create container info @@ -851,10 +853,9 @@ func (apc *ApplicationProfileCacheImpl) GetApplicationProfileState(containerID s if profileState, exists := apc.workloadIDToProfileState.Load(workloadID); exists { if profileState != nil { return profileState - } else { - return &objectcache.ProfileState{ - Error: fmt.Errorf("profile 
state not available - shouldn't happen"), - } + } + return &objectcache.ProfileState{ + Error: fmt.Errorf("application profile state is nil for workload %s", workloadID), } } diff --git a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go index a845a549f..6efd6042a 100644 --- a/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go +++ b/pkg/objectcache/networkneighborhoodcache/networkneighborhoodcache.go @@ -524,7 +524,9 @@ func (nnc *NetworkNeighborhoodCacheImpl) addContainer(container *containercollec // Create workload ID to state mapping if _, exists := nnc.workloadIDToProfileState.Load(workloadID); !exists { - nnc.workloadIDToProfileState.Set(workloadID, nil) + nnc.workloadIDToProfileState.Set(workloadID, &objectcache.ProfileState{ + Error: fmt.Errorf("waiting for profile update"), + }) } logger.L().Debug("container added to cache", @@ -635,10 +637,9 @@ func (nnc *NetworkNeighborhoodCacheImpl) GetNetworkNeighborhoodState(containerID if profileState, exists := nnc.workloadIDToProfileState.Load(workloadID); exists { if profileState != nil { return profileState - } else { - return &objectcache.ProfileState{ - Error: fmt.Errorf("profile state not available - shouldn't happen"), - } + } + return &objectcache.ProfileState{ + Error: fmt.Errorf("network neighborhood state is nil for workload %s", workloadID), } } diff --git a/pkg/rulemanager/rule_manager.go b/pkg/rulemanager/rule_manager.go index 70f7464f1..f08cb8bfa 100644 --- a/pkg/rulemanager/rule_manager.go +++ b/pkg/rulemanager/rule_manager.go @@ -4,7 +4,6 @@ import ( "context" "crypto/md5" "fmt" - "slices" "time" "github.com/armosec/armoapi-go/armotypes" @@ -149,6 +148,7 @@ func (rm *RuleManager) startRuleManager(container *containercollection.Container func (rm *RuleManager) ReportEnrichedEvent(enrichedEvent *events.EnrichedEvent) { rm.enrichEventWithContext(enrichedEvent) + var profileExists bool var details 
string namespace := enrichedEvent.Event.GetNamespace() pod := enrichedEvent.Event.GetPod() @@ -195,9 +195,7 @@ func (rm *RuleManager) ReportEnrichedEvent(enrichedEvent *events.EnrichedEvent) } _, apChecksum, err := profilehelper.GetContainerApplicationProfile(rm.objectCache, enrichedEvent.ContainerID) - apExists := err == nil - - nnExists := rm.objectCache.NetworkNeighborhoodCache().GetNetworkNeighborhood(enrichedEvent.ContainerID) != nil + profileExists = err == nil // Early exit if monitoring is disabled for this context - skip rule evaluation if !rm.isMonitoringEnabledForContext(enrichedEvent.SourceContext) { @@ -213,22 +211,10 @@ func (rm *RuleManager) ReportEnrichedEvent(enrichedEvent *events.EnrichedEvent) if !RuleAppliesToContext(&rule, enrichedEvent.SourceContext) { continue } - // Skip rules whose required profile is not available. - // Rules tagged "networkprofile" depend on the NN; rules tagged - // "applicationprofile" depend on the AP. Check the correct one. - if isK8sContext && rule.ProfileDependency == armotypes.Required { - needsNN := slices.Contains(rule.Tags, types.NetworkProfile) - needsAP := slices.Contains(rule.Tags, types.ApplicationProfile) - if needsNN && !nnExists { - continue - } - if needsAP && !apExists { - continue - } - // Legacy fallback: rules without a profile-type tag use AP - if !needsNN && !needsAP && !apExists { - continue - } + // Skip profile dependency checks for non-K8s contexts (profiles are K8s-specific) + // Only K8s contexts should enforce profile dependencies + if isK8sContext && !profileExists && rule.ProfileDependency == armotypes.Required { + continue } ruleExpressions := rm.getRuleExpressions(rule, eventType) diff --git a/tests/component_test.go b/tests/component_test.go index 2d1b78dc9..9c0f8e14a 100644 --- a/tests/component_test.go +++ b/tests/component_test.go @@ -2385,17 +2385,15 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { // Poisons CoreDNS so fusioncore.ai resolves to 8.8.4.4 // instead of 
the legitimate 162.0.217.171. // - // nslookup triggers the poisoned DNS response. BusyBox - // nslookup also does a PTR reverse-lookup on the result IP; - // the PTR domain (4.4.8.8.in-addr.arpa.) is NOT in the NN, - // so R0005 (DNS Anomalies) fires. + // nslookup triggers the poisoned DNS response. + // R0005 does NOT fire: fusioncore.ai is in the NN egress + // list and BusyBox nslookup does NOT do PTR reverse-lookups. + // R0011 does NOT fire: no TCP egress (DNS is UDP to cluster + // DNS which is a private IP filtered by is_private_ip). // - // No TCP egress occurs (nslookup is DNS-only, and the UDP - // DNS query goes to the cluster DNS service which is a - // private IP filtered by R0011), so R0011 does NOT fire. - // - // This documents a detection gap: DNS MITM is visible only - // via the PTR side-channel in R0005, not via R0011. + // This documents a detection gap: pure DNS MITM (without + // subsequent TCP to the spoofed IP) is invisible to both + // R0005 and R0011 when the domain is already whitelisted. // // NOTE: this subtest MUST run last — it modifies the // cluster-wide CoreDNS configmap. @@ -2486,7 +2484,7 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { // ── Trigger alerts ── // nslookup does DNS only (no TCP egress). - // BusyBox nslookup does a PTR reverse-lookup on the result IP. + // BusyBox nslookup does NOT do PTR reverse-lookups on result IPs. stdout, stderr, err := wl.ExecIntoPod([]string{"nslookup", "fusioncore.ai"}, "curl") t.Logf("nslookup (poisoned) → err=%v stdout=%q stderr=%q", err, stdout, stderr) @@ -2494,10 +2492,11 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { t.Logf("=== %d alerts ===", len(alerts)) logAlerts(t, alerts) - // R0005 fires: the PTR reverse-lookup on the spoofed IP - // (4.4.8.8.in-addr.arpa.) is NOT in the NN. 
- require.Greater(t, countByRule(alerts, "R0005"), 0, - "DNS MITM: PTR reverse-lookup on spoofed IP must fire R0005") + // R0005 does NOT fire: fusioncore.ai is already in the NN + // egress list, and BusyBox nslookup does NOT perform PTR + // reverse-lookups on result IPs, so no unknown domain is queried. + assert.Equal(t, 0, countByRule(alerts, "R0005"), + "DNS MITM: domain is in NN and no PTR lookup — R0005 should not fire") // R0011 does NOT fire: nslookup generates only DNS (UDP) // traffic to the cluster DNS service, which is a private IP @@ -2514,7 +2513,7 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { // to the spoofed IP. // // Expected: - // R0005 fires — PTR reverse-lookup on the spoofed IP. + // R0005 = 0 — domain is in NN, no PTR reverse-lookup. // R0011 fires — TCP egress to 128.130.194.56 which is // NOT in the NN (NN only has 162.0.217.171). // @@ -2615,10 +2614,11 @@ func Test_28_UserDefinedNetworkNeighborhood(t *testing.T) { t.Logf("=== %d alerts ===", len(alerts)) logAlerts(t, alerts) - // R0005 fires: fusioncore.ai is in the NN but the PTR - // reverse-lookup on the spoofed IP is not. - require.Greater(t, countByRule(alerts, "R0005"), 0, - "DNS MITM: PTR reverse-lookup on spoofed IP must fire R0005") + // R0005 does NOT fire: fusioncore.ai is already in the NN + // egress list, and curl (like BusyBox nslookup) does NOT + // perform PTR reverse-lookups on resolved IPs. + assert.Equal(t, 0, countByRule(alerts, "R0005"), + "DNS MITM: domain is in NN and no PTR lookup — R0005 should not fire") // R0011 fires: TCP egress to 128.130.194.56 which is NOT // in the NN (NN only allows 162.0.217.171).