diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index ee707ce7..8b1da08e 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -8,6 +8,14 @@ rules: - "" resources: - configmaps + verbs: + - get + - list + - update + - watch +- apiGroups: + - "" + resources: - endpoints - events - limitranges @@ -18,8 +26,6 @@ rules: - pods - replicationcontrollers - resourcequotas - - serviceaccounts - - services verbs: - get - list @@ -32,6 +38,18 @@ rules: - pods/status verbs: - get +- apiGroups: + - "" + resources: + - serviceaccounts + - services + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - apiextensions.k8s.io resources: @@ -50,17 +68,28 @@ rules: - list - patch - update + - watch - apiGroups: - apps resources: - daemonsets - - deployments - replicasets - statefulsets verbs: - get - list - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - argoproj.io resources: @@ -224,7 +253,6 @@ rules: resources: - clusterrolebindings - clusterroles - - role - rolebindings verbs: - create diff --git a/dist/backend-install.yaml b/dist/backend-install.yaml index bad5390d..9cab93d2 100644 --- a/dist/backend-install.yaml +++ b/dist/backend-install.yaml @@ -913,6 +913,14 @@ rules: - "" resources: - configmaps + verbs: + - get + - list + - update + - watch + - apiGroups: + - "" + resources: - endpoints - events - limitranges @@ -923,8 +931,6 @@ rules: - pods - replicationcontrollers - resourcequotas - - serviceaccounts - - services verbs: - get - list @@ -937,6 +943,18 @@ rules: - pods/status verbs: - get + - apiGroups: + - "" + resources: + - serviceaccounts + - services + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - apiextensions.k8s.io resources: @@ -955,17 +973,28 @@ rules: - list - patch - update + - watch - apiGroups: - apps resources: - daemonsets - - deployments - replicasets - statefulsets verbs: - get - 
list - watch + - apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - argoproj.io resources: @@ -1129,7 +1158,6 @@ rules: resources: - clusterrolebindings - clusterroles - - role - rolebindings verbs: - create diff --git a/dist/install.yaml b/dist/install.yaml index 7144916f..9c0ae89d 100644 --- a/dist/install.yaml +++ b/dist/install.yaml @@ -918,6 +918,14 @@ rules: - "" resources: - configmaps + verbs: + - get + - list + - update + - watch +- apiGroups: + - "" + resources: - endpoints - events - limitranges @@ -928,8 +936,6 @@ rules: - pods - replicationcontrollers - resourcequotas - - serviceaccounts - - services verbs: - get - list @@ -942,6 +948,18 @@ rules: - pods/status verbs: - get +- apiGroups: + - "" + resources: + - serviceaccounts + - services + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - apiextensions.k8s.io resources: @@ -960,17 +978,28 @@ rules: - list - patch - update + - watch - apiGroups: - apps resources: - daemonsets - - deployments - replicasets - statefulsets verbs: - get - list - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - argoproj.io resources: @@ -1134,7 +1163,6 @@ rules: resources: - clusterrolebindings - clusterroles - - role - rolebindings verbs: - create diff --git a/dist/installer_updater.yaml b/dist/installer_updater.yaml index 6d9a9144..951d6520 100644 --- a/dist/installer_updater.yaml +++ b/dist/installer_updater.yaml @@ -913,6 +913,14 @@ rules: - "" resources: - configmaps + verbs: + - get + - list + - update + - watch + - apiGroups: + - "" + resources: - endpoints - events - limitranges @@ -923,8 +931,6 @@ rules: - pods - replicationcontrollers - resourcequotas - - serviceaccounts - - services verbs: - get - list @@ -937,6 +943,18 @@ rules: - pods/status verbs: - get + - apiGroups: + - "" + resources: + - serviceaccounts + 
- services + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - apiextensions.k8s.io resources: @@ -955,17 +973,28 @@ rules: - list - patch - update + - watch - apiGroups: - apps resources: - daemonsets - - deployments - replicasets - statefulsets verbs: - get - list - watch + - apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - argoproj.io resources: @@ -1129,7 +1158,6 @@ rules: resources: - clusterrolebindings - clusterroles - - role - rolebindings verbs: - create diff --git a/dist/zxporter.yaml b/dist/zxporter.yaml index e65a7366..d8475d0a 100644 --- a/dist/zxporter.yaml +++ b/dist/zxporter.yaml @@ -146,6 +146,14 @@ rules: - "" resources: - configmaps + verbs: + - get + - list + - update + - watch +- apiGroups: + - "" + resources: - endpoints - events - limitranges @@ -156,8 +164,6 @@ rules: - pods - replicationcontrollers - resourcequotas - - serviceaccounts - - services verbs: - get - list @@ -170,6 +176,18 @@ rules: - pods/status verbs: - get +- apiGroups: + - "" + resources: + - serviceaccounts + - services + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - apiextensions.k8s.io resources: @@ -188,17 +206,28 @@ rules: - list - patch - update + - watch - apiGroups: - apps resources: - daemonsets - - deployments - replicasets - statefulsets verbs: - get - list - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - patch + - update + - watch - apiGroups: - argoproj.io resources: @@ -362,7 +391,6 @@ rules: resources: - clusterrolebindings - clusterroles - - role - rolebindings verbs: - create diff --git a/docs/rbac.md b/docs/rbac.md new file mode 100644 index 00000000..2658d7e8 --- /dev/null +++ b/docs/rbac.md @@ -0,0 +1,337 @@ +# ZXporter Operator RBAC Documentation + +## Overview + +The ZXporter Operator is a comprehensive Kubernetes monitoring and data collection system that 
automatically gathers resource metrics, events, and configuration data from clusters. It features automatic metrics-server installation, extensive third-party integration support, and exports collected data to external analytics platforms. + +## Complete Permissions Matrix + +### Main Operator ClusterRole +| API Group | Resource | get | list | watch | create | update | patch | delete | Purpose | +|-----------|----------|-----|------|-------|--------|--------|-------|--------|---------| +| `devzero.io` | `collectionpolicies` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | CRD management | +| `devzero.io` | `collectionpolicies/status` | ✓ | | | | ✓ | ✓ | | Status updates | +| `devzero.io` | `collectionpolicies/finalizers` | | | | | ✓ | | | Cleanup coordination | +| `""` (core) | `serviceaccounts` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Bootstrap metrics-server | +| `""` (core) | `services` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Bootstrap metrics-server | +| `""` (core) | `configmaps` | ✓ | ✓ | ✓ | | ✓ | | | Token persistence | +| `""` (core) | `pods` | ✓ | ✓ | ✓ | | | | | Resource monitoring | +| `""` (core) | `pods/status` | ✓ | | | | | | | Pod metrics | +| `""` (core) | `nodes` | ✓ | ✓ | ✓ | | | | | Node monitoring | +| `""` (core) | `nodes/status` | ✓ | | | | | | | Node metrics | +| `""` (core) | `nodes/metrics` | ✓ | | | | | | | Metrics collection | +| `""` (core) | `namespaces` | ✓ | ✓ | ✓ | | | | | Namespace discovery | +| `""` (core) | `persistentvolumeclaims` | ✓ | ✓ | ✓ | | | | | Storage monitoring | +| `""` (core) | `persistentvolumes` | ✓ | ✓ | ✓ | | | | | Storage monitoring | +| `""` (core) | `events` | ✓ | ✓ | ✓ | | | | | Event collection | +| `""` (core) | `limitranges` | ✓ | ✓ | ✓ | | | | | Resource limits | +| `""` (core) | `resourcequotas` | ✓ | ✓ | ✓ | | | | | Quota monitoring | +| `""` (core) | `replicationcontrollers` | ✓ | ✓ | ✓ | | | | | Legacy workloads | +| `""` (core) | `endpoints` | ✓ | ✓ | ✓ | | | | | Service endpoints | +| `apps` | `deployments` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | 
Bootstrap + monitoring | +| `apps` | `statefulsets` | ✓ | ✓ | ✓ | | | | | Workload monitoring | +| `apps` | `daemonsets` | ✓ | ✓ | ✓ | | | | | Workload monitoring | +| `apps` | `replicasets` | ✓ | ✓ | ✓ | | | | | Workload monitoring | +| `batch` | `jobs` | ✓ | ✓ | ✓ | | | | | Job monitoring | +| `batch` | `cronjobs` | ✓ | ✓ | ✓ | | | | | Scheduled job monitoring | +| `metrics.k8s.io` | `nodes` | ✓ | ✓ | ✓ | | | | | Metrics collection | +| `metrics.k8s.io` | `pods` | ✓ | ✓ | ✓ | | | | | Metrics collection | +| `networking.k8s.io` | `ingresses` | ✓ | ✓ | ✓ | | | | | Network monitoring | +| `networking.k8s.io` | `networkpolicies` | ✓ | ✓ | ✓ | | | | | Security monitoring | +| `networking.k8s.io` | `ingressclasses` | ✓ | ✓ | ✓ | | | | | Ingress discovery | +| `autoscaling` | `horizontalpodautoscalers` | ✓ | ✓ | ✓ | | | | | HPA monitoring | +| `autoscaling.k8s.io` | `verticalpodautoscalers` | ✓ | ✓ | ✓ | | | | | VPA monitoring | +| `policy` | `poddisruptionbudgets` | ✓ | ✓ | ✓ | | | | | Policy monitoring | +| `storage.k8s.io` | `storageclasses` | ✓ | ✓ | ✓ | | | | | Storage monitoring | +| `storage.k8s.io` | `csinodes` | ✓ | ✓ | ✓ | | | | | CSI monitoring | +| `storage.k8s.io` | `csidrivers` | ✓ | ✓ | ✓ | | | | | CSI monitoring | +| `storage.k8s.io` | `csistoragecapacities` | ✓ | ✓ | ✓ | | | | | Storage capacity | +| `storage.k8s.io` | `volumeattachments` | ✓ | ✓ | ✓ | | | | | Volume monitoring | +| `rbac.authorization.k8s.io` | `clusterroles` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Bootstrap + monitoring | +| `rbac.authorization.k8s.io` | `clusterrolebindings` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Bootstrap + monitoring | +| `rbac.authorization.k8s.io` | `rolebindings` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Bootstrap + monitoring | +| `rbac.authorization.k8s.io` | `roles` | ✓ | ✓ | ✓ | | | | | RBAC monitoring | +| `apiregistration.k8s.io` | `apiservices` | ✓ | ✓ | | ✓ | ✓ | ✓ | | Bootstrap metrics API | +| `apiextensions.k8s.io` | `customresourcedefinitions` | ✓ | ✓ | ✓ | | | | | CRD discovery | + 
+### Conditional Third-Party Permissions +| API Group | Resource | get | list | watch | create | update | patch | delete | Condition | +|-----------|----------|-----|------|-------|--------|--------|-------|--------|-----------| +| `karpenter.sh` | `provisioners` | ✓ | ✓ | ✓ | | | | | Karpenter installed | +| `karpenter.sh` | `machines` | ✓ | ✓ | ✓ | | | | | Karpenter installed | +| `karpenter.sh` | `nodepools` | ✓ | ✓ | ✓ | | | | | Karpenter installed | +| `karpenter.sh` | `nodeclaims` | ✓ | ✓ | ✓ | | | | | Karpenter installed | +| `karpenter.sh` | `nodeoverlays` | ✓ | ✓ | ✓ | | | | | Karpenter installed | +| `karpenter.k8s.aws` | `awsnodetemplates` | ✓ | ✓ | ✓ | | | | | Karpenter AWS | +| `karpenter.k8s.aws` | `ec2nodeclasses` | ✓ | ✓ | ✓ | | | | | Karpenter AWS | +| `karpenter.azure.com` | `aksnodeclasses` | ✓ | ✓ | ✓ | | | | | Karpenter Azure | +| `karpenter.k8s.oracle` | `ocinodeclasses` | ✓ | ✓ | ✓ | | | | | Karpenter Oracle | +| `karpenter.k8s.gcp` | `gcenodeclasses` | ✓ | ✓ | ✓ | | | | | Karpenter GCP | +| `keda.sh` | `scaledobjects` | ✓ | ✓ | ✓ | | | | | KEDA installed | +| `keda.sh` | `scaledjobs` | ✓ | ✓ | ✓ | | | | | KEDA installed | +| `keda.sh` | `triggerauthentications` | ✓ | ✓ | ✓ | | | | | KEDA installed | +| `keda.sh` | `clustertriggerauthentications` | ✓ | ✓ | ✓ | | | | | KEDA installed | +| `argoproj.io` | `rollouts` | ✓ | ✓ | ✓ | | | | | Argo Rollouts installed | +| `datadoghq.com` | `extendeddaemonsetreplicasets` | ✓ | ✓ | ✓ | | | | | Datadog operator installed | +| `kubeflow.org` | `notebooks` | ✓ | ✓ | ✓ | | | | | Kubeflow installed | + +### Leader Election Role +| API Group | Resource | get | list | watch | create | update | patch | delete | +|-----------|----------|-----|------|-------|--------|--------|-------|--------| +| `coordination.k8s.io` | `leases` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `""` (core) | `configmaps` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `""` (core) | `events` | | | | ✓ | | ✓ | | + +**Legend**: ✓ = Permission granted | 
Conditional = Only when specific CRDs detected + +## ServiceAccount + +```yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: devzero-zxporter-controller-manager + namespace: {{ .Values.namespace }} +``` +**Scope**: Service account for the zxporter controller deployment + +## ClusterRole Breakdown + +### Bootstrap Phase (entrypoint.sh) + +#### Metrics Server Installation +**Purpose**: Automatic metrics-server deployment when missing from cluster + +- **`serviceaccounts`** (`create`, `get`, `list`, `update`, `patch`, `watch`) + - Creates `dz-metrics-server` ServiceAccount + - **Code**: `entrypoint.sh:29`, `dist/metrics-server.yaml` + +- **`services`** (`create`, `get`, `list`, `update`, `patch`, `watch`) + - Creates metrics-server service endpoint + - **Code**: `entrypoint.sh:29`, `dist/metrics-server.yaml` + +- **`deployments`** (`create`, `get`, `list`, `update`, `patch`, `watch`) + - Creates metrics-server deployment + - **Code**: `entrypoint.sh:29`, `dist/metrics-server.yaml` + +#### RBAC Setup for Metrics Server +**Purpose**: Creates necessary RBAC for metrics-server operation + +- **`clusterroles`** (`create`, `get`, `list`, `update`, `patch`, `watch`) + - Creates `system:dz-metrics-server` ClusterRole + - **Code**: `entrypoint.sh:29`, metrics-server RBAC + +- **`clusterrolebindings`** (`create`, `get`, `list`, `update`, `patch`, `watch`) + - Binds metrics-server ServiceAccount to ClusterRoles + - **Code**: `entrypoint.sh:29`, metrics-server RBAC + +- **`rolebindings`** (`create`, `get`, `list`, `update`, `patch`, `watch`) + - Creates auth-reader RoleBinding for extension API server + - **Code**: `entrypoint.sh:29`, metrics-server RBAC + +#### API Registration +**Purpose**: Registers metrics API with Kubernetes aggregation layer + +- **`apiservices`** (`create`, `get`, `list`, `update`, `patch`) + - Registers `v1beta1.metrics.k8s.io` APIService + - **Code**: `entrypoint.sh:29`, metrics-server installation + +### Runtime Phase (Go Controller) + +#### 
Collection Policy Management +**Purpose**: Core CRD lifecycle management + +- **`collectionpolicies`** (Full CRUD) + - Manages CollectionPolicy custom resources + - **Code**: `internal/controller/collectionpolicy_controller.go:239` + +- **Status subresources** (`get`, `update`, `patch`) + - Reports collection status and health + - **Code**: Controller status updates + +- **Finalizer management** (`update`) + - Ensures proper cleanup sequencing + - **Code**: Controller finalizer management + +#### Token Persistence +**Purpose**: Cluster token management for external API authentication + +- **`configmaps`** (`get`, `list`, `watch`, `update`) + - Updates `devzero-zxporter-env-config` with cluster tokens + - **Code**: `internal/controller/custom.go:335` + +#### Core Resource Monitoring +**Purpose**: Comprehensive Kubernetes resource collection + +- **Pod monitoring** (`get`, `list`, `watch`) + - Collects pod lifecycle events, container metrics, resource usage + - **Code**: `internal/collector/pod_collector.go` + +- **Node monitoring** (`get`, `list`, `watch`) + - Gathers node metrics, capacity, conditions, and allocatable resources + - **Code**: `internal/collector/node_collector.go` + +- **Storage monitoring** (`get`, `list`, `watch`) + - Tracks PVs, PVCs, StorageClasses, and CSI resources + - **Code**: `internal/collector/pv_collector.go`, `internal/collector/pvc_collector.go` + +- **Workload monitoring** (`get`, `list`, `watch`) + - Monitors Deployments, StatefulSets, DaemonSets, Jobs, CronJobs + - **Code**: `internal/collector/deployment_collector.go`, etc. 
+ +#### Metrics Collection +**Purpose**: Resource utilization data gathering + +- **`nodes/metrics`** (`get`) + - Retrieves node-level resource metrics + - **Code**: `internal/collector/node_collector.go` + +- **`metrics.k8s.io`** resources (`get`, `list`, `watch`) + - Collects pod and node metrics from metrics-server + - **Code**: Metrics collection components + +#### Network and Security Monitoring +**Purpose**: Network topology and security policy tracking + +- **Networking resources** (`get`, `list`, `watch`) + - Monitors Ingresses, NetworkPolicies, IngressClasses + - **Code**: `internal/collector/ingress_collector.go`, etc. + +- **RBAC monitoring** (`get`, `list`, `watch`) + - Tracks Roles, RoleBindings, ClusterRoles, ClusterRoleBindings + - **Code**: `internal/collector/role_collector.go`, etc. + +#### Autoscaling Monitoring +**Purpose**: Scaling behavior and policy tracking + +- **HPA/VPA monitoring** (`get`, `list`, `watch`) + - Collects autoscaling configurations and status + - **Code**: `internal/collector/hpa_collector.go` + +- **PDB monitoring** (`get`, `list`, `watch`) + - Tracks pod disruption budgets + - **Code**: `internal/collector/pdb_collector.go` + +### Third-Party Integrations + +#### Karpenter Integration +**Purpose**: Node provisioning and scaling monitoring + +- **All Karpenter resources** (`get`, `list`, `watch`) + - Monitors node provisioning across cloud providers + - **Code**: `internal/collector/karpenter_collector.go` + - **Condition**: Gracefully handles missing Karpenter CRDs + +#### KEDA Integration +**Purpose**: Event-driven autoscaling monitoring + +- **KEDA resources** (`get`, `list`, `watch`) + - Tracks ScaledObjects, ScaledJobs, TriggerAuthentications, ClusterTriggerAuthentications + - **Code**: `internal/collector/keda_*_collector.go` + - **Condition**: Only when KEDA CRDs detected + +#### Argo Rollouts Integration +**Purpose**: Progressive delivery monitoring + +- **`rollouts`** (`get`, `list`, `watch`) + - Monitors advanced deployment strategies + - 
**Code**: `internal/collector/argo_rollouts_collector.go` + - **Condition**: Only when Argo Rollouts CRDs detected + +#### Datadog Integration +**Purpose**: Datadog operator monitoring + +- **`extendeddaemonsetreplicasets`** (`get`, `list`, `watch`) + - Tracks Datadog-specific resources + - **Code**: `internal/collector/datadog_collector.go` + - **Condition**: Only when Datadog operator CRDs detected + +#### Kubeflow Integration +**Purpose**: ML workload monitoring + +- **`notebooks`** (`get`, `list`, `watch`) + - Monitors Jupyter notebook resources + - **Code**: `internal/collector/kubeflow_collector.go` + - **Condition**: Only when Kubeflow CRDs detected + +### Leader Election +**Purpose**: High availability support for controller deployments + +- **`leases`** (Full CRUD) + - Primary leader election mechanism + - **Code**: Controller runtime leader election + +- **`configmaps`** (Full CRUD within namespace) + - Backup coordination mechanism + - **Code**: Leader election framework + +- **`events`** (`create`, `patch`) + - Leader election event logging + - **Code**: Leader election framework + +## Security Considerations + +### Principle of Least Privilege + +The zxporter operator follows a two-phase permission model: + +1. **Bootstrap Phase**: Requires elevated permissions for one-time metrics-server installation +2. 
**Runtime Phase**: Operates with minimal write permissions (only ConfigMap updates) + +### Permission Justification + +- **Write permissions** are limited to essential operations: + - Metrics server bootstrap (one-time setup) + - Token persistence (security requirement) + - Leader election (HA requirement) + +- **Read permissions** enable comprehensive monitoring without cluster modification + +### Third-Party Graceful Degradation + +All third-party integrations: +- Test for CRD availability before attempting access +- Gracefully handle missing operators +- Can be disabled via `DisabledCollectors` configuration +- Do not block core functionality if unavailable + +### Metrics Server Bootstrap + +The automatic metrics-server installation feature: +- Only activates when metrics-server is missing +- Uses standard Helm-generated manifests +- Follows Kubernetes security best practices +- Can be disabled if metrics-server is pre-installed + +## Configuration Options + +### Disabling Collectors + +```yaml +apiVersion: devzero.io/v1 +kind: CollectionPolicy +spec: + policies: + disabledCollectors: + - "karpenter" + - "keda_scaled_job" + - "argo_rollouts" + - "datadog" + - "kubeflow_notebook" +``` + +### Namespace Targeting + +```yaml +apiVersion: devzero.io/v1 +kind: CollectionPolicy +spec: + targetSelector: + namespaces: + - "production" + - "staging" + exclusions: + excludedNamespaces: + - "kube-system" + - "kube-public" +``` \ No newline at end of file diff --git a/helm-chart/zxporter/templates/zxporter-rbac.yaml b/helm-chart/zxporter/templates/zxporter-rbac.yaml index d7ae9531..708b946f 100644 --- a/helm-chart/zxporter/templates/zxporter-rbac.yaml +++ b/helm-chart/zxporter/templates/zxporter-rbac.yaml @@ -1,3 +1,9 @@ +# ========================================== +# LEADER ELECTION AND TOKEN PERSISTENCE PERMISSIONS +# ========================================== +# This role contains permissions needed for: +# 1. Controller leader election coordination +# 2. 
ConfigMap access for token persistence and general config apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -7,33 +13,7 @@ metadata: name: devzero-zxporter-leader-election-role namespace: {{ .Values.namespace }} rules: - - apiGroups: - - "" - resources: - - configmaps - - deployments - - serviceaccounts - - services - verbs: - - get - - list - - watch - - create - - update - - patch - - delete - - apiGroups: - - apps - resources: - - deployments - verbs: - - get - - list - - watch - - create - - update - - patch - - delete + # Leader election coordination - apiGroups: - coordination.k8s.io resources: @@ -46,6 +26,7 @@ rules: - update - patch - delete + # Event creation for leader election - apiGroups: - "" resources: @@ -53,21 +34,19 @@ rules: verbs: - create - patch + # ConfigMap access for token persistence and general config - apiGroups: - - rbac.authorization.k8s.io + - "" resources: - - rolebindings - - roles - - clusterrolebindings - - clusterroles + - configmaps verbs: - - create - - delete - get - list - - patch - - update - watch + - create + - update + - patch + - delete --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -119,11 +98,87 @@ rules: verbs: - get --- +# ========================================== +# MAIN CLUSTER ROLE - COMPREHENSIVE PERMISSIONS +# ========================================== +# This ClusterRole contains all permissions needed for: +# 1. Metrics server bootstrap installation (entrypoint.sh phase) +# 2. Runtime monitoring and data collection +# 3. 
Optional third-party resource monitoring apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: name: devzero-zxporter-manager-role rules: +# ========================================== +# BOOTSTRAP PERMISSIONS (entrypoint.sh) +# ========================================== +# ServiceAccount creation for metrics server +- apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - get + - list + - update + - patch + - watch +# Service creation for metrics server +- apiGroups: + - "" + resources: + - services + verbs: + - create + - get + - list + - update + - patch + - watch +# Deployment creation for metrics server +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - update + - patch + - watch +# RBAC creation for metrics server +- apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterrolebindings + - clusterroles + - rolebindings + verbs: + - create + - get + - list + - patch + - update + - watch +# APIService registration for metrics server +- apiGroups: + - apiregistration.k8s.io + resources: + - apiservices + verbs: + - create + - get + - list + - patch + - update + +# ========================================== +# RUNTIME MONITORING PERMISSIONS +# ========================================== +# Core Kubernetes resources (READ-ONLY monitoring) - apiGroups: - "" resources: @@ -138,8 +193,6 @@ rules: - pods - replicationcontrollers - resourcequotas - - serviceaccounts - - services verbs: - get - list @@ -170,11 +223,11 @@ rules: - list - patch - update +# Apps resources (READ-ONLY monitoring - note: deployments already has write perms above for bootstrap) - apiGroups: - apps resources: - daemonsets - - deployments - replicasets - statefulsets verbs: @@ -248,10 +301,21 @@ rules: - get - patch - update +# ========================================== +# OPTIONAL THIRD-PARTY RESOURCES +# ========================================== +# These resources are optional - collectors gracefully handle 
missing CRDs +# Can be disabled via DisabledCollectors configuration + +# Karpenter node provisioning (optional) - apiGroups: - - karpenter.azure.com + - karpenter.sh resources: - - aksnodeclasses + - machines + - nodeclaims + - nodeoverlays + - nodepools + - provisioners verbs: - get - list @@ -266,33 +330,30 @@ rules: - list - watch - apiGroups: - - karpenter.k8s.gcp + - karpenter.azure.com resources: - - gcenodeclasses + - aksnodeclasses verbs: - get - list - watch - apiGroups: - - karpenter.k8s.oracle + - karpenter.k8s.gcp resources: - - ocinodeclasses + - gcenodeclasses verbs: - get - list - watch - apiGroups: - - karpenter.sh + - karpenter.k8s.oracle resources: - - machines - - nodeclaims - - nodeoverlays - - nodepools - - provisioners + - ocinodeclasses verbs: - get - list - watch +# Other optional third-party integrations - apiGroups: - keda.sh resources: @@ -339,20 +400,7 @@ rules: - get - list - watch -- apiGroups: - - rbac.authorization.k8s.io - resources: - - clusterrolebindings - - clusterroles - - role - - rolebindings - verbs: - - create - - get - - list - - patch - - update - - watch +# RBAC monitoring (READ-ONLY - write permissions declared above for bootstrap) - apiGroups: - rbac.authorization.k8s.io resources: @@ -374,6 +422,10 @@ rules: - list - watch --- +# ========================================== +# METRICS AUTHENTICATION ROLE +# ========================================== +# Role for authenticating metrics endpoint access apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/internal/controller/collectionpolicy_controller.go b/internal/controller/collectionpolicy_controller.go index 3ecd1fe0..0774a3e4 100644 --- a/internal/controller/collectionpolicy_controller.go +++ b/internal/controller/collectionpolicy_controller.go @@ -136,36 +136,58 @@ type PolicyConfig struct { NumResourceProcessors int } +// ======================================== +// COLLECTION POLICY CRD MANAGEMENT +// 
======================================== //+kubebuilder:rbac:groups=devzero.io,resources=collectionpolicies,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=devzero.io,resources=collectionpolicies/status,verbs=get;update;patch //+kubebuilder:rbac:groups=devzero.io,resources=collectionpolicies/finalizers,verbs=update -// Metric server installation permissions +// ======================================== +// BOOTSTRAP PERMISSIONS (entrypoint.sh) +// ======================================== +// These permissions are required for automatic metrics-server installation +// via kubectl apply in entrypoint.sh when metrics-server is not detected + +// ServiceAccount creation for metrics-server +//+kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch;create;update;patch + +// RBAC setup for metrics-server (ClusterRoles, ClusterRoleBindings, RoleBindings) //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;watch;create;update;patch //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch;create;update;patch //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch -//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=role,verbs=get;list;watch;create;update;patch -//+kubebuilder:rbac:groups=apiregistration.k8s.io,resources=apiservices,verbs=get;list;create;update;patch -//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=nodes/metrics,verbs=get - -// Core API Group resources -//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=pods/status,verbs=get -//+kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=nodes/status,verbs=get -//+kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch 
-//+kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=limitranges,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=resourcequotas,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=replicationcontrollers,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch - -// Apps API Group resources -//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch + +// Service and Deployment creation for metrics-server +//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch +//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch + +// APIService registration for metrics-server API +//+kubebuilder:rbac:groups=apiregistration.k8s.io,resources=apiservices,verbs=get;list;watch;create;update;patch + +// ======================================== +// RUNTIME PERMISSIONS +// ======================================== +// ConfigMap access for cluster token persistence (ONLY write permission in runtime) +//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;update + +// Metrics access +//+kubebuilder:rbac:groups="",resources=nodes/metrics,verbs=get + +// Core Kubernetes resources (READ-ONLY monitoring and collection) +//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=pods/status,verbs=get +//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=nodes/status,verbs=get +//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch 
+//+kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=limitranges,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=resourcequotas,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=replicationcontrollers,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=persistentvolumes,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=endpoints,verbs=get;list;watch + +// Apps API Group resources (READ-ONLY monitoring - note: deployments also needs write for metrics-server bootstrap) //+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch //+kubebuilder:rbac:groups=apps,resources=daemonsets,verbs=get;list;watch //+kubebuilder:rbac:groups=apps,resources=replicasets,verbs=get;list;watch @@ -183,12 +205,8 @@ type PolicyConfig struct { //+kubebuilder:rbac:groups=networking.k8s.io,resources=networkpolicies,verbs=get;list;watch //+kubebuilder:rbac:groups=networking.k8s.io,resources=ingressclasses,verbs=get;list;watch -// RBAC API Group resources +// RBAC resources (READ-ONLY monitoring - note: write permissions declared above for bootstrap) //+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles,verbs=get;list;watch -//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch -//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;watch -//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch -//+kubebuilder:rbac:groups=core,resources=endpoints,verbs=get;list;watch // Autoscaling API Group resources //+kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch @@ -204,34 +222,26 @@ type PolicyConfig struct { //+kubebuilder:rbac:groups=storage.k8s.io,resources=csistoragecapacities,verbs=get;list;watch 
//+kubebuilder:rbac:groups=storage.k8s.io,resources=volumeattachments,verbs=get;list;watch -// Karpenter resources -//+kubebuilder:rbac:groups=karpenter.sh,resources=provisioners,verbs=get;list;watch -//+kubebuilder:rbac:groups=karpenter.sh,resources=machines,verbs=get;list;watch -//+kubebuilder:rbac:groups=karpenter.sh,resources=nodepools,verbs=get;list;watch -//+kubebuilder:rbac:groups=karpenter.sh,resources=nodeclaims,verbs=get;list;watch -//+kubebuilder:rbac:groups=karpenter.sh,resources=nodeoverlays,verbs=get;list;watch -//+kubebuilder:rbac:groups=karpenter.k8s.aws,resources=awsnodetemplates,verbs=get;list;watch -//+kubebuilder:rbac:groups=karpenter.k8s.aws,resources=ec2nodeclasses,verbs=get;list;watch +// ======================================== +// OPTIONAL THIRD-PARTY RESOURCES +// ======================================== +// These permissions are for optional third-party operators. +// All collectors gracefully handle missing CRDs and can be disabled via DisabledCollectors config. 
+ +// Karpenter node provisioning (optional - only if Karpenter operator installed) +//+kubebuilder:rbac:groups=karpenter.sh,resources=provisioners;machines;nodepools;nodeclaims;nodeoverlays,verbs=get;list;watch +//+kubebuilder:rbac:groups=karpenter.k8s.aws,resources=awsnodetemplates;ec2nodeclasses,verbs=get;list;watch //+kubebuilder:rbac:groups=karpenter.azure.com,resources=aksnodeclasses,verbs=get;list;watch //+kubebuilder:rbac:groups=karpenter.k8s.oracle,resources=ocinodeclasses,verbs=get;list;watch //+kubebuilder:rbac:groups=karpenter.k8s.gcp,resources=gcenodeclasses,verbs=get;list;watch -// CRD API Group resources +// API Extensions (READ-ONLY for CRD discovery) //+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch -// DataDog resources +// Optional third-party monitoring integrations //+kubebuilder:rbac:groups=datadoghq.com,resources=extendeddaemonsetreplicasets,verbs=get;list;watch - -// Argo Rollouts resources //+kubebuilder:rbac:groups=argoproj.io,resources=rollouts,verbs=get;list;watch - -// KEDA resources -//+kubebuilder:rbac:groups=keda.sh,resources=scaledobjects,verbs=get;list;watch -//+kubebuilder:rbac:groups=keda.sh,resources=scaledjobs,verbs=get;list;watch -//+kubebuilder:rbac:groups=keda.sh,resources=triggerauthentications,verbs=get;list;watch -//+kubebuilder:rbac:groups=keda.sh,resources=clustertriggerauthentications,verbs=get;list;watch - -// Kubeflow resources +//+kubebuilder:rbac:groups=keda.sh,resources=scaledobjects;scaledjobs;triggerauthentications;clustertriggerauthentications,verbs=get;list;watch //+kubebuilder:rbac:groups=kubeflow.org,resources=notebooks,verbs=get;list;watch // Reconcile is part of the main kubernetes reconciliation loop which aims to