@@ -187,3 +187,46 @@ subjects:
- kind: ServiceAccount
name: cloud-controller-manager
namespace: openshift-cloud-controller-manager

---
apiVersion: v1
kind: ServiceAccount
metadata:
annotations:
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
name: cloud-node-manager
namespace: openshift-cloud-controller-manager

---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: cloud-node-manager
annotations:
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch","list","get","update", "patch"]
- apiGroups: [""]
resources: ["nodes/status"]
verbs: ["patch"]

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cloud-node-manager
annotations:
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cloud-node-manager
subjects:
- kind: ServiceAccount
name: cloud-node-manager
namespace: openshift-cloud-controller-manager
97 changes: 97 additions & 0 deletions pkg/cloud/azure/assets/cloud-controller-manager-deployment.yaml
@@ -0,0 +1,97 @@
kind: Deployment
apiVersion: apps/v1
metadata:
name: azure-cloud-controller-manager
namespace: openshift-cloud-controller-manager
labels:
app: azure-cloud-controller-manager
spec:
selector:
matchLabels:
app: azure-cloud-controller-manager
template:
metadata:
labels:
app: azure-cloud-controller-manager
spec:
hostNetwork: true
Review comment: Presumably hostNetwork is not required for any controller other than cloud-node.

serviceAccountName: cloud-controller-manager
priorityClassName: system-cluster-critical
nodeSelector:
node-role.kubernetes.io/master: ""
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- topologyKey: "kubernetes.io/hostname"
labelSelector:
matchLabels:
app: azure-cloud-controller-manager
tolerations:
Contributor: None of our components should tolerate that taint; if we do, we will schedule onto a node without networking, which will likely cause our pods to crash loop because they cannot reach the APIs they need.

Contributor: It will now need to tolerate the not-ready taint, based on the additions made in #76.

- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 120
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 120
- effect: NoSchedule
key: node.cloudprovider.kubernetes.io/uninitialized
operator: Exists
- effect: NoSchedule
key: node.kubernetes.io/not-ready
operator: Exists
containers:
- name: azure-cloud-controller-manager
image: quay.io/openshift/origin-azure-cloud-controller-manager:4.8.0
imagePullPolicy: "IfNotPresent"
env:
- name: CLOUD_CONFIG
value: /etc/kubernetes-cloud-config/cloud.conf
- name: OCP_INFRASTRUCTURE_NAME
value: kubernetes # default cluster name in ccm
resources:
requests:
cpu: 200m
memory: 50Mi
command:
- /bin/bash
- -c
- |
#!/bin/bash
set -o allexport
if [[ -f /etc/kubernetes/apiserver-url.env ]]; then
source /etc/kubernetes/apiserver-url.env
fi
exec /bin/azure-cloud-controller-manager \
--v=3 \
--cloud-config=$(CLOUD_CONFIG) \
--cloud-provider=azure \
--controllers=*,-cloud-node,-route \
--allocate-node-cidrs=false \
--configure-cloud-routes=false \
--use-service-account-credentials=true \
--bind-address=127.0.0.1 \
--cluster-name=$(OCP_INFRASTRUCTURE_NAME) \
--leader-elect-resource-namespace=openshift-cloud-controller-manager
volumeMounts:
- name: host-etc-kube
mountPath: /etc/kubernetes
readOnly: true
- name: config-accm
mountPath: /etc/kubernetes-cloud-config
readOnly: true
volumes:
- name: config-accm
configMap:
name: cloud-conf
items:
- key: cloud.conf
path: cloud.conf
- name: host-etc-kube
hostPath:
path: /etc/kubernetes
type: Directory
76 changes: 76 additions & 0 deletions pkg/cloud/azure/assets/cloud-node-manager-daemonset.yaml
@@ -0,0 +1,76 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: azure-cloud-node-manager
namespace: openshift-cloud-controller-manager
labels:
component: azure-cloud-node-manager
kubernetes.io/cluster-service: "true"
spec:
selector:
matchLabels:
app: azure-cloud-node-manager
template:
metadata:
labels:
app: azure-cloud-node-manager
annotations:
cluster-autoscaler.kubernetes.io/daemonset-pod: "true"
spec:
priorityClassName: system-node-critical
serviceAccountName: cloud-node-manager
hostNetwork: true
nodeSelector:
kubernetes.io/os: linux
tolerations:
Contributor: Same question here: I'm guessing that since this uses the host network, it will need to tolerate not-ready as well.

Contributor: Host network may mean we can allow it to tolerate the taint, though IMO we should only add the toleration if we are certain we need it. I'd suggest being conservative initially.

- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 120
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 120
- effect: NoSchedule
key: node.cloudprovider.kubernetes.io/uninitialized
operator: Exists
- effect: NoSchedule
key: node.kubernetes.io/not-ready
operator: Exists
containers:
- name: azure-cloud-node-manager
image: quay.io/openshift/origin-azure-cloud-node-manager:4.8.0
imagePullPolicy: IfNotPresent
command:
- /bin/bash
- -c
- |
#!/bin/bash
set -o allexport
if [[ -f /etc/kubernetes/apiserver-url.env ]]; then
source /etc/kubernetes/apiserver-url.env
fi
exec /bin/azure-cloud-node-manager \
--node-name=$(NODE_NAME) \
--wait-routes=false
env:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: host-etc-kube
mountPath: /etc/kubernetes
readOnly: true
resources:
requests:
cpu: 50m
memory: 50Mi
volumes:
- name: host-etc-kube
hostPath:
path: /etc/kubernetes
type: Directory
37 changes: 37 additions & 0 deletions pkg/cloud/azure/azure.go
@@ -1 +1,38 @@
package azure

import (
"embed"

"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/common"
appsv1 "k8s.io/api/apps/v1"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

var (
//go:embed assets/*
azureFs embed.FS

azureResources []client.Object

azureSources = []common.ObjectSource{
{Object: &appsv1.DaemonSet{}, Path: "assets/cloud-node-manager-daemonset.yaml"},
{Object: &appsv1.Deployment{}, Path: "assets/cloud-controller-manager-deployment.yaml"},
}
)

func init() {
var err error
azureResources, err = common.ReadResources(azureFs, azureSources)
utilruntime.Must(err)
}

// GetResources returns a list of Azure resources for provisioning CCM in a running cluster
func GetResources() []client.Object {
resources := make([]client.Object, len(azureResources))
for i := range azureResources {
resources[i] = azureResources[i].DeepCopyObject().(client.Object)
}

return resources
}
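For readers unfamiliar with the embed pattern above: common.ReadResources is what turns the embedded YAML assets into typed client.Object values, but its implementation is not part of this diff. The sketch below is a minimal, hypothetical reconstruction of such a helper, assuming it simply reads each file from the embed.FS and unmarshals it into the pre-typed object from ObjectSource via sigs.k8s.io/yaml; the real helper may differ.

```go
// Hypothetical sketch of a ReadResources-style helper; not taken from this PR.
package common

import (
	"embed"

	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/yaml"
)

// ObjectSource pairs an empty typed object with the embedded asset it is decoded from.
type ObjectSource struct {
	Object client.Object
	Path   string
}

// ReadResources reads every asset listed in sources from the embedded filesystem
// and unmarshals it into the corresponding typed object.
func ReadResources(f embed.FS, sources []ObjectSource) ([]client.Object, error) {
	objects := make([]client.Object, 0, len(sources))
	for _, source := range sources {
		data, err := f.ReadFile(source.Path)
		if err != nil {
			return nil, err
		}
		if err := yaml.Unmarshal(data, source.Object); err != nil {
			return nil, err
		}
		objects = append(objects, source.Object)
	}
	return objects, nil
}
```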
23 changes: 23 additions & 0 deletions pkg/cloud/azure/azure_test.go
@@ -1 +1,24 @@
package azure

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestGetResources(t *testing.T) {
resources := GetResources()
assert.Len(t, resources, 2)

var names, kinds []string
for _, r := range resources {
names = append(names, r.GetName())
kinds = append(kinds, r.GetObjectKind().GroupVersionKind().Kind)
}

assert.Contains(t, names, "azure-cloud-controller-manager")
assert.Contains(t, kinds, "Deployment")

assert.Contains(t, names, "azure-cloud-node-manager")
assert.Contains(t, kinds, "DaemonSet")
}
3 changes: 3 additions & 0 deletions pkg/cloud/cloud.go
@@ -3,6 +3,7 @@ package cloud
import (
configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/aws"
"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/azure"
"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/openstack"
"k8s.io/klog"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -21,6 +22,8 @@ func GetResources(platform configv1.PlatformType) []client.Object {
return aws.GetResources()
case configv1.OpenStackPlatformType:
return openstack.GetResources()
case configv1.AzurePlatformType:
return azure.GetResources()
default:
klog.Warningf("Unrecognized platform type %q found in infrastructure", platform)
return nil
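As a rough sketch of how the platform dispatch above is presumably consumed (the operator's reconciler itself is outside this diff): the templates returned by GetResources are run through FillConfigValues from pkg/substitution (shown further down) and then applied to the cluster. The function name, package name, field owner string, and the use of server-side apply below are illustrative assumptions, not code from this PR.

```go
// Illustrative only: one plausible way a reconciler could apply the rendered objects.
package operator

import (
	"context"

	"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud"
	"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/config"
	"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/substitution"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func applyCloudResources(ctx context.Context, c client.Client, operatorConfig config.OperatorConfig) error {
	// Pick the provider-specific templates (Deployment, DaemonSet, ...) for the platform.
	templates := cloud.GetResources(operatorConfig.Platform)

	// Substitute images, infrastructure name, replica count, etc. into the templates.
	objects := substitution.FillConfigValues(operatorConfig, templates)

	for _, obj := range objects {
		// Server-side apply keeps the manifests converged on every reconcile.
		err := c.Patch(ctx, obj, client.Apply,
			client.ForceOwnership,
			client.FieldOwner("cluster-cloud-controller-manager-operator"))
		if err != nil {
			return err
		}
	}
	return nil
}
```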
4 changes: 3 additions & 1 deletion pkg/cloud/cloud_test.go
@@ -6,6 +6,7 @@ import (

configv1 "github.com/openshift/api/config/v1"
"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/aws"
"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/azure"
"github.com/openshift/cluster-cloud-controller-manager-operator/pkg/cloud/openstack"
"github.com/stretchr/testify/assert"
appsv1 "k8s.io/api/apps/v1"
@@ -31,8 +32,9 @@ func TestGetResources(t *testing.T) {
name: "GCP resources are empty, as the platform is not yet supported",
platform: configv1.GCPPlatformType,
}, {
name: "Azure resources are empty, as the platform is not yet supported",
name: "Azure resources returned as expected",
platform: configv1.AzurePlatformType,
expected: azure.GetResources(),
}, {
name: "VSphere resources are empty, as the platform is not yet supported",
platform: configv1.VSpherePlatformType,
22 changes: 12 additions & 10 deletions pkg/config/config.go
@@ -20,11 +20,12 @@ type imagesReference struct {

// OperatorConfig contains configuration values for templating resources
type OperatorConfig struct {
-	ManagedNamespace string
-	ControllerImage  string
-	CloudNodeImage   string
-	IsSingleReplica  bool
-	Platform         configv1.PlatformType
+	ManagedNamespace   string
+	ControllerImage    string
+	CloudNodeImage     string
+	IsSingleReplica    bool
+	InfrastructureName string
+	Platform           configv1.PlatformType
}

// GetProviderFromInfrastructure reads the Infrastructure resource and returns Platform value
@@ -92,11 +93,12 @@ func ComposeConfig(infrastructure *configv1.Infrastructure, imagesFile, managedN
}

config := OperatorConfig{
-		Platform:         platform,
-		ManagedNamespace: managedNamespace,
-		ControllerImage:  getCloudControllerManagerFromImages(platform, images),
-		CloudNodeImage:   getCloudNodeManagerFromImages(platform, images),
-		IsSingleReplica:  infrastructure.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode,
+		Platform:           platform,
+		ManagedNamespace:   managedNamespace,
+		ControllerImage:    getCloudControllerManagerFromImages(platform, images),
+		CloudNodeImage:     getCloudNodeManagerFromImages(platform, images),
+		InfrastructureName: infrastructure.Status.InfrastructureName,
+		IsSingleReplica:    infrastructure.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode,
}

return config, nil
17 changes: 17 additions & 0 deletions pkg/substitution/substitution.go
@@ -14,6 +14,8 @@ const (
// cloudControllerManagerName is a name for default CCM controller container any provider may have
cloudControllerManagerName = "cloud-controller-manager"
cloudNodeManagerName = "cloud-node-manager"

infraNameEnvVar = "OCP_INFRASTRUCTURE_NAME"
)

// setCloudControllerImage substitutes controller containers in provided pod specs with correct image
@@ -39,6 +41,20 @@ func setCloudControllerImage(config config.OperatorConfig, p corev1.PodSpec) cor
return updatedPod
}

// setInfrastructureNameVariable looks for an env variable named OCP_INFRASTRUCTURE_NAME in each container and, if found, sets its value to the infrastructure name from the Infrastructure resource.
func setInfrastructureNameVariable(infrastructureName string, p corev1.PodSpec) corev1.PodSpec {
updatedPod := *p.DeepCopy()
for _, container := range updatedPod.Containers {
for i, envVar := range container.Env {
if envVar.Name == infraNameEnvVar {
container.Env[i].Value = infrastructureName
break
}
}
}
return updatedPod
}

func FillConfigValues(config config.OperatorConfig, templates []client.Object) []client.Object {
objects := make([]client.Object, len(templates))
for i, objectTemplate := range templates {
@@ -50,6 +66,7 @@ func FillConfigValues(config config.OperatorConfig, templates []client.Object) [
switch obj := templateCopy.(type) {
case *appsv1.Deployment:
obj.Spec.Template.Spec = setCloudControllerImage(config, obj.Spec.Template.Spec)
obj.Spec.Template.Spec = setInfrastructureNameVariable(config.InfrastructureName, obj.Spec.Template.Spec)
if config.IsSingleReplica {
obj.Spec.Replicas = pointer.Int32(1)
}
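The env-var substitution above is simple enough that a small unit test illustrates the expected behaviour. The sketch below is not part of this PR; it assumes a test in the same substitution package, reuses testify (already used elsewhere in the repository), and the infrastructure name value is made up.

```go
// Minimal sketch of a test for the OCP_INFRASTRUCTURE_NAME substitution; illustrative only.
package substitution

import (
	"testing"

	"github.com/stretchr/testify/assert"
	corev1 "k8s.io/api/core/v1"
)

func TestSetInfrastructureNameVariable(t *testing.T) {
	podSpec := corev1.PodSpec{
		Containers: []corev1.Container{{
			Name: "azure-cloud-controller-manager",
			Env: []corev1.EnvVar{{
				Name:  "OCP_INFRASTRUCTURE_NAME",
				Value: "kubernetes", // placeholder default from the manifest
			}},
		}},
	}

	updated := setInfrastructureNameVariable("mycluster-x7k2p", podSpec)

	// The matching env var is rewritten to the real infrastructure name...
	assert.Equal(t, "mycluster-x7k2p", updated.Containers[0].Env[0].Value)
	// ...while the input spec is left untouched thanks to the deep copy.
	assert.Equal(t, "kubernetes", podSpec.Containers[0].Env[0].Value)
}
```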