Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ module github.com/openshift/machine-api-operator
go 1.13

require (
github.com/MakeNowJust/heredoc v1.0.0 // indirect
github.com/blang/semver v3.5.1+incompatible
github.com/gobuffalo/flect v0.2.2 // indirect
github.com/go-logr/logr v0.3.0
github.com/google/gofuzz v1.1.0
github.com/google/uuid v1.1.2
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/onsi/ginkgo v1.14.1
github.com/onsi/gomega v1.10.2
github.com/openshift/api v0.0.0-20201216151826-78a19e96f9eb
Expand All @@ -23,6 +26,7 @@ require (
gopkg.in/gcfg.v1 v1.2.3
k8s.io/api v0.20.0
k8s.io/apimachinery v0.20.0
k8s.io/apiserver v0.20.0
k8s.io/client-go v0.20.0
k8s.io/code-generator v0.20.0
k8s.io/klog/v2 v2.4.0
Expand Down
9 changes: 9 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd h1:sjQovDkwrZp8u+gxLtPgKGjk5hCxuy2hrRejBTA9xFU=
github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd/go.mod h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
Expand Down Expand Up @@ -249,6 +251,8 @@ github.com/go-openapi/validate v0.19.5/go.mod h1:8DJv2CVJQ6kGNpFW6eV9N3JviE1C85n
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gobuffalo/flect v0.2.0 h1:EWCvMGGxOjsgwlWaP+f4+Hh6yrrte7JeFL2S6b+0hdM=
github.com/gobuffalo/flect v0.2.0/go.mod h1:W3K3X9ksuZfir8f/LrfVtWmCDQFfayuylOJ7sz/Fj80=
github.com/gobuffalo/flect v0.2.2 h1:PAVD7sp0KOdfswjAw9BpLCU9hXo7wFSzgpQ+zNeks/A=
github.com/gobuffalo/flect v0.2.2/go.mod h1:vmkQwuZYhN5Pc4ljYQZzP+1sq+NEkK+lh20jmEmX3jc=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
Expand Down Expand Up @@ -429,6 +433,8 @@ github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNx
github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
Expand Down Expand Up @@ -602,6 +608,7 @@ github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5q
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
Expand Down Expand Up @@ -809,6 +816,7 @@ golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down Expand Up @@ -1085,6 +1093,7 @@ k8s.io/apiserver v0.18.2/go.mod h1:Xbh066NqrZO8cbsoenCwyDJ1OSi8Ag8I2lezeHxzwzw=
k8s.io/apiserver v0.18.6/go.mod h1:Zt2XvTHuaZjBz6EFYzpp+X4hTmgWGy8AthNVnTdm3Wg=
k8s.io/apiserver v0.19.0/go.mod h1:XvzqavYj73931x7FLtyagh8WibHpePJ1QwWrSJs2CLk=
k8s.io/apiserver v0.19.2/go.mod h1:FreAq0bJ2vtZFj9Ago/X0oNGC51GfubKK/ViOKfVAOA=
k8s.io/apiserver v0.20.0 h1:0MwO4xCoqZwhoLbFyyBSJdu55CScp4V4sAgX6z4oPBY=
k8s.io/apiserver v0.20.0/go.mod h1:6gRIWiOkvGvQt12WTYmsiYoUyYW0FXSiMdNl4m+sxY8=
k8s.io/cli-runtime v0.18.0-rc.1/go.mod h1:yuKZYDG8raONmwjwIkT77lCfIuPwX+Bsp88MKYf1TlU=
k8s.io/cli-runtime v0.19.0 h1:wLe+osHSqcItyS3MYQXVyGFa54fppORVA8Jn7DBGSWw=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,31 @@ spec:
description: Machines older than this duration without a node will be considered to have failed and will be remediated. Expects an unsigned duration string of decimal numbers each with optional fraction and a unit suffix, eg "300ms", "1.5h" or "2h45m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h".
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
remediationTemplate:
description: "RemediationTemplate is a reference to a remediation template provided by an infrastructure provider. \n This field is completely optional, when filled, the MachineHealthCheck controller creates a new object from the template referenced and hands off remediation of the machine to a controller that lives outside of Machine API Operator."
properties:
apiVersion:
description: API version of the referent.
type: string
fieldPath:
description: 'If referring to a piece of an object instead of an entire object, this string should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. For example, if the object reference is to a container within a pod, this would take on a value like: "spec.containers{name}" (where "name" refers to the name of the container that triggered the event) or if no container name is specified "spec.containers[2]" (container with index 2 in this pod). This syntax is chosen only to have some well-defined way of referencing a part of an object. TODO: this design is not final and this field is subject to change in the future.'
type: string
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
namespace:
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
type: string
resourceVersion:
description: 'Specific resourceVersion to which this reference is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
type: string
uid:
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
type: string
type: object
selector:
description: 'Label selector to match machines whose health will be exercised. Note: An empty selector will match all machines.'
properties:
Expand Down
29 changes: 29 additions & 0 deletions install/0000_30_machine-api-operator_09_rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,35 @@ rules:
- create
- update

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: machine-api-operator-ext-remediation
aggregationRule:
clusterRoleSelectors:
- matchLabels: # Allowing external remediations to add their permissions
rbac.ext-remediation/aggregate-to-ext-remediation: "true"

rules: [] # The control plane automatically fills in the rules

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: machine-api-operator-ext-remediation
annotations:
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: machine-api-operator-ext-remediation
subjects:
- kind: ServiceAccount
name: machine-api-controllers
namespace: openshift-machine-api

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
15 changes: 14 additions & 1 deletion pkg/apis/machine/v1beta1/condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ limitations under the License.
package v1beta1

// Conditions and condition Reasons for the MachineHealthCheck object

const (
// RemediationAllowedCondition is set on MachineHealthChecks to show the status of whether the MachineHealthCheck is
// allowed to remediate any Machines or whether it is blocked from remediating any further.
Expand All @@ -26,6 +25,20 @@ const (
// TooManyUnhealthy is the reason used when too many Machines are unhealthy and the MachineHealthCheck is blocked
// from making any further remediations.
TooManyUnhealthyReason = "TooManyUnhealthy"

// ExternalRemediationTemplateAvailable is set on machinehealthchecks when MachineHealthCheck controller uses external remediation.
// ExternalRemediationTemplateAvailable is set to false if external remediation template is not found.
ExternalRemediationTemplateAvailable ConditionType = "ExternalRemediationTemplateAvailable"

// ExternalRemediationTemplateNotFound is the reason used when a machine health check fails to find external remediation template.
ExternalRemediationTemplateNotFound = "ExternalRemediationTemplateNotFound"

// ExternalRemediationRequestAvailable is set on machinehealthchecks when MachineHealthCheck controller uses external remediation.
// ExternalRemediationRequestAvailable is set to false if creating external remediation request fails.
ExternalRemediationRequestAvailable ConditionType = "ExternalRemediationRequestAvailable"

// ExternalRemediationRequestCreationFailed is the reason used when a machine health check fails to create external remediation request.
ExternalRemediationRequestCreationFailed = "ExternalRemediationRequestCreationFailed"
)

const (
Expand Down
77 changes: 10 additions & 67 deletions pkg/apis/machine/v1beta1/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ limitations under the License.

package v1beta1

// Constants aren't automatically generated for unversioned packages.
// Instead share the same constant for all versioned packages
type MachineStatusError string

const (
// Represents that the combination of configuration in the MachineSpec
// is not supported by this cluster. This is not a transient error, but
Expand All @@ -28,20 +24,6 @@ const (
// Example: the ProviderSpec specifies an instance type that doesn't exist,
InvalidConfigurationMachineError MachineStatusError = "InvalidConfiguration"

// This indicates that the MachineSpec has been updated in a way that
// is not supported for reconciliation on this cluster. The spec may be
// completely valid from a configuration standpoint, but the controller
// does not support changing the real world state to match the new
// spec.
//
// Example: the responsible controller is not capable of changing the
// container runtime from docker to rkt.
UnsupportedChangeMachineError MachineStatusError = "UnsupportedChange"

// This generally refers to exceeding one's quota in a cloud provider,
// or running out of physical machines in an on-premise environment.
InsufficientResourcesMachineError MachineStatusError = "InsufficientResources"

// There was an error while trying to create a Node to match this
// Machine. This may indicate a transient problem that will be fixed
// automatically with time, such as a service outage, or a terminal
Expand All @@ -66,56 +48,17 @@ const (
// Example: cannot resolve EC2 IP address.
DeleteMachineError MachineStatusError = "DeleteError"

// This error indicates that the machine did not join the cluster
// as a new node within the expected timeframe after instance
// creation at the provider succeeded
//
// Example use case: A controller that deletes Machines which do
// not result in a Node joining the cluster within a given timeout
// and that are managed by a MachineSet
JoinClusterTimeoutMachineError = "JoinClusterTimeoutError"
)

type ClusterStatusError string

const (
// InvalidConfigurationClusterError indicates that the cluster
// configuration is invalid.
InvalidConfigurationClusterError ClusterStatusError = "InvalidConfiguration"

// UnsupportedChangeClusterError indicates that the cluster
// spec has been updated in an unsupported way. That cannot be
// reconciled.
UnsupportedChangeClusterError ClusterStatusError = "UnsupportedChange"

// CreateClusterError indicates that an error was encountered
// when trying to create the cluster.
CreateClusterError ClusterStatusError = "CreateError"

// UpdateClusterError indicates that an error was encountered
// when trying to update the cluster.
UpdateClusterError ClusterStatusError = "UpdateError"
Comment on lines -82 to -97
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean to remove these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep.
They weren't being used in MAO so I assumed it is a pretty safe bet to remove them.
Of course I could be wrong here - so looking for cloud team input.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these changes should at least belong to a separate commit or a different PR. Agreed - we don't use these values, and we neither have nor plan to have a cluster resource in MAPI, yet bundling the removal here makes it hard to approve such a change.

// TemplateClonedFromGroupKindAnnotation is the infrastructure machine annotation that stores the group-kind of the infrastructure template resource
// that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation.
TemplateClonedFromGroupKindAnnotation = "machine.openshift.io/cloned-from-groupkind"

// DeleteClusterError indicates that an error was encountered
// when trying to delete the cluster.
DeleteClusterError ClusterStatusError = "DeleteError"
// TemplateClonedFromNameAnnotation is the infrastructure machine annotation that stores the name of the infrastructure template resource
// that was cloned for the machine. This annotation is set only during cloning a template. Older/adopted machines will not have this annotation.
TemplateClonedFromNameAnnotation = "machine.openshift.io/cloned-from-name"
)

type MachineSetStatusError string

const (
// Represents that the combination of configuration in the MachineTemplateSpec
// is not supported by this cluster. This is not a transient error, but
// indicates a state that must be fixed before progress can be made.
//
// Example: the ProviderSpec specifies an instance type that doesn't exist.
InvalidConfigurationMachineSetError MachineSetStatusError = "InvalidConfiguration"
)

type MachineDeploymentStrategyType string
// Constants aren't automatically generated for unversioned packages.
// Instead share the same constant for all versioned packages
type MachineStatusError string

const (
// Replace the old MachineSet by new one using rolling update
// i.e. gradually scale down the old MachineSet and scale up the new one.
RollingUpdateMachineDeploymentStrategyType MachineDeploymentStrategyType = "RollingUpdate"
)
type MachineSetStatusError string
9 changes: 9 additions & 0 deletions pkg/apis/machine/v1beta1/machinehealthcheck_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ type MachineHealthCheckSpec struct {
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
// +kubebuilder:validation:Type:=string
NodeStartupTimeout metav1.Duration `json:"nodeStartupTimeout,omitempty"`

// RemediationTemplate is a reference to a remediation template

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure this does not require a whole new set of RBAC permissions on our side?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How will the controller be able to create new objects from the template? Who is going to be adding the appropriate RBAC? Is that going to be part of the default set of RBAC or will another component grant permission to the MHC service account in their own way?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was no change in this logic from upstream.
In a nutshell the existence of a template means that the proper CRD exist so a CR (EMR) can be created.
If you want to read more, here is the upstream md file

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just because a CRD exists doesn't mean a user has permission to create resources 😉 Something will need to give the service account for the Machine Health Check controller permission to create these resources. It could be that we just add those to the defaults (which I think is perfectly acceptable), but to do this we need to know ahead of time all of the types that could possibly be referred to here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand.
I actually had to do it on my e2e test.
However, I'm not sure if it is something we can know ahead of time - since I'm pretty sure that the CRDs are defined by the third party (external).
It might be something that needs to be updated after coordination with the external user.
/cc @n1r1 what do you think ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would be the downside to implement the suggestion I made?

IMO forcing all external remediations into a single API group will limit us severely. This means that only operators built by OpenShift and maintained by OpenShift would be able to be used, ie, no community remediation templates, if that's not an issue then sure, we can go down this route

If it's more, perhaps the operators responsible for those external remediation strategies should be responsible for adding a binding for the service account?

I think my preference would be this kind of approach, it would be consistent across both openshift and community remediation methods at least

From my point of view, external remediation is a generic infrastructure. It is not tied to a specific platform.
I prefer to have a complete decoupling between MHC and the external remediation controller.
There's a known api, and whoever wants can just implement it.

Yes I appreciate that and definitely want it to be generic, but that doesn't fit RBAC very well, so we need to come up with some solution of allowing the MHC permission for the groups that are appropriate.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would be the downside to implement the suggestion I made?

IMO forcing all external remediations into a single API group will limit us severely. This means that only operators built by OpenShift and maintained by OpenShift would be able to be used, ie, no community remediation templates, if that's not an issue then sure, we can go down this route

Good point.
We could take a hybrid approach, where openshift remediators can be seamlessly added while community operators would need to do some extra step (either patch the SA or update the RBAC in this repo)

If it's more, perhaps the operators responsible for those external remediation strategies should be responsible for adding a binding for the service account?

I think my preference would be this kind of approach, it would be consistent across both openshift and community remediation methods at least

The service account could be different in different distributions, couldn't it? For example, the MHC SA could be ocp-mhc-sa, while in CAPI-based clusters it could be capi-mhc-sa, so the remediator would need to patch all "known" SAs.

Also, I'm not sure if someone will be able to remember that changing MHC service account breaks other external components.

Maybe remediators should expose some config value. It could be either the apiGroup to be used or the ServiceAccount to be patched.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A new idea came to mind - what about just having a non-OpenShift API group which is specifically for external remediation?
For example, external-remediation.io. We could propose that upstream as well, so that community remediators could work the same on OCP and upstream by using that group.

WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just wanted to add another idea here. we could create the default build with a tightly focused api group for the external remediation template, but give the user clear instructions on how to override/expand that initial group. basically as a day 2 operation they could create their own special EMRs and load those in, but by default it would work "out of the box" with a reasonable setting.

We could take a hybrid approach, where openshift remediators can be seamlessly added while community operators would need to do some extra step (either patch the SA or update the RBAC in this repo)

+1, i think this is saying something similar

This situation is also similar to how we have modified the cluster-api provider in the cluster-autoscaler. It's a different problem there, but perhaps a similar solution. We compile the autoscaler to know about our resource groups, but it is possible for the autoscaler to be reconfigured by environment variable to use a different API group. This allows us to share code with upstream CAPI and also keep our openshift types for when we deploy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @elmiko

We discussed this issue with the CAPI community, and CAPI already uses an RBAC aggregation label, which allows external components to add their own RBAC rules.

I believe this is an elegant way to solve this problem and it's always good to be aligned with upstream.

// provided by an infrastructure provider.
//
// This field is completely optional, when filled, the MachineHealthCheck controller
// creates a new object from the template referenced and hands off remediation of the machine to
// a controller that lives outside of Machine API Operator.
// +optional
RemediationTemplate *corev1.ObjectReference `json:"remediationTemplate,omitempty"`
}

// UnhealthyCondition represents a Node condition type and value with a timeout
Expand Down
6 changes: 3 additions & 3 deletions pkg/apis/machine/v1beta1/machineset_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func TestDefaults(t *testing.T) {
func TestRoundTripMachineSet(t *testing.T) {
codecs := serializer.NewCodecFactory(scheme.Scheme)
seed := time.Now().UnixNano()
fuzzer := fuzzer.FuzzerFor(fuzzer.MergeFuzzerFuncs(metafuzzer.Funcs, machineFuzzerFuncs), rand.NewSource(seed), codecs)
machineFuzzer := fuzzer.FuzzerFor(fuzzer.MergeFuzzerFuncs(metafuzzer.Funcs, machineFuzzerFuncs), rand.NewSource(seed), codecs)
ctx := context.Background()
g := NewWithT(t)

Expand All @@ -103,8 +103,8 @@ func TestRoundTripMachineSet(t *testing.T) {
// losing data
spec := &MachineSetSpec{}
status := &MachineSetStatus{}
fuzzer.Fuzz(spec)
fuzzer.Fuzz(status)
machineFuzzer.Fuzz(spec)
machineFuzzer.Fuzz(status)

machineSet.Spec = *spec.DeepCopy()
g.Expect(c.Create(ctx, machineSet)).To(Succeed())
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/machine/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading