-
Notifications
You must be signed in to change notification settings - Fork 143
feat: add node pool vs control plane version skew validations during nodepool creation #4566
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,7 +32,8 @@ import ( | |
|
|
||
| // NodePoolAdmissionContext carries dependencies that node pool mutation/admission needs | ||
| // beyond the node pool object itself. It includes the parent cluster and optionally | ||
| // the service provider cluster and nodepool (for update-specific validations like version upgrades). | ||
| // the service provider cluster (for version skew validation) and | ||
| // service provider nodepool (for update-specific validations like version upgrades). | ||
| type NodePoolAdmissionContext struct { | ||
| Cluster *api.HCPOpenShiftCluster | ||
| ServiceProviderNodePool *api.ServiceProviderNodePool | ||
|
|
@@ -94,7 +95,8 @@ func AdmitNodePoolOnDelete(ctx context.Context, admissionContext *NodePoolDelete | |
| } | ||
|
|
||
| // AdmitNodePool performs non-static checks of nodepool. Checks that require more information than is contained inside of | ||
| // the nodepool instance itself. For update operations with version changes, include ServiceProviderNodePool and | ||
| // the nodepool instance itself. For create operations, include ServiceProviderCluster in the admissionContext to enable | ||
| // version skew validation. For update operations with version changes, include both ServiceProviderNodePool and | ||
|
Comment on lines
+98
to
+99
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this comment makes it seem like the validation execution is based on these parameters not on the actual type of the operation. Instead, wdyt of updating it to something like this?
|
||
| // ServiceProviderCluster in the admissionContext to enable version upgrade validation. | ||
| func AdmitNodePool(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, newNodePool, oldNodePool *api.HCPOpenShiftClusterNodePool) field.ErrorList { | ||
| errs := field.ErrorList{} | ||
|
|
@@ -128,9 +130,11 @@ func admitNodePoolVersion(ctx context.Context, admissionContext *NodePoolAdmissi | |
| )) | ||
| } | ||
|
|
||
| // Perform update-specific version upgrade validation | ||
| if op.Type == operation.Update { | ||
| errs = append(errs, validateNodePoolVersionChange(ctx, admissionContext, op, fldPath.Child("id"), newObj, oldObj)...) | ||
| switch op.Type { | ||
| case operation.Create: | ||
| errs = append(errs, validateNodePoolVersionOnCreate(ctx, admissionContext, op, fldPath.Child("id"), newObj)...) | ||
| case operation.Update: | ||
| errs = append(errs, validateNodePoolVersionOnUpdate(ctx, admissionContext, op, fldPath.Child("id"), newObj, oldObj)...) | ||
| } | ||
|
|
||
| return errs | ||
|
|
@@ -166,13 +170,38 @@ func admitNodePoolPlatform(ctx context.Context, admissionContext *NodePoolAdmiss | |
| return errs | ||
| } | ||
|
|
||
| // validateNodePoolVersionChange validates that a node pool version change is valid. | ||
| // validateNodePoolVersionOnCreate validates the requested node pool version at CREATE time. | ||
| // At CREATE time, only basic validations apply (format, minimum version). | ||
| func validateNodePoolVersionOnCreate(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, fldPath *field.Path, newObj *api.NodePoolVersionProfile) field.ErrorList { | ||
| if len(newObj.ID) == 0 { | ||
| return nil | ||
| } | ||
|
|
||
| errs := field.ErrorList{} | ||
| newVersion, err := semver.Parse(newObj.ID) | ||
| if err != nil { | ||
| errs = append(errs, field.Invalid(fldPath, newObj.ID, fmt.Sprintf("invalid node pool version format: %s", err.Error()))) | ||
| return errs | ||
| } | ||
| spCluster := admissionContext.ServiceProviderCluster | ||
|
|
||
| lowestCPVersion, highestCPVersion := apihelpers.FindLowestAndHighestClusterVersion(spCluster.Status.ControlPlaneVersion.ActiveVersions) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can mean it could return |
||
|
|
||
| err = validation.ValidateNodePoolVersionSkew(newVersion, lowestCPVersion, highestCPVersion, op.HasOption(api.FeatureExperimentalReleaseFeatures)) | ||
| if err != nil { | ||
| errs = append(errs, field.Invalid(fldPath, newObj.ID, err.Error())) | ||
| } | ||
|
|
||
| return errs | ||
| } | ||
|
|
||
| // validateNodePoolVersionOnUpdate validates that a node pool version change is valid. | ||
| // It checks: | ||
| // - Upgrade: at most +2 minor versions from current, and cannot exceed lowest control plane version | ||
| // - Downgrade: at most -2 minor versions from the highest control plane version | ||
| // - Cross-major changes (either direction) require AFEC FeatureExperimentalReleaseFeatures | ||
| // - NP version must be in the allowed skew map when CP and NP are on different majors | ||
| func validateNodePoolVersionChange(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, fldPath *field.Path, newObj, oldObj *api.NodePoolVersionProfile) field.ErrorList { | ||
| func validateNodePoolVersionOnUpdate(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, fldPath *field.Path, newObj, oldObj *api.NodePoolVersionProfile) field.ErrorList { | ||
| spNodePool, spCluster := admissionContext.ServiceProviderNodePool, admissionContext.ServiceProviderCluster | ||
| // Skip validation if no version is specified or version didn't change | ||
| if len(newObj.ID) == 0 || newObj.ID == oldObj.ID { | ||
|
|
@@ -194,6 +223,7 @@ func validateNodePoolVersionChange(ctx context.Context, admissionContext *NodePo | |
| } | ||
|
|
||
| lowestCPVersion, highestCPVersion := apihelpers.FindLowestAndHighestClusterVersion(spCluster.Status.ControlPlaneVersion.ActiveVersions) | ||
|
|
||
| if err := validation.ValidateNodePoolVersionChange(newVersion, spNodePool.Status.NodePoolVersion.ActiveVersions, lowestCPVersion, highestCPVersion, op.HasOption(api.FeatureExperimentalReleaseFeatures)); err != nil { | ||
| errs = append(errs, field.Invalid(fldPath, newObj.ID, err.Error())) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -924,6 +924,39 @@ func ValidateNodePoolVersionChange(desiredVersion semver.Version, activeVersions | |
| } | ||
|
|
||
| lowest, highest := apihelpers.FindLowestAndHighestNodePoolVersion(activeVersions) | ||
| err := ValidateNodePoolVersionSkew(desiredVersion, lowestCPVersion, highestCPVersion, allowMajorUpgrade) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| // Cross-major change (upgrade or downgrade): gated behind FeatureExperimentalReleaseFeatures. | ||
| isCrossMajorUpgrade := lowest != nil && desiredVersion.Major > lowest.Major | ||
| isCrossMajorDowngrade := highest != nil && desiredVersion.Major < highest.Major | ||
| if isCrossMajorUpgrade || isCrossMajorDowngrade { | ||
| if !allowMajorUpgrade { | ||
| return fmt.Errorf("major version changes are not supported") | ||
| } | ||
| if isCrossMajorUpgrade { | ||
| return ValidateMajorUpgrade(*lowest, desiredVersion) | ||
| } | ||
| return ValidateCrossMajorNodePoolSkew(desiredVersion, *highestCPVersion) | ||
| } | ||
|
|
||
| // Minor skip validation (N-2 skew policy) | ||
| if lowest != nil && desiredVersion.Minor > lowest.Minor+2 { | ||
| return fmt.Errorf( | ||
| "invalid upgrade path from %s to %s: skipping more than 2 minor versions is not allowed", | ||
| lowest.String(), desiredVersion.String(), | ||
| ) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func ValidateNodePoolVersionSkew(desiredVersion semver.Version, lowestCPVersion, highestCPVersion *semver.Version, allowMajorUpgrade bool) error { | ||
| if lowestCPVersion == nil || highestCPVersion == nil { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not needed. It will be different from nil
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this defensive check is needed to pass the tests. The error arises due to |
||
| return nil | ||
| } | ||
|
|
||
| if desiredVersion.GT(*lowestCPVersion) { | ||
| return fmt.Errorf( | ||
|
|
@@ -945,34 +978,11 @@ func ValidateNodePoolVersionChange(desiredVersion semver.Version, activeVersions | |
| // This applies to both upgrades (4.21→4.22) and downgrades (4.22→4.21) within | ||
| // the same major. NP changes that cross majors (e.g., 5.0→4.22) skip this and | ||
| // go through the AFEC gate below. | ||
| isSameMajorNPChange := highest == nil || desiredVersion.Major == highest.Major | ||
| if desiredVersion.Major != highestCPVersion.Major && isSameMajorNPChange { | ||
| if !allowMajorUpgrade { | ||
| return fmt.Errorf("major version changes are not supported") | ||
| } | ||
| return ValidateCrossMajorNodePoolSkew(desiredVersion, *highestCPVersion) | ||
| } | ||
|
|
||
| // Cross-major change (upgrade or downgrade): gated behind FeatureExperimentalReleaseFeatures. | ||
| isCrossMajorUpgrade := lowest != nil && desiredVersion.Major > lowest.Major | ||
| isCrossMajorDowngrade := highest != nil && desiredVersion.Major < highest.Major | ||
| if isCrossMajorUpgrade || isCrossMajorDowngrade { | ||
| if desiredVersion.Major != highestCPVersion.Major { | ||
| if !allowMajorUpgrade { | ||
| return fmt.Errorf("major version changes are not supported") | ||
| } | ||
|
Comment on lines
982
to
984
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is it checking if major upgrade is allowed here? what happens if this is called on create where cp is on 5.x and the np to be created is on 4.x?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit complex. Creating a cp with 5.x is only allowed under ExperimentalFeature flag. So the allowMajorUpgrade will be mostly true unless you unset that flag after the cluster creation. I think avoid to use it could be an option here, blocking the creation of a node pool if the subscription don't longer hold that AFEC. On the other hand we can allow creating node pool in that case we can omit the AFEC flag check. I don't have a strong opinion though.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think the naming of What is the goal? Is it to block the following for subs without this afec flag:
If so, blocking the creation of a 4.x node pool for a 5.x cluster if the subscription no longer holds that afec doesn't make sense no? I would expect it to always be created since a sub without this afec flag should always be able to create a 4.x np, provided its within the valid skew, no matter the cp version. Maybe i'm missing something else?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After syncing with @ahitacat and @cgiradkar, this check is being done as we do not allow cross major np/cp creation, update or downgrade unless it is done with a sub that has that afec flag enabled. This is an existing behaviour and is the same for other offerings. Reached an agreement to change the name and update the comment above to clarify this requirement. |
||
| if isCrossMajorUpgrade { | ||
| return ValidateMajorUpgrade(*lowest, desiredVersion) | ||
| } | ||
| return ValidateCrossMajorNodePoolSkew(desiredVersion, *highestCPVersion) | ||
| } | ||
|
|
||
| // Minor skip validation (N-2 skew policy) | ||
| if lowest != nil && desiredVersion.Minor > lowest.Minor+2 { | ||
| return fmt.Errorf( | ||
| "invalid upgrade path from %s to %s: skipping more than 2 minor versions is not allowed", | ||
| lowest.String(), desiredVersion.String(), | ||
| ) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| { | ||
| "properties": null, | ||
| "registrationDate": "2025-12-19T19:53:15+00:00", | ||
| "resourceId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7", | ||
| "state": "Registered" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| { | ||
| "identity": { | ||
| "type": "UserAssigned", | ||
| "userAssignedIdentities": { | ||
| "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/different-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/service-managed-identity": {} | ||
| } | ||
| }, | ||
| "name": "cluster-name", | ||
| "properties": { | ||
| "api": { | ||
| "visibility": "Public" | ||
| }, | ||
| "clusterImageRegistry": { | ||
| "state": "Disabled" | ||
| }, | ||
| "console": {}, | ||
| "dns": {}, | ||
| "etcd": { | ||
| "dataEncryption": { | ||
| "customerManaged": { | ||
| "encryptionType": "KMS", | ||
| "kms": { | ||
| "activeKey": { | ||
| "name": "encryptionKeyName", | ||
| "vaultName": "keyVaultName", | ||
| "version": "2024-12-01-preview" | ||
| } | ||
| } | ||
| }, | ||
| "keyManagementMode": "CustomerManaged" | ||
| } | ||
| }, | ||
| "network": { | ||
| "hostPrefix": 23, | ||
| "machineCidr": "10.0.0.0/16", | ||
| "networkType": "OVNKubernetes", | ||
| "podCidr": "10.128.0.0/14", | ||
| "serviceCidr": "172.30.0.0/16" | ||
| }, | ||
| "platform": { | ||
| "managedResourceGroup": "managed-resource-group-name", | ||
| "networkSecurityGroupId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/some-resource-group/providers/Microsoft.Network/networkSecurityGroups/nsg", | ||
| "operatorsAuthentication": { | ||
| "userAssignedIdentities": { | ||
| "serviceManagedIdentity": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/different-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/service-managed-identity" | ||
| } | ||
| }, | ||
| "outboundType": "LoadBalancer", | ||
| "subnetId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/different-resource-group/providers/Microsoft.Network/virtualNetworks/vnet/subnets/subnet" | ||
| }, | ||
| "version": { | ||
| "channelGroup": "stable", | ||
| "id": "4.22" | ||
| } | ||
| }, | ||
| "type": "Microsoft.RedHatOpenShift/hcpOpenShiftClusters" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| { | ||
| "id": "|subscriptions|6b690bec-0c16-4ecb-8f67-781caf40bba7|resourcegroups|resourcegroupname|providers|microsoft.redhatopenshift|hcpopenshiftclusters|cluster-name|serviceproviderclusters|default", | ||
| "partitionKey": "6b690bec-0c16-4ecb-8f67-781caf40bba7", | ||
| "properties": { | ||
| "resourceId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name/serviceProviderClusters/default", | ||
| "spec": { | ||
| "control_plane_version": { | ||
| "desired_version": "4.22.0" | ||
| } | ||
| }, | ||
| "status": { | ||
| "control_plane_version": { | ||
| "active_versions": [ | ||
| { | ||
| "version": "4.22.0" | ||
| } | ||
| ] | ||
| } | ||
| } | ||
| }, | ||
| "resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name/serviceProviderClusters/default", | ||
| "resourceType": "microsoft.redhatopenshift/hcpopenshiftclusters/serviceproviderclusters" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name/nodePools/test-np" | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the comment above, should spCluster always be provided no matter the operation type?