Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions frontend/pkg/frontend/node_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,19 +227,18 @@ func decodeDesiredNodePoolCreate(ctx context.Context, azureLocation string) (*ap
// The spCluster and spNodePool parameters provide service provider state needed for
// admission checks that depend on runtime state (e.g., version upgrade validation).
// For UPDATE operations, these parameters are required and the function will fail if they're nil.
// For CREATE operations, these can be nil since no prior state exists.
// For CREATE operations, spNodePool should be nil (no prior state exists), and spCluster is to be provided
func (f *Frontend) newNodePoolAdmissionContext(ctx context.Context, op operation.Operation, cluster *api.HCPOpenShiftCluster, spCluster *api.ServiceProviderCluster, spNodePool *api.ServiceProviderNodePool) (*admission.NodePoolAdmissionContext, error) {
if cluster == nil {
return nil, fmt.Errorf("cluster is required for admission context")
}

if op.Type == operation.Update {
if spCluster == nil {
return nil, fmt.Errorf("serviceProviderCluster is required for UPDATE operations")
}
if spNodePool == nil {
return nil, fmt.Errorf("serviceProviderNodePool is required for UPDATE operations")
}
if spCluster == nil && op.Type == operation.Update {
return nil, fmt.Errorf("serviceProviderCluster is required for %v operations", op.Type)
}
Comment on lines +236 to +238

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the comment above, should spCluster always be provided no matter the operation type?


if spNodePool == nil && op.Type == operation.Update {
return nil, fmt.Errorf("serviceProviderNodePool is required for %v operations", op.Type)
}

return &admission.NodePoolAdmissionContext{
Expand Down Expand Up @@ -286,11 +285,16 @@ func (f *Frontend) createNodePool(writer http.ResponseWriter, request *http.Requ
return utils.TrackError(fmt.Errorf("cluster %s has no ClusterServiceID", cluster.ID))
}

serviceProviderCluster, err := database.GetOrCreateServiceProviderCluster(ctx, f.resourcesDBClient, resourceID.Parent)
if err != nil {
return utils.TrackError(err)
}

restOperation := operation.Operation{
Type: operation.Create,
Options: validation.AFECsToValidationOptions(subscription.GetRegisteredFeatures()),
}
admissionContext, err := f.newNodePoolAdmissionContext(ctx, restOperation, cluster, nil, nil)
admissionContext, err := f.newNodePoolAdmissionContext(ctx, restOperation, cluster, serviceProviderCluster, nil)
if err != nil {
return utils.TrackError(err)
}
Comment on lines +288 to 300
Expand Down
44 changes: 37 additions & 7 deletions internal/admission/admit_nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ import (

// NodePoolAdmissionContext carries dependencies that node pool mutation/admission needs
// beyond the node pool object itself. It includes the parent cluster and optionally
// the service provider cluster and nodepool (for update-specific validations like version upgrades).
// the service provider cluster (for version skew validation) and
// service provider nodepool (for update-specific validations like version upgrades).
type NodePoolAdmissionContext struct {
Cluster *api.HCPOpenShiftCluster
ServiceProviderNodePool *api.ServiceProviderNodePool
Expand Down Expand Up @@ -94,7 +95,8 @@ func AdmitNodePoolOnDelete(ctx context.Context, admissionContext *NodePoolDelete
}

// AdmitNodePool performs non-static checks of nodepool. Checks that require more information than is contained inside of
// the nodepool instance itself. For update operations with version changes, include ServiceProviderNodePool and
// the nodepool instance itself. For create operations, include ServiceProviderCluster in the admissionContext to enable
// version skew validation. For update operations with version changes, include both ServiceProviderNodePool and
Comment on lines +98 to +99

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this comment makes it seem like the validation execution is based on these parameters not on the actual type of the operation. Instead, wdyt of updating it to something like this?

For update operations, ServiceProviderNodePool must be specified in the admissionContext
  • Make the doc more about the parameters required rather than validation enablement.
  • The ServiceProviderCluster must always be specified on both scenarios so no need to call it out here.

// ServiceProviderCluster in the admissionContext to enable version upgrade validation.
func AdmitNodePool(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, newNodePool, oldNodePool *api.HCPOpenShiftClusterNodePool) field.ErrorList {
errs := field.ErrorList{}
Expand Down Expand Up @@ -128,9 +130,11 @@ func admitNodePoolVersion(ctx context.Context, admissionContext *NodePoolAdmissi
))
}

// Perform update-specific version upgrade validation
if op.Type == operation.Update {
errs = append(errs, validateNodePoolVersionChange(ctx, admissionContext, op, fldPath.Child("id"), newObj, oldObj)...)
switch op.Type {
case operation.Create:
errs = append(errs, validateNodePoolVersionOnCreate(ctx, admissionContext, op, fldPath.Child("id"), newObj)...)
case operation.Update:
errs = append(errs, validateNodePoolVersionOnUpdate(ctx, admissionContext, op, fldPath.Child("id"), newObj, oldObj)...)
}

return errs
Expand Down Expand Up @@ -166,13 +170,38 @@ func admitNodePoolPlatform(ctx context.Context, admissionContext *NodePoolAdmiss
return errs
}

// validateNodePoolVersionChange validates that a node pool version change is valid.
// validateNodePoolVersionOnCreate validates the requested node pool version at CREATE time.
// At CREATE time, only basic validations apply (format, minimum version).
func validateNodePoolVersionOnCreate(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, fldPath *field.Path, newObj *api.NodePoolVersionProfile) field.ErrorList {
if len(newObj.ID) == 0 {
return nil
}

errs := field.ErrorList{}
newVersion, err := semver.Parse(newObj.ID)
if err != nil {
errs = append(errs, field.Invalid(fldPath, newObj.ID, fmt.Sprintf("invalid node pool version format: %s", err.Error())))
return errs
}
spCluster := admissionContext.ServiceProviderCluster

lowestCPVersion, highestCPVersion := apihelpers.FindLowestAndHighestClusterVersion(spCluster.Status.ControlPlaneVersion.ActiveVersions)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can mean it could return nil, nil. Are we aware of this chance, and what would be the resulting behavior in that case?


err = validation.ValidateNodePoolVersionSkew(newVersion, lowestCPVersion, highestCPVersion, op.HasOption(api.FeatureExperimentalReleaseFeatures))
if err != nil {
errs = append(errs, field.Invalid(fldPath, newObj.ID, err.Error()))
}

return errs
}

// validateNodePoolVersionOnUpdate validates that a node pool version change is valid.
// It checks:
// - Upgrade: at most +2 minor versions from current, and cannot exceed lowest control plane version
// - Downgrade: at most -2 minor versions from the highest control plane version
// - Cross-major changes (either direction) require AFEC FeatureExperimentalReleaseFeatures
// - NP version must be in the allowed skew map when CP and NP are on different majors
func validateNodePoolVersionChange(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, fldPath *field.Path, newObj, oldObj *api.NodePoolVersionProfile) field.ErrorList {
func validateNodePoolVersionOnUpdate(ctx context.Context, admissionContext *NodePoolAdmissionContext, op operation.Operation, fldPath *field.Path, newObj, oldObj *api.NodePoolVersionProfile) field.ErrorList {
spNodePool, spCluster := admissionContext.ServiceProviderNodePool, admissionContext.ServiceProviderCluster
// Skip validation if no version is specified or version didn't change
if len(newObj.ID) == 0 || newObj.ID == oldObj.ID {
Expand All @@ -194,6 +223,7 @@ func validateNodePoolVersionChange(ctx context.Context, admissionContext *NodePo
}

lowestCPVersion, highestCPVersion := apihelpers.FindLowestAndHighestClusterVersion(spCluster.Status.ControlPlaneVersion.ActiveVersions)

if err := validation.ValidateNodePoolVersionChange(newVersion, spNodePool.Status.NodePoolVersion.ActiveVersions, lowestCPVersion, highestCPVersion, op.HasOption(api.FeatureExperimentalReleaseFeatures)); err != nil {
errs = append(errs, field.Invalid(fldPath, newObj.ID, err.Error()))
}
Expand Down
48 changes: 26 additions & 22 deletions internal/admission/admit_nodepool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,17 @@ func TestAdmitNodePool_VersionValidation(t *testing.T) {
},
},
}

// Use cluster version from test case's clusterVersions if cross-major upgrade
clusterVersion := "4.18"
if tt.allowMajorUpgrades && len(tt.clusterVersions) > 0 {
clusterVersion = tt.clusterVersions[0]
}

cluster := &api.HCPOpenShiftCluster{
CustomerProperties: api.HCPOpenShiftClusterCustomerProperties{
Version: api.VersionProfile{
ID: "4.18",
ID: clusterVersion,
ChannelGroup: "stable",
},
},
Expand Down Expand Up @@ -685,19 +692,7 @@ func TestAdmitNodePool_VersionValidation(t *testing.T) {
},
}

// Build ServiceProviderCluster with active versions
var clusterActiveVersions []api.HCPClusterActiveVersion
for _, v := range tt.clusterVersions {
ver := semver.MustParse(v)
clusterActiveVersions = append(clusterActiveVersions, api.HCPClusterActiveVersion{Version: &ver})
}
spCluster := &api.ServiceProviderCluster{
Status: api.ServiceProviderClusterStatus{
ControlPlaneVersion: api.ServiceProviderClusterStatusVersion{
ActiveVersions: clusterActiveVersions,
},
},
}
spCluster := serviceProviderClusterWithVersions(t, tt.clusterVersions)

errs := AdmitNodePool(context.Background(), &NodePoolAdmissionContext{
Cluster: cluster,
Expand Down Expand Up @@ -751,14 +746,7 @@ func TestAdmitNodePool_IncludesChannelGroupCheck(t *testing.T) {
},
}

clusterVer := semver.MustParse("4.18.0")
spCluster := &api.ServiceProviderCluster{
Status: api.ServiceProviderClusterStatus{
ControlPlaneVersion: api.ServiceProviderClusterStatusVersion{
ActiveVersions: []api.HCPClusterActiveVersion{{Version: &clusterVer}},
},
},
}
spCluster := serviceProviderClusterWithVersions(t, []string{"4.18.0"})

// Empty update operation (no experimental features)
op := operation.Operation{Type: operation.Update}
Expand Down Expand Up @@ -843,3 +831,19 @@ func TestAdmitNodePoolOnDelete(t *testing.T) {
})
}
}

func serviceProviderClusterWithVersions(t *testing.T, versions []string) *api.ServiceProviderCluster {
t.Helper()
var active []api.HCPClusterActiveVersion
for _, s := range versions {
v := semver.MustParse(s)
active = append(active, api.HCPClusterActiveVersion{Version: &v})
}
return &api.ServiceProviderCluster{
Status: api.ServiceProviderClusterStatus{
ControlPlaneVersion: api.ServiceProviderClusterStatusVersion{
ActiveVersions: active,
},
},
}
}
58 changes: 34 additions & 24 deletions internal/validation/validators.go
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,39 @@ func ValidateNodePoolVersionChange(desiredVersion semver.Version, activeVersions
}

lowest, highest := apihelpers.FindLowestAndHighestNodePoolVersion(activeVersions)
err := ValidateNodePoolVersionSkew(desiredVersion, lowestCPVersion, highestCPVersion, allowMajorUpgrade)
if err != nil {
return err
}

// Cross-major change (upgrade or downgrade): gated behind FeatureExperimentalReleaseFeatures.
isCrossMajorUpgrade := lowest != nil && desiredVersion.Major > lowest.Major
isCrossMajorDowngrade := highest != nil && desiredVersion.Major < highest.Major
if isCrossMajorUpgrade || isCrossMajorDowngrade {
if !allowMajorUpgrade {
return fmt.Errorf("major version changes are not supported")
}
if isCrossMajorUpgrade {
return ValidateMajorUpgrade(*lowest, desiredVersion)
}
return ValidateCrossMajorNodePoolSkew(desiredVersion, *highestCPVersion)
}

// Minor skip validation (N-2 skew policy)
if lowest != nil && desiredVersion.Minor > lowest.Minor+2 {
return fmt.Errorf(
"invalid upgrade path from %s to %s: skipping more than 2 minor versions is not allowed",
lowest.String(), desiredVersion.String(),
)
}

return nil
}

func ValidateNodePoolVersionSkew(desiredVersion semver.Version, lowestCPVersion, highestCPVersion *semver.Version, allowMajorUpgrade bool) error {
if lowestCPVersion == nil || highestCPVersion == nil {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed. It will be different from nil

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this defensive check is needed to pass the tests. The error arises due to activeVersions being empty

return nil
}

if desiredVersion.GT(*lowestCPVersion) {
return fmt.Errorf(
Expand All @@ -945,34 +978,11 @@ func ValidateNodePoolVersionChange(desiredVersion semver.Version, activeVersions
// This applies to both upgrades (4.21→4.22) and downgrades (4.22→4.21) within
// the same major. NP changes that cross majors (e.g., 5.0→4.22) skip this and
// go through the AFEC gate below.
isSameMajorNPChange := highest == nil || desiredVersion.Major == highest.Major
if desiredVersion.Major != highestCPVersion.Major && isSameMajorNPChange {
if !allowMajorUpgrade {
return fmt.Errorf("major version changes are not supported")
}
return ValidateCrossMajorNodePoolSkew(desiredVersion, *highestCPVersion)
}

// Cross-major change (upgrade or downgrade): gated behind FeatureExperimentalReleaseFeatures.
isCrossMajorUpgrade := lowest != nil && desiredVersion.Major > lowest.Major
isCrossMajorDowngrade := highest != nil && desiredVersion.Major < highest.Major
if isCrossMajorUpgrade || isCrossMajorDowngrade {
if desiredVersion.Major != highestCPVersion.Major {
if !allowMajorUpgrade {
return fmt.Errorf("major version changes are not supported")
}
Comment on lines 982 to 984

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is it checking if major upgrade is allowed here? what happens if this is called on create where cp is on 5.x and the np to be created is on 4.x?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit complex. Creating a cp with 5.x is only allowed under ExperimentalFeature flag. So the allowMajorUpgrade will be mostly true unless you unset that flag after the cluster creation.
What should we do in that case?

I think avoid to use it could be an option here, blocking the creation of a node pool if the subscription don't longer hold that AFEC.

On the other hand we can allow creating node pool in that case we can omit the AFEC flag check.

I don't have a strong opinion though.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think the naming of allowMajorUpgrade confused me here since this is called on both create and update. Outside the context of update, it doesn't make sense.

What is the goal? Is it to block the following for subs without this afec flag:

  • creation of 5.x np?
  • upgrade from 4.x to 5.x?
  • downgrade from 5.x to 4.x?

If so, blocking the creation of a 4.x node pool for a 5.x cluster if the subscription no longer holds that afec doesn't make sense no? I would expect it to always be created since a sub without this afec flag should always be able to create a 4.x np, provided its within the valid skew, no matter the cp version. Maybe i'm missing something else?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After syncing with @ahitacat and @cgiradkar, this check is being done as we do not allow cross major np/cp creation, update or downgrade unless it is done with a sub that has that afec flag enabled. This is an existing behaviour and is the same for other offerings. Reached an agreement to change the name and update the comment above to clarify this requirement.

if isCrossMajorUpgrade {
return ValidateMajorUpgrade(*lowest, desiredVersion)
}
return ValidateCrossMajorNodePoolSkew(desiredVersion, *highestCPVersion)
}

// Minor skip validation (N-2 skew policy)
if lowest != nil && desiredVersion.Minor > lowest.Minor+2 {
return fmt.Errorf(
"invalid upgrade path from %s to %s: skipping more than 2 minor versions is not allowed",
lowest.String(), desiredVersion.String(),
)
}

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
],
"version": {
"channelGroup": "stable",
"id": "4.20.8"
"id": "4.22.8"
}
},
"type": "Microsoft.RedHatOpenShift/hcpOpenShiftClusters/nodePools"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"properties": null,
"registrationDate": "2025-12-19T19:53:15+00:00",
"resourceId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7",
"state": "Registered"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"identity": {
"type": "UserAssigned",
"userAssignedIdentities": {
"/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/different-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/service-managed-identity": {}
}
},
"name": "cluster-name",
"properties": {
"api": {
"visibility": "Public"
},
"clusterImageRegistry": {
"state": "Disabled"
},
"console": {},
"dns": {},
"etcd": {
"dataEncryption": {
"customerManaged": {
"encryptionType": "KMS",
"kms": {
"activeKey": {
"name": "encryptionKeyName",
"vaultName": "keyVaultName",
"version": "2024-12-01-preview"
}
}
},
"keyManagementMode": "CustomerManaged"
}
},
"network": {
"hostPrefix": 23,
"machineCidr": "10.0.0.0/16",
"networkType": "OVNKubernetes",
"podCidr": "10.128.0.0/14",
"serviceCidr": "172.30.0.0/16"
},
"platform": {
"managedResourceGroup": "managed-resource-group-name",
"networkSecurityGroupId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/some-resource-group/providers/Microsoft.Network/networkSecurityGroups/nsg",
"operatorsAuthentication": {
"userAssignedIdentities": {
"serviceManagedIdentity": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/different-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/service-managed-identity"
}
},
"outboundType": "LoadBalancer",
"subnetId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/different-resource-group/providers/Microsoft.Network/virtualNetworks/vnet/subnets/subnet"
},
"version": {
"channelGroup": "stable",
"id": "4.22"
}
},
"type": "Microsoft.RedHatOpenShift/hcpOpenShiftClusters"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"id": "|subscriptions|6b690bec-0c16-4ecb-8f67-781caf40bba7|resourcegroups|resourcegroupname|providers|microsoft.redhatopenshift|hcpopenshiftclusters|cluster-name|serviceproviderclusters|default",
"partitionKey": "6b690bec-0c16-4ecb-8f67-781caf40bba7",
"properties": {
"resourceId": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name/serviceProviderClusters/default",
"spec": {
"control_plane_version": {
"desired_version": "4.22.0"
}
},
"status": {
"control_plane_version": {
"active_versions": [
{
"version": "4.22.0"
}
]
}
}
},
"resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name/serviceProviderClusters/default",
"resourceType": "microsoft.redhatopenshift/hcpopenshiftclusters/serviceproviderclusters"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"resourceID": "/subscriptions/6b690bec-0c16-4ecb-8f67-781caf40bba7/resourceGroups/resourceGroupName/providers/Microsoft.RedHatOpenShift/hcpOpenShiftClusters/cluster-name/nodePools/test-np"
}
Loading