diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/resource_claims.go b/cluster-autoscaler/simulator/dynamicresources/utils/resource_claims.go
index f9b7b6b87e7f..474eb9032bfc 100644
--- a/cluster-autoscaler/simulator/dynamicresources/utils/resource_claims.go
+++ b/cluster-autoscaler/simulator/dynamicresources/utils/resource_claims.go
@@ -24,6 +24,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/component-helpers/scheduling/corev1"
 	"k8s.io/dynamic-resource-allocation/resourceclaim"
+	"k8s.io/utils/ptr"
 )
 
 // ClaimAllocated returns whether the provided claim is allocated.
@@ -114,3 +115,32 @@ func PodClaimOwnerReference(pod *apiv1.Pod) metav1.OwnerReference {
 func claimConsumerReferenceMatchesPod(pod *apiv1.Pod, ref resourceapi.ResourceClaimConsumerReference) bool {
 	return ref.APIGroup == "" && ref.Resource == "pods" && ref.Name == pod.Name && ref.UID == pod.UID
 }
+
+// ClaimWithoutAdminAccessRequests returns a copy of the claim with the allocation results of admin access requests removed.
+func ClaimWithoutAdminAccessRequests(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
+	claimCopy := claim.DeepCopy()
+	if claimCopy.Status.Allocation == nil {
+		return claimCopy
+	}
+	deviceRequestAllocationResults := make([]resourceapi.DeviceRequestAllocationResult, 0)
+	for _, deviceRequestAllocationResult := range claimCopy.Status.Allocation.Devices.Results {
+		// Device requests with AdminAccess don't reserve their allocated resources, and are ignored when scheduling.
+		devReq := getDeviceResultRequest(claim, &deviceRequestAllocationResult)
+		if devReq != nil && devReq.Exactly != nil && ptr.Deref(devReq.Exactly.AdminAccess, false) {
+			continue
+		}
+		deviceRequestAllocationResults = append(deviceRequestAllocationResults, deviceRequestAllocationResult)
+	}
+	claimCopy.Status.Allocation.Devices.Results = deviceRequestAllocationResults
+	return claimCopy
+}
+
+// getDeviceResultRequest returns the DeviceRequest that the provided DeviceRequestAllocationResult refers to in the provided ResourceClaim. If no matching request is found, nil is returned.
+func getDeviceResultRequest(claim *resourceapi.ResourceClaim, deviceRequestAllocationResult *resourceapi.DeviceRequestAllocationResult) *resourceapi.DeviceRequest {
+	for _, deviceRequest := range claim.Spec.Devices.Requests {
+		if deviceRequest.Name == deviceRequestAllocationResult.Request {
+			return &deviceRequest
+		}
+	}
+	return nil
+}
diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/sanitize.go b/cluster-autoscaler/simulator/dynamicresources/utils/sanitize.go
index de1ced990245..6c157d980c56 100644
--- a/cluster-autoscaler/simulator/dynamicresources/utils/sanitize.go
+++ b/cluster-autoscaler/simulator/dynamicresources/utils/sanitize.go
@@ -24,6 +24,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/dynamic-resource-allocation/resourceclaim"
+	"k8s.io/utils/ptr"
 	"k8s.io/utils/set"
 )
 
@@ -89,6 +90,11 @@ func SanitizedPodResourceClaims(newOwner, oldOwner *v1.Pod, claims []*resourceap
 
 		var sanitizedAllocations []resourceapi.DeviceRequestAllocationResult
 		for _, devAlloc := range claim.Status.Allocation.Devices.Results {
+			if ptr.Deref(devAlloc.AdminAccess, false) {
+				// Device requests with AdminAccess don't reserve their allocated resources, so their allocations are kept as-is instead of being sanitized.
+				sanitizedAllocations = append(sanitizedAllocations, devAlloc)
+				continue
+			}
 			// It's possible to have both node-local and global allocations in a single resource claim. Make sure that all allocations were node-local on the old node.
 			if !oldNodePoolNames.Has(devAlloc.Pool) {
 				return nil, fmt.Errorf("claim %s/%s has an allocation %s, from a pool that isn't node-local on %s - can't be sanitized", claim.Namespace, claim.Name, devAlloc.Request, oldNodeName)
diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/sanitize_test.go b/cluster-autoscaler/simulator/dynamicresources/utils/sanitize_test.go
index cb14e5d3fb64..cf4d442dca29 100644
--- a/cluster-autoscaler/simulator/dynamicresources/utils/sanitize_test.go
+++ b/cluster-autoscaler/simulator/dynamicresources/utils/sanitize_test.go
@@ -26,6 +26,7 @@ import (
 	resourceapi "k8s.io/api/resource/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/test"
+	"k8s.io/utils/ptr"
 	"k8s.io/utils/set"
 )
 
@@ -322,6 +323,34 @@ func TestSanitizedPodResourceClaims(t *testing.T) {
 			},
 		},
 	}
+	singleNodeAllocationWithAdminAccessRequest := &resourceapi.AllocationResult{
+		NodeSelector: &apiv1.NodeSelector{NodeSelectorTerms: []apiv1.NodeSelectorTerm{{
+			MatchFields: []apiv1.NodeSelectorRequirement{
+				{Key: "metadata.name", Operator: apiv1.NodeSelectorOpIn, Values: []string{"testNode"}},
+			}},
+		}},
+		Devices: resourceapi.DeviceAllocationResult{
+			Results: []resourceapi.DeviceRequestAllocationResult{
+				{Request: "request1", Driver: "driver.foo.com", Pool: "testNodePool1", Device: "device1"},
+				{Request: "request2", Driver: "driver.foo.com", Pool: "testNodePool1", Device: "device2", AdminAccess: ptr.To(false)},
+				{Request: "request3", Driver: "driver.foo.com", Pool: "testNodePool2", Device: "device1", AdminAccess: ptr.To(true)},
+			},
+		},
+	}
+	singleNodeAllocationWithAdminAccessRequestSanitized := &resourceapi.AllocationResult{
+		NodeSelector: &apiv1.NodeSelector{NodeSelectorTerms: []apiv1.NodeSelectorTerm{{
+			MatchFields: []apiv1.NodeSelectorRequirement{
+				{Key: "metadata.name", Operator: apiv1.NodeSelectorOpIn, Values: []string{"newNode"}},
+			}},
+		}},
+		Devices: resourceapi.DeviceAllocationResult{
+			Results: []resourceapi.DeviceRequestAllocationResult{
+				{Request: "request1", Driver: "driver.foo.com", Pool: "testNodePool1-abc", Device: "device1"},
+				{Request: "request2", Driver: "driver.foo.com", Pool: "testNodePool1-abc", Device: "device2", AdminAccess: ptr.To(false)},
+				{Request: "request3", Driver: "driver.foo.com", Pool: "testNodePool2", Device: "device1", AdminAccess: ptr.To(true)},
+			},
+		},
+	}
 
 	for _, tc := range []struct {
 		testName string
@@ -416,6 +445,20 @@ func TestSanitizedPodResourceClaims(t *testing.T) {
 				TestClaimWithPodReservations(TestClaimWithAllocation(newOwnerClaim2, singleNodeAllocationSanitized), newOwner),
 			},
 		},
+		{
+			testName: "allocated and reserved pod-owned claims with admin access results are sanitized",
+			claims: []*resourceapi.ResourceClaim{
+				TestClaimWithPodReservations(TestClaimWithAllocation(oldOwnerClaim1, singleNodeAllocationWithAdminAccessRequest), oldOwner),
+				TestClaimWithPodReservations(TestClaimWithAllocation(oldOwnerClaim2, singleNodeAllocationWithAdminAccessRequest), oldOwner),
+			},
+			oldNodeName:      "testNode",
+			newNodeName:      "newNode",
+			oldNodePoolNames: set.New("testNodePool1", "testNodePool2"),
+			wantClaims: []*resourceapi.ResourceClaim{
+				TestClaimWithPodReservations(TestClaimWithAllocation(newOwnerClaim1, singleNodeAllocationWithAdminAccessRequestSanitized), newOwner),
+				TestClaimWithPodReservations(TestClaimWithAllocation(newOwnerClaim2, singleNodeAllocationWithAdminAccessRequestSanitized), newOwner),
+			},
+		},
 	} {
 		t.Run(tc.testName, func(t *testing.T) {
 			claims, err := SanitizedPodResourceClaims(newOwner, oldOwner, tc.claims, nameSuffix, tc.newNodeName, tc.oldNodeName, tc.oldNodePoolNames)
diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go b/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
index b8e2ebff463e..dfc5e01ed66c 100644
--- a/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
+++ b/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
@@ -22,6 +22,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	resourceapi "k8s.io/api/resource/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
+	"k8s.io/utils/ptr"
 )
 
 // CalculateDynamicResourceUtilization calculates a map of ResourceSlice pool utilization grouped by the driver and pool. Returns
@@ -110,6 +111,11 @@ func groupAllocatedDevices(claims []*resourceapi.ResourceClaim) (map[string]map[
 		}
 
 		for _, deviceAlloc := range alloc.Devices.Results {
+			if ptr.Deref(deviceAlloc.AdminAccess, false) {
+				// devices with admin access don't count for utilization
+				continue
+			}
+
 			if result[deviceAlloc.Driver] == nil {
 				result[deviceAlloc.Driver] = map[string][]string{}
 			}
diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/utilization_test.go b/cluster-autoscaler/simulator/dynamicresources/utils/utilization_test.go
index d7e89b32d70e..8263e75f28ba 100644
--- a/cluster-autoscaler/simulator/dynamicresources/utils/utilization_test.go
+++ b/cluster-autoscaler/simulator/dynamicresources/utils/utilization_test.go
@@ -29,6 +29,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/test"
+	"k8s.io/utils/ptr"
 )
 
 func TestDynamicResourceUtilization(t *testing.T) {
@@ -57,7 +58,7 @@ func TestDynamicResourceUtilization(t *testing.T) {
 			testName: "single slice, single pool, 6/10 devices used",
 			nodeInfo: framework.NewNodeInfo(node,
 				testResourceSlices(fooDriver, "pool1", "node", 0, 10, 1),
-				testPodsWithClaims(fooDriver, "pool1", "node", 6, 1)...,
+				testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, nil)...,
 			),
 			wantUtilization: map[string]map[string]float64{
 				fooDriver: {
@@ -76,9 +77,9 @@ func TestDynamicResourceUtilization(t *testing.T) {
 					testResourceSlices(barDriver, "pool1", "node", 0, 8, 2),
 				),
 				mergeLists(
-					testPodsWithClaims(fooDriver, "pool1", "node", 6, 2),
-					testPodsWithClaims(fooDriver, "pool2", "node", 18, 3),
-					testPodsWithClaims(barDriver, "pool1", "node", 2, 1),
+					testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil),
+					testPodsWithClaims(fooDriver, "pool2", "node", 18, 3, nil),
+					testPodsWithClaims(barDriver, "pool1", "node", 2, 1, nil),
 				)...,
 			),
 			wantUtilization: map[string]map[string]float64{
@@ -101,7 +102,7 @@ func TestDynamicResourceUtilization(t *testing.T) {
 					testResourceSlices(fooDriver, "pool1", "node", 1, 20, 2),
 					testResourceSlices(fooDriver, "pool1", "node", 2, 30, 2),
 				),
-				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2)...,
+				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil)...,
 			),
 			wantUtilization: map[string]map[string]float64{
 				fooDriver: {
@@ -119,7 +120,7 @@ func TestDynamicResourceUtilization(t *testing.T) {
 					testResourceSlices(fooDriver, "pool1", "node", 1, 20, 2),
 					testResourceSlices(fooDriver, "pool1", "node", 2, 30, 2)[:14],
 				),
-				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2)...,
+				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil)...,
 			),
 			wantErr: cmpopts.AnyError,
 		},
@@ -131,7 +132,7 @@ func TestDynamicResourceUtilization(t *testing.T) {
 					testResourceSlices(fooDriver, "pool1", "node", 1, 20, 2)[:7],
 					testResourceSlices(fooDriver, "pool1", "node", 2, 30, 2),
"node", 2, 30, 2), ), - testPodsWithClaims(fooDriver, "pool1", "node", 6, 2)..., + testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil)..., ), wantUtilization: map[string]map[string]float64{ fooDriver: { @@ -141,7 +142,41 @@ func TestDynamicResourceUtilization(t *testing.T) { wantHighestUtilization: 0.2, wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")), }, + { + testName: "AdminAccess ResourceClaim don't count for utilization", + nodeInfo: framework.NewNodeInfo(node, + testResourceSlices(fooDriver, "pool1", "node", 0, 10, 1), + testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, ptr.To(true))..., + ), + wantUtilization: map[string]map[string]float64{ + fooDriver: { + "pool1": 0, + }, + }, + wantHighestUtilization: 0, + wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")), + }, + { + testName: "AdminAccess ResourceClaim don't count for utilization", + nodeInfo: framework.NewNodeInfo(node, + testResourceSlices(fooDriver, "pool1", "node", 0, 10, 1), + mergeLists( + testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, ptr.To(true)), + testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, ptr.To(false)), + )..., + ), + wantUtilization: map[string]map[string]float64{ + fooDriver: { + "pool1": 0.6, + }, + }, + wantHighestUtilization: 0.6, + wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")), + }, } { + if tc.testName != "AdminAccess ResourceClaim don't count for utilization" { + continue + } t.Run(tc.testName, func(t *testing.T) { utilization, err := CalculateDynamicResourceUtilization(tc.nodeInfo) if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" { @@ -190,7 +225,7 @@ func testResourceSlices(driverName, poolName, nodeName string, poolGen, deviceCo return result } -func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devicesPerPod int64) []*framework.PodInfo { +func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devicesPerPod int64, adminaccess *bool) []*framework.PodInfo { podCount := deviceCount / devicesPerPod deviceIndex := 0 @@ -201,13 +236,25 @@ func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devi for podDevIndex := range devicesPerPod { claimName := fmt.Sprintf("%s-claim-%d", pod.Name, podDevIndex) devName := fmt.Sprintf("%s-%s-dev-%d", driverName, poolName, deviceIndex) + devReqName := fmt.Sprintf("request-%d", podDevIndex) + devReq := resourceapi.DeviceRequest{ + Name: devReqName, + Exactly: &resourceapi.ExactDeviceRequest{ + AdminAccess: adminaccess, + }, + } claims = append(claims, &resourceapi.ResourceClaim{ ObjectMeta: metav1.ObjectMeta{Name: claimName, UID: types.UID(claimName)}, + Spec: resourceapi.ResourceClaimSpec{ + Devices: resourceapi.DeviceClaim{ + Requests: []resourceapi.DeviceRequest{devReq}, + }, + }, Status: resourceapi.ResourceClaimStatus{ Allocation: &resourceapi.AllocationResult{ Devices: resourceapi.DeviceAllocationResult{ Results: []resourceapi.DeviceRequestAllocationResult{ - {Request: fmt.Sprintf("request-%d", podDevIndex), Driver: driverName, Pool: poolName, Device: devName}, + {Request: devReqName, Driver: driverName, Pool: poolName, Device: devName, AdminAccess: adminaccess}, }, }, },