Changes from all commits (25 commits)
d2d21f4 feat: Support DRA Admin Access (MenD32, Apr 27, 2025)
e7b9ad4 tests: added utilization test for adminaccess resourceclaims (MenD32, Apr 27, 2025)
76ca2c9 fix: ran gofmt (MenD32, Apr 27, 2025)
05dcfc6 fix: false positives on SanitizedPodResourceClaims (MenD32, Apr 27, 2025)
a389604 fix: non admin results get removed from resourceclaim when requests i… (MenD32, Apr 27, 2025)
bece2b0 feat: Support DRA Admin Access (MenD32, Apr 29, 2025)
31237c6 refactor: moved device filtering to groupAllocatedDevices (MenD32, Jun 7, 2025)
e9265d9 Merge branch 'master' into feat/dra-admin-access (MenD32, Jun 7, 2025)
371713f feat: Support DRA Admin Access (MenD32, Apr 27, 2025)
3532049 tests: added utilization test for adminaccess resourceclaims (MenD32, Apr 27, 2025)
ce2b874 fix: ran gofmt (MenD32, Apr 27, 2025)
a99a709 fix: false positives on SanitizedPodResourceClaims (MenD32, Apr 27, 2025)
a363619 fix: non admin results get removed from resourceclaim when requests i… (MenD32, Apr 27, 2025)
6dd2644 feat: Support DRA Admin Access (MenD32, Apr 29, 2025)
2cb70c1 refactor: moved device filtering to groupAllocatedDevices (MenD32, Jun 7, 2025)
43462fa tests: added test of combined admin access and non-admin access utili… (MenD32, Jun 7, 2025)
13f7199 tests: added test for admin access request sanitization (MenD32, Jun 7, 2025)
38083af tests: added test for admin access request sanitization (MenD32, Jun 7, 2025)
d846fd2 Merge branch 'master' into feat/dra-admin-access (MenD32, Jun 11, 2025)
63c63ee fix: removed ClaimWithoutAdminAccessRequests sanitization (MenD32, Jul 5, 2025)
eee7143 Merge branch 'master' into feat/dra-admin-access (MenD32, Oct 3, 2025)
57805ff chore: migrated to v1 (MenD32, Oct 3, 2025)
fb24953 fix: changed methodology to using ptr.Deref (MenD32, Oct 3, 2025)
3a4e76a fix: changed methodology to using ptr.Deref (MenD32, Oct 3, 2025)
141158d removed allocation status check from getDeviceResultRequest (MenD32, Oct 6, 2025)

@@ -24,6 +24,7 @@ import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/component-helpers/scheduling/corev1"
	"k8s.io/dynamic-resource-allocation/resourceclaim"
	"k8s.io/utils/ptr"
)

// ClaimAllocated returns whether the provided claim is allocated.
@@ -114,3 +115,32 @@ func PodClaimOwnerReference(pod *apiv1.Pod) metav1.OwnerReference {
func claimConsumerReferenceMatchesPod(pod *apiv1.Pod, ref resourceapi.ResourceClaimConsumerReference) bool {
	return ref.APIGroup == "" && ref.Resource == "pods" && ref.Name == pod.Name && ref.UID == pod.UID
}

// ClaimWithoutAdminAccessRequests returns a deep copy of the claim with the allocation results
// of admin access requests removed.
func ClaimWithoutAdminAccessRequests(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
	claimCopy := claim.DeepCopy()
	if claimCopy.Status.Allocation == nil {
		return claimCopy
	}
	deviceRequestAllocationResults := make([]resourceapi.DeviceRequestAllocationResult, 0)
	for _, deviceRequestAllocationResult := range claimCopy.Status.Allocation.Devices.Results {
		// Device requests with AdminAccess don't reserve their allocated resources, and are ignored when scheduling.
		devReq := getDeviceResultRequest(claim, &deviceRequestAllocationResult)
		if devReq != nil && devReq.Exactly != nil && ptr.Deref(devReq.Exactly.AdminAccess, false) {
			continue
		}
		deviceRequestAllocationResults = append(deviceRequestAllocationResults, deviceRequestAllocationResult)
	}
	claimCopy.Status.Allocation.Devices.Results = deviceRequestAllocationResults
	return claimCopy
}

// getDeviceResultRequest returns the DeviceRequest that produced the provided DeviceRequestAllocationResult
// in the provided ResourceClaim. If no matching request is found, nil is returned.
func getDeviceResultRequest(claim *resourceapi.ResourceClaim, deviceRequestAllocationResult *resourceapi.DeviceRequestAllocationResult) *resourceapi.DeviceRequest {
	for _, deviceRequest := range claim.Spec.Devices.Requests {
		if deviceRequest.Name == deviceRequestAllocationResult.Request {
			return &deviceRequest
		}
	}
	return nil
}
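Editor's note, not part of the diff: a minimal, self-contained sketch of how the new helper behaves on a claim whose allocation mixes admin-access and regular results. The claim contents, request names, driver name, and the drautils import path are assumptions for illustration only.

```go
package main

import (
	"fmt"

	resourceapi "k8s.io/api/resource/v1"
	"k8s.io/utils/ptr"

	// Assumed import path for the package containing ClaimWithoutAdminAccessRequests.
	drautils "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/utils"
)

func main() {
	claim := &resourceapi.ResourceClaim{
		Spec: resourceapi.ResourceClaimSpec{
			Devices: resourceapi.DeviceClaim{
				Requests: []resourceapi.DeviceRequest{
					// "monitor" asks for admin access; "gpu" is a regular request.
					{Name: "monitor", Exactly: &resourceapi.ExactDeviceRequest{AdminAccess: ptr.To(true)}},
					{Name: "gpu", Exactly: &resourceapi.ExactDeviceRequest{}},
				},
			},
		},
		Status: resourceapi.ResourceClaimStatus{
			Allocation: &resourceapi.AllocationResult{
				Devices: resourceapi.DeviceAllocationResult{
					Results: []resourceapi.DeviceRequestAllocationResult{
						{Request: "monitor", Driver: "driver.foo.com", Pool: "pool1", Device: "dev1"},
						{Request: "gpu", Driver: "driver.foo.com", Pool: "pool1", Device: "dev2"},
					},
				},
			},
		},
	}
	stripped := drautils.ClaimWithoutAdminAccessRequests(claim)
	// Only the "gpu" result remains; the input claim is untouched because the
	// helper operates on a deep copy.
	for _, r := range stripped.Status.Allocation.Devices.Results {
		fmt.Println(r.Request, r.Device) // gpu dev2
	}
}
```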

@@ -24,6 +24,7 @@ import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/dynamic-resource-allocation/resourceclaim"
	"k8s.io/utils/ptr"
	"k8s.io/utils/set"
)

@@ -89,6 +90,11 @@ func SanitizedPodResourceClaims(newOwner, oldOwner *v1.Pod, claims []*resourceap

		var sanitizedAllocations []resourceapi.DeviceRequestAllocationResult
		for _, devAlloc := range claim.Status.Allocation.Devices.Results {
			if ptr.Deref(devAlloc.AdminAccess, false) {
				// Allocation results with AdminAccess don't reserve their allocated resources,
				// so they can be copied over as-is instead of being sanitized to a node-local pool.
				sanitizedAllocations = append(sanitizedAllocations, devAlloc)
				continue
			}
			// It's possible to have both node-local and global allocations in a single resource claim. Make sure that all allocations were node-local on the old node.
			if !oldNodePoolNames.Has(devAlloc.Pool) {
				return nil, fmt.Errorf("claim %s/%s has an allocation %s, from a pool that isn't node-local on %s - can't be sanitized", claim.Namespace, claim.Name, devAlloc.Request, oldNodeName)
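Aside, not part of the diff: AdminAccess is a *bool, so an unset field (nil) and an explicit false must both be treated as "no admin access". ptr.Deref collapses that tri-state, which is why the hunk above checks ptr.Deref(devAlloc.AdminAccess, false) rather than dereferencing directly. A tiny runnable sketch:

```go
package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

func main() {
	var unset *bool
	fmt.Println(ptr.Deref(unset, false))         // false: field never set
	fmt.Println(ptr.Deref(ptr.To(false), false)) // false: explicitly disabled
	fmt.Println(ptr.Deref(ptr.To(true), false))  // true: admin access requested
}
```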

@@ -26,6 +26,7 @@ import (
	resourceapi "k8s.io/api/resource/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/autoscaler/cluster-autoscaler/utils/test"
	"k8s.io/utils/ptr"
	"k8s.io/utils/set"

@@ -322,6 +323,34 @@ func TestSanitizedPodResourceClaims(t *testing.T) {
			},
		},
	}
	singleNodeAllocationWithAdminAccessRequest := &resourceapi.AllocationResult{
		NodeSelector: &apiv1.NodeSelector{NodeSelectorTerms: []apiv1.NodeSelectorTerm{{
			MatchFields: []apiv1.NodeSelectorRequirement{
				{Key: "metadata.name", Operator: apiv1.NodeSelectorOpIn, Values: []string{"testNode"}},
			}},
		}},
		Devices: resourceapi.DeviceAllocationResult{
			Results: []resourceapi.DeviceRequestAllocationResult{
				{Request: "request1", Driver: "driver.foo.com", Pool: "testNodePool1", Device: "device1"},
				{Request: "request2", Driver: "driver.foo.com", Pool: "testNodePool1", Device: "device2", AdminAccess: ptr.To(false)},
				{Request: "request3", Driver: "driver.foo.com", Pool: "testNodePool2", Device: "device1", AdminAccess: ptr.To(true)},
			},
		},
	}
	singleNodeAllocationWithAdminAccessRequestSanitized := &resourceapi.AllocationResult{
		NodeSelector: &apiv1.NodeSelector{NodeSelectorTerms: []apiv1.NodeSelectorTerm{{
			MatchFields: []apiv1.NodeSelectorRequirement{
				{Key: "metadata.name", Operator: apiv1.NodeSelectorOpIn, Values: []string{"newNode"}},
			}},
		}},
		Devices: resourceapi.DeviceAllocationResult{
			Results: []resourceapi.DeviceRequestAllocationResult{
				{Request: "request1", Driver: "driver.foo.com", Pool: "testNodePool1-abc", Device: "device1"},
				{Request: "request2", Driver: "driver.foo.com", Pool: "testNodePool1-abc", Device: "device2", AdminAccess: ptr.To(false)},
				{Request: "request3", Driver: "driver.foo.com", Pool: "testNodePool2", Device: "device1", AdminAccess: ptr.To(true)},
			},
		},
	}

	for _, tc := range []struct {
		testName string
@@ -416,6 +445,20 @@
				TestClaimWithPodReservations(TestClaimWithAllocation(newOwnerClaim2, singleNodeAllocationSanitized), newOwner),
			},
		},
		{
			testName: "allocated and reserved pod-owned claims with admin access results are sanitized",
			claims: []*resourceapi.ResourceClaim{
				TestClaimWithPodReservations(TestClaimWithAllocation(oldOwnerClaim1, singleNodeAllocationWithAdminAccessRequest), oldOwner),
				TestClaimWithPodReservations(TestClaimWithAllocation(oldOwnerClaim2, singleNodeAllocationWithAdminAccessRequest), oldOwner),
			},
			oldNodeName: "testNode",
			newNodeName: "newNode",
			oldNodePoolNames: set.New("testNodePool1", "testNodePool2"),
			wantClaims: []*resourceapi.ResourceClaim{
				TestClaimWithPodReservations(TestClaimWithAllocation(newOwnerClaim1, singleNodeAllocationWithAdminAccessRequestSanitized), newOwner),
				TestClaimWithPodReservations(TestClaimWithAllocation(newOwnerClaim2, singleNodeAllocationWithAdminAccessRequestSanitized), newOwner),
			},
		},
	} {
		t.Run(tc.testName, func(t *testing.T) {
			claims, err := SanitizedPodResourceClaims(newOwner, oldOwner, tc.claims, nameSuffix, tc.newNodeName, tc.oldNodeName, tc.oldNodePoolNames)

@@ -22,6 +22,7 @@ import (
	v1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
	"k8s.io/utils/ptr"
)

// CalculateDynamicResourceUtilization calculates a map of ResourceSlice pool utilization grouped by the driver and pool. Returns
@@ -110,6 +111,11 @@ func groupAllocatedDevices(claims []*resourceapi.ResourceClaim) (map[string]map[
		}

		for _, deviceAlloc := range alloc.Devices.Results {
			if ptr.Deref(deviceAlloc.AdminAccess, false) {
				// Allocations with admin access don't reserve capacity, so they don't count toward utilization.
				continue
			}

			if result[deviceAlloc.Driver] == nil {
				result[deviceAlloc.Driver] = map[string][]string{}
			}
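Aside, not part of the diff: a standalone sketch of the counting rule this hunk implements, using pared-down hypothetical types rather than the autoscaler's own. Admin-access results are skipped, so only regular allocations contribute to the per-pool device count; on a 10-device pool this is how the new tests below arrive at 0 for admin-only claims and 0.6 for the mixed case.

```go
package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

// allocResult is a pared-down stand-in for resourceapi.DeviceRequestAllocationResult.
type allocResult struct {
	pool        string
	adminAccess *bool
}

// countAllocated mirrors the skip logic above: admin-access allocations
// don't reserve capacity, so they are excluded from the per-pool count.
func countAllocated(results []allocResult) map[string]int {
	counts := map[string]int{}
	for _, r := range results {
		if ptr.Deref(r.adminAccess, false) {
			continue
		}
		counts[r.pool]++
	}
	return counts
}

func main() {
	rs := []allocResult{
		{pool: "pool1"},                             // counts
		{pool: "pool1", adminAccess: ptr.To(true)},  // skipped
		{pool: "pool1", adminAccess: ptr.To(false)}, // counts
	}
	fmt.Println(countAllocated(rs)) // map[pool1:2]
}
```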

@@ -29,6 +29,7 @@ import (
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
	"k8s.io/autoscaler/cluster-autoscaler/utils/test"
	"k8s.io/utils/ptr"
)

func TestDynamicResourceUtilization(t *testing.T) {
@@ -57,7 +58,7 @@ func TestDynamicResourceUtilization(t *testing.T) {
testName: "single slice, single pool, 6/10 devices used",
nodeInfo: framework.NewNodeInfo(node,
testResourceSlices(fooDriver, "pool1", "node", 0, 10, 1),
testPodsWithClaims(fooDriver, "pool1", "node", 6, 1)...,
testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, nil)...,
),
wantUtilization: map[string]map[string]float64{
fooDriver: {
@@ -76,9 +77,9 @@
					testResourceSlices(barDriver, "pool1", "node", 0, 8, 2),
				),
				mergeLists(
					testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil),
					testPodsWithClaims(fooDriver, "pool2", "node", 18, 3, nil),
					testPodsWithClaims(barDriver, "pool1", "node", 2, 1, nil),
				)...,
			),
			wantUtilization: map[string]map[string]float64{
@@ -101,7 +102,7 @@
					testResourceSlices(fooDriver, "pool1", "node", 1, 20, 2),
					testResourceSlices(fooDriver, "pool1", "node", 2, 30, 2),
				),
				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil)...,
			),
			wantUtilization: map[string]map[string]float64{
				fooDriver: {
@@ -119,7 +120,7 @@
					testResourceSlices(fooDriver, "pool1", "node", 1, 20, 2),
					testResourceSlices(fooDriver, "pool1", "node", 2, 30, 2)[:14],
				),
				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil)...,
			),
			wantErr: cmpopts.AnyError,
		},
@@ -131,7 +132,7 @@
					testResourceSlices(fooDriver, "pool1", "node", 1, 20, 2)[:7],
					testResourceSlices(fooDriver, "pool1", "node", 2, 30, 2),
				),
				testPodsWithClaims(fooDriver, "pool1", "node", 6, 2, nil)...,
			),
			wantUtilization: map[string]map[string]float64{
				fooDriver: {
@@ -141,7 +142,41 @@
			wantHighestUtilization: 0.2,
			wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
		},
		// Review thread (Collaborator): Could you add a test case with both kinds of claims together?
		// Reply (MenD32, author): yep
		{
testName: "AdminAccess ResourceClaim don't count for utilization",
nodeInfo: framework.NewNodeInfo(node,
testResourceSlices(fooDriver, "pool1", "node", 0, 10, 1),
testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, ptr.To(true))...,
),
wantUtilization: map[string]map[string]float64{
fooDriver: {
"pool1": 0,
},
},
wantHighestUtilization: 0,
wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
},
		{
			testName: "mixed AdminAccess and regular ResourceClaims count only the regular ones for utilization",
			nodeInfo: framework.NewNodeInfo(node,
				testResourceSlices(fooDriver, "pool1", "node", 0, 10, 1),
				mergeLists(
					testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, ptr.To(true)),
					testPodsWithClaims(fooDriver, "pool1", "node", 6, 1, ptr.To(false)),
				)...,
			),
			wantUtilization: map[string]map[string]float64{
				fooDriver: {
					"pool1": 0.6,
				},
			},
			wantHighestUtilization: 0.6,
			wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
		},
	} {
		t.Run(tc.testName, func(t *testing.T) {
			utilization, err := CalculateDynamicResourceUtilization(tc.nodeInfo)
			if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
@@ -190,7 +225,7 @@ func testResourceSlices(driverName, poolName, nodeName string, poolGen, deviceCo
	return result
}

func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devicesPerPod int64, adminAccess *bool) []*framework.PodInfo {
	podCount := deviceCount / devicesPerPod

	deviceIndex := 0
@@ -201,13 +236,25 @@ func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devi
		for podDevIndex := range devicesPerPod {
			claimName := fmt.Sprintf("%s-claim-%d", pod.Name, podDevIndex)
			devName := fmt.Sprintf("%s-%s-dev-%d", driverName, poolName, deviceIndex)
			devReqName := fmt.Sprintf("request-%d", podDevIndex)
			devReq := resourceapi.DeviceRequest{
				Name: devReqName,
				Exactly: &resourceapi.ExactDeviceRequest{
					AdminAccess: adminAccess,
				},
			}
			claims = append(claims, &resourceapi.ResourceClaim{
				ObjectMeta: metav1.ObjectMeta{Name: claimName, UID: types.UID(claimName)},
				Spec: resourceapi.ResourceClaimSpec{
					Devices: resourceapi.DeviceClaim{
						Requests: []resourceapi.DeviceRequest{devReq},
					},
				},
				Status: resourceapi.ResourceClaimStatus{
					Allocation: &resourceapi.AllocationResult{
						Devices: resourceapi.DeviceAllocationResult{
							Results: []resourceapi.DeviceRequestAllocationResult{
								{Request: devReqName, Driver: driverName, Pool: poolName, Device: devName, AdminAccess: adminAccess},
							},
						},
					},