Skip to content

Commit f44af8e

Browse files
committed
Add retries to shutdown to handle 409 conflict
1 parent 11907d2 commit f44af8e

File tree

1 file changed

+29
-8
lines changed

1 file changed

+29
-8
lines changed

servo/connectors/kubernetes.py

Lines changed: 29 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -620,14 +620,35 @@ async def shutdown(self, error: Optional[Exception] = None) -> None:
620620
error: An optional error that triggered the destruction.
621621
"""
622622
self.logger.info(f"adjustment failed: shutting down deployment's pods...")
623-
self.workload = await self.workload_helper.read(
624-
self.workload_config.name, self.workload_config.namespace
625-
)
626-
self.workload.spec.replicas = 0
627-
self.workload = await asyncio.wait_for(
628-
self.workload_helper.patch(self.workload),
629-
timeout=self.timeout.total_seconds(),
630-
)
623+
624+
retries = 3
625+
while retries > 0:
626+
# patch the deployment
627+
try:
628+
self.workload = await self.workload_helper.read(
629+
self.workload_config.name, self.workload_config.namespace
630+
)
631+
self.workload.spec.replicas = 0
632+
self.workload = await asyncio.wait_for(
633+
self.workload_helper.patch(self.workload),
634+
timeout=self.timeout.total_seconds(),
635+
)
636+
except kubernetes_asyncio.client.ApiException as ae:
637+
retries -= 1
638+
if retries == 0:
639+
self.logger.error(
640+
"Failed to shutdown SaturationOptimization after 3 retries"
641+
)
642+
raise
643+
644+
if ae.status == 409 and ae.reason == "Conflict":
645+
# If we have a conflict, just load the existing object and try again
646+
pass
647+
else:
648+
raise
649+
else:
650+
# No need to retry if no exception raised
651+
break
631652

632653
def to_components(self) -> List[servo.Component]:
633654
settings = [self.cpu, self.memory, self.replicas]

0 commit comments

Comments
 (0)