From 65086acb126cf4ea6116077c75abe2b5319bcc44 Mon Sep 17 00:00:00 2001 From: hedi bouattour Date: Fri, 3 Oct 2025 10:01:03 +0000 Subject: [PATCH] The current code allows pods to be recreated even though the state is broken, so we lose the state and we have hanging pods that would never get deleted. This patch rewrites the state even if unmarshalling fails, but still allows the state to be read. --- calico-vpp-agent/cni/cni_server.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/calico-vpp-agent/cni/cni_server.go b/calico-vpp-agent/cni/cni_server.go index 1f099f8c..285f7d3d 100644 --- a/calico-vpp-agent/cni/cni_server.go +++ b/calico-vpp-agent/cni/cni_server.go @@ -184,9 +184,9 @@ func (s *Server) rescanState() { } } - cniServerState, err := model.LoadCniServerState(config.CniServerStateFilename) - if err != nil { - s.log.Errorf("Error getting pods from file %s, removing cache", err) + cniServerState, errorLoading := model.LoadCniServerState(config.CniServerStateFilename) + if errorLoading != nil { + s.log.Errorf("Error getting pods from file %s, removing cache", errorLoading) err := os.Remove(config.CniServerStateFilename) if err != nil { s.log.Errorf("Could not remove %s, %s", config.CniServerStateFilename, err) @@ -206,6 +206,15 @@ func (s *Server) rescanState() { case nil: s.log.Infof("pod(re-add) podSpec=%s", podSpecCopy.String()) s.podInterfaceMap[podSpec.Key()] = podSpecCopy + if errorLoading != nil { + err = model.PersistCniServerState( + model.NewCniServerState(s.podInterfaceMap), + config.CniServerStateFilename, + ) + if err != nil { + s.log.Errorf("CNI state persist errored %v", err) + } + } default: s.log.Errorf("Interface add failed %s : %v", podSpecCopy.String(), err) }