Skip to content

OOM (Out of memory) status #40

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/api/resource/saved_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
ExitCodeDataSucceeded DataExitCode = "succeeded"
ExitCodeDataFailed DataExitCode = "failed"
ExitCodeDataKilled DataExitCode = "killed"
ExitCodeDataOOM DataExitCode = "oom"
)

func DataSavedStatusPtrsEqual(savedStatus *DataSavedStatus, savedStatus2 *DataSavedStatus) bool {
Expand Down
14 changes: 11 additions & 3 deletions pkg/api/resource/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ const (
StatusAPIGroupParentFailed
StatusAPIGroupParentKilled
StatusAPIGroupUpdateSkipped

StatusDataKilledOOM
)

var statusCodes = []string{
Expand Down Expand Up @@ -136,9 +138,11 @@ var statusCodes = []string{
"status_api_group_parent_failed",
"status_api_group_parent_killed",
"status_api_group_update_skipped",

"status_data_oom",
}

var _ = [1]int{}[int(StatusAPIGroupUpdateSkipped)-(len(statusCodes)-1)] // Ensure list length matches
var _ = [1]int{}[int(StatusDataKilledOOM)-(len(statusCodes)-1)] // Ensure list length matches

var statusCodeMessages = []string{
"unknown", // StatusUnknown
Expand All @@ -165,9 +169,11 @@ var statusCodeMessages = []string{
"upstream error", // StatusAPIGroupParentFailed
"upstream termination", // StatusAPIGroupParentKilled
"update skipped", // StatusAPIGroupUpdateSkipped

"terminated (out of mem)", // StatusDataKilledOOM
}

var _ = [1]int{}[int(StatusAPIGroupUpdateSkipped)-(len(statusCodeMessages)-1)] // Ensure list length matches
var _ = [1]int{}[int(StatusDataKilledOOM)-(len(statusCodeMessages)-1)] // Ensure list length matches

// StatusDataRunning aliases
const (
Expand Down Expand Up @@ -203,9 +209,11 @@ var statusSortBuckets = []int{
2, // StatusAPIGroupParentFailed
2, // StatusAPIGroupParentKilled
2, // StatusAPIGroupUpdateSkipped

1, // StatusDataKilledOOM
}

var _ = [1]int{}[int(StatusAPIGroupUpdateSkipped)-(len(statusSortBuckets)-1)] // Ensure list length matches
var _ = [1]int{}[int(StatusDataKilledOOM)-(len(statusSortBuckets)-1)] // Ensure list length matches

func (code StatusCode) String() string {
if int(code) < 0 || int(code) >= len(statusCodes) {
Expand Down
9 changes: 8 additions & 1 deletion pkg/operator/k8s/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ var podTypeMeta = metav1.TypeMeta{
}

const (
PodStatusUnknown = "Unknown"
PodStatusPending = "Pending"
PodStatusRunning = "Running"
PodStatusTerminating = "Terminating"
PodStatusSucceeded = "Succeeded"
PodStatusFailed = "Failed"
PodStatusKilled = "Killed"
PodStatusUnknown = "Unknown"
PodStatusKilledOOM = "Out of Memory"
)

var killStatuses = map[int32]bool{
Expand Down Expand Up @@ -108,11 +109,17 @@ func GetPodStatus(pod *corev1.Pod) string {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.LastTerminationState.Terminated != nil {
exitCode := containerStatus.LastTerminationState.Terminated.ExitCode
if exitCode == 137 {
return PodStatusKilledOOM
}
if killStatuses[exitCode] {
return PodStatusKilled
}
} else if containerStatus.State.Terminated != nil {
exitCode := containerStatus.State.Terminated.ExitCode
if exitCode == 137 {
return PodStatusKilledOOM
}
if killStatuses[exitCode] {
return PodStatusKilled
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/workloads/api_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func updateAPIStatusCodeByParents(apiStatus *resource.APIStatus, dataStatuses ma
parentSkipped := false
for dependency := range allDependencies {
switch dataStatuses[dependency].Code {
case resource.StatusDataKilled:
case resource.StatusDataKilled, resource.StatusDataKilledOOM:
apiStatus.Code = resource.StatusParentKilled
return
case resource.StatusDataFailed:
Expand Down
4 changes: 3 additions & 1 deletion pkg/operator/workloads/data_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ func dataStatusCode(dataSavedStatus *resource.DataSavedStatus) resource.StatusCo
return resource.StatusDataFailed
case resource.ExitCodeDataKilled:
return resource.StatusDataKilled
case resource.ExitCodeDataOOM:
return resource.StatusDataKilledOOM
}

return resource.StatusUnknown
Expand All @@ -91,7 +93,7 @@ func updateDataStatusCodeByParents(dataStatus *resource.DataStatus, dataStatuses
parentSkipped := false
for dependency := range allDependencies {
switch dataStatuses[dependency].Code {
case resource.StatusDataKilled:
case resource.StatusDataKilled, resource.StatusDataKilledOOM:
dataStatus.Code = resource.StatusParentKilled
return
case resource.StatusDataFailed:
Expand Down
8 changes: 6 additions & 2 deletions pkg/operator/workloads/workload_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,13 @@ func UpdateDataWorkflowErrors(failedPods []corev1.Pod) error {
savedStatus.Start = nowTime
}

savedStatus.ExitCode = resource.ExitCodeDataFailed
if k8s.GetPodStatus(&pod) == k8s.PodStatusKilled {
switch k8s.GetPodStatus(&pod) {
case k8s.PodStatusKilled:
savedStatus.ExitCode = resource.ExitCodeDataKilled
case k8s.PodStatusKilledOOM:
savedStatus.ExitCode = resource.ExitCodeDataOOM
default:
savedStatus.ExitCode = resource.ExitCodeDataFailed
}

savedStatusesToUpload = append(savedStatusesToUpload, savedStatus)
Expand Down