Skip to content

Commit ff763e9

Browse files
committed
TEP-0090: Failure Strategies - Remove Fail Fast
In #4951, we implemented `isFailure` for matrixed `TaskRuns` where we applied fail-fast failure strategy. We discussed failure strategies further in this PR - tektoncd/community#724 - and API WG on 13 June 2022. We agreed to leave early termination upon failure out of scope for the initial release of Matrix. We plan to explore failure strategies, including fail-fast, in future work. And these failure strategies may apply more broadly beyond Matrix. In this change, we update `isFailure` to evaluate to `true` only when there's a failure and there are no running `TaskRuns` in the `rprt`.
1 parent c13edc2 commit ff763e9

File tree

2 files changed

+21
-19
lines changed

2 files changed

+21
-19
lines changed

pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ func (t ResolvedPipelineRunTask) isSuccessful() bool {
108108
}
109109

110110
// isFailure returns true only if the run has failed and will not be retried.
111-
// If the PipelineTask has a Matrix, isFailure returns true if any run has failed and will not be retried.
111+
// If the PipelineTask has a Matrix, isFailure returns true if any run has failed (no remaining retries)
112+
// and all other runs are done.
112113
func (t ResolvedPipelineRunTask) isFailure() bool {
113114
if t.isCancelled() {
114115
return true
@@ -130,14 +131,15 @@ func (t ResolvedPipelineRunTask) isFailure() bool {
130131
if len(t.TaskRuns) == 0 {
131132
return false
132133
}
134+
var atLeastOneFailed bool
135+
var atLeastOneRunning bool
133136
for _, taskRun := range t.TaskRuns {
134137
c = taskRun.Status.GetCondition(apis.ConditionSucceeded)
135-
isDone = taskRun.IsDone()
136-
if isDone && c.IsFalse() && !t.hasRemainingRetries() {
137-
return true
138-
}
138+
taskRunFailed := taskRun.IsDone() && c.IsFalse() && !t.hasRemainingRetries()
139+
atLeastOneFailed = atLeastOneFailed || taskRunFailed
140+
atLeastOneRunning = atLeastOneRunning || !taskRun.IsDone()
139141
}
140-
return false
142+
return atLeastOneFailed && !atLeastOneRunning
141143
default:
142144
if t.TaskRun == nil {
143145
return false
@@ -192,14 +194,14 @@ func (t ResolvedPipelineRunTask) isCancelled() bool {
192194
if len(t.TaskRuns) == 0 {
193195
return false
194196
}
195-
// is cancelled when any TaskRun is cancelled to fail fast
197+
var atLeastOneCancelled bool
198+
var atLeastOneRunning bool
196199
for _, taskRun := range t.TaskRuns {
197200
c := taskRun.Status.GetCondition(apis.ConditionSucceeded)
198-
if c != nil && c.IsFalse() && c.Reason == v1beta1.TaskRunReasonCancelled.String() {
199-
return true
200-
}
201+
atLeastOneCancelled = atLeastOneCancelled || c != nil && c.IsFalse() && c.Reason == v1beta1.TaskRunReasonCancelled.String()
202+
atLeastOneRunning = atLeastOneRunning || !taskRun.IsDone()
201203
}
202-
return false
204+
return atLeastOneCancelled && !atLeastOneRunning
203205
default:
204206
if t.TaskRun == nil {
205207
return false

pkg/reconciler/pipelinerun/resources/pipelinerunresolution_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,12 +1120,12 @@ func TestIsFailure(t *testing.T) {
11201120
},
11211121
want: true,
11221122
}, {
1123-
name: "one matrixed taskruns failed",
1123+
name: "one matrixed taskrun failed, one matrixed taskrun running",
11241124
rprt: ResolvedPipelineRunTask{
11251125
PipelineTask: matrixedPipelineTask,
11261126
TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), makeStarted(trs[1])},
11271127
},
1128-
want: true,
1128+
want: false,
11291129
}, {
11301130
name: "matrixed taskruns failed: retries remaining",
11311131
rprt: ResolvedPipelineRunTask{
@@ -1155,12 +1155,12 @@ func TestIsFailure(t *testing.T) {
11551155
},
11561156
want: true,
11571157
}, {
1158-
name: "one matrixed taskrun cancelled",
1158+
name: "one matrixed taskrun cancelled, one matrixed taskrun running",
11591159
rprt: ResolvedPipelineRunTask{
11601160
PipelineTask: matrixedPipelineTask,
11611161
TaskRuns: []*v1beta1.TaskRun{withCancelled(makeFailed(trs[0])), makeStarted(trs[1])},
11621162
},
1163-
want: true,
1163+
want: false,
11641164
}, {
11651165
name: "matrixed taskruns cancelled but not failed",
11661166
rprt: ResolvedPipelineRunTask{
@@ -1183,12 +1183,12 @@ func TestIsFailure(t *testing.T) {
11831183
},
11841184
want: true,
11851185
}, {
1186-
name: "one matrixed taskrun cancelled: retries remaining",
1186+
name: "one matrixed taskrun cancelled: retries remaining, one matrixed taskrun running",
11871187
rprt: ResolvedPipelineRunTask{
11881188
PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1),
11891189
TaskRuns: []*v1beta1.TaskRun{withCancelled(makeFailed(trs[0])), makeStarted(trs[1])},
11901190
},
1191-
want: true,
1191+
want: false,
11921192
}, {
11931193
name: "matrixed taskruns cancelled: no retries remaining",
11941194
rprt: ResolvedPipelineRunTask{
@@ -1197,12 +1197,12 @@ func TestIsFailure(t *testing.T) {
11971197
},
11981198
want: true,
11991199
}, {
1200-
name: "one matrixed taskrun cancelled: no retries remaining",
1200+
name: "one matrixed taskrun cancelled: no retries remaining, one matrixed taskrun running",
12011201
rprt: ResolvedPipelineRunTask{
12021202
PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1),
12031203
TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), makeStarted(trs[1])},
12041204
},
1205-
want: true,
1205+
want: false,
12061206
}} {
12071207
t.Run(tc.name, func(t *testing.T) {
12081208
if got := tc.rprt.isFailure(); got != tc.want {

0 commit comments

Comments
 (0)