diff --git a/.changeset/early-points-jam.md b/.changeset/early-points-jam.md new file mode 100644 index 0000000000..645a50002f --- /dev/null +++ b/.changeset/early-points-jam.md @@ -0,0 +1,5 @@ +--- +"trigger.dev": patch +--- + +Gracefully shut down task run processes using SIGTERM followed by SIGKILL after a 1s timeout. This also prevents cancelled or completed runs from leaving orphaned task run processes behind. diff --git a/packages/cli-v3/src/executions/taskRunProcess.ts b/packages/cli-v3/src/executions/taskRunProcess.ts index 74aecededf..7b2adb6eb6 100644 --- a/packages/cli-v3/src/executions/taskRunProcess.ts +++ b/packages/cli-v3/src/executions/taskRunProcess.ts @@ -51,6 +51,7 @@ export type TaskRunProcessOptions = { machineResources: MachinePresetResources; isWarmStart?: boolean; cwd?: string; + gracefulTerminationTimeoutInMs?: number; }; export type TaskRunProcessExecuteParams = { @@ -114,7 +115,7 @@ export class TaskRunProcess { console.error("Error cancelling task run process", { err }); } - await this.kill(); + await this.#gracefullyTerminate(this.options.gracefulTerminationTimeoutInMs); } async cleanup(kill = true) { @@ -131,7 +132,7 @@ export class TaskRunProcess { } if (kill) { - await this.kill("SIGKILL"); + await this.#gracefullyTerminate(this.options.gracefulTerminationTimeoutInMs); } } @@ -395,6 +396,18 @@ export class TaskRunProcess { this._stderr.push(errorLine); } + async #gracefullyTerminate(timeoutInMs: number = 1_000) { + logger.debug("gracefully terminating task run process", { pid: this.pid, timeoutInMs }); + + await this.kill("SIGTERM", timeoutInMs); + + if (this._child?.connected) { + logger.debug("child process is still connected, sending SIGKILL", { pid: this.pid }); + + await this.kill("SIGKILL"); + } + } + /** This will never throw. 
*/ async kill(signal?: number | NodeJS.Signals, timeoutInMs?: number) { logger.debug(`killing task run process`, { @@ -420,7 +433,11 @@ export class TaskRunProcess { const [error] = await tryCatch(killTimeout); if (error) { - logger.debug("kill: failed to wait for child process to exit", { error }); + logger.debug("kill: failed to wait for child process to exit", { + timeoutInMs, + signal, + pid: this.pid, + }); } } diff --git a/packages/cli-v3/src/indexing/indexWorkerManifest.ts b/packages/cli-v3/src/indexing/indexWorkerManifest.ts index ff8de685ce..e4ae72283f 100644 --- a/packages/cli-v3/src/indexing/indexWorkerManifest.ts +++ b/packages/cli-v3/src/indexing/indexWorkerManifest.ts @@ -61,7 +61,7 @@ export async function indexWorkerManifest({ } resolved = true; - child.kill(); + child.kill("SIGKILL"); reject(new Error("Worker timed out")); }, 20_000); @@ -79,21 +79,21 @@ export async function indexWorkerManifest({ } else { resolve(message.payload.manifest); } - child.kill(); + child.kill("SIGKILL"); break; } case "TASKS_FAILED_TO_PARSE": { clearTimeout(timeout); resolved = true; reject(new TaskMetadataParseError(message.payload.zodIssues, message.payload.tasks)); - child.kill(); + child.kill("SIGKILL"); break; } case "UNCAUGHT_EXCEPTION": { clearTimeout(timeout); resolved = true; reject(new UncaughtExceptionError(message.payload.error, message.payload.origin)); - child.kill(); + child.kill("SIGKILL"); break; } }