diff --git a/.changeset/little-birds-appear.md b/.changeset/little-birds-appear.md new file mode 100644 index 0000000000..870f253b85 --- /dev/null +++ b/.changeset/little-birds-appear.md @@ -0,0 +1,45 @@ +--- +"@trigger.dev/sdk": patch +--- + +Removes the `releaseConcurrencyOnWaitpoint` option on queues and the `releaseConcurrency` option on various wait functions. Replaced with the following default behavior: + +- Concurrency is never released when a run is first blocked via a waitpoint, at either the env or queue level. +- Concurrency is always released when a run is checkpointed and shutdown, at both the env and queue level. + +Additionally, environment concurrency limits now have a new "Burst Factor", defaulting to 2.0x. The "Burst Factor" allows the environment-wide concurrency limit to be higher than any individual queue's concurrency limit. For example, if you have an environment concurrency limit of 100, and a Burst Factor of 2.0x, then you can execute up to 200 runs concurrently, but any one task/queue can still only execute 100 runs concurrently. + +We've done some work cleaning up the run statuses. The new statuses are: + +- `PENDING_VERSION`: Task is waiting for a version update because it cannot execute without additional information (task, queue, etc.) +- `QUEUED`: Task is waiting to be executed by a worker +- `DEQUEUED`: Task has been dequeued and is being sent to a worker to start executing. +- `EXECUTING`: Task is currently being executed by a worker +- `WAITING`: Task has been paused by the system, and will be resumed by the system +- `COMPLETED`: Task has been completed successfully +- `CANCELED`: Task has been canceled by the user +- `FAILED`: Task has failed to complete, due to an error in the system +- `CRASHED`: Task has crashed and won't be retried, most likely the worker ran out of resources, e.g. 
memory or storage +- `SYSTEM_FAILURE`: Task has failed to complete, due to an error in the system +- `DELAYED`: Task has been scheduled to run at a specific time +- `EXPIRED`: Task has expired and won't be executed +- `TIMED_OUT`: Task has reached its maxDuration and has been stopped + +We've removed the following statuses: + +- `WAITING_FOR_DEPLOY`: This is no longer used, and is replaced by `PENDING_VERSION` +- `FROZEN`: This is no longer used, and is replaced by `WAITING` +- `INTERRUPTED`: This is no longer used +- `REATTEMPTING`: This is no longer used, and is replaced by `EXECUTING` + +We've also added "boolean" helpers to runs returned via the API and from Realtime: + +- `isQueued`: Returns true when the status is `QUEUED`, `PENDING_VERSION`, or `DELAYED` +- `isExecuting`: Returns true when the status is `EXECUTING` or `DEQUEUED`. These count against your concurrency limits. +- `isWaiting`: Returns true when the status is `WAITING`. These do not count against your concurrency limits. +- `isCompleted`: Returns true when the status is any of the completed statuses. +- `isCanceled`: Returns true when the status is `CANCELED` +- `isFailed`: Returns true when the status is any of the failed statuses. +- `isSuccess`: Returns true when the status is `COMPLETED` + +This change adds the ability to easily detect which runs are being counted against your concurrency limit by filtering for both `EXECUTING` and `DEQUEUED`. 
diff --git a/apps/webapp/app/api/versions.ts b/apps/webapp/app/api/versions.ts new file mode 100644 index 0000000000..250d214b07 --- /dev/null +++ b/apps/webapp/app/api/versions.ts @@ -0,0 +1,57 @@ +import { + API_VERSION_HEADER_NAME, + API_VERSION as CORE_API_VERSION, +} from "@trigger.dev/core/v3/serverOnly"; +import { z } from "zod"; + +export const CURRENT_API_VERSION = CORE_API_VERSION; + +export const NON_SPECIFIC_API_VERSION = "none"; + +export type API_VERSIONS = typeof CURRENT_API_VERSION | typeof NON_SPECIFIC_API_VERSION; + +export function getApiVersion(request: Request): API_VERSIONS { + const apiVersion = request.headers.get(API_VERSION_HEADER_NAME); + + if (apiVersion === CURRENT_API_VERSION) { + return apiVersion; + } + + return NON_SPECIFIC_API_VERSION; +} + +// This has been copied from the core package to allow us to use these types in the webapp +export const RunStatusUnspecifiedApiVersion = z.enum([ + /// Task is waiting for a version update because it cannot execute without additional information (task, queue, etc.). Replaces WAITING_FOR_DEPLOY + "PENDING_VERSION", + /// Task hasn't been deployed yet but is waiting to be executed + "WAITING_FOR_DEPLOY", + /// Task is waiting to be executed by a worker + "QUEUED", + /// Task is currently being executed by a worker + "EXECUTING", + /// Task has failed and is waiting to be retried + "REATTEMPTING", + /// Task has been paused by the system, and will be resumed by the system + "FROZEN", + /// Task has been completed successfully + "COMPLETED", + /// Task has been canceled by the user + "CANCELED", + /// Task has been completed with errors + "FAILED", + /// Task has crashed and won't be retried, most likely the worker ran out of resources, e.g. 
memory or storage + "CRASHED", + /// Task was interrupted during execution, mostly this happens in development environments + "INTERRUPTED", + /// Task has failed to complete, due to an error in the system + "SYSTEM_FAILURE", + /// Task has been scheduled to run at a specific time + "DELAYED", + /// Task has expired and won't be executed + "EXPIRED", + /// Task has reached its maxDuration and has been stopped + "TIMED_OUT", +]); + +export type RunStatusUnspecifiedApiVersion = z.infer<typeof RunStatusUnspecifiedApiVersion>; diff --git a/apps/webapp/app/components/runs/v3/RunFilters.tsx b/apps/webapp/app/components/runs/v3/RunFilters.tsx index 9db584cbeb..403690aa11 100644 --- a/apps/webapp/app/components/runs/v3/RunFilters.tsx +++ b/apps/webapp/app/components/runs/v3/RunFilters.tsx @@ -10,10 +10,11 @@ import { import { Form, useFetcher } from "@remix-run/react"; import { IconToggleLeft } from "@tabler/icons-react"; import type { BulkActionType, TaskRunStatus, TaskTriggerSource } from "@trigger.dev/database"; -import { ListChecks, ListFilterIcon } from "lucide-react"; +import { ListFilterIcon } from "lucide-react"; import { matchSorter } from "match-sorter"; import { type ReactNode, useCallback, useEffect, useMemo, useState } from "react"; import { z } from "zod"; +import { ListCheckedIcon } from "~/assets/icons/ListCheckedIcon"; import { StatusIcon } from "~/assets/icons/StatusIcon"; import { TaskIcon } from "~/assets/icons/TaskIcon"; import { AppliedFilter } from "~/components/primitives/AppliedFilter"; @@ -55,8 +56,6 @@ import { TaskRunStatusCombo, } from "./TaskRunStatus"; import { TaskTriggerSourceIcon } from "./TaskTriggerSource"; -import { ListCheckedIcon } from "~/assets/icons/ListCheckedIcon"; -import { cn } from "~/utils/cn"; export const RunStatus = z.enum(allTaskRunStatuses); diff --git a/apps/webapp/app/components/runs/v3/TaskRunStatus.tsx b/apps/webapp/app/components/runs/v3/TaskRunStatus.tsx index fd2143ecb8..20eb3901f8 100644 --- a/apps/webapp/app/components/runs/v3/TaskRunStatus.tsx +++ 
b/apps/webapp/app/components/runs/v3/TaskRunStatus.tsx @@ -24,12 +24,13 @@ export const allTaskRunStatuses = [ "WAITING_FOR_DEPLOY", "PENDING_VERSION", "PENDING", + "DEQUEUED", "EXECUTING", "RETRYING_AFTER_FAILURE", "WAITING_TO_RESUME", "COMPLETED_SUCCESSFULLY", - "CANCELED", "COMPLETED_WITH_ERRORS", + "CANCELED", "TIMED_OUT", "CRASHED", "PAUSED", @@ -42,16 +43,15 @@ export const filterableTaskRunStatuses = [ "PENDING_VERSION", "DELAYED", "PENDING", - "WAITING_TO_RESUME", + "DEQUEUED", "EXECUTING", - "RETRYING_AFTER_FAILURE", + "WAITING_TO_RESUME", "COMPLETED_SUCCESSFULLY", - "CANCELED", "COMPLETED_WITH_ERRORS", "TIMED_OUT", "CRASHED", - "INTERRUPTED", "SYSTEM_FAILURE", + "CANCELED", "EXPIRED", ] as const satisfies Readonly>; @@ -60,6 +60,7 @@ const taskRunStatusDescriptions: Record = { PENDING: "Task is waiting to be executed.", PENDING_VERSION: "Run cannot execute until a version includes the task and queue.", WAITING_FOR_DEPLOY: "Run cannot execute until a version includes the task and queue.", + DEQUEUED: "Task has been dequeued from the queue but is not yet executing.", EXECUTING: "Task is currently being executed.", RETRYING_AFTER_FAILURE: "Task is being reattempted after a failure.", WAITING_TO_RESUME: `You have used a "wait" function. 
When the wait is complete, the task will resume execution.`, @@ -82,6 +83,7 @@ export const QUEUED_STATUSES = [ ] satisfies TaskRunStatus[]; export const RUNNING_STATUSES = [ + "DEQUEUED", "EXECUTING", "RETRYING_AFTER_FAILURE", "WAITING_TO_RESUME", @@ -164,6 +166,8 @@ export function TaskRunStatusIcon({ case "PENDING_VERSION": case "WAITING_FOR_DEPLOY": return ; + case "DEQUEUED": + return ; case "EXECUTING": return ; case "WAITING_TO_RESUME": @@ -205,6 +209,7 @@ export function runStatusClassNameColor(status: TaskRunStatus): string { return "text-amber-500"; case "EXECUTING": case "RETRYING_AFTER_FAILURE": + case "DEQUEUED": return "text-pending"; case "WAITING_TO_RESUME": return "text-charcoal-500"; @@ -240,6 +245,8 @@ export function runStatusTitle(status: TaskRunStatus): string { case "PENDING_VERSION": case "WAITING_FOR_DEPLOY": return "Pending version"; + case "DEQUEUED": + return "Dequeued"; case "EXECUTING": return "Executing"; case "WAITING_TO_RESUME": diff --git a/apps/webapp/app/database-types.ts b/apps/webapp/app/database-types.ts index b6d47cd9ac..3305dc67d5 100644 --- a/apps/webapp/app/database-types.ts +++ b/apps/webapp/app/database-types.ts @@ -30,6 +30,7 @@ export const TaskRunStatus = { PENDING: "PENDING", PENDING_VERSION: "PENDING_VERSION", WAITING_FOR_DEPLOY: "WAITING_FOR_DEPLOY", + DEQUEUED: "DEQUEUED", EXECUTING: "EXECUTING", WAITING_TO_RESUME: "WAITING_TO_RESUME", RETRYING_AFTER_FAILURE: "RETRYING_AFTER_FAILURE", diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 13c9a56ba2..104b280c66 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -200,6 +200,7 @@ const EnvironmentSchema = z.object({ PUBSUB_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"), DEFAULT_ENV_EXECUTION_CONCURRENCY_LIMIT: z.coerce.number().int().default(100), + DEFAULT_ENV_EXECUTION_CONCURRENCY_BURST_FACTOR: z.coerce.number().default(1.0), DEFAULT_ORG_EXECUTION_CONCURRENCY_LIMIT: z.coerce.number().int().default(300), 
DEFAULT_DEV_ENV_EXECUTION_ATTEMPTS: z.coerce.number().int().positive().default(1), diff --git a/apps/webapp/app/models/taskRun.server.ts b/apps/webapp/app/models/taskRun.server.ts index cfd13a424b..1035c1b7ad 100644 --- a/apps/webapp/app/models/taskRun.server.ts +++ b/apps/webapp/app/models/taskRun.server.ts @@ -129,6 +129,7 @@ export function batchTaskRunItemStatusForRunStatus( case TaskRunStatus.WAITING_FOR_DEPLOY: case TaskRunStatus.WAITING_TO_RESUME: case TaskRunStatus.RETRYING_AFTER_FAILURE: + case TaskRunStatus.DEQUEUED: case TaskRunStatus.EXECUTING: case TaskRunStatus.PAUSED: case TaskRunStatus.DELAYED: diff --git a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts index 52c93bc6f0..06f1a4c538 100644 --- a/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRetrieveRunPresenter.server.ts @@ -1,6 +1,5 @@ import { AttemptStatus, - RetrieveRunResponse, RunStatus, SerializedError, TaskRunError, @@ -12,10 +11,12 @@ import { } from "@trigger.dev/core/v3"; import { Prisma, TaskRunAttemptStatus, TaskRunStatus } from "@trigger.dev/database"; import assertNever from "assert-never"; +import { API_VERSIONS, CURRENT_API_VERSION, RunStatusUnspecifiedApiVersion } from "~/api/versions"; +import { $replica, prisma } from "~/db.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { generatePresignedUrl } from "~/v3/r2.server"; -import { BasePresenter } from "./basePresenter.server"; -import { $replica, prisma } from "~/db.server"; +import { tracer } from "~/v3/tracer.server"; +import { startSpanWithEnv } from "~/v3/tracing.server"; // Build 'select' object const commonRunSelect = { @@ -63,7 +64,9 @@ type CommonRelatedRun = Prisma.Result< type FoundRun = NonNullable>>; -export class ApiRetrieveRunPresenter extends BasePresenter { +export class ApiRetrieveRunPresenter { + constructor(private readonly apiVersion: 
API_VERSIONS) {} + public static async findRun(friendlyId: string, env: AuthenticatedEnvironment) { return $replica.taskRun.findFirst({ where: { @@ -98,11 +101,8 @@ export class ApiRetrieveRunPresenter extends BasePresenter { }); } - public async call( - taskRun: FoundRun, - env: AuthenticatedEnvironment - ): Promise { - return this.traceWithEnv("call", env, async (span) => { + public async call(taskRun: FoundRun, env: AuthenticatedEnvironment) { + return startSpanWithEnv(tracer, "ApiRetrieveRunPresenter.call", env, async () => { let $payload: any; let $payloadPresignedUrl: string | undefined; let $output: any; @@ -167,7 +167,7 @@ export class ApiRetrieveRunPresenter extends BasePresenter { } return { - ...(await createCommonRunStructure(taskRun)), + ...(await createCommonRunStructure(taskRun, this.apiVersion)), payload: $payload, payloadPresignedUrl: $payloadPresignedUrl, output: $output, @@ -180,13 +180,13 @@ export class ApiRetrieveRunPresenter extends BasePresenter { attempts: [], relatedRuns: { root: taskRun.rootTaskRun - ? await createCommonRunStructure(taskRun.rootTaskRun) + ? await createCommonRunStructure(taskRun.rootTaskRun, this.apiVersion) : undefined, parent: taskRun.parentTaskRun - ? await createCommonRunStructure(taskRun.parentTaskRun) + ? 
await createCommonRunStructure(taskRun.parentTaskRun, this.apiVersion) : undefined, children: await Promise.all( - taskRun.childRuns.map(async (r) => await createCommonRunStructure(r)) + taskRun.childRuns.map(async (r) => await createCommonRunStructure(r, this.apiVersion)) ), }, }; @@ -205,7 +205,7 @@ export class ApiRetrieveRunPresenter extends BasePresenter { } } - static isStatusFinished(status: RunStatus) { + static isStatusFinished(status: RunStatus | RunStatusUnspecifiedApiVersion) { return ( status === "COMPLETED" || status === "FAILED" || @@ -216,7 +216,21 @@ export class ApiRetrieveRunPresenter extends BasePresenter { ); } - static apiStatusFromRunStatus(status: TaskRunStatus): RunStatus { + static apiStatusFromRunStatus( + status: TaskRunStatus, + apiVersion: API_VERSIONS + ): RunStatus | RunStatusUnspecifiedApiVersion { + switch (apiVersion) { + case CURRENT_API_VERSION: { + return this.apiStatusFromRunStatusV2(status); + } + default: { + return this.apiStatusFromRunStatusV1(status); + } + } + } + + static apiStatusFromRunStatusV1(status: TaskRunStatus): RunStatusUnspecifiedApiVersion { switch (status) { case "DELAYED": { return "DELAYED"; @@ -237,6 +251,7 @@ export class ApiRetrieveRunPresenter extends BasePresenter { case "RETRYING_AFTER_FAILURE": { return "REATTEMPTING"; } + case "DEQUEUED": case "EXECUTING": { return "EXECUTING"; } @@ -270,19 +285,76 @@ export class ApiRetrieveRunPresenter extends BasePresenter { } } - static apiBooleanHelpersFromTaskRunStatus(status: TaskRunStatus) { + static apiStatusFromRunStatusV2(status: TaskRunStatus): RunStatus { + switch (status) { + case "DELAYED": { + return "DELAYED"; + } + case "PENDING_VERSION": { + return "PENDING_VERSION"; + } + case "WAITING_FOR_DEPLOY": { + return "PENDING_VERSION"; + } + case "PENDING": { + return "QUEUED"; + } + case "PAUSED": + case "WAITING_TO_RESUME": { + return "WAITING"; + } + case "DEQUEUED": { + return "DEQUEUED"; + } + case "RETRYING_AFTER_FAILURE": + case "EXECUTING": { + 
return "EXECUTING"; + } + case "CANCELED": { + return "CANCELED"; + } + case "COMPLETED_SUCCESSFULLY": { + return "COMPLETED"; + } + case "SYSTEM_FAILURE": { + return "SYSTEM_FAILURE"; + } + case "CRASHED": { + return "CRASHED"; + } + case "INTERRUPTED": + case "COMPLETED_WITH_ERRORS": { + return "FAILED"; + } + case "EXPIRED": { + return "EXPIRED"; + } + case "TIMED_OUT": { + return "TIMED_OUT"; + } + default: { + assertNever(status); + } + } + } + + static apiBooleanHelpersFromTaskRunStatus(status: TaskRunStatus, apiVersion: API_VERSIONS) { return ApiRetrieveRunPresenter.apiBooleanHelpersFromRunStatus( - ApiRetrieveRunPresenter.apiStatusFromRunStatus(status) + ApiRetrieveRunPresenter.apiStatusFromRunStatus(status, apiVersion) ); } - static apiBooleanHelpersFromRunStatus(status: RunStatus) { + static apiBooleanHelpersFromRunStatus(status: RunStatus | RunStatusUnspecifiedApiVersion) { const isQueued = status === "QUEUED" || status === "WAITING_FOR_DEPLOY" || status === "DELAYED" || status === "PENDING_VERSION"; - const isExecuting = status === "EXECUTING" || status === "REATTEMPTING" || status === "FROZEN"; + const isExecuting = + status === "EXECUTING" || + status === "REATTEMPTING" || + status === "FROZEN" || + status === "DEQUEUED"; const isCompleted = status === "COMPLETED" || status === "CANCELED" || @@ -293,6 +365,7 @@ export class ApiRetrieveRunPresenter extends BasePresenter { const isFailed = isCompleted && status !== "COMPLETED"; const isSuccess = isCompleted && status === "COMPLETED"; const isCancelled = status === "CANCELED"; + const isWaiting = status === "WAITING"; return { isQueued, @@ -301,6 +374,7 @@ export class ApiRetrieveRunPresenter extends BasePresenter { isFailed, isSuccess, isCancelled, + isWaiting, }; } @@ -358,7 +432,7 @@ async function resolveSchedule(run: CommonRelatedRun) { }; } -async function createCommonRunStructure(run: CommonRelatedRun) { +async function createCommonRunStructure(run: CommonRelatedRun, apiVersion: API_VERSIONS) { 
const metadata = await parsePacket({ data: run.metadata ?? undefined, dataType: run.metadataType, @@ -369,7 +443,7 @@ async function createCommonRunStructure(run: CommonRelatedRun) { taskIdentifier: run.taskIdentifier, idempotencyKey: run.idempotencyKey ?? undefined, version: run.lockedToVersion?.version, - status: ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status), + status: ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status, apiVersion), createdAt: run.createdAt, startedAt: run.startedAt ?? undefined, updatedAt: run.updatedAt, @@ -385,7 +459,7 @@ async function createCommonRunStructure(run: CommonRelatedRun) { tags: run.tags .map((t: { name: string }) => t.name) .sort((a: string, b: string) => a.localeCompare(b)), - ...ApiRetrieveRunPresenter.apiBooleanHelpersFromTaskRunStatus(run.status), + ...ApiRetrieveRunPresenter.apiBooleanHelpersFromTaskRunStatus(run.status, apiVersion), triggerFunction: resolveTriggerFunction(run), batchId: run.batch?.friendlyId, metadata, diff --git a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts index 46e8a3704c..b541f75a47 100644 --- a/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/ApiRunListPresenter.server.ts @@ -1,12 +1,8 @@ -import { - type ListRunResponse, - type ListRunResponseItem, - parsePacket, - RunStatus, -} from "@trigger.dev/core/v3"; +import { parsePacket, RunStatus } from "@trigger.dev/core/v3"; import { type Project, type RuntimeEnvironment, type TaskRunStatus } from "@trigger.dev/database"; import assertNever from "assert-never"; import { z } from "zod"; +import { API_VERSIONS, RunStatusUnspecifiedApiVersion } from "~/api/versions"; import { clickhouseClient } from "~/services/clickhouseInstance.server"; import { logger } from "~/services/logger.server"; import { CoercedDate } from "~/utils/zod"; @@ -28,7 +24,9 @@ export const ApiRunListSearchParams = z.object({ } const statuses = 
value.split(","); - const parsedStatuses = statuses.map((status) => RunStatus.safeParse(status)); + const parsedStatuses = statuses.map((status) => + RunStatus.or(RunStatusUnspecifiedApiVersion).safeParse(status) + ); if (parsedStatuses.some((result) => !result.success)) { const invalidStatuses: string[] = []; @@ -114,8 +112,9 @@ export class ApiRunListPresenter extends BasePresenter { public async call( project: Project, searchParams: ApiRunListSearchParams, + apiVersion: API_VERSIONS, environment?: RuntimeEnvironment - ): Promise { + ) { return this.trace("call", async (span) => { const options: RunListOptions = { projectId: project.id, @@ -145,7 +144,7 @@ export class ApiRunListPresenter extends BasePresenter { organizationId = environment.organizationId; } else { if (searchParams["filter[env]"]) { - const environments = await this._prisma.runtimeEnvironment.findMany({ + const environments = await this._replica.runtimeEnvironment.findMany({ where: { projectId: project.id, slug: { @@ -213,7 +212,7 @@ export class ApiRunListPresenter extends BasePresenter { options.batchId = searchParams["filter[batch]"]; } - const presenter = new NextRunListPresenter(this._prisma, clickhouseClient); + const presenter = new NextRunListPresenter(this._replica, clickhouseClient); logger.debug("Calling RunListPresenter", { options }); @@ -221,7 +220,7 @@ export class ApiRunListPresenter extends BasePresenter { logger.debug("RunListPresenter results", { runs: results.runs.length }); - const data: ListRunResponseItem[] = await Promise.all( + const data = await Promise.all( results.runs.map(async (run) => { const metadata = await parsePacket( { @@ -235,7 +234,7 @@ export class ApiRunListPresenter extends BasePresenter { return { id: run.friendlyId, - status: ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status), + status: ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status, apiVersion), taskIdentifier: run.taskIdentifier, idempotencyKey: run.idempotencyKey, version: 
run.version ?? undefined, @@ -259,7 +258,7 @@ export class ApiRunListPresenter extends BasePresenter { depth: run.depth, metadata, ...ApiRetrieveRunPresenter.apiBooleanHelpersFromRunStatus( - ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status) + ApiRetrieveRunPresenter.apiStatusFromRunStatus(run.status, apiVersion) ), }; }) @@ -275,7 +274,9 @@ export class ApiRunListPresenter extends BasePresenter { }); } - static apiStatusToRunStatuses(status: RunStatus): TaskRunStatus[] | TaskRunStatus { + static apiStatusToRunStatuses( + status: RunStatus | RunStatusUnspecifiedApiVersion + ): TaskRunStatus[] | TaskRunStatus { switch (status) { case "DELAYED": return "DELAYED"; @@ -321,6 +322,12 @@ export class ApiRunListPresenter extends BasePresenter { case "TIMED_OUT": { return "TIMED_OUT"; } + case "DEQUEUED": { + return "DEQUEUED"; + } + case "WAITING": { + return "WAITING_TO_RESUME"; + } default: { assertNever(status); } diff --git a/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts b/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts index 0e13fc6167..8dc50e96e3 100644 --- a/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/QueueListPresenter.server.ts @@ -116,7 +116,6 @@ export class QueueListPresenter extends BasePresenter { concurrencyLimit: true, type: true, paused: true, - releaseConcurrencyOnWaitpoint: true, }, orderBy: { orderableName: "asc", @@ -146,7 +145,6 @@ export class QueueListPresenter extends BasePresenter { queued: results[0][queue.name] ?? 0, concurrencyLimit: queue.concurrencyLimit ?? 
null, paused: queue.paused, - releaseConcurrencyOnWaitpoint: queue.releaseConcurrencyOnWaitpoint, }) ); } diff --git a/apps/webapp/app/presenters/v3/QueueRetrievePresenter.server.ts b/apps/webapp/app/presenters/v3/QueueRetrievePresenter.server.ts index 04cc26a5ad..409c14d545 100644 --- a/apps/webapp/app/presenters/v3/QueueRetrievePresenter.server.ts +++ b/apps/webapp/app/presenters/v3/QueueRetrievePresenter.server.ts @@ -76,7 +76,6 @@ export class QueueRetrievePresenter extends BasePresenter { queued: results[0]?.[queue.name] ?? 0, concurrencyLimit: queue.concurrencyLimit ?? null, paused: queue.paused, - releaseConcurrencyOnWaitpoint: queue.releaseConcurrencyOnWaitpoint, }), }; } @@ -106,8 +105,7 @@ export function toQueueItem(data: { queued: number; concurrencyLimit: number | null; paused: boolean; - releaseConcurrencyOnWaitpoint: boolean; -}): QueueItem { +}): QueueItem & { releaseConcurrencyOnWaitpoint: boolean } { return { id: data.friendlyId, //remove the task/ prefix if it exists @@ -117,6 +115,7 @@ export function toQueueItem(data: { queued: data.queued, concurrencyLimit: data.concurrencyLimit, paused: data.paused, - releaseConcurrencyOnWaitpoint: data.releaseConcurrencyOnWaitpoint, + // TODO: This needs to be removed but keeping this here for now to avoid breaking existing clients + releaseConcurrencyOnWaitpoint: true, }; } diff --git a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx index 8ab4b24ba4..de6dea2711 100644 --- a/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx +++ b/apps/webapp/app/routes/_app.orgs.$organizationSlug.projects.$projectParam.env.$envParam.queues/route.tsx @@ -360,32 +360,6 @@ export default function Page() { > Limited by - - - When a task executing on this queue is paused and waiting for a - waitpoint to complete, 
the queue will release the concurrency being used - by the run so other runs can be started. - - - Read docs - - - } - > - Release on waitpoint - Pause/resume @@ -450,16 +424,12 @@ export default function Page() { alignment="right" className={cn( queue.paused ? "tabular-nums opacity-50" : undefined, + queue.running > 0 && "text-text-bright", isAtLimit && "text-warning" )} > {queue.running}/ - + {limit} @@ -472,12 +442,6 @@ export default function Page() { > {queue.concurrencyLimit ? "User" : "Environment"} - - {queue.releaseConcurrencyOnWaitpoint ? "Yes" : "No"} - { + async ({ searchParams, params, authentication, apiVersion }) => { const project = await findProjectByRef(params.projectRef, authentication.userId); if (!project) { @@ -25,7 +25,7 @@ export const loader = createLoaderPATApiRoute( } const presenter = new ApiRunListPresenter(); - const result = await presenter.call(project, searchParams); + const result = await presenter.call(project, searchParams, apiVersion); if (!result) { return json({ data: [] }); diff --git a/apps/webapp/app/routes/api.v1.runs.$runParam.reschedule.ts b/apps/webapp/app/routes/api.v1.runs.$runParam.reschedule.ts index 8a96f731ba..f4e08831f4 100644 --- a/apps/webapp/app/routes/api.v1.runs.$runParam.reschedule.ts +++ b/apps/webapp/app/routes/api.v1.runs.$runParam.reschedule.ts @@ -2,6 +2,7 @@ import type { ActionFunctionArgs } from "@remix-run/server-runtime"; import { json } from "@remix-run/server-runtime"; import { RescheduleRunRequestBody } from "@trigger.dev/core/v3/schemas"; import { z } from "zod"; +import { getApiVersion } from "~/api/versions"; import { prisma } from "~/db.server"; import { ApiRetrieveRunPresenter } from "~/presenters/v3/ApiRetrieveRunPresenter.server"; import { authenticateApiRequest } from "~/services/apiAuth.server"; @@ -70,7 +71,9 @@ export async function action({ request, params }: ActionFunctionArgs) { return json({ error: "Run not found" }, { status: 404 }); } - const presenter = new 
ApiRetrieveRunPresenter(); + const apiVersion = getApiVersion(request); + + const presenter = new ApiRetrieveRunPresenter(apiVersion); const result = await presenter.call(run, authenticationResult.environment); if (!result) { diff --git a/apps/webapp/app/routes/api.v1.runs.ts b/apps/webapp/app/routes/api.v1.runs.ts index 2fd5348f78..17a664f6ef 100644 --- a/apps/webapp/app/routes/api.v1.runs.ts +++ b/apps/webapp/app/routes/api.v1.runs.ts @@ -17,11 +17,12 @@ export const loader = createLoaderApiRoute( }, findResource: async () => 1, // This is a dummy function, we don't need to find a resource }, - async ({ searchParams, authentication }) => { + async ({ searchParams, authentication, apiVersion }) => { const presenter = new ApiRunListPresenter(); const result = await presenter.call( authentication.environment.project, searchParams, + apiVersion, authentication.environment ); diff --git a/apps/webapp/app/routes/api.v3.runs.$runId.ts b/apps/webapp/app/routes/api.v3.runs.$runId.ts index f144f8effd..de40a9a912 100644 --- a/apps/webapp/app/routes/api.v3.runs.$runId.ts +++ b/apps/webapp/app/routes/api.v3.runs.$runId.ts @@ -27,8 +27,8 @@ export const loader = createLoaderApiRoute( superScopes: ["read:runs", "read:all", "admin"], }, }, - async ({ authentication, resource }) => { - const presenter = new ApiRetrieveRunPresenter(); + async ({ authentication, resource, apiVersion }) => { + const presenter = new ApiRetrieveRunPresenter(apiVersion); const result = await presenter.call(resource, authentication.environment); if (!result) { diff --git a/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts b/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts index 24aa181404..199244b1da 100644 --- a/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts +++ b/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.wait.duration.ts @@ -50,7 +50,6 @@ const { action } = createActionApiRoute( waitpoints: waitpoint.id, projectId: 
authentication.environment.project.id, organizationId: authentication.environment.organization.id, - releaseConcurrency: body.releaseConcurrency, }); return json({ diff --git a/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.waitpoints.tokens.$waitpointFriendlyId.wait.ts b/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.waitpoints.tokens.$waitpointFriendlyId.wait.ts index 1b33ae0807..b190f3de95 100644 --- a/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.waitpoints.tokens.$waitpointFriendlyId.wait.ts +++ b/apps/webapp/app/routes/engine.v1.runs.$runFriendlyId.waitpoints.tokens.$waitpointFriendlyId.wait.ts @@ -13,13 +13,10 @@ const { action } = createActionApiRoute( runFriendlyId: z.string(), waitpointFriendlyId: z.string(), }), - body: z.object({ - releaseConcurrency: z.boolean().optional(), - }), maxContentLength: 1024 * 10, // 10KB method: "POST", }, - async ({ authentication, body, params }) => { + async ({ authentication, params }) => { // Resume tokens are actually just waitpoints const waitpointId = WaitpointId.toId(params.waitpointFriendlyId); const runId = RunId.toId(params.runFriendlyId); @@ -42,7 +39,6 @@ const { action } = createActionApiRoute( waitpoints: [waitpointId], projectId: authentication.environment.project.id, organizationId: authentication.environment.organization.id, - releaseConcurrency: body.releaseConcurrency, }); return json( diff --git a/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts b/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts index 8cbc4d2b7e..17a759e6ca 100644 --- a/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts +++ b/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts @@ -26,11 +26,12 @@ export const loader = createLoaderApiRoute( superScopes: ["read:runs", "read:all", "admin"], }, }, - async ({ authentication, request, resource: batchRun }) => { + async ({ authentication, request, resource: batchRun, apiVersion }) => { return realtimeClient.streamBatch( request.url, 
authentication.environment, batchRun.id, + apiVersion, authentication.realtime, request.headers.get("x-trigger-electric-version") ?? undefined ); diff --git a/apps/webapp/app/routes/realtime.v1.runs.$runId.ts b/apps/webapp/app/routes/realtime.v1.runs.$runId.ts index 935b4b5b05..35a34b01b4 100644 --- a/apps/webapp/app/routes/realtime.v1.runs.$runId.ts +++ b/apps/webapp/app/routes/realtime.v1.runs.$runId.ts @@ -39,11 +39,12 @@ export const loader = createLoaderApiRoute( superScopes: ["read:runs", "read:all", "admin"], }, }, - async ({ authentication, request, resource: run }) => { + async ({ authentication, request, resource: run, apiVersion }) => { return realtimeClient.streamRun( request.url, authentication.environment, run.id, + apiVersion, authentication.realtime, request.headers.get("x-trigger-electric-version") ?? undefined ); diff --git a/apps/webapp/app/routes/realtime.v1.runs.ts b/apps/webapp/app/routes/realtime.v1.runs.ts index 51fb48f8dd..1819265ee7 100644 --- a/apps/webapp/app/routes/realtime.v1.runs.ts +++ b/apps/webapp/app/routes/realtime.v1.runs.ts @@ -24,11 +24,12 @@ export const loader = createLoaderApiRoute( superScopes: ["read:runs", "read:all", "admin"], }, }, - async ({ searchParams, authentication, request }) => { + async ({ searchParams, authentication, request, apiVersion }) => { return realtimeClient.streamRuns( request.url, authentication.environment, searchParams, + apiVersion, authentication.realtime, request.headers.get("x-trigger-electric-version") ?? 
undefined ); diff --git a/apps/webapp/app/routes/realtime.v2.streams.$runId.$streamId.ts b/apps/webapp/app/routes/realtime.v2.streams.$runId.$streamId.ts deleted file mode 100644 index 9f22701a78..0000000000 --- a/apps/webapp/app/routes/realtime.v2.streams.$runId.$streamId.ts +++ /dev/null @@ -1,87 +0,0 @@ -import { z } from "zod"; -import { $replica } from "~/db.server"; -import { - createActionApiRoute, - createLoaderApiRoute, -} from "~/services/routeBuilders/apiBuilder.server"; -import { v2RealtimeStreams } from "~/services/realtime/v2StreamsGlobal.server"; - -const ParamsSchema = z.object({ - runId: z.string(), - streamId: z.string(), -}); - -const { action } = createActionApiRoute( - { - params: ParamsSchema, - }, - async ({ request, params, authentication }) => { - if (!request.body) { - return new Response("No body provided", { status: 400 }); - } - - const run = await $replica.taskRun.findFirst({ - where: { - friendlyId: params.runId, - runtimeEnvironmentId: authentication.environment.id, - }, - include: { - batch: { - select: { - friendlyId: true, - }, - }, - }, - }); - - if (!run) { - return new Response("Run not found", { status: 404 }); - } - - return v2RealtimeStreams.ingestData(request.body, run.id, params.streamId); - } -); - -export { action }; - -export const loader = createLoaderApiRoute( - { - params: ParamsSchema, - allowJWT: true, - corsStrategy: "all", - findResource: async (params, auth) => { - return $replica.taskRun.findFirst({ - where: { - friendlyId: params.runId, - runtimeEnvironmentId: auth.environment.id, - }, - include: { - batch: { - select: { - friendlyId: true, - }, - }, - }, - }); - }, - authorization: { - action: "read", - resource: (run) => ({ - runs: run.friendlyId, - tags: run.runTags, - batch: run.batch?.friendlyId, - tasks: run.taskIdentifier, - }), - superScopes: ["read:runs", "read:all", "admin"], - }, - }, - async ({ params, request, resource: run, authentication }) => { - return v2RealtimeStreams.streamResponse( - 
request, - run.id, - params.streamId, - authentication.environment, - request.signal - ); - } -); diff --git a/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts b/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts index c900ca568e..ba48f7085a 100644 --- a/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts +++ b/apps/webapp/app/routes/resources.taskruns.$runParam.debug.ts @@ -31,6 +31,7 @@ export async function loader({ request, params }: LoaderFunctionArgs) { organizationId: true, project: true, maximumConcurrencyLimit: true, + concurrencyLimitBurstFactor: true, organization: { select: { id: true, @@ -101,7 +102,7 @@ export async function loader({ request, params }: LoaderFunctionArgs) { run.runtimeEnvironment ); - const queueCurrentConcurrencyKey = engine.runQueue.keys.currentConcurrencyKey( + const queueCurrentConcurrencyKey = engine.runQueue.keys.queueCurrentConcurrencyKey( run.runtimeEnvironment, run.queue, run.concurrencyKey ?? undefined @@ -120,10 +121,6 @@ export async function loader({ request, params }: LoaderFunctionArgs) { run.runtimeEnvironment ); - const releaseConcurrencyBucketKey = `engine:release-concurrency:org:${run.runtimeEnvironment.organizationId}:proj:${run.runtimeEnvironment.project.id}:env:${run.runtimeEnvironment.id}:bucket`; - const releaseConcurrencyQueueKey = `engine:release-concurrency:org:${run.runtimeEnvironment.organizationId}:proj:${run.runtimeEnvironment.project.id}:env:${run.runtimeEnvironment.id}:queue`; - const releaseConcurrencyMetadataKey = `engine:release-concurrency:org:${run.runtimeEnvironment.organizationId}:proj:${run.runtimeEnvironment.project.id}:env:${run.runtimeEnvironment.id}:metadata`; - const withPrefix = (key: string) => `engine:runqueue:${key}`; const keys = [ @@ -143,22 +140,6 @@ export async function loader({ request, params }: LoaderFunctionArgs) { label: "Env concurrency limit", key: withPrefix(envConcurrencyLimitKey), }, - { - label: "Release concurrency bucket", - key: 
releaseConcurrencyBucketKey, - }, - { - label: "Release concurrency queue", - key: releaseConcurrencyQueueKey, - }, - { - label: "Release concurrency metadata", - key: releaseConcurrencyMetadataKey, - }, - { - label: "Release concurrency releasings", - key: "engine:release-concurrency:releasings", - }, ]; return typedjson({ diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 8847b5e4e2..689e7a5736 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -81,7 +81,6 @@ export class IdempotencyKeyConcern { projectId: request.environment.projectId, organizationId: request.environment.organizationId, tx: this.prisma, - releaseConcurrency: request.body.options?.releaseConcurrency, }); } ); diff --git a/apps/webapp/app/runEngine/concerns/runChainStates.server.ts b/apps/webapp/app/runEngine/concerns/runChainStates.server.ts deleted file mode 100644 index b0e9bf2064..0000000000 --- a/apps/webapp/app/runEngine/concerns/runChainStates.server.ts +++ /dev/null @@ -1,276 +0,0 @@ -import { PrismaClientOrTransaction, TaskRun } from "@trigger.dev/database"; -import { RunChainStateManager, TriggerTaskRequest } from "../types"; -import { RunChainState } from "@trigger.dev/core/v3/schemas"; -import { logger } from "~/services/logger.server"; -import { EngineServiceValidationError } from "./errors"; - -export class DefaultRunChainStateManager implements RunChainStateManager { - private readonly prisma: PrismaClientOrTransaction; - private readonly isReleaseConcurrencyEnabled: boolean; - - constructor(prisma: PrismaClientOrTransaction, isReleaseConcurrencyEnabled: boolean) { - this.prisma = prisma; - this.isReleaseConcurrencyEnabled = isReleaseConcurrencyEnabled; - } - - async validateRunChain( - request: TriggerTaskRequest, - { - parentRun, - queueName, - lockedQueueId, - }: { parentRun?: TaskRun; queueName: 
string; lockedQueueId?: string } - ): Promise { - // if there is no parent run, the chain resets - if (!parentRun) { - return {}; - } - - const parsedParentRunChainState = RunChainState.safeParse(parentRun.runChainState ?? {}); - - if (!parsedParentRunChainState.success) { - logger.error("Invalid run chain state for parent run", { - runId: parentRun.id, - runState: parentRun.runChainState, - error: parsedParentRunChainState.error, - }); - - return {}; - } - - const parentRunChainState = parsedParentRunChainState.data; - - if ( - typeof request.body.options?.resumeParentOnCompletion === "boolean" && - !request.body.options.resumeParentOnCompletion - ) { - return parentRunChainState; - } - - // Now we need to check if the parent run will hold concurrency, or if it will release it - // if it will hold concurrency, we need to account for the parent run's concurrency - // Then, along with the new run's concurrency, - // we need to determine if the new run will ever be able to run, or are we in a deadlock situation - // We need to check the concurrency limit against the concurrency limit of the environment, and the queue of the new run - // We'll also need the queue of the parent run, to determine if the parent run will release and which queue to add to - // Since the parent run is already running, it will definitely have a locked queue associated with it - const { concurrency } = parentRunChainState; - - const parentLockedQueueId = parentRun.lockedQueueId; - - if (!parentLockedQueueId) { - logger.error("Parent run has no locked queue, cannot determine run chain state", { - runId: parentRun.id, - runState: parentRun.runChainState, - }); - - return {}; - } - - const parentQueueState = await this.#getParentQueueState( - parentRunChainState, - parentLockedQueueId - ); - - // We first need to check if the release concurrency system is enabled, - // If it is not enabled, then we can assume the parent run will hold the concurrency, - // for the env and the queue - // If it is 
enabled, we never hold the concurrency for the env, just for the queue - if (!this.isReleaseConcurrencyEnabled) { - parentQueueState.holding += 1; - - const newRunChainState = { - ...parentRunChainState, - concurrency: { - queues: [ - ...(concurrency?.queues ?? []).filter((queue) => queue.id !== parentLockedQueueId), - parentQueueState, - ], - environment: (concurrency?.environment ?? 0) + 1, - }, - }; - - return await this.#validateNewRunChainState(request, newRunChainState, { - parentRun, - queueName, - lockedQueueId, - }); - } - - // Now we need to determine if the parent run will release the concurrency - // if it does, we will add to the holding count for the queue - const willReleaseConcurrency = await this.#determineIfParentRunWillReleaseConcurrency( - request, - parentLockedQueueId - ); - - if (!willReleaseConcurrency) { - parentQueueState.holding += 1; - } - - const newRunChainState = { - ...parentRunChainState, - concurrency: { - queues: [ - ...(concurrency?.queues ?? []).filter((queue) => queue.id !== parentLockedQueueId), - parentQueueState, - ], - environment: concurrency?.environment ?? 0, - }, - }; - - return await this.#validateNewRunChainState(request, newRunChainState, { - parentRun, - queueName, - lockedQueueId, - }); - } - - // Performs the deadlock detection logic once the new run chain state is determined - // Needs to account for everything held, plus the new run's concurrency - async #validateNewRunChainState( - request: TriggerTaskRequest, - runChainState: RunChainState, - { - parentRun, - queueName, - lockedQueueId, - }: { parentRun?: TaskRun; queueName: string; lockedQueueId?: string } - ) { - logger.debug("Validating new run chain state", { - runChainState, - }); - - const environmentConcurrency = (runChainState.concurrency?.environment ?? 
0) + 1; - - if ( - request.environment.maximumConcurrencyLimit > 0 && - environmentConcurrency > request.environment.maximumConcurrencyLimit - ) { - const environmentDetails = `The environment has a concurrency limit of ${request.environment.maximumConcurrencyLimit}, and the chain would require ${environmentConcurrency}`; - throw new EngineServiceValidationError(this.createDeadlockErrorMessage(environmentDetails)); - } - - if (!lockedQueueId) { - return runChainState; - } - - const queueConcurrencyState = runChainState.concurrency?.queues.find( - (queue) => queue.id === lockedQueueId - ); - - if (!queueConcurrencyState) { - return runChainState; - } - - const queueConcurrency = queueConcurrencyState.holding + 1; - - const queue = await this.prisma.taskQueue.findFirst({ - where: { - id: lockedQueueId, - }, - select: { - concurrencyLimit: true, - }, - }); - - if (!queue) { - return runChainState; - } - - const queueConcurrencyLimit = queue.concurrencyLimit; - - if ( - typeof queueConcurrencyLimit === "number" && - queueConcurrencyLimit !== 0 && - queueConcurrency > queueConcurrencyLimit - ) { - const queueDetails = `The queue '${queueName}' has a concurrency limit of ${queueConcurrencyLimit}, and the chain would require ${queueConcurrency}`; - throw new EngineServiceValidationError(this.createDeadlockErrorMessage(queueDetails)); - } - - return runChainState; - } - - async #determineIfParentRunWillReleaseConcurrency( - request: TriggerTaskRequest, - parentLockedQueueId: string - ) { - if (typeof request.body.options?.releaseConcurrency === "boolean") { - return request.body.options.releaseConcurrency; - } - - const parentQueue = await this.prisma.taskQueue.findFirst({ - where: { - id: parentLockedQueueId, - }, - select: { - releaseConcurrencyOnWaitpoint: true, - concurrencyLimit: true, - }, - }); - - logger.debug("Determining if parent run will release concurrency", { - parentQueue, - }); - - if ( - typeof parentQueue?.concurrencyLimit === "undefined" || - 
parentQueue.concurrencyLimit === null - ) { - return true; - } - - if (typeof parentQueue?.releaseConcurrencyOnWaitpoint === "boolean") { - return parentQueue.releaseConcurrencyOnWaitpoint; - } - - return false; - } - - async #getParentQueueState(runChainState: RunChainState, parentLockedQueueId: string) { - const newQueueState = runChainState.concurrency?.queues.find( - (queue) => queue.id === parentLockedQueueId - ); - - if (newQueueState) { - return newQueueState; - } - - const parentQueue = await this.prisma.taskQueue.findFirst({ - where: { - id: parentLockedQueueId, - }, - }); - - if (!parentQueue) { - throw new Error("Deadlock detection failed, parent queue not found"); - } - - return { - id: parentQueue.id, - name: parentQueue.name, - holding: 0, - }; - } - - private createDeadlockErrorMessage(details: string) { - return `Deadlock detected: This task run cannot be triggered because it would create a concurrency deadlock. - -A deadlock occurs when a chain of task runs (parent -> child) would collectively hold more concurrency than is available, making it impossible for the child run to ever execute. - -Current situation: -${details} - -This usually happens when: -1. A parent task triggers a child task using triggerAndWait() -2. Both tasks use the same queue -3. The parent task doesn't release its concurrency while waiting (releaseConcurrency: false) - -To fix this, you can: -1. Enable releaseConcurrencyOnWaitpoint on the queue -2. Use a different queue for the child task -3. Increase the concurrency limits -4. 
Use trigger() instead of triggerAndWait() if you don't need to wait`; - } -} diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index caa9aafc40..7809f4ef4d 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -30,7 +30,6 @@ import { IdempotencyKeyConcern } from "../concerns/idempotencyKeys.server"; import type { PayloadProcessor, QueueManager, - RunChainStateManager, RunNumberIncrementer, TraceEventConcern, TriggerTaskRequest, @@ -47,7 +46,6 @@ export class RunEngineTriggerTaskService { private readonly engine: RunEngine; private readonly tracer: Tracer; private readonly traceEventConcern: TraceEventConcern; - private readonly runChainStateManager: RunChainStateManager; constructor(opts: { prisma: PrismaClientOrTransaction; @@ -58,7 +56,6 @@ export class RunEngineTriggerTaskService { idempotencyKeyConcern: IdempotencyKeyConcern; runNumberIncrementer: RunNumberIncrementer; traceEventConcern: TraceEventConcern; - runChainStateManager: RunChainStateManager; tracer: Tracer; }) { this.prisma = opts.prisma; @@ -70,7 +67,6 @@ export class RunEngineTriggerTaskService { this.runNumberIncrementer = opts.runNumberIncrementer; this.tracer = opts.tracer; this.traceEventConcern = opts.traceEventConcern; - this.runChainStateManager = opts.runChainStateManager; } public async call({ @@ -228,12 +224,6 @@ export class RunEngineTriggerTaskService { const depth = parentRun ? parentRun.depth + 1 : 0; - const runChainState = await this.runChainStateManager.validateRunChain(triggerRequest, { - parentRun: parentRun ?? undefined, - queueName, - lockedQueueId, - }); - const workerQueue = await this.queueConcern.getWorkerQueue(environment); try { @@ -300,13 +290,11 @@ export class RunEngineTriggerTaskService { : undefined, machine: body.options?.machine, priorityMs: body.options?.priority ? 
body.options.priority * 1_000 : undefined, - releaseConcurrency: body.options?.releaseConcurrency, queueTimestamp: options.queueTimestamp ?? (parentRun && body.options?.resumeParentOnCompletion ? parentRun.queueTimestamp ?? undefined : undefined), - runChainState, scheduleId: options.scheduleId, scheduleInstanceId: options.scheduleInstanceId, createdAt: options.overrideCreatedAt, diff --git a/apps/webapp/app/runEngine/types.ts b/apps/webapp/app/runEngine/types.ts index 524d2ac714..9523999d54 100644 --- a/apps/webapp/app/runEngine/types.ts +++ b/apps/webapp/app/runEngine/types.ts @@ -144,10 +144,3 @@ export interface TraceEventConcern { callback: (span: TracedEventSpan) => Promise ): Promise; } - -export interface RunChainStateManager { - validateRunChain( - request: TriggerTaskRequest, - options: { parentRun?: TaskRun; queueName: string; lockedQueueId?: string } - ): Promise; -} diff --git a/apps/webapp/app/services/realtime/databaseRealtimeStreams.server.ts b/apps/webapp/app/services/realtime/databaseRealtimeStreams.server.ts deleted file mode 100644 index 9e1eed221e..0000000000 --- a/apps/webapp/app/services/realtime/databaseRealtimeStreams.server.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { PrismaClient } from "@trigger.dev/database"; -import { AuthenticatedEnvironment } from "../apiAuth.server"; -import { logger } from "../logger.server"; -import { RealtimeClient } from "../realtimeClient.server"; -import { StreamIngestor, StreamResponder } from "./types"; - -export type DatabaseRealtimeStreamsOptions = { - prisma: PrismaClient; - realtimeClient: RealtimeClient; -}; - -// Class implementing both interfaces -export class DatabaseRealtimeStreams implements StreamIngestor, StreamResponder { - constructor(private options: DatabaseRealtimeStreamsOptions) {} - - async streamResponse( - request: Request, - runId: string, - streamId: string, - environment: AuthenticatedEnvironment, - signal: AbortSignal - ): Promise { - return this.options.realtimeClient.streamChunks( - 
request.url, - environment, - runId, - streamId, - signal, - request.headers.get("x-trigger-electric-version") ?? undefined - ); - } - - async ingestData( - stream: ReadableStream, - runId: string, - streamId: string - ): Promise { - try { - const textStream = stream.pipeThrough(new TextDecoderStream()); - - const reader = textStream.getReader(); - let sequence = 0; - - while (true) { - const { done, value } = await reader.read(); - - if (done || !value) { - break; - } - - logger.debug("[DatabaseRealtimeStreams][ingestData] Reading data", { - streamId, - runId, - value, - }); - - await this.options.prisma.realtimeStreamChunk.create({ - data: { - runId, - key: streamId, - sequence: sequence++, - value, - }, - }); - } - - return new Response(null, { status: 200 }); - } catch (error) { - logger.error("[DatabaseRealtimeStreams][ingestData] Error in ingestData:", { error }); - - return new Response(null, { status: 500 }); - } - } -} diff --git a/apps/webapp/app/services/realtime/v2StreamsGlobal.server.ts b/apps/webapp/app/services/realtime/v2StreamsGlobal.server.ts deleted file mode 100644 index a086850ee7..0000000000 --- a/apps/webapp/app/services/realtime/v2StreamsGlobal.server.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { prisma } from "~/db.server"; -import { singleton } from "~/utils/singleton"; -import { realtimeClient } from "../realtimeClientGlobal.server"; -import { DatabaseRealtimeStreams } from "./databaseRealtimeStreams.server"; - -function initializeDatabaseRealtimeStreams() { - return new DatabaseRealtimeStreams({ - prisma, - realtimeClient, - }); -} - -export const v2RealtimeStreams = singleton("dbRealtimeStreams", initializeDatabaseRealtimeStreams); diff --git a/apps/webapp/app/services/realtimeClient.server.ts b/apps/webapp/app/services/realtimeClient.server.ts index ab777e0277..68bccc9f6a 100644 --- a/apps/webapp/app/services/realtimeClient.server.ts +++ b/apps/webapp/app/services/realtimeClient.server.ts @@ -11,6 +11,7 @@ import { Cache, createCache, 
DefaultStatefulContext, Namespace } from "@unkey/ca import { MemoryStore } from "@unkey/cache/stores"; import { RedisCacheStore } from "./unkey/redisCacheStore.server"; import { env } from "~/env.server"; +import { API_VERSIONS, CURRENT_API_VERSION } from "~/api/versions"; export interface CachedLimitProvider { getCachedLimit: (organizationId: string, defaultValue: number) => Promise; @@ -107,37 +108,29 @@ export class RealtimeClient { this.cache = cache; } - async streamChunks( + async streamRun( url: URL | string, environment: RealtimeEnvironment, runId: string, - streamId: string, - signal?: AbortSignal, + apiVersion: API_VERSIONS, + requestOptions?: RealtimeRequestOptions, clientVersion?: string ) { - return this.#streamChunksWhere( + return this.#streamRunsWhere( url, environment, - `"runId"='${runId}' AND "key"='${streamId}'`, - signal, + `id='${runId}'`, + apiVersion, + requestOptions, clientVersion ); } - async streamRun( - url: URL | string, - environment: RealtimeEnvironment, - runId: string, - requestOptions?: RealtimeRequestOptions, - clientVersion?: string - ) { - return this.#streamRunsWhere(url, environment, `id='${runId}'`, requestOptions, clientVersion); - } - async streamBatch( url: URL | string, environment: RealtimeEnvironment, batchId: string, + apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string ) { @@ -148,13 +141,21 @@ export class RealtimeClient { const whereClause = whereClauses.join(" AND "); - return this.#streamRunsWhere(url, environment, whereClause, requestOptions, clientVersion); + return this.#streamRunsWhere( + url, + environment, + whereClause, + apiVersion, + requestOptions, + clientVersion + ); } async streamRuns( url: URL | string, environment: RealtimeEnvironment, params: RealtimeRunsParams, + apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string ) { @@ -176,6 +177,7 @@ export class RealtimeClient { url, environment, whereClause, + apiVersion, 
requestOptions, clientVersion ); @@ -269,6 +271,7 @@ export class RealtimeClient { url: URL | string, environment: RealtimeEnvironment, whereClause: string, + apiVersion: API_VERSIONS, requestOptions?: RealtimeRequestOptions, clientVersion?: string ) { @@ -280,7 +283,13 @@ export class RealtimeClient { clientVersion ); - return this.#performElectricRequest(electricUrl, environment, undefined, clientVersion); + return this.#performElectricRequest( + electricUrl, + environment, + apiVersion, + undefined, + clientVersion + ); } #constructRunsElectricUrl( @@ -334,43 +343,10 @@ export class RealtimeClient { return electricUrl; } - async #streamChunksWhere( - url: URL | string, - environment: RealtimeEnvironment, - whereClause: string, - signal?: AbortSignal, - clientVersion?: string - ) { - const electricUrl = this.#constructChunksElectricUrl(url, whereClause, clientVersion); - - return this.#performElectricRequest(electricUrl, environment, signal, clientVersion); - } - - #constructChunksElectricUrl(url: URL | string, whereClause: string, clientVersion?: string): URL { - const $url = new URL(url.toString()); - - const electricUrl = new URL(`${this.options.electricOrigin}/v1/shape`); - - // Copy over all the url search params to the electric url - $url.searchParams.forEach((value, key) => { - electricUrl.searchParams.set(key, value); - }); - - electricUrl.searchParams.set("where", whereClause); - electricUrl.searchParams.set("table", `public."RealtimeStreamChunk"`); - - if (!clientVersion) { - // If the client version is not provided, that means we're using an older client - // This means the client will be sending shape_id instead of handle - electricUrl.searchParams.set("handle", electricUrl.searchParams.get("shape_id") ?? 
""); - } - - return electricUrl; - } - async #performElectricRequest( url: URL, environment: RealtimeEnvironment, + apiVersion: API_VERSIONS, signal?: AbortSignal, clientVersion?: string ) { @@ -386,13 +362,13 @@ export class RealtimeClient { if (!shapeId) { // If the shapeId is not present, we're just getting the initial value - return longPollingFetch(url.toString(), { signal }, rewriteResponseHeaders); + return this.#doLongPollingFetch(url, apiVersion, signal, rewriteResponseHeaders); } const isLive = isLiveRequestUrl(url); if (!isLive) { - return longPollingFetch(url.toString(), { signal }, rewriteResponseHeaders); + return this.#doLongPollingFetch(url, apiVersion, signal, rewriteResponseHeaders); } const requestId = randomUUID(); @@ -434,7 +410,12 @@ export class RealtimeClient { try { // ... (rest of your existing code for the long polling request) - const response = await longPollingFetch(url.toString(), { signal }, rewriteResponseHeaders); + const response = await this.#doLongPollingFetch( + url, + apiVersion, + signal, + rewriteResponseHeaders + ); // If this is the initial request, the response.headers['electric-handle'] will be the shapeId // And we may need to set the "createdAt" filter timestamp keyed by the shapeId @@ -452,6 +433,40 @@ export class RealtimeClient { } } + async #doLongPollingFetch( + url: URL, + apiVersion: API_VERSIONS, + signal?: AbortSignal, + rewriteResponseHeaders?: Record + ) { + if (apiVersion === CURRENT_API_VERSION) { + return longPollingFetch(url.toString(), { signal }, rewriteResponseHeaders); + } + + const response = await longPollingFetch(url.toString(), { signal }, rewriteResponseHeaders); + + return this.#rewriteResponseForNoneApiVersion(response); + } + + async #rewriteResponseForNoneApiVersion(response: Response) { + // Get the raw response body + const responseBody = await response.text(); + + // Rewrite the response body + const rewrittenResponseBody = this.#rewriteResponseBodyForNoneApiVersion(responseBody); + + // 
Return the rewritten response + return new Response(rewrittenResponseBody, { + status: response.status, + headers: response.headers, + }); + } + + // Rewrites "status":"DEQUEUED" to "status":"EXECUTING" + #rewriteResponseBodyForNoneApiVersion(responseBody: string) { + return responseBody.replace(/"status":"DEQUEUED"/g, '"status":"EXECUTING"'); + } + async #incrementAndCheck(environmentId: string, requestId: string, limit: number) { const key = this.#getKey(environmentId); const now = Date.now(); diff --git a/apps/webapp/app/services/routeBuilders/apiBuilder.server.ts b/apps/webapp/app/services/routeBuilders/apiBuilder.server.ts index fae78713db..9e161abe07 100644 --- a/apps/webapp/app/services/routeBuilders/apiBuilder.server.ts +++ b/apps/webapp/app/services/routeBuilders/apiBuilder.server.ts @@ -21,6 +21,7 @@ import { AuthenticatedWorkerInstance, WorkerGroupTokenService, } from "~/v3/services/worker/workerGroupTokenService.server"; +import { API_VERSIONS, getApiVersion } from "~/api/versions"; type AnyZodSchema = z.ZodFirstPartySchemaTypes | z.ZodDiscriminatedUnion; @@ -87,6 +88,7 @@ type ApiKeyHandlerFunction< authentication: ApiAuthenticationResultSuccess; request: Request; resource: NonNullable; + apiVersion: API_VERSIONS; }) => Promise; export function createLoaderApiRoute< @@ -237,6 +239,8 @@ export function createLoaderApiRoute< } } + const apiVersion = getApiVersion(request); + const result = await handler({ params: parsedParams, searchParams: parsedSearchParams, @@ -244,6 +248,7 @@ export function createLoaderApiRoute< authentication: authenticationResult, request, resource, + apiVersion, }); return await wrapResponse(request, result, corsStrategy !== "none"); } catch (error) { @@ -307,6 +312,7 @@ type PATHandlerFunction< : undefined; authentication: PersonalAccessTokenAuthenticationResult; request: Request; + apiVersion: API_VERSIONS; }) => Promise; export function createLoaderPATApiRoute< @@ -390,12 +396,15 @@ export function createLoaderPATApiRoute< 
parsedHeaders = headers.data; } + const apiVersion = getApiVersion(request); + const result = await handler({ params: parsedParams, searchParams: parsedSearchParams, headers: parsedHeaders, authentication: authenticationResult, request, + apiVersion, }); return await wrapResponse(request, result, corsStrategy !== "none"); } catch (error) { diff --git a/apps/webapp/app/v3/runEngine.server.ts b/apps/webapp/app/v3/runEngine.server.ts index 6c9a11c2a8..30f344e724 100644 --- a/apps/webapp/app/v3/runEngine.server.ts +++ b/apps/webapp/app/v3/runEngine.server.ts @@ -40,6 +40,7 @@ function createRunEngine() { }, queue: { defaultEnvConcurrency: env.DEFAULT_ENV_EXECUTION_CONCURRENCY_LIMIT, + defaultEnvConcurrencyBurstFactor: env.DEFAULT_ENV_EXECUTION_CONCURRENCY_BURST_FACTOR, logLevel: env.RUN_ENGINE_RUN_QUEUE_LOG_LEVEL, redis: { keyPrefix: "engine:", @@ -103,26 +104,6 @@ function createRunEngine() { EXECUTING_WITH_WAITPOINTS: env.RUN_ENGINE_TIMEOUT_EXECUTING_WITH_WAITPOINTS, SUSPENDED: env.RUN_ENGINE_TIMEOUT_SUSPENDED, }, - releaseConcurrency: { - disabled: env.RUN_ENGINE_RELEASE_CONCURRENCY_ENABLED === "0", - disableConsumers: env.RUN_ENGINE_RELEASE_CONCURRENCY_DISABLE_CONSUMERS === "1", - maxTokensRatio: env.RUN_ENGINE_RELEASE_CONCURRENCY_MAX_TOKENS_RATIO, - releasingsMaxAge: env.RUN_ENGINE_RELEASE_CONCURRENCY_RELEASINGS_MAX_AGE, - releasingsPollInterval: env.RUN_ENGINE_RELEASE_CONCURRENCY_RELEASINGS_POLL_INTERVAL, - maxRetries: env.RUN_ENGINE_RELEASE_CONCURRENCY_MAX_RETRIES, - consumersCount: env.RUN_ENGINE_RELEASE_CONCURRENCY_CONSUMERS_COUNT, - pollInterval: env.RUN_ENGINE_RELEASE_CONCURRENCY_POLL_INTERVAL, - batchSize: env.RUN_ENGINE_RELEASE_CONCURRENCY_BATCH_SIZE, - redis: { - keyPrefix: "engine:", - port: env.RUN_ENGINE_RUN_QUEUE_REDIS_PORT ?? undefined, - host: env.RUN_ENGINE_RUN_QUEUE_REDIS_HOST ?? undefined, - username: env.RUN_ENGINE_RUN_QUEUE_REDIS_USERNAME ?? undefined, - password: env.RUN_ENGINE_RUN_QUEUE_REDIS_PASSWORD ?? 
undefined, - enableAutoPipelining: true, - ...(env.RUN_ENGINE_RUN_QUEUE_REDIS_TLS_DISABLED === "true" ? {} : { tls: {} }), - }, - }, retryWarmStartThresholdMs: env.RUN_ENGINE_RETRY_WARM_START_THRESHOLD_MS, }); diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index 2d1abe8910..ecbf8ef1bc 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -12,6 +12,7 @@ import { type DeploymentSuccessWebhook, isOOMRunError, type RunFailedWebhook, + RunStatus, TaskRunError, } from "@trigger.dev/core/v3"; import { type ProjectAlertChannelType, type ProjectAlertType } from "@trigger.dev/database"; @@ -40,6 +41,7 @@ import { alertsRateLimiter } from "~/v3/alertsRateLimiter.server"; import { alertsWorker } from "~/v3/alertsWorker.server"; import { generateFriendlyId } from "~/v3/friendlyIdentifiers"; import { BaseService } from "../baseService.server"; +import { CURRENT_API_VERSION } from "~/api/versions"; type FoundAlert = Prisma.Result< typeof prisma.projectAlert, @@ -352,7 +354,10 @@ export class DeliverAlertService extends BaseService { run: { id: alert.taskRun.friendlyId, number: alert.taskRun.number, - status: ApiRetrieveRunPresenter.apiStatusFromRunStatus(alert.taskRun.status), + status: ApiRetrieveRunPresenter.apiStatusFromRunStatus( + alert.taskRun.status, + CURRENT_API_VERSION + ) as RunStatus, createdAt: alert.taskRun.createdAt, startedAt: alert.taskRun.startedAt ?? undefined, completedAt: alert.taskRun.completedAt ?? undefined, diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index cc8bb2ca99..5ca2d5d387 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -254,7 +254,6 @@ async function createWorkerTask( { name: task.queue?.name ?? 
`task/${task.id}`, concurrencyLimit: task.queue?.concurrencyLimit, - releaseConcurrencyOnWaitpoint: task.queue?.releaseConcurrencyOnWaitpoint, }, task.id, task.queue?.name ? "NAMED" : "VIRTUAL", @@ -375,9 +374,6 @@ async function createWorkerQueue( concurrencyLimit ?? null, orderableName, queueType, - typeof queue.releaseConcurrencyOnWaitpoint === "boolean" - ? queue.releaseConcurrencyOnWaitpoint - : false, worker, prisma ); @@ -422,7 +418,6 @@ async function upsertWorkerQueueRecord( concurrencyLimit: number | null, orderableName: string, queueType: TaskQueueType, - releaseConcurrencyOnWaitpoint: boolean, worker: BackgroundWorker, prisma: PrismaClientOrTransaction, attempt: number = 0 @@ -447,7 +442,6 @@ async function upsertWorkerQueueRecord( name: queueName, orderableName, concurrencyLimit, - releaseConcurrencyOnWaitpoint, runtimeEnvironmentId: worker.runtimeEnvironmentId, projectId: worker.projectId, type: queueType, @@ -468,7 +462,6 @@ async function upsertWorkerQueueRecord( version: "V2", orderableName, concurrencyLimit, - releaseConcurrencyOnWaitpoint, }, }); } @@ -482,7 +475,6 @@ async function upsertWorkerQueueRecord( concurrencyLimit, orderableName, queueType, - releaseConcurrencyOnWaitpoint, worker, prisma, attempt + 1 diff --git a/apps/webapp/app/v3/services/pauseQueue.server.ts b/apps/webapp/app/v3/services/pauseQueue.server.ts index 529756bed8..f4e18eab4b 100644 --- a/apps/webapp/app/v3/services/pauseQueue.server.ts +++ b/apps/webapp/app/v3/services/pauseQueue.server.ts @@ -89,7 +89,6 @@ export class PauseQueueService extends BaseService { queued: results[0]?.[updatedQueue.name] ?? 0, concurrencyLimit: updatedQueue.concurrencyLimit ?? 
null, paused: updatedQueue.paused, - releaseConcurrencyOnWaitpoint: updatedQueue.releaseConcurrencyOnWaitpoint, }), }; } catch (error) { diff --git a/apps/webapp/app/v3/services/triggerTask.server.ts b/apps/webapp/app/v3/services/triggerTask.server.ts index baf348c6e9..f7974dd04e 100644 --- a/apps/webapp/app/v3/services/triggerTask.server.ts +++ b/apps/webapp/app/v3/services/triggerTask.server.ts @@ -1,10 +1,8 @@ import { TriggerTaskRequestBody } from "@trigger.dev/core/v3"; import { RunEngineVersion, TaskRun } from "@trigger.dev/database"; -import { env } from "~/env.server"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; import { DefaultPayloadProcessor } from "~/runEngine/concerns/payloads.server"; import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; -import { DefaultRunChainStateManager } from "~/runEngine/concerns/runChainStates.server"; import { DefaultRunNumberIncrementer } from "~/runEngine/concerns/runNumbers.server"; import { DefaultTraceEventsConcern } from "~/runEngine/concerns/traceEvents.server"; import { RunEngineTriggerTaskService } from "~/runEngine/services/triggerTask.server"; @@ -108,10 +106,6 @@ export class TriggerTaskService extends WithRunEngine { ), runNumberIncrementer: new DefaultRunNumberIncrementer(), traceEventConcern, - runChainStateManager: new DefaultRunChainStateManager( - this._prisma, - env.RUN_ENGINE_RELEASE_CONCURRENCY_ENABLED === "1" - ), tracer: tracer, }); diff --git a/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts b/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts index 8c1c4a7e39..8359cc4a4a 100644 --- a/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts +++ b/apps/webapp/app/v3/taskRunHeartbeatFailed.server.ts @@ -50,6 +50,7 @@ export class TaskRunHeartbeatFailedService extends BaseService { switch (taskRun.status) { case "PENDING": + case "DEQUEUED": case "WAITING_TO_RESUME": case "PAUSED": { const backInQueue = await marqs?.nackMessage(taskRun.id); diff 
--git a/apps/webapp/app/v3/tracing.server.ts b/apps/webapp/app/v3/tracing.server.ts index 16359c9dd4..936cf9a572 100644 --- a/apps/webapp/app/v3/tracing.server.ts +++ b/apps/webapp/app/v3/tracing.server.ts @@ -1,6 +1,8 @@ -import { Span, SpanOptions, SpanStatusCode, Tracer } from "@opentelemetry/api"; +import { Span, SpanKind, SpanOptions, SpanStatusCode, Tracer } from "@opentelemetry/api"; import { Logger, SeverityNumber } from "@opentelemetry/api-logs"; import { flattenAttributes } from "@trigger.dev/core/v3/utils/flattenAttributes"; +import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { attributesFromAuthenticatedEnv } from "./tracer.server"; export async function startSpan( tracer: Tracer, @@ -32,6 +34,42 @@ export async function startSpan( }); } +export async function startSpanWithEnv( + tracer: Tracer, + name: string, + env: AuthenticatedEnvironment, + fn: (span: Span) => Promise, + options?: SpanOptions +): Promise { + return startSpan( + tracer, + name, + async (span) => { + try { + return await fn(span); + } catch (e) { + if (e instanceof Error) { + span.recordException(e); + } else { + span.recordException(new Error(String(e))); + } + + throw e; + } finally { + span.end(); + } + }, + { + attributes: { + ...attributesFromAuthenticatedEnv(env), + ...options?.attributes, + }, + kind: SpanKind.SERVER, + ...options, + } + ); +} + export async function emitDebugLog( logger: Logger, message: string, diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index d90ae99b84..1ccc022586 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -22,7 +22,6 @@ import { IOPacket } from "@trigger.dev/core/v3"; import { TaskRun } from "@trigger.dev/database"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; -import { 
DefaultRunChainStateManager } from "~/runEngine/concerns/runChainStates.server"; import { EntitlementValidationParams, MaxAttemptsValidationParams, @@ -37,7 +36,6 @@ import { ValidationResult, } from "~/runEngine/types"; import { RunEngineTriggerTaskService } from "../../app/runEngine/services/triggerTask.server"; -import { setTimeout } from "node:timers/promises"; vi.setConfig({ testTimeout: 30_000 }); // 30 seconds timeout @@ -156,8 +154,6 @@ describe("RunEngineTriggerTaskService", () => { new MockTraceEventConcern() ); - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - const triggerTaskService = new RunEngineTriggerTaskService({ engine, prisma, @@ -167,7 +163,6 @@ describe("RunEngineTriggerTaskService", () => { idempotencyKeyConcern, validator: new MockTriggerTaskValidator(), traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, tracer: trace.getTracer("test", "0.0.0"), }); @@ -249,8 +244,6 @@ describe("RunEngineTriggerTaskService", () => { new MockTraceEventConcern() ); - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - const triggerTaskService = new RunEngineTriggerTaskService({ engine, prisma, @@ -260,7 +253,6 @@ describe("RunEngineTriggerTaskService", () => { idempotencyKeyConcern, validator: new MockTriggerTaskValidator(), traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, tracer: trace.getTracer("test", "0.0.0"), }); @@ -393,8 +385,6 @@ describe("RunEngineTriggerTaskService", () => { new MockTraceEventConcern() ); - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - const triggerTaskService = new RunEngineTriggerTaskService({ engine, prisma, @@ -404,7 +394,6 @@ describe("RunEngineTriggerTaskService", () => { idempotencyKeyConcern, validator: new MockTriggerTaskValidator(), traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, tracer: trace.getTracer("test", "0.0.0"), }); @@ -482,795 +471,4 @@ 
describe("RunEngineTriggerTaskService", () => { await engine.quit(); } ); - - containerTest( - "should handle run chains correctly when release concurrency is enabled", - async ({ prisma, redisOptions }) => { - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - masterQueueConsumersDisabled: true, - processWorkerQueueDebounceMs: 100, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0005, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - //create background worker - const { worker } = await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: false, - concurrencyLimit: 2, - } - ); - - const queuesManager = new DefaultQueueManager(prisma, engine); - - const idempotencyKeyConcern = new IdempotencyKeyConcern( - prisma, - engine, - new MockTraceEventConcern() - ); - - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - - const triggerTaskService = new RunEngineTriggerTaskService({ - engine, - prisma, - runNumberIncrementer: new MockRunNumberIncrementer(), - payloadProcessor: new MockPayloadProcessor(), - queueConcern: queuesManager, - idempotencyKeyConcern, - validator: new MockTriggerTaskValidator(), - traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const result = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { payload: { test: "test" } }, - }); - - console.log(result); - - 
expect(result).toBeDefined(); - expect(result?.run.friendlyId).toBeDefined(); - expect(result?.run.status).toBe("PENDING"); - expect(result?.isCached).toBe(false); - - // Lets make sure the task is in the queue - const queueLength = await engine.runQueue.lengthOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - expect(queueLength).toBe(1); - - await setTimeout(500); - - // Now we need to dequeue the run so so we can trigger a subtask - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: result?.run.workerQueue!, - }); - - expect(dequeued.length).toBe(1); - expect(dequeued[0].run.id).toBe(result?.run.id); - - // Now, lets trigger a subtask, with the same task identifier and queue - const subtaskResult = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: result?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }); - - expect(subtaskResult).toBeDefined(); - expect(subtaskResult?.run.status).toBe("PENDING"); - expect(subtaskResult?.run.parentTaskRunId).toBe(result?.run.id); - expect(subtaskResult?.run.lockedQueueId).toBeDefined(); - expect(subtaskResult?.run.runChainState).toEqual({ - concurrency: { - queues: [ - { id: subtaskResult?.run.lockedQueueId, name: subtaskResult?.run.queue, holding: 1 }, - ], - environment: 0, - }, - }); - - await setTimeout(500); - - // Okay, now lets dequeue the subtask - const dequeuedSubtask = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: subtaskResult?.run.workerQueue!, - }); - - expect(dequeuedSubtask.length).toBe(1); - expect(dequeuedSubtask[0].run.id).toBe(subtaskResult?.run.id); - - // Now, when we trigger the subtask, it should raise a deadlock error - await expect( - triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { - 
payload: { test: "test" }, - options: { - parentRunId: subtaskResult?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }) - ).rejects.toThrow("Deadlock detected"); - - await engine.quit(); - } - ); - - containerTest( - "should handle run chains with multiple queues correctly", - async ({ prisma, redisOptions }) => { - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - masterQueueConsumersDisabled: true, - processWorkerQueueDebounceMs: 100, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0005, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const taskIdentifier1 = "test-task-1"; - const taskIdentifier2 = "test-task-2"; - - // Create a background worker - const { worker } = await setupBackgroundWorker( - engine, - authenticatedEnvironment, - [taskIdentifier1, taskIdentifier2], - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: false, - concurrencyLimit: 2, - } - ); - - const queuesManager = new DefaultQueueManager(prisma, engine); - const idempotencyKeyConcern = new IdempotencyKeyConcern( - prisma, - engine, - new MockTraceEventConcern() - ); - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - - const triggerTaskService = new RunEngineTriggerTaskService({ - engine, - prisma, - runNumberIncrementer: new MockRunNumberIncrementer(), - payloadProcessor: new MockPayloadProcessor(), - queueConcern: queuesManager, - idempotencyKeyConcern, - validator: new MockTriggerTaskValidator(), - traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, - tracer: 
trace.getTracer("test", "0.0.0"), - }); - - // Trigger parent run on queue1 - const parentResult = await triggerTaskService.call({ - taskId: taskIdentifier1, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - lockToVersion: worker.version, - }, - }, - }); - - expect(parentResult).toBeDefined(); - expect(parentResult?.run.queue).toBe(`task/${taskIdentifier1}`); - expect(parentResult?.run.lockedQueueId).toBeDefined(); - - await setTimeout(500); - - // Dequeue the parent run to simulate it running - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: parentResult?.run.workerQueue!, - }); - - expect(dequeued.length).toBe(1); - expect(dequeued[0].run.id).toBe(parentResult?.run.id); - - // Now trigger a child run on queue2 - const childResult = await triggerTaskService.call({ - taskId: taskIdentifier2, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: parentResult?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }); - - expect(childResult).toBeDefined(); - expect(childResult?.run.queue).toBe(`task/${taskIdentifier2}`); - expect(childResult?.run.lockedQueueId).toBeDefined(); - expect(childResult?.run.parentTaskRunId).toBe(parentResult?.run.id); - - // Verify the run chain state - expect(childResult?.run.runChainState).toEqual({ - concurrency: { - queues: [ - { id: parentResult?.run.lockedQueueId, name: parentResult?.run.queue, holding: 1 }, - ], - environment: 0, - }, - }); - - // Now lets trigger task 1 again, and it should be able to run - const childResult2 = await triggerTaskService.call({ - taskId: taskIdentifier1, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: childResult?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }); - - expect(childResult2).toBeDefined(); - 
expect(childResult2?.run.status).toBe("PENDING"); - expect(childResult2?.run.parentTaskRunId).toBe(childResult?.run.id); - expect(childResult2?.run.lockedQueueId).toBeDefined(); - expect(childResult2?.run.runChainState).toMatchObject({ - concurrency: { - queues: [ - { id: parentResult?.run.lockedQueueId, name: parentResult?.run.queue, holding: 1 }, - { id: childResult?.run.lockedQueueId, name: childResult?.run.queue, holding: 1 }, - ], - environment: 0, - }, - }); - - // Now lets trigger task 2 again, and it should be able to run - const childResult3 = await triggerTaskService.call({ - taskId: taskIdentifier2, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: childResult2?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }); - - expect(childResult3).toBeDefined(); - expect(childResult3?.run.status).toBe("PENDING"); - expect(childResult3?.run.parentTaskRunId).toBe(childResult2?.run.id); - expect(childResult3?.run.lockedQueueId).toBeDefined(); - expect(childResult3?.run.runChainState).toMatchObject({ - concurrency: { - queues: [ - { id: childResult?.run.lockedQueueId, name: childResult?.run.queue, holding: 1 }, - { id: parentResult?.run.lockedQueueId, name: parentResult?.run.queue, holding: 2 }, - ], - environment: 0, - }, - }); - - // Now lets trigger task 1 again, and it should deadlock - await expect( - triggerTaskService.call({ - taskId: taskIdentifier1, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: childResult3?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }) - ).rejects.toThrow("Deadlock detected"); - - await engine.quit(); - } - ); - - containerTest( - "should handle run chains with explicit releaseConcurrency option", - async ({ prisma, redisOptions }) => { - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, 
- tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0005, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const taskIdentifier1 = "test-task-1"; - const taskIdentifier2 = "test-task-2"; - - // Create a background worker - const { worker } = await setupBackgroundWorker( - engine, - authenticatedEnvironment, - [taskIdentifier1, taskIdentifier2], - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: false, - concurrencyLimit: 2, - } - ); - - const queuesManager = new DefaultQueueManager(prisma, engine); - const idempotencyKeyConcern = new IdempotencyKeyConcern( - prisma, - engine, - new MockTraceEventConcern() - ); - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - - const triggerTaskService = new RunEngineTriggerTaskService({ - engine, - prisma, - runNumberIncrementer: new MockRunNumberIncrementer(), - payloadProcessor: new MockPayloadProcessor(), - queueConcern: queuesManager, - idempotencyKeyConcern, - validator: new MockTriggerTaskValidator(), - traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, - tracer: trace.getTracer("test", "0.0.0"), - }); - - // Trigger parent run on queue1 - const parentResult = await triggerTaskService.call({ - taskId: taskIdentifier1, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - lockToVersion: worker.version, - }, - }, - }); - - expect(parentResult).toBeDefined(); - expect(parentResult?.run.queue).toBe(`task/${taskIdentifier1}`); - expect(parentResult?.run.lockedQueueId).toBeDefined(); - - await setTimeout(500); - - // Dequeue the parent run to simulate it running - const 
dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: parentResult?.run.workerQueue!, - }); - - expect(dequeued.length).toBe(1); - expect(dequeued[0].run.id).toBe(parentResult?.run.id); - - // Now trigger a child run on queue2 - const childResult = await triggerTaskService.call({ - taskId: taskIdentifier2, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: parentResult?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - releaseConcurrency: true, - }, - }, - }); - - expect(childResult).toBeDefined(); - expect(childResult?.run.queue).toBe(`task/${taskIdentifier2}`); - expect(childResult?.run.lockedQueueId).toBeDefined(); - expect(childResult?.run.parentTaskRunId).toBe(parentResult?.run.id); - - // Verify the run chain state - expect(childResult?.run.runChainState).toEqual({ - concurrency: { - queues: [ - { id: parentResult?.run.lockedQueueId, name: parentResult?.run.queue, holding: 0 }, - ], - environment: 0, - }, - }); - - await engine.quit(); - } - ); - - containerTest( - "should handle run chains when release concurrency is disabled", - async ({ prisma, redisOptions }) => { - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - masterQueueConsumersDisabled: true, - processWorkerQueueDebounceMs: 100, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0005, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const taskIdentifier1 = "test-task-1"; - const taskIdentifier2 = "test-task-2"; - - // Create a background worker - const { worker } = 
await setupBackgroundWorker( - engine, - authenticatedEnvironment, - [taskIdentifier1, taskIdentifier2], - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: true, - concurrencyLimit: 2, - } - ); - - const queuesManager = new DefaultQueueManager(prisma, engine); - const idempotencyKeyConcern = new IdempotencyKeyConcern( - prisma, - engine, - new MockTraceEventConcern() - ); - const runChainStateManager = new DefaultRunChainStateManager(prisma, false); - - const triggerTaskService = new RunEngineTriggerTaskService({ - engine, - prisma, - runNumberIncrementer: new MockRunNumberIncrementer(), - payloadProcessor: new MockPayloadProcessor(), - queueConcern: queuesManager, - idempotencyKeyConcern, - validator: new MockTriggerTaskValidator(), - traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, - tracer: trace.getTracer("test", "0.0.0"), - }); - - // Trigger parent run on queue1 - const parentResult = await triggerTaskService.call({ - taskId: taskIdentifier1, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - lockToVersion: worker.version, - }, - }, - }); - - expect(parentResult).toBeDefined(); - expect(parentResult?.run.queue).toBe(`task/${taskIdentifier1}`); - expect(parentResult?.run.lockedQueueId).toBeDefined(); - - await setTimeout(500); - - // Dequeue the parent run to simulate it running - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: parentResult?.run.workerQueue!, - }); - - expect(dequeued.length).toBe(1); - expect(dequeued[0].run.id).toBe(parentResult?.run.id); - - // Now trigger a child run on queue2 - const childResult = await triggerTaskService.call({ - taskId: taskIdentifier2, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: parentResult?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }); - - expect(childResult).toBeDefined(); - 
expect(childResult?.run.queue).toBe(`task/${taskIdentifier2}`); - expect(childResult?.run.lockedQueueId).toBeDefined(); - expect(childResult?.run.parentTaskRunId).toBe(parentResult?.run.id); - - // Verify the run chain state - expect(childResult?.run.runChainState).toEqual({ - concurrency: { - queues: [ - { id: parentResult?.run.lockedQueueId, name: parentResult?.run.queue, holding: 1 }, - ], - environment: 1, - }, - }); - - await engine.quit(); - } - ); - - containerTest( - "should handle run chains correctly when the parent run queue doesn't have a concurrency limit", - async ({ prisma, redisOptions }) => { - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - masterQueueConsumersDisabled: true, - processWorkerQueueDebounceMs: 100, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0005, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - //create background worker - const { worker } = await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: false, - concurrencyLimit: null, - } - ); - - const queuesManager = new DefaultQueueManager(prisma, engine); - - const idempotencyKeyConcern = new IdempotencyKeyConcern( - prisma, - engine, - new MockTraceEventConcern() - ); - - const runChainStateManager = new DefaultRunChainStateManager(prisma, true); - - const triggerTaskService = new RunEngineTriggerTaskService({ - engine, - prisma, - runNumberIncrementer: new MockRunNumberIncrementer(), - payloadProcessor: new MockPayloadProcessor(), - 
queueConcern: queuesManager, - idempotencyKeyConcern, - validator: new MockTriggerTaskValidator(), - traceEventConcern: new MockTraceEventConcern(), - runChainStateManager, - tracer: trace.getTracer("test", "0.0.0"), - }); - - const result = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { payload: { test: "test" } }, - }); - - expect(result).toBeDefined(); - expect(result?.run.friendlyId).toBeDefined(); - expect(result?.run.status).toBe("PENDING"); - expect(result?.isCached).toBe(false); - - // Lets make sure the task is in the queue - const queueLength = await engine.runQueue.lengthOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - expect(queueLength).toBe(1); - - await setTimeout(500); - - // Now we need to dequeue the run so so we can trigger a subtask - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: result?.run.workerQueue!, - }); - - expect(dequeued.length).toBe(1); - expect(dequeued[0].run.id).toBe(result?.run.id); - - // Now, lets trigger a subtask, with the same task identifier and queue - const subtaskResult = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: result?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }); - - expect(subtaskResult).toBeDefined(); - expect(subtaskResult?.run.status).toBe("PENDING"); - expect(subtaskResult?.run.parentTaskRunId).toBe(result?.run.id); - expect(subtaskResult?.run.lockedQueueId).toBeDefined(); - expect(subtaskResult?.run.runChainState).toEqual({ - concurrency: { - queues: [ - { id: subtaskResult?.run.lockedQueueId, name: subtaskResult?.run.queue, holding: 0 }, - ], - environment: 0, - }, - }); - - await setTimeout(500); - - // Okay, now lets dequeue the subtask - const dequeuedSubtask = await engine.dequeueFromWorkerQueue({ - 
consumerId: "test_12345", - workerQueue: subtaskResult?.run.workerQueue!, - }); - - expect(dequeuedSubtask.length).toBe(1); - expect(dequeuedSubtask[0].run.id).toBe(subtaskResult?.run.id); - - // Now, when we trigger the subtask, it should NOT raise a deadlock error - await expect( - triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { - payload: { test: "test" }, - options: { - parentRunId: subtaskResult?.run.friendlyId, - resumeParentOnCompletion: true, - lockToVersion: worker.version, - }, - }, - }) - ).resolves.toBeDefined(); - - await engine.quit(); - } - ); }); diff --git a/apps/webapp/test/fairDequeuingStrategy.test.ts b/apps/webapp/test/fairDequeuingStrategy.test.ts index 486b6ca3c4..3b4a6a375b 100644 --- a/apps/webapp/test/fairDequeuingStrategy.test.ts +++ b/apps/webapp/test/fairDequeuingStrategy.test.ts @@ -1,5 +1,12 @@ -import { redisTest } from "@internal/testcontainers"; import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { redisTest } from "@internal/testcontainers"; import { FairDequeuingStrategy } from "../app/v3/marqs/fairDequeuingStrategy.server.js"; import { calculateStandardDeviation, diff --git a/apps/webapp/test/realtimeClient.test.ts b/apps/webapp/test/realtimeClient.test.ts index 6a94a31dee..d98213e5b1 100644 --- a/apps/webapp/test/realtimeClient.test.ts +++ b/apps/webapp/test/realtimeClient.test.ts @@ -2,6 +2,7 @@ import { containerWithElectricAndRedisTest } from "@internal/testcontainers"; import { expect, describe } from "vitest"; import { RealtimeClient } from "../app/services/realtimeClient.server.js"; import Redis from "ioredis"; +import { CURRENT_API_VERSION, NON_SPECIFIC_API_VERSION } from "~/api/versions.js"; describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { containerWithElectricAndRedisTest( @@ -72,6 +73,7 @@ 
describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { "http://localhost:3000?offset=-1", environment, run.id, + NON_SPECIFIC_API_VERSION, {}, "0.8.1" ); @@ -82,6 +84,7 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { "http://localhost:3000?offset=-1", environment, run.id, + NON_SPECIFIC_API_VERSION, {}, "0.8.1" ); @@ -110,6 +113,7 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { `http://localhost:3000?offset=0_0&live=true&handle=${shapeId}`, environment, run.id, + NON_SPECIFIC_API_VERSION, {}, "0.8.1" ); @@ -120,6 +124,7 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { `http://localhost:3000?offset=0_0&live=true&handle=${shapeId}`, environment, run.id, + NON_SPECIFIC_API_VERSION, {}, "0.8.1" ); @@ -221,6 +226,7 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { { tags: ["test:tag:1234"], }, + NON_SPECIFIC_API_VERSION, {}, "0.8.1" ); @@ -303,7 +309,8 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { const initialResponsePromise = client.streamRun( "http://localhost:3000?offset=-1", environment, - run.id + run.id, + NON_SPECIFIC_API_VERSION ); const initializeResponsePromise2 = new Promise((resolve) => { @@ -312,6 +319,7 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { "http://localhost:3000?offset=-1", environment, run.id, + NON_SPECIFIC_API_VERSION, {}, "0.8.1" ); @@ -339,7 +347,8 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { const liveResponsePromise = client.streamRun( `http://localhost:3000?offset=0_0&live=true&shape_id=${shapeId}`, environment, - run.id + run.id, + NON_SPECIFIC_API_VERSION ); const liveResponsePromise2 = new Promise((resolve) => { @@ -347,7 +356,8 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { const response = await client.streamRun( `http://localhost:3000?offset=0_0&live=true&shape_id=${shapeId}`, environment, - run.id + run.id, + 
NON_SPECIFIC_API_VERSION ); resolve(response); @@ -375,4 +385,164 @@ describe.skipIf(process.env.GITHUB_ACTIONS)("RealtimeClient", () => { expect(liveResponse2.status).toBe(429); } ); + + containerWithElectricAndRedisTest( + "Should rewrite the DEQUEUED status to EXECUTING for older trigger api versions", + { timeout: 30_000 }, + async ({ redisOptions, electricOrigin, prisma }) => { + const redis = new Redis(redisOptions); + + const client = new RealtimeClient({ + electricOrigin, + keyPrefix: "test:realtime", + redis: { + host: redis.options.host, + port: redis.options.port, + tlsDisabled: true, + }, + expiryTimeInSeconds: 5, + cachedLimitProvider: { + async getCachedLimit() { + return 1; + }, + }, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test-org", + slug: "test-org", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-project", + slug: "test-project", + organizationId: organization.id, + externalRef: "test-project", + }, + }); + + const environment = await prisma.runtimeEnvironment.create({ + data: { + projectId: project.id, + organizationId: organization.id, + slug: "test", + type: "DEVELOPMENT", + shortcode: "1234", + apiKey: "tr_dev_1234", + pkApiKey: "pk_test_1234", + }, + }); + + const run = await prisma.taskRun.create({ + data: { + taskIdentifier: "test-task", + friendlyId: "run_1234", + payload: "{}", + payloadType: "application/json", + traceId: "trace_1234", + spanId: "span_1234", + queue: "test-queue", + projectId: project.id, + runtimeEnvironmentId: environment.id, + status: "DEQUEUED", + }, + }); + + const initialResponse = await client.streamRun( + "http://localhost:3000?offset=-1", + environment, + run.id, + NON_SPECIFIC_API_VERSION + ); + + const responseBody = (await initialResponse.json()) as any; + + const firstChunk = responseBody[0]; + + expect(firstChunk.value.status).toBe("EXECUTING"); + } + ); + + containerWithElectricAndRedisTest( + "Should NOT rewrite the 
DEQUEUED status to EXECUTING for newer trigger api versions", + { timeout: 30_000 }, + async ({ redisOptions, electricOrigin, prisma }) => { + const redis = new Redis(redisOptions); + + const client = new RealtimeClient({ + electricOrigin, + keyPrefix: "test:realtime", + redis: { + host: redis.options.host, + port: redis.options.port, + tlsDisabled: true, + }, + expiryTimeInSeconds: 5, + cachedLimitProvider: { + async getCachedLimit() { + return 1; + }, + }, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test-org", + slug: "test-org", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-project", + slug: "test-project", + organizationId: organization.id, + externalRef: "test-project", + }, + }); + + const environment = await prisma.runtimeEnvironment.create({ + data: { + projectId: project.id, + organizationId: organization.id, + slug: "test", + type: "DEVELOPMENT", + shortcode: "1234", + apiKey: "tr_dev_1234", + pkApiKey: "pk_test_1234", + }, + }); + + const run = await prisma.taskRun.create({ + data: { + taskIdentifier: "test-task", + friendlyId: "run_1234", + payload: "{}", + payloadType: "application/json", + traceId: "trace_1234", + spanId: "span_1234", + queue: "test-queue", + projectId: project.id, + runtimeEnvironmentId: environment.id, + status: "DEQUEUED", + }, + }); + + const initialResponse = await client.streamRun( + "http://localhost:3000?offset=-1", + environment, + run.id, + CURRENT_API_VERSION + ); + + const responseBody = (await initialResponse.json()) as any; + + const firstChunk = responseBody[0]; + + expect(firstChunk.value.status).toBe("DEQUEUED"); + } + ); }); diff --git a/internal-packages/database/prisma/migrations/20250715124534_add_concurrency_limit_burst_factor_to_runtime_environment/migration.sql b/internal-packages/database/prisma/migrations/20250715124534_add_concurrency_limit_burst_factor_to_runtime_environment/migration.sql new file mode 100644 index 
0000000000..035e022ebb --- /dev/null +++ b/internal-packages/database/prisma/migrations/20250715124534_add_concurrency_limit_burst_factor_to_runtime_environment/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "RuntimeEnvironment" ADD COLUMN "concurrencyLimitBurstFactor" DECIMAL(4,2) NOT NULL DEFAULT 2.00; diff --git a/internal-packages/database/prisma/migrations/20250716145327_drop_release_concurrency_on_waitpoint_on_task_queue/migration.sql b/internal-packages/database/prisma/migrations/20250716145327_drop_release_concurrency_on_waitpoint_on_task_queue/migration.sql new file mode 100644 index 0000000000..fd8c361c97 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20250716145327_drop_release_concurrency_on_waitpoint_on_task_queue/migration.sql @@ -0,0 +1,8 @@ +/* + Warnings: + + - You are about to drop the column `releaseConcurrencyOnWaitpoint` on the `TaskQueue` table. All the data in the column will be lost. + +*/ +-- AlterTable +ALTER TABLE "TaskQueue" DROP COLUMN "releaseConcurrencyOnWaitpoint"; \ No newline at end of file diff --git a/internal-packages/database/prisma/migrations/20250716150513_add_dequeued_task_run_status/migration.sql b/internal-packages/database/prisma/migrations/20250716150513_add_dequeued_task_run_status/migration.sql new file mode 100644 index 0000000000..6eb4c66512 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20250716150513_add_dequeued_task_run_status/migration.sql @@ -0,0 +1,2 @@ +-- AlterEnum +ALTER TYPE "TaskRunStatus" ADD VALUE 'DEQUEUED'; \ No newline at end of file diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 3cce556333..58e1b11003 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -247,8 +247,9 @@ model RuntimeEnvironment { ///A memorable code for the environment shortcode String - maximumConcurrencyLimit Int @default(5) - paused Boolean 
@default(false) + maximumConcurrencyLimit Int @default(5) + concurrencyLimitBurstFactor Decimal @default("2.00") @db.Decimal(4, 2) + paused Boolean @default(false) autoEnableInternalSources Boolean @default(true) @@ -770,6 +771,9 @@ enum TaskRunStatus { /// Task hasn't been deployed yet but is waiting to be executed. Deprecated in favor of PENDING_VERSION WAITING_FOR_DEPLOY + /// Task has been dequeued from the queue but is not yet executing + DEQUEUED + /// Task is currently being executed by a worker EXECUTING @@ -1439,9 +1443,6 @@ model TaskQueue { paused Boolean @default(false) - /// If true, when a run is paused and waiting for waitpoints to be completed, the run will release the concurrency capacity. - releaseConcurrencyOnWaitpoint Boolean @default(false) - createdAt DateTime @default(now()) updatedAt DateTime @updatedAt diff --git a/internal-packages/database/src/transaction.ts b/internal-packages/database/src/transaction.ts index beb9e26751..ac671a6f8e 100644 --- a/internal-packages/database/src/transaction.ts +++ b/internal-packages/database/src/transaction.ts @@ -9,6 +9,8 @@ export type PrismaClientOrTransaction = PrismaClient | PrismaTransactionClient; export type PrismaReplicaClient = Omit; +export const Decimal = Prisma.Decimal; + function isTransactionClient(prisma: PrismaClientOrTransaction): prisma is PrismaTransactionClient { return !("$transaction" in prisma); } diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index 2b4dd77c3d..8fa677a1c1 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -31,6 +31,7 @@ import { MinimalAuthenticatedEnvironment } from "../shared/index.js"; import { NotImplementedError, RunDuplicateIdempotencyKeyError } from "./errors.js"; import { EventBus, EventBusEvents } from "./eventBus.js"; import { RunLocker } from "./locking.js"; +import { getFinalRunStatuses } from "./statuses.js"; import { 
BatchSystem } from "./systems/batchSystem.js"; import { CheckpointSystem } from "./systems/checkpointSystem.js"; import { DelayedRunSystem } from "./systems/delayedRunSystem.js"; @@ -44,14 +45,12 @@ import { } from "./systems/executionSnapshotSystem.js"; import { PendingVersionSystem } from "./systems/pendingVersionSystem.js"; import { RaceSimulationSystem } from "./systems/raceSimulationSystem.js"; -import { ReleaseConcurrencySystem } from "./systems/releaseConcurrencySystem.js"; import { RunAttemptSystem } from "./systems/runAttemptSystem.js"; import { SystemResources } from "./systems/systems.js"; import { TtlSystem } from "./systems/ttlSystem.js"; import { WaitpointSystem } from "./systems/waitpointSystem.js"; import { EngineWorker, HeartbeatTimeouts, RunEngineOptions, TriggerParams } from "./types.js"; import { workerCatalog } from "./workerCatalog.js"; -import { getFinalRunStatuses, isFinalRunStatus } from "./statuses.js"; export class RunEngine { private runLockRedis: Redis; @@ -76,7 +75,6 @@ export class RunEngine { delayedRunSystem: DelayedRunSystem; ttlSystem: TtlSystem; pendingVersionSystem: PendingVersionSystem; - releaseConcurrencySystem: ReleaseConcurrencySystem; raceSimulationSystem: RaceSimulationSystem = new RaceSimulationSystem(); constructor(private readonly options: RunEngineOptions) { @@ -127,6 +125,7 @@ export class RunEngine { defaultEnvConcurrencyLimit: options.queue?.defaultEnvConcurrency ?? 10, }), defaultEnvConcurrency: options.queue?.defaultEnvConcurrency ?? 10, + defaultEnvConcurrencyBurstFactor: options.queue?.defaultEnvConcurrencyBurstFactor, logger: new Logger("RunQueue", options.queue?.logLevel ?? 
"info"), redis: { ...options.queue.redis, keyPrefix: `${options.queue.redis.keyPrefix}runqueue:` }, retryOptions: options.queue?.retryOptions, @@ -239,38 +238,6 @@ export class RunEngine { raceSimulationSystem: this.raceSimulationSystem, }; - this.releaseConcurrencySystem = new ReleaseConcurrencySystem({ - resources, - maxTokensRatio: options.releaseConcurrency?.maxTokensRatio, - releasingsMaxAge: options.releaseConcurrency?.releasingsMaxAge, - releasingsPollInterval: options.releaseConcurrency?.releasingsPollInterval, - queueOptions: - typeof options.releaseConcurrency?.disabled === "boolean" && - options.releaseConcurrency.disabled - ? undefined - : { - disableConsumers: options.releaseConcurrency?.disableConsumers, - redis: { - ...options.queue.redis, // Use base queue redis options - ...options.releaseConcurrency?.redis, // Allow overrides - keyPrefix: `${options.queue.redis.keyPrefix ?? ""}release-concurrency:`, - }, - retry: { - maxRetries: options.releaseConcurrency?.maxRetries ?? 5, - backoff: { - minDelay: options.releaseConcurrency?.backoff?.minDelay ?? 1000, - maxDelay: options.releaseConcurrency?.backoff?.maxDelay ?? 10000, - factor: options.releaseConcurrency?.backoff?.factor ?? 2, - }, - }, - consumersCount: options.releaseConcurrency?.consumersCount ?? 1, - pollInterval: options.releaseConcurrency?.pollInterval ?? 1000, - batchSize: options.releaseConcurrency?.batchSize ?? 
10, - tracer: this.tracer, - meter: this.meter, - }, - }); - this.executionSnapshotSystem = new ExecutionSnapshotSystem({ resources, heartbeatTimeouts: this.heartbeatTimeouts, @@ -283,7 +250,6 @@ export class RunEngine { this.checkpointSystem = new CheckpointSystem({ resources, - releaseConcurrencySystem: this.releaseConcurrencySystem, executionSnapshotSystem: this.executionSnapshotSystem, enqueueSystem: this.enqueueSystem, }); @@ -302,7 +268,6 @@ export class RunEngine { resources, executionSnapshotSystem: this.executionSnapshotSystem, enqueueSystem: this.enqueueSystem, - releaseConcurrencySystem: this.releaseConcurrencySystem, }); this.ttlSystem = new TtlSystem({ @@ -323,7 +288,6 @@ export class RunEngine { delayedRunSystem: this.delayedRunSystem, machines: this.options.machines, retryWarmStartThresholdMs: this.options.retryWarmStartThresholdMs, - releaseConcurrencySystem: this.releaseConcurrencySystem, }); this.dequeueSystem = new DequeueSystem({ @@ -331,7 +295,6 @@ export class RunEngine { executionSnapshotSystem: this.executionSnapshotSystem, runAttemptSystem: this.runAttemptSystem, machines: this.options.machines, - releaseConcurrencySystem: this.releaseConcurrencySystem, }); } @@ -385,8 +348,6 @@ export class RunEngine { machine, workerId, runnerId, - releaseConcurrency, - runChainState, scheduleId, scheduleInstanceId, createdAt, @@ -462,7 +423,6 @@ export class RunEngine { seedMetadataType, maxDurationInSeconds, machinePreset: machine, - runChainState, scheduleId, scheduleInstanceId, createdAt, @@ -536,7 +496,6 @@ export class RunEngine { workerId, runnerId, tx: prisma, - releaseConcurrency, }); } @@ -945,7 +904,6 @@ export class RunEngine { waitpoints, projectId, organizationId, - releaseConcurrency, timeout, spanIdToComplete, batch, @@ -957,7 +915,6 @@ export class RunEngine { waitpoints: string | string[]; projectId: string; organizationId: string; - releaseConcurrency?: boolean; timeout?: Date; spanIdToComplete?: string; batch?: { id: string; index?: 
number }; @@ -970,7 +927,6 @@ export class RunEngine { waitpoints, projectId, organizationId, - releaseConcurrency, timeout, spanIdToComplete, batch, @@ -1160,7 +1116,6 @@ export class RunEngine { async quit() { try { //stop the run queue - await this.releaseConcurrencySystem.quit(); await this.runQueue.quit(); await this.worker.stop(); await this.runLock.quit(); diff --git a/internal-packages/run-engine/src/engine/releaseConcurrencyTokenBucketQueue.ts b/internal-packages/run-engine/src/engine/releaseConcurrencyTokenBucketQueue.ts deleted file mode 100644 index 64fc1911f6..0000000000 --- a/internal-packages/run-engine/src/engine/releaseConcurrencyTokenBucketQueue.ts +++ /dev/null @@ -1,1120 +0,0 @@ -import { Callback, createRedisClient, Redis, Result, type RedisOptions } from "@internal/redis"; -import { - startSpan, - Tracer, - Meter, - getMeter, - ValueType, - ObservableResult, - Attributes, -} from "@internal/tracing"; -import { Logger } from "@trigger.dev/core/logger"; -import { z } from "zod"; -import { setInterval } from "node:timers/promises"; -import { flattenAttributes } from "@trigger.dev/core/v3"; - -export type ReleaseConcurrencyQueueRetryOptions = { - maxRetries?: number; - backoff?: { - minDelay?: number; // Defaults to 1000 - maxDelay?: number; // Defaults to 60000 - factor?: number; // Defaults to 2 - }; -}; - -export type ReleaseConcurrencyValidatorResult = { - releaseQueue: T; - releaserId: string; - shouldRefill: boolean; -}; - -export type ReleaseConcurrencyQueueOptions = { - redis: RedisOptions; - /** - * @returns true if the run was successful, false if the token should be returned to the bucket - */ - executor: (releaseQueue: T, releaserId: string) => Promise; - validateReleaserId?: ( - releaserId: string - ) => Promise | undefined>; - releasingsMaxAge?: number; - releasingsPollInterval?: number; - keys: { - fromDescriptor: (releaseQueue: T) => string; - toDescriptor: (releaseQueue: string) => T; - }; - maxTokens: (descriptor: T) => Promise; 
- consumersCount?: number; - masterQueuesKey?: string; - tracer?: Tracer; - meter?: Meter; - logger?: Logger; - pollInterval?: number; - batchSize?: number; - retry?: ReleaseConcurrencyQueueRetryOptions; - disableConsumers?: boolean; -}; - -const QueueItemMetadata = z.object({ - retryCount: z.number(), - lastAttempt: z.number(), -}); - -type QueueItemMetadata = z.infer; - -export class ReleaseConcurrencyTokenBucketQueue { - private redis: Redis; - private logger: Logger; - private meter: Meter; - private abortController: AbortController; - private consumers: ReleaseConcurrencyQueueConsumer[]; - private sweeper?: ReleaseConcurrencyReleasingsSweeper; - - private keyPrefix: string; - private masterQueuesKey: string; - private consumersCount: number; - private pollInterval: number; - private keys: ReleaseConcurrencyQueueOptions["keys"]; - private maxTokens: ReleaseConcurrencyQueueOptions["maxTokens"]; - private batchSize: number; - private maxRetries: number; - private backoff: NonNullable>; - - constructor(private readonly options: ReleaseConcurrencyQueueOptions) { - this.redis = createRedisClient(options.redis); - this.keyPrefix = options.redis.keyPrefix ?? "re2:release-concurrency-queue:"; - this.logger = options.logger ?? new Logger("ReleaseConcurrencyQueue", "debug"); - this.meter = options.meter ?? getMeter("release-concurrency"); - this.abortController = new AbortController(); - this.consumers = []; - - this.masterQueuesKey = options.masterQueuesKey ?? "master-queue"; - this.consumersCount = options.consumersCount ?? 1; - this.pollInterval = options.pollInterval ?? 1000; - this.keys = options.keys; - this.maxTokens = options.maxTokens; - this.batchSize = options.batchSize ?? 5; - this.maxRetries = options.retry?.maxRetries ?? 3; - this.backoff = { - minDelay: options.retry?.backoff?.minDelay ?? 1000, - maxDelay: options.retry?.backoff?.maxDelay ?? 60000, - factor: options.retry?.backoff?.factor ?? 
2, - }; - - // Set up OpenTelemetry metrics - const releasingsLengthGauge = this.meter.createObservableGauge( - "release_concurrency.releasings.length", - { - description: "Number of items in the releasings sorted set", - unit: "items", - valueType: ValueType.INT, - } - ); - - const masterQueueLengthGauge = this.meter.createObservableGauge( - "release_concurrency.master_queue.length", - { - description: "Number of items in the master queue sorted set", - unit: "items", - valueType: ValueType.INT, - } - ); - - releasingsLengthGauge.addCallback(this.#updateReleasingsLength.bind(this)); - masterQueueLengthGauge.addCallback(this.#updateMasterQueueLength.bind(this)); - - this.#registerCommands(); - - if (!options.disableConsumers) { - this.#startConsumers(); - this.#startReleasingsSweeper(); - } - } - - public async quit() { - this.abortController.abort(); - await this.redis.quit(); - } - - async #updateReleasingsLength(observableResult: ObservableResult) { - const releasingsLength = await this.redis.zcard(this.#releasingsKey()); - observableResult.observe(releasingsLength); - } - - async #updateMasterQueueLength(observableResult: ObservableResult) { - const masterQueueLength = await this.redis.zcard(this.masterQueuesKey); - observableResult.observe(masterQueueLength); - } - - /** - * Attempt to release concurrency for a run. - * - * If there is a token available, then immediately release the concurrency - * If there is no token available, then we'll add the operation to a queue - * and wait until the token is available. 
- */ - public async attemptToRelease(releaseQueueDescriptor: T, releaserId: string) { - const maxTokens = await this.#callMaxTokens(releaseQueueDescriptor); - - if (maxTokens === 0) { - this.logger.debug("No tokens available, skipping release", { - releaseQueueDescriptor, - releaserId, - maxTokens, - }); - - return; - } - - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - - const result = await this.redis.consumeToken( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - this.#releasingsKey(), - releaseQueue, - releaserId, - String(maxTokens), - String(Date.now()) - ); - - this.logger.info("Consumed token in attemptToRelease", { - releaseQueueDescriptor, - releaserId, - maxTokens, - result, - releaseQueue, - }); - - if (!!result) { - await this.#callExecutor(releaseQueueDescriptor, releaserId, { - retryCount: 0, - lastAttempt: Date.now(), - }); - } else { - this.logger.info("No token available, adding to queue", { - releaseQueueDescriptor, - releaserId, - maxTokens, - releaseQueue, - }); - } - } - - /** - * Consume a token from the token bucket for a release queue. 
- * - * This is mainly used for testing purposes - */ - public async consumeToken(releaseQueueDescriptor: T, releaserId: string) { - const maxTokens = await this.#callMaxTokens(releaseQueueDescriptor); - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - - if (maxTokens === 0) { - this.logger.debug("No tokens available, skipping consume", { - releaseQueueDescriptor, - releaserId, - maxTokens, - releaseQueue, - }); - - return; - } - - await this.redis.consumeToken( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - this.#releasingsKey(), - releaseQueue, - releaserId, - String(maxTokens), - String(Date.now()) - ); - - this.logger.debug("Consumed token in consumeToken", { - releaseQueueDescriptor, - releaserId, - maxTokens, - releaseQueue, - }); - } - - /** - * Return a token to the token bucket for a release queue. - * - * This is mainly used for testing purposes - */ - public async returnToken(releaseQueueDescriptor: T, releaserId: string) { - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - - this.logger.debug("Returning token in returnToken", { - releaseQueueDescriptor, - releaserId, - }); - - await this.redis.returnTokenOnly( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - this.#releasingsKey(), - releaseQueue, - releaserId - ); - - this.logger.debug("Returned token in returnToken", { - releaseQueueDescriptor, - releaserId, - releaseQueue, - }); - } - - /** - * Refill the token bucket for a release queue. - * - * This will add the amount of tokens to the token bucket. 
- */ - public async refillTokens(releaseQueueDescriptor: T, amount: number = 1) { - const maxTokens = await this.#callMaxTokens(releaseQueueDescriptor); - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - - if (amount < 0) { - this.logger.debug("Cannot refill with negative tokens", { - releaseQueueDescriptor, - amount, - }); - - throw new Error("Cannot refill with negative tokens"); - } - - if (amount === 0) { - this.logger.debug("Cannot refill with 0 tokens", { - releaseQueueDescriptor, - amount, - }); - - return []; - } - - await this.redis.refillTokens( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - releaseQueue, - String(amount), - String(maxTokens) - ); - - this.logger.debug("Refilled tokens in refillTokens", { - releaseQueueDescriptor, - releaseQueue, - amount, - maxTokens, - }); - } - - public async getReleaseQueueMetrics(releaseQueueDescriptor: T) { - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - const currentTokensRaw = await this.redis.get(this.#bucketKey(releaseQueue)); - const queueLength = await this.redis.zcard(this.#queueKey(releaseQueue)); - - const currentTokens = currentTokensRaw ? Number(currentTokensRaw) : undefined; - - return { currentTokens, queueLength }; - } - - /** - * Refill a token only if the releaserId is in the releasings set. - * Returns true if the token was refilled, false if the releaserId was not found in the releasings set. 
- */ - public async refillTokenIfInReleasings( - releaseQueueDescriptor: T, - releaserId: string - ): Promise { - const maxTokens = await this.#callMaxTokens(releaseQueueDescriptor); - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - - if (maxTokens === 0) { - this.logger.debug("No tokens available, skipping refill", { - releaseQueueDescriptor, - releaserId, - maxTokens, - releaseQueue, - }); - - return false; - } - - const result = await this.redis.refillTokenIfInReleasings( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - this.#releasingsKey(), - releaseQueue, - releaserId, - String(maxTokens) - ); - - this.logger.debug("Attempted to refill token if in releasings", { - releaseQueueDescriptor, - releaserId, - maxTokens, - releaseQueue, - result, - }); - - return result === "true"; - } - - /** - * Get the next queue that has available capacity and process one item from it - * Returns true if an item was processed, false if no items were available - */ - public async processNextAvailableQueue(): Promise { - const result = await this.redis.processMasterQueue( - this.masterQueuesKey, - this.keyPrefix, - this.batchSize, - String(Date.now()) - ); - - if (!result || result.length === 0) { - return false; - } - - await Promise.allSettled( - result.map(([queue, releaserId, metadata]) => { - const itemMetadata = QueueItemMetadata.parse(JSON.parse(metadata)); - const releaseQueueDescriptor = this.keys.toDescriptor(queue); - return this.#callExecutor(releaseQueueDescriptor, releaserId, itemMetadata); - }) - ); - - return true; - } - - async #callExecutor(releaseQueueDescriptor: T, releaserId: string, metadata: QueueItemMetadata) { - try { - this.logger.info("Calling executor for release", { releaseQueueDescriptor, releaserId }); - - const released = await this.options.executor(releaseQueueDescriptor, releaserId); - - if (released) { - this.logger.info("Executor released 
concurrency", { releaseQueueDescriptor, releaserId }); - } else { - this.logger.info("Executor did not release concurrency", { - releaseQueueDescriptor, - releaserId, - }); - - // Return the token but don't requeue - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - await this.redis.returnTokenOnly( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - this.#releasingsKey(), - releaseQueue, - releaserId - ); - } - } catch (error) { - this.logger.error("Error executing run:", { error }); - - if (metadata.retryCount >= this.maxRetries) { - this.logger.error("Max retries reached:", { - releaseQueueDescriptor, - releaserId, - retryCount: metadata.retryCount, - }); - - // Return the token but don't requeue - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - await this.redis.returnTokenOnly( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - this.#releasingsKey(), - releaseQueue, - releaserId - ); - - this.logger.info("Returned token:", { releaseQueueDescriptor, releaserId }); - - return; - } - - const updatedMetadata: QueueItemMetadata = { - ...metadata, - retryCount: metadata.retryCount + 1, - lastAttempt: Date.now(), - }; - - const releaseQueue = this.keys.fromDescriptor(releaseQueueDescriptor); - - await this.redis.returnTokenAndRequeue( - this.masterQueuesKey, - this.#bucketKey(releaseQueue), - this.#queueKey(releaseQueue), - this.#metadataKey(releaseQueue), - releaseQueue, - releaserId, - JSON.stringify(updatedMetadata), - this.#calculateBackoffScore(updatedMetadata) - ); - } - } - - // Make sure maxTokens is an integer (round down) - // And if it throws, return 0 - async #callMaxTokens(releaseQueueDescriptor: T) { - try { - const maxTokens = await this.maxTokens(releaseQueueDescriptor); - return Math.floor(maxTokens); - } catch (error) { - return 0; - } - } - - 
#bucketKey(releaseQueue: string) { - return `${releaseQueue}:bucket`; - } - - #queueKey(releaseQueue: string) { - return `${releaseQueue}:queue`; - } - - #metadataKey(releaseQueue: string) { - return `${releaseQueue}:metadata`; - } - - #releasingsKey() { - return "releasings"; - } - - #startConsumers() { - const consumerCount = this.consumersCount; - - for (let i = 0; i < consumerCount; i++) { - const consumer = new ReleaseConcurrencyQueueConsumer( - this, - this.pollInterval, - this.abortController.signal, - this.logger - ); - this.consumers.push(consumer); - // Start the consumer and don't await it - consumer.start().catch((error) => { - this.logger.error("Consumer failed to start:", { error, consumerId: i }); - }); - } - } - - #startReleasingsSweeper() { - if (this.options.validateReleaserId) { - this.sweeper = new ReleaseConcurrencyReleasingsSweeper( - this, - this.options.validateReleaserId, - this.options.releasingsPollInterval ?? 60_000, - this.options.releasingsMaxAge ?? 60_000 * 30, - this.abortController.signal, - this.logger - ); - this.sweeper.start(); - } - } - - #calculateBackoffScore(item: QueueItemMetadata): string { - const delay = Math.min( - this.backoff.maxDelay, - this.backoff.minDelay * Math.pow(this.backoff.factor, item.retryCount) - ); - return String(Date.now() + delay); - } - - async getQueueMetrics(): Promise< - Array<{ releaseQueue: string; currentTokens: number; queueLength: number }> - > { - const streamRedis = this.redis.duplicate(); - const queuePattern = `${this.keyPrefix}*:queue`; - const stream = streamRedis.scanStream({ - match: queuePattern, - type: "zset", - count: 100, - }); - - let resolvePromise: ( - value: Array<{ releaseQueue: string; currentTokens: number; queueLength: number }> - ) => void; - let rejectPromise: (reason?: any) => void; - - const promise = new Promise< - Array<{ releaseQueue: string; currentTokens: number; queueLength: number }> - >((resolve, reject) => { - resolvePromise = resolve; - rejectPromise = 
reject; - }); - - const metrics: Map< - string, - { releaseQueue: string; currentTokens: number; queueLength: number } - > = new Map(); - - async function getMetricsForKeys(queueKeys: string[]) { - if (queueKeys.length === 0) { - return []; - } - - const pipeline = streamRedis.pipeline(); - - queueKeys.forEach((queueKey) => { - const releaseQueue = queueKey - .replace(":queue", "") - .replace(streamRedis.options.keyPrefix ?? "", ""); - const bucketKey = `${releaseQueue}:bucket`; - - pipeline.get(bucketKey); - pipeline.zcard(`${releaseQueue}:queue`); - }); - - const result = await pipeline.exec(); - - if (!result) { - return []; - } - - const results = result.map(([resultError, queueLengthOrCurrentTokens]) => { - if (resultError) { - return null; - } - - return queueLengthOrCurrentTokens ? Number(queueLengthOrCurrentTokens) : 0; - }); - - // Now zip the results with the queue keys - const zippedResults = queueKeys.map((queueKey, index) => { - const releaseQueue = queueKey - .replace(":queue", "") - .replace(streamRedis.options.keyPrefix ?? "", ""); - - // Current tokens are at indexes 0, 2, 4, 6, etc. - // Queue length are at indexes 1, 3, 5, 7, etc. 
- - const currentTokens = results[index * 2]; - const queueLength = results[index * 2 + 1]; - - if (typeof currentTokens !== "number" || typeof queueLength !== "number") { - return null; - } - - return { - releaseQueue, - currentTokens: currentTokens, - queueLength: queueLength, - }; - }); - - return zippedResults.filter((result) => result !== null); - } - - stream.on("end", () => { - streamRedis.quit(); - resolvePromise(Array.from(metrics.values())); - }); - - stream.on("error", (error) => { - this.logger.error("Error getting queue metrics:", { error }); - - stream.pause(); - streamRedis.quit(); - rejectPromise(error); - }); - - stream.on("data", async (keys) => { - stream.pause(); - - const uniqueKeys = Array.from(new Set(keys)); - - if (uniqueKeys.length === 0) { - stream.resume(); - return; - } - - const unresolvedKeys = uniqueKeys.filter((key) => !metrics.has(key)); - - if (unresolvedKeys.length === 0) { - stream.resume(); - return; - } - - this.logger.debug("Fetching queue metrics for keys", { keys: uniqueKeys }); - - await getMetricsForKeys(unresolvedKeys).then((results) => { - results.forEach((result) => { - if (result) { - metrics.set(result.releaseQueue, result); - } - }); - - stream.resume(); - }); - }); - - return promise; - } - - async getReleasings(maxAge: number) { - const releasings = await this.redis.zrangebyscore( - this.#releasingsKey(), - 0, - Date.now() - maxAge - ); - return releasings; - } - - async removeReleaserIdFromReleasings(releaserId: string) { - await this.redis.zrem(this.#releasingsKey(), releaserId); - } - - #registerCommands() { - this.redis.defineCommand("consumeToken", { - numberOfKeys: 5, - lua: ` -local masterQueuesKey = KEYS[1] -local bucketKey = KEYS[2] -local queueKey = KEYS[3] -local metadataKey = KEYS[4] -local releasingsKey = KEYS[5] - -local releaseQueue = ARGV[1] -local releaserId = ARGV[2] -local maxTokens = tonumber(ARGV[3]) -local score = ARGV[4] - --- Get the current token count -local currentTokens = 
tonumber(redis.call("GET", bucketKey) or maxTokens) - --- If we have enough tokens, then consume them -if currentTokens >= 1 then - local newCurrentTokens = currentTokens - 1 - - redis.call("SET", bucketKey, newCurrentTokens) - redis.call("ZREM", queueKey, releaserId) - redis.call("ZADD", releasingsKey, score, releaserId) - - -- Clean up metadata when successfully consuming - redis.call("HDEL", metadataKey, releaserId) - - -- Get queue length after removing the item - local queueLength = redis.call("ZCARD", queueKey) - - -- If we still have tokens and items in queue, update available queues - if newCurrentTokens > 0 and queueLength > 0 then - redis.call("ZADD", masterQueuesKey, newCurrentTokens, releaseQueue) - else - redis.call("ZREM", masterQueuesKey, releaseQueue) - end - - return true -end - --- If we don't have enough tokens, then we need to add the operation to the queue -redis.call("ZADD", queueKey, score, releaserId) - --- Initialize or update metadata -local metadata = cjson.encode({ - retryCount = 0, - lastAttempt = tonumber(score) -}) -redis.call("HSET", metadataKey, releaserId, metadata) - --- Remove from the master queue -redis.call("ZREM", masterQueuesKey, releaseQueue) - -return false - `, - }); - - this.redis.defineCommand("refillTokens", { - numberOfKeys: 3, - lua: ` -local masterQueuesKey = KEYS[1] -local bucketKey = KEYS[2] -local queueKey = KEYS[3] - -local releaseQueue = ARGV[1] -local amount = tonumber(ARGV[2]) -local maxTokens = tonumber(ARGV[3]) - -local currentTokens = tonumber(redis.call("GET", bucketKey) or maxTokens) - --- Add the amount of tokens to the token bucket -local newTokens = currentTokens + amount - --- If we have more tokens than the max, then set the token bucket to the max -if newTokens > maxTokens then - newTokens = maxTokens -end - -redis.call("SET", bucketKey, newTokens) - --- Get the number of items in the queue -local queueLength = redis.call("ZCARD", queueKey) - --- If we have tokens available and items in the queue, 
add to available queues -if newTokens > 0 and queueLength > 0 then - redis.call("ZADD", masterQueuesKey, newTokens, releaseQueue) -else - redis.call("ZREM", masterQueuesKey, releaseQueue) -end - `, - }); - - this.redis.defineCommand("processMasterQueue", { - numberOfKeys: 1, - lua: ` -local masterQueuesKey = KEYS[1] - -local keyPrefix = ARGV[1] -local batchSize = tonumber(ARGV[2]) -local currentTime = tonumber(ARGV[3]) --- Get the queue with the highest number of available tokens -local queues = redis.call("ZREVRANGE", masterQueuesKey, 0, 0, "WITHSCORES") -if #queues == 0 then - return nil -end - -local queueName = queues[1] -local availableTokens = tonumber(queues[2]) - -local bucketKey = keyPrefix .. queueName .. ":bucket" -local queueKey = keyPrefix .. queueName .. ":queue" -local metadataKey = keyPrefix .. queueName .. ":metadata" - --- Get the oldest item from the queue -local items = redis.call("ZRANGEBYSCORE", queueKey, 0, currentTime, "LIMIT", 0, batchSize) -if #items == 0 then --- No items ready to be processed yet - return nil -end - --- Calculate how many items we can actually process -local itemsToProcess = math.min(#items, availableTokens) -local results = {} - --- Consume tokens and collect results -local currentTokens = tonumber(redis.call("GET", bucketKey)) -redis.call("SET", bucketKey, currentTokens - itemsToProcess) - --- Remove the items from the queue and add to results -for i = 1, itemsToProcess do - local releaserId = items[i] - redis.call("ZREM", queueKey, releaserId) - - -- Get metadata before removing it - local metadata = redis.call("HGET", metadataKey, releaserId) - redis.call("HDEL", metadataKey, releaserId) - - table.insert(results, { queueName, releaserId, metadata }) -end - --- Get remaining queue length -local queueLength = redis.call("ZCARD", queueKey) - --- Update available queues score or remove if no more tokens -local remainingTokens = currentTokens - itemsToProcess -if remainingTokens > 0 and queueLength > 0 then - 
redis.call("ZADD", masterQueuesKey, remainingTokens, queueName) -else - redis.call("ZREM", masterQueuesKey, queueName) -end - -return results - `, - }); - - this.redis.defineCommand("returnTokenAndRequeue", { - numberOfKeys: 4, - lua: ` -local masterQueuesKey = KEYS[1] -local bucketKey = KEYS[2] -local queueKey = KEYS[3] -local metadataKey = KEYS[4] - -local releaseQueue = ARGV[1] -local releaserId = ARGV[2] -local metadata = ARGV[3] -local score = ARGV[4] - --- Return the token to the bucket -local currentTokens = tonumber(redis.call("GET", bucketKey)) -local remainingTokens = currentTokens + 1 -redis.call("SET", bucketKey, remainingTokens) - --- Add the item back to the queue -redis.call("ZADD", queueKey, score, releaserId) - --- Add the metadata back to the item -redis.call("HSET", metadataKey, releaserId, metadata) - --- Update the master queue -local queueLength = redis.call("ZCARD", queueKey) -if queueLength > 0 then - redis.call("ZADD", masterQueuesKey, remainingTokens, releaseQueue) -else - redis.call("ZREM", masterQueuesKey, releaseQueue) -end - -return redis.status_reply("true") - `, - }); - - this.redis.defineCommand("returnTokenOnly", { - numberOfKeys: 5, - lua: ` -local masterQueuesKey = KEYS[1] -local bucketKey = KEYS[2] -local queueKey = KEYS[3] -local metadataKey = KEYS[4] -local releasingsKey = KEYS[5] - -local releaseQueue = ARGV[1] -local releaserId = ARGV[2] - -local removedFromReleasings = redis.call("ZREM", releasingsKey, releaserId) - -if removedFromReleasings == 0 then - return redis.status_reply("false") -end - --- Return the token to the bucket -local currentTokens = tonumber(redis.call("GET", bucketKey)) -local remainingTokens = currentTokens + 1 -redis.call("SET", bucketKey, remainingTokens) - --- Clean up metadata -redis.call("HDEL", metadataKey, releaserId) - --- Update the master queue based on remaining queue length -local queueLength = redis.call("ZCARD", queueKey) -if queueLength > 0 then - redis.call("ZADD", masterQueuesKey, 
remainingTokens, releaseQueue) -else - redis.call("ZREM", masterQueuesKey, releaseQueue) -end - -return redis.status_reply("true") - `, - }); - - this.redis.defineCommand("refillTokenIfInReleasings", { - numberOfKeys: 5, - lua: ` -local masterQueuesKey = KEYS[1] -local bucketKey = KEYS[2] -local queueKey = KEYS[3] -local metadataKey = KEYS[4] -local releasingsKey = KEYS[5] - -local releaseQueue = ARGV[1] -local releaserId = ARGV[2] -local maxTokens = tonumber(ARGV[3]) - -local removedFromReleasings = redis.call("ZREM", releasingsKey, releaserId) - -if removedFromReleasings == 0 then - return redis.status_reply("false") -end - --- Return the token to the bucket -local currentTokens = tonumber(redis.call("GET", bucketKey) or maxTokens) -local remainingTokens = currentTokens + 1 - --- Don't exceed maxTokens -if remainingTokens > maxTokens then - remainingTokens = maxTokens -end - -redis.call("SET", bucketKey, remainingTokens) - --- Clean up any metadata just in case -redis.call("HDEL", metadataKey, releaserId) - --- Update the master queue based on remaining queue length -local queueLength = redis.call("ZCARD", queueKey) -if queueLength > 0 then - redis.call("ZADD", masterQueuesKey, remainingTokens, releaseQueue) -else - redis.call("ZREM", masterQueuesKey, releaseQueue) -end - -return redis.status_reply("true") - `, - }); - } -} - -declare module "@internal/redis" { - interface RedisCommander { - consumeToken( - masterQueuesKey: string, - bucketKey: string, - queueKey: string, - metadataKey: string, - releasingsKey: string, - releaseQueue: string, - releaserId: string, - maxTokens: string, - score: string, - callback?: Callback - ): Result; - - refillTokens( - masterQueuesKey: string, - bucketKey: string, - queueKey: string, - releaseQueue: string, - amount: string, - maxTokens: string, - callback?: Callback - ): Result; - - processMasterQueue( - masterQueuesKey: string, - keyPrefix: string, - batchSize: number, - currentTime: string, - callback?: Callback<[string, 
string, string][]> - ): Result<[string, string, string][], Context>; - - returnTokenAndRequeue( - masterQueuesKey: string, - bucketKey: string, - queueKey: string, - metadataKey: string, - releaseQueue: string, - releaserId: string, - metadata: string, - score: string, - callback?: Callback - ): Result; - - returnTokenOnly( - masterQueuesKey: string, - bucketKey: string, - queueKey: string, - metadataKey: string, - releasingsKey: string, - releaseQueue: string, - releaserId: string, - callback?: Callback - ): Result; - - refillTokenIfInReleasings( - masterQueuesKey: string, - bucketKey: string, - queueKey: string, - metadataKey: string, - releasingsKey: string, - releaseQueue: string, - releaserId: string, - maxTokens: string, - callback?: Callback - ): Result; - } -} - -class ReleaseConcurrencyQueueConsumer { - private logger: Logger; - - constructor( - private readonly queue: ReleaseConcurrencyTokenBucketQueue, - private readonly pollInterval: number, - private readonly signal: AbortSignal, - logger?: Logger - ) { - this.logger = logger ?? new Logger("QueueConsumer"); - } - - async start() { - try { - for await (const _ of setInterval(this.pollInterval, null, { signal: this.signal })) { - try { - const processed = await this.queue.processNextAvailableQueue(); - if (!processed) { - continue; - } - } catch (error) { - this.logger.error("Error processing queue:", { error }); - } - } - } catch (error) { - if (error instanceof Error && error.name !== "AbortError") { - this.logger.error("Consumer loop error:", { error }); - } - } - } -} - -class ReleaseConcurrencyReleasingsSweeper { - private readonly logger: Logger; - - constructor( - private readonly queue: ReleaseConcurrencyTokenBucketQueue, - private readonly validateReleaserId: ( - releaserId: string - ) => Promise | undefined>, - private readonly pollInterval: number, - private readonly maxAge: number, - private readonly signal: AbortSignal, - logger?: Logger - ) { - this.queue = queue; - this.logger = logger ?? 
new Logger("ReleaseConcurrencyReleasingsSweeper"); - } - - async start() { - try { - for await (const _ of setInterval(this.pollInterval, null, { signal: this.signal })) { - try { - await this.sweep(); - } catch (error) { - this.logger.error("Error sweeping releasings:", { error }); - } - } - } catch (error) { - if (error instanceof Error && error.name !== "AbortError") { - this.logger.error("Sweeper loop error:", { error }); - } - } - } - - private async sweep() { - const releasings = await this.queue.getReleasings(this.maxAge); - - this.logger.debug("Sweeping releasings:", { releasings }); - - for (const releaserId of releasings) { - const result = await this.validateReleaserId(releaserId); - - this.logger.debug("Validated releaserId:", { releaserId, result }); - - if (!result) { - // We need to remove the releaserId from the releasings set - await this.queue.removeReleaserIdFromReleasings(releaserId); - continue; - } - - if (result.shouldRefill) { - await this.queue.refillTokenIfInReleasings(result.releaseQueue, result.releaserId); - } - } - } -} diff --git a/internal-packages/run-engine/src/engine/statuses.ts b/internal-packages/run-engine/src/engine/statuses.ts index 36f0825013..ce7e9c8129 100644 --- a/internal-packages/run-engine/src/engine/statuses.ts +++ b/internal-packages/run-engine/src/engine/statuses.ts @@ -59,8 +59,3 @@ export function isFinalRunStatus(status: TaskRunStatus): boolean { export function getFinalRunStatuses(): TaskRunStatus[] { return finalStatuses; } - -export function canReleaseConcurrency(status: TaskRunExecutionStatus): boolean { - const releaseableStatuses: TaskRunExecutionStatus[] = ["SUSPENDED", "EXECUTING_WITH_WAITPOINTS"]; - return releaseableStatuses.includes(status); -} diff --git a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts index 1220ce43d2..5c6cf2aa57 100644 --- a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts 
+++ b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts @@ -11,25 +11,22 @@ import { import { SystemResources } from "./systems.js"; import { ServiceValidationError } from "../errors.js"; import { EnqueueSystem } from "./enqueueSystem.js"; -import { ReleaseConcurrencySystem } from "./releaseConcurrencySystem.js"; + export type CheckpointSystemOptions = { resources: SystemResources; executionSnapshotSystem: ExecutionSnapshotSystem; enqueueSystem: EnqueueSystem; - releaseConcurrencySystem: ReleaseConcurrencySystem; }; export class CheckpointSystem { private readonly $: SystemResources; private readonly executionSnapshotSystem: ExecutionSnapshotSystem; private readonly enqueueSystem: EnqueueSystem; - private readonly releaseConcurrencySystem: ReleaseConcurrencySystem; constructor(private readonly options: CheckpointSystemOptions) { this.$ = options.resources; this.executionSnapshotSystem = options.executionSnapshotSystem; this.enqueueSystem = options.enqueueSystem; - this.releaseConcurrencySystem = options.releaseConcurrencySystem; } /** @@ -195,14 +192,14 @@ export class CheckpointSystem { checkpointId: taskRunCheckpoint.id, }); - this.$.logger.debug("Refilling token bucket for release concurrency queue", { + this.$.logger.debug("Releasing concurrency for run because it was checkpointed", { snapshot, + newSnapshot, }); - // Refill the token bucket for the release concurrency queue - await this.releaseConcurrencySystem.refillTokensForSnapshot( - snapshot.previousSnapshotId ?? 
snapshot.id - ); + if (run.organizationId) { + await this.$.runQueue.releaseAllConcurrency(run.organizationId, run.id); + } return { ok: true as const, @@ -233,12 +230,14 @@ export class CheckpointSystem { runnerId, }); - this.$.logger.debug("Refilling token bucket for release concurrency queue", { + this.$.logger.debug("Releasing concurrency for run because it was checkpointed", { snapshot, + newSnapshot, }); - // Refill the token bucket for the release concurrency queue - await this.releaseConcurrencySystem.refillTokensForSnapshot(snapshot.id); + if (run.organizationId) { + await this.$.runQueue.releaseAllConcurrency(run.organizationId, run.id); + } return { ok: true as const, diff --git a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts index bff9ec9cd6..8ce967ccb6 100644 --- a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts @@ -9,7 +9,6 @@ import { getMachinePreset } from "../machinePresets.js"; import { isDequeueableExecutionStatus } from "../statuses.js"; import { RunEngineOptions } from "../types.js"; import { ExecutionSnapshotSystem, getLatestExecutionSnapshot } from "./executionSnapshotSystem.js"; -import { ReleaseConcurrencySystem } from "./releaseConcurrencySystem.js"; import { RunAttemptSystem } from "./runAttemptSystem.js"; import { SystemResources } from "./systems.js"; @@ -18,20 +17,17 @@ export type DequeueSystemOptions = { machines: RunEngineOptions["machines"]; executionSnapshotSystem: ExecutionSnapshotSystem; runAttemptSystem: RunAttemptSystem; - releaseConcurrencySystem: ReleaseConcurrencySystem; }; export class DequeueSystem { private readonly $: SystemResources; private readonly executionSnapshotSystem: ExecutionSnapshotSystem; private readonly runAttemptSystem: RunAttemptSystem; - private readonly releaseConcurrencySystem: ReleaseConcurrencySystem; constructor(private 
readonly options: DequeueSystemOptions) { this.$ = options.resources; this.executionSnapshotSystem = options.executionSnapshotSystem; this.runAttemptSystem = options.runAttemptSystem; - this.releaseConcurrencySystem = options.releaseConcurrencySystem; } /** @@ -166,12 +162,6 @@ export class DequeueSystem { } ); - if (snapshot.previousSnapshotId) { - await this.releaseConcurrencySystem.refillTokensForSnapshot( - snapshot.previousSnapshotId - ); - } - await sendNotificationToWorker({ runId, snapshot: newSnapshot, @@ -325,6 +315,7 @@ export class DequeueSystem { lockedById: result.task.id, lockedToVersionId: result.worker.id, lockedQueueId: result.queue.id, + status: "DEQUEUED", startedAt, baseCostInCents: this.options.machines.baseCostInCents, machinePreset: machinePreset.name, diff --git a/internal-packages/run-engine/src/engine/systems/releaseConcurrencySystem.ts b/internal-packages/run-engine/src/engine/systems/releaseConcurrencySystem.ts deleted file mode 100644 index c89c1fe709..0000000000 --- a/internal-packages/run-engine/src/engine/systems/releaseConcurrencySystem.ts +++ /dev/null @@ -1,347 +0,0 @@ -import { TaskRunExecutionSnapshot } from "@trigger.dev/database"; -import { z } from "zod"; -import { - ReleaseConcurrencyQueueOptions, - ReleaseConcurrencyTokenBucketQueue, -} from "../releaseConcurrencyTokenBucketQueue.js"; -import { canReleaseConcurrency } from "../statuses.js"; -import { getLatestExecutionSnapshot } from "./executionSnapshotSystem.js"; -import { SystemResources } from "./systems.js"; - -const ReleaseConcurrencyMetadata = z.object({ - releaseConcurrency: z.boolean().optional(), -}); - -type ReleaseConcurrencyMetadata = z.infer; - -export type ReleaseConcurrencySystemOptions = { - resources: SystemResources; - maxTokensRatio?: number; - releasingsMaxAge?: number; - releasingsPollInterval?: number; - queueOptions?: Omit< - ReleaseConcurrencyQueueOptions<{ - orgId: string; - projectId: string; - envId: string; - }>, - "executor" | 
"validateReleaserId" | "keys" | "maxTokens" - >; -}; - -export class ReleaseConcurrencySystem { - private readonly $: SystemResources; - releaseConcurrencyQueue?: ReleaseConcurrencyTokenBucketQueue<{ - orgId: string; - projectId: string; - envId: string; - }>; - - constructor(private readonly options: ReleaseConcurrencySystemOptions) { - this.$ = options.resources; - - if (options.queueOptions) { - this.releaseConcurrencyQueue = new ReleaseConcurrencyTokenBucketQueue({ - ...options.queueOptions, - releasingsMaxAge: this.options.releasingsMaxAge, - releasingsPollInterval: this.options.releasingsPollInterval, - executor: async (descriptor, snapshotId) => { - return await this.executeReleaseConcurrencyForSnapshot(snapshotId); - }, - keys: { - fromDescriptor: (descriptor) => - `org:${descriptor.orgId}:proj:${descriptor.projectId}:env:${descriptor.envId}`, - toDescriptor: (name) => ({ - orgId: name.split(":")[1], - projectId: name.split(":")[3], - envId: name.split(":")[5], - }), - }, - maxTokens: async (descriptor) => { - const environment = await this.$.prisma.runtimeEnvironment.findFirstOrThrow({ - where: { id: descriptor.envId }, - select: { - maximumConcurrencyLimit: true, - }, - }); - - return environment.maximumConcurrencyLimit * (this.options.maxTokensRatio ?? 
1.0); - }, - validateReleaserId: async (releaserId) => { - return this.validateSnapshotShouldRefillToken(releaserId); - }, - }); - } - } - - async validateSnapshotShouldRefillToken(releaserId: string) { - const snapshot = await this.$.prisma.taskRunExecutionSnapshot.findFirst({ - where: { id: releaserId }, - select: { - id: true, - run: { - select: { - id: true, - status: true, - }, - }, - organizationId: true, - projectId: true, - environmentId: true, - executionStatus: true, - }, - }); - - if (!snapshot) { - return; - } - - const latestSnapshot = await getLatestExecutionSnapshot(this.$.prisma, snapshot.run.id); - - this.$.logger.debug("Checking if snapshot should refill", { - snapshot, - latestSnapshot, - }); - - return { - releaseQueue: { - orgId: snapshot.organizationId, - projectId: snapshot.projectId, - envId: snapshot.environmentId, - }, - releaserId: snapshot.id, - shouldRefill: latestSnapshot.id !== snapshot.id, - }; - } - - public async consumeToken( - descriptor: { orgId: string; projectId: string; envId: string }, - releaserId: string - ) { - if (!this.releaseConcurrencyQueue) { - return; - } - - await this.releaseConcurrencyQueue.consumeToken(descriptor, releaserId); - } - - /** - * This is used in tests only - */ - public async returnToken( - descriptor: { orgId: string; projectId: string; envId: string }, - releaserId: string - ) { - if (!this.releaseConcurrencyQueue) { - return; - } - - await this.releaseConcurrencyQueue.returnToken(descriptor, releaserId); - } - - public async quit() { - if (!this.releaseConcurrencyQueue) { - return; - } - - await this.releaseConcurrencyQueue.quit(); - } - - public async refillTokensForSnapshot(snapshotId: string | undefined): Promise; - public async refillTokensForSnapshot(snapshot: TaskRunExecutionSnapshot): Promise; - public async refillTokensForSnapshot( - snapshotOrId: TaskRunExecutionSnapshot | string | undefined - ) { - if (!this.releaseConcurrencyQueue) { - return; - } - - if (typeof snapshotOrId === 
"undefined") { - return; - } - - const snapshot = - typeof snapshotOrId === "string" - ? await this.$.prisma.taskRunExecutionSnapshot.findFirst({ - where: { id: snapshotOrId }, - }) - : snapshotOrId; - - if (!snapshot) { - this.$.logger.error("Snapshot not found", { - snapshotId: snapshotOrId, - }); - - return; - } - - if (snapshot.executionStatus !== "EXECUTING_WITH_WAITPOINTS") { - this.$.logger.debug("Snapshot is not in a valid state to refill tokens", { - snapshot, - }); - - return; - } - - await this.releaseConcurrencyQueue.refillTokenIfInReleasings( - { - orgId: snapshot.organizationId, - projectId: snapshot.projectId, - envId: snapshot.environmentId, - }, - snapshot.id - ); - } - - public async releaseConcurrencyForSnapshot(snapshot: TaskRunExecutionSnapshot) { - if (!this.releaseConcurrencyQueue) { - this.$.logger.debug("Release concurrency queue not enabled, skipping release", { - snapshotId: snapshot.id, - }); - - return; - } - - // Go ahead and release concurrency immediately if the run is in a development environment - if (snapshot.environmentType === "DEVELOPMENT") { - this.$.logger.debug("Immediate release of concurrency for development environment", { - snapshotId: snapshot.id, - }); - - return await this.executeReleaseConcurrencyForSnapshot(snapshot.id); - } - - await this.releaseConcurrencyQueue.attemptToRelease( - { - orgId: snapshot.organizationId, - projectId: snapshot.projectId, - envId: snapshot.environmentId, - }, - snapshot.id - ); - } - - public async executeReleaseConcurrencyForSnapshot(snapshotId: string): Promise { - if (!this.releaseConcurrencyQueue) { - return false; - } - - this.$.logger.debug("Executing released concurrency", { - snapshotId, - }); - - // Fetch the snapshot - const snapshot = await this.$.prisma.taskRunExecutionSnapshot.findFirst({ - where: { id: snapshotId }, - select: { - id: true, - previousSnapshotId: true, - executionStatus: true, - organizationId: true, - metadata: true, - runId: true, - run: { - select: { - 
lockedQueueId: true, - }, - }, - }, - }); - - if (!snapshot) { - this.$.logger.error("Snapshot not found", { - snapshotId, - }); - - return false; - } - - // - Runlock the run - // - Get latest snapshot - // - If the run is non suspended or going to be, then bail - // - If the run is suspended or going to be, then release the concurrency - return await this.$.runLock.lock( - "executeReleaseConcurrencyForSnapshot", - [snapshot.runId], - async () => { - const latestSnapshot = await getLatestExecutionSnapshot(this.$.prisma, snapshot.runId); - - const isValidSnapshot = - latestSnapshot.id === snapshot.id || - // Case 2: The provided snapshotId matches the previous snapshot - // AND we're in SUSPENDED state (which is valid) - (latestSnapshot.previousSnapshotId === snapshot.id && - latestSnapshot.executionStatus === "SUSPENDED"); - - if (!isValidSnapshot) { - this.$.logger.error("Tried to release concurrency on an invalid snapshot", { - latestSnapshot, - snapshot, - }); - - return false; - } - - if (!canReleaseConcurrency(latestSnapshot.executionStatus)) { - this.$.logger.debug("Run is not in a state to release concurrency", { - runId: snapshot.runId, - snapshot: latestSnapshot, - }); - - return false; - } - - const metadata = this.#parseMetadata(snapshot.metadata); - - if (typeof metadata.releaseConcurrency === "boolean") { - if (metadata.releaseConcurrency) { - await this.$.runQueue.releaseAllConcurrency(snapshot.organizationId, snapshot.runId); - - return true; - } - - await this.$.runQueue.releaseEnvConcurrency(snapshot.organizationId, snapshot.runId); - - return true; - } - - // Get the locked queue - const taskQueue = snapshot.run.lockedQueueId - ? 
await this.$.prisma.taskQueue.findFirst({ - where: { - id: snapshot.run.lockedQueueId, - }, - }) - : undefined; - - if ( - taskQueue && - (typeof taskQueue.concurrencyLimit === "undefined" || - taskQueue.releaseConcurrencyOnWaitpoint) - ) { - await this.$.runQueue.releaseAllConcurrency(snapshot.organizationId, snapshot.runId); - - return true; - } - - await this.$.runQueue.releaseEnvConcurrency(snapshot.organizationId, snapshot.runId); - - return true; - } - ); - } - - #parseMetadata(metadata?: unknown): ReleaseConcurrencyMetadata { - if (!metadata) { - return {}; - } - - const result = ReleaseConcurrencyMetadata.safeParse(metadata); - - if (!result.success) { - return {}; - } - - return result.data; - } -} diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts index bec96f9e77..f56c9a6ebb 100644 --- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts @@ -1,4 +1,5 @@ import { startSpan } from "@internal/tracing"; +import { tryCatch } from "@trigger.dev/core/utils"; import { CompleteRunAttemptResult, ExecutionResult, @@ -33,10 +34,8 @@ import { ExecutionSnapshotSystem, getLatestExecutionSnapshot, } from "./executionSnapshotSystem.js"; -import { ReleaseConcurrencySystem } from "./releaseConcurrencySystem.js"; import { SystemResources } from "./systems.js"; import { WaitpointSystem } from "./waitpointSystem.js"; -import { tryCatch } from "@trigger.dev/core/utils"; export type RunAttemptSystemOptions = { resources: SystemResources; @@ -44,7 +43,6 @@ export type RunAttemptSystemOptions = { batchSystem: BatchSystem; waitpointSystem: WaitpointSystem; delayedRunSystem: DelayedRunSystem; - releaseConcurrencySystem: ReleaseConcurrencySystem; retryWarmStartThresholdMs?: number; machines: RunEngineOptions["machines"]; }; @@ -55,7 +53,6 @@ export class RunAttemptSystem { private readonly 
batchSystem: BatchSystem; private readonly waitpointSystem: WaitpointSystem; private readonly delayedRunSystem: DelayedRunSystem; - private readonly releaseConcurrencySystem: ReleaseConcurrencySystem; constructor(private readonly options: RunAttemptSystemOptions) { this.$ = options.resources; @@ -63,7 +60,6 @@ export class RunAttemptSystem { this.batchSystem = options.batchSystem; this.waitpointSystem = options.waitpointSystem; this.delayedRunSystem = options.delayedRunSystem; - this.releaseConcurrencySystem = options.releaseConcurrencySystem; } public async startRunAttempt({ @@ -1071,8 +1067,6 @@ export class RunAttemptSystem { removeFromWorkerQueue: true, }); - await this.releaseConcurrencySystem.refillTokensForSnapshot(latestSnapshot); - //if executing, we need to message the worker to cancel the run and put it into `PENDING_CANCEL` status if (isExecuting(latestSnapshot.executionStatus)) { const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot(prisma, { diff --git a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts index 0ddd3c33bb..236afe008c 100644 --- a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts @@ -1,40 +1,35 @@ import { timeoutError, tryCatch } from "@trigger.dev/core/v3"; import { WaitpointId } from "@trigger.dev/core/v3/isomorphic"; import { - $transaction, Prisma, PrismaClientOrTransaction, + TaskQueue, TaskRunExecutionSnapshot, TaskRunExecutionStatus, Waitpoint, } from "@trigger.dev/database"; +import { assertNever } from "assert-never"; import { nanoid } from "nanoid"; import { sendNotificationToWorker } from "../eventBus.js"; -import { isExecuting, isFinishedOrPendingFinished } from "../statuses.js"; import { EnqueueSystem } from "./enqueueSystem.js"; import { ExecutionSnapshotSystem, getLatestExecutionSnapshot } from 
"./executionSnapshotSystem.js"; import { SystemResources } from "./systems.js"; -import { ReleaseConcurrencySystem } from "./releaseConcurrencySystem.js"; -import { assertNever } from "assert-never"; export type WaitpointSystemOptions = { resources: SystemResources; executionSnapshotSystem: ExecutionSnapshotSystem; enqueueSystem: EnqueueSystem; - releaseConcurrencySystem: ReleaseConcurrencySystem; }; export class WaitpointSystem { private readonly $: SystemResources; private readonly executionSnapshotSystem: ExecutionSnapshotSystem; - private readonly releaseConcurrencySystem: ReleaseConcurrencySystem; private readonly enqueueSystem: EnqueueSystem; constructor(private readonly options: WaitpointSystemOptions) { this.$ = options.resources; this.executionSnapshotSystem = options.executionSnapshotSystem; this.enqueueSystem = options.enqueueSystem; - this.releaseConcurrencySystem = options.releaseConcurrencySystem; } public async clearBlockingWaitpoints({ @@ -355,7 +350,6 @@ export class WaitpointSystem { waitpoints, projectId, organizationId, - releaseConcurrency, timeout, spanIdToComplete, batch, @@ -367,7 +361,6 @@ export class WaitpointSystem { waitpoints: string | string[]; projectId: string; organizationId: string; - releaseConcurrency?: boolean; timeout?: Date; spanIdToComplete?: string; batch?: { id: string; index?: number }; @@ -436,9 +429,6 @@ export class WaitpointSystem { snapshot: { executionStatus: newStatus, description: "Run was blocked by a waitpoint.", - metadata: { - releaseConcurrency, - }, }, previousSnapshotId: snapshot.id, environmentId: snapshot.environmentId, @@ -453,11 +443,6 @@ export class WaitpointSystem { // Let the worker know immediately, so it can suspend the run await sendNotificationToWorker({ runId, snapshot, eventBus: this.$.eventBus }); - - if (isRunBlocked) { - //release concurrency - await this.releaseConcurrencySystem.releaseConcurrencyForSnapshot(snapshot); - } } if (timeout) { @@ -536,6 +521,7 @@ export class WaitpointSystem { 
id: true, type: true, maximumConcurrencyLimit: true, + concurrencyLimitBurstFactor: true, project: { select: { id: true } }, organization: { select: { id: true } }, }, @@ -609,77 +595,45 @@ export class WaitpointSystem { return "skipped"; } case "EXECUTING_WITH_WAITPOINTS": { - const result = await this.$.runQueue.reacquireConcurrency( - run.runtimeEnvironment.organization.id, - runId - ); - - if (result) { - const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot( - this.$.prisma, - { - run: { - id: runId, - status: snapshot.runStatus, - attemptNumber: snapshot.attemptNumber, - }, - snapshot: { - executionStatus: "EXECUTING", - description: "Run was continued, whilst still executing.", - }, - previousSnapshotId: snapshot.id, - environmentId: snapshot.environmentId, - environmentType: snapshot.environmentType, - projectId: snapshot.projectId, - organizationId: snapshot.organizationId, - batchId: snapshot.batchId ?? undefined, - completedWaitpoints: blockingWaitpoints.map((b) => ({ - id: b.waitpoint.id, - index: b.batchIndex ?? 
undefined, - })), - } - ); - - await this.releaseConcurrencySystem.refillTokensForSnapshot(snapshot); - - this.$.logger.debug( - `continueRunIfUnblocked: run was still executing, sending notification`, - { - runId, - snapshot, - newSnapshot, - } - ); - - await sendNotificationToWorker({ - runId, - snapshot: newSnapshot, - eventBus: this.$.eventBus, - }); - } else { - // Because we cannot reacquire the concurrency, we need to enqueue the run again - // and because the run is still executing, we need to set the status to QUEUED_EXECUTING - const newSnapshot = await this.enqueueSystem.enqueueRun({ - run, - env: run.runtimeEnvironment, + const newSnapshot = await this.executionSnapshotSystem.createExecutionSnapshot( + this.$.prisma, + { + run: { + id: runId, + status: snapshot.runStatus, + attemptNumber: snapshot.attemptNumber, + }, snapshot: { - status: "QUEUED_EXECUTING", - description: "Run can continue, but is waiting for concurrency", + executionStatus: "EXECUTING", + description: "Run was continued, whilst still executing.", }, previousSnapshotId: snapshot.id, + environmentId: snapshot.environmentId, + environmentType: snapshot.environmentType, + projectId: snapshot.projectId, + organizationId: snapshot.organizationId, batchId: snapshot.batchId ?? undefined, completedWaitpoints: blockingWaitpoints.map((b) => ({ id: b.waitpoint.id, index: b.batchIndex ?? 
undefined, })), - }); + } + ); - this.$.logger.debug(`continueRunIfUnblocked: run goes to QUEUED_EXECUTING`, { + this.$.logger.debug( + `continueRunIfUnblocked: run was still executing, sending notification`, + { runId, snapshot, newSnapshot, - }); - } + } + ); + + await sendNotificationToWorker({ + runId, + snapshot: newSnapshot, + eventBus: this.$.eventBus, + }); break; } diff --git a/internal-packages/run-engine/src/engine/tests/checkpoints.test.ts b/internal-packages/run-engine/src/engine/tests/checkpoints.test.ts index edd56a2b34..ed6e1699b3 100644 --- a/internal-packages/run-engine/src/engine/tests/checkpoints.test.ts +++ b/internal-packages/run-engine/src/engine/tests/checkpoints.test.ts @@ -573,7 +573,6 @@ describe("RunEngine checkpoints", () => { waitpoints: waitpointResult.waitpoint.id, projectId: authenticatedEnvironment.projectId, organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: true, // Important: Release concurrency when blocking }); // Verify run is blocked @@ -776,238 +775,6 @@ describe("RunEngine checkpoints", () => { } ); - containerTest( - "when a checkpoint is created while the run is in QUEUED_EXECUTING state, the run is QUEUED", - async ({ prisma, redisOptions }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - masterQueueConsumersDisabled: true, - processWorkerQueueDebounceMs: 50, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0001, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - try { - const taskIdentifier = "test-task"; - - // Create background worker - await setupBackgroundWorker( - engine, - 
authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - concurrencyLimit: 1, - } - ); - - // Create first run with queue concurrency limit of 1 - const firstRun = await engine.trigger( - { - number: 1, - friendlyId: "run_first", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345-first", - spanId: "s12345-first", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - // Dequeue and start the first run - const dequeuedFirst = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - expect(dequeuedFirst.length).toBe(1); - assertNonNullable(dequeuedFirst[0]); - - const firstAttempt = await engine.startRunAttempt({ - runId: dequeuedFirst[0].run.id, - snapshotId: dequeuedFirst[0].snapshot.id, - }); - expect(firstAttempt.snapshot.executionStatus).toBe("EXECUTING"); - - // Create a manual waitpoint for the first run - const waitpoint = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - expect(waitpoint.waitpoint.status).toBe("PENDING"); - - // Block the first run with releaseConcurrency set to true - const blockedResult = await engine.blockRunWithWaitpoint({ - runId: firstRun.id, - waitpoints: waitpoint.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: true, - }); - - // Verify first run is blocked - const firstRunData = await engine.getRunExecutionData({ runId: firstRun.id }); - expect(firstRunData?.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Create and start second run on the same queue - const secondRun = await engine.trigger( - { - number: 2, - friendlyId: "run_second", - environment: authenticatedEnvironment, - 
taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345-second", - spanId: "s12345-second", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - // Dequeue and start the second run - const dequeuedSecond = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - expect(dequeuedSecond.length).toBe(1); - assertNonNullable(dequeuedSecond[0]); - - const secondAttempt = await engine.startRunAttempt({ - runId: dequeuedSecond[0].run.id, - snapshotId: dequeuedSecond[0].snapshot.id, - }); - expect(secondAttempt.snapshot.executionStatus).toBe("EXECUTING"); - - // Now complete the waitpoint for the first run - await engine.completeWaitpoint({ - id: waitpoint.waitpoint.id, - }); - - // Wait for the continueRunIfUnblocked to process - await setTimeout(500); - - // Verify the first run is now in QUEUED_EXECUTING state - const executionDataAfter = await engine.getRunExecutionData({ runId: firstRun.id }); - expect(executionDataAfter?.snapshot.executionStatus).toBe("QUEUED_EXECUTING"); - expect(executionDataAfter?.snapshot.description).toBe( - "Run can continue, but is waiting for concurrency" - ); - - // Verify the waitpoint is no longer blocking the first run - const runWaitpoint = await prisma.taskRunWaitpoint.findFirst({ - where: { - taskRunId: firstRun.id, - }, - include: { - waitpoint: true, - }, - }); - expect(runWaitpoint).toBeNull(); - - // Verify the waitpoint itself is completed - const completedWaitpoint = await prisma.waitpoint.findUnique({ - where: { - id: waitpoint.waitpoint.id, - }, - }); - assertNonNullable(completedWaitpoint); - expect(completedWaitpoint.status).toBe("COMPLETED"); - - // Create checkpoint after waitpoint completion - const checkpointResult = await engine.createCheckpoint({ - runId: firstRun.id, - snapshotId: firstRunData?.snapshot.id!, - checkpoint: { - type: "DOCKER", 
- reason: "TEST_CHECKPOINT", - location: "test-location", - imageRef: "test-image-ref", - }, - }); - - expect(checkpointResult.ok).toBe(true); - const checkpoint = checkpointResult.ok ? checkpointResult.snapshot : null; - assertNonNullable(checkpoint); - expect(checkpoint.executionStatus).toBe("QUEUED"); - - // Complete the second run so the first run can be dequeued - const result = await engine.completeRunAttempt({ - runId: dequeuedSecond[0].run.id, - snapshotId: secondAttempt.snapshot.id, - completion: { - ok: true, - id: dequeuedSecond[0].run.id, - output: `{"foo":"bar"}`, - outputType: "application/json", - }, - }); - - await setTimeout(1000); - - // Verify the first run is back in the queue - const queuedRun = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - expect(queuedRun.length).toBe(1); - assertNonNullable(queuedRun[0]); - - // Now we can continue the run - const continueResult = await engine.continueRunExecution({ - runId: firstRun.id, - snapshotId: queuedRun[0].snapshot.id, - }); - - expect(continueResult.snapshot.executionStatus).toBe("EXECUTING"); - } finally { - await engine.quit(); - } - } - ); - containerTest("batchTriggerAndWait resume after checkpoint", async ({ prisma, redisOptions }) => { //create environment const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); diff --git a/internal-packages/run-engine/src/engine/tests/releaseConcurrency.test.ts b/internal-packages/run-engine/src/engine/tests/releaseConcurrency.test.ts deleted file mode 100644 index d22387bcec..0000000000 --- a/internal-packages/run-engine/src/engine/tests/releaseConcurrency.test.ts +++ /dev/null @@ -1,1394 +0,0 @@ -import { assertNonNullable } from "@internal/testcontainers"; -import { setTimeout } from "node:timers/promises"; -import { EventBusEventArgs } from "../eventBus.js"; -import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "./setup.js"; -import { engineTest } from 
"./utils/engineTest.js"; - -vi.setConfig({ testTimeout: 60_000 }); - -describe("RunEngine Releasing Concurrency", () => { - engineTest.scoped({ - engineOptions: { - queue: { masterQueueConsumersDisabled: true, processWorkerQueueDebounceMs: 50 }, - }, - }); - - engineTest("defaults to releasing env concurrency only", async ({ engine, prisma }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - 
runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the queue has the same concurrency as before - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(1); - - // Now confirm the environment has a concurrency of 0 - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(0); - - await engine.completeWaitpoint({ - id: result.waitpoint.id, - }); - - await setTimeout(500); - - // Test that we've reacquired the queue concurrency - const queueConcurrencyAfterWaitpoint = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint).toBe(1); - - // Test that we've reacquired the environment concurrency - const envConcurrencyAfterWaitpoint = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterWaitpoint).toBe(1); - - // Now we are going to block with another waitpoint, this time specifiying we want to release the concurrency in the waitpoint - const result2 = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - const executingWithWaitpointSnapshot2 = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result2.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: true, - }); - - expect(executingWithWaitpointSnapshot2.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Test that we've 
released the queue concurrency - const queueConcurrencyAfterWaitpoint2 = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint2).toBe(0); - - // Test that we've released the environment concurrency - const envConcurrencyAfterWaitpoint2 = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterWaitpoint2).toBe(0); - - // Complete the waitpoint and make sure the run reacquires the queue and environment concurrency - await engine.completeWaitpoint({ - id: result2.waitpoint.id, - }); - - await setTimeout(500); - - // Test that we've reacquired the queue concurrency - const queueConcurrencyAfterWaitpoint3 = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint3).toBe(1); - - await engine.quit(); - }); - - engineTest("releases all concurrency when configured on queue", async ({ engine, prisma }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: true, - } - ); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - 
authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the queue has the same concurrency as before - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(0); - - // Now confirm the environment has a concurrency of 0 - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(0); - - // Complete the waitpoint and make sure the run reacquires the queue and environment concurrency - await engine.completeWaitpoint({ - id: result.waitpoint.id, - }); - - await setTimeout(500); - - // Test that we've reacquired the queue concurrency - const queueConcurrencyAfterWaitpoint = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint).toBe(1); - - // Test that 
we've reacquired the environment concurrency - const envConcurrencyAfterWaitpoint = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterWaitpoint).toBe(1); - - // Now we are going to block with another waitpoint, this time specifiying we dont want to release the concurrency in the waitpoint - const result2 = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - const executingWithWaitpointSnapshot2 = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result2.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: false, - }); - - expect(executingWithWaitpointSnapshot2.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Test that we've not released the queue concurrency - const queueConcurrencyAfterWaitpoint2 = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint2).toBe(1); - - // Test that we've still released the environment concurrency since we always release env concurrency - const envConcurrencyAfterWaitpoint2 = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterWaitpoint2).toBe(0); - }); - - engineTest("releases all concurrency for unlimited queues", async ({ engine, prisma }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - releaseConcurrencyOnWaitpoint: true, - concurrencyLimit: null, - } - ); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: 
authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the queue has the same concurrency as before - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(0); - - // Now confirm the environment has a concurrency of 0 - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - 
expect(envConcurrencyAfter).toBe(0); - - // Complete the waitpoint and make sure the run reacquires the queue and environment concurrency - await engine.completeWaitpoint({ - id: result.waitpoint.id, - }); - - await setTimeout(500); - - // Test that we've reacquired the queue concurrency - const queueConcurrencyAfterWaitpoint = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint).toBe(1); - - // Test that we've reacquired the environment concurrency - const envConcurrencyAfterWaitpoint = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterWaitpoint).toBe(1); - - // Now we are going to block with another waitpoint, this time specifiying we dont want to release the concurrency in the waitpoint - const result2 = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - const executingWithWaitpointSnapshot2 = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result2.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: false, - }); - - expect(executingWithWaitpointSnapshot2.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Test that we've not released the queue concurrency - const queueConcurrencyAfterWaitpoint2 = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterWaitpoint2).toBe(1); - - // Test that we've still released the environment concurrency since we always release env concurrency - const envConcurrencyAfterWaitpoint2 = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterWaitpoint2).toBe(0); - }); - - describe("when maxTokenRation is 0.1", () => { - 
engineTest.scoped({ - engineOptions: { - releaseConcurrency: { - maxTokensRatio: 0.1, // 10% of the concurrency limit = 1 token - maxRetries: 3, - consumersCount: 1, - pollInterval: 500, - batchSize: 1, - }, - }, - }); - - engineTest( - "delays env concurrency release when token unavailable", - async ({ engine, prisma }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - await engine.releaseConcurrencySystem.consumeToken( - { - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: 
authenticatedEnvironment.id, - }, - "test_12345" - ); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the queue has the same concurrency as before - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(1); - - // Now confirm the environment is the same as before - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(1); - - // Now we return the token so the concurrency can be released - await engine.releaseConcurrencySystem.returnToken( - { - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }, - "test_12345" - ); - - // Wait until the token is released - await setTimeout(1_000); - - // Now the environment should have a concurrency of 0 - const envConcurrencyAfterReturn = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterReturn).toBe(0); - - // and the queue should have a concurrency of 1 - const queueConcurrencyAfterReturn = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterReturn).toBe(1); - } - ); - - engineTest("delays env concurrency release after checkpoint", async ({ prisma, engine }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier 
= "test-task"; - - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - await engine.releaseConcurrencySystem.consumeToken( - { - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }, - "test_12345" - ); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now 
confirm the queue has the same concurrency as before - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(1); - - // Now confirm the environment is the same as before - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(1); - - const checkpointResult = await engine.createCheckpoint({ - runId: run.id, - snapshotId: executingWithWaitpointSnapshot.id, - checkpoint: { - type: "DOCKER", - reason: "TEST_CHECKPOINT", - location: "test-location", - imageRef: "test-image-ref", - }, - }); - - expect(checkpointResult.ok).toBe(true); - - const snapshot = checkpointResult.ok ? checkpointResult.snapshot : null; - assertNonNullable(snapshot); - - console.log("Snapshot", snapshot); - - expect(snapshot.executionStatus).toBe("SUSPENDED"); - - // Now we return the token so the concurrency can be released - await engine.releaseConcurrencySystem.returnToken( - { - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }, - "test_12345" - ); - - // Wait until the token is released - await setTimeout(1_000); - - // Now the environment should have a concurrency of 0 - const envConcurrencyAfterReturn = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterReturn).toBe(0); - - // and the queue should have a concurrency of 1 - const queueConcurrencyAfterReturn = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterReturn).toBe(1); - }); - - engineTest("maintains concurrency after waitpoint completion", async ({ engine, prisma }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, 
"PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - await engine.releaseConcurrencySystem.consumeToken( - { - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }, - "test_12345" - ); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - 
expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the queue has the same concurrency as before - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(1); - - // Now confirm the environment is the same as before - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(1); - - // Complete the waitpoint - await engine.completeWaitpoint({ - id: result.waitpoint.id, - }); - - await setTimeout(1_000); - - // Verify the first run is now in EXECUTING state - const executionDataAfter = await engine.getRunExecutionData({ runId: run.id }); - expect(executionDataAfter?.snapshot.executionStatus).toBe("EXECUTING"); - - // Now we return the token so the concurrency can be released - await engine.releaseConcurrencySystem.returnToken( - { - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }, - "test_12345" - ); - - // give the release concurrency system time to run - await setTimeout(1_000); - - // Now the environment should have a concurrency of 1 - const envConcurrencyAfterReturn = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterReturn).toBe(1); - - // and the queue should have a concurrency of 1 - const queueConcurrencyAfterReturn = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterReturn).toBe(1); - }); - - engineTest( - "refills token bucket after waitpoint completion when snapshot not in release queue", - async ({ prisma, engine }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - 
const taskIdentifier = "test-task"; - - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - // Block the run, not specifying any release concurrency option - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the environment concurrency has been released - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - 
expect(envConcurrencyAfter).toBe(0); - - // And confirm the release concurrency system has consumed the token - const queueMetrics = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetrics?.currentTokens).toBe(0); - - await engine.completeWaitpoint({ - id: result.waitpoint.id, - }); - - await setTimeout(1_000); - - const executionData2 = await engine.getRunExecutionData({ runId: run.id }); - expect(executionData2?.snapshot.executionStatus).toBe("EXECUTING"); - - const queueMetricsAfter = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetricsAfter?.currentTokens).toBe(1); - } - ); - }); - - engineTest( - "refills token bucket after waitpoint completion when unable to reacquire concurrency, after dequeuing the queued executing run", - async ({ prisma, engine }) => { - //create environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - concurrencyLimit: 1, - } - ); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - 
}); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - // Block the run, specifying the release concurrency option as true - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: true, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Now confirm the environment concurrency has been released - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(0); - - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(0); - - // And confirm the release concurrency system has consumed the token - const queueMetrics = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetrics?.currentTokens).toBe(9); - - // Create and start second 
run on the same queue - const secondRun = await engine.trigger( - { - number: 2, - friendlyId: "run_second", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345-second", - spanId: "s12345-second", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - }, - prisma - ); - - // Dequeue and start the second run - await setTimeout(500); - - const dequeuedSecond = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - // Now confirm the environment concurrency has been released - const envConcurrencyAfterSecond = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfterSecond).toBe(1); - - const queueConcurrencyAfterSecond = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfterSecond).toBe(1); - - const secondAttempt = await engine.startRunAttempt({ - runId: dequeuedSecond[0].run.id, - snapshotId: dequeuedSecond[0].snapshot.id, - }); - - expect(secondAttempt.snapshot.executionStatus).toBe("EXECUTING"); - - // Complete the waitpoint that's blocking the first run - await engine.completeWaitpoint({ - id: result.waitpoint.id, - }); - - await setTimeout(1_000); - - // Verify that the first run could not reacquire the concurrency so it's back in the queue - const executionData2 = await engine.getRunExecutionData({ runId: run.id }); - expect(executionData2?.snapshot.executionStatus).toBe("QUEUED_EXECUTING"); - - const queueMetricsAfter = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - // We've consumed 1 token, so we should have 9 left - 
expect(queueMetricsAfter?.currentTokens).toBe(9); - - // Complete the second run so the first run can be dequeued - await engine.completeRunAttempt({ - runId: dequeuedSecond[0].run.id, - snapshotId: secondAttempt.snapshot.id, - completion: { - ok: true, - id: dequeuedSecond[0].run.id, - output: `{"foo":"bar"}`, - outputType: "application/json", - }, - }); - - await setTimeout(500); - - let event: EventBusEventArgs<"workerNotification">[0] | undefined = undefined; - engine.eventBus.on("workerNotification", (result) => { - event = result; - }); - - await setTimeout(500); - - // Verify the first run is back in the queue - const queuedRun = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - // We don't actually return the run here from dequeuing, it's instead sent to the cluster as a workerNotification - expect(queuedRun.length).toBe(0); - - assertNonNullable(event); - const notificationEvent = event as EventBusEventArgs<"workerNotification">[0]; - expect(notificationEvent.run.id).toBe(run.id); - expect(notificationEvent.snapshot.executionStatus).toBe("EXECUTING"); - - // Make sure the token bucket is refilled - const queueMetricsAfterSecondFinished = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetricsAfterSecondFinished?.currentTokens).toBe(10); - } - ); - - describe("release concurrency sweeper system", () => { - engineTest.scoped({ - engineOptions: { - releaseConcurrency: { - maxTokensRatio: 1, - maxRetries: 3, - consumersCount: 1, - pollInterval: 500, - releasingsPollInterval: 500, - batchSize: 1, - releasingsMaxAge: 2_000, - }, - }, - }); - - engineTest( - "refills token bucket after the run has a new snapshot created by the release concurrency sweeper system", - async ({ prisma, engine }) => { - //create 
environment - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const taskIdentifier = "test-task"; - - await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - concurrencyLimit: 1, - } - ); - - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_p1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345", - spanId: "s12345", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - }, - prisma - ); - - await setTimeout(500); - - const dequeued = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const queueConcurrency = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrency).toBe(1); - - const envConcurrency = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrency).toBe(1); - - // create an attempt - const attemptResult = await engine.startRunAttempt({ - runId: dequeued[0].run.id, - snapshotId: dequeued[0].snapshot.id, - }); - - expect(attemptResult.snapshot.executionStatus).toBe("EXECUTING"); - - // create a manual waitpoint - const result = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - - // Block the run, specifying the release concurrency option as true - const executingWithWaitpointSnapshot = await engine.blockRunWithWaitpoint({ - runId: run.id, - waitpoints: result.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: true, - }); - - expect(executingWithWaitpointSnapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // 
Now confirm the environment concurrency has been released - const envConcurrencyAfter = await engine.runQueue.currentConcurrencyOfEnvironment( - authenticatedEnvironment - ); - - expect(envConcurrencyAfter).toBe(0); - - const queueConcurrencyAfter = await engine.runQueue.currentConcurrencyOfQueue( - authenticatedEnvironment, - `task/${taskIdentifier}` - ); - - expect(queueConcurrencyAfter).toBe(0); - - // And confirm the release concurrency system has consumed the token - const queueMetrics = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetrics?.currentTokens).toBe(9); - - await setTimeout(3_000); - - const queueMetricsAfter = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetricsAfter?.currentTokens).toBe(9); - - // Now we create a new snapshot for the run, which will cause the sweeper system to refill the token bucket - await engine.executionSnapshotSystem.createExecutionSnapshot(prisma, { - run, - snapshot: { - executionStatus: "PENDING_CANCEL", - description: "Pending cancel", - }, - environmentId: authenticatedEnvironment.id, - environmentType: "PRODUCTION", - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - }); - - await setTimeout(3_000); - - const queueMetricsAfterRefill = - await engine.releaseConcurrencySystem.releaseConcurrencyQueue?.getReleaseQueueMetrics({ - orgId: authenticatedEnvironment.organizationId, - projectId: authenticatedEnvironment.projectId, - envId: authenticatedEnvironment.id, - }); - - expect(queueMetricsAfterRefill?.currentTokens).toBe(10); - } - ); - }); -}); diff --git 
a/internal-packages/run-engine/src/engine/tests/releaseConcurrencyTokenBucketQueue.test.ts b/internal-packages/run-engine/src/engine/tests/releaseConcurrencyTokenBucketQueue.test.ts deleted file mode 100644 index 74073566e7..0000000000 --- a/internal-packages/run-engine/src/engine/tests/releaseConcurrencyTokenBucketQueue.test.ts +++ /dev/null @@ -1,791 +0,0 @@ -import { redisTest, StartedRedisContainer } from "@internal/testcontainers"; -import { ReleaseConcurrencyTokenBucketQueue } from "../releaseConcurrencyTokenBucketQueue.js"; -import { setTimeout } from "node:timers/promises"; - -type TestQueueDescriptor = { - name: string; -}; - -function createReleaseConcurrencyQueue( - redisContainer: StartedRedisContainer, - maxTokens: number = 2 -) { - const executedRuns: { releaseQueue: string; runId: string }[] = []; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - executedRuns.push({ releaseQueue: releaseQueue.name, runId }); - return true; - }, - maxTokens: async (_) => maxTokens, - keys: { - fromDescriptor: (descriptor) => descriptor.name, - toDescriptor: (name) => ({ name }), - }, - pollInterval: 100, - }); - - return { - queue, - executedRuns, - }; -} - -describe("ReleaseConcurrencyQueue", () => { - redisTest("Should manage token bucket and queue correctly", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 2); - - try { - // First two attempts should execute immediately (we have 2 tokens) - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - - // Verify first two runs were executed - expect(executedRuns).toHaveLength(2); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run1" }); - expect(executedRuns).toContainEqual({ 
releaseQueue: "test-queue", runId: "run2" }); - - // Third attempt should be queued (no tokens left) - await queue.attemptToRelease({ name: "test-queue" }, "run3"); - expect(executedRuns).toHaveLength(2); // Still 2, run3 is queued - - // Refill one token, should execute run3 - await queue.refillTokens({ name: "test-queue" }, 1); - - // Now we need to wait for the queue to be processed - await setTimeout(1000); - - expect(executedRuns).toHaveLength(3); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run3" }); - } finally { - await queue.quit(); - } - }); - - redisTest("Should handle multiple refills correctly", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 3); - - try { - // Queue up 5 runs (more than maxTokens) - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - await queue.attemptToRelease({ name: "test-queue" }, "run3"); - await queue.attemptToRelease({ name: "test-queue" }, "run4"); - await queue.attemptToRelease({ name: "test-queue" }, "run5"); - - // First 3 should be executed immediately (maxTokens = 3) - expect(executedRuns).toHaveLength(3); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run1" }); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run2" }); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run3" }); - - // Refill 2 tokens - await queue.refillTokens({ name: "test-queue" }, 2); - - await setTimeout(1000); - - // Should execute the remaining 2 runs - expect(executedRuns).toHaveLength(5); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run4" }); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run5" }); - } finally { - await queue.quit(); - } - }); - - redisTest("Should handle multiple queues independently", async ({ redisContainer }) => { - 
const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // Add runs to different queues - await queue.attemptToRelease({ name: "queue1" }, "run1"); - await queue.attemptToRelease({ name: "queue1" }, "run2"); - await queue.attemptToRelease({ name: "queue2" }, "run3"); - await queue.attemptToRelease({ name: "queue2" }, "run4"); - - // Only first run from each queue should be executed - expect(executedRuns).toHaveLength(2); - expect(executedRuns).toContainEqual({ releaseQueue: "queue1", runId: "run1" }); - expect(executedRuns).toContainEqual({ releaseQueue: "queue2", runId: "run3" }); - - // Refill tokens for queue1 - await queue.refillTokens({ name: "queue1" }, 1); - - await setTimeout(1000); - - // Should only execute the queued run from queue1 - expect(executedRuns).toHaveLength(3); - expect(executedRuns).toContainEqual({ releaseQueue: "queue1", runId: "run2" }); - - // Refill tokens for queue2 - await queue.refillTokens({ name: "queue2" }, 1); - - await setTimeout(1000); - - // Should execute the queued run from queue2 - expect(executedRuns).toHaveLength(4); - expect(executedRuns).toContainEqual({ releaseQueue: "queue2", runId: "run4" }); - } finally { - await queue.quit(); - } - }); - - redisTest("Should not allow refilling more than maxTokens", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // Add two runs - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - - // First run should be executed immediately - expect(executedRuns).toHaveLength(1); - expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run1" }); - - // Refill with more tokens than needed - await queue.refillTokens({ name: "test-queue" }, 5); - - await setTimeout(1000); - - // Should only execute the one remaining run - expect(executedRuns).toHaveLength(2); - 
expect(executedRuns).toContainEqual({ releaseQueue: "test-queue", runId: "run2" }); - - // Add another run - should NOT execute immediately because we don't have excess tokens - await queue.attemptToRelease({ name: "test-queue" }, "run3"); - expect(executedRuns).toHaveLength(2); - } finally { - await queue.quit(); - } - }); - - redisTest("Should maintain FIFO order when releasing", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // Queue up multiple runs - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - await queue.attemptToRelease({ name: "test-queue" }, "run3"); - await queue.attemptToRelease({ name: "test-queue" }, "run4"); - - // First run should be executed immediately - expect(executedRuns).toHaveLength(1); - expect(executedRuns[0]).toEqual({ releaseQueue: "test-queue", runId: "run1" }); - - // Refill tokens one at a time and verify order - await queue.refillTokens({ name: "test-queue" }, 1); - - await setTimeout(1000); - - expect(executedRuns).toHaveLength(2); - expect(executedRuns[1]).toEqual({ releaseQueue: "test-queue", runId: "run2" }); - - await queue.refillTokens({ name: "test-queue" }, 1); - - await setTimeout(1000); - - expect(executedRuns).toHaveLength(3); - expect(executedRuns[2]).toEqual({ releaseQueue: "test-queue", runId: "run3" }); - - await queue.refillTokens({ name: "test-queue" }, 1); - - await setTimeout(1000); - - expect(executedRuns).toHaveLength(4); - expect(executedRuns[3]).toEqual({ releaseQueue: "test-queue", runId: "run4" }); - } finally { - await queue.quit(); - } - }); - - redisTest( - "Should handle executor failures by returning the token and adding the item into the queue", - async ({ redisContainer }) => { - let shouldFail = true; - - const executedRuns: { releaseQueue: string; runId: string }[] = []; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { 
- keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - if (shouldFail) { - throw new Error("Executor failed"); - } - executedRuns.push({ releaseQueue, runId }); - return true; - }, - maxTokens: async (_) => 2, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - batchSize: 2, - retry: { - maxRetries: 2, - backoff: { - minDelay: 100, - maxDelay: 1000, - factor: 1, - }, - }, - pollInterval: 50, - }); - - try { - // Attempt to release with failing executor - await queue.attemptToRelease("test-queue", "run1"); - // Does not execute because the executor throws an error - expect(executedRuns).toHaveLength(0); - - // Token should have been returned to the bucket so this should try to execute immediately and fail again - await queue.attemptToRelease("test-queue", "run2"); - expect(executedRuns).toHaveLength(0); - - // Allow executor to succeed - shouldFail = false; - - await setTimeout(1000); - - // Should now execute successfully - expect(executedRuns).toHaveLength(2); - expect(executedRuns[0]).toEqual({ releaseQueue: "test-queue", runId: "run1" }); - expect(executedRuns[1]).toEqual({ releaseQueue: "test-queue", runId: "run2" }); - } finally { - await queue.quit(); - } - } - ); - - redisTest("Should handle invalid token amounts", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // Try to refill with negative tokens - await expect(queue.refillTokens({ name: "test-queue" }, -1)).rejects.toThrow(); - - // Try to refill with zero tokens - await queue.refillTokens({ name: "test-queue" }, 0); - - await setTimeout(1000); - - expect(executedRuns).toHaveLength(0); - - // Verify normal operation still works - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - expect(executedRuns).toHaveLength(1); - } finally { - await queue.quit(); - } - }); - - 
redisTest("Should handle concurrent operations correctly", async ({ redisContainer }) => { - const executedRuns: { releaseQueue: string; runId: string }[] = []; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - // Add small delay to simulate work - await setTimeout(10); - executedRuns.push({ releaseQueue, runId }); - return true; - }, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - maxTokens: async (_) => 2, - batchSize: 5, - pollInterval: 50, - }); - - try { - // Attempt multiple concurrent releases - await Promise.all([ - queue.attemptToRelease("test-queue", "run1"), - queue.attemptToRelease("test-queue", "run2"), - queue.attemptToRelease("test-queue", "run3"), - queue.attemptToRelease("test-queue", "run4"), - ]); - - // Should only execute maxTokens (2) runs - expect(executedRuns).toHaveLength(2); - - // Attempt concurrent refills - await queue.refillTokens("test-queue", 2); - - await setTimeout(1000); - - // Should execute remaining runs - expect(executedRuns).toHaveLength(4); - - // Verify all runs were executed exactly once - const runCounts = executedRuns.reduce( - (acc, { runId }) => { - acc[runId] = (acc[runId] || 0) + 1; - return acc; - }, - {} as Record - ); - - Object.values(runCounts).forEach((count) => { - expect(count).toBe(1); - }); - } finally { - await queue.quit(); - } - }); - - redisTest("Should clean up Redis resources on quit", async ({ redisContainer }) => { - const { queue } = createReleaseConcurrencyQueue(redisContainer, 1); - - // Add some data - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - - // Quit the queue - await queue.quit(); - - // Verify we can't perform operations after quit - await expect(queue.attemptToRelease({ name: 
"test-queue" }, "run3")).rejects.toThrow(); - await expect(queue.refillTokens({ name: "test-queue" }, 1)).rejects.toThrow(); - }); - - redisTest("Should stop retrying after max retries is reached", async ({ redisContainer }) => { - let failCount = 0; - const executedRuns: { releaseQueue: string; runId: string; attempt: number }[] = []; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - failCount++; - executedRuns.push({ releaseQueue, runId, attempt: failCount }); - throw new Error("Executor failed"); - }, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - maxTokens: async (_) => 1, - retry: { - maxRetries: 2, // Set max retries to 2 (will attempt 3 times total: initial + 2 retries) - backoff: { - minDelay: 100, - maxDelay: 200, - factor: 1, - }, - }, - pollInterval: 50, // Reduce poll interval for faster test - }); - - try { - // Attempt to release - this will fail and retry - await queue.attemptToRelease("test-queue", "run1"); - - // Wait for retries to occur - await setTimeout(2000); - - // Should have attempted exactly 3 times (initial + 2 retries) - expect(executedRuns).toHaveLength(3); - expect(executedRuns[0]).toEqual({ releaseQueue: "test-queue", runId: "run1", attempt: 1 }); - expect(executedRuns[1]).toEqual({ releaseQueue: "test-queue", runId: "run1", attempt: 2 }); - expect(executedRuns[2]).toEqual({ releaseQueue: "test-queue", runId: "run1", attempt: 3 }); - - // Verify that no more retries occur - await setTimeout(1000); - expect(executedRuns).toHaveLength(3); // Should still be 3 - - // Attempt a new release to verify the token was returned - let secondRunAttempted = false; - const queue2 = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: 
redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - secondRunAttempted = true; - return true; - }, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - maxTokens: async (_) => 1, - retry: { - maxRetries: 2, - backoff: { - minDelay: 100, - maxDelay: 1000, - factor: 1, - }, - }, - pollInterval: 50, - }); - - await queue2.attemptToRelease("test-queue", "run2"); - expect(secondRunAttempted).toBe(true); // Should execute immediately because token was returned - - await queue2.quit(); - } finally { - await queue.quit(); - } - }); - - redisTest("Should handle max retries in batch processing", async ({ redisContainer }) => { - const executedRuns: { releaseQueue: string; runId: string; attempt: number }[] = []; - const runAttempts: Record = {}; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - runAttempts[runId] = (runAttempts[runId] || 0) + 1; - executedRuns.push({ releaseQueue, runId, attempt: runAttempts[runId] }); - throw new Error("Executor failed"); - }, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - maxTokens: async (_) => 3, - retry: { - maxRetries: 2, - backoff: { - minDelay: 100, - maxDelay: 1000, - factor: 1, - }, - }, - batchSize: 3, - pollInterval: 100, - }); - - try { - // Queue up multiple runs - await Promise.all([ - queue.attemptToRelease("test-queue", "run1"), - queue.attemptToRelease("test-queue", "run2"), - queue.attemptToRelease("test-queue", "run3"), - ]); - - // Wait for all retries to complete - await setTimeout(2000); - - // Each run should have been attempted exactly 3 times - expect(Object.values(runAttempts)).toHaveLength(3); // 3 runs - Object.values(runAttempts).forEach((attempts) => { - expect(attempts).toBe(3); // Each run attempted 3 times - }); - - // 
Verify execution order maintained retry attempts for each run - const run1Attempts = executedRuns.filter((r) => r.runId === "run1"); - const run2Attempts = executedRuns.filter((r) => r.runId === "run2"); - const run3Attempts = executedRuns.filter((r) => r.runId === "run3"); - - expect(run1Attempts).toHaveLength(3); - expect(run2Attempts).toHaveLength(3); - expect(run3Attempts).toHaveLength(3); - - // Verify attempts are numbered correctly for each run - [run1Attempts, run2Attempts, run3Attempts].forEach((attempts) => { - expect(attempts.map((a) => a.attempt)).toEqual([1, 2, 3]); - }); - - // Verify no more retries occur - await setTimeout(1000); - expect(executedRuns).toHaveLength(9); // 3 runs * 3 attempts each - } finally { - await queue.quit(); - } - }); - - redisTest( - "Should return token but not requeue when executor returns false", - async ({ redisContainer }) => { - const executedRuns: { releaseQueue: string; runId: string }[] = []; - const runResults: Record = { - run1: true, // This will succeed - run2: false, // This will return false, returning the token without requeuing - run3: true, // This should execute immediately when run2's token is returned - }; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - const success = runResults[runId]; - - executedRuns.push({ releaseQueue, runId }); - - return success; - }, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - maxTokens: async (_) => 2, // Only 2 tokens available at a time - pollInterval: 100, - }); - - try { - // First run should execute and succeed - await queue.attemptToRelease("test-queue", "run1"); - expect(executedRuns).toHaveLength(1); - expect(executedRuns[0]).toEqual({ releaseQueue: "test-queue", runId: "run1" }); - - // Second run should execute but return false, returning the 
token - await queue.attemptToRelease("test-queue", "run2"); - expect(executedRuns).toHaveLength(2); - expect(executedRuns[1]).toEqual({ releaseQueue: "test-queue", runId: "run2" }); - - // Third run should be able to execute immediately since run2 returned its token - await queue.attemptToRelease("test-queue", "run3"); - - expect(executedRuns).toHaveLength(3); - expect(executedRuns[2]).toEqual({ releaseQueue: "test-queue", runId: "run3" }); - - // Verify that run2 was not retried (it should have been skipped) - const run2Attempts = executedRuns.filter((r) => r.runId === "run2"); - expect(run2Attempts).toHaveLength(1); // Only executed once, not retried - } finally { - await queue.quit(); - } - } - ); - - redisTest("Should implement exponential backoff between retries", async ({ redisContainer }) => { - const executionTimes: number[] = []; - let startTime: number; - - const minDelay = 100; - const factor = 2; - - const queue = new ReleaseConcurrencyTokenBucketQueue({ - redis: { - keyPrefix: "release-queue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - executor: async (releaseQueue, runId) => { - const now = Date.now(); - executionTimes.push(now); - console.log(`Execution at ${now - startTime}ms from start`); - throw new Error("Executor failed"); - }, - keys: { - fromDescriptor: (descriptor) => descriptor, - toDescriptor: (name) => name, - }, - maxTokens: async (_) => 1, - retry: { - maxRetries: 2, - backoff: { - minDelay, - maxDelay: 1000, - factor, - }, - }, - pollInterval: 50, - }); - - try { - startTime = Date.now(); - await queue.attemptToRelease("test-queue", "run1"); - - // Wait for all retries to complete - await setTimeout(1000); - - // Should have 3 execution times (initial + 2 retries) - expect(executionTimes).toHaveLength(3); - - const intervals = executionTimes.slice(1).map((time, i) => time - executionTimes[i]); - console.log("Intervals between retries:", intervals); - - // First retry should be after ~200ms (minDelay 
+ processing overhead) - const expectedFirstDelay = minDelay * 2; // Account for observed overhead - expect(intervals[0]).toBeGreaterThanOrEqual(expectedFirstDelay * 0.8); - expect(intervals[0]).toBeLessThanOrEqual(expectedFirstDelay * 1.5); - - // Second retry should be after ~400ms (first delay * factor) - const expectedSecondDelay = expectedFirstDelay * factor; - expect(intervals[1]).toBeGreaterThanOrEqual(expectedSecondDelay * 0.8); - expect(intervals[1]).toBeLessThanOrEqual(expectedSecondDelay * 1.5); - - // Log expected vs actual delays - console.log("Expected delays:", { first: expectedFirstDelay, second: expectedSecondDelay }); - } finally { - await queue.quit(); - } - }); - - redisTest("Should not execute or queue when maxTokens is 0", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 0); - - try { - // Attempt to release with maxTokens of 0 - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - - // Wait some time to ensure no processing occurs - await setTimeout(1000); - - // Should not have executed any runs - expect(executedRuns).toHaveLength(0); - } finally { - await queue.quit(); - } - }); - - // Makes sure that the maxTokens is an integer (round down) - // And if it throws, returns 0 - redisTest("Should handle maxTokens errors", async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 0.5); - - try { - // Attempt to release with maxTokens of 0 - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - - // Wait some time to ensure no processing occurs - await setTimeout(1000); - - // Should not have executed any runs - expect(executedRuns).toHaveLength(0); - } finally { - await queue.quit(); - } - }); - - redisTest( - "refillTokenIfInReleasings should refill token when releaserId is in the 
releasings set", - async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 2); - - try { - // Use up all tokens - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - - // Try to refill token for a releaserId that's not in queue - const wasRefilled = await queue.refillTokenIfInReleasings({ name: "test-queue" }, "run1"); - expect(wasRefilled).toBe(true); - - // Verify we can now execute a new run - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - await setTimeout(100); - expect(executedRuns).toHaveLength(2); - } finally { - await queue.quit(); - } - } - ); - - redisTest( - "refillTokenIfInReleasings should not refill token when releaserId is not in the releasings set", - async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // Use the only token - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - expect(executedRuns).toHaveLength(1); - - // Queue up run2 - await queue.attemptToRelease({ name: "test-queue" }, "run2"); - expect(executedRuns).toHaveLength(1); // run2 is queued - - // Try to refill token for run2 which is in queue - const wasRefilled = await queue.refillTokenIfInReleasings({ name: "test-queue" }, "run2"); - expect(wasRefilled).toBe(false); - - // Verify run2 is still queued by refilling a token normally - await queue.refillTokenIfInReleasings({ name: "test-queue" }, "run1"); - await setTimeout(100); - expect(executedRuns).toHaveLength(2); - expect(executedRuns[1]).toEqual({ releaseQueue: "test-queue", runId: "run2" }); - } finally { - await queue.quit(); - } - } - ); - - redisTest( - "refillTokenIfInReleasings should handle multiple queues independently", - async ({ redisContainer }) => { - const { queue, executedRuns } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // Use tokens in both queues - await queue.attemptToRelease({ name: "queue1" }, "run1"); - await 
queue.attemptToRelease({ name: "queue2" }, "run2"); - expect(executedRuns).toHaveLength(2); - - // Queue up more runs - await queue.attemptToRelease({ name: "queue1" }, "run3"); - await queue.attemptToRelease({ name: "queue2" }, "run4"); - expect(executedRuns).toHaveLength(2); // run3 and run4 are queued - - // Try to refill tokens for different releaserIds - const wasRefilled1 = await queue.refillTokenIfInReleasings({ name: "queue1" }, "run1"); - const wasRefilled2 = await queue.refillTokenIfInReleasings({ name: "queue2" }, "run4"); - - expect(wasRefilled1).toBe(true); // run1 not in queue1 - expect(wasRefilled2).toBe(false); // run4 is in queue2 - - // Verify queue1 can execute a new run with the refilled token - await queue.attemptToRelease({ name: "queue1" }, "run5"); - await setTimeout(100); - expect(executedRuns).toHaveLength(3); - expect(executedRuns[2]).toEqual({ releaseQueue: "queue1", runId: "run5" }); - } finally { - await queue.quit(); - } - } - ); - - redisTest("refillTokenIfInReleasings should not exceed maxTokens", async ({ redisContainer }) => { - const { queue } = createReleaseConcurrencyQueue(redisContainer, 1); - - try { - // First consume a token - await queue.attemptToRelease({ name: "test-queue" }, "run1"); - - // First refill should work - const firstRefill = await queue.refillTokenIfInReleasings({ name: "test-queue" }, "run1"); - expect(firstRefill).toBe(true); - - // Second refill should work but not exceed maxTokens - const secondRefill = await queue.refillTokenIfInReleasings({ name: "test-queue" }, "run2"); - expect(secondRefill).toBe(false); - - // Get metrics to verify token count - const metrics = await queue.getReleaseQueueMetrics({ name: "test-queue" }); - expect(metrics.currentTokens).toBe(1); // Should not exceed maxTokens - } finally { - await queue.quit(); - } - }); -}); diff --git a/internal-packages/run-engine/src/engine/tests/setup.ts b/internal-packages/run-engine/src/engine/tests/setup.ts index 116eb043c8..85c11a2218 100644 
--- a/internal-packages/run-engine/src/engine/tests/setup.ts +++ b/internal-packages/run-engine/src/engine/tests/setup.ts @@ -6,6 +6,7 @@ import { import { MachineConfig, RetryOptions } from "@trigger.dev/core/v3/schemas"; import { BackgroundWorkerTask, + Decimal, Prisma, PrismaClient, RunEngineVersion, @@ -64,6 +65,7 @@ export async function setupAuthenticatedEnvironment( pkApiKey: "pk_api_key", shortcode: "short_code", maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), }, }); @@ -87,10 +89,11 @@ export async function setupBackgroundWorker( retryOptions?: RetryOptions, queueOptions?: { customQueues?: string[]; - releaseConcurrencyOnWaitpoint?: boolean; concurrencyLimit?: number | null; } ) { + await engine.runQueue.updateEnvConcurrencyLimits(environment); + const latestWorkers = await engine.prisma.backgroundWorker.findMany({ where: { runtimeEnvironmentId: environment.id, @@ -166,10 +169,6 @@ export async function setupBackgroundWorker( id: worker.id, }, }, - releaseConcurrencyOnWaitpoint: - typeof queueOptions?.releaseConcurrencyOnWaitpoint === "boolean" - ? queueOptions.releaseConcurrencyOnWaitpoint - : undefined, tasks: { connect: { id: task.id, @@ -228,10 +227,6 @@ export async function setupBackgroundWorker( id: worker.id, }, }, - releaseConcurrencyOnWaitpoint: - typeof queueOptions?.releaseConcurrencyOnWaitpoint === "boolean" - ? 
queueOptions.releaseConcurrencyOnWaitpoint - : undefined, }, update: { concurrencyLimit: diff --git a/internal-packages/run-engine/src/engine/tests/utils/engineTest.ts b/internal-packages/run-engine/src/engine/tests/utils/engineTest.ts index 4c3563e1e6..67fb542921 100644 --- a/internal-packages/run-engine/src/engine/tests/utils/engineTest.ts +++ b/internal-packages/run-engine/src/engine/tests/utils/engineTest.ts @@ -34,15 +34,6 @@ type EngineOptions = { machines?: RunEngineOptions["machines"]["machines"]; baseCostInCents?: number; }; - releaseConcurrency?: { - maxTokensRatio?: number; - maxRetries?: number; - consumersCount?: number; - pollInterval?: number; - batchSize?: number; - releasingsPollInterval?: number; - releasingsMaxAge?: number; - }; }; const engineOptions = async ({}: TaskContext, use: Use) => { @@ -68,13 +59,6 @@ const engineOptions = async ({}: TaskContext, use: Use) => { }, baseCostInCents: 0.0001, }, - releaseConcurrency: { - maxTokensRatio: 1, - maxRetries: 3, - consumersCount: 1, - pollInterval: 500, - batchSize: 1, - }, }; await use(options); @@ -114,15 +98,6 @@ const engine = async ( machines: engineOptions.machines?.machines ?? {}, baseCostInCents: engineOptions.machines?.baseCostInCents ?? 0.0001, }, - releaseConcurrency: { - maxTokensRatio: engineOptions.releaseConcurrency?.maxTokensRatio ?? 1, - maxRetries: engineOptions.releaseConcurrency?.maxRetries ?? 3, - consumersCount: engineOptions.releaseConcurrency?.consumersCount ?? 1, - pollInterval: engineOptions.releaseConcurrency?.pollInterval ?? 500, - batchSize: engineOptions.releaseConcurrency?.batchSize ?? 
1, - releasingsPollInterval: engineOptions.releaseConcurrency?.releasingsPollInterval, - releasingsMaxAge: engineOptions.releaseConcurrency?.releasingsMaxAge, - }, tracer: trace.getTracer("test", "0.0.0"), }); diff --git a/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts b/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts index 25414a91e8..544c11e6d0 100644 --- a/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts +++ b/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts @@ -100,7 +100,6 @@ describe("RunEngine Waitpoints", () => { waitpoints: [waitpoint.id], projectId: authenticatedEnvironment.project.id, organizationId: authenticatedEnvironment.organization.id, - releaseConcurrency: true, }); expect(result.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); expect(result.runStatus).toBe("EXECUTING"); @@ -1130,220 +1129,6 @@ describe("RunEngine Waitpoints", () => { } }); - containerTest( - "continueRunIfUnblocked enqueues run when cannot reacquire concurrency", - async ({ prisma, redisOptions }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, - }, - baseCostInCents: 0.0001, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); - - try { - const taskIdentifier = "test-task"; - - // Create background worker - await setupBackgroundWorker( - engine, - authenticatedEnvironment, - taskIdentifier, - undefined, - undefined, - { - concurrencyLimit: 1, - } - ); - - // Create first run with queue concurrency limit of 1 - const firstRun = await engine.trigger( - { - number: 1, - friendlyId: 
"run_first", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345-first", - spanId: "s12345-first", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - }, - prisma - ); - - // Dequeue and start the first run - await setTimeout(500); - const dequeuedFirst = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const firstAttempt = await engine.startRunAttempt({ - runId: dequeuedFirst[0].run.id, - snapshotId: dequeuedFirst[0].snapshot.id, - }); - expect(firstAttempt.snapshot.executionStatus).toBe("EXECUTING"); - - // Create a manual waitpoint for the first run - const waitpoint = await engine.createManualWaitpoint({ - environmentId: authenticatedEnvironment.id, - projectId: authenticatedEnvironment.projectId, - }); - expect(waitpoint.waitpoint.status).toBe("PENDING"); - - // Block the first run with releaseConcurrency set to true - const blockedResult = await engine.blockRunWithWaitpoint({ - runId: firstRun.id, - waitpoints: waitpoint.waitpoint.id, - projectId: authenticatedEnvironment.projectId, - organizationId: authenticatedEnvironment.organizationId, - releaseConcurrency: true, - }); - - // Verify first run is blocked - const firstRunData = await engine.getRunExecutionData({ runId: firstRun.id }); - expect(firstRunData?.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - - // Create and start second run on the same queue - const secondRun = await engine.trigger( - { - number: 2, - friendlyId: "run_second", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t12345-second", - spanId: "s12345-second", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - }, - prisma - ); - - // Dequeue and start the second run - await setTimeout(500); - 
const dequeuedSecond = await engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - const secondAttempt = await engine.startRunAttempt({ - runId: dequeuedSecond[0].run.id, - snapshotId: dequeuedSecond[0].snapshot.id, - }); - expect(secondAttempt.snapshot.executionStatus).toBe("EXECUTING"); - - // Now complete the waitpoint for the first run - await engine.completeWaitpoint({ - id: waitpoint.waitpoint.id, - }); - - // Wait for the continueRunIfUnblocked to process - await setTimeout(500); - - // Verify the first run is now in QUEUED_EXECUTING state - const executionDataAfter = await engine.getRunExecutionData({ runId: firstRun.id }); - expect(executionDataAfter?.snapshot.executionStatus).toBe("QUEUED_EXECUTING"); - expect(executionDataAfter?.snapshot.description).toBe( - "Run can continue, but is waiting for concurrency" - ); - - // Verify the waitpoint is no longer blocking the first run - const runWaitpoint = await prisma.taskRunWaitpoint.findFirst({ - where: { - taskRunId: firstRun.id, - }, - include: { - waitpoint: true, - }, - }); - expect(runWaitpoint).toBeNull(); - - // Verify the waitpoint itself is completed - const completedWaitpoint = await prisma.waitpoint.findUnique({ - where: { - id: waitpoint.waitpoint.id, - }, - }); - assertNonNullable(completedWaitpoint); - expect(completedWaitpoint.status).toBe("COMPLETED"); - - // Complete the second run so the first run can be dequeued - const result = await engine.completeRunAttempt({ - runId: dequeuedSecond[0].run.id, - snapshotId: secondAttempt.snapshot.id, - completion: { - ok: true, - id: dequeuedSecond[0].run.id, - output: `{"foo":"bar"}`, - outputType: "application/json", - }, - }); - - await setTimeout(500); - - let event: EventBusEventArgs<"workerNotification">[0] | undefined = undefined; - engine.eventBus.on("workerNotification", (result) => { - event = result; - }); - - // Verify the first run is back in the queue - const queuedRun = await 
engine.dequeueFromWorkerQueue({ - consumerId: "test_12345", - workerQueue: "main", - }); - - expect(queuedRun.length).toBe(0); - - // Get the latest execution snapshot and make sure it's EXECUTING - const executionData = await engine.getRunExecutionData({ runId: firstRun.id }); - assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("EXECUTING"); - - assertNonNullable(event); - const notificationEvent = event as EventBusEventArgs<"workerNotification">[0]; - expect(notificationEvent.run.id).toBe(firstRun.id); - expect(notificationEvent.snapshot.executionStatus).toBe("EXECUTING"); - } finally { - await engine.quit(); - } - } - ); - containerTest( "getSnapshotsSince returns correct snapshots and handles errors", async ({ prisma, redisOptions }) => { @@ -1424,7 +1209,6 @@ describe("RunEngine Waitpoints", () => { waitpoints: [waitpoint.id], projectId: authenticatedEnvironment.project.id, organizationId: authenticatedEnvironment.organization.id, - releaseConcurrency: true, }); // Wait for the waitpoint to complete and unblock (snapshot 3) diff --git a/internal-packages/run-engine/src/engine/types.ts b/internal-packages/run-engine/src/engine/types.ts index 7082ed3486..5884e0ab9b 100644 --- a/internal-packages/run-engine/src/engine/types.ts +++ b/internal-packages/run-engine/src/engine/types.ts @@ -1,19 +1,18 @@ import { type RedisOptions } from "@internal/redis"; -import { Worker, type WorkerConcurrencyOptions } from "@trigger.dev/redis-worker"; import { Meter, Tracer } from "@internal/tracing"; +import { Logger, LogLevel } from "@trigger.dev/core/logger"; import { MachinePreset, MachinePresetName, - QueueOptions, RetryOptions, RunChainState, } from "@trigger.dev/core/v3"; import { PrismaClient, PrismaReplicaClient } from "@trigger.dev/database"; +import { Worker, type WorkerConcurrencyOptions } from "@trigger.dev/redis-worker"; import { FairQueueSelectionStrategyOptions } from "../run-queue/fairQueueSelectionStrategy.js"; import { 
MinimalAuthenticatedEnvironment } from "../shared/index.js"; -import { workerCatalog } from "./workerCatalog.js"; -import { Logger, LogLevel } from "@trigger.dev/core/logger"; import { LockRetryConfig } from "./locking.js"; +import { workerCatalog } from "./workerCatalog.js"; export type RunEngineOptions = { prisma: PrismaClient; @@ -39,6 +38,7 @@ export type RunEngineOptions = { workerOptions?: WorkerConcurrencyOptions; retryOptions?: RetryOptions; defaultEnvConcurrency?: number; + defaultEnvConcurrencyBurstFactor?: number; logLevel?: LogLevel; queueSelectionStrategyOptions?: Pick< FairQueueSelectionStrategyOptions, @@ -66,23 +66,6 @@ export type RunEngineOptions = { meter?: Meter; logger?: Logger; logLevel?: LogLevel; - releaseConcurrency?: { - disabled?: boolean; - maxTokensRatio?: number; - releasingsMaxAge?: number; - releasingsPollInterval?: number; - redis?: Partial; - maxRetries?: number; - consumersCount?: number; - pollInterval?: number; - batchSize?: number; - backoff?: { - minDelay?: number; // Defaults to 1000 - maxDelay?: number; // Defaults to 60000 - factor?: number; // Defaults to 2 - }; - disableConsumers?: boolean; - }; }; export type HeartbeatTimeouts = { @@ -142,8 +125,6 @@ export type TriggerParams = { machine?: MachinePresetName; workerId?: string; runnerId?: string; - releaseConcurrency?: boolean; - runChainState?: RunChainState; scheduleId?: string; scheduleInstanceId?: string; createdAt?: Date; diff --git a/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts b/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts index 26ef1d8093..5f2f031bdb 100644 --- a/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts +++ b/internal-packages/run-engine/src/run-queue/fairQueueSelectionStrategy.ts @@ -499,15 +499,19 @@ export class FairQueueSelectionStrategy implements RunQueueSelectionStrategy { span.setAttribute("org_id", env.orgId); span.setAttribute("project_id", env.projectId); - const 
[currentValue, limitValue] = await Promise.all([ + const [currentValue, limitValue, limitBurstFactor] = await Promise.all([ this.#getEnvCurrentConcurrency(env), this.#getEnvConcurrencyLimit(env), + this.#getEnvConcurrencyLimitBurstFactor(env), ]); span.setAttribute("current_value", currentValue); span.setAttribute("limit_value", limitValue); + span.setAttribute("limit_burst_factor", limitBurstFactor); - return { current: currentValue, limit: limitValue }; + const limit = Math.floor(limitValue * limitBurstFactor); + + return { current: currentValue, limit }; }); } @@ -584,6 +588,28 @@ export class FairQueueSelectionStrategy implements RunQueueSelectionStrategy { }); } + async #getEnvConcurrencyLimitBurstFactor(env: EnvDescriptor) { + return await startSpan( + this.options.tracer, + "getEnvConcurrencyLimitBurstFactor", + async (span) => { + span.setAttribute("env_id", env.envId); + span.setAttribute("org_id", env.orgId); + span.setAttribute("project_id", env.projectId); + + const key = this.options.keys.envConcurrencyLimitBurstFactorKey(env); + + const result = await this._redis.get(key); + + if (typeof result === "string") { + return Number(result); + } + + return 1; + } + ); + } + #envDescriptorFromFairQueue(queue: FairQueue): EnvDescriptor { return { envId: queue.env, diff --git a/internal-packages/run-engine/src/run-queue/index.test.ts b/internal-packages/run-engine/src/run-queue/index.test.ts index 5480b8d2ed..81aa60b687 100644 --- a/internal-packages/run-engine/src/run-queue/index.test.ts +++ b/internal-packages/run-engine/src/run-queue/index.test.ts @@ -8,6 +8,7 @@ import { InputPayload } from "./types.js"; import { createRedisClient } from "@internal/redis"; import { FairQueueSelectionStrategy } from "./fairQueueSelectionStrategy.js"; import { RunQueueFullKeyProducer } from "./keyProducer.js"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -30,6 +31,7 @@ const authenticatedEnvProd = { id: "e1234", type: "PRODUCTION" as 
const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(1.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; @@ -38,6 +40,7 @@ const authenticatedEnvDev = { id: "e1234", type: "DEVELOPMENT" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(1.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; @@ -949,33 +952,6 @@ describe("RunQueue", () => { 0 ); expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(0); - - //reacquire the concurrency - await queue.reacquireConcurrency(authenticatedEnvProd.organization.id, message.messageId); - - //check concurrencies are back to what they were before - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - - //release the concurrency (with the queue this time) - await queue.releaseAllConcurrency(authenticatedEnvProd.organization.id, message.messageId); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 0 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(0); - - //reacquire the concurrency - await queue.reacquireConcurrency(authenticatedEnvProd.organization.id, message.messageId); - - //check concurrencies are back to what they were before - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); } finally { try { await queue.quit(); diff --git a/internal-packages/run-engine/src/run-queue/index.ts b/internal-packages/run-engine/src/run-queue/index.ts index 3fc4d0883c..1afd7b41b1 100644 --- a/internal-packages/run-engine/src/run-queue/index.ts +++ b/internal-packages/run-engine/src/run-queue/index.ts @@ -1,26 +1,41 @@ import { + createRedisClient, + type Callback, + 
type Redis, + type RedisOptions, + type Result, +} from "@internal/redis"; +import { + Attributes, context, + getMeter, + Meter, + ObservableResult, propagation, + SEMATTRS_MESSAGE_ID, + SEMATTRS_MESSAGING_OPERATION, + SEMATTRS_MESSAGING_SYSTEM, Span, SpanKind, SpanOptions, Tracer, - SEMATTRS_MESSAGE_ID, - SEMATTRS_MESSAGING_OPERATION, - SEMATTRS_MESSAGING_SYSTEM, - Meter, - getMeter, ValueType, - ObservableResult, - Attributes, } from "@internal/tracing"; +import { promiseWithResolvers, tryCatch } from "@trigger.dev/core"; import { Logger, LogLevel } from "@trigger.dev/core/logger"; import { calculateNextRetryDelay, flattenAttributes } from "@trigger.dev/core/v3"; import { type RetryOptions } from "@trigger.dev/core/v3/schemas"; +import { Decimal } from "@trigger.dev/database"; +import { CronSchema, Worker, type WorkerConcurrencyOptions } from "@trigger.dev/redis-worker"; +import { nanoid } from "nanoid"; +import { Readable } from "node:stream"; +import { setInterval, setTimeout } from "node:timers/promises"; +import { z } from "zod"; import { attributesFromAuthenticatedEnv, MinimalAuthenticatedEnvironment, } from "../shared/index.js"; +import { MessageNotFoundError } from "./errors.js"; import { InputPayload, OutputPayload, @@ -28,21 +43,6 @@ import { RunQueueKeyProducer, RunQueueSelectionStrategy, } from "./types.js"; -import { - createRedisClient, - type Redis, - type Callback, - type RedisOptions, - type Result, -} from "@internal/redis"; -import { MessageNotFoundError } from "./errors.js"; -import { promiseWithResolvers, tryCatch } from "@trigger.dev/core"; -import { setInterval } from "node:timers/promises"; -import { nanoid } from "nanoid"; -import { CronSchema, Worker, type WorkerConcurrencyOptions } from "@trigger.dev/redis-worker"; -import { z } from "zod"; -import { Readable } from "node:stream"; -import { setTimeout } from "node:timers/promises"; const SemanticAttributes = { QUEUE: "runqueue.queue", @@ -60,6 +60,7 @@ export type RunQueueOptions = { 
tracer: Tracer; redis: RedisOptions; defaultEnvConcurrency: number; + defaultEnvConcurrencyBurstFactor?: number; windowSize?: number; keys: RunQueueKeyProducer; queueSelectionStrategy: RunQueueSelectionStrategy; @@ -310,9 +311,11 @@ export class RunQueue { } public async updateEnvConcurrencyLimits(env: MinimalAuthenticatedEnvironment) { - await this.#callUpdateGlobalConcurrencyLimits({ + await this.#callUpdateEnvironmentConcurrencyLimits({ envConcurrencyLimitKey: this.keys.envConcurrencyLimitKey(env), envConcurrencyLimit: env.maximumConcurrencyLimit, + envConcurrencyLimitBurstFactorKey: this.keys.envConcurrencyLimitBurstFactorKey(env), + envConcurrencyLimitBurstFactor: env.concurrencyLimitBurstFactor.toString(), }); } @@ -322,6 +325,18 @@ export class RunQueue { return result ? Number(result) : this.options.defaultEnvConcurrency; } + public async getEnvConcurrencyLimitWithBurstFactor(env: MinimalAuthenticatedEnvironment) { + const result = await this.redis.get(this.keys.envConcurrencyLimitBurstFactorKey(env)); + + const burstFactor = result + ? Number(result) + : this.options.defaultEnvConcurrencyBurstFactor ?? 
1; + + const limit = await this.getEnvConcurrencyLimit(env); + + return Math.floor(limit * burstFactor); + } + public async lengthOfQueue( env: MinimalAuthenticatedEnvironment, queue: string, @@ -383,7 +398,7 @@ export class RunQueue { queue: string, concurrencyKey?: string ) { - return this.redis.scard(this.keys.currentConcurrencyKey(env, queue, concurrencyKey)); + return this.redis.scard(this.keys.queueCurrentConcurrencyKey(env, queue, concurrencyKey)); } public async currentConcurrencyOfQueues( @@ -394,7 +409,7 @@ export class RunQueue { // Queue up all SCARD commands in the pipeline queues.forEach((queue) => { - pipeline.scard(this.keys.currentConcurrencyKey(env, queue)); + pipeline.scard(this.keys.queueCurrentDequeuedKey(env, queue)); }); // Execute pipeline and get results @@ -456,8 +471,17 @@ export class RunQueue { ); } + /** + * Get the current concurrency of the environment + * @param env - The environment to get the current concurrency of + * @returns The current concurrency of the environment + */ public async currentConcurrencyOfEnvironment(env: MinimalAuthenticatedEnvironment) { - return this.redis.scard(this.keys.envCurrentConcurrencyKey(env)); + // We are now using the envCurrentDequeuedKey to track the current "displayed" concurrency of the environment + // Operationally, we use the envCurrentConcurrencyKey to track the actual concurrency of the environment + // The currentDequeuedKey is incremented when a message is dequeued from the worker queue, + // wherease the currentConcurrencyKey is incremented when a message is dequeued from the message queue and put into the worker queue + return this.redis.scard(this.keys.envCurrentDequeuedKey(env)); } public async messageExists(orgId: string, messageId: string) { @@ -478,29 +502,27 @@ export class RunQueue { return; } - const deserializedMessage = safeJsonParse(rawMessage); + const [error, message] = parseRawMessage(rawMessage); - const message = OutputPayload.safeParse(deserializedMessage); - - if 
(!message.success) { + if (error) { this.logger.error(`[${this.name}] Failed to parse message`, { messageKey, - error: message.error, + error, service: this.name, - deserializedMessage, + message: message ?? rawMessage, }); - - return deserializedMessage as OutputPayload; } - span.setAttributes({ - [SemanticAttributes.QUEUE]: message.data.queue, - [SemanticAttributes.RUN_ID]: message.data.runId, - [SemanticAttributes.CONCURRENCY_KEY]: message.data.concurrencyKey, - [SemanticAttributes.WORKER_QUEUE]: this.#getWorkerQueueFromMessage(message.data), - }); + if (message) { + span.setAttributes({ + [SemanticAttributes.QUEUE]: message.queue, + [SemanticAttributes.RUN_ID]: message.runId, + [SemanticAttributes.CONCURRENCY_KEY]: message.concurrencyKey, + [SemanticAttributes.WORKER_QUEUE]: this.#getWorkerQueueFromMessage(message), + }); + } - return message.data; + return message; }, { attributes: { @@ -778,8 +800,10 @@ export class RunQueue { }); return this.redis.releaseConcurrency( - this.keys.currentConcurrencyKeyFromQueue(message.queue), + this.keys.queueCurrentConcurrencyKeyFromQueue(message.queue), this.keys.envCurrentConcurrencyKeyFromQueue(message.queue), + this.keys.queueCurrentDequeuedKeyFromQueue(message.queue), + this.keys.envCurrentDequeuedKeyFromQueue(message.queue), messageId ); }, @@ -794,87 +818,6 @@ export class RunQueue { ); } - public async releaseEnvConcurrency(orgId: string, messageId: string) { - return this.#trace( - "releaseEnvConcurrency", - async (span) => { - const message = await this.readMessage(orgId, messageId); - - if (!message) { - this.logger.log(`[${this.name}].releaseEnvConcurrency() message not found`, { - messageId, - service: this.name, - }); - return; - } - - span.setAttributes({ - [SemanticAttributes.QUEUE]: message.queue, - [SemanticAttributes.ORG_ID]: message.orgId, - [SemanticAttributes.RUN_ID]: messageId, - [SemanticAttributes.CONCURRENCY_KEY]: message.concurrencyKey, - }); - - return this.redis.releaseEnvConcurrency( - 
this.keys.envCurrentConcurrencyKeyFromQueue(message.queue), - messageId - ); - }, - { - kind: SpanKind.CONSUMER, - attributes: { - [SEMATTRS_MESSAGING_OPERATION]: "releaseEnvConcurrency", - [SEMATTRS_MESSAGE_ID]: messageId, - [SEMATTRS_MESSAGING_SYSTEM]: "runqueue", - }, - } - ); - } - - public async reacquireConcurrency(orgId: string, messageId: string) { - return this.#trace( - "reacquireConcurrency", - async (span) => { - const message = await this.readMessage(orgId, messageId); - - if (!message) { - throw new MessageNotFoundError(messageId); - } - - span.setAttributes({ - [SemanticAttributes.QUEUE]: message.queue, - [SemanticAttributes.ORG_ID]: message.orgId, - [SemanticAttributes.RUN_ID]: messageId, - [SemanticAttributes.CONCURRENCY_KEY]: message.concurrencyKey, - }); - - const queueCurrentConcurrencyKey = this.keys.currentConcurrencyKeyFromQueue(message.queue); - const envCurrentConcurrencyKey = this.keys.envCurrentConcurrencyKeyFromQueue(message.queue); - const queueConcurrencyLimitKey = this.keys.concurrencyLimitKeyFromQueue(message.queue); - const envConcurrencyLimitKey = this.keys.envConcurrencyLimitKeyFromQueue(message.queue); - - const result = await this.redis.reacquireConcurrency( - queueCurrentConcurrencyKey, - envCurrentConcurrencyKey, - queueConcurrencyLimitKey, - envConcurrencyLimitKey, - messageId, - String(this.options.defaultEnvConcurrency) - ); - - return !!result; - }, - { - kind: SpanKind.CONSUMER, - attributes: { - [SEMATTRS_MESSAGING_OPERATION]: "releaseConcurrency", - [SEMATTRS_MESSAGE_ID]: messageId, - [SEMATTRS_MESSAGING_SYSTEM]: "runqueue", - }, - } - ); - } - public async removeEnvironmentQueuesFromMasterQueue( runtimeEnvironmentId: string, organizationId: string, @@ -994,6 +937,7 @@ export class RunQueue { type: data.environmentType, //this isn't used in enqueueMessage maximumConcurrencyLimit: -1, + concurrencyLimitBurstFactor: new Decimal(1.0), project: { id: data.projectId, }, @@ -1354,8 +1298,10 @@ export class RunQueue { async 
#callEnqueueMessage(message: OutputPayloadV2) { const queueKey = message.queue; const messageKey = this.keys.messageKey(message.orgId, message.runId); - const queueCurrentConcurrencyKey = this.keys.currentConcurrencyKeyFromQueue(message.queue); + const queueCurrentConcurrencyKey = this.keys.queueCurrentConcurrencyKeyFromQueue(message.queue); const envCurrentConcurrencyKey = this.keys.envCurrentConcurrencyKeyFromQueue(message.queue); + const queueCurrentDequeuedKey = this.keys.queueCurrentDequeuedKeyFromQueue(message.queue); + const envCurrentDequeuedKey = this.keys.envCurrentDequeuedKeyFromQueue(message.queue); const envQueueKey = this.keys.envQueueKeyFromQueue(message.queue); const masterQueueKey = this.keys.masterQueueKeyForEnvironment( message.environmentId, @@ -1372,6 +1318,8 @@ export class RunQueue { messageKey, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, envQueueKey, queueName, messageId, @@ -1387,6 +1335,8 @@ export class RunQueue { messageKey, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, envQueueKey, queueName, messageId, @@ -1404,9 +1354,11 @@ export class RunQueue { shard: number; maxCount: number; }): Promise { - const queueConcurrencyLimitKey = this.keys.concurrencyLimitKeyFromQueue(messageQueue); - const queueCurrentConcurrencyKey = this.keys.currentConcurrencyKeyFromQueue(messageQueue); + const queueConcurrencyLimitKey = this.keys.queueConcurrencyLimitKeyFromQueue(messageQueue); + const queueCurrentConcurrencyKey = this.keys.queueCurrentConcurrencyKeyFromQueue(messageQueue); const envConcurrencyLimitKey = this.keys.envConcurrencyLimitKeyFromQueue(messageQueue); + const envConcurrencyLimitBurstFactorKey = + this.keys.envConcurrencyLimitBurstFactorKeyFromQueue(messageQueue); const envCurrentConcurrencyKey = this.keys.envCurrentConcurrencyKeyFromQueue(messageQueue); const messageKeyPrefix = 
this.keys.messageKeyPrefixFromQueue(messageQueue); const envQueueKey = this.keys.envQueueKeyFromQueue(messageQueue); @@ -1416,6 +1368,7 @@ export class RunQueue { messageQueue, queueConcurrencyLimitKey, envConcurrencyLimitKey, + envConcurrencyLimitBurstFactorKey, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, messageKeyPrefix, @@ -1430,6 +1383,7 @@ export class RunQueue { messageQueue, queueConcurrencyLimitKey, envConcurrencyLimitKey, + envConcurrencyLimitBurstFactorKey, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, messageKeyPrefix, @@ -1439,6 +1393,7 @@ export class RunQueue { messageQueue, String(Date.now()), String(this.options.defaultEnvConcurrency), + String(this.options.defaultEnvConcurrencyBurstFactor ?? 1), this.options.redis.keyPrefix ?? "", String(maxCount) ); @@ -1552,7 +1507,7 @@ export class RunQueue { const [, messageKey] = result; - const message = await this.readMessageFromKey(messageKey); + const message = await this.#dequeueMessageFromKey(messageKey); if (!message) { return; @@ -1575,8 +1530,10 @@ export class RunQueue { const messageId = message.runId; const messageKey = this.keys.messageKey(message.orgId, messageId); const messageQueue = message.queue; - const queueCurrentConcurrencyKey = this.keys.currentConcurrencyKeyFromQueue(message.queue); + const queueCurrentConcurrencyKey = this.keys.queueCurrentConcurrencyKeyFromQueue(message.queue); const envCurrentConcurrencyKey = this.keys.envCurrentConcurrencyKeyFromQueue(message.queue); + const queueCurrentDequeuedKey = this.keys.queueCurrentDequeuedKeyFromQueue(message.queue); + const envCurrentDequeuedKey = this.keys.envCurrentDequeuedKeyFromQueue(message.queue); const envQueueKey = this.keys.envQueueKeyFromQueue(message.queue); const masterQueueKey = this.keys.masterQueueKeyForEnvironment( message.environmentId, @@ -1591,6 +1548,8 @@ export class RunQueue { messageQueue, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, 
envQueueKey, messageId, masterQueueKey, @@ -1607,6 +1566,8 @@ export class RunQueue { messageQueue, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, envQueueKey, workerQueueKey, messageId, @@ -1620,8 +1581,10 @@ export class RunQueue { const messageId = message.runId; const messageKey = this.keys.messageKey(message.orgId, message.runId); const messageQueue = message.queue; - const queueCurrentConcurrencyKey = this.keys.currentConcurrencyKeyFromQueue(message.queue); + const queueCurrentConcurrencyKey = this.keys.queueCurrentConcurrencyKeyFromQueue(message.queue); const envCurrentConcurrencyKey = this.keys.envCurrentConcurrencyKeyFromQueue(message.queue); + const queueCurrentDequeuedKey = this.keys.queueCurrentDequeuedKeyFromQueue(message.queue); + const envCurrentDequeuedKey = this.keys.envCurrentDequeuedKeyFromQueue(message.queue); const envQueueKey = this.keys.envQueueKeyFromQueue(message.queue); const masterQueueKey = this.keys.masterQueueKeyForEnvironment( message.environmentId, @@ -1637,6 +1600,8 @@ export class RunQueue { masterQueueKey, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, envQueueKey, messageId, messageScore, @@ -1651,6 +1616,8 @@ export class RunQueue { messageQueue, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, envQueueKey, //args messageId, @@ -1664,8 +1631,10 @@ export class RunQueue { const messageId = message.runId; const messageKey = this.keys.messageKey(message.orgId, message.runId); const messageQueue = message.queue; - const queueCurrentConcurrencyKey = this.keys.currentConcurrencyKeyFromQueue(message.queue); + const queueCurrentConcurrencyKey = this.keys.queueCurrentConcurrencyKeyFromQueue(message.queue); const envCurrentConcurrencyKey = this.keys.envCurrentConcurrencyKeyFromQueue(message.queue); + const queueCurrentDequeuedKey = 
this.keys.queueCurrentDequeuedKeyFromQueue(message.queue); + const envCurrentDequeuedKey = this.keys.envCurrentDequeuedKeyFromQueue(message.queue); const envQueueKey = this.keys.envQueueKeyFromQueue(message.queue); const deadLetterQueueKey = this.keys.deadLetterQueueKeyFromQueue(message.queue); const masterQueueKey = this.keys.masterQueueKeyForEnvironment( @@ -1679,6 +1648,8 @@ export class RunQueue { messageQueue, queueCurrentConcurrencyKey, envCurrentConcurrencyKey, + queueCurrentDequeuedKey, + envCurrentDequeuedKey, envQueueKey, deadLetterQueueKey, messageId, @@ -1686,16 +1657,22 @@ export class RunQueue { ); } - #callUpdateGlobalConcurrencyLimits({ + #callUpdateEnvironmentConcurrencyLimits({ envConcurrencyLimitKey, envConcurrencyLimit, + envConcurrencyLimitBurstFactorKey, + envConcurrencyLimitBurstFactor, }: { envConcurrencyLimitKey: string; envConcurrencyLimit: number; + envConcurrencyLimitBurstFactorKey: string; + envConcurrencyLimitBurstFactor: string; }) { - return this.redis.updateGlobalConcurrencyLimits( + return this.redis.updateEnvironmentConcurrencyLimits( envConcurrencyLimitKey, - String(envConcurrencyLimit) + envConcurrencyLimitBurstFactorKey, + String(envConcurrencyLimit), + envConcurrencyLimitBurstFactor ); } @@ -1963,6 +1940,30 @@ export class RunQueue { }); } + async #dequeueMessageFromKey(messageKey: string) { + const rawMessage = await this.redis.dequeueMessageFromKey( + messageKey, + this.options.redis.keyPrefix ?? "" + ); + + if (!rawMessage) { + return; + } + + const [error, message] = parseRawMessage(rawMessage); + + if (error) { + this.logger.error(`[${this.name}] Failed to parse message`, { + messageKey, + error, + service: this.name, + message: message ?? 
rawMessage, + }); + } + + return message; + } + #registerCommands() { this.redis.defineCommand("migrateLegacyMasterQueues", { numberOfKeys: 1, @@ -1988,14 +1989,16 @@ end }); this.redis.defineCommand("enqueueMessage", { - numberOfKeys: 6, + numberOfKeys: 8, lua: ` local masterQueueKey = KEYS[1] local queueKey = KEYS[2] local messageKey = KEYS[3] local queueCurrentConcurrencyKey = KEYS[4] local envCurrentConcurrencyKey = KEYS[5] -local envQueueKey = KEYS[6] +local queueCurrentDequeuedKey = KEYS[6] +local envCurrentDequeuedKey = KEYS[7] +local envQueueKey = KEYS[8] local queueName = ARGV[1] local messageId = ARGV[2] @@ -2023,33 +2026,38 @@ end -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) +redis.call('SREM', queueCurrentDequeuedKey, messageId) +redis.call('SREM', envCurrentDequeuedKey, messageId) `, }); this.redis.defineCommand("dequeueMessagesFromQueue", { - numberOfKeys: 8, + numberOfKeys: 9, lua: ` local queueKey = KEYS[1] local queueConcurrencyLimitKey = KEYS[2] local envConcurrencyLimitKey = KEYS[3] -local queueCurrentConcurrencyKey = KEYS[4] -local envCurrentConcurrencyKey = KEYS[5] -local messageKeyPrefix = KEYS[6] -local envQueueKey = KEYS[7] -local masterQueueKey = KEYS[8] +local envConcurrencyLimitBurstFactorKey = KEYS[4] +local queueCurrentConcurrencyKey = KEYS[5] +local envCurrentConcurrencyKey = KEYS[6] +local messageKeyPrefix = KEYS[7] +local envQueueKey = KEYS[8] +local masterQueueKey = KEYS[9] local queueName = ARGV[1] local currentTime = tonumber(ARGV[2]) local defaultEnvConcurrencyLimit = ARGV[3] -local keyPrefix = ARGV[4] -local maxCount = tonumber(ARGV[5] or '1') +local defaultEnvConcurrencyBurstFactor = ARGV[4] +local keyPrefix = ARGV[5] +local maxCount = tonumber(ARGV[6] or '1') -- Check current env concurrency against the limit local envCurrentConcurrency = tonumber(redis.call('SCARD', envCurrentConcurrencyKey) or '0') local envConcurrencyLimit = 
tonumber(redis.call('GET', envConcurrencyLimitKey) or defaultEnvConcurrencyLimit) -local totalEnvConcurrencyLimit = envConcurrencyLimit +local envConcurrencyLimitBurstFactor = tonumber(redis.call('GET', envConcurrencyLimitBurstFactorKey) or defaultEnvConcurrencyBurstFactor) +local envConcurrencyLimitWithBurstFactor = math.floor(envConcurrencyLimit * envConcurrencyLimitBurstFactor) -if envCurrentConcurrency >= totalEnvConcurrencyLimit then +if envCurrentConcurrency >= envConcurrencyLimitWithBurstFactor then return nil end @@ -2064,7 +2072,7 @@ if queueCurrentConcurrency >= totalQueueConcurrencyLimit then end -- Calculate how many messages we can actually dequeue based on concurrency limits -local envAvailableCapacity = totalEnvConcurrencyLimit - envCurrentConcurrency +local envAvailableCapacity = envConcurrencyLimitWithBurstFactor - envCurrentConcurrency local queueAvailableCapacity = totalQueueConcurrencyLimit - queueCurrentConcurrency local actualMaxCount = math.min(maxCount, envAvailableCapacity, queueAvailableCapacity) @@ -2121,8 +2129,40 @@ return results `, }); + this.redis.defineCommand("dequeueMessageFromKey", { + numberOfKeys: 1, + lua: ` +-- Keys: +local messageKey = KEYS[1] + +-- Args: +local keyPrefix = ARGV[1] + +-- Get the raw messages string +local message = redis.call('GET', messageKey) + +if not message then + return nil +end + +-- Parse the message +local messageData = cjson.decode(message) + +-- Construct the "current dequeued" keys for the queue and the environment +local queueCurrentDequeuedKey = keyPrefix .. messageData.queue .. ':currentDequeued' +local envCurrentDequeuedKey = keyPrefix .. string.match(messageData.queue, "(.+):queue:") .. 
":currentDequeued" + +-- Update the concurrency keys +redis.call('SADD', queueCurrentDequeuedKey, messageData.runId) +redis.call('SADD', envCurrentDequeuedKey, messageData.runId) + +-- Return the message data +return message + `, + }); + this.redis.defineCommand("acknowledgeMessage", { - numberOfKeys: 7, + numberOfKeys: 9, lua: ` -- Keys: local masterQueueKey = KEYS[1] @@ -2130,8 +2170,10 @@ local messageKey = KEYS[2] local messageQueueKey = KEYS[3] local queueCurrentConcurrencyKey = KEYS[4] local envCurrentConcurrencyKey = KEYS[5] -local envQueueKey = KEYS[6] -local workerQueueKey = KEYS[7] +local queueCurrentDequeuedKey = KEYS[6] +local envCurrentDequeuedKey = KEYS[7] +local envQueueKey = KEYS[8] +local workerQueueKey = KEYS[9] -- Args: local messageId = ARGV[1] @@ -2157,6 +2199,8 @@ end -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) +redis.call('SREM', queueCurrentDequeuedKey, messageId) +redis.call('SREM', envCurrentDequeuedKey, messageId) -- Remove the message from the worker queue if removeFromWorkerQueue == '1' then @@ -2166,7 +2210,7 @@ end }); this.redis.defineCommand("nackMessage", { - numberOfKeys: 6, + numberOfKeys: 8, lua: ` -- Keys: local masterQueueKey = KEYS[1] @@ -2174,7 +2218,9 @@ local messageKey = KEYS[2] local messageQueueKey = KEYS[3] local queueCurrentConcurrencyKey = KEYS[4] local envCurrentConcurrencyKey = KEYS[5] -local envQueueKey = KEYS[6] +local queueCurrentDequeuedKey = KEYS[6] +local envCurrentDequeuedKey = KEYS[7] +local envQueueKey = KEYS[8] -- Args: local messageId = ARGV[1] @@ -2188,6 +2234,8 @@ redis.call('SET', messageKey, messageData) -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) +redis.call('SREM', queueCurrentDequeuedKey, messageId) +redis.call('SREM', envCurrentDequeuedKey, messageId) -- Enqueue the message into the queue 
redis.call('ZADD', messageQueueKey, messageScore, messageId) @@ -2204,7 +2252,7 @@ end }); this.redis.defineCommand("moveToDeadLetterQueue", { - numberOfKeys: 7, + numberOfKeys: 9, lua: ` -- Keys: local masterQueueKey = KEYS[1] @@ -2212,8 +2260,10 @@ local messageKey = KEYS[2] local messageQueue = KEYS[3] local queueCurrentConcurrencyKey = KEYS[4] local envCurrentConcurrencyKey = KEYS[5] -local envQueueKey = KEYS[6] -local deadLetterQueueKey = KEYS[7] +local queueCurrentDequeuedKey = KEYS[6] +local envCurrentDequeuedKey = KEYS[7] +local envQueueKey = KEYS[8] +local deadLetterQueueKey = KEYS[9] -- Args: local messageId = ARGV[1] @@ -2237,15 +2287,19 @@ redis.call('ZADD', deadLetterQueueKey, tonumber(redis.call('TIME')[1]), messageI -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) +redis.call('SREM', queueCurrentDequeuedKey, messageId) +redis.call('SREM', envCurrentDequeuedKey, messageId) `, }); this.redis.defineCommand("releaseConcurrency", { - numberOfKeys: 2, + numberOfKeys: 4, lua: ` -- Keys: local queueCurrentConcurrencyKey = KEYS[1] local envCurrentConcurrencyKey = KEYS[2] +local queueCurrentDequeuedKey = KEYS[3] +local envCurrentDequeuedKey = KEYS[4] -- Args: local messageId = ARGV[1] @@ -2253,83 +2307,24 @@ local messageId = ARGV[1] -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) +redis.call('SREM', queueCurrentDequeuedKey, messageId) +redis.call('SREM', envCurrentDequeuedKey, messageId) `, }); - this.redis.defineCommand("releaseEnvConcurrency", { - numberOfKeys: 1, - lua: ` --- Keys: -local envCurrentConcurrencyKey = KEYS[1] - --- Args: -local messageId = ARGV[1] - --- Update the concurrency keys -redis.call('SREM', envCurrentConcurrencyKey, messageId) -`, - }); - - this.redis.defineCommand("reacquireConcurrency", { - numberOfKeys: 4, - lua: ` --- Keys: -local 
queueCurrentConcurrencyKey = KEYS[1] -local envCurrentConcurrencyKey = KEYS[2] -local queueConcurrencyLimitKey = KEYS[3] -local envConcurrencyLimitKey = KEYS[4] - --- Args: -local messageId = ARGV[1] -local defaultEnvConcurrencyLimit = ARGV[2] - --- Check if the message is already in either current concurrency set -local isInQueueConcurrency = redis.call('SISMEMBER', queueCurrentConcurrencyKey, messageId) == 1 -local isInEnvConcurrency = redis.call('SISMEMBER', envCurrentConcurrencyKey, messageId) == 1 - --- If it's already in both sets, we're done -if isInQueueConcurrency and isInEnvConcurrency then - return true -end - --- Check current env concurrency against the limit -local envCurrentConcurrency = tonumber(redis.call('SCARD', envCurrentConcurrencyKey) or '0') -local envConcurrencyLimit = tonumber(redis.call('GET', envConcurrencyLimitKey) or defaultEnvConcurrencyLimit) -local totalEnvConcurrencyLimit = envConcurrencyLimit - -if envCurrentConcurrency >= totalEnvConcurrencyLimit then - return false -end - --- Check current queue concurrency against the limit -if not isInQueueConcurrency then - local queueCurrentConcurrency = tonumber(redis.call('SCARD', queueCurrentConcurrencyKey) or '0') - local queueConcurrencyLimit = math.min(tonumber(redis.call('GET', queueConcurrencyLimitKey) or '1000000'), envConcurrencyLimit) - local totalQueueConcurrencyLimit = queueConcurrencyLimit - - if queueCurrentConcurrency >= totalQueueConcurrencyLimit then - return false - end -end - --- Update the concurrency keys -redis.call('SADD', queueCurrentConcurrencyKey, messageId) -redis.call('SADD', envCurrentConcurrencyKey, messageId) - -return true -`, - }); - - this.redis.defineCommand("updateGlobalConcurrencyLimits", { - numberOfKeys: 1, + this.redis.defineCommand("updateEnvironmentConcurrencyLimits", { + numberOfKeys: 2, lua: ` --- Keys: envConcurrencyLimitKey +-- Keys: envConcurrencyLimitKey, envConcurrencyLimitBurstFactorKey local envConcurrencyLimitKey = KEYS[1] +local 
envConcurrencyLimitBurstFactorKey = KEYS[2] -- Args: envConcurrencyLimit local envConcurrencyLimit = ARGV[1] +local envConcurrencyLimitBurstFactor = ARGV[2] redis.call('SET', envConcurrencyLimitKey, envConcurrencyLimit) +redis.call('SET', envConcurrencyLimitBurstFactorKey, envConcurrencyLimitBurstFactor) `, }); @@ -2415,6 +2410,8 @@ declare module "@internal/redis" { messageKey: string, queueCurrentConcurrencyKey: string, envCurrentConcurrencyKey: string, + queueCurrentDequeuedKey: string, + envCurrentDequeuedKey: string, envQueueKey: string, //args queueName: string, @@ -2429,6 +2426,7 @@ declare module "@internal/redis" { childQueue: string, queueConcurrencyLimitKey: string, envConcurrencyLimitKey: string, + envConcurrencyLimitBurstFactorKey: string, queueCurrentConcurrencyKey: string, envCurrentConcurrencyKey: string, messageKeyPrefix: string, @@ -2438,28 +2436,32 @@ declare module "@internal/redis" { childQueueName: string, currentTime: string, defaultEnvConcurrencyLimit: string, + defaultEnvConcurrencyBurstFactor: string, keyPrefix: string, maxCount: string, callback?: Callback ): Result; - dequeueMessageFromWorkerQueue( + dequeueMessageFromKey( // keys - workerQueueKey: string, + messageKey: string, // args keyPrefix: string, - timeoutInSeconds: string, - callback?: Callback<[string, string]> - ): Result<[string, string] | null, Context>; + callback?: Callback + ): Result; acknowledgeMessage( + // keys masterQueueKey: string, messageKey: string, messageQueue: string, - concurrencyKey: string, - envConcurrencyKey: string, + queueCurrentConcurrencyKey: string, + envCurrentConcurrencyKey: string, + queueCurrentDequeuedKey: string, + envCurrentDequeuedKey: string, envQueueKey: string, workerQueueKey: string, + // args messageId: string, messageQueueName: string, messageKeyValue: string, @@ -2468,12 +2470,16 @@ declare module "@internal/redis" { ): Result; nackMessage( + // keys masterQueueKey: string, messageKey: string, messageQueue: string, 
queueCurrentConcurrencyKey: string, envCurrentConcurrencyKey: string, + queueCurrentDequeuedKey: string, + envCurrentDequeuedKey: string, envQueueKey: string, + // args messageId: string, messageQueueName: string, messageData: string, @@ -2482,44 +2488,39 @@ declare module "@internal/redis" { ): Result; moveToDeadLetterQueue( + // keys masterQueueKey: string, messageKey: string, messageQueue: string, queueCurrentConcurrencyKey: string, envCurrentConcurrencyKey: string, + queueCurrentDequeuedKey: string, + envCurrentDequeuedKey: string, envQueueKey: string, deadLetterQueueKey: string, + // args messageId: string, messageQueueName: string, callback?: Callback ): Result; releaseConcurrency( + // keys queueCurrentConcurrencyKey: string, envCurrentConcurrencyKey: string, + queueCurrentDequeuedKey: string, + envCurrentDequeuedKey: string, + // args messageId: string, callback?: Callback ): Result; - releaseEnvConcurrency( - envCurrentConcurrencyKey: string, - messageId: string, - callback?: Callback - ): Result; - - reacquireConcurrency( - queueCurrentConcurrencyKey: string, - envCurrentConcurrencyKey: string, - queueConcurrencyLimitKey: string, - envConcurrencyLimitKey: string, - messageId: string, - defaultEnvConcurrencyLimit: string, - callback?: Callback - ): Result; - - updateGlobalConcurrencyLimits( + updateEnvironmentConcurrencyLimits( + // keys envConcurrencyLimitKey: string, + envConcurrencyLimitBurstFactorKey: string, envConcurrencyLimit: string, + envConcurrencyLimitBurstFactor: string, callback?: Callback ): Result; @@ -2536,3 +2537,20 @@ declare module "@internal/redis" { ): Result; } } + +type ParseRawMessageResult = [Error | null, OutputPayload | null]; + +function parseRawMessage(rawMessage: string): ParseRawMessageResult { + const deserializedMessage = safeJsonParse(rawMessage); + + const message = OutputPayload.safeParse(deserializedMessage); + + // If the message is not valid, return the error to be logged + // but also still return the deserialized 
message as the OutputPayload + // to prevent forward compatibility issues + if (!message.success) { + return [message.error, deserializedMessage as OutputPayload]; + } + + return [null, message.data]; +} diff --git a/internal-packages/run-engine/src/run-queue/keyProducer.ts b/internal-packages/run-engine/src/run-queue/keyProducer.ts index d2ac500f7a..cff3b78af7 100644 --- a/internal-packages/run-engine/src/run-queue/keyProducer.ts +++ b/internal-packages/run-engine/src/run-queue/keyProducer.ts @@ -1,11 +1,12 @@ -import { MinimalAuthenticatedEnvironment } from "../shared/index.js"; +import type { RunQueueKeyProducerEnvironment } from "./types.js"; import { EnvDescriptor, QueueDescriptor, RunQueueKeyProducer } from "./types.js"; import { jumpHash } from "@trigger.dev/core/v3/serverOnly"; const constants = { CURRENT_CONCURRENCY_PART: "currentConcurrency", + CURRENT_DEQUEUED_PART: "currentDequeued", CONCURRENCY_LIMIT_PART: "concurrency", - DISABLED_CONCURRENCY_LIMIT_PART: "disabledConcurrency", + CONCURRENCY_LIMIT_BURST_FACTOR_PART: "concurrencyBurstFactor", ENV_PART: "env", ORG_PART: "org", PROJECT_PART: "proj", @@ -41,13 +42,13 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { return [constants.WORKER_QUEUE_PART, workerQueue].join(":"); } - queueConcurrencyLimitKey(env: MinimalAuthenticatedEnvironment, queue: string) { + queueConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment, queue: string) { return [this.queueKey(env, queue), constants.CONCURRENCY_LIMIT_PART].join(":"); } envConcurrencyLimitKey(env: EnvDescriptor): string; - envConcurrencyLimitKey(env: MinimalAuthenticatedEnvironment): string; - envConcurrencyLimitKey(envOrDescriptor: EnvDescriptor | MinimalAuthenticatedEnvironment): string { + envConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment): string; + envConcurrencyLimitKey(envOrDescriptor: EnvDescriptor | RunQueueKeyProducerEnvironment): string { if ("id" in envOrDescriptor) { return [ 
this.orgKeySection(envOrDescriptor.organization.id), @@ -65,6 +66,28 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { } } + envConcurrencyLimitBurstFactorKey(env: EnvDescriptor): string; + envConcurrencyLimitBurstFactorKey(env: RunQueueKeyProducerEnvironment): string; + envConcurrencyLimitBurstFactorKey( + envOrDescriptor: EnvDescriptor | RunQueueKeyProducerEnvironment + ): string { + if ("id" in envOrDescriptor) { + return [ + this.orgKeySection(envOrDescriptor.organization.id), + this.projKeySection(envOrDescriptor.project.id), + this.envKeySection(envOrDescriptor.id), + constants.CONCURRENCY_LIMIT_BURST_FACTOR_PART, + ].join(":"); + } else { + return [ + this.orgKeySection(envOrDescriptor.orgId), + this.projKeySection(envOrDescriptor.projectId), + this.envKeySection(envOrDescriptor.envId), + constants.CONCURRENCY_LIMIT_BURST_FACTOR_PART, + ].join(":"); + } + } + queueKey( orgId: string, projId: string, @@ -72,9 +95,9 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { queue: string, concurrencyKey?: string ): string; - queueKey(env: MinimalAuthenticatedEnvironment, queue: string, concurrencyKey?: string): string; + queueKey(env: RunQueueKeyProducerEnvironment, queue: string, concurrencyKey?: string): string; queueKey( - envOrOrgId: MinimalAuthenticatedEnvironment | string, + envOrOrgId: RunQueueKeyProducerEnvironment | string, projIdOrQueue: string, envIdConcurrencyKey?: string, queue?: string, @@ -101,7 +124,7 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { .join(":"); } - envQueueKey(env: MinimalAuthenticatedEnvironment) { + envQueueKey(env: RunQueueKeyProducerEnvironment) { return [this.orgKeySection(env.organization.id), this.envKeySection(env.id)].join(":"); } @@ -110,17 +133,29 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { return [this.orgKeySection(orgId), this.envKeySection(envId)].join(":"); } - concurrencyLimitKeyFromQueue(queue: string) { + 
queueConcurrencyLimitKeyFromQueue(queue: string) { const concurrencyQueueName = queue.replace(/:ck:.+$/, ""); return `${concurrencyQueueName}:${constants.CONCURRENCY_LIMIT_PART}`; } - currentConcurrencyKeyFromQueue(queue: string) { + queueCurrentConcurrencyKeyFromQueue(queue: string) { return `${queue}:${constants.CURRENT_CONCURRENCY_PART}`; } - currentConcurrencyKey( - env: MinimalAuthenticatedEnvironment, + queueCurrentDequeuedKeyFromQueue(queue: string) { + return `${queue}:${constants.CURRENT_DEQUEUED_PART}`; + } + + queueCurrentDequeuedKey( + env: RunQueueKeyProducerEnvironment, + queue: string, + concurrencyKey?: string + ): string { + return [this.queueKey(env, queue, concurrencyKey), constants.CURRENT_DEQUEUED_PART].join(":"); + } + + queueCurrentConcurrencyKey( + env: RunQueueKeyProducerEnvironment, queue: string, concurrencyKey?: string ): string { @@ -129,11 +164,6 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { ); } - disabledConcurrencyLimitKeyFromQueue(queue: string) { - const { orgId } = this.descriptorFromQueue(queue); - return `{${constants.ORG_PART}:${orgId}}:${constants.DISABLED_CONCURRENCY_LIMIT_PART}`; - } - envConcurrencyLimitKeyFromQueue(queue: string) { const { orgId, projectId, envId } = this.descriptorFromQueue(queue); @@ -144,6 +174,16 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { }); } + envConcurrencyLimitBurstFactorKeyFromQueue(queue: string) { + const { orgId, projectId, envId } = this.descriptorFromQueue(queue); + + return this.envConcurrencyLimitBurstFactorKey({ + orgId, + projectId, + envId, + }); + } + envCurrentConcurrencyKeyFromQueue(queue: string) { const { orgId, envId, projectId } = this.descriptorFromQueue(queue); @@ -155,9 +195,9 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { } envCurrentConcurrencyKey(env: EnvDescriptor): string; - envCurrentConcurrencyKey(env: MinimalAuthenticatedEnvironment): string; + envCurrentConcurrencyKey(env: 
RunQueueKeyProducerEnvironment): string; envCurrentConcurrencyKey( - envOrDescriptor: EnvDescriptor | MinimalAuthenticatedEnvironment + envOrDescriptor: EnvDescriptor | RunQueueKeyProducerEnvironment ): string { if ("id" in envOrDescriptor) { return [ @@ -176,6 +216,36 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { } } + envCurrentDequeuedKeyFromQueue(queue: string) { + const { orgId, envId, projectId } = this.descriptorFromQueue(queue); + + return this.envCurrentDequeuedKey({ + orgId, + projectId, + envId, + }); + } + + envCurrentDequeuedKey(env: EnvDescriptor): string; + envCurrentDequeuedKey(env: RunQueueKeyProducerEnvironment): string; + envCurrentDequeuedKey(envOrDescriptor: EnvDescriptor | RunQueueKeyProducerEnvironment): string { + if ("id" in envOrDescriptor) { + return [ + this.orgKeySection(envOrDescriptor.organization.id), + this.projKeySection(envOrDescriptor.project.id), + this.envKeySection(envOrDescriptor.id), + constants.CURRENT_DEQUEUED_PART, + ].join(":"); + } else { + return [ + this.orgKeySection(envOrDescriptor.orgId), + this.projKeySection(envOrDescriptor.projectId), + this.envKeySection(envOrDescriptor.envId), + constants.CURRENT_DEQUEUED_PART, + ].join(":"); + } + } + messageKeyPrefixFromQueue(queue: string) { const { orgId } = this.descriptorFromQueue(queue); return `${this.orgKeySection(orgId)}:${constants.MESSAGE_PART}:`; @@ -199,9 +269,9 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { return this.descriptorFromQueue(queue).projectId; } - deadLetterQueueKey(env: MinimalAuthenticatedEnvironment): string; + deadLetterQueueKey(env: RunQueueKeyProducerEnvironment): string; deadLetterQueueKey(env: EnvDescriptor): string; - deadLetterQueueKey(envOrDescriptor: EnvDescriptor | MinimalAuthenticatedEnvironment): string { + deadLetterQueueKey(envOrDescriptor: EnvDescriptor | RunQueueKeyProducerEnvironment): string { if ("id" in envOrDescriptor) { return [ 
this.orgKeySection(envOrDescriptor.organization.id), diff --git a/internal-packages/run-engine/src/run-queue/tests/ack.test.ts b/internal-packages/run-engine/src/run-queue/tests/ack.test.ts index f04358ecb6..354e7ded94 100644 --- a/internal-packages/run-engine/src/run-queue/tests/ack.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/ack.test.ts @@ -7,6 +7,7 @@ import { FairQueueSelectionStrategy } from "../fairQueueSelectionStrategy.js"; import { RunQueue } from "../index.js"; import { RunQueueFullKeyProducer } from "../keyProducer.js"; import { InputPayload } from "../types.js"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -28,6 +29,7 @@ const authenticatedEnvDev = { id: "e1234", type: "DEVELOPMENT" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; diff --git a/internal-packages/run-engine/src/run-queue/tests/concurrencySweeper.test.ts b/internal-packages/run-engine/src/run-queue/tests/concurrencySweeper.test.ts index 342cba674a..739a6bb190 100644 --- a/internal-packages/run-engine/src/run-queue/tests/concurrencySweeper.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/concurrencySweeper.test.ts @@ -7,6 +7,7 @@ import { FairQueueSelectionStrategy } from "../fairQueueSelectionStrategy.js"; import { RunQueue } from "../index.js"; import { RunQueueFullKeyProducer } from "../keyProducer.js"; import { InputPayload } from "../types.js"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -28,6 +29,7 @@ const authenticatedEnvDev = { id: "e1234", type: "DEVELOPMENT" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; diff --git a/internal-packages/run-engine/src/run-queue/tests/dequeueMessageFromWorkerQueue.test.ts 
b/internal-packages/run-engine/src/run-queue/tests/dequeueMessageFromWorkerQueue.test.ts index 4d1a038a78..08cea00cc1 100644 --- a/internal-packages/run-engine/src/run-queue/tests/dequeueMessageFromWorkerQueue.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/dequeueMessageFromWorkerQueue.test.ts @@ -6,6 +6,7 @@ import { RunQueue } from "../index.js"; import { RunQueueFullKeyProducer } from "../keyProducer.js"; import { InputPayload } from "../types.js"; import { setTimeout } from "node:timers/promises"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -26,6 +27,7 @@ const authenticatedEnvDev = { id: "e1234", type: "DEVELOPMENT" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; @@ -196,6 +198,218 @@ describe("RunQueue.dequeueMessageFromWorkerQueue", () => { } ); + redisTest( + "should not dequeue when env current concurrency equals env concurrency limit with burst factor", + async ({ redisContainer }) => { + const queue = new RunQueue({ + ...testOptions, + queueSelectionStrategy: new FairQueueSelectionStrategy({ + redis: { + keyPrefix: "runqueue:test:", + host: redisContainer.getHost(), + port: redisContainer.getPort(), + }, + keys: testOptions.keys, + }), + redis: { + keyPrefix: "runqueue:test:", + host: redisContainer.getHost(), + port: redisContainer.getPort(), + }, + }); + + try { + // Set env concurrency limit to 1 + await queue.updateEnvConcurrencyLimits({ + ...authenticatedEnvDev, + maximumConcurrencyLimit: 1, + concurrencyLimitBurstFactor: new Decimal(2.0), + }); + + // Enqueue first message + await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: messageDev, + workerQueue: "main", + }); + + // Dequeue first message to occupy the concurrency + await setTimeout(1000); + + const dequeued1 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + assertNonNullable(dequeued1); + + // Enqueue 
second message + await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: { ...messageDev, runId: "r4322", queue: "task/my-task-2" }, + workerQueue: "main", + }); + + await setTimeout(1000); + + // Try to dequeue second message + const dequeued2 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + expect(dequeued2).toBeDefined(); + assertNonNullable(dequeued2); + expect(dequeued2.messageId).toEqual("r4322"); + expect(dequeued2.message.orgId).toEqual(messageDev.orgId); + expect(dequeued2.message.version).toEqual("2"); + + const envConcurrency = await queue.currentConcurrencyOfEnvironment(authenticatedEnvDev); + expect(envConcurrency).toBe(2); + + // Enqueue the third message + await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: { ...messageDev, runId: "r4323", queue: "task/my-task-3" }, + workerQueue: "main", + }); + + await setTimeout(1000); + + // Try to dequeue third message + const dequeued3 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + expect(dequeued3).toBeUndefined(); + + const envConcurrencyAfter = await queue.currentConcurrencyOfEnvironment( + authenticatedEnvDev + ); + expect(envConcurrencyAfter).toBe(2); + } finally { + await queue.quit(); + } + } + ); + + redisTest( + "should dequeue when env current concurrency equals env concurrency limit with burst factor", + async ({ redisContainer }) => { + const queue = new RunQueue({ + ...testOptions, + queueSelectionStrategy: new FairQueueSelectionStrategy({ + redis: { + keyPrefix: "runqueue:test:", + host: redisContainer.getHost(), + port: redisContainer.getPort(), + }, + keys: testOptions.keys, + }), + redis: { + keyPrefix: "runqueue:test:", + host: redisContainer.getHost(), + port: redisContainer.getPort(), + }, + }); + + try { + // Set env concurrency limit to 1 + await queue.updateEnvConcurrencyLimits({ + ...authenticatedEnvDev, + maximumConcurrencyLimit: 1, + concurrencyLimitBurstFactor: new Decimal(2.0), + }); + + // Enqueue first message + 
await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: messageDev, + workerQueue: "main", + }); + + // Dequeue first message to occupy the concurrency + await setTimeout(1000); + + const dequeued1 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + assertNonNullable(dequeued1); + + // Enqueue second message + await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: { ...messageDev, runId: "r4322", queue: "task/my-task-2" }, + workerQueue: "main", + }); + + await setTimeout(1000); + + // Try to dequeue second message + const dequeued2 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + expect(dequeued2).toBeDefined(); + assertNonNullable(dequeued2); + expect(dequeued2.messageId).toEqual("r4322"); + expect(dequeued2.message.orgId).toEqual(messageDev.orgId); + expect(dequeued2.message.version).toEqual("2"); + + const envConcurrency = await queue.currentConcurrencyOfEnvironment(authenticatedEnvDev); + expect(envConcurrency).toBe(2); + } finally { + await queue.quit(); + } + } + ); + + redisTest( + "should not dequeue on a single queue when env current concurrency equals env concurrency limit with burst factor", + async ({ redisContainer }) => { + const queue = new RunQueue({ + ...testOptions, + queueSelectionStrategy: new FairQueueSelectionStrategy({ + redis: { + keyPrefix: "runqueue:test:", + host: redisContainer.getHost(), + port: redisContainer.getPort(), + }, + keys: testOptions.keys, + }), + redis: { + keyPrefix: "runqueue:test:", + host: redisContainer.getHost(), + port: redisContainer.getPort(), + }, + }); + + try { + // Set env concurrency limit to 1 + await queue.updateEnvConcurrencyLimits({ + ...authenticatedEnvDev, + maximumConcurrencyLimit: 1, + concurrencyLimitBurstFactor: new Decimal(2.0), + }); + + // Enqueue first message + await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: messageDev, + workerQueue: "main", + }); + + // Dequeue first message to occupy the concurrency + await 
setTimeout(1000); + + const dequeued1 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + assertNonNullable(dequeued1); + + // Enqueue second message + await queue.enqueueMessage({ + env: authenticatedEnvDev, + message: { ...messageDev, runId: "r4322" }, + workerQueue: "main", + }); + + await setTimeout(1000); + + // Try to dequeue second message + const dequeued2 = await queue.dequeueMessageFromWorkerQueue("test_12345", "main"); + expect(dequeued2).toBeUndefined(); + + const envConcurrency = await queue.currentConcurrencyOfEnvironment(authenticatedEnvDev); + expect(envConcurrency).toBe(1); + } finally { + await queue.quit(); + } + } + ); + redisTest( "should respect queue concurrency limits when dequeuing", async ({ redisContainer }) => { diff --git a/internal-packages/run-engine/src/run-queue/tests/enqueueMessage.test.ts b/internal-packages/run-engine/src/run-queue/tests/enqueueMessage.test.ts index 907f99fe4f..bf4ed87f29 100644 --- a/internal-packages/run-engine/src/run-queue/tests/enqueueMessage.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/enqueueMessage.test.ts @@ -7,6 +7,7 @@ import { FairQueueSelectionStrategy } from "../fairQueueSelectionStrategy.js"; import { RunQueue } from "../index.js"; import { RunQueueFullKeyProducer } from "../keyProducer.js"; import { InputPayload } from "../types.js"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -28,6 +29,7 @@ const authenticatedEnvDev = { id: "e1234", type: "DEVELOPMENT" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; diff --git a/internal-packages/run-engine/src/run-queue/tests/fairQueueSelectionStrategy.test.ts b/internal-packages/run-engine/src/run-queue/tests/fairQueueSelectionStrategy.test.ts index 853f4ac3e0..fa315d3d8a 100644 --- a/internal-packages/run-engine/src/run-queue/tests/fairQueueSelectionStrategy.test.ts +++ 
b/internal-packages/run-engine/src/run-queue/tests/fairQueueSelectionStrategy.test.ts @@ -76,6 +76,54 @@ describe("FairDequeuingStrategy", () => { expect(result).toHaveLength(0); }); + redisTest( + "should respect env concurrency limits with burst factor", + async ({ redisOptions: redis }) => { + const keyProducer = new RunQueueFullKeyProducer(); + const strategy = new FairQueueSelectionStrategy({ + redis, + keys: keyProducer, + defaultEnvConcurrencyLimit: 2, + parentQueueLimit: 100, + seed: "test-seed-3", + }); + + await setupQueue({ + redis, + keyProducer, + parentQueue: "parent-queue", + score: Date.now() - 1000, + queueId: "queue-1", + orgId: "org-1", + projectId: "proj-1", + envId: "env-1", + }); + + await setupConcurrency({ + redis, + keyProducer, + env: { + envId: "env-1", + projectId: "proj-1", + orgId: "org-1", + currentConcurrency: 3, + limit: 2, + limitBurstFactor: 2, + }, + }); + + const result = await strategy.distributeFairQueuesFromParentQueue( + "parent-queue", + "consumer-1" + ); + expect(result).toHaveLength(1); + expect(result[0]).toEqual({ + envId: "env-1", + queues: [keyProducer.queueKey("org-1", "proj-1", "env-1", "queue-1")], + }); + } + ); + redisTest("should respect parentQueueLimit", async ({ redisOptions: redis }) => { const keyProducer = new RunQueueFullKeyProducer(); const strategy = new FairQueueSelectionStrategy({ @@ -1021,143 +1069,140 @@ describe("FairDequeuingStrategy", () => { expect(selectionPercentages["env-2"]).toBeGreaterThan(40); // Verify that env-4 (lowest average age) gets selected in less than 20% of iterations - expect(selectionPercentages["env-4"] || 0).toBeLessThan(20); + expect(selectionPercentages["env-4"] || 0).toBeLessThan(20); } ); - redisTest( - "#selectTopEnvs groups queues by environment", - async ({ redisOptions: redis }) => { - const keyProducer = new RunQueueFullKeyProducer(); - const strategy = new FairQueueSelectionStrategy({ - redis, - keys: keyProducer, - defaultEnvConcurrencyLimit: 5, - 
parentQueueLimit: 100, - seed: "group-test", - maximumEnvCount: 2, - }); + redisTest("#selectTopEnvs groups queues by environment", async ({ redisOptions: redis }) => { + const keyProducer = new RunQueueFullKeyProducer(); + const strategy = new FairQueueSelectionStrategy({ + redis, + keys: keyProducer, + defaultEnvConcurrencyLimit: 5, + parentQueueLimit: 100, + seed: "group-test", + maximumEnvCount: 2, + }); - const now = Date.now(); + const now = Date.now(); - // env-1 with two queues from different orgs/projects - await setupQueue({ - redis, - keyProducer, - parentQueue: "parent-queue", - score: now - 1000, - queueId: "queue-1-old", - orgId: "org-a", - projectId: "proj-a", - envId: "env-1", - }); + // env-1 with two queues from different orgs/projects + await setupQueue({ + redis, + keyProducer, + parentQueue: "parent-queue", + score: now - 1000, + queueId: "queue-1-old", + orgId: "org-a", + projectId: "proj-a", + envId: "env-1", + }); - await setupQueue({ - redis, - keyProducer, - parentQueue: "parent-queue", - score: now - 10, - queueId: "queue-1-new", - orgId: "org-b", - projectId: "proj-b", - envId: "env-1", - }); + await setupQueue({ + redis, + keyProducer, + parentQueue: "parent-queue", + score: now - 10, + queueId: "queue-1-new", + orgId: "org-b", + projectId: "proj-b", + envId: "env-1", + }); - await setupQueue({ - redis, - keyProducer, - parentQueue: "parent-queue", - score: now - 400, - queueId: "queue-2", - orgId: "org-2", - projectId: "proj-2", - envId: "env-2", - }); + await setupQueue({ + redis, + keyProducer, + parentQueue: "parent-queue", + score: now - 400, + queueId: "queue-2", + orgId: "org-2", + projectId: "proj-2", + envId: "env-2", + }); - await setupQueue({ - redis, - keyProducer, - parentQueue: "parent-queue", - score: now - 300, - queueId: "queue-3", - orgId: "org-3", - projectId: "proj-3", - envId: "env-3", - }); + await setupQueue({ + redis, + keyProducer, + parentQueue: "parent-queue", + score: now - 300, + queueId: "queue-3", + orgId: 
"org-3", + projectId: "proj-3", + envId: "env-3", + }); - // Setup concurrency limits - await setupConcurrency({ - redis, - keyProducer, - env: { - envId: "env-1", - projectId: "proj-a", - orgId: "org-a", - currentConcurrency: 0, - limit: 5, - }, - }); + // Setup concurrency limits + await setupConcurrency({ + redis, + keyProducer, + env: { + envId: "env-1", + projectId: "proj-a", + orgId: "org-a", + currentConcurrency: 0, + limit: 5, + }, + }); - await setupConcurrency({ - redis, - keyProducer, - env: { - envId: "env-1", - projectId: "proj-b", - orgId: "org-b", - currentConcurrency: 0, - limit: 5, - }, - }); + await setupConcurrency({ + redis, + keyProducer, + env: { + envId: "env-1", + projectId: "proj-b", + orgId: "org-b", + currentConcurrency: 0, + limit: 5, + }, + }); - await setupConcurrency({ - redis, - keyProducer, - env: { - envId: "env-2", - projectId: "proj-2", - orgId: "org-2", - currentConcurrency: 0, - limit: 5, - }, - }); + await setupConcurrency({ + redis, + keyProducer, + env: { + envId: "env-2", + projectId: "proj-2", + orgId: "org-2", + currentConcurrency: 0, + limit: 5, + }, + }); - await setupConcurrency({ - redis, - keyProducer, - env: { - envId: "env-3", - projectId: "proj-3", - orgId: "org-3", - currentConcurrency: 0, - limit: 5, - }, - }); + await setupConcurrency({ + redis, + keyProducer, + env: { + envId: "env-3", + projectId: "proj-3", + orgId: "org-3", + currentConcurrency: 0, + limit: 5, + }, + }); - const envResult = await strategy.distributeFairQueuesFromParentQueue( - "parent-queue", - "consumer-1" - ); + const envResult = await strategy.distributeFairQueuesFromParentQueue( + "parent-queue", + "consumer-1" + ); - const result = flattenResults(envResult); + const result = flattenResults(envResult); - const queuesByEnv = result.reduce( - (acc, queueId) => { - const envId = keyProducer.envIdFromQueue(queueId); - if (!acc[envId]) { - acc[envId] = []; - } - acc[envId].push(queueId); - return acc; - }, - {} as Record - ); + const 
queuesByEnv = result.reduce( + (acc, queueId) => { + const envId = keyProducer.envIdFromQueue(queueId); + if (!acc[envId]) { + acc[envId] = []; + } + acc[envId].push(queueId); + return acc; + }, + {} as Record + ); - expect(Object.keys(queuesByEnv).length).toBe(2); - expect(queuesByEnv["env-1"]).toBeDefined(); - expect(queuesByEnv["env-1"].length).toBe(2); - } - ); + expect(Object.keys(queuesByEnv).length).toBe(2); + expect(queuesByEnv["env-1"]).toBeDefined(); + expect(queuesByEnv["env-1"].length).toBe(2); + }); }); // Helper function to flatten results for counting @@ -1205,6 +1250,7 @@ type SetupConcurrencyOptions = { orgId: string; currentConcurrency: number; limit?: number; + limitBurstFactor?: number; }; }; @@ -1218,6 +1264,13 @@ async function setupConcurrency({ redis, keyProducer, env }: SetupConcurrencyOpt await $redis.set(keyProducer.envConcurrencyLimitKey(env), env.limit.toString()); } + if (typeof env.limitBurstFactor === "number") { + await $redis.set( + keyProducer.envConcurrencyLimitBurstFactorKey(env), + String(env.limitBurstFactor) + ); + } + if (env.currentConcurrency > 0) { // Set current concurrency by adding dummy members to the set const envCurrentKey = keyProducer.envCurrentConcurrencyKey(env); diff --git a/internal-packages/run-engine/src/run-queue/tests/keyProducer.test.ts b/internal-packages/run-engine/src/run-queue/tests/keyProducer.test.ts index 88bbd55177..8b980749ea 100644 --- a/internal-packages/run-engine/src/run-queue/tests/keyProducer.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/keyProducer.test.ts @@ -9,7 +9,6 @@ describe("KeyProducer", () => { { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -23,20 +22,40 @@ describe("KeyProducer", () => { const key = keyProducer.envConcurrencyLimitKey({ id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }); 
expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:concurrency"); }); + it("envConcurrencyLimitBurstFactorKey", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const key = keyProducer.envConcurrencyLimitBurstFactorKey({ + id: "e1234", + type: "PRODUCTION", + project: { id: "p1234" }, + organization: { id: "o1234" }, + }); + expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:concurrencyBurstFactor"); + }); + + it("envCurrentDequeuedKey", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const key = keyProducer.envCurrentDequeuedKey({ + orgId: "o1234", + projectId: "p1234", + envId: "e1234", + }); + + expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:currentDequeued"); + }); + it("queueKey (no concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); const key = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -51,7 +70,6 @@ describe("KeyProducer", () => { { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -61,81 +79,76 @@ describe("KeyProducer", () => { expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:ck:c1234"); }); - it("concurrencyLimitKeyFromQueue (w concurrency)", () => { + it("queueConcurrencyLimitKeyFromQueue (w concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); const queueKey = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, "task/task-name", "c1234" ); - const key = keyProducer.concurrencyLimitKeyFromQueue(queueKey); + const key = keyProducer.queueConcurrencyLimitKeyFromQueue(queueKey); expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:concurrency"); }); - it("concurrencyLimitKeyFromQueue (no concurrency)", () => { + it("queueConcurrencyLimitKeyFromQueue (no 
concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); const queueKey = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, "task/task-name" ); - const key = keyProducer.concurrencyLimitKeyFromQueue(queueKey); + const key = keyProducer.queueConcurrencyLimitKeyFromQueue(queueKey); expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:concurrency"); }); - it("currentConcurrencyKeyFromQueue (w concurrency)", () => { + it("queueCurrentConcurrencyKeyFromQueue (w concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); const queueKey = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, "task/task-name", "c1234" ); - const key = keyProducer.currentConcurrencyKeyFromQueue(queueKey); + const key = keyProducer.queueCurrentConcurrencyKeyFromQueue(queueKey); expect(key).toBe( "{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:ck:c1234:currentConcurrency" ); }); - it("currentConcurrencyKeyFromQueue (no concurrency)", () => { + it("queueCurrentConcurrencyKeyFromQueue (no concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); const queueKey = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, "task/task-name" ); - const key = keyProducer.currentConcurrencyKeyFromQueue(queueKey); + const key = keyProducer.queueCurrentConcurrencyKeyFromQueue(queueKey); expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:currentConcurrency"); }); - it("currentConcurrencyKey (w concurrency)", () => { + it("queueCurrentConcurrencyKey (w concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); - const key = keyProducer.currentConcurrencyKey( + const key = keyProducer.queueCurrentConcurrencyKey( { id: 
"e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -147,13 +160,12 @@ describe("KeyProducer", () => { ); }); - it("currentConcurrencyKey (no concurrency)", () => { + it("queueCurrentConcurrencyKey (no concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); - const key = keyProducer.currentConcurrencyKey( + const key = keyProducer.queueCurrentConcurrencyKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -163,20 +175,69 @@ describe("KeyProducer", () => { expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:currentConcurrency"); }); - it("disabledConcurrencyLimitKeyFromQueue", () => { + it("queueCurrentDequeuedKeyFromQueue (w concurrency)", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const queueKey = keyProducer.queueKey( + { + id: "e1234", + type: "PRODUCTION", + project: { id: "p1234" }, + organization: { id: "o1234" }, + }, + "task/task-name", + "c1234" + ); + + const key = keyProducer.queueCurrentDequeuedKeyFromQueue(queueKey); + expect(key).toBe( + "{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:ck:c1234:currentDequeued" + ); + }); + + it("queueCurrentDequeuedKeyFromQueue (no concurrency)", () => { const keyProducer = new RunQueueFullKeyProducer(); const queueKey = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, "task/task-name" ); - const key = keyProducer.disabledConcurrencyLimitKeyFromQueue(queueKey); - expect(key).toBe("{org:o1234}:disabledConcurrency"); + const key = keyProducer.queueCurrentDequeuedKeyFromQueue(queueKey); + expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:currentDequeued"); + }); + + it("queueCurrentDequeuedKey (w concurrency)", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const key = 
keyProducer.queueCurrentDequeuedKey( + { + id: "e1234", + type: "PRODUCTION", + project: { id: "p1234" }, + organization: { id: "o1234" }, + }, + "task/task-name", + "c1234" + ); + expect(key).toBe( + "{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:ck:c1234:currentDequeued" + ); + }); + + it("queueCurrentDequeuedKey (no concurrency)", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const key = keyProducer.queueCurrentDequeuedKey( + { + id: "e1234", + type: "PRODUCTION", + project: { id: "p1234" }, + organization: { id: "o1234" }, + }, + "task/task-name" + ); + expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:queue:task/task-name:currentDequeued"); }); it("envConcurrencyLimitKeyFromQueue", () => { @@ -185,7 +246,6 @@ describe("KeyProducer", () => { { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -195,13 +255,27 @@ describe("KeyProducer", () => { expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:concurrency"); }); + it("envConcurrencyLimitBurstFactorKeyFromQueue", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const queueKey = keyProducer.queueKey( + { + id: "e1234", + type: "PRODUCTION", + project: { id: "p1234" }, + organization: { id: "o1234" }, + }, + "task/task-name" + ); + const key = keyProducer.envConcurrencyLimitBurstFactorKeyFromQueue(queueKey); + expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:concurrencyBurstFactor"); + }); + it("envCurrentConcurrencyKeyFromQueue", () => { const keyProducer = new RunQueueFullKeyProducer(); const queueKey = keyProducer.queueKey( { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -211,12 +285,26 @@ describe("KeyProducer", () => { expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:currentConcurrency"); }); + it("envCurrentDequeuedKeyFromQueue", () => { + const keyProducer = new RunQueueFullKeyProducer(); + const queueKey = 
keyProducer.queueKey( + { + id: "e1234", + type: "PRODUCTION", + project: { id: "p1234" }, + organization: { id: "o1234" }, + }, + "task/task-name" + ); + const key = keyProducer.envCurrentDequeuedKeyFromQueue(queueKey); + expect(key).toBe("{org:o1234}:proj:p1234:env:e1234:currentDequeued"); + }); + it("envCurrentConcurrencyKey", () => { const keyProducer = new RunQueueFullKeyProducer(); const key = keyProducer.envCurrentConcurrencyKey({ id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }); @@ -235,7 +323,6 @@ describe("KeyProducer", () => { { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, @@ -257,7 +344,6 @@ describe("KeyProducer", () => { { id: "e1234", type: "PRODUCTION", - maximumConcurrencyLimit: 10, project: { id: "p1234" }, organization: { id: "o1234" }, }, diff --git a/internal-packages/run-engine/src/run-queue/tests/nack.test.ts b/internal-packages/run-engine/src/run-queue/tests/nack.test.ts index fe1623e376..8bfd8b3a76 100644 --- a/internal-packages/run-engine/src/run-queue/tests/nack.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/nack.test.ts @@ -6,6 +6,7 @@ import { RunQueue } from "../index.js"; import { RunQueueFullKeyProducer } from "../keyProducer.js"; import { InputPayload } from "../types.js"; import { setTimeout } from "node:timers/promises"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -26,6 +27,7 @@ const authenticatedEnvDev = { id: "e1234", type: "DEVELOPMENT" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; diff --git a/internal-packages/run-engine/src/run-queue/tests/reacquireConcurrency.test.ts b/internal-packages/run-engine/src/run-queue/tests/reacquireConcurrency.test.ts deleted file mode 100644 index 81cf549636..0000000000 --- 
a/internal-packages/run-engine/src/run-queue/tests/reacquireConcurrency.test.ts +++ /dev/null @@ -1,349 +0,0 @@ -import { assertNonNullable, redisTest } from "@internal/testcontainers"; -import { trace } from "@internal/tracing"; -import { Logger } from "@trigger.dev/core/logger"; -import { FairQueueSelectionStrategy } from "../fairQueueSelectionStrategy.js"; -import { RunQueue } from "../index.js"; -import { RunQueueFullKeyProducer } from "../keyProducer.js"; -import { InputPayload } from "../types.js"; -import { MessageNotFoundError } from "../errors.js"; -import { setTimeout } from "node:timers/promises"; - -const testOptions = { - name: "rq", - tracer: trace.getTracer("rq"), - workers: 1, - defaultEnvConcurrency: 25, - enableRebalancing: false, - logger: new Logger("RunQueue", "warn"), - retryOptions: { - maxAttempts: 5, - factor: 1.1, - minTimeoutInMs: 100, - maxTimeoutInMs: 1_000, - randomize: true, - }, - keys: new RunQueueFullKeyProducer(), -}; - -const authenticatedEnvProd = { - id: "e1234", - type: "PRODUCTION" as const, - maximumConcurrencyLimit: 10, - project: { id: "p1234" }, - organization: { id: "o1234" }, -}; - -const messageProd: InputPayload = { - runId: "r1234", - taskIdentifier: "task/my-task", - orgId: "o1234", - projectId: "p1234", - environmentId: "e1234", - environmentType: "PRODUCTION", - queue: "task/my-task", - timestamp: Date.now(), - attempt: 0, -}; - -describe("RunQueue.reacquireConcurrency", () => { - redisTest( - "It should return true if we can reacquire the concurrency", - async ({ redisContainer }) => { - const queue = new RunQueue({ - ...testOptions, - queueSelectionStrategy: new FairQueueSelectionStrategy({ - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - keys: testOptions.keys, - }), - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - }); - - try { - await queue.updateEnvConcurrencyLimits({ - 
...authenticatedEnvProd, - maximumConcurrencyLimit: 1, - }); - - await queue.enqueueMessage({ - env: authenticatedEnvProd, - message: messageProd, - workerQueue: authenticatedEnvProd.id, - }); - - await setTimeout(1000); - - const message = await queue.dequeueMessageFromWorkerQueue( - "test_12345", - authenticatedEnvProd.id - ); - assertNonNullable(message); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - - // First, release the concurrency - await queue.releaseAllConcurrency(authenticatedEnvProd.organization.id, messageProd.runId); - - //reacquire the concurrency - const result = await queue.reacquireConcurrency( - authenticatedEnvProd.organization.id, - messageProd.runId - ); - expect(result).toBe(true); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - } finally { - await queue.quit(); - } - } - ); - - redisTest( - "It should return true if the run is already being counted as concurrency", - async ({ redisContainer }) => { - const queue = new RunQueue({ - ...testOptions, - queueSelectionStrategy: new FairQueueSelectionStrategy({ - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - keys: testOptions.keys, - }), - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - }); - - try { - await queue.updateEnvConcurrencyLimits({ - ...authenticatedEnvProd, - maximumConcurrencyLimit: 1, - }); - - await queue.enqueueMessage({ - env: authenticatedEnvProd, - message: messageProd, - workerQueue: authenticatedEnvProd.id, - }); - - await setTimeout(1000); - - const message = await queue.dequeueMessageFromWorkerQueue( - "test_12345", - 
authenticatedEnvProd.id - ); - assertNonNullable(message); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - - //reacquire the concurrency - const result = await queue.reacquireConcurrency( - authenticatedEnvProd.organization.id, - messageProd.runId - ); - expect(result).toBe(true); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - } finally { - await queue.quit(); - } - } - ); - - redisTest( - "It should return true if the run is already being counted as concurrency", - async ({ redisContainer }) => { - const queue = new RunQueue({ - ...testOptions, - queueSelectionStrategy: new FairQueueSelectionStrategy({ - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - keys: testOptions.keys, - }), - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - }); - - try { - await queue.updateEnvConcurrencyLimits({ - ...authenticatedEnvProd, - maximumConcurrencyLimit: 1, - }); - - await queue.enqueueMessage({ - env: authenticatedEnvProd, - message: messageProd, - workerQueue: authenticatedEnvProd.id, - }); - - await setTimeout(1000); - - const message = await queue.dequeueMessageFromWorkerQueue( - "test_12345", - authenticatedEnvProd.id - ); - assertNonNullable(message); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - - //reacquire the concurrency - const result = await queue.reacquireConcurrency( - authenticatedEnvProd.organization.id, - messageProd.runId - ); - 
expect(result).toBe(true); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - } finally { - await queue.quit(); - } - } - ); - - redisTest( - "It should false if the run is not in the current concurrency set and there is no capacity in the environment", - async ({ redisContainer }) => { - const queue = new RunQueue({ - ...testOptions, - queueSelectionStrategy: new FairQueueSelectionStrategy({ - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - keys: testOptions.keys, - }), - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - }); - - try { - await queue.updateEnvConcurrencyLimits({ - ...authenticatedEnvProd, - maximumConcurrencyLimit: 1, - }); - - await queue.enqueueMessage({ - env: authenticatedEnvProd, - message: messageProd, - workerQueue: authenticatedEnvProd.id, - }); - - await setTimeout(1000); - - const message = await queue.dequeueMessageFromWorkerQueue( - "test_12345", - authenticatedEnvProd.id - ); - assertNonNullable(message); - expect(message.message.runId).toBe(messageProd.runId); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - - // Enqueue a second message - await queue.enqueueMessage({ - env: authenticatedEnvProd, - message: { - ...messageProd, - runId: "r1235", - queue: "task/my-task-2", - }, - workerQueue: authenticatedEnvProd.id, - }); - - //reacquire the concurrency - const result = await queue.reacquireConcurrency( - authenticatedEnvProd.organization.id, - "r1235" - ); - expect(result).toBe(false); - - //concurrencies - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, 
messageProd.queue)).toBe( - 1 - ); - expect(await queue.currentConcurrencyOfQueue(authenticatedEnvProd, "task/my-task-2")).toBe( - 0 - ); - expect(await queue.currentConcurrencyOfEnvironment(authenticatedEnvProd)).toBe(1); - } finally { - await queue.quit(); - } - } - ); - - redisTest("It should throw an error if the message is not found", async ({ redisContainer }) => { - const queue = new RunQueue({ - ...testOptions, - queueSelectionStrategy: new FairQueueSelectionStrategy({ - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - keys: testOptions.keys, - }), - redis: { - keyPrefix: "runqueue:test:", - host: redisContainer.getHost(), - port: redisContainer.getPort(), - }, - }); - - try { - await expect( - queue.reacquireConcurrency(authenticatedEnvProd.organization.id, "r1235") - ).rejects.toThrow(MessageNotFoundError); - } finally { - await queue.quit(); - } - }); -}); diff --git a/internal-packages/run-engine/src/run-queue/tests/releaseConcurrency.test.ts b/internal-packages/run-engine/src/run-queue/tests/releaseConcurrency.test.ts index bb436b8cb4..4570821317 100644 --- a/internal-packages/run-engine/src/run-queue/tests/releaseConcurrency.test.ts +++ b/internal-packages/run-engine/src/run-queue/tests/releaseConcurrency.test.ts @@ -6,6 +6,7 @@ import { RunQueue } from "../index.js"; import { RunQueueFullKeyProducer } from "../keyProducer.js"; import { InputPayload } from "../types.js"; import { setTimeout } from "node:timers/promises"; +import { Decimal } from "@trigger.dev/database"; const testOptions = { name: "rq", @@ -28,6 +29,7 @@ const authenticatedEnvProd = { id: "e1234", type: "PRODUCTION" as const, maximumConcurrencyLimit: 10, + concurrencyLimitBurstFactor: new Decimal(2.0), project: { id: "p1234" }, organization: { id: "o1234" }, }; diff --git a/internal-packages/run-engine/src/run-queue/types.ts b/internal-packages/run-engine/src/run-queue/types.ts index 38420ce897..ee1ce41b79 100644 --- 
a/internal-packages/run-engine/src/run-queue/types.ts +++ b/internal-packages/run-engine/src/run-queue/types.ts @@ -46,6 +46,11 @@ export type EnvDescriptor = { envId: string; }; +export type RunQueueKeyProducerEnvironment = Omit< + MinimalAuthenticatedEnvironment, + "maximumConcurrencyLimit" | "concurrencyLimitBurstFactor" +>; + export interface RunQueueKeyProducer { //queue queueKey( @@ -55,7 +60,7 @@ export interface RunQueueKeyProducer { queue: string, concurrencyKey?: string ): string; - queueKey(env: MinimalAuthenticatedEnvironment, queue: string, concurrencyKey?: string): string; + queueKey(env: RunQueueKeyProducerEnvironment, queue: string, concurrencyKey?: string): string; legacyMasterQueueKey(masterQueueName: string): string; @@ -64,26 +69,41 @@ export interface RunQueueKeyProducer { masterQueueShardForEnvironment(envId: string, shardCount: number): number; workerQueueKey(workerQueue: string): string; - envQueueKey(env: MinimalAuthenticatedEnvironment): string; + envQueueKey(env: RunQueueKeyProducerEnvironment): string; envQueueKeyFromQueue(queue: string): string; - queueConcurrencyLimitKey(env: MinimalAuthenticatedEnvironment, queue: string): string; - concurrencyLimitKeyFromQueue(queue: string): string; - currentConcurrencyKeyFromQueue(queue: string): string; - currentConcurrencyKey( - env: MinimalAuthenticatedEnvironment, + queueConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment, queue: string): string; + queueConcurrencyLimitKeyFromQueue(queue: string): string; + queueCurrentConcurrencyKeyFromQueue(queue: string): string; + queueCurrentConcurrencyKey( + env: RunQueueKeyProducerEnvironment, + queue: string, + concurrencyKey?: string + ): string; + queueCurrentDequeuedKeyFromQueue(queue: string): string; + queueCurrentDequeuedKey( + env: RunQueueKeyProducerEnvironment, queue: string, concurrencyKey?: string ): string; - disabledConcurrencyLimitKeyFromQueue(queue: string): string; + //env concurrency envCurrentConcurrencyKey(env: EnvDescriptor):
string; - envCurrentConcurrencyKey(env: MinimalAuthenticatedEnvironment): string; + envCurrentConcurrencyKey(env: RunQueueKeyProducerEnvironment): string; envConcurrencyLimitKey(env: EnvDescriptor): string; - envConcurrencyLimitKey(env: MinimalAuthenticatedEnvironment): string; + envConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment): string; + + envCurrentDequeuedKey(env: EnvDescriptor): string; + envCurrentDequeuedKey(env: RunQueueKeyProducerEnvironment): string; + + envConcurrencyLimitBurstFactorKey(env: EnvDescriptor): string; + envConcurrencyLimitBurstFactorKey(env: RunQueueKeyProducerEnvironment): string; + envConcurrencyLimitBurstFactorKeyFromQueue(queue: string): string; envConcurrencyLimitKeyFromQueue(queue: string): string; envCurrentConcurrencyKeyFromQueue(queue: string): string; + envCurrentDequeuedKeyFromQueue(queue: string): string; + //message payload messageKeyPrefixFromQueue(queue: string): string; messageKey(orgId: string, messageId: string): string; @@ -93,7 +113,7 @@ export interface RunQueueKeyProducer { projectIdFromQueue(queue: string): string; descriptorFromQueue(queue: string): QueueDescriptor; - deadLetterQueueKey(env: MinimalAuthenticatedEnvironment): string; + deadLetterQueueKey(env: RunQueueKeyProducerEnvironment): string; deadLetterQueueKey(env: EnvDescriptor): string; deadLetterQueueKeyFromQueue(queue: string): string; diff --git a/internal-packages/run-engine/src/shared/index.ts b/internal-packages/run-engine/src/shared/index.ts index 3e541bcd5f..e2b36e464e 100644 --- a/internal-packages/run-engine/src/shared/index.ts +++ b/internal-packages/run-engine/src/shared/index.ts @@ -9,6 +9,7 @@ export type MinimalAuthenticatedEnvironment = { id: AuthenticatedEnvironment["id"]; type: AuthenticatedEnvironment["type"]; maximumConcurrencyLimit: AuthenticatedEnvironment["maximumConcurrencyLimit"]; + concurrencyLimitBurstFactor: AuthenticatedEnvironment["concurrencyLimitBurstFactor"]; project: { id: AuthenticatedEnvironment["project"]["id"]; 
}; diff --git a/internal-packages/testcontainers/src/utils.ts b/internal-packages/testcontainers/src/utils.ts index 9a5098351f..fd3077d163 100644 --- a/internal-packages/testcontainers/src/utils.ts +++ b/internal-packages/testcontainers/src/utils.ts @@ -1,3 +1,4 @@ +import { createClient } from "@clickhouse/client"; import { PostgreSqlContainer, StartedPostgreSqlContainer } from "@testcontainers/postgresql"; import { RedisContainer, StartedRedisContainer } from "@testcontainers/redis"; import { tryCatch } from "@trigger.dev/core"; @@ -7,11 +8,8 @@ import { isDebug } from "std-env"; import { GenericContainer, StartedNetwork, StartedTestContainer, Wait } from "testcontainers"; import { x } from "tinyexec"; import { expect, TaskContext } from "vitest"; -import { getContainerMetadata, getTaskMetadata, logCleanup } from "./logs"; -import { logSetup } from "./logs"; import { ClickHouseContainer, runClickhouseMigrations } from "./clickhouse"; -import { createClient } from "@clickhouse/client"; -import { readdir, readFile } from "node:fs/promises"; +import { getContainerMetadata, getTaskMetadata, logCleanup, logSetup } from "./logs"; export async function createPostgresContainer(network: StartedNetwork) { const container = await new PostgreSqlContainer("docker.io/postgres:14") diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 0b5857c154..e230374974 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -77,6 +77,7 @@ import { SubscribeToRunsQueryParams, UpdateEnvironmentVariableParams, } from "./types.js"; +import { API_VERSION, API_VERSION_HEADER_NAME } from "./version.js"; export type CreateWaitpointTokenResponse = Prettify< CreateWaitpointTokenResponseBody & { @@ -777,11 +778,9 @@ export class ApiClient { { runFriendlyId, waitpointFriendlyId, - releaseConcurrency, }: { runFriendlyId: string; waitpointFriendlyId: string; - releaseConcurrency?: boolean; }, requestOptions?: 
ZodFetchOptions ) { @@ -791,9 +790,6 @@ export class ApiClient { { method: "POST", headers: this.#getHeaders(false), - body: JSON.stringify({ - releaseConcurrency, - }), }, mergeRequestOptions(this.defaultRequestOptions, requestOptions) ); @@ -1040,6 +1036,8 @@ export class ApiClient { headers["x-trigger-client"] = "browser"; } + headers[API_VERSION_HEADER_NAME] = API_VERSION; + return headers; } diff --git a/packages/core/src/v3/apiClient/runStream.ts b/packages/core/src/v3/apiClient/runStream.ts index 0083b411a9..43478af33f 100644 --- a/packages/core/src/v3/apiClient/runStream.ts +++ b/packages/core/src/v3/apiClient/runStream.ts @@ -30,7 +30,6 @@ export type RunShape = TRunTypes extends AnyRunTy output?: TRunTypes["output"]; createdAt: Date; updatedAt: Date; - number: number; status: RunStatus; durationMs: number; costInCents: number; @@ -46,6 +45,13 @@ export type RunShape = TRunTypes extends AnyRunTy metadata?: Record; error?: SerializedError; isTest: boolean; + isQueued: boolean; + isExecuting: boolean; + isWaiting: boolean; + isCompleted: boolean; + isFailed: boolean; + isSuccess: boolean; + isCancelled: boolean; } : never; @@ -414,15 +420,16 @@ export class RunSubscription { ? await parsePacket({ data: row.metadata, dataType: row.metadataType }) : undefined; + const status = apiStatusFromRunStatus(row.status); + return { id: row.friendlyId, createdAt: row.createdAt, updatedAt: row.updatedAt, taskIdentifier: row.taskIdentifier, - status: apiStatusFromRunStatus(row.status), + status, payload, output, - number: row.number ?? 0, durationMs: row.usageDurationMs ?? 0, costInCents: row.costInCents ?? 0, baseCostInCents: row.baseCostInCents ?? 0, @@ -436,31 +443,49 @@ export class RunSubscription { error: row.error ? createJsonErrorObject(row.error) : undefined, isTest: row.isTest ?? 
false, metadata, + ...booleanHelpersFromRunStatus(status), } as RunShape; } } +const queuedStatuses = ["PENDING_VERSION", "QUEUED", "PENDING", "DELAYED"]; +const waitingStatuses = ["WAITING"]; +const executingStatuses = ["DEQUEUED", "EXECUTING"]; +const failedStatuses = ["FAILED", "CRASHED", "SYSTEM_FAILURE", "EXPIRED", "TIMED_OUT"]; +const successfulStatuses = ["COMPLETED"]; + +function booleanHelpersFromRunStatus(status: RunStatus) { + return { + isQueued: queuedStatuses.includes(status), + isWaiting: waitingStatuses.includes(status), + isExecuting: executingStatuses.includes(status), + isCompleted: successfulStatuses.includes(status) || failedStatuses.includes(status), + isFailed: failedStatuses.includes(status), + isSuccess: successfulStatuses.includes(status), + isCancelled: status === "CANCELED", + }; +} + function apiStatusFromRunStatus(status: string): RunStatus { switch (status) { case "DELAYED": { return "DELAYED"; } + case "WAITING_FOR_DEPLOY": case "PENDING_VERSION": { return "PENDING_VERSION"; } - case "WAITING_FOR_DEPLOY": { - return "WAITING_FOR_DEPLOY"; - } case "PENDING": { return "QUEUED"; } case "PAUSED": case "WAITING_TO_RESUME": { - return "FROZEN"; + return "WAITING"; } - case "RETRYING_AFTER_FAILURE": { - return "REATTEMPTING"; + case "DEQUEUED": { + return "DEQUEUED"; } + case "RETRYING_AFTER_FAILURE": case "EXECUTING": { return "EXECUTING"; } @@ -473,12 +498,10 @@ function apiStatusFromRunStatus(status: string): RunStatus { case "SYSTEM_FAILURE": { return "SYSTEM_FAILURE"; } - case "INTERRUPTED": { - return "INTERRUPTED"; - } case "CRASHED": { return "CRASHED"; } + case "INTERRUPTED": case "COMPLETED_WITH_ERRORS": { return "FAILED"; } @@ -489,7 +512,7 @@ function apiStatusFromRunStatus(status: string): RunStatus { return "TIMED_OUT"; } default: { - throw new Error(`Unknown status: ${status}`); + return "QUEUED"; } } } diff --git a/packages/core/src/v3/apiClient/stream.ts b/packages/core/src/v3/apiClient/stream.ts index 
428fad8e94..fc7bcf3db1 100644 --- a/packages/core/src/v3/apiClient/stream.ts +++ b/packages/core/src/v3/apiClient/stream.ts @@ -10,6 +10,7 @@ import { type ShapeStreamInterface, } from "@electric-sql/client"; import { AsyncIterableStream, createAsyncIterableStream } from "../streams/asyncIterableStream.js"; +import { API_VERSION_HEADER_NAME, API_VERSION } from "./version.js"; export type ZodShapeStreamOptions = { headers?: Record; @@ -43,6 +44,7 @@ export function zodShapeStream( headers: { ...options?.headers, "x-trigger-electric-version": "1.0.0-beta.1", + [API_VERSION_HEADER_NAME]: API_VERSION, }, fetchClient: options?.fetchClient, signal: abortController.signal, diff --git a/packages/core/src/v3/apiClient/version.ts b/packages/core/src/v3/apiClient/version.ts new file mode 100644 index 0000000000..4b2cf3afe6 --- /dev/null +++ b/packages/core/src/v3/apiClient/version.ts @@ -0,0 +1,2 @@ +export const API_VERSION = "2025-07-16"; +export const API_VERSION_HEADER_NAME = "x-trigger-api-version"; diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts index 92ae0a33df..7468b63d80 100644 --- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts @@ -26,24 +26,18 @@ export class StandardResourceCatalog implements ResourceCatalog { registerQueueMetadata(queue: QueueManifest): void { const existingQueue = this._queueMetadata.get(queue.name); - //if it exists already AND concurrencyLimit or releaseConcurrencyOnWaitpoint is different, log a warning + //if it exists already AND concurrencyLimit is different, log a warning if (existingQueue) { const isConcurrencyLimitDifferent = existingQueue.concurrencyLimit !== queue.concurrencyLimit; - const isReleaseConcurrencyOnWaitpointDifferent = - existingQueue.releaseConcurrencyOnWaitpoint !== queue.releaseConcurrencyOnWaitpoint; - if (isConcurrencyLimitDifferent 
|| isReleaseConcurrencyOnWaitpointDifferent) { + if (isConcurrencyLimitDifferent) { let message = `Queue "${queue.name}" is defined twice, with different settings.`; if (isConcurrencyLimitDifferent) { message += `\n - concurrencyLimit: ${existingQueue.concurrencyLimit} vs ${queue.concurrencyLimit}`; } - if (isReleaseConcurrencyOnWaitpointDifferent) { - message += `\n - releaseConcurrencyOnWaitpoint: ${existingQueue.releaseConcurrencyOnWaitpoint} vs ${queue.releaseConcurrencyOnWaitpoint}`; - } message += "\n Keeping the first definition:"; message += `\n - concurrencyLimit: ${existingQueue.concurrencyLimit}`; - message += `\n - releaseConcurrencyOnWaitpoint: ${existingQueue.releaseConcurrencyOnWaitpoint}`; console.warn(message); return; } diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 3521b8f955..cdf455cbfe 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -133,7 +133,6 @@ export const TriggerTaskRequestBody = z.object({ test: z.boolean().optional(), ttl: z.string().or(z.number().nonnegative().int()).optional(), priority: z.number().optional(), - releaseConcurrency: z.boolean().optional(), bulkActionId: z.string().optional(), }) .optional(), @@ -639,18 +638,16 @@ export const TimezonesResult = z.object({ export type TimezonesResult = z.infer; export const RunStatus = z.enum([ - /// Task is waiting for a version update because it cannot execute without additional information (task, queue, etc.). Replaces WAITING_FOR_DEPLOY + /// Task is waiting for a version update because it cannot execute without additional information (task, queue, etc.) 
"PENDING_VERSION", - /// Task hasn't been deployed yet but is waiting to be executed - "WAITING_FOR_DEPLOY", /// Task is waiting to be executed by a worker "QUEUED", + /// Task has been dequeued and is being sent to a worker to start executing + "DEQUEUED", /// Task is currently being executed by a worker "EXECUTING", - /// Task has failed and is waiting to be retried - "REATTEMPTING", /// Task has been paused by the system, and will be resumed by the system - "FROZEN", + "WAITING", /// Task has been completed successfully "COMPLETED", /// Task has been canceled by the user @@ -659,8 +656,6 @@ export const RunStatus = z.enum([ "FAILED", /// Task has crashed and won't be retried, most likely the worker ran out of resources, e.g. memory or storage "CRASHED", - /// Task was interrupted during execution, mostly this happens in development environments - "INTERRUPTED", /// Task has failed to complete, due to an error in the system "SYSTEM_FAILURE", /// Task has been scheduled to run at a specific time
- * - * This is useful if you want to allow other runs to execute while the waiting - * - * @default false - */ - releaseConcurrency: z.boolean().optional(), - /** * The date that the waitpoint will complete. */ diff --git a/packages/core/src/v3/schemas/queues.ts b/packages/core/src/v3/schemas/queues.ts index 33ae3e1447..2b511eb44c 100644 --- a/packages/core/src/v3/schemas/queues.ts +++ b/packages/core/src/v3/schemas/queues.ts @@ -32,8 +32,6 @@ export const QueueItem = z.object({ concurrencyLimit: z.number().nullable(), /** Whether the queue is paused. If it's paused, no new runs will be started. */ paused: z.boolean(), - /** Whether the queue releases concurrency on waitpoints. */ - releaseConcurrencyOnWaitpoint: z.boolean(), }); export type QueueItem = z.infer; diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index 7c3a3d4a00..ef6ef170ce 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -23,6 +23,7 @@ export const TaskRunStatus = { PENDING: "PENDING", PENDING_VERSION: "PENDING_VERSION", WAITING_FOR_DEPLOY: "WAITING_FOR_DEPLOY", + DEQUEUED: "DEQUEUED", EXECUTING: "EXECUTING", WAITING_TO_RESUME: "WAITING_TO_RESUME", RETRYING_AFTER_FAILURE: "RETRYING_AFTER_FAILURE", diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index f54ee4791c..660f9dea38 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -164,11 +164,6 @@ export const QueueManifest = z.object({ * * If this property is omitted, the task can potentially use up the full concurrency of an environment */ concurrencyLimit: z.number().int().min(0).max(100000).optional().nullable(), - /** An optional property that specifies whether to release concurrency on waitpoint. - * - * If this property is omitted, the task will not release concurrency on waitpoint. 
- */ - releaseConcurrencyOnWaitpoint: z.boolean().optional(), }); export type QueueManifest = z.infer; diff --git a/packages/core/src/v3/serverOnly/index.ts b/packages/core/src/v3/serverOnly/index.ts index 6111d04de9..3b28ab95fb 100644 --- a/packages/core/src/v3/serverOnly/index.ts +++ b/packages/core/src/v3/serverOnly/index.ts @@ -5,3 +5,4 @@ export * from "./singleton.js"; export * from "./shutdownManager.js"; export * from "./k8s.js"; export * from "./jumpHash.js"; +export * from "../apiClient/version.js"; diff --git a/packages/core/src/v3/types/queues.ts b/packages/core/src/v3/types/queues.ts index ceb3a45e5e..9e87f136e2 100644 --- a/packages/core/src/v3/types/queues.ts +++ b/packages/core/src/v3/types/queues.ts @@ -35,9 +35,4 @@ export type QueueOptions = { * * If this property is omitted, the task can potentially use up the full concurrency of an environment */ concurrencyLimit?: number; - /** An optional property that specifies whether to release concurrency on waitpoint. - * - * If this property is omitted, the task will not release concurrency on waitpoint. - */ - releaseConcurrencyOnWaitpoint?: boolean; }; diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index e64e1684ac..f9595b51e6 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -215,7 +215,6 @@ type CommonTaskOptions< queue?: { name?: string; concurrencyLimit?: number; - releaseConcurrencyOnWaitpoint?: boolean; }; /** Configure the spec of the [machine](https://trigger.dev/docs/machines) you want your task to run on. * @@ -842,16 +841,7 @@ export type TriggerOptions = { version?: string; }; -export type TriggerAndWaitOptions = Omit & { - /** - * If set to true, this will cause the waitpoint to release the current run from the queue's concurrency. 
- * - * This is useful if you want to allow other runs to execute while the child task is executing - * - * @default false - */ - releaseConcurrency?: boolean; -}; +export type TriggerAndWaitOptions = Omit; export type BatchTriggerOptions = { /** * If no idempotencyKey is set on an individual item in the batch, it will use this key on each item + the array index. diff --git a/packages/react-hooks/src/hooks/useRealtime.ts b/packages/react-hooks/src/hooks/useRealtime.ts index 4810b6db0a..9492c085de 100644 --- a/packages/react-hooks/src/hooks/useRealtime.ts +++ b/packages/react-hooks/src/hooks/useRealtime.ts @@ -591,7 +591,7 @@ async function processRealtimeBatch( } } -// Inserts and then orders by the run number, and ensures that the run is not duplicated +// Inserts and then orders by the run createdAt timestamp, and ensures that the run is not duplicated function insertRunShapeInOrder( previousRuns: RealtimeRun[], run: RealtimeRun @@ -601,8 +601,8 @@ function insertRunShapeInOrder( return previousRuns.map((r) => (r.id === run.id ? 
run : r)); } - const runNumber = run.number; - const index = previousRuns.findIndex((r) => r.number > runNumber); + const runCreatedAt = run.createdAt; + const index = previousRuns.findIndex((r) => r.createdAt > runCreatedAt); if (index === -1) { return [...previousRuns, run]; } diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index 801a138b5a..5d5d896621 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -215,7 +215,6 @@ export function createTask< resourceCatalog.registerQueueMetadata({ name: queue.name, concurrencyLimit: queue.concurrencyLimit, - releaseConcurrencyOnWaitpoint: queue.releaseConcurrencyOnWaitpoint, }); } @@ -347,7 +346,6 @@ export function createSchemaTask< resourceCatalog.registerQueueMetadata({ name: queue.name, concurrencyLimit: queue.concurrencyLimit, - releaseConcurrencyOnWaitpoint: queue.releaseConcurrencyOnWaitpoint, }); } @@ -1333,7 +1331,6 @@ async function triggerAndWait_internal => { return new ManualWaitpointPromise(async (resolve, reject) => { try { @@ -646,7 +618,6 @@ export const wait = { const response = await apiClient.waitForWaitpointToken({ runFriendlyId: ctx.run.id, waitpointFriendlyId: tokenId, - releaseConcurrency: options?.releaseConcurrency, }); if (!response.success) { diff --git a/references/hello-world/src/trigger/deadlocks.ts b/references/hello-world/src/trigger/deadlocks.ts index 8bf35de738..1fd567f653 100644 --- a/references/hello-world/src/trigger/deadlocks.ts +++ b/references/hello-world/src/trigger/deadlocks.ts @@ -3,12 +3,10 @@ import { task, queue } from "@trigger.dev/sdk"; const deadlockQueue = queue({ name: "deadlock-queue", concurrencyLimit: 1, - releaseConcurrencyOnWaitpoint: false, }); export const deadlockReleasingQueue = queue({ name: "deadlock-releasing-queue", - releaseConcurrencyOnWaitpoint: true, }); export const deadlockTester = task({ diff --git a/references/hello-world/src/trigger/example.ts 
b/references/hello-world/src/trigger/example.ts index 8460eda65e..7f476dbcfd 100644 --- a/references/hello-world/src/trigger/example.ts +++ b/references/hello-world/src/trigger/example.ts @@ -58,12 +58,7 @@ export const parentTask = task({ machine: "medium-1x", run: async (payload: any, { ctx }) => { logger.log("Hello, world from the parent", { payload }); - await childTask.triggerAndWait( - { message: "Hello, world!" }, - { - releaseConcurrency: true, - } - ); + await childTask.triggerAndWait({ message: "Hello, world!" }); }, }); diff --git a/references/hello-world/src/trigger/realtime.ts b/references/hello-world/src/trigger/realtime.ts index 5a4d571c05..67dcf1804e 100644 --- a/references/hello-world/src/trigger/realtime.ts +++ b/references/hello-world/src/trigger/realtime.ts @@ -39,7 +39,7 @@ export const realtimeUpToDateTask = task({ run: async ({ runId }: { runId?: string }) => { if (!runId) { const handle = await helloWorldTask.trigger( - { hello: "world" }, + { hello: "world", sleepFor: 1000 }, { tags: ["hello-world", "realtime"], } diff --git a/references/hello-world/src/trigger/release-concurrency.ts b/references/hello-world/src/trigger/release-concurrency.ts index fe9b4ceaaf..42a22ff9f9 100644 --- a/references/hello-world/src/trigger/release-concurrency.ts +++ b/references/hello-world/src/trigger/release-concurrency.ts @@ -6,14 +6,12 @@ import { setTimeout } from "node:timers/promises"; const releaseEnabledQueue = queue({ name: "release-concurrency-test-queue-enabled", concurrencyLimit: 2, - releaseConcurrencyOnWaitpoint: true, }); // Queue with concurrency limit but release disabled const releaseDisabledQueue = queue({ name: "release-concurrency-test-queue-disabled", concurrencyLimit: 2, - releaseConcurrencyOnWaitpoint: false, }); // Task that runs on the release-enabled queue @@ -28,7 +26,7 @@ const releaseEnabledTask = task({ logger.info(`Run ${payload.id} started at ${startedAt}`); // Wait and release concurrency - await wait.for({ seconds: 
payload.waitSeconds, releaseConcurrency: true }); + await wait.for({ seconds: payload.waitSeconds }); const resumedAt = Date.now(); await setTimeout(2000); // Additional work after resuming diff --git a/references/hello-world/src/trigger/statuses.ts b/references/hello-world/src/trigger/statuses.ts new file mode 100644 index 0000000000..23c1b1f5ea --- /dev/null +++ b/references/hello-world/src/trigger/statuses.ts @@ -0,0 +1,29 @@ +import { logger, runs, task } from "@trigger.dev/sdk"; + +export const statusesTest = task({ + id: "statuses-test", + run: async () => { + console.log("statusesTest"); + }, +}); + +export const subscribeToRun = task({ + id: "subscribe-to-run", + run: async (payload: { runId: string }) => { + const subscription = runs.subscribeToRun(payload.runId, { + stopOnCompletion: false, + }); + + for await (const event of subscription) { + logger.info("run event", { event }); + } + }, +}); + +export const retrieveRun = task({ + id: "retrieve-run", + run: async (payload: { runId: string }) => { + const run = await runs.retrieve(payload.runId); + logger.info("run", { run }); + }, +}); diff --git a/references/hello-world/src/trigger/waits.ts b/references/hello-world/src/trigger/waits.ts index 9a0afa642f..14d1cbccd1 100644 --- a/references/hello-world/src/trigger/waits.ts +++ b/references/hello-world/src/trigger/waits.ts @@ -80,7 +80,7 @@ export const waitToken = task({ logger.log("Retrieved token", retrievedToken); //wait for the token - const result = await wait.forToken<{ foo: string }>(token, { releaseConcurrency: true }); + const result = await wait.forToken<{ foo: string }>(token); if (!result.ok) { logger.log("Token timeout", result); } else { @@ -124,7 +124,6 @@ export const waitForDuration = task({ seconds: duration, idempotencyKey: idempotency, idempotencyKeyTTL, - releaseConcurrency: true, }); await wait.until({ date: new Date(Date.now() + duration * 1000) });