Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions web/app/api/v1/agent/restore/complete/route.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { NextRequest, NextResponse } from "next/server";
import { eq } from "drizzle-orm";
import { revalidatePath } from "next/cache";
import { type NextRequest, NextResponse } from "next/server";
import { db } from "@/db";
import { volumeBackups } from "@/db/schema";
import { eq } from "drizzle-orm";
import { verifyAgentRequest } from "@/lib/agent-auth";
import { inngest } from "@/lib/inngest/client";
import { inngestEvents } from "@/lib/inngest/events";
import { revalidatePath } from "next/cache";

export async function POST(request: NextRequest) {
const body = await request.text();
Expand Down Expand Up @@ -63,6 +63,13 @@ export async function POST(request: NextRequest) {
serviceId: backup.serviceId,
}),
);
await inngest.send(
inngestEvents.migrationRestoreFinished.create({
backupId,
serviceId: backup.serviceId,
status: "completed",
}),
);
}
} else {
await inngest.send(
Expand All @@ -76,11 +83,20 @@ export async function POST(request: NextRequest) {
);

if (isMigration) {
const message = error || "Restore failed";
await inngest.send(
inngestEvents.migrationRestoreFailed.create({
backupId,
serviceId: backup.serviceId,
error: error || "Restore failed",
error: message,
}),
);
await inngest.send(
inngestEvents.migrationRestoreFinished.create({
backupId,
serviceId: backup.serviceId,
status: "failed",
error: message,
}),
);
}
Expand Down
1 change: 1 addition & 0 deletions web/components/service/details/deployment-progress.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const MIGRATION_STAGES: Record<string, string> = {
stopping: "Stopping service",
backing_up: "Creating backup",
restoring: "Restoring volumes",
deploying_target: "Starting on new server",
starting: "Starting on new server",
failed: "Migration failed",
};
Expand Down
44 changes: 32 additions & 12 deletions web/lib/agent-status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,27 @@ type ContainerStatus = {
healthStatus: "none" | "starting" | "healthy" | "unhealthy";
};

function isMigrationTargetStarting(status: string | null | undefined) {
return status === "deploying_target" || status === "starting";
}

async function completeTargetMigration(serviceId: string) {
await db
.update(services)
.set({
migrationStatus: null,
migrationTargetServerId: null,
migrationBackupId: null,
migrationError: null,
})
.where(
and(
eq(services.id, serviceId),
inArray(services.migrationStatus, ["deploying_target", "starting"]),
),
);
}

export type StatusReport = {
resources?: {
cpuCores: number;
Expand Down Expand Up @@ -209,17 +230,11 @@ export async function applyStatusReport(
);
}

if (service?.migrationStatus === "deploying_target") {
if (isMigrationTargetStarting(service?.migrationStatus)) {
console.log(
`[migration] target service ${service.id} healthy, promoting`,
);
await db
.update(services)
.set({
migrationStatus: null,
migrationTargetServerId: null,
})
.where(eq(services.id, service.id));
await completeTargetMigration(service.id);
}
}
}
Expand Down Expand Up @@ -285,10 +300,11 @@ export async function applyStatusReport(
);
}

if (service?.migrationStatus === "deploying_target") {
if (isMigrationTargetStarting(service?.migrationStatus)) {
console.log(
`[migration] deployment ${deployment.id} healthy (no health check), sending event`,
`[migration] deployment ${deployment.id} healthy (no health check), promoting`,
);
await completeTargetMigration(deployment.serviceId);
}
continue;
}
Expand All @@ -310,6 +326,9 @@ export async function applyStatusReport(
(deployment.status === "running" || deployment.status === "healthy");
const healthRecovered =
healthStatus === "healthy" || healthStatus === "none";
if (canAutoheal && healthRecovered) {
await completeTargetMigration(deployment.serviceId);
}

if (canAutoheal && healthStatus === "unhealthy") {
const unhealthyReportCount = (deployment.unhealthyReportCount ?? 0) + 1;
Expand Down Expand Up @@ -430,10 +449,11 @@ export async function applyStatusReport(
.where(eq(services.id, deployment.serviceId))
.then((r) => r[0]);

if (deployedService?.migrationStatus === "deploying_target") {
if (isMigrationTargetStarting(deployedService?.migrationStatus)) {
console.log(
`[migration] deployment ${deployment.id} healthy, sending event`,
`[migration] deployment ${deployment.id} healthy, promoting`,
);
await completeTargetMigration(deployment.serviceId);
}
}

Expand Down
1 change: 1 addition & 0 deletions web/lib/inngest/events/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export const inngestEvents = {
migrationCancelled: defineEvent("migration/cancelled"),
migrationRestoreCompleted: defineEvent("migration/restore-completed"),
migrationRestoreFailed: defineEvent("migration/restore-failed"),
migrationRestoreFinished: defineEvent("migration/restore-finished"),

backupStarted: defineEvent("backup/started"),

Expand Down
8 changes: 8 additions & 0 deletions web/lib/inngest/events/migration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,12 @@ export type MigrationEvents = {
error: string;
};
};
"migration/restore-finished": {
data: {
backupId: string;
serviceId: string;
status: "completed" | "failed";
error?: string;
};
};
};
38 changes: 14 additions & 24 deletions web/lib/inngest/functions/migration-workflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,29 +254,17 @@ export const migrationWorkflow = inngest.createFunction(

const restoreResults = await Promise.all(
backupIds.map((backupId) =>
group.parallel(() => {
const completedPromise = step
.waitForEvent(`wait-restore-${backupId}`, {
event: inngestEvents.migrationRestoreCompleted,
timeout: "30m",
if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`,
})
.then((result) => ({ status: "completed" as const, result }));

const failedPromise = step
.waitForEvent(`wait-restore-failed-${backupId}`, {
event: inngestEvents.migrationRestoreFailed,
timeout: "30m",
if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`,
})
.then((result) => ({ status: "failed" as const, result }));

return Promise.race([completedPromise, failedPromise]);
}),
group.parallel(() =>
step.waitForEvent(`wait-restore-${backupId}`, {
event: inngestEvents.migrationRestoreFinished,
timeout: "30m",
if: `async.data.backupId == "${backupId}" && async.data.serviceId == "${serviceId}"`,
}),
),
),
);

const restoreTimedOut = restoreResults.some((r) => r.result === null);
const restoreTimedOut = restoreResults.some((r) => r === null);
if (restoreTimedOut) {
await step.run("handle-restore-timeout", async () => {
await db
Expand All @@ -290,15 +278,17 @@ export const migrationWorkflow = inngest.createFunction(
return { status: "failed", reason: "restore_timeout" };
}

const restoreFailure = restoreResults.find((r) => r.status === "failed");
const restoreFailure = restoreResults.find(
(r) => r?.data.status === "failed",
);
if (restoreFailure) {
await step.run("handle-restore-failure", async () => {
await db
.update(services)
.set({
migrationStatus: "failed",
migrationError:
restoreFailure.result?.data.error || "Restore failed",
restoreFailure.data.error || "Restore failed",
})
.where(eq(services.id, serviceId));
});
Expand All @@ -308,7 +298,7 @@ export const migrationWorkflow = inngest.createFunction(
await step.run("deploy-target", async () => {
await db
.update(services)
.set({ migrationStatus: "starting" })
.set({ migrationStatus: "deploying_target" })
.where(eq(services.id, serviceId));

await db
Expand All @@ -330,7 +320,7 @@ export const migrationWorkflow = inngest.createFunction(
await deployServiceInternal(serviceId);
});

await step.run("complete-migration", async () => {
await step.run("finalize-migration", async () => {
await db
.update(services)
.set({
Expand Down
Loading