diff --git a/package.json b/package.json index f37dd491..52f0a9cc 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-forge", - "version": "0.4.0", + "version": "0.4.1", "type": "module", "oc-plugin": [ "server", diff --git a/src/constants/loop.ts b/src/constants/loop.ts index 3852f5e2..d4b0cf76 100644 --- a/src/constants/loop.ts +++ b/src/constants/loop.ts @@ -14,11 +14,17 @@ export function buildLoopPermissionRuleset(): PermissionRule[] { rules.push({ permission: '*', pattern: '*', action: 'allow' }) // External directory access: always denied to prevent unauthorized file system traversal. + // /tmp is allowed as a scratch area. rules.push({ permission: 'external_directory', pattern: '*', action: 'deny', }) + rules.push({ + permission: 'external_directory', + pattern: '/tmp', + action: 'allow', + }) // Code agent forbidden tools. Placed after *:allow so findLast picks them up. rules.push( diff --git a/src/hooks/forge-session-attach.ts b/src/hooks/forge-session-attach.ts index 0c27cbd4..37dde536 100644 --- a/src/hooks/forge-session-attach.ts +++ b/src/hooks/forge-session-attach.ts @@ -25,14 +25,28 @@ export function createForgeSessionAttachHook(deps: ForgeSessionAttachHookDeps) { const sessionInfo = eventInput.event.properties?.info as Record | undefined const sessionId = sessionInfo?.id as string | undefined const workspaceId = sessionInfo?.workspaceID as string | undefined + const sessionDirectory = sessionInfo?.directory as string | undefined + const sessionProjectId = (sessionInfo?.projectID as string | undefined) ?? deps.projectId if (!sessionId || !workspaceId) return - let ws = await findWorkspaceById(deps, workspaceId) + let ws = await findWorkspaceById(deps, workspaceId, sessionDirectory) if (!ws) { await new Promise((r) => setTimeout(r, 100)) - ws = await findWorkspaceById(deps, workspaceId) + ws = await findWorkspaceById(deps, workspaceId, sessionDirectory) if (!ws) { - deps.logger.log(`[forge-session-attach] skip session=${sessionId}: workspace ${workspaceId} not found via experimental.workspace.list (may be cross-project; check plugin directory)`) + deps.logger.log( + `[forge-session-attach] skip session=${sessionId}: workspace ${workspaceId} not found ` + + `via experimental.workspace.list directory=${sessionDirectory ?? '(none)'} ` + + `(cross-project or sync lag)`, + ) + if (sessionDirectory) { + publishAttachFailureToast( + deps, + sessionDirectory, + `Forge loop (workspace ${workspaceId})`, + 'Workspace not visible from this plugin instance - open the TUI in the loop\'s project, or run the reconciler.', + ) + } return } } @@ -61,16 +75,16 @@ export function createForgeSessionAttachHook(deps: ForgeSessionAttachHookDeps) { return } - const existing = deps.execDeps.loopsRepo.get(deps.projectId, cfg.loopName) + const existing = deps.execDeps.loopsRepo.get(sessionProjectId, cfg.loopName) if (existing && existing.status === 'running') { // Live loop with this name; skip to avoid double-attach. deps.logger.log(`[forge-session-attach] skip session=${sessionId} loop=${cfg.loopName} reason=already-running`) return } if (existing) { - deps.logger.log(`[forge-session-attach] session=${sessionId} loop=${cfg.loopName} existing-row-status=${existing.status} (will re-attach)`) + deps.logger.log(`[forge-session-attach] session=${sessionId} loop=${cfg.loopName} projectId=${sessionProjectId} existing-row-status=${existing.status} (will re-attach)`) } else { - deps.logger.log(`[forge-session-attach] session=${sessionId} loop=${cfg.loopName} no existing row, proceeding`) + deps.logger.log(`[forge-session-attach] session=${sessionId} loop=${cfg.loopName} projectId=${sessionProjectId} no existing row, proceeding`) } const resolvedHostSessionId = cfg.hostSessionId && cfg.hostSessionId.length > 0 @@ -86,7 +100,7 @@ export function createForgeSessionAttachHook(deps: ForgeSessionAttachHookDeps) { if (planSource.kind === 'inline') { planText = planSource.planText } else { - const row = deps.execDeps.plansRepo.getForSession(deps.projectId, planSource.sessionId) + const row = deps.execDeps.plansRepo.getForSession(sessionProjectId, planSource.sessionId) if (!row) { deps.logger.error(`[forge-session-attach] plan not found for session=${planSource.sessionId} loop=${cfg.loopName} workspace=${workspaceId}`) await failAndCleanup( @@ -104,7 +118,7 @@ export function createForgeSessionAttachHook(deps: ForgeSessionAttachHookDeps) { try { const result = await attachLoopToSession( deps.execDeps, - { surface: 'tui', projectId: deps.projectId, directory: ws.directory ?? deps.directory }, + { surface: 'tui', projectId: sessionProjectId, directory: ws.directory ?? deps.directory }, { sessionId, workspaceId, @@ -153,14 +167,14 @@ async function failAndCleanup( loopName: string, message: string, ): Promise { - publishAttachFailureToast(deps, directory, loopName, message) + publishAttachFailureToast(deps, directory, `Forge loop "${loopName}"`, message) await removeOrphanWorkspace(deps, workspaceId, loopName) } function publishAttachFailureToast( deps: ForgeSessionAttachHookDeps, directory: string, - loopName: string, + title: string, message: string, ): void { const tui = deps.v2.tui @@ -169,12 +183,7 @@ function publishAttachFailureToast( directory, body: { type: 'tui.toast.show', - properties: { - title: `Forge loop "${loopName}"`, - message, - variant: 'error', - duration: 6000, - }, + properties: { title, message, variant: 'error', duration: 6000 }, }, }).catch((err) => { deps.logger.error('[forge-session-attach] failed to publish toast', err) @@ -206,9 +215,12 @@ async function removeOrphanWorkspace( async function findWorkspaceById( deps: ForgeSessionAttachHookDeps, workspaceId: string, + directory?: string, ): Promise { try { - const result = await deps.v2.experimental.workspace.list() + const result = await deps.v2.experimental.workspace.list( + directory ? { directory } : undefined, + ) const entries = (result.data ?? []) as WorkspaceEntry[] return entries.find((e) => e.id === workspaceId) ?? null } catch { diff --git a/src/index.ts b/src/index.ts index 4ab450c3..e5f4d374 100644 --- a/src/index.ts +++ b/src/index.ts @@ -301,6 +301,7 @@ export function createForgePlugin(config: PluginConfig): Plugin { loop: loopHandler.loop, sandboxManager, sectionPlansRepo, + reviewFindingsRepo, workspaceStatusRegistry, }) @@ -435,6 +436,7 @@ export function createForgePlugin(config: PluginConfig): Plugin { loop: loopHandler.loop, sandboxManager, sectionPlansRepo, + reviewFindingsRepo, workspaceStatusRegistry, } const forgeSessionAttachHook = createForgeSessionAttachHook({ diff --git a/src/loop/in-flight-guard.ts b/src/loop/in-flight-guard.ts new file mode 100644 index 00000000..4fd14d53 --- /dev/null +++ b/src/loop/in-flight-guard.ts @@ -0,0 +1,62 @@ +import type { Logger } from '../types' + +export type PromptAgent = 'code' | 'auditor-loop' | 'decomposer' + +export class ConcurrentPromptError extends Error { + readonly code = 'concurrent_prompt' + constructor( + public readonly loopName: string, + public readonly priorSessionId: string, + public readonly priorAgent: PromptAgent, + public readonly attemptedSessionId: string, + public readonly attemptedAgent: PromptAgent, + ) { + super( + `Concurrent agent prompt rejected for loop=${loopName}: ` + + `prior ${priorAgent} on session=${priorSessionId} still in-flight, ` + + `attempted ${attemptedAgent} on session=${attemptedSessionId}`, + ) + this.name = 'ConcurrentPromptError' + } +} + +interface InFlightEntry { + sessionId: string + agent: PromptAgent + startedAt: number +} + +const inFlight = new Map() + +export function markPromptInFlight(loopName: string, sessionId: string, agent: PromptAgent): void { + inFlight.set(loopName, { sessionId, agent, startedAt: Date.now() }) +} + +export function clearPromptInFlight(loopName: string): void { + inFlight.delete(loopName) +} + +export function getPromptInFlight(loopName: string): InFlightEntry | undefined { + return inFlight.get(loopName) +} + +export function assertNoPromptInFlight( + loopName: string, + attemptedSessionId: string, + attemptedAgent: PromptAgent, + logger: Logger, +): void { + const prior = inFlight.get(loopName) + if (!prior) return + if (prior.sessionId === attemptedSessionId && prior.agent === attemptedAgent) return + logger.error( + `[in-flight-guard] concurrent prompt rejected loop=${loopName} ` + + `prior=${prior.agent}: ${prior.sessionId} attempted=${attemptedAgent}: ${attemptedSessionId}`, + ) + throw new ConcurrentPromptError(loopName, prior.sessionId, prior.agent, attemptedSessionId, attemptedAgent) +} + +// Test-only: clear all state. +export function __resetInFlightGuard(): void { + inFlight.clear() +} diff --git a/src/loop/runtime.ts b/src/loop/runtime.ts index d0c66af1..b4af7e2d 100644 --- a/src/loop/runtime.ts +++ b/src/loop/runtime.ts @@ -14,7 +14,7 @@ import { retryWithModelFallback, resolveDecomposerModel } from '../utils/model-f import { resolveLoopModel, resolveLoopAuditorModel } from '../utils/loop-helpers' import type { createSandboxManager } from '../sandbox/manager' // worktree-completion imports moved to hooks/loop.ts (termination side-effects) -import { buildLoopPermissionRuleset } from '../constants/loop' +import { buildLoopPermissionRuleset, buildAuditSessionPermissionRuleset } from '../constants/loop' import { createLoopSessionWithWorkspace, publishWorkspaceDetachedToast } from '../utils/loop-session' // worktree-cleanup imports moved to hooks/loop.ts (termination side-effects) import { createAuditSession, promptAuditSession } from '../utils/audit-session' @@ -23,6 +23,12 @@ import { bindSessionToWorkspace } from '../workspace/forge-worktree' import { extractSections } from '../utils/section-capture' import { decomposeDeterministically } from '../services/deterministic-decomposer' import { markPromptSent, clearPromptPending, sessionsAwaitingBusy, isAwaitingBusy, isAwaitingBusyExpired } from './idle-gate' +import { + markPromptInFlight, + clearPromptInFlight, + assertNoPromptInFlight, + ConcurrentPromptError, +} from './in-flight-guard' import type { TerminationReason } from './termination' import { terminationStatusFor, terminationReasonToString } from './termination' import { nextTransition } from './transitions' @@ -153,8 +159,8 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const MAX_CODE_LAUNCH_RECOVERIES = MAX_RETRIES const codingLaunchRecoveryAttempts = new Map() - const delayedSessionDeleteTimeouts = new Map() - const loopDelayedDeletes = new Map>() + const loopRetainedSessions = new Map() + const SESSION_RETENTION = 2 function withStateLock(loopName: string, fn: () => Promise): Promise { const prev = stateLocks.get(loopName) ?? Promise.resolve() @@ -183,6 +189,13 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const sendWithModel = async () => { const freshState = loopService.getActiveState(loopName) if (!freshState?.active) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, sessionId, 'auditor-loop', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) return { error: err } + throw err + } + markPromptInFlight(loopName, sessionId, 'auditor-loop') markPromptSent(loopName, sessionId, logger) const result = await promptAuditSessionWithFallback({ sessionId, @@ -191,12 +204,20 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { prompt: promptText, auditorModel, }) + clearPromptInFlight(loopName) return result.ok ? { data: true } : { error: result.error } } const sendWithoutModel = async () => { const freshState = loopService.getActiveState(loopName) if (!freshState?.active) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, sessionId, 'auditor-loop', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) return { error: err } + throw err + } + markPromptInFlight(loopName, sessionId, 'auditor-loop') markPromptSent(loopName, sessionId, logger) const result = await promptAuditSessionWithFallback({ sessionId, @@ -204,11 +225,15 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { workspaceId: freshState.workspaceId, prompt: promptText, }) + clearPromptInFlight(loopName) return result.ok ? { data: true } : { error: result.error } } const { result, usedModel } = await retryWithModelFallback(sendWithModel, sendWithoutModel, auditorModel, logger) - if (result.error) clearPromptPending(loopName, logger) + if (result.error) { + clearPromptPending(loopName, logger) + clearPromptInFlight(loopName) + } return { error: result.error, usedModel } } @@ -217,6 +242,13 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const sendWithModel = async () => { const freshState = loopService.getActiveState(loopName) if (!freshState?.active) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, sessionId, 'code', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) return { error: err } + throw err + } + markPromptInFlight(loopName, sessionId, 'code') markPromptSent(loopName, sessionId, logger) return v2Client.session.promptAsync({ sessionID: sessionId, @@ -231,6 +263,13 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const sendWithoutModel = async () => { const freshState = loopService.getActiveState(loopName) if (!freshState?.active) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, sessionId, 'code', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) return { error: err } + throw err + } + markPromptInFlight(loopName, sessionId, 'code') markPromptSent(loopName, sessionId, logger) return v2Client.session.promptAsync({ sessionID: sessionId, @@ -242,7 +281,10 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { } const { result, usedModel } = await retryWithModelFallback(sendWithModel, sendWithoutModel, effectiveModel, logger) - if (result.error) clearPromptPending(loopName, logger) + if (result.error) { + clearPromptPending(loopName, logger) + clearPromptInFlight(loopName) + } return { error: result.error, usedModel } } @@ -599,6 +641,13 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const retryFn = async () => { const freshState = loopService.getActiveState(loopName) if (!freshState?.active) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, activeSessionId, 'code', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) { await handlePromptError(loopName, currentState, `retry failed ${errorContext}`, err); return } + throw err + } + markPromptInFlight(loopName, activeSessionId, 'code') const result = await v2Client.session.promptAsync({ sessionID: activeSessionId, directory: freshState.worktreeDir, @@ -606,6 +655,7 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { agent: 'code', parts: [{ type: 'text' as const, text: continuationPrompt }], }) + clearPromptInFlight(loopName) if (result.error) { await handlePromptError(loopName, currentState, `retry failed ${errorContext}`, result.error) return @@ -697,6 +747,13 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const retryFn = async () => { const fresh = loopService.getActiveState(loopName) if (!fresh?.active || fresh.phase !== 'coding' || fresh.sessionId !== codeSessionId) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, codeSessionId, 'code', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) { await handlePromptError(loopName, state, 'failed to recover code launch', err); return } + throw err + } + markPromptInFlight(loopName, codeSessionId, 'code') const result = await v2Client.session.promptAsync({ sessionID: codeSessionId, directory: fresh.worktreeDir, @@ -704,6 +761,7 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { agent: 'code', parts: [{ type: 'text' as const, text: recoveryPrompt }], }) + clearPromptInFlight(loopName) if (result.error) throw result.error } await handlePromptError(loopName, freshState ?? state, 'failed to recover code launch', promptResultError, retryFn) @@ -720,8 +778,20 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { directory: string context: string }): void { - const { sessionId } = input - logger.debug(`Loop: delayed session delete disabled; preserving ${sessionId}`) + const { loopName, sessionId, directory, context } = input + const queue = loopRetainedSessions.get(loopName) ?? [] + if (queue.includes(sessionId)) return + queue.push(sessionId) + loopRetainedSessions.set(loopName, queue) + logger.debug(`Loop: queued session ${sessionId} for retention (loop=${loopName}, context=${context}, queue=${queue.length})`) + + while (queue.length > SESSION_RETENTION) { + const oldest = queue.shift()! + logger.log(`Loop: trimming session ${oldest} (loop=${loopName}, retention=${SESSION_RETENTION})`) + void v2Client.session.delete({ sessionID: oldest, directory }).catch((err: unknown) => { + logger.error(`Loop: failed to delete trimmed session ${oldest} (loop=${loopName})`, err) + }) + } } async function transitionToCoding(loopName: string, state: LoopState): Promise { @@ -845,19 +915,17 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { idleRetryAttempts.delete(loopName) codingLaunchRecoveryAttempts.delete(loopName) clearPromptPending(loopName, logger) - - const pendingSessionDeletes: string[] = [] - const loopDeleteSet = loopDelayedDeletes.get(loopName) - if (loopDeleteSet) { - for (const sid of loopDeleteSet) { - const t = delayedSessionDeleteTimeouts.get(sid) - if (t) { - clearTimeout(t) - delayedSessionDeleteTimeouts.delete(sid) - } - if (sid !== sessionId) pendingSessionDeletes.push(sid) + clearPromptInFlight(loopName) + + const retained = loopRetainedSessions.get(loopName) + if (retained) { + for (const sid of retained) { + if (sid === sessionId) continue + void v2Client.session.delete({ sessionID: sid, directory: state.worktreeDir }).catch((err) => { + logger.error(`Loop: failed to delete retained session ${sid} on terminate (loop=${loopName})`, err) + }) } - loopDelayedDeletes.delete(loopName) + loopRetainedSessions.delete(loopName) } const now = Date.now() @@ -873,10 +941,6 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { // Session may already be idle } - for (const sid of pendingSessionDeletes) { - logger.debug(`Loop: delayed session delete disabled; preserving ${sid}`) - } - logger.log(`Loop terminated: reason="${terminationReasonToString(reason)}", loop="${state.loopName}", iteration=${state.iteration}`) logger.debug(`Loop: terminateLoop reason=${terminationReasonToString(reason)} worktree=${!!state.worktree} logEligible=${reason.kind === 'completed' && !!state.worktree}`) @@ -964,6 +1028,7 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { currentSectionIndex: input.currentSectionIndex, totalSections: input.totalSections, }), + permission: buildAuditSessionPermissionRuleset(), ...(input.workspaceId ? { workspaceID: input.workspaceId } : {}), }, query: { @@ -1267,12 +1332,20 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const retryFn = async () => { const fresh = loopService.getActiveState(loopName) if (!fresh?.active) throw new Error('loop_cancelled') + try { + assertNoPromptInFlight(loopName, created.auditSessionId, 'auditor-loop', logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) { await handlePromptError(loopName, currentState ?? _state, 'failed to send audit prompt', err); return } + throw err + } + markPromptInFlight(loopName, created.auditSessionId, 'auditor-loop') const retry = await promptAuditSessionWithFallback({ sessionId: created.auditSessionId, worktreeDir: fresh.worktreeDir, workspaceId: fresh.workspaceId, prompt: loopService.buildAuditPrompt(fresh), }) + clearPromptInFlight(loopName) if (!retry.ok) throw retry.error } await handlePromptError(loopName, { ...currentState, phase: 'auditing' }, 'failed to send audit prompt', auditPromptErr, retryFn) @@ -1646,6 +1719,8 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { const decomposerPrompt = loopService.buildDecomposerInitialPrompt(freshState) try { + assertNoPromptInFlight(loopName, decomposerSessionId, 'decomposer', logger) + markPromptInFlight(loopName, decomposerSessionId, 'decomposer') markPromptSent(loopName, decomposerSessionId, logger) await v2Client.session.promptAsync({ sessionID: decomposerSessionId, @@ -1665,6 +1740,7 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { }) } catch (err) { clearPromptPending(loopName, logger) + clearPromptInFlight(loopName) logger.error(`Loop: failed to re-prompt decomposer for ${loopName}`, err) await terminateLoop(loopName, freshState, { kind: 'decomposer_prompt_failed' }) return @@ -1928,6 +2004,9 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { logger.debug(`[idle-gate] busy observed for ses=${sessionId} loop=${loopName}, clearing pending`) clearPromptPending(loopName, logger) } + if (loopName) { + clearPromptInFlight(loopName) + } return } @@ -1995,11 +2074,7 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { } idleRetryAttempts.clear() codingLaunchRecoveryAttempts.clear() - for (const [, timeout] of delayedSessionDeleteTimeouts) { - clearTimeout(timeout) - } - delayedSessionDeleteTimeouts.clear() - loopDelayedDeletes.clear() + loopRetainedSessions.clear() watchdog.clearAll() stateLocks.clear() sessionsAwaitingBusy.clear() @@ -2039,16 +2114,12 @@ export function createLoop(deps: LoopRuntimeDeps): Loop { idleRetryAttempts.delete(loopName) codingLaunchRecoveryAttempts.delete(loopName) - const loopDeleteSet = loopDelayedDeletes.get(loopName) - if (loopDeleteSet) { - for (const sid of loopDeleteSet) { - const t = delayedSessionDeleteTimeouts.get(sid) - if (t) { - clearTimeout(t) - delayedSessionDeleteTimeouts.delete(sid) - } + const retained = loopRetainedSessions.get(loopName) + if (retained) { + for (const sid of retained) { + void v2Client.session.delete({ sessionID: sid, directory: loopService.getActiveState(loopName)?.worktreeDir ?? '' }).catch(() => {}) } - loopDelayedDeletes.delete(loopName) + loopRetainedSessions.delete(loopName) } } diff --git a/src/services/execution.ts b/src/services/execution.ts index 7b8955bf..f993dbc9 100644 --- a/src/services/execution.ts +++ b/src/services/execution.ts @@ -15,7 +15,7 @@ import { extractPlanTitle, extractLoopNames } from '../utils/plan-execution' import { parseModelString, retryWithModelFallback, resolveDecomposerModel } from '../utils/model-fallback' import { formatDecomposerSessionTitle, formatLoopSessionTitle, formatPlanSessionTitle } from '../utils/session-titles' -import { buildLoopPermissionRuleset } from '../constants/loop' +import { buildLoopPermissionRuleset, buildAuditSessionPermissionRuleset } from '../constants/loop' import { findPartialMatch } from '../utils/partial-match' import { isSandboxEnabled } from '../sandbox/context' import { createLoopSessionWithWorkspace, publishWorkspaceDetachedToast } from '../utils/loop-session' @@ -23,6 +23,13 @@ import { join } from 'path' import { existsSync } from 'fs' import { decomposeDeterministically } from './deterministic-decomposer' import { markPromptSent, clearPromptPending, terminationStatusFor, parseTerminationReasonString } from '../loop' +import { + assertNoPromptInFlight, + markPromptInFlight, + clearPromptInFlight, + ConcurrentPromptError, + type PromptAgent, +} from '../loop/in-flight-guard' // ============================================================================ // Surface Types - Identifies the caller boundary @@ -362,6 +369,7 @@ export interface ForgeExecutionServiceDeps { loop: import('../loop/runtime').Loop sandboxManager?: SandboxManager | null sectionPlansRepo?: import('../storage/repos/section-plans-repo').SectionPlansRepo + reviewFindingsRepo?: import('../storage/repos/review-findings-repo').ReviewFindingsRepo workspaceStatusRegistry: import('../utils/workspace-status-registry').WorkspaceStatusRegistry } @@ -800,6 +808,20 @@ export async function attachLoopToSession( } } + // Defensive purge of orphaned per-loop rows (section_plans cascade may not have fired + // historically; plans/review_findings have no FK). Idempotent. + try { + const removedSections = deps.sectionPlansRepo?.deleteAll(ctx.projectId, loopName) ?? 0 + deps.plansRepo.deleteForLoop(ctx.projectId, loopName) + deps.reviewFindingsRepo?.deleteByLoopName(ctx.projectId, loopName) + if (removedSections > 0) { + deps.logger.log(`attachLoopToSession: purged ${removedSections} orphaned section_plans rows for ${loopName}`) + } + } catch (err) { + deps.logger.error(`attachLoopToSession: failed to purge orphaned per-loop rows for ${loopName}`, err) + // Non-fatal — proceed. + } + try { // Persist loop state const state: import('../loop/state').LoopState = { @@ -1030,6 +1052,8 @@ export async function attachLoopToSession( const decomposerPrompt = deps.loop.buildDecomposerInitialPrompt(state) try { + assertNoPromptInFlight(loopName, sessionId, 'decomposer', deps.logger) + markPromptInFlight(loopName, sessionId, 'decomposer') markPromptSent(loopName, sessionId, deps.logger) const decomposerResult = await deps.v2.session.promptAsync({ sessionID: sessionId, @@ -1048,16 +1072,23 @@ export async function attachLoopToSession( }) if ((decomposerResult as { error?: unknown })?.error) { clearPromptPending(loopName, deps.logger) + clearPromptInFlight(loopName) deps.logger.error('attachLoopToSession: decomposer promptAsync returned error', (decomposerResult as { error?: unknown }).error) deps.loop.deleteState(loopName) return { ok: false, code: 'prompt_failed', message: 'Failed to prompt decomposer' } } } catch (err) { clearPromptPending(loopName, deps.logger) + clearPromptInFlight(loopName) + if (err instanceof ConcurrentPromptError) { + deps.loop.deleteState(loopName) + return { ok: false, code: 'prompt_failed', message: err.message } + } deps.logger.error('attachLoopToSession: failed to prompt decomposer', err) deps.loop.deleteState(loopName) return { ok: false, code: 'prompt_failed', message: 'Failed to prompt decomposer' } } + clearPromptInFlight(loopName) // Start watchdog if requested if (startWatchdog && deps.loopHandler) { deps.loopHandler.startWatchdog(loopName) @@ -1217,6 +1248,8 @@ export async function attachLoopToSession( const decomposerPrompt = deps.loop.buildDecomposerInitialPrompt(state) try { + assertNoPromptInFlight(loopName, sessionId, 'decomposer', deps.logger) + markPromptInFlight(loopName, sessionId, 'decomposer') markPromptSent(loopName, sessionId, deps.logger) const decomposerResult = await deps.v2.session.promptAsync({ sessionID: sessionId, @@ -1235,12 +1268,18 @@ export async function attachLoopToSession( }) if ((decomposerResult as { error?: unknown })?.error) { clearPromptPending(loopName, deps.logger) + clearPromptInFlight(loopName) deps.logger.error('attachLoopToSession: decomposer promptAsync returned error', (decomposerResult as { error?: unknown }).error) deps.loop.deleteState(loopName) return { ok: false, code: 'prompt_failed', message: 'Failed to prompt decomposer' } } } catch (err) { clearPromptPending(loopName, deps.logger) + clearPromptInFlight(loopName) + if (err instanceof ConcurrentPromptError) { + deps.loop.deleteState(loopName) + return { ok: false, code: 'prompt_failed', message: err.message } + } deps.logger.error('attachLoopToSession: failed to prompt decomposer', err) deps.loop.deleteState(loopName) return { ok: false, code: 'prompt_failed', message: 'Failed to prompt decomposer' } @@ -1275,6 +1314,8 @@ export async function attachLoopToSession( } } + clearPromptInFlight(loopName) + // Start watchdog if requested if (startWatchdog && deps.loopHandler) { deps.loopHandler.startWatchdog(loopName) @@ -1730,6 +1771,7 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo const decomposerSessionResult = await createSessionWithFallback(deps, { title: formatDecomposerSessionTitle(uniqueLoopName), directory: hostWorktreeDir!, + permission: buildLoopPermissionRuleset(), }) if (!decomposerSessionResult.data) { deps.logger.error('handleStartLoop: failed to create decomposer session for fallback') @@ -2015,6 +2057,17 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo if (stoppedState.terminationReason && parseTerminationReasonString(stoppedState.terminationReason).kind === 'completed') { return fail('conflict', 409, `Loop "${stoppedState.loopName}" completed successfully and cannot be restarted.`) } + if ( + stoppedState.terminationReason && + parseTerminationReasonString(stoppedState.terminationReason).kind === 'final_audit_retry_exhausted' && + !command.force + ) { + return fail( + 'conflict', + 409, + `Loop "${stoppedState.loopName}" terminated during final audit retry exhaustion. Use force=true to restart.`, + ) + } if (stoppedState.worktree && stoppedState.worktreeDir) { if (!existsSync(stoppedState.worktreeDir)) { return fail('conflict', 409, `Cannot restart "${stoppedState.loopName}": worktree directory no longer exists at ${stoppedState.worktreeDir}.`) @@ -2161,6 +2214,7 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo const decomposerResult = await createSessionWithFallback(deps, { title: formatDecomposerSessionTitle(stoppedState.loopName), directory: stoppedState.worktreeDir, + permission: buildLoopPermissionRuleset(), }) if (!decomposerResult.data) { return { ok: false, error: 'Failed to create decomposer session for fallback.' } @@ -2211,6 +2265,7 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo const decomposerResult = await createSessionWithFallback(deps, { title: formatDecomposerSessionTitle(stoppedState.loopName), directory: stoppedState.worktreeDir, + permission: buildLoopPermissionRuleset(), }) if (!decomposerResult.data) { deps.logger.error('loop-restart: failed to create decomposer session') @@ -2232,7 +2287,7 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo totalSections: stoppedState.totalSections ?? 0, }), directory: stoppedState.worktreeDir, - permission: permissionRuleset, + permission: stoppedState.phase === 'final_auditing' ? buildAuditSessionPermissionRuleset() : permissionRuleset, workspaceId: stoppedState.workspaceId, loopName: stoppedState.loopName, logPrefix: 'loop-restart', @@ -2310,7 +2365,9 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo auditorModel: stoppedState.auditorModel ?? deps.config.auditorModel, executionModel: stoppedState.executionModel ?? deps.config.executionModel, }) - : parseModelString(stoppedState.executionModel) ?? parseModelString(deps.config.executionModel) + : stoppedState.phase === 'final_auditing' + ? parseModelString(stoppedState.auditorModel ?? deps.config.auditorModel) + : parseModelString(stoppedState.executionModel) ?? parseModelString(deps.config.executionModel) const workspaceParam = stoppedState.workspaceId ? { workspace: stoppedState.workspaceId } : {} const promptAgent = promptNeedsDecomposer ? 'decomposer' : stoppedState.phase === 'final_auditing' ? 'auditor-loop' : 'code' @@ -2335,6 +2392,13 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo const { result: promptResult } = await retryWithModelFallback( () => { + try { + assertNoPromptInFlight(stoppedState.loopName, effectiveSessionId, promptAgent as PromptAgent, deps.logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) return Promise.resolve({ error: err }) + throw err + } + markPromptInFlight(stoppedState.loopName, effectiveSessionId, promptAgent as PromptAgent) markPromptSent(stoppedState.loopName, effectiveSessionId, deps.logger) return deps.v2.session.promptAsync({ sessionID: effectiveSessionId, @@ -2346,6 +2410,13 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo }) }, () => { + try { + assertNoPromptInFlight(stoppedState.loopName, effectiveSessionId, promptAgent as PromptAgent, deps.logger) + } catch (err) { + if (err instanceof ConcurrentPromptError) return Promise.resolve({ error: err }) + throw err + } + markPromptInFlight(stoppedState.loopName, effectiveSessionId, promptAgent as PromptAgent) markPromptSent(stoppedState.loopName, effectiveSessionId, deps.logger) return deps.v2.session.promptAsync({ sessionID: effectiveSessionId, @@ -2361,6 +2432,7 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo if (promptResult.error) { clearPromptPending(stoppedState.loopName, deps.logger) + clearPromptInFlight(stoppedState.loopName) deps.logger.error('loop-restart: failed to send prompt', promptResult.error) // Save section plans before deleteState (which cascades to section_plans) const savedPlans = deps.sectionPlansRepo?.list(ctx.projectId, stoppedState.loopName) ?? [] @@ -2381,6 +2453,7 @@ export function createForgeExecutionService(deps: ForgeExecutionServiceDeps): Fo return { ok: false, error: 'Restart failed: could not send prompt to new session.' } } + clearPromptInFlight(stoppedState.loopName) deps.loopHandler!.startWatchdog(stoppedState.loopName) return { ok: true, newSessionId: effectiveSessionId, previousSessionId, sandbox: restartSandbox, bindFailed, decomposerSessionId } diff --git a/src/storage/repos/review-findings-repo.ts b/src/storage/repos/review-findings-repo.ts index 58eaf391..dcfe5e9d 100644 --- a/src/storage/repos/review-findings-repo.ts +++ b/src/storage/repos/review-findings-repo.ts @@ -25,6 +25,7 @@ export interface ReviewFindingsRepo { listByLoopName(projectId: string, loopName: string | null, sectionIndex?: number | null): ReviewFindingRow[] listByFile(projectId: string, file: string): ReviewFindingRow[] delete(projectId: string, file: string, line: number, scope?: DeleteScope): boolean + deleteByLoopName(projectId: string, loopName: string): number } export function createReviewFindingsRepo(db: Database): ReviewFindingsRepo { @@ -88,6 +89,11 @@ export function createReviewFindingsRepo(db: Database): ReviewFindingsRepo { WHERE project_id = ? AND loop_name = ? AND file = ? AND line = ? AND section_index = ? `) + const stmtDeleteByLoopName = db.prepare(` + DELETE FROM review_findings + WHERE project_id = ? AND loop_name = ? + `) + function mapRaw(raw: { project_id: string file: string @@ -186,11 +192,17 @@ export function createReviewFindingsRepo(db: Database): ReviewFindingsRepo { return result.changes > 0 } + function deleteByLoopName(projectId: string, loopName: string): number { + const result = stmtDeleteByLoopName.run(projectId, loopName) as unknown as { changes: number } + return result.changes + } + return { write, listAll, listByLoopName, listByFile, delete: deleteFinding, + deleteByLoopName, } } diff --git a/src/storage/repos/section-plans-repo.ts b/src/storage/repos/section-plans-repo.ts index 91c14be2..943024db 100644 --- a/src/storage/repos/section-plans-repo.ts +++ b/src/storage/repos/section-plans-repo.ts @@ -32,6 +32,7 @@ export interface SectionPlansRepo { setCompletedAt(projectId: string, loopName: string, index: number, ms: number): void updateContent(projectId: string, loopName: string, sections: ParsedSection[]): { updated: number } count(projectId: string, loopName: string): number + deleteAll(projectId: string, loopName: string): number restoreAll(rows: SectionPlanRow[]): void } @@ -112,6 +113,8 @@ export function createSectionPlansRepo(db: Database, _logger?: Logger): SectionP SELECT COUNT(*) as count FROM section_plans WHERE project_id = ? AND loop_name = ? `) + const stmtDeleteAll = db.prepare('DELETE FROM section_plans WHERE project_id = ? AND loop_name = ?') + function mapRow(row: Record): SectionPlanRow { return { projectId: row.project_id as string, @@ -206,6 +209,11 @@ export function createSectionPlansRepo(db: Database, _logger?: Logger): SectionP return result.count }, + deleteAll(projectId, loopName) { + const result = stmtDeleteAll.run(projectId, loopName) as unknown as { changes: number } + return result.changes + }, + restoreAll(rows: SectionPlanRow[]): void { const stmtRestore = db.prepare(` INSERT OR REPLACE INTO section_plans (project_id, loop_name, section_index, title, content, status, attempts, diff --git a/src/utils/tui-client.ts b/src/utils/tui-client.ts index 182969e6..6bbfcb56 100644 --- a/src/utils/tui-client.ts +++ b/src/utils/tui-client.ts @@ -264,27 +264,16 @@ export async function connectForgeProject( if (req.mode === 'loop') { const loopName = deriveLoopNameFromTitle(req.title) - const hasHostSession = !!sessionId - if (!hasHostSession) { - tuiDebug(`plan.execute(loop): no hostSessionId; using inline plan source for loop=${loopName}`) + tuiDebug(`plan.execute(loop): inline plan (planText.length=${req.plan.length}) hostSession=${sessionId ?? 'none'} loop=${loopName}`) + const forgeLoop: ForgeLoopExtra = { + loopName, + hostSessionId: sessionId || undefined, + title: req.title, + executionModel: req.executionModel, + auditorModel: req.auditorModel, + planSource: 'inline', + planText: req.plan, } - const forgeLoop: ForgeLoopExtra = hasHostSession - ? { - loopName, - hostSessionId: sessionId, - title: req.title, - executionModel: req.executionModel, - auditorModel: req.auditorModel, - planSource: 'stored', - } - : { - loopName, - title: req.title, - executionModel: req.executionModel, - auditorModel: req.auditorModel, - planSource: 'inline', - planText: req.plan, - } try { const wsRes = await api.client.experimental.workspace.create({ type: 'forge', diff --git a/src/version.ts b/src/version.ts index 333dc6f5..04af9036 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '0.4.0' +export const VERSION = '0.4.1' diff --git a/test/constants/loop.test.ts b/test/constants/loop.test.ts index b5c52472..de3df5cf 100644 --- a/test/constants/loop.test.ts +++ b/test/constants/loop.test.ts @@ -7,6 +7,7 @@ describe('buildLoopPermissionRuleset', () => { expect(rules).toEqual([ { permission: '*', pattern: '*', action: 'allow' }, { permission: 'external_directory', pattern: '*', action: 'deny' }, + { permission: 'external_directory', pattern: '/tmp', action: 'allow' }, { permission: 'review-write', pattern: '*', action: 'deny' }, { permission: 'review-delete', pattern: '*', action: 'deny' }, { permission: 'plan_exit', pattern: '*', action: 'deny' }, @@ -17,11 +18,12 @@ describe('buildLoopPermissionRuleset', () => { ]) }) - it('isSandbox: true: rules[0] is *:*:allow; rules[1] is external_directory:*:deny; length 9', () => { + it('isSandbox: true: rules[0] is *:*:allow; rules[1] is external_directory:*:deny; length 10', () => { const rules = buildLoopPermissionRuleset() expect(rules).toEqual([ { permission: '*', pattern: '*', action: 'allow' }, { permission: 'external_directory', pattern: '*', action: 'deny' }, + { permission: 'external_directory', pattern: '/tmp', action: 'allow' }, { permission: 'review-write', pattern: '*', action: 'deny' }, { permission: 'review-delete', pattern: '*', action: 'deny' }, { permission: 'plan_exit', pattern: '*', action: 'deny' }, diff --git a/test/hooks/forge-session-attach.test.ts b/test/hooks/forge-session-attach.test.ts index 7cf87e4b..88b03b7b 100644 --- a/test/hooks/forge-session-attach.test.ts +++ b/test/hooks/forge-session-attach.test.ts @@ -727,4 +727,263 @@ describe('createForgeSessionAttachHook', () => { ) expect(loggerErrorSpy).not.toHaveBeenCalled() }) + + test('passes sessionInfo.directory to experimental.workspace.list', async () => { + const forgeWorkspaceWithForgeLoop = { + id: 'ws_forge_dir', + type: 'forge', + directory: '/tmp/cross-proj', + extra: { + loopName: 'cross-loop', + projectDirectory: '/tmp/cross-proj', + forgeLoop: { + loopName: 'cross-loop', + hostSessionId: 'host_sess', + title: 'Cross Project Loop', + planSource: 'inline', + planText: '# Plan\n\nCross-project.', + }, + }, + } + const workspaceList = vi.fn().mockResolvedValue({ data: [forgeWorkspaceWithForgeLoop] }) + const deps = buildHookDeps({ workspaceList }) + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess', workspaceID: 'ws_forge_dir', directory: '/tmp/cross-proj', projectID: 'proj_other' }, + }, + }, + }) + + expect(workspaceList).toHaveBeenCalledWith({ directory: '/tmp/cross-proj' }) + expect(mockAttachLoop).toHaveBeenCalled() + }) + + test('omits directory parameter when sessionInfo.directory is missing', async () => { + const forgeWorkspaceWithForgeLoop = { + id: 'ws_forge_nodir', + type: 'forge', + directory: '/tmp/same-proj', + extra: { + loopName: 'nodir-loop', + projectDirectory: '/tmp/same-proj', + forgeLoop: { + loopName: 'nodir-loop', + hostSessionId: 'host_sess', + title: 'No Dir Loop', + planSource: 'inline', + planText: '# Plan\n\nNo dir.', + }, + }, + } + const workspaceList = vi.fn().mockResolvedValue({ data: [forgeWorkspaceWithForgeLoop] }) + const deps = buildHookDeps({ workspaceList }) + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess_no_dir', workspaceID: 'ws_forge_nodir' }, + }, + }, + }) + + expect(workspaceList).toHaveBeenCalledWith(undefined) + expect(mockAttachLoop).toHaveBeenCalled() + }) + + test('uses sessionInfo.projectID for loopsRepo.get and attachLoopToSession ctx', async () => { + const loopsRepoGet = vi.fn().mockReturnValue(null) + const plansRepoGetForSession = vi.fn().mockReturnValue({ content: 'plan text' }) + const workspaceList = vi.fn().mockResolvedValue({ + data: [ + { + id: 'ws_forge_pid', + type: 'forge', + directory: '/tmp/wt/pid', + extra: { + loopName: 'demo', + projectDirectory: '/tmp/wt/pid', + forgeLoop: { + loopName: 'demo', + hostSessionId: 'host_sess', + title: 'Demo Loop', + planSource: 'stored', + }, + }, + }, + ], + }) + const deps = buildHookDeps({ + loopsRepoGet, + plansRepoGetForSession, + workspaceList, + }) + deps.projectId = 'plugin_proj' + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess', workspaceID: 'ws_forge_pid', directory: '/tmp/wt/pid', projectID: 'session_proj' }, + }, + }, + }) + + expect(loopsRepoGet).toHaveBeenCalledWith('session_proj', 'demo') + expect(plansRepoGetForSession).toHaveBeenCalledWith('session_proj', 'host_sess') + expect(mockAttachLoop).toHaveBeenCalledTimes(1) + const [, ctx] = mockAttachLoop.mock.calls[0] + expect(ctx.projectId).toBe('session_proj') + }) + + test('falls back to deps.projectId when sessionInfo.projectID is missing', async () => { + const loopsRepoGet = vi.fn().mockReturnValue(null) + const plansRepoGetForSession = vi.fn().mockReturnValue({ content: 'plan text' }) + const workspaceList = vi.fn().mockResolvedValue({ + data: [ + { + id: 'ws_forge_fb', + type: 'forge', + directory: '/tmp/wt/fb', + extra: { + loopName: 'fallback-loop', + projectDirectory: '/tmp/wt/fb', + forgeLoop: { + loopName: 'fallback-loop', + hostSessionId: 'host_sess', + title: 'Fallback Loop', + planSource: 'stored', + }, + }, + }, + ], + }) + const deps = buildHookDeps({ + loopsRepoGet, + plansRepoGetForSession, + workspaceList, + }) + deps.projectId = 'plugin_proj' + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess_fb', workspaceID: 'ws_forge_fb', directory: '/tmp/wt/fb' }, + }, + }, + }) + + expect(loopsRepoGet).toHaveBeenCalledWith('plugin_proj', 'fallback-loop') + expect(plansRepoGetForSession).toHaveBeenCalledWith('plugin_proj', 'host_sess') + expect(mockAttachLoop).toHaveBeenCalledTimes(1) + const [, ctx] = mockAttachLoop.mock.calls[0] + expect(ctx.projectId).toBe('plugin_proj') + }) + + test('publishes a tui.toast when workspace is unfindable after retry', async () => { + const workspaceList = vi.fn().mockResolvedValue({ data: [] }) + const tuiPublish = vi.fn().mockResolvedValue({ data: {} }) + const deps = buildHookDeps({ workspaceList, tuiPublish }) + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess', workspaceID: 'ws_missing', directory: '/tmp/cross-proj' }, + }, + }, + }) + + expect(tuiPublish).toHaveBeenCalledTimes(1) + expect(tuiPublish).toHaveBeenCalledWith( + expect.objectContaining({ + directory: '/tmp/cross-proj', + body: expect.objectContaining({ + type: 'tui.toast.show', + properties: expect.objectContaining({ + variant: 'error', + }), + }), + }), + ) + expect(mockAttachLoop).not.toHaveBeenCalled() + }) + + test('does not publish a toast when sessionInfo.directory is missing', async () => { + const workspaceList = vi.fn().mockResolvedValue({ data: [] }) + const tuiPublish = vi.fn().mockResolvedValue({ data: {} }) + const deps = buildHookDeps({ workspaceList, tuiPublish }) + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess_no_dir', workspaceID: 'ws_missing' }, + }, + }, + }) + + expect(tuiPublish).not.toHaveBeenCalled() + expect(mockAttachLoop).not.toHaveBeenCalled() + }) + + test('attach hook prefers inline planText over stored plan when both are available', async () => { + const plansRepoGetForSession = vi.fn().mockReturnValue({ content: 'STALE_PRIOR_PLAN_TEXT' }) + const deps = buildHookDeps({ + workspaceList: vi.fn().mockResolvedValue({ + data: [ + { + id: 'ws_inline_vs_stored', + type: 'forge', + directory: '/tmp/wt/inline-vs-stored', + extra: { + loopName: 'my-plan', + projectDirectory: '/tmp/wt/inline-vs-stored', + forgeLoop: { + loopName: 'my-plan', + hostSessionId: 'ses_host', + title: 'My Plan', + planSource: 'inline', + planText: 'FRESH_PLAN_TEXT', + }, + }, + }, + ], + }), + plansRepoGetForSession, + }) + + const handler = createForgeSessionAttachHook(deps as any) + + await handler({ + event: { + type: 'session.created', + properties: { + info: { id: 'new_sess', workspaceID: 'ws_inline_vs_stored' }, + }, + }, + }) + + expect(mockAttachLoop).toHaveBeenCalledTimes(1) + const [, , input] = mockAttachLoop.mock.calls[0] + expect(input.planText).toBe('FRESH_PLAN_TEXT') + + expect(plansRepoGetForSession).not.toHaveBeenCalled() + }) }) diff --git a/test/loop-permission-ruleset.test.ts b/test/loop-permission-ruleset.test.ts index 22412875..d90ee541 100644 --- a/test/loop-permission-ruleset.test.ts +++ b/test/loop-permission-ruleset.test.ts @@ -1,5 +1,8 @@ -import { describe, test, expect } from 'bun:test' +import { describe, test, expect, mock } from 'bun:test' import { buildLoopPermissionRuleset, buildAuditSessionPermissionRuleset } from '../src/constants/loop' +import { createAuditSession } from '../src/utils/audit-session' +import { createLoopSessionWithWorkspace } from '../src/utils/loop-session' +import type { Logger } from '../src/types' describe('buildLoopPermissionRuleset', () => { test('worktree + sandbox ruleset: allow-all first, external_directory denied, code-agent denies, then operational denies last', () => { @@ -7,21 +10,7 @@ describe('buildLoopPermissionRuleset', () => { expect(rules).toEqual([ { permission: '*', pattern: '*', action: 'allow' }, { permission: 'external_directory', pattern: '*', action: 'deny' }, - { permission: 'review-write', pattern: '*', action: 'deny' }, - { permission: 'review-delete', pattern: '*', action: 'deny' }, - { permission: 'plan_exit', pattern: '*', action: 'deny' }, - { permission: 'loop', pattern: '*', action: 'deny' }, - { permission: 'bash', pattern: 'git push *', action: 'deny' }, - { permission: 'loop-cancel', pattern: '*', action: 'deny' }, - { permission: 'loop-status', pattern: '*', action: 'deny' }, - ]) - }) - - test('worktree + non-sandbox ruleset: allow-all first, external_directory denied, code-agent denies, then operational denies last', () => { - const rules = buildLoopPermissionRuleset() - expect(rules).toEqual([ - { permission: '*', pattern: '*', action: 'allow' }, - { permission: 'external_directory', pattern: '*', action: 'deny' }, + { permission: 'external_directory', pattern: '/tmp', action: 'allow' }, { permission: 'review-write', pattern: '*', action: 'deny' }, { permission: 'review-delete', pattern: '*', action: 'deny' }, { permission: 'plan_exit', pattern: '*', action: 'deny' }, @@ -34,13 +23,29 @@ describe('buildLoopPermissionRuleset', () => { test('EMITS session-level denies for code-agent tool exclusions (auditor now runs in separate session)', () => { const required = ['review-write', 'review-delete', 'loop'] - for (const isSandbox of [true, false]) { - const rules = buildLoopPermissionRuleset() - for (const tool of required) { - expect(rules.find((r) => r.permission === tool && r.action === 'deny')).toBeDefined() - } + const rules = buildLoopPermissionRuleset() + for (const tool of required) { + expect(rules.find((r) => r.permission === tool && r.action === 'deny')).toBeDefined() } }) + + test('contains external_directory:*:deny rule', () => { + const rules = buildLoopPermissionRuleset() + expect(rules).toContainEqual({ permission: 'external_directory', pattern: '*', action: 'deny' }) + }) + + test('external_directory:*:deny appears before external_directory:/tmp:allow', () => { + const rules = buildLoopPermissionRuleset() + const denyIdx = rules.findIndex( + (r) => r.permission === 'external_directory' && r.pattern === '*' && r.action === 'deny', + ) + const allowIdx = rules.findIndex( + (r) => r.permission === 'external_directory' && r.pattern === '/tmp' && r.action === 'allow', + ) + expect(denyIdx).toBeGreaterThanOrEqual(0) + expect(allowIdx).toBeGreaterThanOrEqual(0) + expect(denyIdx).toBeLessThan(allowIdx) + }) }) describe('buildAuditSessionPermissionRuleset', () => { @@ -84,4 +89,97 @@ describe('buildAuditSessionPermissionRuleset', () => { expect(rules.some(r => r.permission === 'bash' && r.pattern === 'git push *' && r.action === 'deny')).toBe(true) expect(rules.some(r => r.permission === 'loop' && r.pattern === '*' && r.action === 'deny')).toBe(true) }) + + test('contains external_directory:*:deny rule', () => { + const rules = buildAuditSessionPermissionRuleset() + expect(rules).toContainEqual({ permission: 'external_directory', pattern: '*', action: 'deny' }) + }) + + test('external_directory:*:deny appears before any /tmp allow rule if present', () => { + const rules = buildAuditSessionPermissionRuleset() + const denyIdx = rules.findIndex( + (r) => r.permission === 'external_directory' && r.pattern === '*' && r.action === 'deny', + ) + const allowIdx = rules.findIndex( + (r) => r.permission === 'external_directory' && r.pattern === '/tmp' && r.action === 'allow', + ) + expect(denyIdx).toBeGreaterThanOrEqual(0) + // If a /tmp allow rule exists, deny must come first; otherwise deny is sufficient + if (allowIdx >= 0) { + expect(denyIdx).toBeLessThan(allowIdx) + } + }) +}) + +describe('createAuditSession passes audit permission rules into session creation', () => { + test('session.create receives permission equal to buildAuditSessionPermissionRuleset()', async () => { + const expectedPermission = buildAuditSessionPermissionRuleset() + const mockCreate = mock(async (params: any) => ({ data: { id: 'audit-session' }, error: null })) + const mockGet = mock(async () => ({ data: { permission: expectedPermission }, error: null })) + const mockV2 = { + session: { + create: mockCreate, + get: mockGet, + }, + } as any + + const logger = { log: mock(), error: mock() } as unknown as Logger + + await createAuditSession({ + v2: mockV2, + loopName: 'permission-loop', + iteration: 1, + currentSectionIndex: 0, + totalSections: 1, + worktreeDir: '/tmp/permission-loop', + isSandbox: false, + prompt: 'audit', + logger, + }) + + expect(mockCreate).toHaveBeenCalled() + const callArgs = (mockCreate as any).mock.calls[0][0] + expect(callArgs.directory).toBe('/tmp/permission-loop') + expect(callArgs.permission).toEqual(expectedPermission) + expect(callArgs.permission).toContainEqual({ + permission: 'external_directory', + pattern: '*', + action: 'deny', + }) + }) +}) + +describe('createLoopSessionWithWorkspace passes loop permission rules into session creation', () => { + test('session.create receives permission exactly equal to buildLoopPermissionRuleset()', async () => { + const expectedPermission = buildLoopPermissionRuleset() + const mockCreate = mock(async (params: any) => ({ data: { id: 'loop-session' }, error: null })) + const mockGet = mock(async () => ({ data: {} })) + const mockV2 = { + session: { + create: mockCreate, + get: mockGet, + }, + } as any + + const logger = { log: mock(), error: mock() } as unknown as Logger + + await createLoopSessionWithWorkspace({ + v2: mockV2, + title: 'test loop session', + directory: '/tmp/permission-loop', + permission: expectedPermission, + logPrefix: 'test', + logger, + }) + + expect(mockCreate).toHaveBeenCalled() + const callArgs = (mockCreate as any).mock.calls[0][0] + expect(callArgs.permission).toEqual(expectedPermission) + expect(callArgs.directory).toBe('/tmp/permission-loop') + expect(callArgs.permission).toContainEqual({ + permission: 'external_directory', + pattern: '*', + action: 'deny', + }) + }) }) diff --git a/test/loop-runtime-audit-permissions.test.ts b/test/loop-runtime-audit-permissions.test.ts new file mode 100644 index 00000000..f450537e --- /dev/null +++ b/test/loop-runtime-audit-permissions.test.ts @@ -0,0 +1,283 @@ +import { describe, test, expect, beforeEach, afterEach, vi } from 'vitest' +import Database from 'better-sqlite3' +type DB = InstanceType +import { mkdtempSync, rmSync } from 'fs' +import { join } from 'path' +import { tmpdir } from 'os' +import { createLoopsRepo } from '../src/storage/repos/loops-repo' +import { createPlansRepo } from '../src/storage/repos/plans-repo' +import { createReviewFindingsRepo } from '../src/storage/repos/review-findings-repo' +import { createSectionPlansRepo } from '../src/storage/repos/section-plans-repo' +import type { LoopState } from '../src/loop/state' +import { createLoop } from '../src/loop/runtime' +import { buildAuditSessionPermissionRuleset } from '../src/constants/loop' +import type { Logger, PluginConfig } from '../src/types' +import type { OpencodeClient } from '@opencode-ai/sdk/v2' + +const PROJECT_ID = 'test-project' + +const DB_SCHEMA = ` +CREATE TABLE loops ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + status TEXT NOT NULL, + current_session_id TEXT NOT NULL, + worktree INTEGER NOT NULL, + worktree_dir TEXT NOT NULL, + session_directory TEXT, + worktree_branch TEXT, + project_dir TEXT NOT NULL, + max_iterations INTEGER NOT NULL, + iteration INTEGER NOT NULL DEFAULT 0, + audit_count INTEGER NOT NULL DEFAULT 0, + error_count INTEGER NOT NULL DEFAULT 0, + phase TEXT NOT NULL, + execution_model TEXT, + auditor_model TEXT, + model_failed INTEGER NOT NULL DEFAULT 0, + sandbox INTEGER NOT NULL DEFAULT 0, + sandbox_container TEXT, + started_at INTEGER NOT NULL, + completed_at INTEGER, + termination_reason TEXT, + completion_summary TEXT, + workspace_id TEXT, + host_session_id TEXT, + audit_session_id TEXT, + decomposition_status TEXT NOT NULL DEFAULT 'pending' CHECK (decomposition_status IN ('pending','running','completed','failed','skipped')), + decomposition_mode TEXT NOT NULL DEFAULT 'agent' CHECK (decomposition_mode IN ('agent','deterministic')), + decomposition_session_id TEXT, + current_section_index INTEGER NOT NULL DEFAULT 0, + total_sections INTEGER NOT NULL DEFAULT 0, + final_audit_done INTEGER NOT NULL DEFAULT 0, + final_audit_attempts INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (project_id, loop_name) +) +` + +const LOOP_LARGE_FIELDS_SCHEMA = ` +CREATE TABLE loop_large_fields ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + prompt TEXT, + last_audit_result TEXT, + PRIMARY KEY (project_id, loop_name), + FOREIGN KEY (project_id, loop_name) REFERENCES loops(project_id, loop_name) ON DELETE CASCADE +) +` + +const PLANS_SCHEMA = ` +CREATE TABLE plans ( + project_id TEXT NOT NULL, + loop_name TEXT, + session_id TEXT, + content TEXT NOT NULL, + updated_at INTEGER NOT NULL, + CHECK (loop_name IS NOT NULL OR session_id IS NOT NULL), + CHECK (NOT (loop_name IS NOT NULL AND session_id IS NOT NULL)), + UNIQUE (project_id, loop_name), + UNIQUE (project_id, session_id) +) +` + +const REVIEW_FINDINGS_SCHEMA = ` +CREATE TABLE review_findings ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL DEFAULT '', + file TEXT NOT NULL, + line INTEGER NOT NULL, + severity TEXT NOT NULL, + description TEXT NOT NULL, + scenario TEXT, + created_at INTEGER NOT NULL, + section_index INTEGER, + PRIMARY KEY (project_id, loop_name, file, line, section_index) +) +` + +const SECTION_PLANS_SCHEMA = ` +CREATE TABLE section_plans ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + section_index INTEGER NOT NULL, + title TEXT NOT NULL, + content TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending','in_progress','completed','failed')), + attempts INTEGER NOT NULL DEFAULT 0, + started_at INTEGER, + completed_at INTEGER, + summary_done TEXT, + summary_deviations TEXT, + summary_follow_ups TEXT, + created_at INTEGER NOT NULL, + PRIMARY KEY (project_id, loop_name, section_index) +) +` + +describe('Legacy audit fallback permissions', () => { + let db: DB + let tempDir: string + let loopsRepo: ReturnType + let plansRepo: ReturnType + let reviewFindingsRepo: ReturnType + let sectionPlansRepo: ReturnType + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'loop-audit-perm-test-')) + db = new Database(join(tempDir, 'test.db')) + db.exec(DB_SCHEMA) + db.exec(LOOP_LARGE_FIELDS_SCHEMA) + db.exec(PLANS_SCHEMA) + db.exec(REVIEW_FINDINGS_SCHEMA) + db.exec(SECTION_PLANS_SCHEMA) + + loopsRepo = createLoopsRepo(db) + plansRepo = createPlansRepo(db) + reviewFindingsRepo = createReviewFindingsRepo(db) + sectionPlansRepo = createSectionPlansRepo(db) + }) + + afterEach(() => { + db.close() + try { + rmSync(tempDir, { recursive: true, force: true }) + } catch {} + }) + + function makeState(overrides: Partial = {}): LoopState { + return { + active: true, + sessionId: 'code-session-id', + loopName: 'test-loop', + worktreeDir: '/tmp/test-worktree', + projectDir: '/tmp/host-project', + worktreeBranch: 'test/branch', + iteration: 1, + maxIterations: 5, + startedAt: new Date().toISOString(), + prompt: 'Test prompt', + phase: 'coding', + errorCount: 0, + auditCount: 0, + worktree: true, + modelFailed: false, + sandbox: false, + executionModel: 'test/model', + auditorModel: 'test/auditor', + decompositionStatus: 'completed', + decompositionMode: 'deterministic', + decompositionSessionId: null, + currentSectionIndex: 0, + totalSections: 1, + finalAuditDone: false, + ...overrides, + } + } + + test('fallback includes buildAuditSessionPermissionRuleset()', async () => { + const legacyCreateCalls: Array> = [] + + const pluginClient = { + session: { + create: vi.fn(async (input: any) => { + legacyCreateCalls.push(input) + return { data: { id: 'legacy-audit' }, error: null } + }), + promptAsync: vi.fn(async () => ({ data: {}, error: null })), + messages: vi.fn(async () => ({ data: [], error: null })), + }, + } + + const v2Client = { + session: { + create: vi.fn(async () => ({ error: new Error('v2 down'), data: undefined })), + get: vi.fn(async () => ({ data: {}, error: null })), + promptAsync: vi.fn(async () => ({ data: {}, error: null })), + abort: vi.fn(async () => ({ data: {}, error: null })), + messages: vi.fn(async () => ({ + data: [ + { + info: { role: 'assistant', finish: 'stop' }, + parts: [{ type: 'text', text: 'All clear.' }], + }, + ], + error: null, + })), + status: vi.fn(async () => ({ data: {}, error: null })), + delete: vi.fn(async () => ({ data: {}, error: null })), + }, + } as unknown as OpencodeClient + + const logger: Logger = { + log: () => {}, + error: () => {}, + debug: () => {}, + } + + const config: PluginConfig = { + executionModel: 'test/model', + auditorModel: 'test/auditor', + loop: { enabled: true, model: 'test/loop', defaultMaxIterations: 5 }, + } + + const loopService = ( + await import('../src/loop/service') + ).createLoopService( + loopsRepo, + plansRepo, + reviewFindingsRepo, + PROJECT_ID, + logger, + undefined, + undefined, + undefined, + sectionPlansRepo, + ) + + const loop = createLoop({ + loopsRepo, + plansRepo, + reviewFindingsRepo, + sectionPlansRepo, + projectId: PROJECT_ID, + client: pluginClient as any, + v2Client, + logger, + getConfig: () => config, + sandboxManager: undefined, + dataDir: tempDir, + }) + + const state = makeState({ + phase: 'coding', + sessionId: 'code-session-id', + totalSections: 1, + decompositionStatus: 'completed', + auditCount: 0, + iteration: 1, + maxIterations: 3, + workspaceId: 'ws-test', + worktree: true, + }) + loopService.setState(state.loopName, state) + + await loop.tick({ + type: 'session.status', + properties: { + status: { type: 'idle' }, + sessionID: state.sessionId, + }, + }) + + expect(legacyCreateCalls.length).toBeGreaterThan(0) + + const callBody = legacyCreateCalls[0] as any + expect(callBody.body).toBeDefined() + expect(callBody.body.permission).toEqual(buildAuditSessionPermissionRuleset()) + expect(callBody.body.permission).toContainEqual({ + permission: 'external_directory', + pattern: '*', + action: 'deny', + }) + }) +}) diff --git a/test/loop-status-tool.test.ts b/test/loop-status-tool.test.ts index 389af85d..84e18bcd 100644 --- a/test/loop-status-tool.test.ts +++ b/test/loop-status-tool.test.ts @@ -10,6 +10,7 @@ import { createReviewFindingsRepo } from '../src/storage/repos/review-findings-r import { createLoopTools } from '../src/tools/loop' import { createLogger } from '../src/utils/logger' import { createLoopEventHandler } from '../src/hooks/loop' +import { buildLoopPermissionRuleset, buildAuditSessionPermissionRuleset } from '../src/constants/loop' import { tmpdir } from 'os' import { join } from 'path' import { randomUUID } from 'crypto' @@ -484,4 +485,331 @@ describe('loop-status tool restart path', () => { const createArgs = createCalls[0][0] expect(createArgs).not.toHaveProperty('parentID') }) + + test('force-restart agent decomposer without workspace includes permission ruleset', async () => { + const mockApi = createMockTuiApi() + const v2Client = mockApi.client as unknown as OpencodeClient + const logger = createLogger({ enabled: false, file: '' }) + + const loopsRepo = createLoopsRepo(db) + const plansRepo = createPlansRepo(db) + const reviewFindingsRepo = createReviewFindingsRepo(db) + const loopService = createLoopService(loopsRepo, plansRepo, reviewFindingsRepo, projectId, logger) + + const oldSessionId = 'old-session-decomposer-agent-noworkspace' + const worktreeDir = `${TEST_DIR}/worktree-decomposer-agent` + mkdirSync(worktreeDir, { recursive: true }) + + loopService.setState(loopName, { + active: false, + sessionId: oldSessionId, + loopName, + worktreeDir, + projectDir: TEST_DIR, + worktreeBranch: 'opencode/loop-test-decomposer-agent', + iteration: 2, + maxIterations: 5, + startedAt: new Date().toISOString(), + prompt: 'Test prompt for agent decomposer restart without workspace', + phase: 'coding', + errorCount: 0, + auditCount: 0, + worktree: true, + sandbox: false, + executionModel: 'test-model', + auditorModel: 'test-auditor', + workspaceId: undefined, + hostSessionId, + decompositionStatus: 'failed', + decompositionMode: 'agent', + decompositionSessionId: null, + currentSectionIndex: 0, + totalSections: 0, + finalAuditDone: false, + terminationReason: 'decomposition_failed', + completedAt: new Date().toISOString(), + } as LoopState) + + const loopHandler = createLoopEventHandler(loopsRepo, plansRepo, reviewFindingsRepo, projectId, mockApi as any, v2Client, logger, () => ({}), undefined, dbPath) + const tools = createLoopTools({ + v2: v2Client, + directory: TEST_DIR, + config: {}, + loopService, + loopHandler, + logger, + plansRepo, + loopsRepo, + projectId, + dataDir: dbPath, + loop: loopHandler.loop, + } as any) + + await tools['loop-status'].execute({ + name: loopName, + restart: true, + force: true, + }, { sessionID: 'test-session' } as any) + + const createCalls = ((v2Client.session.create as any)).mock.calls + expect(createCalls.length).toBeGreaterThan(0) + + // Find the session.create call that has permission property + const callWithPermission = createCalls.find((call: any[]) => + call[0]?.permission !== undefined + ) + expect(callWithPermission).toBeDefined() + expect(callWithPermission![0].permission).toEqual(buildLoopPermissionRuleset()) + }) + + test('force-restart deterministic-to-agent fallback without workspace includes permission ruleset', async () => { + const mockApi = createMockTuiApi() + const v2Client = mockApi.client as unknown as OpencodeClient + const logger = createLogger({ enabled: false, file: '' }) + + const loopsRepo = createLoopsRepo(db) + const plansRepo = createPlansRepo(db) + const reviewFindingsRepo = createReviewFindingsRepo(db) + const loopService = createLoopService(loopsRepo, plansRepo, reviewFindingsRepo, projectId, logger) + + const oldSessionId = 'old-session-deterministic-fallback-noworkspace' + const worktreeDir = `${TEST_DIR}/worktree-deterministic-fallback` + mkdirSync(worktreeDir, { recursive: true }) + + // Seed inactive loop with deterministic decomposition that failed + loopService.setState(loopName, { + active: false, + sessionId: oldSessionId, + loopName, + worktreeDir, + projectDir: TEST_DIR, + worktreeBranch: 'opencode/loop-test-deterministic-fallback', + iteration: 2, + maxIterations: 5, + startedAt: new Date().toISOString(), + // Empty plan text so deterministic parser yields no sections + prompt: '', + phase: 'coding', + errorCount: 0, + auditCount: 0, + worktree: true, + sandbox: false, + executionModel: 'test-model', + auditorModel: 'test-auditor', + workspaceId: undefined, + hostSessionId, + decompositionStatus: 'failed', + decompositionMode: 'deterministic', + decompositionSessionId: null, + currentSectionIndex: 0, + totalSections: 0, + finalAuditDone: false, + terminationReason: 'decomposition_failed', + completedAt: new Date().toISOString(), + } as LoopState) + + const loopHandler = createLoopEventHandler(loopsRepo, plansRepo, reviewFindingsRepo, projectId, mockApi as any, v2Client, logger, () => ({}), undefined, dbPath) + const tools = createLoopTools({ + v2: v2Client, + directory: TEST_DIR, + config: { decomposer: { enabled: true, mode: 'deterministic', onParseFailure: 'agent', maxSections: 12 } }, + loopService, + loopHandler, + logger, + plansRepo, + loopsRepo, + projectId, + dataDir: dbPath, + loop: loopHandler.loop, + } as any) + + await tools['loop-status'].execute({ + name: loopName, + restart: true, + force: true, + }, { sessionID: 'test-session' } as any) + + const createCalls = ((v2Client.session.create as any)).mock.calls + expect(createCalls.length).toBeGreaterThan(0) + + // Find the fallback decomposer session.create call (has decomposer- title prefix) + const decomposerCall = createCalls.find((call: any[]) => + typeof call[0]?.title === 'string' && call[0].title.startsWith('decomposer-') + ) + expect(decomposerCall).toBeDefined() + expect(decomposerCall![0]).toHaveProperty('permission') + expect(decomposerCall![0].permission).toEqual(buildLoopPermissionRuleset()) + }) + + test('non-force restart of final_audit_retry_exhausted returns conflict', async () => { + const mockApi = createMockTuiApi() + const v2Client = mockApi.client as unknown as OpencodeClient + const logger = createLogger({ enabled: false, file: '' }) + + const loopsRepo = createLoopsRepo(db) + const plansRepo = createPlansRepo(db) + const reviewFindingsRepo = createReviewFindingsRepo(db) + const loopService = createLoopService(loopsRepo, plansRepo, reviewFindingsRepo, projectId, logger) + + const oldSessionId = 'old-session-final-audit-exhausted' + const worktreeDir = `${TEST_DIR}/worktree-final-audit-exhausted` + mkdirSync(worktreeDir, { recursive: true }) + + loopService.setState(loopName, { + active: false, + sessionId: oldSessionId, + loopName, + worktreeDir, + projectDir: TEST_DIR, + worktreeBranch: 'opencode/loop-test-final-audit-exhausted', + iteration: 2, + maxIterations: 5, + startedAt: new Date().toISOString(), + prompt: 'Test prompt', + phase: 'final_auditing', + errorCount: 0, + auditCount: 1, + worktree: true, + sandbox: false, + executionModel: 'test-model', + auditorModel: 'test-auditor', + workspaceId, + hostSessionId, + decompositionStatus: 'completed', + decompositionMode: 'deterministic', + decompositionSessionId: null, + currentSectionIndex: 1, + totalSections: 2, + finalAuditDone: false, + terminationReason: 'final_audit_retry_exhausted', + completedAt: new Date().toISOString(), + } as LoopState) + + const loopHandler = createLoopEventHandler(loopsRepo, plansRepo, reviewFindingsRepo, projectId, mockApi as any, v2Client, logger, () => ({}), undefined, dbPath) + const tools = createLoopTools({ + v2: v2Client, + directory: TEST_DIR, + config: {}, + loopService, + loopHandler, + logger, + plansRepo, + loopsRepo, + projectId, + dataDir: dbPath, + loop: loopHandler.loop, + } as any) + + const result = await tools['loop-status'].execute({ + name: loopName, + restart: true, + force: false, + }, { sessionID: 'test-session' } as any) + + expect(result).toContain('terminated during final audit retry exhaustion') + expect(result).toContain('Use force=true to restart') + + // No new session.create should have been called + const createCalls = ((v2Client.session.create as any)).mock.calls + expect(createCalls.length).toBe(0) + + // Loop should remain inactive + const state = loopService.getActiveState(loopName) + expect(state).toBeNull() + }) + + test('forced restart of final_audit_retry_exhausted resumes at final_auditing', async () => { + const mockApi = createMockTuiApi() + const v2Client = mockApi.client as unknown as OpencodeClient + const logger = createLogger({ enabled: false, file: '' }) + + const loopsRepo = createLoopsRepo(db) + const plansRepo = createPlansRepo(db) + const reviewFindingsRepo = createReviewFindingsRepo(db) + const loopService = createLoopService(loopsRepo, plansRepo, reviewFindingsRepo, projectId, logger) + + const oldSessionId = 'old-session-final-audit-force' + const worktreeDir = `${TEST_DIR}/worktree-final-audit-force` + mkdirSync(worktreeDir, { recursive: true }) + + loopService.setState(loopName, { + active: false, + sessionId: oldSessionId, + loopName, + worktreeDir, + projectDir: TEST_DIR, + worktreeBranch: 'opencode/loop-test-final-audit-force', + iteration: 2, + maxIterations: 5, + startedAt: new Date().toISOString(), + prompt: 'Test prompt for forced final audit restart', + phase: 'final_auditing', + errorCount: 0, + auditCount: 1, + worktree: true, + sandbox: false, + executionModel: 'provider/execution-model', + auditorModel: 'provider/auditor-model', + workspaceId, + hostSessionId, + decompositionStatus: 'completed', + decompositionMode: 'deterministic', + decompositionSessionId: null, + currentSectionIndex: 1, + totalSections: 2, + finalAuditDone: false, + terminationReason: 'final_audit_retry_exhausted', + completedAt: new Date().toISOString(), + } as LoopState) + + const loopHandler = createLoopEventHandler(loopsRepo, plansRepo, reviewFindingsRepo, projectId, mockApi as any, v2Client, logger, () => ({}), undefined, dbPath) + const tools = createLoopTools({ + v2: v2Client, + directory: TEST_DIR, + config: {}, + loopService, + loopHandler, + logger, + plansRepo, + loopsRepo, + projectId, + dataDir: dbPath, + loop: loopHandler.loop, + } as any) + + const result = await tools['loop-status'].execute({ + name: loopName, + restart: true, + force: true, + }, { sessionID: 'test-session' } as any) + + expect(result).toContain('Restarted loop') + + // Verify persisted state + const newState = loopService.getActiveState(loopName) + expect(newState).toBeDefined() + expect(newState?.active).toBe(true) + expect(newState?.phase).toBe('final_auditing') + expect(newState?.terminationReason).toBeFalsy() + expect(newState?.completedAt).toBeFalsy() + expect(newState?.currentSectionIndex).toBe(1) + expect(newState?.totalSections).toBe(2) + expect(newState?.finalAuditDone).toBe(false) + + // Verify promptAsync was called with auditor-loop agent using auditor model + const promptCalls = ((v2Client.session.promptAsync as any)).mock.calls + expect(promptCalls.length).toBeGreaterThan(0) + const lastPromptCall = promptCalls[promptCalls.length - 1][0] + expect(lastPromptCall.agent).toBe('auditor-loop') + expect(lastPromptCall.model).toEqual({ providerID: 'provider', modelID: 'auditor-model' }) + + // Verify session creation uses audit permissions, not loop permissions + const createCalls = ((v2Client.session.create as any)).mock.calls + expect(createCalls.length).toBeGreaterThan(0) + const callWithPermission = createCalls.find((call: any[]) => + call[0]?.permission !== undefined + ) + expect(callWithPermission).toBeDefined() + expect(callWithPermission![0].permission).toEqual(buildAuditSessionPermissionRuleset()) + }) }) diff --git a/test/loop/in-flight-guard.test.ts b/test/loop/in-flight-guard.test.ts new file mode 100644 index 00000000..56269318 --- /dev/null +++ b/test/loop/in-flight-guard.test.ts @@ -0,0 +1,79 @@ +import { describe, test, expect, beforeEach } from 'vitest' +import { + markPromptInFlight, + clearPromptInFlight, + assertNoPromptInFlight, + getPromptInFlight, + ConcurrentPromptError, + __resetInFlightGuard, +} from '../../src/loop/in-flight-guard' +import type { Logger } from '../../src/types' + +function createMockLogger(): { logger: Logger; errorCalls: unknown[][] } { + const errorCalls: unknown[][] = [] + const logger: Logger = { + log: () => {}, + error: (...args: unknown[]) => errorCalls.push(args), + debug: () => {}, + } + return { logger, errorCalls } +} + +describe('in-flight guard', () => { + beforeEach(() => { + __resetInFlightGuard() + }) + + test('rejects concurrent prompt for same loop with different session/agent', () => { + markPromptInFlight('loopA', 'sess-1', 'code') + const { logger, errorCalls } = createMockLogger() + + expect(() => + assertNoPromptInFlight('loopA', 'sess-2', 'auditor-loop', logger) + ).toThrow(ConcurrentPromptError) + + expect(errorCalls.length).toBe(1) + expect(errorCalls[0][0]).toContain('concurrent prompt rejected') + expect(errorCalls[0][0]).toContain('loopA') + expect(errorCalls[0][0]).toContain('sess-1') + expect(errorCalls[0][0]).toContain('sess-2') + }) + + test('assertNoPromptInFlight returns without throwing after clear', () => { + markPromptInFlight('loopA', 'sess-1', 'code') + clearPromptInFlight('loopA') + + const { logger } = createMockLogger() + expect(() => + assertNoPromptInFlight('loopA', 'sess-2', 'auditor-loop', logger) + ).not.toThrow() + }) + + test('guards are per-loop (different loops are independent)', () => { + markPromptInFlight('loopB', 'sess-3', 'code') + + const { logger } = createMockLogger() + expect(() => + assertNoPromptInFlight('loopA', 'sess-4', 'auditor-loop', logger) + ).not.toThrow() + }) + + test('logger.error is called exactly once with correct details before throwing', () => { + markPromptInFlight('loopC', 'sess-5', 'decomposer') + const { logger, errorCalls } = createMockLogger() + + try { + assertNoPromptInFlight('loopC', 'sess-6', 'code', logger) + expect.fail('should have thrown') + } catch { + // expected + } + + expect(errorCalls.length).toBe(1) + const msg = errorCalls[0][0] as string + expect(msg).toContain('[in-flight-guard]') + expect(msg).toContain('loop=loopC') + expect(msg).toContain('prior=decomposer: sess-5') + expect(msg).toContain('attempted=code: sess-6') + }) +}) diff --git a/test/loop/runtime.test.ts b/test/loop/runtime.test.ts index 70889897..50308cf8 100644 --- a/test/loop/runtime.test.ts +++ b/test/loop/runtime.test.ts @@ -11,6 +11,12 @@ import { createLoopService } from '../../src/loop/service' import type { LoopState } from '../../src/loop/state' import { createLoop, type Loop, type LoopRuntimeDeps } from '../../src/loop/runtime' import { sessionsAwaitingBusy } from '../../src/loop/idle-gate' +import { + markPromptInFlight, + clearPromptInFlight, + getPromptInFlight, + __resetInFlightGuard, +} from '../../src/loop/in-flight-guard' import type { Logger, PluginConfig, LoopConfig } from '../../src/types' import type { OpencodeClient } from '@opencode-ai/sdk/v2' @@ -231,6 +237,7 @@ describe('Loop Runtime', () => { ) sessionsAwaitingBusy.clear() + __resetInFlightGuard() }) afterEach(() => { @@ -640,4 +647,193 @@ describe('stall handling terminates with stall timeout when configured cap is re expect(afterState!.terminationReason).toBe('stall_timeout') }) }) + + describe('in-flight prompt guard', () => { + test('rejects audit prompt while code prompt in-flight', async () => { + markPromptInFlight('test-loop', 'other-session-id', 'code') + + const { loop, clientState, logger, logs } = createRuntime() + clientState.messagesResult = [ + { + info: { role: 'assistant', finish: 'stop' }, + parts: [{ type: 'text', text: 'Audit passed.' }], + }, + ] + + const state = makeState({ + phase: 'coding', + totalSections: 0, + decompositionStatus: 'completed', + auditCount: 0, + }) + loopService.setState(state.loopName, state) + + await loop.tick({ + type: 'session.status', + properties: { + status: { type: 'idle' }, + sessionID: state.sessionId, + }, + }) + + // Before Phase 4: runtime does not call assertNoPromptInFlight → no error logged → FAILS + // After Phase 4: assertNoPromptInFlight rejects → [in-flight-guard] logged + const hasGuardError = logs.some( + (l) => l.level === 'error' && l.message.includes('[in-flight-guard]'), + ) + expect(hasGuardError).toBe(true) + }) + + test('clears in-flight after busy event', async () => { + markPromptInFlight('test-loop', 'sess-1', 'code') + + const { loop } = createRuntime() + const state = makeState({ phase: 'coding' }) + loopService.setState(state.loopName, state) + + await loop.tick({ + type: 'session.status', + properties: { + status: { type: 'busy' }, + sessionID: state.sessionId, + }, + }) + + // Before Phase 4: busy handler only clears pending (not in-flight guard) → stays set → FAILS + // After Phase 4: busy handler also clears in-flight guard → cleared + expect(getPromptInFlight('test-loop')).toBeUndefined() + }) + + test('clears in-flight on prompt completion', async () => { + markPromptInFlight('test-loop', 'sess', 'auditor-loop') + + const { loop, clientState } = createRuntime() + clientState.messagesResult = [ + { + info: { role: 'assistant', finish: 'stop' }, + parts: [{ type: 'text', text: 'All clear.' }], + }, + ] + + const state = makeState({ + phase: 'coding', + totalSections: 0, + decompositionStatus: 'completed', + auditCount: 0, + }) + loopService.setState(state.loopName, state) + + await loop.tick({ + type: 'session.status', + properties: { + status: { type: 'idle' }, + sessionID: state.sessionId, + }, + }) + + // Before Phase 4: sendPromptWithFallback never calls mark/clearInFlight → pre-set guard persists → FAILS + // After Phase 4: mark + clear around promptAsync call → guard cleared + expect(getPromptInFlight('test-loop')).toBeUndefined() + }) + }) + + describe('session retention', () => { + test('queues session for retention on coding phase transition', async () => { + const { loop, clientState } = createRuntime() + + const state = makeState({ + phase: 'coding', + totalSections: 0, + decompositionStatus: 'completed', + auditCount: 0, + }) + loopService.setState(state.loopName, state) + + clientState.messagesResult = [ + { + info: { role: 'assistant', finish: 'stop' }, + parts: [{ type: 'text', text: 'All clear.' }], + }, + ] + + // Trigger a single rotation: coding→audit + await loop.tick({ + type: 'session.status', + properties: { status: { type: 'idle' }, sessionID: state.sessionId }, + }) + + // After one rotation: queue.length=1 ≤ SESSION_RETENTION(2) + // The old coding session is queued but NOT yet deleted + // (no delete call expected because retention limit not exceeded) + + // Verify the old session was scheduled for deletion (via debug logs). + // The actual delete only occurs when queue > SESSION_RETENTION. + expect(clientState.deleteCalls).toHaveLength(0) + }) + + test('tolerates delete failure without crashing', async () => { + const { loop, clientState, logger, logs } = createRuntime() + clientState.deleteThrows = true + + const state = makeState({ + phase: 'coding', + totalSections: 0, + decompositionStatus: 'completed', + auditCount: 0, + }) + loopService.setState(state.loopName, state) + + clientState.messagesResult = [ + { + info: { role: 'assistant', finish: 'stop' }, + parts: [{ type: 'text', text: 'All clear.' }], + }, + ] + + // Trigger a rotation; delete error should be caught and logged + await loop.tick({ + type: 'session.status', + properties: { status: { type: 'idle' }, sessionID: state.sessionId }, + }) + + // No unhandled rejection from delete failure + const hasDeleteError = logs.some( + (l) => l.level === 'error' && l.message.includes('failed to delete'), + ) + // Even if no trim happened (queue <= 2), we verify no crash occurred + }) + + test('terminate flushes retained sessions', async () => { + const { loop, clientState } = createRuntime() + + const state = makeState({ + phase: 'coding', + totalSections: 0, + decompositionStatus: 'completed', + auditCount: 0, + }) + loopService.setState(state.loopName, state) + + clientState.messagesResult = [ + { + info: { role: 'assistant', finish: 'stop' }, + parts: [{ type: 'text', text: 'All clear.' }], + }, + ] + + // First rotation: coding→audit + await loop.tick({ + type: 'session.status', + properties: { status: { type: 'idle' }, sessionID: state.sessionId }, + }) + + // After tick, state changed to auditing with session='sess' + // Terminate the loop: terminateLoop should clean up retained sessions + await loop.cancel(state.loopName) + + // Check that v2Client.session.delete was called for the old coding session + const deletedSids = clientState.deleteCalls.map((c) => c.sessionID) + expect(deletedSids).toContain(state.sessionId) + }) + }) }) diff --git a/test/services/execution-attach-cleanup.test.ts b/test/services/execution-attach-cleanup.test.ts new file mode 100644 index 00000000..b6b8eeb7 --- /dev/null +++ b/test/services/execution-attach-cleanup.test.ts @@ -0,0 +1,272 @@ +import { describe, test, expect, beforeEach, afterEach, vi } from 'vitest' +import Database from 'better-sqlite3' +import { mkdtempSync } from 'fs' +import { join } from 'path' +import { tmpdir } from 'os' +import { createLoopsRepo } from '../../src/storage/repos/loops-repo' +import { createPlansRepo } from '../../src/storage/repos/plans-repo' +import { createReviewFindingsRepo } from '../../src/storage/repos/review-findings-repo' +import { createSectionPlansRepo } from '../../src/storage/repos/section-plans-repo' +import { createLoopService } from '../../src/loop/service' +import type { Logger } from '../../src/types' + +const noopFn = () => {} + +const DB_SCHEMA = ` +CREATE TABLE loops ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + status TEXT NOT NULL, + current_session_id TEXT NOT NULL, + worktree INTEGER NOT NULL, + worktree_dir TEXT NOT NULL, + session_directory TEXT, + worktree_branch TEXT, + project_dir TEXT NOT NULL, + max_iterations INTEGER NOT NULL, + iteration INTEGER NOT NULL DEFAULT 0, + audit_count INTEGER NOT NULL DEFAULT 0, + error_count INTEGER NOT NULL DEFAULT 0, + phase TEXT NOT NULL, + execution_model TEXT, + auditor_model TEXT, + model_failed INTEGER NOT NULL DEFAULT 0, + sandbox INTEGER NOT NULL DEFAULT 0, + sandbox_container TEXT, + started_at INTEGER NOT NULL, + completed_at INTEGER, + termination_reason TEXT, + completion_summary TEXT, + workspace_id TEXT, + host_session_id TEXT, + audit_session_id TEXT, + decomposition_status TEXT NOT NULL DEFAULT 'pending' CHECK (decomposition_status IN ('pending','running','completed','failed','skipped')), + decomposition_mode TEXT NOT NULL DEFAULT 'agent' CHECK (decomposition_mode IN ('agent','deterministic')), + decomposition_session_id TEXT, + current_section_index INTEGER NOT NULL DEFAULT 0, + total_sections INTEGER NOT NULL DEFAULT 0, + final_audit_done INTEGER NOT NULL DEFAULT 0, + final_audit_attempts INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (project_id, loop_name) +) +` + +const LOOP_LARGE_FIELDS_SCHEMA = ` +CREATE TABLE loop_large_fields ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + prompt TEXT, + last_audit_result TEXT, + PRIMARY KEY (project_id, loop_name), + FOREIGN KEY (project_id, loop_name) REFERENCES loops(project_id, loop_name) ON DELETE CASCADE +) +` + +const PLANS_SCHEMA = ` +CREATE TABLE plans ( + project_id TEXT NOT NULL, + loop_name TEXT, + session_id TEXT, + content TEXT NOT NULL, + updated_at INTEGER NOT NULL, + CHECK (loop_name IS NOT NULL OR session_id IS NOT NULL), + CHECK (NOT (loop_name IS NOT NULL AND session_id IS NOT NULL)), + UNIQUE (project_id, loop_name), + UNIQUE (project_id, session_id) +) +` + +const REVIEW_FINDINGS_SCHEMA = ` +CREATE TABLE review_findings ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL DEFAULT '', + file TEXT NOT NULL, + line INTEGER NOT NULL, + severity TEXT NOT NULL, + description TEXT NOT NULL, + scenario TEXT, + created_at INTEGER NOT NULL, + section_index INTEGER, + PRIMARY KEY (project_id, loop_name, file, line, section_index) +) +` + +const SECTION_PLANS_SCHEMA = ` +CREATE TABLE section_plans ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + section_index INTEGER NOT NULL, + title TEXT NOT NULL, + content TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending','in_progress','completed','failed')), + attempts INTEGER NOT NULL DEFAULT 0, + started_at INTEGER, + completed_at INTEGER, + summary_done TEXT, + summary_deviations TEXT, + summary_follow_ups TEXT, + created_at INTEGER NOT NULL, + PRIMARY KEY (project_id, loop_name, section_index) +) +` + +const PROJECT_ID = 'test-project' + +describe('attachLoopToSession', () => { + let db: Database + let tempDir: string + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'attach-cleanup-test-')) + db = new Database(join(tempDir, 'test.db')) + db.exec(DB_SCHEMA) + db.exec(LOOP_LARGE_FIELDS_SCHEMA) + db.exec(PLANS_SCHEMA) + db.exec(REVIEW_FINDINGS_SCHEMA) + db.exec(SECTION_PLANS_SCHEMA) + }) + + afterEach(() => { + try { + db.close() + } catch {} + }) + + function buildDeps() { + const loopsRepo = createLoopsRepo(db) + const plansRepo = createPlansRepo(db) + const reviewFindingsRepo = createReviewFindingsRepo(db) + const sectionPlansRepo = createSectionPlansRepo(db) + const loopService = createLoopService( + loopsRepo, + plansRepo, + reviewFindingsRepo, + PROJECT_ID, + { log: () => {}, error: () => {}, debug: () => {} } as Logger, + undefined, + undefined, + undefined, + sectionPlansRepo, + ) + + const promptAsyncMock = vi.fn().mockResolvedValue({ error: null }) + const tuiSelectSessionMock = vi.fn().mockResolvedValue(undefined) + + const deps = { + projectId: PROJECT_ID, + directory: '/tmp/test', + config: { + loop: { enabled: true }, + executionModel: 'prov/exec', + auditorModel: 'prov/aud', + decomposer: { enabled: true, mode: 'agent' as const, onParseFailure: 'legacy' as const, maxSections: 12 }, + }, + logger: { log: () => {}, error: () => {}, debug: () => {} } as Logger, + dataDir: '/tmp', + v2: { + session: { + create: vi.fn().mockResolvedValue({ data: { id: 'new-session' } }), + get: vi.fn().mockResolvedValue({ data: {} }), + promptAsync: promptAsyncMock, + abort: vi.fn().mockResolvedValue({}), + delete: vi.fn().mockResolvedValue({}), + messages: vi.fn().mockResolvedValue({ data: [] }), + status: vi.fn().mockResolvedValue({ data: {} }), + }, + tui: { + publish: vi.fn(), + selectSession: tuiSelectSessionMock, + }, + }, + plansRepo, + loopsRepo, + reviewFindingsRepo, + sectionPlansRepo, + loop: loopService as any, + loopHandler: { + runExclusive: async (name: string, fn: () => Promise) => fn(), + startWatchdog: vi.fn(), + clearLoopTimers: noopFn, + }, + sandboxManager: null, + workspaceStatusRegistry: { + recordEvent: vi.fn(), + getStatus: vi.fn().mockReturnValue('connected' as const), + awaitConnected: vi.fn().mockResolvedValue({ connected: true, elapsedMs: 0, source: 'cached' as const }), + primeFromSnapshot: vi.fn(), + }, + } + + return { deps, loopsRepo, plansRepo, sectionPlansRepo, reviewFindingsRepo, loopService } + } + + test('attachLoopToSession purges orphaned per-loop rows even when no loops row exists', async () => { + const { deps, sectionPlansRepo, plansRepo, reviewFindingsRepo } = buildDeps() + + const LOOP_NAME = 'orphan-loop' + + // Seed orphaned section_plans for the loop (no loops row exists) + sectionPlansRepo.bulkInsert({ + projectId: PROJECT_ID, + loopName: LOOP_NAME, + sections: [ + { index: 0, title: 'Stale section', content: '# Stale\n\nStale content.' }, + ], + }) + sectionPlansRepo.setStatus(PROJECT_ID, LOOP_NAME, 0, 'in_progress') + + // Seed orphaned plan for the loop + plansRepo.writeForLoop(PROJECT_ID, LOOP_NAME, 'STALE_PLAN_CONTENT') + + // Seed orphaned review findings for the loop + reviewFindingsRepo.write({ + projectId: PROJECT_ID, + loopName: LOOP_NAME, + file: 'a.ts', + line: 1, + severity: 'bug' as const, + description: 'stale finding', + }) + + // Verify seed data is present before attach + expect(sectionPlansRepo.count(PROJECT_ID, LOOP_NAME)).toBe(1) + expect(plansRepo.getForLoop(PROJECT_ID, LOOP_NAME)).not.toBeNull() + expect(reviewFindingsRepo.listByLoopName(PROJECT_ID, LOOP_NAME).length).toBeGreaterThan(0) + + // Confirm no loops row exists for orphan-loop (simulating orphan state) + const existingLoop = deps.loopsRepo.get(PROJECT_ID, LOOP_NAME) + expect(existingLoop).toBeNull() + + const { attachLoopToSession } = await import('../../src/services/execution') + + const result = await attachLoopToSession( + deps as any, + { surface: 'tui', projectId: PROJECT_ID, directory: '/tmp/test' }, + { + sessionId: 'sess_fresh', + workspaceId: 'ws_fresh', + worktreeDir: '/tmp/wt/fresh', + loopName: LOOP_NAME, + displayName: 'Orphan Loop', + executionName: LOOP_NAME, + maxIterations: 50, + sandboxEnabled: false, + decomposerMode: 'disabled', + planText: 'NEW_PLAN', + selectSession: false, + selectSessionTiming: 'after-prompt', + startWatchdog: false, + }, + ) + + // Attach should succeed (no existing running loop) + expect(result.ok).toBe(true) + + // --- Assertions that MUST fail today --- + // Today: no purge logic exists, so the orphaned rows remain. + // After Phase 6: these should pass once defensive purge is added. + expect(sectionPlansRepo.count(PROJECT_ID, LOOP_NAME)).toBe(0) + expect(plansRepo.getForLoop(PROJECT_ID, LOOP_NAME)).toBeNull() + expect(reviewFindingsRepo.listByLoopName(PROJECT_ID, LOOP_NAME)).toEqual([]) + }) +}) diff --git a/test/services/execution-in-flight-guard.test.ts b/test/services/execution-in-flight-guard.test.ts new file mode 100644 index 00000000..762f5678 --- /dev/null +++ b/test/services/execution-in-flight-guard.test.ts @@ -0,0 +1,269 @@ +import { describe, test, expect, beforeEach, afterEach } from 'vitest' +import Database from 'better-sqlite3' +import { mkdtempSync } from 'fs' +import { join } from 'path' +import { tmpdir } from 'os' +import { createLoopsRepo } from '../../src/storage/repos/loops-repo' +import { createPlansRepo } from '../../src/storage/repos/plans-repo' +import { createReviewFindingsRepo } from '../../src/storage/repos/review-findings-repo' +import { createSectionPlansRepo } from '../../src/storage/repos/section-plans-repo' +import { createLoopService } from '../../src/loop/service' +import type { Logger } from '../../src/types' +import type { LoopsRepo } from '../../src/storage/repos/loops-repo' +import type { PlansRepo } from '../../src/storage/repos/plans-repo' +import type { ReviewFindingsRepo } from '../../src/storage/repos/review-findings-repo' +import type { SectionPlansRepo } from '../../src/storage/repos/section-plans-repo' +import { + markPromptInFlight, + __resetInFlightGuard, +} from '../../src/loop/in-flight-guard' + +const noopFn = () => {} + +const DB_SCHEMA = ` +CREATE TABLE loops ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + status TEXT NOT NULL, + current_session_id TEXT NOT NULL, + worktree INTEGER NOT NULL, + worktree_dir TEXT NOT NULL, + session_directory TEXT, + worktree_branch TEXT, + project_dir TEXT NOT NULL, + max_iterations INTEGER NOT NULL, + iteration INTEGER NOT NULL DEFAULT 0, + audit_count INTEGER NOT NULL DEFAULT 0, + error_count INTEGER NOT NULL DEFAULT 0, + phase TEXT NOT NULL, + execution_model TEXT, + auditor_model TEXT, + model_failed INTEGER NOT NULL DEFAULT 0, + sandbox INTEGER NOT NULL DEFAULT 0, + sandbox_container TEXT, + started_at INTEGER NOT NULL, + completed_at INTEGER, + termination_reason TEXT, + completion_summary TEXT, + workspace_id TEXT, + host_session_id TEXT, + audit_session_id TEXT, + decomposition_status TEXT NOT NULL DEFAULT 'pending' CHECK (decomposition_status IN ('pending','running','completed','failed','skipped')), + decomposition_mode TEXT NOT NULL DEFAULT 'agent' CHECK (decomposition_mode IN ('agent','deterministic')), + decomposition_session_id TEXT, + current_section_index INTEGER NOT NULL DEFAULT 0, + total_sections INTEGER NOT NULL DEFAULT 0, + final_audit_done INTEGER NOT NULL DEFAULT 0, + final_audit_attempts INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (project_id, loop_name) +) +` + +const LOOP_LARGE_FIELDS = ` +CREATE TABLE loop_large_fields ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + prompt TEXT, + last_audit_result TEXT, + PRIMARY KEY (project_id, loop_name), + FOREIGN KEY (project_id, loop_name) REFERENCES loops(project_id, loop_name) ON DELETE CASCADE +) +` + +const PLANS_SCHEMA = ` +CREATE TABLE plans ( + project_id TEXT NOT NULL, + loop_name TEXT, + session_id TEXT, + content TEXT NOT NULL, + updated_at INTEGER NOT NULL, + CHECK (loop_name IS NOT NULL OR session_id IS NOT NULL), + CHECK (NOT (loop_name IS NOT NULL AND session_id IS NOT NULL)), + UNIQUE (project_id, loop_name), + UNIQUE (project_id, session_id) +) +` + +const REVIEW_FINDINGS_SCHEMA = ` +CREATE TABLE review_findings ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL DEFAULT '', + file TEXT NOT NULL, + line INTEGER NOT NULL, + severity TEXT NOT NULL, + description TEXT NOT NULL, + scenario TEXT, + created_at INTEGER NOT NULL, + section_index INTEGER, + PRIMARY KEY (project_id, loop_name, file, line, section_index) +) +` + +const SECTION_PLANS_SCHEMA = ` +CREATE TABLE section_plans ( + project_id TEXT NOT NULL, + loop_name TEXT NOT NULL, + section_index INTEGER NOT NULL, + title TEXT NOT NULL, + content TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending' CHECK (status IN ('pending','in_progress','completed','failed')), + attempts INTEGER NOT NULL DEFAULT 0, + started_at INTEGER, + completed_at INTEGER, + summary_done TEXT, + summary_deviations TEXT, + summary_follow_ups TEXT, + created_at INTEGER NOT NULL, + PRIMARY KEY (project_id, loop_name, section_index) +) +` + +const PROJECT_ID = 'test-project' + +describe('execution in-flight guard', () => { + let db: Database + let loopsRepo: LoopsRepo + let plansRepo: PlansRepo + let reviewFindingsRepo: ReviewFindingsRepo + let sectionPlansRepo: SectionPlansRepo + let tempDir: string + + const mockLogger: Logger = { + log: () => {}, + error: () => {}, + debug: () => {}, + } + + beforeEach(() => { + __resetInFlightGuard() + tempDir = mkdtempSync(join(tmpdir(), 'exec-guard-test-')) + db = new Database(join(tempDir, 'test.db')) + + db.exec(DB_SCHEMA) + db.exec(LOOP_LARGE_FIELDS) + db.exec(PLANS_SCHEMA) + db.exec(REVIEW_FINDINGS_SCHEMA) + db.exec(SECTION_PLANS_SCHEMA) + + loopsRepo = createLoopsRepo(db) + plansRepo = createPlansRepo(db) + reviewFindingsRepo = createReviewFindingsRepo(db) + sectionPlansRepo = createSectionPlansRepo(db) + }) + + afterEach(() => { + try { db.close() } catch {} + __resetInFlightGuard() + }) + + describe('restart prompt path', () => { + test('rejects restart prompt when another prompt is in-flight', async () => { + const noopFn = () => {} + + loopsRepo.insert({ + projectId: PROJECT_ID, + loopName: 'guard-loop', + status: 'stalled', + currentSessionId: 'old-session', + worktree: false, + worktreeDir: '/tmp', + worktreeBranch: null, + projectDir: '/tmp', + maxIterations: 10, + iteration: 1, + auditCount: 0, + errorCount: 0, + phase: 'coding', + executionModel: null, + auditorModel: null, + modelFailed: false, + sandbox: false, + sandboxContainer: null, + startedAt: Date.now(), + completedAt: null, + terminationReason: 'stall_timeout', + completionSummary: null, + workspaceId: null, + hostSessionId: null, + decompositionStatus: 'completed', + decompositionMode: 'deterministic', + decompositionSessionId: null, + currentSectionIndex: 0, + totalSections: 5, + finalAuditDone: 0, + }, { prompt: 'test plan text', lastAuditResult: null }) + + sectionPlansRepo.bulkInsert({ + projectId: PROJECT_ID, + loopName: 'guard-loop', + sections: [ + { index: 0, title: 'A', content: 'a' }, + { index: 1, title: 'B', content: 'b' }, + ], + }) + + const mockV2Client = { + session: { + create: async () => ({ data: { id: 'new-sess-999' } }), + get: async () => ({ data: {} }), + promptAsync: async () => ({ error: null, data: null }), + abort: async () => ({}), + delete: async () => ({}), + messages: async () => ({ data: [] }), + status: async () => ({ data: {} }), + }, + experimental: { + workspace: { list: async () => ({ data: [] }), remove: async () => ({}) }, + session: { list: async () => ({ data: [] }) }, + }, + tui: { publish: async () => ({}), selectSession: async () => ({}) }, + worktree: { create: async () => ({ data: { directory: '/tmp/wt', branch: 'main' } }) }, + } + + const loopService = createLoopService( + loopsRepo, plansRepo, reviewFindingsRepo, PROJECT_ID, mockLogger, + undefined, undefined, undefined, sectionPlansRepo, + ) + + const mockLoopHandler = { + runExclusive: async (name: string, fn: () => Promise) => fn(), + startWatchdog: noopFn, + clearLoopTimers: noopFn, + } + + const { createForgeExecutionService } = await import('../../src/services/execution') + const service = createForgeExecutionService({ + projectId: PROJECT_ID, + directory: '/tmp/test', + config: { loop: { enabled: true }, executionModel: 'prov/exec', auditorModel: 'prov/aud' }, + logger: mockLogger, + dataDir: '/tmp', + v2: mockV2Client as any, + plansRepo, + loopsRepo, + loop: loopService as any, + loopHandler: mockLoopHandler as any, + sectionPlansRepo, + } as any) + + // Pre-set guard to simulate concurrent in-flight prompt for this loop + markPromptInFlight('guard-loop', 'other-prompt-sess', 'code') + + const result = await service.dispatch( + { surface: 'api', projectId: PROJECT_ID, directory: '/tmp/test' }, + { + type: 'loop.restart' as const, + selector: { kind: 'exact' as const, name: 'guard-loop' }, + }, + ) + + // Before Phase 6: no guard check → promptAsync IS called → assertion fails + // After Phase 6: guard rejects → promptAsync NOT called → passes + // The result should be ok or error depending on guard wiring + if (result.ok) { + // If result.ok is true, it means promptAsync was called (no rejection) — should fail + expect.fail('Expected promptAsync not to be called while guard is active') + } + }) + }) +}) diff --git a/test/utils/tui-client-loop-inline-plan.test.ts b/test/utils/tui-client-loop-inline-plan.test.ts new file mode 100644 index 00000000..43136aad --- /dev/null +++ b/test/utils/tui-client-loop-inline-plan.test.ts @@ -0,0 +1,105 @@ +import { describe, test, expect, beforeEach, vi } from 'vitest' + +vi.mock('bun:sqlite', () => ({ + Database: vi.fn(), +})) + +vi.mock('../../src/utils/tui-execution-preferences', () => ({ + readExecutionPreferences: vi.fn().mockReturnValue(null), + writeExecutionPreferences: vi.fn(), +})) + +vi.mock('../../src/utils/tui-plan-store', () => ({ + readPlan: vi.fn().mockReturnValue(null), + readPlanForAnyProject: vi.fn().mockReturnValue(null), + writePlan: vi.fn(), + deletePlan: vi.fn(), +})) + +vi.mock('../../src/utils/tui-models', () => ({ + fetchAvailableModels: vi.fn().mockResolvedValue({ providers: [] }), +})) + +vi.mock('../../src/utils/workspace-listing', () => ({ + listConnectedWorkspaces: vi.fn().mockResolvedValue([]), +})) + +vi.mock('../../src/storage', () => ({ + resolveLogPath: vi.fn().mockReturnValue('/tmp/forge-test.log'), +})) + +vi.mock('../../src/services/execution', () => ({ + ForgeLoopExtra: {}, +})) + +import { connectForgeProject } from '../../src/utils/tui-client' + +describe('Load Plans inline plan is sent as inline even when host session exists', () => { + const PROJECT_ID = 'proj_test' + const DIRECTORY = '/tmp/test' + const SESSION_ID = 'ses_existing_host' + + let mockApi: any + + beforeEach(() => { + mockApi = { + client: { + project: { + list: vi.fn().mockResolvedValue({ + data: [{ id: PROJECT_ID, worktree: DIRECTORY }], + }), + }, + experimental: { + workspace: { + create: vi.fn().mockImplementation(async (args: any) => ({ + data: { + id: 'ws_loop', + directory: '/tmp/wt/loop', + branch: null, + }, + })), + syncList: vi.fn().mockImplementation(async () => undefined), + status: vi.fn().mockImplementation(async () => ({ + data: [{ workspaceID: 'ws_loop', status: 'connected' }], + })), + }, + }, + session: { + create: vi.fn().mockImplementation(async (args: any) => ({ + data: { id: 'sess_new' }, + })), + }, + tui: { + selectSession: vi.fn().mockImplementation(async () => {}), + }, + }, + route: { + navigate: vi.fn().mockImplementation(() => {}), + }, + } + }) + + test('plan.execute({ mode: "loop" }) always sends inline planText for Load Plans dialog flow, even when a host session is selected', async () => { + const client = await connectForgeProject(mockApi, DIRECTORY) + expect(client).not.toBeNull() + + await client!.plan.execute( + SESSION_ID, + { + mode: 'loop', + title: 'My Plan', + plan: '# My Plan\n\nFresh content', + executionModel: undefined, + auditorModel: undefined, + }, + {} as any, + ) + + const createArgs = mockApi.client.experimental.workspace.create.mock.calls[0][0] + const forgeLoop = createArgs.extra.forgeLoop + + expect(forgeLoop.planSource).toBe('inline') + expect(forgeLoop.planText).toBe('# My Plan\n\nFresh content') + expect(forgeLoop.hostSessionId).toBe(SESSION_ID) + }) +}) diff --git a/test/utils/tui-client-warp-flow.test.ts b/test/utils/tui-client-warp-flow.test.ts index fdba40c6..ab282925 100644 --- a/test/utils/tui-client-warp-flow.test.ts +++ b/test/utils/tui-client-warp-flow.test.ts @@ -146,7 +146,8 @@ describe('TUI warp flow for plan.execute mode=loop', () => { title: 'My Cool Feature', executionModel: 'prov/exec', auditorModel: 'prov/aud', - planSource: 'stored', + planSource: 'inline', + planText: '# Plan\n\nImplement feature X.', }) // Verify session.create was called with correct params diff --git a/vitest.config.ts b/vitest.config.ts index 66fadf9e..4da9edee 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -22,6 +22,7 @@ export default defineConfig({ 'test/services/execution-decomposer.test.ts', 'test/services/orphan-sweep.test.ts', 'test/services/execution-restart.test.ts', + 'test/services/execution-in-flight-guard.test.ts', 'test/services/execution.start-loop.test.ts', 'test/services/parse-section-summary.test.ts', 'test/utils/worktree-cleanup.test.ts', @@ -32,6 +33,7 @@ export default defineConfig({ 'test/loop/state-mapper.test.ts', 'test/loop/prompts.test.ts', 'test/loop/transitions.test.ts', + 'test/loop/in-flight-guard.test.ts', 'test/loop/runtime.test.ts', 'test/loop/start.test.ts', 'test/loop/cancel.test.ts', @@ -53,6 +55,8 @@ export default defineConfig({ 'test/services/reconcile-loops.test.ts', 'test/hooks/forge-session-attach.test.ts', 'test/utils/tui-client-warp-flow.test.ts', + 'test/utils/tui-client-loop-inline-plan.test.ts', + 'test/services/execution-attach-cleanup.test.ts', 'test/tui/execute-plan-panel-busy.test.ts', 'test/hooks/plan-approval-dedupe.test.ts', 'test/hooks/plan-approval-worktree-timing.test.ts', @@ -63,6 +67,8 @@ export default defineConfig({ 'test/utils/tui-execution-context-cache.test.ts', 'test/utils/tui-client-await-workspace-connected.test.ts', 'test/utils/tui-client-select-session.test.ts', + 'test/loop-permission-ruleset.test.ts', + 'test/loop-runtime-audit-permissions.test.ts', ], globals: true, },