fix: handle CLI session expired errors gracefully instead of crashing gateway (#31090)

* fix: handle CLI session expired errors gracefully

- Add session_expired to FailoverReason type
- Add isCliSessionExpiredErrorMessage to detect expired CLI sessions
- Modify runCliAgent to retry with new session when session expires
- Update agentCommand to clear expired session IDs from session store
- Add proper error handling to prevent gateway crashes on expired sessions

Fixes #30986

* fix: add session_expired to AuthProfileFailureReason and missing log import

* fix: type cli-runner usage field to match EmbeddedPiAgentMeta

* fix: harden CLI session-expiry recovery handling

* build: regenerate host env security policy swift

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Frank Yang
2026-03-02 09:11:05 +08:00
committed by GitHub
parent a95c8077e8
commit ed86252aa5
9 changed files with 481 additions and 206 deletions

View File

@@ -22,17 +22,17 @@ enum HostEnvSecurityPolicy {
"PS4",
"GCONV_PATH",
"IFS",
"SSLKEYLOGFILE",
"SSLKEYLOGFILE"
]
static let blockedOverrideKeys: Set<String> = [
"HOME",
"ZDOTDIR",
"ZDOTDIR"
]
static let blockedPrefixes: [String] = [
"DYLD_",
"LD_",
"BASH_FUNC_",
"BASH_FUNC_"
]
}

View File

@@ -43,6 +43,7 @@ export type AuthProfileFailureReason =
| "billing"
| "timeout"
| "model_not_found"
| "session_expired"
| "unknown";
/** Per-profile usage statistics for round-robin and cooldown tracking */

View File

@@ -153,6 +153,50 @@ describe("runCliAgent with process supervisor", () => {
).rejects.toThrow("exceeded timeout");
});
it("rethrows the retry failure when session-expired recovery retry also fails", async () => {
  // Both spawned runs exit with code 1; only the stderr text differs between them.
  const failedExitWith = (stderr: string) =>
    createManagedRun({
      reason: "exit",
      exitCode: 1,
      exitSignal: null,
      durationMs: 150,
      stdout: "",
      stderr,
      timedOut: false,
      noOutputTimedOut: false,
    });

  // First spawn reports an expired session, the second (the retry) reports a rate limit.
  supervisorSpawnMock
    .mockResolvedValueOnce(failedExitWith("session expired"))
    .mockResolvedValueOnce(failedExitWith("rate limit exceeded"));

  const pendingRun = runCliAgent({
    sessionId: "s1",
    sessionKey: "agent:main:subagent:retry",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    prompt: "hi",
    provider: "codex-cli",
    model: "gpt-5.2-codex",
    timeoutMs: 1_000,
    runId: "run-retry-failure",
    cliSessionId: "thread-123",
  });

  // The error surfaced to the caller must be the retry's failure, not the original one.
  await expect(pendingRun).rejects.toThrow("rate limit exceeded");
  expect(supervisorSpawnMock).toHaveBeenCalledTimes(2);
});
it("falls back to per-agent workspace when workspaceDir is missing", async () => {
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cli-runner-"));
const fallbackWorkspace = path.join(tempDir, "workspace-main");

View File

@@ -122,204 +122,221 @@ export async function runCliAgent(params: {
agentId: sessionAgentId,
});
const { sessionId: cliSessionIdToSend, isNew } = resolveSessionIdToSend({
backend,
cliSessionId: params.cliSessionId,
});
const useResume = Boolean(
params.cliSessionId &&
cliSessionIdToSend &&
backend.resumeArgs &&
backend.resumeArgs.length > 0,
);
const sessionIdSent = cliSessionIdToSend
? useResume || Boolean(backend.sessionArg) || Boolean(backend.sessionArgs?.length)
? cliSessionIdToSend
: undefined
: undefined;
const systemPromptArg = resolveSystemPromptUsage({
backend,
isNewSession: isNew,
systemPrompt,
});
let imagePaths: string[] | undefined;
let cleanupImages: (() => Promise<void>) | undefined;
let prompt = params.prompt;
if (params.images && params.images.length > 0) {
const imagePayload = await writeCliImages(params.images);
imagePaths = imagePayload.paths;
cleanupImages = imagePayload.cleanup;
if (!backend.imageArg) {
prompt = appendImagePathsToPrompt(prompt, imagePaths);
}
}
const { argsPrompt, stdin } = resolvePromptInput({
backend,
prompt,
});
const stdinPayload = stdin ?? "";
const baseArgs = useResume ? (backend.resumeArgs ?? backend.args ?? []) : (backend.args ?? []);
const resolvedArgs = useResume
? baseArgs.map((entry) => entry.replaceAll("{sessionId}", cliSessionIdToSend ?? ""))
: baseArgs;
const args = buildCliArgs({
backend,
baseArgs: resolvedArgs,
modelId: normalizedModel,
sessionId: cliSessionIdToSend,
systemPrompt: systemPromptArg,
imagePaths,
promptArg: argsPrompt,
useResume,
});
const serialize = backend.serialize ?? true;
const queueKey = serialize ? backendResolved.id : `${backendResolved.id}:${params.runId}`;
try {
const output = await enqueueCliRun(queueKey, async () => {
log.info(
`cli exec: provider=${params.provider} model=${normalizedModel} promptChars=${params.prompt.length}`,
);
const logOutputText = isTruthyEnvValue(process.env.OPENCLAW_CLAUDE_CLI_LOG_OUTPUT);
if (logOutputText) {
const logArgs: string[] = [];
for (let i = 0; i < args.length; i += 1) {
const arg = args[i] ?? "";
if (arg === backend.systemPromptArg) {
const systemPromptValue = args[i + 1] ?? "";
logArgs.push(arg, `<systemPrompt:${systemPromptValue.length} chars>`);
i += 1;
continue;
}
if (arg === backend.sessionArg) {
logArgs.push(arg, args[i + 1] ?? "");
i += 1;
continue;
}
if (arg === backend.modelArg) {
logArgs.push(arg, args[i + 1] ?? "");
i += 1;
continue;
}
if (arg === backend.imageArg) {
logArgs.push(arg, "<image>");
i += 1;
continue;
}
logArgs.push(arg);
}
if (argsPrompt) {
const promptIndex = logArgs.indexOf(argsPrompt);
if (promptIndex >= 0) {
logArgs[promptIndex] = `<prompt:${argsPrompt.length} chars>`;
}
}
log.info(`cli argv: ${backend.command} ${logArgs.join(" ")}`);
}
const env = (() => {
const next = { ...process.env, ...backend.env };
for (const key of backend.clearEnv ?? []) {
delete next[key];
}
return next;
})();
const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
backend,
timeoutMs: params.timeoutMs,
useResume,
});
const supervisor = getProcessSupervisor();
const scopeKey = buildCliSupervisorScopeKey({
backend,
backendId: backendResolved.id,
cliSessionId: useResume ? cliSessionIdToSend : undefined,
});
const managedRun = await supervisor.spawn({
sessionId: params.sessionId,
backendId: backendResolved.id,
scopeKey,
replaceExistingScope: Boolean(useResume && scopeKey),
mode: "child",
argv: [backend.command, ...args],
timeoutMs: params.timeoutMs,
noOutputTimeoutMs,
cwd: workspaceDir,
env,
input: stdinPayload,
});
const result = await managedRun.wait();
const stdout = result.stdout.trim();
const stderr = result.stderr.trim();
if (logOutputText) {
if (stdout) {
log.info(`cli stdout:\n${stdout}`);
}
if (stderr) {
log.info(`cli stderr:\n${stderr}`);
}
}
if (shouldLogVerbose()) {
if (stdout) {
log.debug(`cli stdout:\n${stdout}`);
}
if (stderr) {
log.debug(`cli stderr:\n${stderr}`);
}
}
if (result.exitCode !== 0 || result.reason !== "exit") {
if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
log.warn(
`cli watchdog timeout: provider=${params.provider} model=${modelId} session=${cliSessionIdToSend ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
);
throw new FailoverError(timeoutReason, {
reason: "timeout",
provider: params.provider,
model: modelId,
status: resolveFailoverStatus("timeout"),
});
}
if (result.reason === "overall-timeout") {
const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
throw new FailoverError(timeoutReason, {
reason: "timeout",
provider: params.provider,
model: modelId,
status: resolveFailoverStatus("timeout"),
});
}
const err = stderr || stdout || "CLI failed.";
const reason = classifyFailoverReason(err) ?? "unknown";
const status = resolveFailoverStatus(reason);
throw new FailoverError(err, {
reason,
provider: params.provider,
model: modelId,
status,
});
}
const outputMode = useResume ? (backend.resumeOutput ?? backend.output) : backend.output;
if (outputMode === "text") {
return { text: stdout, sessionId: undefined };
}
if (outputMode === "jsonl") {
const parsed = parseCliJsonl(stdout, backend);
return parsed ?? { text: stdout };
}
const parsed = parseCliJson(stdout, backend);
return parsed ?? { text: stdout };
// Runs the CLI backend once using the given CLI session ID (or no session when omitted)
// and returns the parsed output.
//
// @param cliSessionIdToUse  CLI session ID to send to the backend; pass undefined to run
//                           without one.
// @returns The output text plus, when the parser provides them, the backend's session ID
//          and token usage.
// @throws FailoverError when the process times out, is terminated for a reason other than
//         "exit", or exits with a non-zero code.
const executeCliWithSession = async (
cliSessionIdToUse?: string,
): Promise<{
text: string;
sessionId?: string;
usage?: {
input?: number;
output?: number;
cacheRead?: number;
cacheWrite?: number;
total?: number;
};
}> => {
const { sessionId: resolvedSessionId, isNew } = resolveSessionIdToSend({
backend,
cliSessionId: cliSessionIdToUse,
});
// Resume mode needs all three: a caller-supplied ID, a resolved ID, and backend resumeArgs.
const useResume = Boolean(
cliSessionIdToUse && resolvedSessionId && backend.resumeArgs && backend.resumeArgs.length > 0,
);
const systemPromptArg = resolveSystemPromptUsage({
backend,
isNewSession: isNew,
systemPrompt,
});
let imagePaths: string[] | undefined;
let cleanupImages: (() => Promise<void>) | undefined;
let prompt = params.prompt;
// Images are written on every invocation of this helper and removed again in the
// `finally` below.
if (params.images && params.images.length > 0) {
const imagePayload = await writeCliImages(params.images);
imagePaths = imagePayload.paths;
cleanupImages = imagePayload.cleanup;
// Backends without an image argument get the image paths appended to the prompt instead.
if (!backend.imageArg) {
prompt = appendImagePathsToPrompt(prompt, imagePaths);
}
}
const { argsPrompt, stdin } = resolvePromptInput({
backend,
prompt,
});
const stdinPayload = stdin ?? "";
const baseArgs = useResume ? (backend.resumeArgs ?? backend.args ?? []) : (backend.args ?? []);
// Only resume args have their "{sessionId}" placeholder substituted.
const resolvedArgs = useResume
? baseArgs.map((entry) => entry.replaceAll("{sessionId}", resolvedSessionId ?? ""))
: baseArgs;
const args = buildCliArgs({
backend,
baseArgs: resolvedArgs,
modelId: normalizedModel,
sessionId: resolvedSessionId,
systemPrompt: systemPromptArg,
imagePaths,
promptArg: argsPrompt,
useResume,
});
// Serialization defaults to on: serialized runs share the backend ID as the key passed to
// enqueueCliRun, non-serialized runs get a key that also includes the run ID.
const serialize = backend.serialize ?? true;
const queueKey = serialize ? backendResolved.id : `${backendResolved.id}:${params.runId}`;
try {
const output = await enqueueCliRun(queueKey, async () => {
log.info(
`cli exec: provider=${params.provider} model=${normalizedModel} promptChars=${params.prompt.length}`,
);
const logOutputText = isTruthyEnvValue(process.env.OPENCLAW_CLAUDE_CLI_LOG_OUTPUT);
if (logOutputText) {
// Build a loggable copy of argv: the system prompt and image values are replaced by
// placeholders, while session and model values are logged as-is.
const logArgs: string[] = [];
for (let i = 0; i < args.length; i += 1) {
const arg = args[i] ?? "";
if (arg === backend.systemPromptArg) {
const systemPromptValue = args[i + 1] ?? "";
logArgs.push(arg, `<systemPrompt:${systemPromptValue.length} chars>`);
i += 1;
continue;
}
if (arg === backend.sessionArg) {
logArgs.push(arg, args[i + 1] ?? "");
i += 1;
continue;
}
if (arg === backend.modelArg) {
logArgs.push(arg, args[i + 1] ?? "");
i += 1;
continue;
}
if (arg === backend.imageArg) {
logArgs.push(arg, "<image>");
i += 1;
continue;
}
logArgs.push(arg);
}
// Replace the first occurrence of the prompt argument with a length placeholder.
if (argsPrompt) {
const promptIndex = logArgs.indexOf(argsPrompt);
if (promptIndex >= 0) {
logArgs[promptIndex] = `<prompt:${argsPrompt.length} chars>`;
}
}
log.info(`cli argv: ${backend.command} ${logArgs.join(" ")}`);
}
// Child environment: process.env overlaid with backend.env, minus any keys in clearEnv.
const env = (() => {
const next = { ...process.env, ...backend.env };
for (const key of backend.clearEnv ?? []) {
delete next[key];
}
return next;
})();
const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
backend,
timeoutMs: params.timeoutMs,
useResume,
});
const supervisor = getProcessSupervisor();
// The session ID is only passed into the scope key when resuming.
const scopeKey = buildCliSupervisorScopeKey({
backend,
backendId: backendResolved.id,
cliSessionId: useResume ? resolvedSessionId : undefined,
});
const managedRun = await supervisor.spawn({
sessionId: params.sessionId,
backendId: backendResolved.id,
scopeKey,
replaceExistingScope: Boolean(useResume && scopeKey),
mode: "child",
argv: [backend.command, ...args],
timeoutMs: params.timeoutMs,
noOutputTimeoutMs,
cwd: workspaceDir,
env,
input: stdinPayload,
});
const result = await managedRun.wait();
const stdout = result.stdout.trim();
const stderr = result.stderr.trim();
if (logOutputText) {
if (stdout) {
log.info(`cli stdout:\n${stdout}`);
}
if (stderr) {
log.info(`cli stderr:\n${stderr}`);
}
}
if (shouldLogVerbose()) {
if (stdout) {
log.debug(`cli stdout:\n${stdout}`);
}
if (stderr) {
log.debug(`cli stderr:\n${stderr}`);
}
}
// Anything other than a clean exit with code 0 is converted into a FailoverError.
if (result.exitCode !== 0 || result.reason !== "exit") {
if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
log.warn(
`cli watchdog timeout: provider=${params.provider} model=${modelId} session=${resolvedSessionId ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
);
throw new FailoverError(timeoutReason, {
reason: "timeout",
provider: params.provider,
model: modelId,
status: resolveFailoverStatus("timeout"),
});
}
if (result.reason === "overall-timeout") {
const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
throw new FailoverError(timeoutReason, {
reason: "timeout",
provider: params.provider,
model: modelId,
status: resolveFailoverStatus("timeout"),
});
}
// Non-timeout failure: classify from stderr, falling back to stdout, then a generic message.
const err = stderr || stdout || "CLI failed.";
const reason = classifyFailoverReason(err) ?? "unknown";
const status = resolveFailoverStatus(reason);
throw new FailoverError(err, {
reason,
provider: params.provider,
model: modelId,
status,
});
}
// A resumed run may use a different output mode than a fresh run.
const outputMode = useResume ? (backend.resumeOutput ?? backend.output) : backend.output;
if (outputMode === "text") {
return { text: stdout, sessionId: undefined };
}
// For structured modes, fall back to the raw stdout when the parser returns nothing.
if (outputMode === "jsonl") {
const parsed = parseCliJsonl(stdout, backend);
return parsed ?? { text: stdout };
}
const parsed = parseCliJson(stdout, backend);
return parsed ?? { text: stdout };
});
return output;
} finally {
// Runs on success and on failure, so image files written above are always cleaned up.
if (cleanupImages) {
await cleanupImages();
}
}
};
// Try with the provided CLI session ID first
try {
const output = await executeCliWithSession(params.cliSessionId);
const text = output.text?.trim();
const payloads = text ? [{ text }] : undefined;
@@ -328,7 +345,7 @@ export async function runCliAgent(params: {
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: output.sessionId ?? sessionIdSent ?? params.sessionId ?? "",
sessionId: output.sessionId ?? params.cliSessionId ?? params.sessionId ?? "",
provider: params.provider,
model: modelId,
usage: output.usage,
@@ -337,6 +354,34 @@ export async function runCliAgent(params: {
};
} catch (err) {
if (err instanceof FailoverError) {
// Check if this is a session expired error and we have a session to clear
if (err.reason === "session_expired" && params.cliSessionId && params.sessionKey) {
log.warn(
`CLI session expired, clearing session ID and retrying: provider=${params.provider} session=${redactRunIdentifier(params.cliSessionId)}`,
);
// The expired ID cannot be removed from the session store here because this
// function has no access to the store; that cleanup is left to the caller.
// Retry without a session ID so the backend creates a new session.
const output = await executeCliWithSession(undefined);
const text = output.text?.trim();
const payloads = text ? [{ text }] : undefined;
return {
payloads,
meta: {
durationMs: Date.now() - started,
agentMeta: {
sessionId: output.sessionId ?? params.sessionId ?? "",
provider: params.provider,
model: modelId,
usage: output.usage,
},
},
};
}
throw err;
}
const message = err instanceof Error ? err.message : String(err);
@@ -351,10 +396,6 @@ export async function runCliAgent(params: {
});
}
throw err;
} finally {
if (cleanupImages) {
await cleanupImages();
}
}
}

View File

@@ -59,6 +59,8 @@ export function resolveFailoverStatus(reason: FailoverReason): number | undefine
return 400;
case "model_not_found":
return 404;
case "session_expired":
return 410; // Gone - session no longer exists
default:
return undefined;
}

View File

@@ -883,6 +883,27 @@ export function isModelNotFoundErrorMessage(raw: string): boolean {
return false;
}
/** Lowercase substrings that mark a CLI error as "the session/conversation no longer exists". */
const CLI_SESSION_EXPIRED_MARKERS: readonly string[] = [
  "session not found",
  "session does not exist",
  "session expired",
  "session invalid",
  "conversation not found",
  "conversation does not exist",
  "conversation expired",
  "conversation invalid",
  "no such session",
  "invalid session",
  "session id not found",
  "conversation id not found",
];

/**
 * Reports whether a raw CLI error message indicates an expired or unknown session.
 *
 * Matching is a case-insensitive substring search against a fixed list of phrases.
 *
 * @param raw - Error text produced by the CLI.
 * @returns `true` when any known phrase occurs in `raw`; `false` otherwise, including for
 *          an empty message.
 */
function isCliSessionExpiredErrorMessage(raw: string): boolean {
  // An empty message normalizes to "", which contains none of the (non-empty) markers.
  const normalized = (raw || "").toLowerCase();
  return CLI_SESSION_EXPIRED_MARKERS.some((marker) => normalized.includes(marker));
}
export function classifyFailoverReason(raw: string): FailoverReason | null {
if (isImageDimensionErrorMessage(raw)) {
return null;
@@ -890,6 +911,9 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
if (isImageSizeError(raw)) {
return null;
}
if (isCliSessionExpiredErrorMessage(raw)) {
return "session_expired";
}
if (isModelNotFoundErrorMessage(raw)) {
return "model_not_found";
}

View File

@@ -8,4 +8,5 @@ export type FailoverReason =
| "billing"
| "timeout"
| "model_not_found"
| "session_expired"
| "unknown";

View File

@@ -4,7 +4,9 @@ import { beforeEach, describe, expect, it, type MockInstance, vi } from "vitest"
import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
import "../cron/isolated-agent.mocks.js";
import * as cliRunnerModule from "../agents/cli-runner.js";
import { FailoverError } from "../agents/failover-error.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
import * as modelSelectionModule from "../agents/model-selection.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import type { OpenClawConfig } from "../config/config.js";
import * as configModule from "../config/config.js";
@@ -148,6 +150,7 @@ beforeEach(() => {
},
});
vi.mocked(loadModelCatalog).mockResolvedValue([]);
vi.mocked(modelSelectionModule.isCliProvider).mockImplementation(() => false);
});
describe("agentCommand", () => {
@@ -640,6 +643,66 @@ describe("agentCommand", () => {
});
});
it("clears stale Claude CLI legacy session IDs before retrying after session expiration", async () => {
  type CliCallArgs = { cliSessionId?: string } | undefined;
  type SavedEntry = { cliSessionIds?: Record<string, string>; claudeCliSessionId?: string };

  // Treat only "claude-cli" as a CLI provider for the duration of this test.
  vi.mocked(modelSelectionModule.isCliProvider).mockImplementation(
    (candidate) => candidate.trim().toLowerCase() === "claude-cli",
  );
  try {
    await withTempHome(async (home) => {
      const sessionKey = "agent:main:subagent:cli-expired";
      const store = path.join(home, "sessions.json");

      // Seed an entry that carries both the per-provider map and the legacy Claude field.
      writeSessionStoreSeed(store, {
        [sessionKey]: {
          sessionId: "session-cli-123",
          updatedAt: Date.now(),
          providerOverride: "claude-cli",
          modelOverride: "opus",
          cliSessionIds: { "claude-cli": "stale-cli-session" },
          claudeCliSessionId: "stale-legacy-session",
        },
      });
      mockConfig(home, store, {
        model: { primary: "claude-cli/opus", fallbacks: [] },
        models: { "claude-cli/opus": {} },
      });

      // First attempt fails with session_expired; every later attempt fails generically.
      const sessionExpired = new FailoverError("session expired", {
        reason: "session_expired",
        provider: "claude-cli",
        model: "opus",
        status: 410,
      });
      runCliAgentSpy
        .mockRejectedValueOnce(sessionExpired)
        .mockRejectedValue(new Error("retry failed"));

      const pendingCommand = agentCommand({ message: "hi", sessionKey }, runtime);
      await expect(pendingCommand).rejects.toThrow("retry failed");

      // The original call carried the stale ID; the retry must not send any ID.
      expect(runCliAgentSpy).toHaveBeenCalledTimes(2);
      const recordedCalls = runCliAgentSpy.mock.calls;
      const initialCall = recordedCalls[0]?.[0] as CliCallArgs;
      const retryCall = recordedCalls[1]?.[0] as CliCallArgs;
      expect(initialCall?.cliSessionId).toBe("stale-cli-session");
      expect(retryCall?.cliSessionId).toBeUndefined();

      // Both the per-provider ID and the legacy field must be gone from the persisted store.
      const persisted = JSON.parse(fs.readFileSync(store, "utf-8")) as Record<string, SavedEntry>;
      const persistedEntry = persisted[sessionKey];
      expect(persistedEntry?.cliSessionIds?.["claude-cli"]).toBeUndefined();
      expect(persistedEntry?.claudeCliSessionId).toBeUndefined();
    });
  } finally {
    // Restore the default stub so later tests see no CLI providers.
    vi.mocked(modelSelectionModule.isCliProvider).mockImplementation(() => false);
  }
});
it("rejects unknown agent overrides", async () => {
await withTempHome(async (home) => {
const store = path.join(home, "sessions.json");

View File

@@ -1,6 +1,9 @@
import { getAcpSessionManager } from "../acp/control-plane/manager.js";
import { resolveAcpAgentPolicyError, resolveAcpDispatchPolicyError } from "../acp/policy.js";
import { toAcpRuntimeError } from "../acp/runtime/errors.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
const log = createSubsystemLogger("commands/agent");
import {
listAgentIds,
resolveAgentDir,
@@ -12,8 +15,9 @@ import {
import { ensureAuthProfileStore } from "../agents/auth-profiles.js";
import { clearSessionAuthProfileOverride } from "../agents/auth-profiles/session-override.js";
import { runCliAgent } from "../agents/cli-runner.js";
import { getCliSessionId } from "../agents/cli-session.js";
import { getCliSessionId, setCliSessionId } from "../agents/cli-session.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
import { FailoverError } from "../agents/failover-error.js";
import { formatAgentInternalEventsForPrompt } from "../agents/internal-events.js";
import { AGENT_LANE_SUBAGENT } from "../agents/lanes.js";
import { loadModelCatalog } from "../agents/model-catalog.js";
@@ -23,6 +27,7 @@ import {
isCliProvider,
modelKey,
normalizeModelRef,
normalizeProviderId,
resolveConfiguredModelRef,
resolveDefaultModelForAgent,
resolveThinkingDefault,
@@ -89,7 +94,8 @@ type OverrideFieldClearedByDelete =
| "authProfileOverrideCompactionCount"
| "fallbackNoticeSelectedModel"
| "fallbackNoticeActiveModel"
| "fallbackNoticeReason";
| "fallbackNoticeReason"
| "claudeCliSessionId";
const OVERRIDE_FIELDS_CLEARED_BY_DELETE: OverrideFieldClearedByDelete[] = [
"providerOverride",
@@ -100,6 +106,7 @@ const OVERRIDE_FIELDS_CLEARED_BY_DELETE: OverrideFieldClearedByDelete[] = [
"fallbackNoticeSelectedModel",
"fallbackNoticeActiveModel",
"fallbackNoticeReason",
"claudeCliSessionId",
];
async function persistSessionEntry(params: PersistSessionEntryParams): Promise<void> {
@@ -162,6 +169,8 @@ function runAgentAttempt(params: {
agentDir: string;
onAgentEvent: (evt: { stream: string; data?: Record<string, unknown> }) => void;
primaryProvider: string;
sessionStore?: Record<string, SessionEntry>;
storePath?: string;
}) {
const senderIsOwner = params.opts.senderIsOwner ?? true;
const effectivePrompt = resolveFallbackRetryPrompt({
@@ -187,6 +196,94 @@ function runAgentAttempt(params: {
cliSessionId,
images: params.isFallbackRetry ? undefined : params.opts.images,
streamParams: params.opts.streamParams,
}).catch(async (err) => {
// Handle CLI session expired error
if (
err instanceof FailoverError &&
err.reason === "session_expired" &&
cliSessionId &&
params.sessionKey &&
params.sessionStore &&
params.storePath
) {
log.warn(
`CLI session expired, clearing from session store: provider=${params.providerOverride} sessionKey=${params.sessionKey}`,
);
// Clear the expired session ID from the session store
const entry = params.sessionStore[params.sessionKey];
if (entry) {
const updatedEntry = { ...entry };
if (params.providerOverride === "claude-cli") {
delete updatedEntry.claudeCliSessionId;
}
if (updatedEntry.cliSessionIds) {
const normalizedProvider = normalizeProviderId(params.providerOverride);
const newCliSessionIds = { ...updatedEntry.cliSessionIds };
delete newCliSessionIds[normalizedProvider];
updatedEntry.cliSessionIds = newCliSessionIds;
}
updatedEntry.updatedAt = Date.now();
await persistSessionEntry({
sessionStore: params.sessionStore,
sessionKey: params.sessionKey,
storePath: params.storePath,
entry: updatedEntry,
});
// Update the session entry reference
params.sessionEntry = updatedEntry;
}
// Retry with no session ID (will create a new session)
return runCliAgent({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
agentId: params.sessionAgentId,
sessionFile: params.sessionFile,
workspaceDir: params.workspaceDir,
config: params.cfg,
prompt: effectivePrompt,
provider: params.providerOverride,
model: params.modelOverride,
thinkLevel: params.resolvedThinkLevel,
timeoutMs: params.timeoutMs,
runId: params.runId,
extraSystemPrompt: params.opts.extraSystemPrompt,
cliSessionId: undefined, // No session ID to force new session
images: params.isFallbackRetry ? undefined : params.opts.images,
streamParams: params.opts.streamParams,
}).then(async (result) => {
// Update session store with new CLI session ID if available
if (
result.meta.agentMeta?.sessionId &&
params.sessionKey &&
params.sessionStore &&
params.storePath
) {
const entry = params.sessionStore[params.sessionKey];
if (entry) {
const updatedEntry = { ...entry };
setCliSessionId(
updatedEntry,
params.providerOverride,
result.meta.agentMeta.sessionId,
);
updatedEntry.updatedAt = Date.now();
await persistSessionEntry({
sessionStore: params.sessionStore,
sessionKey: params.sessionKey,
storePath: params.storePath,
entry: updatedEntry,
});
}
}
return result;
});
}
throw err;
});
}
@@ -766,6 +863,8 @@ export async function agentCommand(
resolvedVerboseLevel,
agentDir,
primaryProvider: provider,
sessionStore,
storePath,
onAgentEvent: (evt) => {
// Track lifecycle end for fallback emission below.
if (