mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-30 08:12:21 +00:00
fix: handle CLI session expired errors gracefully instead of crashing gateway (#31090)
* fix: handle CLI session expired errors gracefully - Add session_expired to FailoverReason type - Add isCliSessionExpiredErrorMessage to detect expired CLI sessions - Modify runCliAgent to retry with new session when session expires - Update agentCommand to clear expired session IDs from session store - Add proper error handling to prevent gateway crashes on expired sessions Fixes #30986 * fix: add session_expired to AuthProfileFailureReason and missing log import * fix: type cli-runner usage field to match EmbeddedPiAgentMeta * fix: harden CLI session-expiry recovery handling * build: regenerate host env security policy swift --------- Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -22,17 +22,17 @@ enum HostEnvSecurityPolicy {
|
||||
"PS4",
|
||||
"GCONV_PATH",
|
||||
"IFS",
|
||||
"SSLKEYLOGFILE",
|
||||
"SSLKEYLOGFILE"
|
||||
]
|
||||
|
||||
static let blockedOverrideKeys: Set<String> = [
|
||||
"HOME",
|
||||
"ZDOTDIR",
|
||||
"ZDOTDIR"
|
||||
]
|
||||
|
||||
static let blockedPrefixes: [String] = [
|
||||
"DYLD_",
|
||||
"LD_",
|
||||
"BASH_FUNC_",
|
||||
"BASH_FUNC_"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ export type AuthProfileFailureReason =
|
||||
| "billing"
|
||||
| "timeout"
|
||||
| "model_not_found"
|
||||
| "session_expired"
|
||||
| "unknown";
|
||||
|
||||
/** Per-profile usage statistics for round-robin and cooldown tracking */
|
||||
|
||||
@@ -153,6 +153,50 @@ describe("runCliAgent with process supervisor", () => {
|
||||
).rejects.toThrow("exceeded timeout");
|
||||
});
|
||||
|
||||
it("rethrows the retry failure when session-expired recovery retry also fails", async () => {
  // Both spawned runs exit with code 1 and differ only in what they write to stderr.
  const failedRun = (stderr: string) =>
    createManagedRun({
      reason: "exit",
      exitCode: 1,
      exitSignal: null,
      durationMs: 150,
      stdout: "",
      stderr,
      timedOut: false,
      noOutputTimedOut: false,
    });

  // First spawn reports an expired session; the second (the retry) fails for another reason.
  supervisorSpawnMock
    .mockResolvedValueOnce(failedRun("session expired"))
    .mockResolvedValueOnce(failedRun("rate limit exceeded"));

  const attempt = runCliAgent({
    sessionId: "s1",
    sessionKey: "agent:main:subagent:retry",
    sessionFile: "/tmp/session.jsonl",
    workspaceDir: "/tmp",
    prompt: "hi",
    provider: "codex-cli",
    model: "gpt-5.2-codex",
    timeoutMs: 1_000,
    runId: "run-retry-failure",
    cliSessionId: "thread-123",
  });

  // The error surfaced to the caller must be the retry's error, not the original one.
  await expect(attempt).rejects.toThrow("rate limit exceeded");

  // Exactly one retry: the initial spawn plus the recovery spawn.
  expect(supervisorSpawnMock).toHaveBeenCalledTimes(2);
});
|
||||
|
||||
it("falls back to per-agent workspace when workspaceDir is missing", async () => {
|
||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cli-runner-"));
|
||||
const fallbackWorkspace = path.join(tempDir, "workspace-main");
|
||||
|
||||
@@ -122,204 +122,221 @@ export async function runCliAgent(params: {
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
|
||||
const { sessionId: cliSessionIdToSend, isNew } = resolveSessionIdToSend({
|
||||
backend,
|
||||
cliSessionId: params.cliSessionId,
|
||||
});
|
||||
const useResume = Boolean(
|
||||
params.cliSessionId &&
|
||||
cliSessionIdToSend &&
|
||||
backend.resumeArgs &&
|
||||
backend.resumeArgs.length > 0,
|
||||
);
|
||||
const sessionIdSent = cliSessionIdToSend
|
||||
? useResume || Boolean(backend.sessionArg) || Boolean(backend.sessionArgs?.length)
|
||||
? cliSessionIdToSend
|
||||
: undefined
|
||||
: undefined;
|
||||
const systemPromptArg = resolveSystemPromptUsage({
|
||||
backend,
|
||||
isNewSession: isNew,
|
||||
systemPrompt,
|
||||
});
|
||||
|
||||
let imagePaths: string[] | undefined;
|
||||
let cleanupImages: (() => Promise<void>) | undefined;
|
||||
let prompt = params.prompt;
|
||||
if (params.images && params.images.length > 0) {
|
||||
const imagePayload = await writeCliImages(params.images);
|
||||
imagePaths = imagePayload.paths;
|
||||
cleanupImages = imagePayload.cleanup;
|
||||
if (!backend.imageArg) {
|
||||
prompt = appendImagePathsToPrompt(prompt, imagePaths);
|
||||
}
|
||||
}
|
||||
|
||||
const { argsPrompt, stdin } = resolvePromptInput({
|
||||
backend,
|
||||
prompt,
|
||||
});
|
||||
const stdinPayload = stdin ?? "";
|
||||
const baseArgs = useResume ? (backend.resumeArgs ?? backend.args ?? []) : (backend.args ?? []);
|
||||
const resolvedArgs = useResume
|
||||
? baseArgs.map((entry) => entry.replaceAll("{sessionId}", cliSessionIdToSend ?? ""))
|
||||
: baseArgs;
|
||||
const args = buildCliArgs({
|
||||
backend,
|
||||
baseArgs: resolvedArgs,
|
||||
modelId: normalizedModel,
|
||||
sessionId: cliSessionIdToSend,
|
||||
systemPrompt: systemPromptArg,
|
||||
imagePaths,
|
||||
promptArg: argsPrompt,
|
||||
useResume,
|
||||
});
|
||||
|
||||
const serialize = backend.serialize ?? true;
|
||||
const queueKey = serialize ? backendResolved.id : `${backendResolved.id}:${params.runId}`;
|
||||
|
||||
try {
|
||||
const output = await enqueueCliRun(queueKey, async () => {
|
||||
log.info(
|
||||
`cli exec: provider=${params.provider} model=${normalizedModel} promptChars=${params.prompt.length}`,
|
||||
);
|
||||
const logOutputText = isTruthyEnvValue(process.env.OPENCLAW_CLAUDE_CLI_LOG_OUTPUT);
|
||||
if (logOutputText) {
|
||||
const logArgs: string[] = [];
|
||||
for (let i = 0; i < args.length; i += 1) {
|
||||
const arg = args[i] ?? "";
|
||||
if (arg === backend.systemPromptArg) {
|
||||
const systemPromptValue = args[i + 1] ?? "";
|
||||
logArgs.push(arg, `<systemPrompt:${systemPromptValue.length} chars>`);
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === backend.sessionArg) {
|
||||
logArgs.push(arg, args[i + 1] ?? "");
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === backend.modelArg) {
|
||||
logArgs.push(arg, args[i + 1] ?? "");
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === backend.imageArg) {
|
||||
logArgs.push(arg, "<image>");
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
logArgs.push(arg);
|
||||
}
|
||||
if (argsPrompt) {
|
||||
const promptIndex = logArgs.indexOf(argsPrompt);
|
||||
if (promptIndex >= 0) {
|
||||
logArgs[promptIndex] = `<prompt:${argsPrompt.length} chars>`;
|
||||
}
|
||||
}
|
||||
log.info(`cli argv: ${backend.command} ${logArgs.join(" ")}`);
|
||||
}
|
||||
|
||||
const env = (() => {
|
||||
const next = { ...process.env, ...backend.env };
|
||||
for (const key of backend.clearEnv ?? []) {
|
||||
delete next[key];
|
||||
}
|
||||
return next;
|
||||
})();
|
||||
const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
|
||||
backend,
|
||||
timeoutMs: params.timeoutMs,
|
||||
useResume,
|
||||
});
|
||||
const supervisor = getProcessSupervisor();
|
||||
const scopeKey = buildCliSupervisorScopeKey({
|
||||
backend,
|
||||
backendId: backendResolved.id,
|
||||
cliSessionId: useResume ? cliSessionIdToSend : undefined,
|
||||
});
|
||||
|
||||
const managedRun = await supervisor.spawn({
|
||||
sessionId: params.sessionId,
|
||||
backendId: backendResolved.id,
|
||||
scopeKey,
|
||||
replaceExistingScope: Boolean(useResume && scopeKey),
|
||||
mode: "child",
|
||||
argv: [backend.command, ...args],
|
||||
timeoutMs: params.timeoutMs,
|
||||
noOutputTimeoutMs,
|
||||
cwd: workspaceDir,
|
||||
env,
|
||||
input: stdinPayload,
|
||||
});
|
||||
const result = await managedRun.wait();
|
||||
|
||||
const stdout = result.stdout.trim();
|
||||
const stderr = result.stderr.trim();
|
||||
if (logOutputText) {
|
||||
if (stdout) {
|
||||
log.info(`cli stdout:\n${stdout}`);
|
||||
}
|
||||
if (stderr) {
|
||||
log.info(`cli stderr:\n${stderr}`);
|
||||
}
|
||||
}
|
||||
if (shouldLogVerbose()) {
|
||||
if (stdout) {
|
||||
log.debug(`cli stdout:\n${stdout}`);
|
||||
}
|
||||
if (stderr) {
|
||||
log.debug(`cli stderr:\n${stderr}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (result.exitCode !== 0 || result.reason !== "exit") {
|
||||
if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
|
||||
const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
|
||||
log.warn(
|
||||
`cli watchdog timeout: provider=${params.provider} model=${modelId} session=${cliSessionIdToSend ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
|
||||
);
|
||||
throw new FailoverError(timeoutReason, {
|
||||
reason: "timeout",
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
status: resolveFailoverStatus("timeout"),
|
||||
});
|
||||
}
|
||||
if (result.reason === "overall-timeout") {
|
||||
const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
|
||||
throw new FailoverError(timeoutReason, {
|
||||
reason: "timeout",
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
status: resolveFailoverStatus("timeout"),
|
||||
});
|
||||
}
|
||||
const err = stderr || stdout || "CLI failed.";
|
||||
const reason = classifyFailoverReason(err) ?? "unknown";
|
||||
const status = resolveFailoverStatus(reason);
|
||||
throw new FailoverError(err, {
|
||||
reason,
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
status,
|
||||
});
|
||||
}
|
||||
|
||||
const outputMode = useResume ? (backend.resumeOutput ?? backend.output) : backend.output;
|
||||
|
||||
if (outputMode === "text") {
|
||||
return { text: stdout, sessionId: undefined };
|
||||
}
|
||||
if (outputMode === "jsonl") {
|
||||
const parsed = parseCliJsonl(stdout, backend);
|
||||
return parsed ?? { text: stdout };
|
||||
}
|
||||
|
||||
const parsed = parseCliJson(stdout, backend);
|
||||
return parsed ?? { text: stdout };
|
||||
// Helper function to execute CLI with given session ID
|
||||
const executeCliWithSession = async (
|
||||
cliSessionIdToUse?: string,
|
||||
): Promise<{
|
||||
text: string;
|
||||
sessionId?: string;
|
||||
usage?: {
|
||||
input?: number;
|
||||
output?: number;
|
||||
cacheRead?: number;
|
||||
cacheWrite?: number;
|
||||
total?: number;
|
||||
};
|
||||
}> => {
|
||||
const { sessionId: resolvedSessionId, isNew } = resolveSessionIdToSend({
|
||||
backend,
|
||||
cliSessionId: cliSessionIdToUse,
|
||||
});
|
||||
const useResume = Boolean(
|
||||
cliSessionIdToUse && resolvedSessionId && backend.resumeArgs && backend.resumeArgs.length > 0,
|
||||
);
|
||||
const systemPromptArg = resolveSystemPromptUsage({
|
||||
backend,
|
||||
isNewSession: isNew,
|
||||
systemPrompt,
|
||||
});
|
||||
|
||||
let imagePaths: string[] | undefined;
|
||||
let cleanupImages: (() => Promise<void>) | undefined;
|
||||
let prompt = params.prompt;
|
||||
if (params.images && params.images.length > 0) {
|
||||
const imagePayload = await writeCliImages(params.images);
|
||||
imagePaths = imagePayload.paths;
|
||||
cleanupImages = imagePayload.cleanup;
|
||||
if (!backend.imageArg) {
|
||||
prompt = appendImagePathsToPrompt(prompt, imagePaths);
|
||||
}
|
||||
}
|
||||
|
||||
const { argsPrompt, stdin } = resolvePromptInput({
|
||||
backend,
|
||||
prompt,
|
||||
});
|
||||
const stdinPayload = stdin ?? "";
|
||||
const baseArgs = useResume ? (backend.resumeArgs ?? backend.args ?? []) : (backend.args ?? []);
|
||||
const resolvedArgs = useResume
|
||||
? baseArgs.map((entry) => entry.replaceAll("{sessionId}", resolvedSessionId ?? ""))
|
||||
: baseArgs;
|
||||
const args = buildCliArgs({
|
||||
backend,
|
||||
baseArgs: resolvedArgs,
|
||||
modelId: normalizedModel,
|
||||
sessionId: resolvedSessionId,
|
||||
systemPrompt: systemPromptArg,
|
||||
imagePaths,
|
||||
promptArg: argsPrompt,
|
||||
useResume,
|
||||
});
|
||||
|
||||
const serialize = backend.serialize ?? true;
|
||||
const queueKey = serialize ? backendResolved.id : `${backendResolved.id}:${params.runId}`;
|
||||
|
||||
try {
|
||||
const output = await enqueueCliRun(queueKey, async () => {
|
||||
log.info(
|
||||
`cli exec: provider=${params.provider} model=${normalizedModel} promptChars=${params.prompt.length}`,
|
||||
);
|
||||
const logOutputText = isTruthyEnvValue(process.env.OPENCLAW_CLAUDE_CLI_LOG_OUTPUT);
|
||||
if (logOutputText) {
|
||||
const logArgs: string[] = [];
|
||||
for (let i = 0; i < args.length; i += 1) {
|
||||
const arg = args[i] ?? "";
|
||||
if (arg === backend.systemPromptArg) {
|
||||
const systemPromptValue = args[i + 1] ?? "";
|
||||
logArgs.push(arg, `<systemPrompt:${systemPromptValue.length} chars>`);
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === backend.sessionArg) {
|
||||
logArgs.push(arg, args[i + 1] ?? "");
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === backend.modelArg) {
|
||||
logArgs.push(arg, args[i + 1] ?? "");
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if (arg === backend.imageArg) {
|
||||
logArgs.push(arg, "<image>");
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
logArgs.push(arg);
|
||||
}
|
||||
if (argsPrompt) {
|
||||
const promptIndex = logArgs.indexOf(argsPrompt);
|
||||
if (promptIndex >= 0) {
|
||||
logArgs[promptIndex] = `<prompt:${argsPrompt.length} chars>`;
|
||||
}
|
||||
}
|
||||
log.info(`cli argv: ${backend.command} ${logArgs.join(" ")}`);
|
||||
}
|
||||
|
||||
const env = (() => {
|
||||
const next = { ...process.env, ...backend.env };
|
||||
for (const key of backend.clearEnv ?? []) {
|
||||
delete next[key];
|
||||
}
|
||||
return next;
|
||||
})();
|
||||
const noOutputTimeoutMs = resolveCliNoOutputTimeoutMs({
|
||||
backend,
|
||||
timeoutMs: params.timeoutMs,
|
||||
useResume,
|
||||
});
|
||||
const supervisor = getProcessSupervisor();
|
||||
const scopeKey = buildCliSupervisorScopeKey({
|
||||
backend,
|
||||
backendId: backendResolved.id,
|
||||
cliSessionId: useResume ? resolvedSessionId : undefined,
|
||||
});
|
||||
|
||||
const managedRun = await supervisor.spawn({
|
||||
sessionId: params.sessionId,
|
||||
backendId: backendResolved.id,
|
||||
scopeKey,
|
||||
replaceExistingScope: Boolean(useResume && scopeKey),
|
||||
mode: "child",
|
||||
argv: [backend.command, ...args],
|
||||
timeoutMs: params.timeoutMs,
|
||||
noOutputTimeoutMs,
|
||||
cwd: workspaceDir,
|
||||
env,
|
||||
input: stdinPayload,
|
||||
});
|
||||
const result = await managedRun.wait();
|
||||
|
||||
const stdout = result.stdout.trim();
|
||||
const stderr = result.stderr.trim();
|
||||
if (logOutputText) {
|
||||
if (stdout) {
|
||||
log.info(`cli stdout:\n${stdout}`);
|
||||
}
|
||||
if (stderr) {
|
||||
log.info(`cli stderr:\n${stderr}`);
|
||||
}
|
||||
}
|
||||
if (shouldLogVerbose()) {
|
||||
if (stdout) {
|
||||
log.debug(`cli stdout:\n${stdout}`);
|
||||
}
|
||||
if (stderr) {
|
||||
log.debug(`cli stderr:\n${stderr}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (result.exitCode !== 0 || result.reason !== "exit") {
|
||||
if (result.reason === "no-output-timeout" || result.noOutputTimedOut) {
|
||||
const timeoutReason = `CLI produced no output for ${Math.round(noOutputTimeoutMs / 1000)}s and was terminated.`;
|
||||
log.warn(
|
||||
`cli watchdog timeout: provider=${params.provider} model=${modelId} session=${resolvedSessionId ?? params.sessionId} noOutputTimeoutMs=${noOutputTimeoutMs} pid=${managedRun.pid ?? "unknown"}`,
|
||||
);
|
||||
throw new FailoverError(timeoutReason, {
|
||||
reason: "timeout",
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
status: resolveFailoverStatus("timeout"),
|
||||
});
|
||||
}
|
||||
if (result.reason === "overall-timeout") {
|
||||
const timeoutReason = `CLI exceeded timeout (${Math.round(params.timeoutMs / 1000)}s) and was terminated.`;
|
||||
throw new FailoverError(timeoutReason, {
|
||||
reason: "timeout",
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
status: resolveFailoverStatus("timeout"),
|
||||
});
|
||||
}
|
||||
const err = stderr || stdout || "CLI failed.";
|
||||
const reason = classifyFailoverReason(err) ?? "unknown";
|
||||
const status = resolveFailoverStatus(reason);
|
||||
throw new FailoverError(err, {
|
||||
reason,
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
status,
|
||||
});
|
||||
}
|
||||
|
||||
const outputMode = useResume ? (backend.resumeOutput ?? backend.output) : backend.output;
|
||||
|
||||
if (outputMode === "text") {
|
||||
return { text: stdout, sessionId: undefined };
|
||||
}
|
||||
if (outputMode === "jsonl") {
|
||||
const parsed = parseCliJsonl(stdout, backend);
|
||||
return parsed ?? { text: stdout };
|
||||
}
|
||||
|
||||
const parsed = parseCliJson(stdout, backend);
|
||||
return parsed ?? { text: stdout };
|
||||
});
|
||||
|
||||
return output;
|
||||
} finally {
|
||||
if (cleanupImages) {
|
||||
await cleanupImages();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Try with the provided CLI session ID first
|
||||
try {
|
||||
const output = await executeCliWithSession(params.cliSessionId);
|
||||
const text = output.text?.trim();
|
||||
const payloads = text ? [{ text }] : undefined;
|
||||
|
||||
@@ -328,7 +345,7 @@ export async function runCliAgent(params: {
|
||||
meta: {
|
||||
durationMs: Date.now() - started,
|
||||
agentMeta: {
|
||||
sessionId: output.sessionId ?? sessionIdSent ?? params.sessionId ?? "",
|
||||
sessionId: output.sessionId ?? params.cliSessionId ?? params.sessionId ?? "",
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
usage: output.usage,
|
||||
@@ -337,6 +354,34 @@ export async function runCliAgent(params: {
|
||||
};
|
||||
} catch (err) {
|
||||
if (err instanceof FailoverError) {
|
||||
// Check if this is a session expired error and we have a session to clear
|
||||
if (err.reason === "session_expired" && params.cliSessionId && params.sessionKey) {
|
||||
log.warn(
|
||||
`CLI session expired, clearing session ID and retrying: provider=${params.provider} session=${redactRunIdentifier(params.cliSessionId)}`,
|
||||
);
|
||||
|
||||
// Clear the expired session ID from the session entry
|
||||
// This requires access to the session store, which we don't have here
|
||||
// We'll need to modify the caller to handle this case
|
||||
|
||||
// For now, retry without the session ID to create a new session
|
||||
const output = await executeCliWithSession(undefined);
|
||||
const text = output.text?.trim();
|
||||
const payloads = text ? [{ text }] : undefined;
|
||||
|
||||
return {
|
||||
payloads,
|
||||
meta: {
|
||||
durationMs: Date.now() - started,
|
||||
agentMeta: {
|
||||
sessionId: output.sessionId ?? params.sessionId ?? "",
|
||||
provider: params.provider,
|
||||
model: modelId,
|
||||
usage: output.usage,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
@@ -351,10 +396,6 @@ export async function runCliAgent(params: {
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
} finally {
|
||||
if (cleanupImages) {
|
||||
await cleanupImages();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,8 @@ export function resolveFailoverStatus(reason: FailoverReason): number | undefine
|
||||
return 400;
|
||||
case "model_not_found":
|
||||
return 404;
|
||||
case "session_expired":
|
||||
return 410; // Gone - session no longer exists
|
||||
default:
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@@ -883,6 +883,27 @@ export function isModelNotFoundErrorMessage(raw: string): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Lower-case substrings that identify a CLI error as "the session or
 * conversation being resumed is expired, invalid, or no longer exists".
 * Hoisted to module scope so the list is built once rather than per call.
 */
const CLI_SESSION_EXPIRED_PATTERNS: readonly string[] = [
  "session not found",
  "session does not exist",
  "session expired",
  "session invalid",
  "conversation not found",
  "conversation does not exist",
  "conversation expired",
  "conversation invalid",
  "no such session",
  "invalid session",
  "session id not found",
  "conversation id not found",
];

/**
 * Reports whether a raw CLI error message indicates an expired or missing
 * session/conversation.
 *
 * Matching is a case-insensitive substring test against
 * `CLI_SESSION_EXPIRED_PATTERNS`.
 *
 * @param raw - Error text to inspect.
 * @returns `true` when any known pattern occurs in `raw`; `false` otherwise,
 *   including when `raw` is empty.
 */
function isCliSessionExpiredErrorMessage(raw: string): boolean {
  if (!raw) {
    return false;
  }
  const lower = raw.toLowerCase();
  return CLI_SESSION_EXPIRED_PATTERNS.some((pattern) => lower.includes(pattern));
}
|
||||
|
||||
export function classifyFailoverReason(raw: string): FailoverReason | null {
|
||||
if (isImageDimensionErrorMessage(raw)) {
|
||||
return null;
|
||||
@@ -890,6 +911,9 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
|
||||
if (isImageSizeError(raw)) {
|
||||
return null;
|
||||
}
|
||||
if (isCliSessionExpiredErrorMessage(raw)) {
|
||||
return "session_expired";
|
||||
}
|
||||
if (isModelNotFoundErrorMessage(raw)) {
|
||||
return "model_not_found";
|
||||
}
|
||||
|
||||
@@ -8,4 +8,5 @@ export type FailoverReason =
|
||||
| "billing"
|
||||
| "timeout"
|
||||
| "model_not_found"
|
||||
| "session_expired"
|
||||
| "unknown";
|
||||
|
||||
@@ -4,7 +4,9 @@ import { beforeEach, describe, expect, it, type MockInstance, vi } from "vitest"
|
||||
import { withTempHome as withTempHomeBase } from "../../test/helpers/temp-home.js";
|
||||
import "../cron/isolated-agent.mocks.js";
|
||||
import * as cliRunnerModule from "../agents/cli-runner.js";
|
||||
import { FailoverError } from "../agents/failover-error.js";
|
||||
import { loadModelCatalog } from "../agents/model-catalog.js";
|
||||
import * as modelSelectionModule from "../agents/model-selection.js";
|
||||
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import * as configModule from "../config/config.js";
|
||||
@@ -148,6 +150,7 @@ beforeEach(() => {
|
||||
},
|
||||
});
|
||||
vi.mocked(loadModelCatalog).mockResolvedValue([]);
|
||||
vi.mocked(modelSelectionModule.isCliProvider).mockImplementation(() => false);
|
||||
});
|
||||
|
||||
describe("agentCommand", () => {
|
||||
@@ -640,6 +643,66 @@ describe("agentCommand", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("clears stale Claude CLI legacy session IDs before retrying after session expiration", async () => {
  // For this test only, recognise "claude-cli" (case/whitespace-insensitive) as a CLI provider.
  vi.mocked(modelSelectionModule.isCliProvider).mockImplementation(
    (provider) => provider.trim().toLowerCase() === "claude-cli",
  );
  try {
    await withTempHome(async (home) => {
      const store = path.join(home, "sessions.json");
      const sessionKey = "agent:main:subagent:cli-expired";

      // Seed a session entry that carries both the per-provider map and the legacy field.
      writeSessionStoreSeed(store, {
        [sessionKey]: {
          sessionId: "session-cli-123",
          updatedAt: Date.now(),
          providerOverride: "claude-cli",
          modelOverride: "opus",
          cliSessionIds: { "claude-cli": "stale-cli-session" },
          claudeCliSessionId: "stale-legacy-session",
        },
      });
      mockConfig(home, store, {
        model: { primary: "claude-cli/opus", fallbacks: [] },
        models: { "claude-cli/opus": {} },
      });

      // First call fails with a session-expired failover error; every later call fails generically.
      const sessionExpired = new FailoverError("session expired", {
        reason: "session_expired",
        provider: "claude-cli",
        model: "opus",
        status: 410,
      });
      runCliAgentSpy
        .mockRejectedValueOnce(sessionExpired)
        .mockRejectedValue(new Error("retry failed"));

      const run = agentCommand({ message: "hi", sessionKey }, runtime);
      await expect(run).rejects.toThrow("retry failed");

      // The stale ID is sent on the first attempt and dropped on the retry.
      type CliCallArgs = { cliSessionId?: string } | undefined;
      expect(runCliAgentSpy).toHaveBeenCalledTimes(2);
      const firstCall = runCliAgentSpy.mock.calls[0]?.[0] as CliCallArgs;
      const secondCall = runCliAgentSpy.mock.calls[1]?.[0] as CliCallArgs;
      expect(firstCall?.cliSessionId).toBe("stale-cli-session");
      expect(secondCall?.cliSessionId).toBeUndefined();

      // Both the per-provider ID and the legacy field must be gone from the persisted store.
      type StoredEntry = {
        cliSessionIds?: Record<string, string>;
        claudeCliSessionId?: string;
      };
      const saved = JSON.parse(fs.readFileSync(store, "utf-8")) as Record<string, StoredEntry>;
      const entry = saved[sessionKey];
      expect(entry?.cliSessionIds?.["claude-cli"]).toBeUndefined();
      expect(entry?.claudeCliSessionId).toBeUndefined();
    });
  } finally {
    // Restore the default installed in beforeEach so later tests are unaffected.
    vi.mocked(modelSelectionModule.isCliProvider).mockImplementation(() => false);
  }
});
|
||||
|
||||
it("rejects unknown agent overrides", async () => {
|
||||
await withTempHome(async (home) => {
|
||||
const store = path.join(home, "sessions.json");
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
import { getAcpSessionManager } from "../acp/control-plane/manager.js";
|
||||
import { resolveAcpAgentPolicyError, resolveAcpDispatchPolicyError } from "../acp/policy.js";
|
||||
import { toAcpRuntimeError } from "../acp/runtime/errors.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
|
||||
const log = createSubsystemLogger("commands/agent");
|
||||
import {
|
||||
listAgentIds,
|
||||
resolveAgentDir,
|
||||
@@ -12,8 +15,9 @@ import {
|
||||
import { ensureAuthProfileStore } from "../agents/auth-profiles.js";
|
||||
import { clearSessionAuthProfileOverride } from "../agents/auth-profiles/session-override.js";
|
||||
import { runCliAgent } from "../agents/cli-runner.js";
|
||||
import { getCliSessionId } from "../agents/cli-session.js";
|
||||
import { getCliSessionId, setCliSessionId } from "../agents/cli-session.js";
|
||||
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js";
|
||||
import { FailoverError } from "../agents/failover-error.js";
|
||||
import { formatAgentInternalEventsForPrompt } from "../agents/internal-events.js";
|
||||
import { AGENT_LANE_SUBAGENT } from "../agents/lanes.js";
|
||||
import { loadModelCatalog } from "../agents/model-catalog.js";
|
||||
@@ -23,6 +27,7 @@ import {
|
||||
isCliProvider,
|
||||
modelKey,
|
||||
normalizeModelRef,
|
||||
normalizeProviderId,
|
||||
resolveConfiguredModelRef,
|
||||
resolveDefaultModelForAgent,
|
||||
resolveThinkingDefault,
|
||||
@@ -89,7 +94,8 @@ type OverrideFieldClearedByDelete =
|
||||
| "authProfileOverrideCompactionCount"
|
||||
| "fallbackNoticeSelectedModel"
|
||||
| "fallbackNoticeActiveModel"
|
||||
| "fallbackNoticeReason";
|
||||
| "fallbackNoticeReason"
|
||||
| "claudeCliSessionId";
|
||||
|
||||
const OVERRIDE_FIELDS_CLEARED_BY_DELETE: OverrideFieldClearedByDelete[] = [
|
||||
"providerOverride",
|
||||
@@ -100,6 +106,7 @@ const OVERRIDE_FIELDS_CLEARED_BY_DELETE: OverrideFieldClearedByDelete[] = [
|
||||
"fallbackNoticeSelectedModel",
|
||||
"fallbackNoticeActiveModel",
|
||||
"fallbackNoticeReason",
|
||||
"claudeCliSessionId",
|
||||
];
|
||||
|
||||
async function persistSessionEntry(params: PersistSessionEntryParams): Promise<void> {
|
||||
@@ -162,6 +169,8 @@ function runAgentAttempt(params: {
|
||||
agentDir: string;
|
||||
onAgentEvent: (evt: { stream: string; data?: Record<string, unknown> }) => void;
|
||||
primaryProvider: string;
|
||||
sessionStore?: Record<string, SessionEntry>;
|
||||
storePath?: string;
|
||||
}) {
|
||||
const senderIsOwner = params.opts.senderIsOwner ?? true;
|
||||
const effectivePrompt = resolveFallbackRetryPrompt({
|
||||
@@ -187,6 +196,94 @@ function runAgentAttempt(params: {
|
||||
cliSessionId,
|
||||
images: params.isFallbackRetry ? undefined : params.opts.images,
|
||||
streamParams: params.opts.streamParams,
|
||||
}).catch(async (err) => {
|
||||
// Handle CLI session expired error
|
||||
if (
|
||||
err instanceof FailoverError &&
|
||||
err.reason === "session_expired" &&
|
||||
cliSessionId &&
|
||||
params.sessionKey &&
|
||||
params.sessionStore &&
|
||||
params.storePath
|
||||
) {
|
||||
log.warn(
|
||||
`CLI session expired, clearing from session store: provider=${params.providerOverride} sessionKey=${params.sessionKey}`,
|
||||
);
|
||||
|
||||
// Clear the expired session ID from the session store
|
||||
const entry = params.sessionStore[params.sessionKey];
|
||||
if (entry) {
|
||||
const updatedEntry = { ...entry };
|
||||
if (params.providerOverride === "claude-cli") {
|
||||
delete updatedEntry.claudeCliSessionId;
|
||||
}
|
||||
if (updatedEntry.cliSessionIds) {
|
||||
const normalizedProvider = normalizeProviderId(params.providerOverride);
|
||||
const newCliSessionIds = { ...updatedEntry.cliSessionIds };
|
||||
delete newCliSessionIds[normalizedProvider];
|
||||
updatedEntry.cliSessionIds = newCliSessionIds;
|
||||
}
|
||||
updatedEntry.updatedAt = Date.now();
|
||||
|
||||
await persistSessionEntry({
|
||||
sessionStore: params.sessionStore,
|
||||
sessionKey: params.sessionKey,
|
||||
storePath: params.storePath,
|
||||
entry: updatedEntry,
|
||||
});
|
||||
|
||||
// Update the session entry reference
|
||||
params.sessionEntry = updatedEntry;
|
||||
}
|
||||
|
||||
// Retry with no session ID (will create a new session)
|
||||
return runCliAgent({
|
||||
sessionId: params.sessionId,
|
||||
sessionKey: params.sessionKey,
|
||||
agentId: params.sessionAgentId,
|
||||
sessionFile: params.sessionFile,
|
||||
workspaceDir: params.workspaceDir,
|
||||
config: params.cfg,
|
||||
prompt: effectivePrompt,
|
||||
provider: params.providerOverride,
|
||||
model: params.modelOverride,
|
||||
thinkLevel: params.resolvedThinkLevel,
|
||||
timeoutMs: params.timeoutMs,
|
||||
runId: params.runId,
|
||||
extraSystemPrompt: params.opts.extraSystemPrompt,
|
||||
cliSessionId: undefined, // No session ID to force new session
|
||||
images: params.isFallbackRetry ? undefined : params.opts.images,
|
||||
streamParams: params.opts.streamParams,
|
||||
}).then(async (result) => {
|
||||
// Update session store with new CLI session ID if available
|
||||
if (
|
||||
result.meta.agentMeta?.sessionId &&
|
||||
params.sessionKey &&
|
||||
params.sessionStore &&
|
||||
params.storePath
|
||||
) {
|
||||
const entry = params.sessionStore[params.sessionKey];
|
||||
if (entry) {
|
||||
const updatedEntry = { ...entry };
|
||||
setCliSessionId(
|
||||
updatedEntry,
|
||||
params.providerOverride,
|
||||
result.meta.agentMeta.sessionId,
|
||||
);
|
||||
updatedEntry.updatedAt = Date.now();
|
||||
|
||||
await persistSessionEntry({
|
||||
sessionStore: params.sessionStore,
|
||||
sessionKey: params.sessionKey,
|
||||
storePath: params.storePath,
|
||||
entry: updatedEntry,
|
||||
});
|
||||
}
|
||||
}
|
||||
return result;
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -766,6 +863,8 @@ export async function agentCommand(
|
||||
resolvedVerboseLevel,
|
||||
agentDir,
|
||||
primaryProvider: provider,
|
||||
sessionStore,
|
||||
storePath,
|
||||
onAgentEvent: (evt) => {
|
||||
// Track lifecycle end for fallback emission below.
|
||||
if (
|
||||
|
||||
Reference in New Issue
Block a user