mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-18 12:17:26 +00:00
fix(agents): skip auth-profile failure on overload
This commit is contained in:
@@ -252,6 +252,24 @@ const mockFailedThenSuccessfulAttempt = (errorMessage = "rate limit") => {
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Queues two results on `runEmbeddedAttemptMock`: first an attempt carrying a
 * `promptError` built from `errorMessage`, then an attempt whose assistant
 * reply is the text "ok" with stop reason "stop".
 *
 * @param errorMessage - Message for the `Error` stored as the first attempt's `promptError`.
 */
const mockPromptErrorThenSuccessfulAttempt = (errorMessage: string) => {
  // First queued result: the attempt that carries the prompt error.
  const failingAttempt = makeAttempt({
    promptError: new Error(errorMessage),
  });
  runEmbeddedAttemptMock.mockResolvedValueOnce(failingAttempt);

  // Second queued result: an attempt with a plain "ok" assistant reply.
  const okAssistant = buildAssistant({
    stopReason: "stop",
    content: [{ type: "text", text: "ok" }],
  });
  const succeedingAttempt = makeAttempt({
    assistantTexts: ["ok"],
    lastAssistant: okAssistant,
  });
  runEmbeddedAttemptMock.mockResolvedValueOnce(succeedingAttempt);
};
|
||||
|
||||
async function runAutoPinnedOpenAiTurn(params: {
|
||||
agentDir: string;
|
||||
workspaceDir: string;
|
||||
@@ -320,6 +338,28 @@ async function runAutoPinnedRotationCase(params: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Runs one auto-pinned OpenAI turn where the first embedded attempt reports a
 * prompt error and the second one succeeds, then returns the usage stats read
 * from the agent directory.
 *
 * Asserts that `runEmbeddedAttemptMock` was called exactly twice.
 *
 * @param params.errorMessage - Message used for the first attempt's prompt error.
 * @param params.sessionKey - Session key forwarded to `runAutoPinnedOpenAiTurn`.
 * @param params.runId - Run id forwarded to `runAutoPinnedOpenAiTurn`.
 * @returns An object holding the `usageStats` read after the turn.
 */
async function runAutoPinnedPromptErrorRotationCase(params: {
  errorMessage: string;
  sessionKey: string;
  runId: string;
}) {
  // Reset recorded calls so the call-count assertion below only sees this case.
  runEmbeddedAttemptMock.mockClear();

  const { errorMessage, sessionKey, runId } = params;

  return withAgentWorkspace(async (workspace) => {
    const { agentDir, workspaceDir } = workspace;

    await writeAuthStore(agentDir);
    mockPromptErrorThenSuccessfulAttempt(errorMessage);

    await runAutoPinnedOpenAiTurn({ agentDir, workspaceDir, sessionKey, runId });

    // One attempt with the prompt error plus one successful attempt.
    expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(2);

    return { usageStats: await readUsageStats(agentDir) };
  });
}
|
||||
|
||||
function mockSingleSuccessfulAttempt() {
|
||||
runEmbeddedAttemptMock.mockResolvedValueOnce(
|
||||
makeAttempt({
|
||||
@@ -639,13 +679,24 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
||||
expect(typeof usageStats["openai:p2"]?.lastUsed).toBe("number");
|
||||
});
|
||||
|
||||
it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {
|
||||
it("rotates for overloaded assistant failures across auto-pinned profiles", async () => {
  // Provider-style overloaded error payload used as the failure message.
  const overloadedPayload =
    '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}';

  const outcome = await runAutoPinnedRotationCase({
    errorMessage: overloadedPayload,
    sessionKey: "agent:test:overloaded-rotation",
    runId: "run:overloaded-rotation",
  });
  const stats = outcome.usageStats;

  // The second profile must have a recorded lastUsed timestamp ...
  expect(typeof stats["openai:p2"]?.lastUsed).toBe("number");
  // ... while the first profile must not have a cooldown recorded.
  expect(stats["openai:p1"]?.cooldownUntil).toBeUndefined();
});
|
||||
|
||||
it("rotates for overloaded prompt failures across auto-pinned profiles", async () => {
  // Provider-style overloaded error payload used as the prompt error message.
  const overloadedPayload =
    '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}';

  const outcome = await runAutoPinnedPromptErrorRotationCase({
    errorMessage: overloadedPayload,
    sessionKey: "agent:test:overloaded-prompt-rotation",
    runId: "run:overloaded-prompt-rotation",
  });
  const stats = outcome.usageStats;

  // The second profile must have a recorded lastUsed timestamp ...
  expect(typeof stats["openai:p2"]?.lastUsed).toBe("number");
  // ... while the first profile must not have a cooldown recorded.
  expect(stats["openai:p1"]?.cooldownUntil).toBeUndefined();
});
|
||||
|
||||
it("rotates on timeout without cooling down the timed-out profile", async () => {
|
||||
|
||||
@@ -10,6 +10,7 @@ import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
||||
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
|
||||
import {
|
||||
isProfileInCooldown,
|
||||
type AuthProfileFailureReason,
|
||||
markAuthProfileFailure,
|
||||
markAuthProfileGood,
|
||||
markAuthProfileUsed,
|
||||
@@ -41,6 +42,7 @@ import {
|
||||
isLikelyContextOverflowError,
|
||||
isFailoverAssistantError,
|
||||
isFailoverErrorMessage,
|
||||
isOverloadedErrorMessage,
|
||||
parseImageSizeError,
|
||||
parseImageDimensionError,
|
||||
isRateLimitAssistantError,
|
||||
@@ -721,7 +723,7 @@ export async function runEmbeddedPiAgent(
|
||||
let runLoopIterations = 0;
|
||||
const maybeMarkAuthProfileFailure = async (failure: {
|
||||
profileId?: string;
|
||||
reason?: Parameters<typeof markAuthProfileFailure>[0]["reason"] | null;
|
||||
reason?: AuthProfileFailureReason | null;
|
||||
config?: RunEmbeddedPiAgentParams["config"];
|
||||
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
|
||||
}) => {
|
||||
@@ -737,6 +739,21 @@ export async function runEmbeddedPiAgent(
|
||||
agentDir,
|
||||
});
|
||||
};
|
||||
/**
 * Maps a classified failover reason to the reason that should be recorded
 * against the auth profile, or `null` when nothing should be recorded.
 *
 * @param errorText - Raw error text that the failover reason was derived from.
 * @param failoverReason - Classified failover reason, or `null` if unclassified.
 * @returns `null` for a missing reason, for "timeout", and for "rate_limit"
 *   reasons whose text matches `isOverloadedErrorMessage`; otherwise the
 *   failover reason itself.
 */
const resolveAuthProfileFailureReason = (
  errorText: string,
  failoverReason: FailoverReason | null,
): AuthProfileFailureReason | null => {
  // Timeouts and unclassified errors are never recorded against the profile.
  if (failoverReason === "timeout" || !failoverReason) {
    return null;
  }
  // Overloaded provider responses currently travel on the rate_limit failover
  // lane so the existing retry/failover behavior keeps working, but they must
  // not be recorded as auth-profile failures.
  const overloadedOnRateLimitLane =
    failoverReason === "rate_limit" && isOverloadedErrorMessage(errorText);
  return overloadedOnRateLimitLane ? null : failoverReason;
};
|
||||
try {
|
||||
let authRetryPending = false;
|
||||
// Hoisted so the retry-limit error path can use the most recent API total.
|
||||
@@ -1145,9 +1162,13 @@ export async function runEmbeddedPiAgent(
|
||||
};
|
||||
}
|
||||
const promptFailoverReason = classifyFailoverReason(errorText);
|
||||
const promptProfileFailureReason = resolveAuthProfileFailureReason(
|
||||
errorText,
|
||||
promptFailoverReason,
|
||||
);
|
||||
await maybeMarkAuthProfileFailure({
|
||||
profileId: lastProfileId,
|
||||
reason: promptFailoverReason,
|
||||
reason: promptProfileFailureReason,
|
||||
});
|
||||
if (
|
||||
isFailoverErrorMessage(errorText) &&
|
||||
@@ -1198,6 +1219,10 @@ export async function runEmbeddedPiAgent(
|
||||
const billingFailure = isBillingAssistantError(lastAssistant);
|
||||
const failoverFailure = isFailoverAssistantError(lastAssistant);
|
||||
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
|
||||
const assistantProfileFailureReason = resolveAuthProfileFailureReason(
|
||||
lastAssistant?.errorMessage ?? "",
|
||||
assistantFailoverReason,
|
||||
);
|
||||
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
|
||||
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
|
||||
|
||||
@@ -1237,10 +1262,7 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
if (shouldRotate) {
|
||||
if (lastProfileId) {
|
||||
const reason =
|
||||
timedOut || assistantFailoverReason === "timeout"
|
||||
? "timeout"
|
||||
: (assistantFailoverReason ?? "unknown");
|
||||
const reason = timedOut ? "timeout" : assistantProfileFailureReason;
|
||||
// Skip cooldown for timeouts: a timeout is model/network-specific,
|
||||
// not an auth issue. Marking the profile would poison fallback models
|
||||
// on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
|
||||
|
||||
Reference in New Issue
Block a user