mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-30 08:32:00 +00:00
Agents: add embedded error observations (#41336)
Merged via squash.
Prepared head SHA: 4900042298
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
- ACP/sessions.patch: allow `spawnedBy` and `spawnDepth` lineage fields on ACP session keys so `sessions_spawn` with `runtime: "acp"` no longer fails during child-session setup. Fixes #40971. (#40995) thanks @xaeon2026.
|
||||
- ACP/stop reason mapping: resolve gateway chat `state: "error"` completions as ACP `end_turn` instead of `refusal` so transient backend failures are not surfaced as deliberate refusals. (#41187) thanks @pejmanjohn.
|
||||
- ACP/setSessionMode: propagate gateway `sessions.patch` failures back to ACP clients so rejected mode changes no longer return silent success. (#41185) thanks @pejmanjohn.
|
||||
- Agents/embedded logs: add structured, sanitized lifecycle and failover observation events so overload and provider failures are easier to tail and filter. (#41336) thanks @altaywtf.
|
||||
|
||||
## 2026.3.8
|
||||
|
||||
|
||||
182
src/agents/pi-embedded-error-observation.test.ts
Normal file
182
src/agents/pi-embedded-error-observation.test.ts
Normal file
@@ -0,0 +1,182 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import * as loggingConfigModule from "../logging/config.js";
|
||||
import {
|
||||
buildApiErrorObservationFields,
|
||||
buildTextObservationFields,
|
||||
sanitizeForConsole,
|
||||
} from "./pi-embedded-error-observation.js";
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("buildApiErrorObservationFields", () => {
|
||||
it("redacts request ids and exposes stable hashes instead of raw payloads", () => {
|
||||
const observed = buildApiErrorObservationFields(
|
||||
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_overload"}',
|
||||
);
|
||||
|
||||
expect(observed).toMatchObject({
|
||||
rawErrorPreview: expect.stringContaining('"request_id":"sha256:'),
|
||||
rawErrorHash: expect.stringMatching(/^sha256:/),
|
||||
rawErrorFingerprint: expect.stringMatching(/^sha256:/),
|
||||
providerErrorType: "overloaded_error",
|
||||
providerErrorMessagePreview: "Overloaded",
|
||||
requestIdHash: expect.stringMatching(/^sha256:/),
|
||||
});
|
||||
expect(observed.rawErrorPreview).not.toContain("req_overload");
|
||||
});
|
||||
|
||||
it("forces token redaction for observation previews", () => {
|
||||
const observed = buildApiErrorObservationFields(
|
||||
"Authorization: Bearer sk-abcdefghijklmnopqrstuvwxyz123456",
|
||||
);
|
||||
|
||||
expect(observed.rawErrorPreview).not.toContain("sk-abcdefghijklmnopqrstuvwxyz123456");
|
||||
expect(observed.rawErrorPreview).toContain("sk-abc");
|
||||
expect(observed.rawErrorHash).toMatch(/^sha256:/);
|
||||
});
|
||||
|
||||
it("redacts observation-only header and cookie formats", () => {
|
||||
const observed = buildApiErrorObservationFields(
|
||||
"x-api-key: sk-abcdefghijklmnopqrstuvwxyz123456 Cookie: session=abcdefghijklmnopqrstuvwxyz123456",
|
||||
);
|
||||
|
||||
expect(observed.rawErrorPreview).not.toContain("abcdefghijklmnopqrstuvwxyz123456");
|
||||
expect(observed.rawErrorPreview).toContain("x-api-key: ***");
|
||||
expect(observed.rawErrorPreview).toContain("Cookie: session=");
|
||||
});
|
||||
|
||||
it("does not let cookie redaction consume unrelated fields on the same line", () => {
|
||||
const observed = buildApiErrorObservationFields(
|
||||
"Cookie: session=abcdefghijklmnopqrstuvwxyz123456 status=503 request_id=req_cookie",
|
||||
);
|
||||
|
||||
expect(observed.rawErrorPreview).toContain("Cookie: session=");
|
||||
expect(observed.rawErrorPreview).toContain("status=503");
|
||||
expect(observed.rawErrorPreview).toContain("request_id=sha256:");
|
||||
});
|
||||
|
||||
it("builds sanitized generic text observation fields", () => {
|
||||
const observed = buildTextObservationFields(
|
||||
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_prev"}',
|
||||
);
|
||||
|
||||
expect(observed).toMatchObject({
|
||||
textPreview: expect.stringContaining('"request_id":"sha256:'),
|
||||
textHash: expect.stringMatching(/^sha256:/),
|
||||
textFingerprint: expect.stringMatching(/^sha256:/),
|
||||
providerErrorType: "overloaded_error",
|
||||
providerErrorMessagePreview: "Overloaded",
|
||||
requestIdHash: expect.stringMatching(/^sha256:/),
|
||||
});
|
||||
expect(observed.textPreview).not.toContain("req_prev");
|
||||
});
|
||||
|
||||
it("redacts request ids in formatted plain-text errors", () => {
|
||||
const observed = buildApiErrorObservationFields(
|
||||
"LLM error overloaded_error: Overloaded (request_id: req_plaintext_123)",
|
||||
);
|
||||
|
||||
expect(observed).toMatchObject({
|
||||
rawErrorPreview: expect.stringContaining("request_id: sha256:"),
|
||||
rawErrorFingerprint: expect.stringMatching(/^sha256:/),
|
||||
requestIdHash: expect.stringMatching(/^sha256:/),
|
||||
});
|
||||
expect(observed.rawErrorPreview).not.toContain("req_plaintext_123");
|
||||
});
|
||||
|
||||
it("keeps fingerprints stable across request ids for equivalent errors", () => {
|
||||
const first = buildApiErrorObservationFields(
|
||||
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_001"}',
|
||||
);
|
||||
const second = buildApiErrorObservationFields(
|
||||
'{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"},"request_id":"req_002"}',
|
||||
);
|
||||
|
||||
expect(first.rawErrorFingerprint).toBe(second.rawErrorFingerprint);
|
||||
expect(first.rawErrorHash).not.toBe(second.rawErrorHash);
|
||||
});
|
||||
|
||||
it("truncates oversized raw and provider previews", () => {
|
||||
const longMessage = "X".repeat(260);
|
||||
const observed = buildApiErrorObservationFields(
|
||||
`{"type":"error","error":{"type":"server_error","message":"${longMessage}"},"request_id":"req_long"}`,
|
||||
);
|
||||
|
||||
expect(observed.rawErrorPreview).toBeDefined();
|
||||
expect(observed.providerErrorMessagePreview).toBeDefined();
|
||||
expect(observed.rawErrorPreview?.length).toBeLessThanOrEqual(401);
|
||||
expect(observed.providerErrorMessagePreview?.length).toBeLessThanOrEqual(201);
|
||||
expect(observed.providerErrorMessagePreview?.endsWith("…")).toBe(true);
|
||||
});
|
||||
|
||||
it("caps oversized raw inputs before hashing and fingerprinting", () => {
|
||||
const oversized = "X".repeat(70_000);
|
||||
const bounded = "X".repeat(64_000);
|
||||
|
||||
expect(buildApiErrorObservationFields(oversized)).toMatchObject({
|
||||
rawErrorHash: buildApiErrorObservationFields(bounded).rawErrorHash,
|
||||
rawErrorFingerprint: buildApiErrorObservationFields(bounded).rawErrorFingerprint,
|
||||
});
|
||||
});
|
||||
|
||||
it("returns empty observation fields for empty input", () => {
|
||||
expect(buildApiErrorObservationFields(undefined)).toEqual({});
|
||||
expect(buildApiErrorObservationFields("")).toEqual({});
|
||||
expect(buildApiErrorObservationFields(" ")).toEqual({});
|
||||
});
|
||||
|
||||
it("re-reads configured redact patterns on each call", () => {
|
||||
const readLoggingConfig = vi.spyOn(loggingConfigModule, "readLoggingConfig");
|
||||
readLoggingConfig.mockReturnValueOnce(undefined);
|
||||
readLoggingConfig.mockReturnValueOnce({
|
||||
redactPatterns: [String.raw`\bcustom-secret-[A-Za-z0-9]+\b`],
|
||||
});
|
||||
|
||||
const first = buildApiErrorObservationFields("custom-secret-abc123");
|
||||
const second = buildApiErrorObservationFields("custom-secret-abc123");
|
||||
|
||||
expect(first.rawErrorPreview).toContain("custom-secret-abc123");
|
||||
expect(second.rawErrorPreview).not.toContain("custom-secret-abc123");
|
||||
expect(second.rawErrorPreview).toContain("custom");
|
||||
});
|
||||
|
||||
it("fails closed when observation sanitization throws", () => {
|
||||
vi.spyOn(loggingConfigModule, "readLoggingConfig").mockImplementation(() => {
|
||||
throw new Error("boom");
|
||||
});
|
||||
|
||||
expect(buildApiErrorObservationFields("request_id=req_123")).toEqual({});
|
||||
expect(buildTextObservationFields("request_id=req_123")).toEqual({
|
||||
textPreview: undefined,
|
||||
textHash: undefined,
|
||||
textFingerprint: undefined,
|
||||
httpCode: undefined,
|
||||
providerErrorType: undefined,
|
||||
providerErrorMessagePreview: undefined,
|
||||
requestIdHash: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores non-string configured redact patterns", () => {
|
||||
vi.spyOn(loggingConfigModule, "readLoggingConfig").mockReturnValue({
|
||||
redactPatterns: [
|
||||
123 as never,
|
||||
{ bad: true } as never,
|
||||
String.raw`\bcustom-secret-[A-Za-z0-9]+\b`,
|
||||
],
|
||||
});
|
||||
|
||||
const observed = buildApiErrorObservationFields("custom-secret-abc123");
|
||||
|
||||
expect(observed.rawErrorPreview).not.toContain("custom-secret-abc123");
|
||||
expect(observed.rawErrorPreview).toContain("custom");
|
||||
});
|
||||
});
|
||||
|
||||
describe("sanitizeForConsole", () => {
|
||||
it("strips control characters from console-facing values", () => {
|
||||
expect(sanitizeForConsole("run-1\nprovider\tmodel\rtest")).toBe("run-1 provider model test");
|
||||
});
|
||||
});
|
||||
199
src/agents/pi-embedded-error-observation.ts
Normal file
199
src/agents/pi-embedded-error-observation.ts
Normal file
@@ -0,0 +1,199 @@
|
||||
import { readLoggingConfig } from "../logging/config.js";
|
||||
import { redactIdentifier } from "../logging/redact-identifier.js";
|
||||
import { getDefaultRedactPatterns, redactSensitiveText } from "../logging/redact.js";
|
||||
import { getApiErrorPayloadFingerprint, parseApiErrorInfo } from "./pi-embedded-helpers.js";
|
||||
import { stableStringify } from "./stable-stringify.js";
|
||||
|
||||
const MAX_OBSERVATION_INPUT_CHARS = 64_000;
|
||||
const MAX_FINGERPRINT_MESSAGE_CHARS = 8_000;
|
||||
const RAW_ERROR_PREVIEW_MAX_CHARS = 400;
|
||||
const PROVIDER_ERROR_PREVIEW_MAX_CHARS = 200;
|
||||
const REQUEST_ID_RE = /\brequest[_ ]?id\b\s*[:=]\s*["'()]*([A-Za-z0-9._:-]+)/i;
|
||||
const OBSERVATION_EXTRA_REDACT_PATTERNS = [
|
||||
String.raw`\b(?:x-)?api[-_]?key\b\s*[:=]\s*(["']?)([^\s"'\\;]+)\1`,
|
||||
String.raw`"(?:api[-_]?key|api_key)"\s*:\s*"([^"]+)"`,
|
||||
String.raw`(?:\bCookie\b\s*[:=]\s*[^;=\s]+=|;\s*[^;=\s]+=)([^;\s\r\n]+)`,
|
||||
];
|
||||
|
||||
function resolveConfiguredRedactPatterns(): string[] {
|
||||
const configured = readLoggingConfig()?.redactPatterns;
|
||||
if (!Array.isArray(configured)) {
|
||||
return [];
|
||||
}
|
||||
return configured.filter((pattern): pattern is string => typeof pattern === "string");
|
||||
}
|
||||
|
||||
function truncateForObservation(text: string | undefined, maxChars: number): string | undefined {
|
||||
const trimmed = text?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
return trimmed.length > maxChars ? `${trimmed.slice(0, maxChars)}…` : trimmed;
|
||||
}
|
||||
|
||||
function boundObservationInput(text: string | undefined): string | undefined {
|
||||
const trimmed = text?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
return trimmed.length > MAX_OBSERVATION_INPUT_CHARS
|
||||
? trimmed.slice(0, MAX_OBSERVATION_INPUT_CHARS)
|
||||
: trimmed;
|
||||
}
|
||||
|
||||
export function sanitizeForConsole(text: string | undefined, maxChars = 200): string | undefined {
|
||||
const trimmed = text?.trim();
|
||||
if (!trimmed) {
|
||||
return undefined;
|
||||
}
|
||||
const withoutControlChars = Array.from(trimmed)
|
||||
.filter((char) => {
|
||||
const code = char.charCodeAt(0);
|
||||
return !(
|
||||
code <= 0x08 ||
|
||||
code === 0x0b ||
|
||||
code === 0x0c ||
|
||||
(code >= 0x0e && code <= 0x1f) ||
|
||||
code === 0x7f
|
||||
);
|
||||
})
|
||||
.join("");
|
||||
const sanitized = withoutControlChars
|
||||
.replace(/[\r\n\t]+/g, " ")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim();
|
||||
return sanitized.length > maxChars ? `${sanitized.slice(0, maxChars)}…` : sanitized;
|
||||
}
|
||||
|
||||
function replaceRequestIdPreview(
|
||||
text: string | undefined,
|
||||
requestId: string | undefined,
|
||||
): string | undefined {
|
||||
if (!text || !requestId) {
|
||||
return text;
|
||||
}
|
||||
return text.split(requestId).join(redactIdentifier(requestId, { len: 12 }));
|
||||
}
|
||||
|
||||
function redactObservationText(text: string | undefined): string | undefined {
|
||||
if (!text) {
|
||||
return text;
|
||||
}
|
||||
// Observation logs must stay redacted even when operators disable general-purpose
|
||||
// log redaction, otherwise raw provider payloads leak back into always-on logs.
|
||||
const configuredPatterns = resolveConfiguredRedactPatterns();
|
||||
return redactSensitiveText(text, {
|
||||
mode: "tools",
|
||||
patterns: [
|
||||
...getDefaultRedactPatterns(),
|
||||
...configuredPatterns,
|
||||
...OBSERVATION_EXTRA_REDACT_PATTERNS,
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
function extractRequestId(text: string | undefined): string | undefined {
|
||||
if (!text) {
|
||||
return undefined;
|
||||
}
|
||||
const match = text.match(REQUEST_ID_RE);
|
||||
return match?.[1]?.trim() || undefined;
|
||||
}
|
||||
|
||||
function buildObservationFingerprint(params: {
|
||||
raw: string;
|
||||
requestId?: string;
|
||||
httpCode?: string;
|
||||
type?: string;
|
||||
message?: string;
|
||||
}): string | null {
|
||||
const boundedMessage =
|
||||
params.message && params.message.length > MAX_FINGERPRINT_MESSAGE_CHARS
|
||||
? params.message.slice(0, MAX_FINGERPRINT_MESSAGE_CHARS)
|
||||
: params.message;
|
||||
const structured =
|
||||
params.httpCode || params.type || boundedMessage
|
||||
? stableStringify({
|
||||
httpCode: params.httpCode,
|
||||
type: params.type,
|
||||
message: boundedMessage,
|
||||
})
|
||||
: null;
|
||||
if (structured) {
|
||||
return structured;
|
||||
}
|
||||
if (params.requestId) {
|
||||
return params.raw.split(params.requestId).join("<request_id>");
|
||||
}
|
||||
return getApiErrorPayloadFingerprint(params.raw);
|
||||
}
|
||||
|
||||
export function buildApiErrorObservationFields(rawError?: string): {
|
||||
rawErrorPreview?: string;
|
||||
rawErrorHash?: string;
|
||||
rawErrorFingerprint?: string;
|
||||
httpCode?: string;
|
||||
providerErrorType?: string;
|
||||
providerErrorMessagePreview?: string;
|
||||
requestIdHash?: string;
|
||||
} {
|
||||
const trimmed = boundObservationInput(rawError);
|
||||
if (!trimmed) {
|
||||
return {};
|
||||
}
|
||||
try {
|
||||
const parsed = parseApiErrorInfo(trimmed);
|
||||
const requestId = parsed?.requestId ?? extractRequestId(trimmed);
|
||||
const requestIdHash = requestId ? redactIdentifier(requestId, { len: 12 }) : undefined;
|
||||
const rawFingerprint = buildObservationFingerprint({
|
||||
raw: trimmed,
|
||||
requestId,
|
||||
httpCode: parsed?.httpCode,
|
||||
type: parsed?.type,
|
||||
message: parsed?.message,
|
||||
});
|
||||
const redactedRawPreview = replaceRequestIdPreview(redactObservationText(trimmed), requestId);
|
||||
const redactedProviderMessage = replaceRequestIdPreview(
|
||||
redactObservationText(parsed?.message),
|
||||
requestId,
|
||||
);
|
||||
|
||||
return {
|
||||
rawErrorPreview: truncateForObservation(redactedRawPreview, RAW_ERROR_PREVIEW_MAX_CHARS),
|
||||
rawErrorHash: redactIdentifier(trimmed, { len: 12 }),
|
||||
rawErrorFingerprint: rawFingerprint
|
||||
? redactIdentifier(rawFingerprint, { len: 12 })
|
||||
: undefined,
|
||||
httpCode: parsed?.httpCode,
|
||||
providerErrorType: parsed?.type,
|
||||
providerErrorMessagePreview: truncateForObservation(
|
||||
redactedProviderMessage,
|
||||
PROVIDER_ERROR_PREVIEW_MAX_CHARS,
|
||||
),
|
||||
requestIdHash,
|
||||
};
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
export function buildTextObservationFields(text?: string): {
|
||||
textPreview?: string;
|
||||
textHash?: string;
|
||||
textFingerprint?: string;
|
||||
httpCode?: string;
|
||||
providerErrorType?: string;
|
||||
providerErrorMessagePreview?: string;
|
||||
requestIdHash?: string;
|
||||
} {
|
||||
const observed = buildApiErrorObservationFields(text);
|
||||
return {
|
||||
textPreview: observed.rawErrorPreview,
|
||||
textHash: observed.rawErrorHash,
|
||||
textFingerprint: observed.rawErrorFingerprint,
|
||||
httpCode: observed.httpCode,
|
||||
providerErrorType: observed.providerErrorType,
|
||||
providerErrorMessagePreview: observed.providerErrorMessagePreview,
|
||||
requestIdHash: observed.requestIdHash,
|
||||
};
|
||||
}
|
||||
@@ -61,6 +61,7 @@ import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
|
||||
import { log } from "./logger.js";
|
||||
import { resolveModel } from "./model.js";
|
||||
import { runEmbeddedAttempt } from "./run/attempt.js";
|
||||
import { createFailoverDecisionLogger } from "./run/failover-observation.js";
|
||||
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
|
||||
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
|
||||
import {
|
||||
@@ -1226,11 +1227,26 @@ export async function runEmbeddedPiAgent(
|
||||
reason: promptProfileFailureReason,
|
||||
});
|
||||
const promptFailoverFailure = isFailoverErrorMessage(errorText);
|
||||
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
|
||||
const failedPromptProfileId = lastProfileId;
|
||||
const logPromptFailoverDecision = createFailoverDecisionLogger({
|
||||
stage: "prompt",
|
||||
runId: params.runId,
|
||||
rawError: errorText,
|
||||
failoverReason: promptFailoverReason,
|
||||
profileFailureReason: promptProfileFailureReason,
|
||||
provider,
|
||||
model: modelId,
|
||||
profileId: failedPromptProfileId,
|
||||
fallbackConfigured,
|
||||
aborted,
|
||||
});
|
||||
if (
|
||||
promptFailoverFailure &&
|
||||
promptFailoverReason !== "timeout" &&
|
||||
(await advanceAuthProfile())
|
||||
) {
|
||||
logPromptFailoverDecision("rotate_profile");
|
||||
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
|
||||
continue;
|
||||
}
|
||||
@@ -1249,15 +1265,20 @@ export async function runEmbeddedPiAgent(
|
||||
// are configured so outer model fallback can continue on overload,
|
||||
// rate-limit, auth, or billing failures.
|
||||
if (fallbackConfigured && promptFailoverFailure) {
|
||||
const status = resolveFailoverStatus(promptFailoverReason ?? "unknown");
|
||||
logPromptFailoverDecision("fallback_model", { status });
|
||||
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
|
||||
throw new FailoverError(errorText, {
|
||||
reason: promptFailoverReason ?? "unknown",
|
||||
provider,
|
||||
model: modelId,
|
||||
profileId: lastProfileId,
|
||||
status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
|
||||
status,
|
||||
});
|
||||
}
|
||||
if (promptFailoverFailure || promptFailoverReason) {
|
||||
logPromptFailoverDecision("surface_error");
|
||||
}
|
||||
throw promptError;
|
||||
}
|
||||
|
||||
@@ -1282,6 +1303,21 @@ export async function runEmbeddedPiAgent(
|
||||
resolveAuthProfileFailureReason(assistantFailoverReason);
|
||||
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
|
||||
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
|
||||
// Capture the failing profile before auth-profile rotation mutates `lastProfileId`.
|
||||
const failedAssistantProfileId = lastProfileId;
|
||||
const logAssistantFailoverDecision = createFailoverDecisionLogger({
|
||||
stage: "assistant",
|
||||
runId: params.runId,
|
||||
rawError: lastAssistant?.errorMessage?.trim(),
|
||||
failoverReason: assistantFailoverReason,
|
||||
profileFailureReason: assistantProfileFailureReason,
|
||||
provider: activeErrorContext.provider,
|
||||
model: activeErrorContext.model,
|
||||
profileId: failedAssistantProfileId,
|
||||
fallbackConfigured,
|
||||
timedOut,
|
||||
aborted,
|
||||
});
|
||||
|
||||
if (
|
||||
authFailure &&
|
||||
@@ -1339,6 +1375,7 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
const rotated = await advanceAuthProfile();
|
||||
if (rotated) {
|
||||
logAssistantFailoverDecision("rotate_profile");
|
||||
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
|
||||
continue;
|
||||
}
|
||||
@@ -1371,6 +1408,7 @@ export async function runEmbeddedPiAgent(
|
||||
const status =
|
||||
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
|
||||
(isTimeoutErrorMessage(message) ? 408 : undefined);
|
||||
logAssistantFailoverDecision("fallback_model", { status });
|
||||
throw new FailoverError(message, {
|
||||
reason: assistantFailoverReason ?? "unknown",
|
||||
provider: activeErrorContext.provider,
|
||||
@@ -1379,6 +1417,7 @@ export async function runEmbeddedPiAgent(
|
||||
status,
|
||||
});
|
||||
}
|
||||
logAssistantFailoverDecision("surface_error");
|
||||
}
|
||||
|
||||
const usage = toNormalizedUsage(usageAccumulator);
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { normalizeFailoverDecisionObservationBase } from "./failover-observation.js";
|
||||
|
||||
describe("normalizeFailoverDecisionObservationBase", () => {
|
||||
it("fills timeout observation reasons for deadline timeouts without provider error text", () => {
|
||||
expect(
|
||||
normalizeFailoverDecisionObservationBase({
|
||||
stage: "assistant",
|
||||
runId: "run:timeout",
|
||||
rawError: "",
|
||||
failoverReason: null,
|
||||
profileFailureReason: null,
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
profileId: "openai:p1",
|
||||
fallbackConfigured: false,
|
||||
timedOut: true,
|
||||
aborted: false,
|
||||
}),
|
||||
).toMatchObject({
|
||||
failoverReason: "timeout",
|
||||
profileFailureReason: "timeout",
|
||||
timedOut: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves explicit failover reasons", () => {
|
||||
expect(
|
||||
normalizeFailoverDecisionObservationBase({
|
||||
stage: "assistant",
|
||||
runId: "run:overloaded",
|
||||
rawError: '{"error":{"type":"overloaded_error"}}',
|
||||
failoverReason: "overloaded",
|
||||
profileFailureReason: "overloaded",
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
profileId: "openai:p1",
|
||||
fallbackConfigured: true,
|
||||
timedOut: true,
|
||||
aborted: false,
|
||||
}),
|
||||
).toMatchObject({
|
||||
failoverReason: "overloaded",
|
||||
profileFailureReason: "overloaded",
|
||||
timedOut: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
76
src/agents/pi-embedded-runner/run/failover-observation.ts
Normal file
76
src/agents/pi-embedded-runner/run/failover-observation.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { redactIdentifier } from "../../../logging/redact-identifier.js";
|
||||
import type { AuthProfileFailureReason } from "../../auth-profiles.js";
|
||||
import {
|
||||
buildApiErrorObservationFields,
|
||||
sanitizeForConsole,
|
||||
} from "../../pi-embedded-error-observation.js";
|
||||
import type { FailoverReason } from "../../pi-embedded-helpers.js";
|
||||
import { log } from "../logger.js";
|
||||
|
||||
export type FailoverDecisionLoggerInput = {
|
||||
stage: "prompt" | "assistant";
|
||||
decision: "rotate_profile" | "fallback_model" | "surface_error";
|
||||
runId?: string;
|
||||
rawError?: string;
|
||||
failoverReason: FailoverReason | null;
|
||||
profileFailureReason?: AuthProfileFailureReason | null;
|
||||
provider: string;
|
||||
model: string;
|
||||
profileId?: string;
|
||||
fallbackConfigured: boolean;
|
||||
timedOut?: boolean;
|
||||
aborted?: boolean;
|
||||
status?: number;
|
||||
};
|
||||
|
||||
export type FailoverDecisionLoggerBase = Omit<FailoverDecisionLoggerInput, "decision" | "status">;
|
||||
|
||||
export function normalizeFailoverDecisionObservationBase(
|
||||
base: FailoverDecisionLoggerBase,
|
||||
): FailoverDecisionLoggerBase {
|
||||
return {
|
||||
...base,
|
||||
failoverReason: base.failoverReason ?? (base.timedOut ? "timeout" : null),
|
||||
profileFailureReason: base.profileFailureReason ?? (base.timedOut ? "timeout" : null),
|
||||
};
|
||||
}
|
||||
|
||||
export function createFailoverDecisionLogger(
|
||||
base: FailoverDecisionLoggerBase,
|
||||
): (
|
||||
decision: FailoverDecisionLoggerInput["decision"],
|
||||
extra?: Pick<FailoverDecisionLoggerInput, "status">,
|
||||
) => void {
|
||||
const normalizedBase = normalizeFailoverDecisionObservationBase(base);
|
||||
const safeProfileId = normalizedBase.profileId
|
||||
? redactIdentifier(normalizedBase.profileId, { len: 12 })
|
||||
: undefined;
|
||||
const safeRunId = sanitizeForConsole(normalizedBase.runId) ?? "-";
|
||||
const safeProvider = sanitizeForConsole(normalizedBase.provider) ?? "-";
|
||||
const safeModel = sanitizeForConsole(normalizedBase.model) ?? "-";
|
||||
const profileText = safeProfileId ?? "-";
|
||||
const reasonText = normalizedBase.failoverReason ?? "none";
|
||||
return (decision, extra) => {
|
||||
const observedError = buildApiErrorObservationFields(normalizedBase.rawError);
|
||||
log.warn("embedded run failover decision", {
|
||||
event: "embedded_run_failover_decision",
|
||||
tags: ["error_handling", "failover", normalizedBase.stage, decision],
|
||||
runId: normalizedBase.runId,
|
||||
stage: normalizedBase.stage,
|
||||
decision,
|
||||
failoverReason: normalizedBase.failoverReason,
|
||||
profileFailureReason: normalizedBase.profileFailureReason,
|
||||
provider: normalizedBase.provider,
|
||||
model: normalizedBase.model,
|
||||
profileId: safeProfileId,
|
||||
fallbackConfigured: normalizedBase.fallbackConfigured,
|
||||
timedOut: normalizedBase.timedOut,
|
||||
aborted: normalizedBase.aborted,
|
||||
status: extra?.status,
|
||||
...observedError,
|
||||
consoleMessage:
|
||||
`embedded run failover decision: runId=${safeRunId} stage=${normalizedBase.stage} decision=${decision} ` +
|
||||
`reason=${reasonText} provider=${safeProvider}/${safeModel} profile=${profileText}`,
|
||||
});
|
||||
};
|
||||
}
|
||||
@@ -54,8 +54,13 @@ describe("handleAgentEnd", () => {
|
||||
|
||||
const warn = vi.mocked(ctx.log.warn);
|
||||
expect(warn).toHaveBeenCalledTimes(1);
|
||||
expect(warn.mock.calls[0]?.[0]).toContain("runId=run-1");
|
||||
expect(warn.mock.calls[0]?.[0]).toContain("error=connection refused");
|
||||
expect(warn.mock.calls[0]?.[0]).toBe("embedded run agent end");
|
||||
expect(warn.mock.calls[0]?.[1]).toMatchObject({
|
||||
event: "embedded_run_agent_end",
|
||||
runId: "run-1",
|
||||
error: "connection refused",
|
||||
rawErrorPreview: "connection refused",
|
||||
});
|
||||
expect(onAgentEvent).toHaveBeenCalledWith({
|
||||
stream: "lifecycle",
|
||||
data: {
|
||||
@@ -65,6 +70,59 @@ describe("handleAgentEnd", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("attaches raw provider error metadata without changing the console message", () => {
|
||||
const ctx = createContext({
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
provider: "anthropic",
|
||||
model: "claude-test",
|
||||
errorMessage: '{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}',
|
||||
content: [{ type: "text", text: "" }],
|
||||
});
|
||||
|
||||
handleAgentEnd(ctx);
|
||||
|
||||
const warn = vi.mocked(ctx.log.warn);
|
||||
expect(warn).toHaveBeenCalledTimes(1);
|
||||
expect(warn.mock.calls[0]?.[0]).toBe("embedded run agent end");
|
||||
expect(warn.mock.calls[0]?.[1]).toMatchObject({
|
||||
event: "embedded_run_agent_end",
|
||||
runId: "run-1",
|
||||
error: "The AI service is temporarily overloaded. Please try again in a moment.",
|
||||
failoverReason: "overloaded",
|
||||
providerErrorType: "overloaded_error",
|
||||
});
|
||||
});
|
||||
|
||||
it("redacts logged error text before emitting lifecycle events", () => {
|
||||
const onAgentEvent = vi.fn();
|
||||
const ctx = createContext(
|
||||
{
|
||||
role: "assistant",
|
||||
stopReason: "error",
|
||||
errorMessage: "x-api-key: sk-abcdefghijklmnopqrstuvwxyz123456",
|
||||
content: [{ type: "text", text: "" }],
|
||||
},
|
||||
{ onAgentEvent },
|
||||
);
|
||||
|
||||
handleAgentEnd(ctx);
|
||||
|
||||
const warn = vi.mocked(ctx.log.warn);
|
||||
expect(warn.mock.calls[0]?.[1]).toMatchObject({
|
||||
event: "embedded_run_agent_end",
|
||||
error: "x-api-key: ***",
|
||||
rawErrorPreview: "x-api-key: ***",
|
||||
});
|
||||
expect(onAgentEvent).toHaveBeenCalledWith({
|
||||
stream: "lifecycle",
|
||||
data: {
|
||||
phase: "error",
|
||||
error: "x-api-key: ***",
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps non-error run-end logging on debug only", () => {
|
||||
const ctx = createContext(undefined);
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import { emitAgentEvent } from "../infra/agent-events.js";
|
||||
import { createInlineCodeState } from "../markdown/code-spans.js";
|
||||
import { formatAssistantErrorText } from "./pi-embedded-helpers.js";
|
||||
import {
|
||||
buildApiErrorObservationFields,
|
||||
buildTextObservationFields,
|
||||
sanitizeForConsole,
|
||||
} from "./pi-embedded-error-observation.js";
|
||||
import { classifyFailoverReason, formatAssistantErrorText } from "./pi-embedded-helpers.js";
|
||||
import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js";
|
||||
import { isAssistantMessage } from "./pi-embedded-utils.js";
|
||||
|
||||
@@ -36,16 +41,31 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
|
||||
provider: lastAssistant.provider,
|
||||
model: lastAssistant.model,
|
||||
});
|
||||
const rawError = lastAssistant.errorMessage?.trim();
|
||||
const failoverReason = classifyFailoverReason(rawError ?? "");
|
||||
const errorText = (friendlyError || lastAssistant.errorMessage || "LLM request failed.").trim();
|
||||
ctx.log.warn(
|
||||
`embedded run agent end: runId=${ctx.params.runId} isError=true error=${errorText}`,
|
||||
);
|
||||
const observedError = buildApiErrorObservationFields(rawError);
|
||||
const safeErrorText =
|
||||
buildTextObservationFields(errorText).textPreview ?? "LLM request failed.";
|
||||
const safeRunId = sanitizeForConsole(ctx.params.runId) ?? "-";
|
||||
ctx.log.warn("embedded run agent end", {
|
||||
event: "embedded_run_agent_end",
|
||||
tags: ["error_handling", "lifecycle", "agent_end", "assistant_error"],
|
||||
runId: ctx.params.runId,
|
||||
isError: true,
|
||||
error: safeErrorText,
|
||||
failoverReason,
|
||||
provider: lastAssistant.provider,
|
||||
model: lastAssistant.model,
|
||||
...observedError,
|
||||
consoleMessage: `embedded run agent end: runId=${safeRunId} isError=true error=${safeErrorText}`,
|
||||
});
|
||||
emitAgentEvent({
|
||||
runId: ctx.params.runId,
|
||||
stream: "lifecycle",
|
||||
data: {
|
||||
phase: "error",
|
||||
error: errorText,
|
||||
error: safeErrorText,
|
||||
endedAt: Date.now(),
|
||||
},
|
||||
});
|
||||
@@ -53,7 +73,7 @@ export function handleAgentEnd(ctx: EmbeddedPiSubscribeContext) {
|
||||
stream: "lifecycle",
|
||||
data: {
|
||||
phase: "error",
|
||||
error: errorText,
|
||||
error: safeErrorText,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
|
||||
@@ -12,8 +12,8 @@ import type {
|
||||
import type { NormalizedUsage } from "./usage.js";
|
||||
|
||||
export type EmbeddedSubscribeLogger = {
|
||||
debug: (message: string) => void;
|
||||
warn: (message: string) => void;
|
||||
debug: (message: string, meta?: Record<string, unknown>) => void;
|
||||
warn: (message: string, meta?: Record<string, unknown>) => void;
|
||||
};
|
||||
|
||||
export type ToolErrorSummary = {
|
||||
|
||||
Reference in New Issue
Block a user