From 0f15cfe21a1868f592fab17b0ce2a717dfcba993 Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Fri, 13 Mar 2026 01:31:48 +0000 Subject: [PATCH] Agents: self-heal anthropic replay thinking history --- ...ed-runner.sanitize-session-history.test.ts | 33 ++++++++++++++++ src/agents/pi-embedded-runner/run/attempt.ts | 4 +- .../pi-embedded-runner/thinking.test.ts | 30 ++++++++++++++ src/agents/pi-embedded-runner/thinking.ts | 31 +++++++++++++-- .../pi-embedded-runner/transcript-hygiene.ts | 39 +++++++++++++------ src/agents/transcript-policy.ts | 2 + 6 files changed, 123 insertions(+), 16 deletions(-) diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 229a8ec13ee..923ed50f7c2 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -843,6 +843,39 @@ describe("sanitizeSessionHistory", () => { expect(result[1]).toEqual(messages[1]); }); + it("preserves the latest anthropic assistant turn with thinking even after a later assistant error turn", async () => { + setNonGoogleModelApi(); + + const messages: AgentMessage[] = [ + makeUserMessage("hello"), + makeAssistantMessage( + [ + { type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" }, + { type: "text", text: "latest answer" }, + ], + { timestamp: nextTimestamp() }, + ), + makeUserMessage("follow up"), + makeAssistantMessage([{ type: "text", text: "" }], { + stopReason: "error", + timestamp: nextTimestamp(), + }), + makeUserMessage("try again"), + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "anthropic-messages", + provider: "anthropic", + modelId: "claude-opus-4-6", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + expect(result[1]).toEqual(messages[1]); + expect(result[3]).toEqual(messages[3]); + }); + it("does not drop thinking blocks for non-claude copilot models", async () => { setNonGoogleModelApi(); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index edf976d16de..c01c881629e 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1784,7 +1784,9 @@ export async function runEmbeddedAttempt( return inner(model, context, options); } const sanitized = dropThinkingBlocks(messages as unknown as AgentMessage[], { - preserveLatestAssistant: true, + preserveLatestAssistantWithThinking: + transcriptPolicy.preserveLatestAssistantWithThinking, + preserveLatestAssistant: !transcriptPolicy.preserveLatestAssistantWithThinking, }) as unknown; if (sanitized === messages) { return inner(model, context, options); diff --git a/src/agents/pi-embedded-runner/thinking.test.ts b/src/agents/pi-embedded-runner/thinking.test.ts index 5a42fb93112..2bb811cf781 100644 --- a/src/agents/pi-embedded-runner/thinking.test.ts +++ b/src/agents/pi-embedded-runner/thinking.test.ts @@ -88,4 +88,34 @@ describe("dropThinkingBlocks", () => { { type: "text", text: "latest answer" }, ]); }); + + it("preserves the latest assistant turn with thinking when a later assistant error turn exists", () => { + const messages: AgentMessage[] = [ + castAgentMessage({ role: "user", content: "hello" }), + castAgentMessage({ + role: "assistant", + content: [ + { type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" }, + { type: "text", text: "latest answer" }, + ], + }), + castAgentMessage({ role: "user", content: "follow up" }), + castAgentMessage({ + role: "assistant", + content: [{ type: "text", text: "" }], + }), + ]; + + const result = dropThinkingBlocks(messages, { + preserveLatestAssistantWithThinking: true, + }); + const reasoningAssistant = result[1] as Extract; + const trailingAssistant = result[3] as Extract; + + expect(reasoningAssistant.content).toEqual([ + { type: "thinking", thinking: "latest reasoning", thinkingSignature: "sig" }, + { type: "text", text: "latest answer" }, + ]); + expect(trailingAssistant.content).toEqual([{ type: "text", text: "" }]); + }); }); diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts index 8fcd95db999..f3bb2780b35 100644 --- a/src/agents/pi-embedded-runner/thinking.ts +++ b/src/agents/pi-embedded-runner/thinking.ts @@ -21,6 +21,27 @@ function findLatestAssistantMessageIndex(messages: AgentMessage[]): number { return -1; } +function hasThinkingLikeBlock(block: unknown): boolean { + if (!block || typeof block !== "object") { + return false; + } + const type = (block as { type?: unknown }).type; + return type === "thinking" || type === "redacted_thinking"; +} + +function findLatestAssistantMessageWithThinkingIndex(messages: AgentMessage[]): number { + for (let i = messages.length - 1; i >= 0; i -= 1) { + const message = messages[i]; + if (!isAssistantMessageWithContent(message)) { + continue; + } + if (message.content.some(hasThinkingLikeBlock)) { + return i; + } + } + return -1; +} + /** * Strip all `type: "thinking"` content blocks from assistant messages. * @@ -37,13 +58,15 @@ function findLatestAssistantMessageIndex(messages: AgentMessage[]): number { */ export function dropThinkingBlocks( messages: AgentMessage[], - opts?: { preserveLatestAssistant?: boolean }, + opts?: { preserveLatestAssistant?: boolean; preserveLatestAssistantWithThinking?: boolean }, ): AgentMessage[] { let touched = false; const out: AgentMessage[] = []; - const latestAssistantIndex = opts?.preserveLatestAssistant - ? findLatestAssistantMessageIndex(messages) - : -1; + const latestAssistantIndex = opts?.preserveLatestAssistantWithThinking + ? findLatestAssistantMessageWithThinkingIndex(messages) + : opts?.preserveLatestAssistant + ? findLatestAssistantMessageIndex(messages) + : -1; for (let index = 0; index < messages.length; index += 1) { const msg = messages[index]; if (!isAssistantMessageWithContent(msg)) { diff --git a/src/agents/pi-embedded-runner/transcript-hygiene.ts b/src/agents/pi-embedded-runner/transcript-hygiene.ts index 9e7d406e1a5..d37d13bd351 100644 --- a/src/agents/pi-embedded-runner/transcript-hygiene.ts +++ b/src/agents/pi-embedded-runner/transcript-hygiene.ts @@ -49,9 +49,10 @@ function hasThinkingLikeBlock(block: unknown): block is { type: "thinking" | "re return type === "thinking" || type === "redacted_thinking"; } -function findLatestAssistantMessageWithThinking( - messages: AgentMessage[], -): Extract | null { +function findLatestAssistantMessageWithThinking(messages: AgentMessage[]): { + index: number; + message: Extract; +} | null { for (let i = messages.length - 1; i >= 0; i -= 1) { const message = messages[i]; if (!message || typeof message !== "object" || message.role !== "assistant") { @@ -59,7 +60,7 @@ function findLatestAssistantMessageWithThinking( } const assistant = message; if (Array.isArray(assistant.content) && assistant.content.some(hasThinkingLikeBlock)) { - return assistant; + return { index: i, message: assistant }; } } return null; @@ -74,16 +75,30 @@ function restoreLatestAssistantMessageWithThinking( return sanitizedMessages; } + const alignedCandidate = sanitizedMessages[originalLatestAssistant.index]; + if ( + alignedCandidate && + typeof alignedCandidate === "object" && + alignedCandidate.role === "assistant" + ) { + if (alignedCandidate === originalLatestAssistant.message) { + return sanitizedMessages; + } + const restored = [...sanitizedMessages]; + restored[originalLatestAssistant.index] = originalLatestAssistant.message; + return restored; + } + for (let i = sanitizedMessages.length - 1; i >= 0; i -= 1) { const candidate = sanitizedMessages[i]; if (!candidate || typeof candidate !== "object" || candidate.role !== "assistant") { continue; } - if (candidate === originalLatestAssistant) { + if (candidate === originalLatestAssistant.message) { return sanitizedMessages; } const restored = [...sanitizedMessages]; - restored[i] = originalLatestAssistant; + restored[i] = originalLatestAssistant.message; return restored; } @@ -408,7 +423,10 @@ export async function sanitizeSessionHistory(params: { }, ); const droppedThinking = policy.dropThinkingBlocks - ? dropThinkingBlocks(sanitizedImages, { preserveLatestAssistant: true }) + ? dropThinkingBlocks(sanitizedImages, { + preserveLatestAssistantWithThinking: policy.preserveLatestAssistantWithThinking, + preserveLatestAssistant: !policy.preserveLatestAssistantWithThinking, + }) : sanitizedImages; const sanitizedToolCalls = sanitizeToolCallInputs(droppedThinking, { allowedToolNames: params.allowedToolNames, @@ -438,10 +456,9 @@ export async function sanitizeSessionHistory(params: { downgradeOpenAIReasoningBlocks(sanitizedCompactionUsage), ) : sanitizedCompactionUsage; - const stableLatestAssistant = restoreLatestAssistantMessageWithThinking( - params.messages, - sanitizedOpenAI, - ); + const stableLatestAssistant = policy.preserveLatestAssistantWithThinking + ? restoreLatestAssistantMessageWithThinking(params.messages, sanitizedOpenAI) + : sanitizedOpenAI; if (hasSnapshot && (!priorSnapshot || modelChanged)) { appendModelSnapshot(params.sessionManager, { diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index d6d9ec5916a..6820bc2631d 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -25,6 +25,7 @@ export type TranscriptPolicy = { }; sanitizeThinkingSignatures: boolean; dropThinkingBlocks: boolean; + preserveLatestAssistantWithThinking: boolean; applyGoogleTurnOrdering: boolean; validateGeminiTurns: boolean; validateAnthropicTurns: boolean; @@ -116,6 +117,7 @@ export function resolveTranscriptPolicy(params: { sanitizeThoughtSignatures: isOpenAi ? undefined : sanitizeThoughtSignatures, sanitizeThinkingSignatures: false, dropThinkingBlocks, + preserveLatestAssistantWithThinking: isAnthropic, applyGoogleTurnOrdering: !isOpenAi && (isGoogle || isStrictOpenAiCompatible), validateGeminiTurns: !isOpenAi && (isGoogle || isStrictOpenAiCompatible), validateAnthropicTurns: !isOpenAi && (isAnthropic || isStrictOpenAiCompatible),