From 92199ac129931804e0fee7cab6d1ee262389fc03 Mon Sep 17 00:00:00 2001 From: Charles Dusek <38732970+cgdusek@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:45:12 -0600 Subject: [PATCH] fix(agents): unblock gpt-5.3-codex API-key routing and replay (#31083) * fix(agents): unblock gpt-5.3-codex API-key replay path * fix(agents): scope OpenAI replay ID rewrites per turn * test: fix nodes-tool mock typing and reformat telegram accounts --- src/agents/model-fallback.test.ts | 4 +- src/agents/model-selection.test.ts | 8 +- src/agents/model-selection.ts | 17 --- ...ded-helpers.sanitizeuserfacingtext.test.ts | 120 +++++++++++++++ src/agents/pi-embedded-helpers.ts | 5 +- src/agents/pi-embedded-helpers/openai.ts | 140 ++++++++++++++++++ ...runner.openai-tool-id-preservation.test.ts | 50 ++++++- src/agents/pi-embedded-runner/google.ts | 5 +- src/agents/pi-embedded-runner/run/attempt.ts | 24 +++ 9 files changed, 347 insertions(+), 26 deletions(-) diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 59fd9121dd6..0b527392ef1 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -174,7 +174,7 @@ async function expectSkippedUnavailableProvider(params: { } describe("runWithModelFallback", () => { - it("normalizes openai gpt-5.3 codex to openai-codex before running", async () => { + it("keeps openai gpt-5.3 codex on the openai provider before running", async () => { const cfg = makeCfg(); const run = vi.fn().mockResolvedValueOnce("ok"); @@ -187,7 +187,7 @@ describe("runWithModelFallback", () => { expect(result.result).toBe("ok"); expect(run).toHaveBeenCalledTimes(1); - expect(run).toHaveBeenCalledWith("openai-codex", "gpt-5.3-codex"); + expect(run).toHaveBeenCalledWith("openai", "gpt-5.3-codex"); }); it("falls back on unrecognized errors when candidates remain", async () => { diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts index 7a09a478899..f97baebc956 100644 --- a/src/agents/model-selection.test.ts +++ b/src/agents/model-selection.test.ts @@ -70,17 +70,17 @@ describe("model-selection", () => { }); }); - it("normalizes openai gpt-5.3 codex refs to openai-codex provider", () => { + it("keeps openai gpt-5.3 codex refs on the openai provider", () => { expect(parseModelRef("openai/gpt-5.3-codex", "anthropic")).toEqual({ - provider: "openai-codex", + provider: "openai", model: "gpt-5.3-codex", }); expect(parseModelRef("gpt-5.3-codex", "openai")).toEqual({ - provider: "openai-codex", + provider: "openai", model: "gpt-5.3-codex", }); expect(parseModelRef("openai/gpt-5.3-codex-codex", "anthropic")).toEqual({ - provider: "openai-codex", + provider: "openai", model: "gpt-5.3-codex-codex", }); }); diff --git a/src/agents/model-selection.ts b/src/agents/model-selection.ts index 0123579298b..09a9ca90547 100644 --- a/src/agents/model-selection.ts +++ b/src/agents/model-selection.ts @@ -27,7 +27,6 @@ const ANTHROPIC_MODEL_ALIASES: Record = { "sonnet-4.6": "claude-sonnet-4-6", "sonnet-4.5": "claude-sonnet-4-5", }; -const OPENAI_CODEX_OAUTH_MODEL_PREFIXES = ["gpt-5.3-codex"] as const; function normalizeAliasKey(value: string): string { return value.trim().toLowerCase(); @@ -133,25 +132,9 @@ function normalizeProviderModelId(provider: string, model: string): string { return model; } -function shouldUseOpenAICodexProvider(provider: string, model: string): boolean { - if (provider !== "openai") { - return false; - } - const normalized = model.trim().toLowerCase(); - if (!normalized) { - return false; - } - return OPENAI_CODEX_OAUTH_MODEL_PREFIXES.some( - (prefix) => normalized === prefix || normalized.startsWith(`${prefix}-`), - ); -} - export function normalizeModelRef(provider: string, model: string): ModelRef { const normalizedProvider = normalizeProviderId(provider); const normalizedModel = normalizeProviderModelId(normalizedProvider, model.trim()); - if (shouldUseOpenAICodexProvider(normalizedProvider, normalizedModel)) { - return { provider: "openai-codex", model: normalizedModel }; - } return { provider: normalizedProvider, model: normalizedModel }; } diff --git a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts index f29e2ebd63a..e3061518f2d 100644 --- a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from "vitest"; import { + downgradeOpenAIFunctionCallReasoningPairs, downgradeOpenAIReasoningBlocks, isMessagingToolDuplicate, normalizeTextForComparison, @@ -318,6 +319,125 @@ describe("downgradeOpenAIReasoningBlocks", () => { }); }); +describe("downgradeOpenAIFunctionCallReasoningPairs", () => { + it("strips fc ids when reasoning cannot be replayed", () => { + const input = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ]; + + // oxlint-disable-next-line typescript/no-explicit-any + expect(downgradeOpenAIFunctionCallReasoningPairs(input as any)).toEqual([ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_123", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_123", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ]); + }); + + it("keeps fc ids when replayable reasoning is present", () => { + const input = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }), + }, + { type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "read", + content: [{ type: "text", text: "ok" }], + }, + ]; + + // oxlint-disable-next-line typescript/no-explicit-any + expect(downgradeOpenAIFunctionCallReasoningPairs(input as any)).toEqual(input); + }); + + it("only rewrites tool results paired to the downgraded assistant turn", () => { + const input = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "read", + content: [{ type: "text", text: "turn1" }], + }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }), + }, + { type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "read", + content: [{ type: "text", text: "turn2" }], + }, + ]; + + // oxlint-disable-next-line typescript/no-explicit-any + expect(downgradeOpenAIFunctionCallReasoningPairs(input as any)).toEqual([ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_123", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_123", + toolName: "read", + content: [{ type: "text", text: "turn1" }], + }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }), + }, + { type: "toolCall", id: "call_123|fc_123", name: "read", arguments: {} }, + ], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "read", + content: [{ type: "text", text: "turn2" }], + }, + ]); + }); +}); + describe("normalizeTextForComparison", () => { it.each([ { input: "Hello World", expected: "hello world" }, diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index dd10fdca3d1..7c48a346e4d 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -42,7 +42,10 @@ export { } from "./pi-embedded-helpers/errors.js"; export { isGoogleModelApi, sanitizeGoogleTurnOrdering } from "./pi-embedded-helpers/google.js"; -export { downgradeOpenAIReasoningBlocks } from "./pi-embedded-helpers/openai.js"; +export { + downgradeOpenAIFunctionCallReasoningPairs, + downgradeOpenAIReasoningBlocks, +} from "./pi-embedded-helpers/openai.js"; export { isEmptyAssistantMessageContent, sanitizeSessionMessagesImages, diff --git a/src/agents/pi-embedded-helpers/openai.ts b/src/agents/pi-embedded-helpers/openai.ts index 8564e0d2d7e..17cfa45e354 100644 --- a/src/agents/pi-embedded-helpers/openai.ts +++ b/src/agents/pi-embedded-helpers/openai.ts @@ -6,6 +6,11 @@ type OpenAIThinkingBlock = { thinkingSignature?: unknown; }; +type OpenAIToolCallBlock = { + type?: unknown; + id?: unknown; +}; + type OpenAIReasoningSignature = { id: string; type: string; @@ -59,6 +64,141 @@ function hasFollowingNonThinkingBlock( return false; } +function splitOpenAIFunctionCallPairing(id: string): { + callId: string; + itemId?: string; +} { + const separator = id.indexOf("|"); + if (separator <= 0 || separator >= id.length - 1) { + return { callId: id }; + } + return { + callId: id.slice(0, separator), + itemId: id.slice(separator + 1), + }; +} + +function isOpenAIToolCallType(type: unknown): boolean { + return type === "toolCall" || type === "toolUse" || type === "functionCall"; +} + +/** + * OpenAI can reject replayed `function_call` items with an `fc_*` id if the + * matching `reasoning` item is absent in the same assistant turn. + * + * When that pairing is missing, strip the `|fc_*` suffix from tool call ids so + * pi-ai omits `function_call.id` on replay. + */ +export function downgradeOpenAIFunctionCallReasoningPairs( + messages: AgentMessage[], +): AgentMessage[] { + let changed = false; + const rewrittenMessages: AgentMessage[] = []; + let pendingRewrittenIds: Map | null = null; + + for (const msg of messages) { + if (!msg || typeof msg !== "object") { + pendingRewrittenIds = null; + rewrittenMessages.push(msg); + continue; + } + + const role = (msg as { role?: unknown }).role; + if (role === "assistant") { + const assistantMsg = msg as Extract; + if (!Array.isArray(assistantMsg.content)) { + pendingRewrittenIds = null; + rewrittenMessages.push(msg); + continue; + } + + const localRewrittenIds = new Map(); + let seenReplayableReasoning = false; + let assistantChanged = false; + const nextContent = assistantMsg.content.map((block) => { + if (!block || typeof block !== "object") { + return block; + } + + const thinkingBlock = block as OpenAIThinkingBlock; + if ( + thinkingBlock.type === "thinking" && + parseOpenAIReasoningSignature(thinkingBlock.thinkingSignature) + ) { + seenReplayableReasoning = true; + return block; + } + + const toolCallBlock = block as OpenAIToolCallBlock; + if (!isOpenAIToolCallType(toolCallBlock.type) || typeof toolCallBlock.id !== "string") { + return block; + } + + const pairing = splitOpenAIFunctionCallPairing(toolCallBlock.id); + if (seenReplayableReasoning || !pairing.itemId || !pairing.itemId.startsWith("fc_")) { + return block; + } + + assistantChanged = true; + localRewrittenIds.set(toolCallBlock.id, pairing.callId); + return { + ...(block as unknown as Record), + id: pairing.callId, + } as typeof block; + }); + + pendingRewrittenIds = localRewrittenIds.size > 0 ? localRewrittenIds : null; + if (!assistantChanged) { + rewrittenMessages.push(msg); + continue; + } + changed = true; + rewrittenMessages.push({ ...assistantMsg, content: nextContent } as AgentMessage); + continue; + } + + if (role === "toolResult" && pendingRewrittenIds && pendingRewrittenIds.size > 0) { + const toolResult = msg as Extract & { + toolUseId?: unknown; + }; + let toolResultChanged = false; + const updates: Record = {}; + + if (typeof toolResult.toolCallId === "string") { + const nextToolCallId = pendingRewrittenIds.get(toolResult.toolCallId); + if (nextToolCallId && nextToolCallId !== toolResult.toolCallId) { + updates.toolCallId = nextToolCallId; + toolResultChanged = true; + } + } + + if (typeof toolResult.toolUseId === "string") { + const nextToolUseId = pendingRewrittenIds.get(toolResult.toolUseId); + if (nextToolUseId && nextToolUseId !== toolResult.toolUseId) { + updates.toolUseId = nextToolUseId; + toolResultChanged = true; + } + } + + if (!toolResultChanged) { + rewrittenMessages.push(msg); + continue; + } + changed = true; + rewrittenMessages.push({ + ...toolResult, + ...updates, + } as AgentMessage); + continue; + } + + pendingRewrittenIds = null; + rewrittenMessages.push(msg); + } + + return changed ? rewrittenMessages : messages; +} + /** * OpenAI Responses API can reject transcripts that contain a standalone `reasoning` item id * without the required following item. diff --git a/src/agents/pi-embedded-runner.openai-tool-id-preservation.test.ts b/src/agents/pi-embedded-runner.openai-tool-id-preservation.test.ts index 5d3a86eec2f..ee714903022 100644 --- a/src/agents/pi-embedded-runner.openai-tool-id-preservation.test.ts +++ b/src/agents/pi-embedded-runner.openai-tool-id-preservation.test.ts @@ -7,7 +7,7 @@ import { import { sanitizeSessionHistory } from "./pi-embedded-runner/google.js"; describe("sanitizeSessionHistory openai tool id preservation", () => { - it("keeps canonical call_id|fc_id pairings for same-model openai replay", async () => { + it("strips fc ids when replayable reasoning metadata is missing", async () => { const sessionEntries = [ makeModelSnapshotEntry({ provider: "openai", @@ -40,6 +40,54 @@ describe("sanitizeSessionHistory openai tool id preservation", () => { sessionId: "test-session", }); + const assistant = result[0] as { content?: Array<{ type?: string; id?: string }> }; + const toolCall = assistant.content?.find((block) => block.type === "toolCall"); + expect(toolCall?.id).toBe("call_123"); + + const toolResult = result[1] as { toolCallId?: string }; + expect(toolResult.toolCallId).toBe("call_123"); + }); + + it("keeps canonical call_id|fc_id pairings when replayable reasoning is present", async () => { + const sessionEntries = [ + makeModelSnapshotEntry({ + provider: "openai", + modelApi: "openai-responses", + modelId: "gpt-5.2-codex", + }), + ]; + const sessionManager = makeInMemorySessionManager(sessionEntries); + + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal reasoning", + thinkingSignature: JSON.stringify({ id: "rs_123", type: "reasoning" }), + }, + { type: "toolCall", id: "call_123|fc_123", name: "noop", arguments: {} }, + ], + } as unknown as AgentMessage, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "noop", + content: [{ type: "text", text: "ok" }], + isError: false, + } as unknown as AgentMessage, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + modelId: "gpt-5.2-codex", + sessionManager, + sessionId: "test-session", + }); + const assistant = result[0] as { content?: Array<{ type?: string; id?: string }> }; const toolCall = assistant.content?.find((block) => block.type === "toolCall"); expect(toolCall?.id).toBe("call_123|fc_123"); diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 429c1ddd9d9..9657c26686d 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -10,6 +10,7 @@ import { } from "../../sessions/input-provenance.js"; import { resolveImageSanitizationLimits } from "../image-sanitization.js"; import { + downgradeOpenAIFunctionCallReasoningPairs, downgradeOpenAIReasoningBlocks, isCompactionFailureError, isGoogleModelApi, @@ -464,7 +465,9 @@ export async function sanitizeSessionHistory(params: { }) : false; const sanitizedOpenAI = isOpenAIResponsesApi - ? downgradeOpenAIReasoningBlocks(sanitizedCompactionUsage) + ? downgradeOpenAIFunctionCallReasoningPairs( + downgradeOpenAIReasoningBlocks(sanitizedCompactionUsage), + ) : sanitizedCompactionUsage; if (hasSnapshot && (!priorSnapshot || modelChanged)) { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 45b98fc03c5..a4fca4ca59c 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -45,6 +45,7 @@ import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-strea import { createOpenAIWebSocketStreamFn, releaseWsSession } from "../../openai-ws-stream.js"; import { resolveOwnerDisplaySetting } from "../../owner-display.js"; import { + downgradeOpenAIFunctionCallReasoningPairs, isCloudCodeAssistFormatError, resolveBootstrapMaxChars, resolveBootstrapTotalMaxChars, @@ -1032,6 +1033,29 @@ export async function runEmbeddedAttempt( }; } + if ( + params.model.api === "openai-responses" || + params.model.api === "openai-codex-responses" + ) { + const inner = activeSession.agent.streamFn; + activeSession.agent.streamFn = (model, context, options) => { + const ctx = context as unknown as { messages?: unknown }; + const messages = ctx?.messages; + if (!Array.isArray(messages)) { + return inner(model, context, options); + } + const sanitized = downgradeOpenAIFunctionCallReasoningPairs(messages as AgentMessage[]); + if (sanitized === messages) { + return inner(model, context, options); + } + const nextContext = { + ...(context as unknown as Record), + messages: sanitized, + } as unknown; + return inner(model, nextContext as typeof context, options); + }; + } + // Some models emit tool names with surrounding whitespace (e.g. " read "). // pi-agent-core dispatches tool calls with exact string matching, so normalize // names on the live response stream before tool execution.