mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 07:37:27 +00:00
🤖 fix: preserve openai reasoning replay ids (#17792)
What: - disable tool-call id sanitization for OpenAI/OpenAI Codex transcript policy - gate id sanitization in image sanitizer to full mode only - keep orphan reasoning downgrade scoped to OpenAI model-switch replay path - update transcript policy, session-history, sanitizer, and reasoning replay tests - document OpenAI model-switch orphan-reasoning cleanup behavior in transcript hygiene reference Why: - OpenAI Responses replay depends on canonical call_id|fc_id pairings for reasoning followers - strict id rewriting in OpenAI path breaks follower matching and triggers rs_* orphan 400s - limiting scope avoids behavior expansion while fixing the identified regression Tests: - pnpm vitest run src/agents/transcript-policy.test.ts src/agents/pi-embedded-runner.sanitize-session-history.test.ts src/agents/openai-responses.reasoning-replay.test.ts - pnpm vitest run --config vitest.e2e.config.ts src/agents/transcript-policy.e2e.test.ts src/agents/pi-embedded-runner.sanitize-session-history.e2e.test.ts src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.e2e.test.ts src/agents/pi-embedded-helpers.sanitizeuserfacingtext.e2e.test.ts - pnpm lint - pnpm format:check - pnpm check:docs - pnpm test (fails in current macOS bash 3.2 env at test/git-hooks-pre-commit.integration.test.ts: mapfile not found)
This commit is contained in:
@@ -115,6 +115,15 @@ describe("openai-responses reasoning replay", () => {
|
|||||||
expect(types).toContain("reasoning");
|
expect(types).toContain("reasoning");
|
||||||
expect(types).toContain("function_call");
|
expect(types).toContain("function_call");
|
||||||
expect(types.indexOf("reasoning")).toBeLessThan(types.indexOf("function_call"));
|
expect(types.indexOf("reasoning")).toBeLessThan(types.indexOf("function_call"));
|
||||||
|
|
||||||
|
const functionCall = input.find(
|
||||||
|
(item) =>
|
||||||
|
item &&
|
||||||
|
typeof item === "object" &&
|
||||||
|
(item as Record<string, unknown>).type === "function_call",
|
||||||
|
) as Record<string, unknown> | undefined;
|
||||||
|
expect(functionCall?.call_id).toBe("call_123");
|
||||||
|
expect(functionCall?.id).toBe("fc_123");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("still replays reasoning when paired with an assistant message", async () => {
|
it("still replays reasoning when paired with an assistant message", async () => {
|
||||||
|
|||||||
@@ -160,6 +160,35 @@ describe("sanitizeSessionMessagesImages", () => {
|
|||||||
const toolResult = out[1] as { toolUseId?: string };
|
const toolResult = out[1] as { toolUseId?: string };
|
||||||
expect(toolResult.toolUseId).toBe("callabcitem123");
|
expect(toolResult.toolUseId).toBe("callabcitem123");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("does not sanitize tool IDs in images-only mode", async () => {
|
||||||
|
const input = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "toolCall", id: "call_123|fc_456", name: "read", arguments: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_123|fc_456",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "ok" }],
|
||||||
|
isError: false,
|
||||||
|
},
|
||||||
|
] satisfies AgentMessage[];
|
||||||
|
|
||||||
|
const out = await sanitizeSessionMessagesImages(input, "test", {
|
||||||
|
sanitizeMode: "images-only",
|
||||||
|
sanitizeToolCallIds: true,
|
||||||
|
toolCallIdMode: "strict",
|
||||||
|
});
|
||||||
|
|
||||||
|
const assistant = out[0] as unknown as { content?: Array<{ type?: string; id?: string }> };
|
||||||
|
const toolCall = assistant.content?.find((b) => b.type === "toolCall");
|
||||||
|
expect(toolCall?.id).toBe("call_123|fc_456");
|
||||||
|
|
||||||
|
const toolResult = out[1] as unknown as { toolCallId?: string };
|
||||||
|
expect(toolResult.toolCallId).toBe("call_123|fc_456");
|
||||||
|
});
|
||||||
it("filters whitespace-only assistant text blocks", async () => {
|
it("filters whitespace-only assistant text blocks", async () => {
|
||||||
const input = [
|
const input = [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -294,6 +294,27 @@ describe("downgradeOpenAIReasoningBlocks", () => {
|
|||||||
// oxlint-disable-next-line typescript/no-explicit-any
|
// oxlint-disable-next-line typescript/no-explicit-any
|
||||||
expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual(input);
|
expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual(input);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("is idempotent for orphaned reasoning cleanup", () => {
|
||||||
|
const input = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: "thinking",
|
||||||
|
thinkingSignature: JSON.stringify({ id: "rs_orphan", type: "reasoning" }),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{ role: "user", content: "next" },
|
||||||
|
];
|
||||||
|
|
||||||
|
// oxlint-disable-next-line typescript/no-explicit-any
|
||||||
|
const once = downgradeOpenAIReasoningBlocks(input as any);
|
||||||
|
// oxlint-disable-next-line typescript/no-explicit-any
|
||||||
|
const twice = downgradeOpenAIReasoningBlocks(once as any);
|
||||||
|
expect(twice).toEqual(once);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("normalizeTextForComparison", () => {
|
describe("normalizeTextForComparison", () => {
|
||||||
|
|||||||
@@ -51,9 +51,10 @@ export async function sanitizeSessionMessagesImages(
|
|||||||
const allowNonImageSanitization = sanitizeMode === "full";
|
const allowNonImageSanitization = sanitizeMode === "full";
|
||||||
// We sanitize historical session messages because Anthropic can reject a request
|
// We sanitize historical session messages because Anthropic can reject a request
|
||||||
// if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
|
// if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
|
||||||
const sanitizedIds = options?.sanitizeToolCallIds
|
const sanitizedIds =
|
||||||
? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode)
|
allowNonImageSanitization && options?.sanitizeToolCallIds
|
||||||
: messages;
|
? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode)
|
||||||
|
: messages;
|
||||||
const out: AgentMessage[] = [];
|
const out: AgentMessage[] = [];
|
||||||
for (const msg of sanitizedIds) {
|
for (const msg of sanitizedIds) {
|
||||||
if (!msg || typeof msg !== "object") {
|
if (!msg || typeof msg !== "object") {
|
||||||
|
|||||||
@@ -0,0 +1,50 @@
|
|||||||
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import {
|
||||||
|
makeInMemorySessionManager,
|
||||||
|
makeModelSnapshotEntry,
|
||||||
|
} from "./pi-embedded-runner.sanitize-session-history.test-harness.js";
|
||||||
|
import { sanitizeSessionHistory } from "./pi-embedded-runner/google.js";
|
||||||
|
|
||||||
|
describe("sanitizeSessionHistory openai tool id preservation", () => {
|
||||||
|
it("keeps canonical call_id|fc_id pairings for same-model openai replay", async () => {
|
||||||
|
const sessionEntries = [
|
||||||
|
makeModelSnapshotEntry({
|
||||||
|
provider: "openai",
|
||||||
|
modelApi: "openai-responses",
|
||||||
|
modelId: "gpt-5.2-codex",
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
const sessionManager = makeInMemorySessionManager(sessionEntries);
|
||||||
|
|
||||||
|
const messages: AgentMessage[] = [
|
||||||
|
{
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "toolCall", id: "call_123|fc_123", name: "noop", arguments: {} }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_123|fc_123",
|
||||||
|
toolName: "noop",
|
||||||
|
content: [{ type: "text", text: "ok" }],
|
||||||
|
isError: false,
|
||||||
|
} as unknown as AgentMessage,
|
||||||
|
];
|
||||||
|
|
||||||
|
const result = await sanitizeSessionHistory({
|
||||||
|
messages,
|
||||||
|
modelApi: "openai-responses",
|
||||||
|
provider: "openai",
|
||||||
|
modelId: "gpt-5.2-codex",
|
||||||
|
sessionManager,
|
||||||
|
sessionId: "test-session",
|
||||||
|
});
|
||||||
|
|
||||||
|
const assistant = result[0] as { content?: Array<{ type?: string; id?: string }> };
|
||||||
|
const toolCall = assistant.content?.find((block) => block.type === "toolCall");
|
||||||
|
expect(toolCall?.id).toBe("call_123|fc_123");
|
||||||
|
|
||||||
|
const toolResult = result[1] as { toolCallId?: string };
|
||||||
|
expect(toolResult.toolCallId).toBe("call_123|fc_123");
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -52,7 +52,7 @@ describe("sanitizeSessionHistory e2e smoke", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("applies strict tool-call sanitization for openai-responses", async () => {
|
it("keeps images-only sanitize policy without tool-call id rewriting for openai-responses", async () => {
|
||||||
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
|
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
|
||||||
|
|
||||||
await sanitizeSessionHistory({
|
await sanitizeSessionHistory({
|
||||||
@@ -68,8 +68,7 @@ describe("sanitizeSessionHistory e2e smoke", () => {
|
|||||||
"session:history",
|
"session:history",
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
sanitizeMode: "images-only",
|
sanitizeMode: "images-only",
|
||||||
sanitizeToolCallIds: true,
|
sanitizeToolCallIds: false,
|
||||||
toolCallIdMode: "strict",
|
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ describe("sanitizeSessionHistory", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("sanitizes tool call ids for openai-responses while keeping images-only mode", async () => {
|
it("does not sanitize tool call ids for openai-responses", async () => {
|
||||||
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
|
vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false);
|
||||||
|
|
||||||
await sanitizeSessionHistory({
|
await sanitizeSessionHistory({
|
||||||
@@ -112,11 +112,7 @@ describe("sanitizeSessionHistory", () => {
|
|||||||
expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith(
|
expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith(
|
||||||
mockMessages,
|
mockMessages,
|
||||||
"session:history",
|
"session:history",
|
||||||
expect.objectContaining({
|
expect.objectContaining({ sanitizeMode: "images-only", sanitizeToolCallIds: false }),
|
||||||
sanitizeMode: "images-only",
|
|
||||||
sanitizeToolCallIds: true,
|
|
||||||
toolCallIdMode: "strict",
|
|
||||||
}),
|
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -243,7 +239,7 @@ describe("sanitizeSessionHistory", () => {
|
|||||||
expect(result).toEqual(messages);
|
expect(result).toEqual(messages);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("downgrades openai reasoning only when the model changes", async () => {
|
it("downgrades openai reasoning when the model changes", async () => {
|
||||||
const sessionEntries = [
|
const sessionEntries = [
|
||||||
makeModelSnapshotEntry({
|
makeModelSnapshotEntry({
|
||||||
provider: "anthropic",
|
provider: "anthropic",
|
||||||
|
|||||||
@@ -2,15 +2,15 @@ import { describe, expect, it } from "vitest";
|
|||||||
import { resolveTranscriptPolicy } from "./transcript-policy.js";
|
import { resolveTranscriptPolicy } from "./transcript-policy.js";
|
||||||
|
|
||||||
describe("resolveTranscriptPolicy e2e smoke", () => {
|
describe("resolveTranscriptPolicy e2e smoke", () => {
|
||||||
it("uses strict tool-call sanitization for OpenAI models", () => {
|
it("uses images-only sanitization without tool-call id rewriting for OpenAI models", () => {
|
||||||
const policy = resolveTranscriptPolicy({
|
const policy = resolveTranscriptPolicy({
|
||||||
provider: "openai",
|
provider: "openai",
|
||||||
modelId: "gpt-4o",
|
modelId: "gpt-4o",
|
||||||
modelApi: "openai",
|
modelApi: "openai",
|
||||||
});
|
});
|
||||||
expect(policy.sanitizeMode).toBe("images-only");
|
expect(policy.sanitizeMode).toBe("images-only");
|
||||||
expect(policy.sanitizeToolCallIds).toBe(true);
|
expect(policy.sanitizeToolCallIds).toBe(false);
|
||||||
expect(policy.toolCallIdMode).toBe("strict");
|
expect(policy.toolCallIdMode).toBeUndefined();
|
||||||
});
|
});
|
||||||
|
|
||||||
it("uses strict9 tool-call sanitization for Mistral-family models", () => {
|
it("uses strict9 tool-call sanitization for Mistral-family models", () => {
|
||||||
|
|||||||
@@ -30,13 +30,13 @@ describe("resolveTranscriptPolicy", () => {
|
|||||||
expect(policy.toolCallIdMode).toBe("strict9");
|
expect(policy.toolCallIdMode).toBe("strict9");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("enables sanitizeToolCallIds for OpenAI provider", () => {
|
it("disables sanitizeToolCallIds for OpenAI provider", () => {
|
||||||
const policy = resolveTranscriptPolicy({
|
const policy = resolveTranscriptPolicy({
|
||||||
provider: "openai",
|
provider: "openai",
|
||||||
modelId: "gpt-4o",
|
modelId: "gpt-4o",
|
||||||
modelApi: "openai",
|
modelApi: "openai",
|
||||||
});
|
});
|
||||||
expect(policy.sanitizeToolCallIds).toBe(true);
|
expect(policy.sanitizeToolCallIds).toBe(false);
|
||||||
expect(policy.toolCallIdMode).toBe("strict");
|
expect(policy.toolCallIdMode).toBeUndefined();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ export function resolveTranscriptPolicy(params: {
|
|||||||
|
|
||||||
const needsNonImageSanitize = isGoogle || isAnthropic || isMistral || isOpenRouterGemini;
|
const needsNonImageSanitize = isGoogle || isAnthropic || isMistral || isOpenRouterGemini;
|
||||||
|
|
||||||
const sanitizeToolCallIds = isGoogle || isMistral || isAnthropic || isOpenAi;
|
const sanitizeToolCallIds = isGoogle || isMistral || isAnthropic;
|
||||||
const toolCallIdMode: ToolCallIdMode | undefined = isMistral
|
const toolCallIdMode: ToolCallIdMode | undefined = isMistral
|
||||||
? "strict9"
|
? "strict9"
|
||||||
: sanitizeToolCallIds
|
: sanitizeToolCallIds
|
||||||
@@ -109,7 +109,7 @@ export function resolveTranscriptPolicy(params: {
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
sanitizeMode: isOpenAi ? "images-only" : needsNonImageSanitize ? "full" : "images-only",
|
sanitizeMode: isOpenAi ? "images-only" : needsNonImageSanitize ? "full" : "images-only",
|
||||||
sanitizeToolCallIds,
|
sanitizeToolCallIds: !isOpenAi && sanitizeToolCallIds,
|
||||||
toolCallIdMode,
|
toolCallIdMode,
|
||||||
repairToolUseResultPairing: !isOpenAi && repairToolUseResultPairing,
|
repairToolUseResultPairing: !isOpenAi && repairToolUseResultPairing,
|
||||||
preserveSignatures: isAntigravityClaudeModel,
|
preserveSignatures: isAntigravityClaudeModel,
|
||||||
|
|||||||
Reference in New Issue
Block a user