test(agents): tighten pi message typing and dedupe malformed tool-call cases

This commit is contained in:
Peter Steinberger
2026-03-03 01:42:26 +00:00
parent bd8c3230e8
commit 39520ad21b
7 changed files with 512 additions and 252 deletions

View File

@@ -1,4 +1,5 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, UserMessage, Usage } from "@mariozechner/pi-ai";
import { beforeEach, describe, expect, it, vi } from "vitest";
import * as helpers from "./pi-embedded-helpers.js";
import {
@@ -23,6 +24,8 @@ vi.mock("./pi-embedded-helpers.js", async () => ({
}));
let sanitizeSessionHistory: SanitizeSessionHistoryFn;
let testTimestamp = 1;
const nextTimestamp = () => testTimestamp++;
// We don't mock session-transcript-repair.js: it is a pure function, and mocking it would complicate the setup.
// We rely on the real implementation, which should pass our simple messages through unchanged.
@@ -58,23 +61,33 @@ describe("sanitizeSessionHistory", () => {
const makeThinkingAndTextAssistantMessages = (
thinkingSignature: string = "some_sig",
): AgentMessage[] =>
[
{ role: "user", content: "hello" },
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal",
thinkingSignature,
},
{ type: "text", text: "hi" },
],
},
] as unknown as AgentMessage[];
): AgentMessage[] => {
const user: UserMessage = {
role: "user",
content: "hello",
timestamp: nextTimestamp(),
};
const assistant: AssistantMessage = {
role: "assistant",
content: [
{
type: "thinking",
thinking: "internal",
thinkingSignature,
},
{ type: "text", text: "hi" },
],
api: "openai-responses",
provider: "openai",
model: "gpt-5.2",
usage: makeUsage(0, 0, 0),
stopReason: "stop",
timestamp: nextTimestamp(),
};
return [user, assistant];
};
const makeUsage = (input: number, output: number, totalTokens: number) => ({
const makeUsage = (input: number, output: number, totalTokens: number): Usage => ({
input,
output,
cacheRead: 0,
@@ -87,14 +100,40 @@ describe("sanitizeSessionHistory", () => {
text: string;
usage: ReturnType<typeof makeUsage>;
timestamp?: number;
}) =>
({
role: "assistant",
content: [{ type: "text", text: params.text }],
stopReason: "stop",
...(typeof params.timestamp === "number" ? { timestamp: params.timestamp } : {}),
usage: params.usage,
}) as unknown as AgentMessage;
}): AssistantMessage => ({
role: "assistant",
content: [{ type: "text", text: params.text }],
api: "openai-responses",
provider: "openai",
model: "gpt-5.2",
stopReason: "stop",
timestamp: params.timestamp ?? nextTimestamp(),
usage: params.usage,
});
const makeUserMessage = (content: string, timestamp = nextTimestamp()): UserMessage => ({
role: "user",
content,
timestamp,
});
const makeAssistantMessage = (
content: AssistantMessage["content"],
params: {
stopReason?: AssistantMessage["stopReason"];
usage?: Usage;
timestamp?: number;
} = {},
): AssistantMessage => ({
role: "assistant",
content,
api: "openai-responses",
provider: "openai",
model: "gpt-5.2",
usage: params.usage ?? makeUsage(0, 0, 0),
stopReason: params.stopReason ?? "stop",
timestamp: params.timestamp ?? nextTimestamp(),
});
const makeCompactionSummaryMessage = (tokensBefore: number, timestamp: string) =>
({
@@ -123,6 +162,7 @@ describe("sanitizeSessionHistory", () => {
>;
beforeEach(async () => {
testTimestamp = 1;
sanitizeSessionHistory = await loadSanitizeSessionHistoryWithCleanMocks();
});
@@ -345,20 +385,19 @@ describe("sanitizeSessionHistory", () => {
it("keeps reasoning-only assistant messages for openai-responses", async () => {
setNonGoogleModelApi();
const messages = [
{ role: "user", content: "hello" },
{
role: "assistant",
stopReason: "aborted",
content: [
const messages: AgentMessage[] = [
makeUserMessage("hello"),
makeAssistantMessage(
[
{
type: "thinking",
thinking: "reasoning",
thinkingSignature: "sig",
},
],
},
] as unknown as AgentMessage[];
{ stopReason: "aborted" },
),
];
const result = await sanitizeSessionHistory({
messages,
@@ -373,12 +412,11 @@ describe("sanitizeSessionHistory", () => {
});
it("synthesizes missing tool results for openai-responses after repair", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read", arguments: {} }],
},
] as unknown as AgentMessage[];
const messages: AgentMessage[] = [
makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
];
const result = await sanitizeOpenAIHistory(messages);
@@ -389,49 +427,57 @@ describe("sanitizeSessionHistory", () => {
expect(result[1]?.role).toBe("toolResult");
});
it("drops malformed tool calls missing input or arguments", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read" }],
},
{ role: "user", content: "hello" },
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages, { sessionId: "test-session" });
expect(result.map((msg) => msg.role)).toEqual(["user"]);
});
it("drops malformed tool calls with invalid/overlong names", async () => {
const messages = [
{
role: "assistant",
content: [
it.each([
{
name: "missing input or arguments",
makeMessages: () =>
[
{
type: "toolCall",
id: "call_bad",
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
arguments: {},
},
{ type: "toolCall", id: "call_long", name: `read_${"x".repeat(80)}`, arguments: {} },
],
},
{ role: "user", content: "hello" },
] as unknown as AgentMessage[];
const result = await sanitizeOpenAIHistory(messages);
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "read" }],
} as unknown as AgentMessage,
makeUserMessage("hello"),
] as AgentMessage[],
overrides: { sessionId: "test-session" } as Partial<
Parameters<typeof sanitizeOpenAIHistory>[1]
>,
},
{
name: "invalid or overlong names",
makeMessages: () =>
[
makeAssistantMessage(
[
{
type: "toolCall",
id: "call_bad",
name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"',
arguments: {},
},
{
type: "toolCall",
id: "call_long",
name: `read_${"x".repeat(80)}`,
arguments: {},
},
],
{ stopReason: "toolUse" },
),
makeUserMessage("hello"),
] as AgentMessage[],
overrides: {} as Partial<Parameters<typeof sanitizeOpenAIHistory>[1]>,
},
])("drops malformed tool calls: $name", async ({ makeMessages, overrides }) => {
const result = await sanitizeOpenAIHistory(makeMessages(), overrides);
expect(result.map((msg) => msg.role)).toEqual(["user"]);
});
it("drops tool calls that are not in the allowed tool set", async () => {
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }],
},
] as unknown as AgentMessage[];
const messages: AgentMessage[] = [
makeAssistantMessage([{ type: "toolCall", id: "call_1", name: "write", arguments: {} }], {
stopReason: "toolUse",
}),
];
const result = await sanitizeOpenAIHistory(messages, {
allowedToolNames: ["read"],
@@ -478,25 +524,28 @@ describe("sanitizeSessionHistory", () => {
}),
];
const sessionManager = makeInMemorySessionManager(sessionEntries);
const messages = [
{
role: "assistant",
content: [{ type: "toolCall", id: "tool_abc123", name: "read", arguments: {} }],
},
const messages: AgentMessage[] = [
makeAssistantMessage([{ type: "toolCall", id: "tool_abc123", name: "read", arguments: {} }], {
stopReason: "toolUse",
}),
{
role: "toolResult",
toolCallId: "tool_abc123",
toolName: "read",
content: [{ type: "text", text: "ok" }],
} as unknown as AgentMessage,
{ role: "user", content: "continue" },
isError: false,
timestamp: nextTimestamp(),
},
makeUserMessage("continue"),
{
role: "toolResult",
toolCallId: "tool_01VihkDRptyLpX1ApUPe7ooU",
toolName: "read",
content: [{ type: "text", text: "stale result" }],
} as unknown as AgentMessage,
] as unknown as AgentMessage[];
isError: false,
timestamp: nextTimestamp(),
},
];
const result = await sanitizeSessionHistory({
messages,
@@ -530,20 +579,17 @@ describe("sanitizeSessionHistory", () => {
it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => {
setNonGoogleModelApi();
const messages = [
{ role: "user", content: "hello" },
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "some reasoning",
thinkingSignature: "reasoning_text",
},
],
},
{ role: "user", content: "follow up" },
] as unknown as AgentMessage[];
const messages: AgentMessage[] = [
makeUserMessage("hello"),
makeAssistantMessage([
{
type: "thinking",
thinking: "some reasoning",
thinkingSignature: "reasoning_text",
},
]),
makeUserMessage("follow up"),
];
const result = await sanitizeGithubCopilotHistory({ messages });
@@ -556,21 +602,18 @@ describe("sanitizeSessionHistory", () => {
it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => {
setNonGoogleModelApi();
const messages = [
{ role: "user", content: "read a file" },
{
role: "assistant",
content: [
{
type: "thinking",
thinking: "I should use the read tool",
thinkingSignature: "reasoning_text",
},
{ type: "toolCall", id: "tool_123", name: "read", arguments: { path: "/tmp/test" } },
{ type: "text", text: "Let me read that file." },
],
},
] as unknown as AgentMessage[];
const messages: AgentMessage[] = [
makeUserMessage("read a file"),
makeAssistantMessage([
{
type: "thinking",
thinking: "I should use the read tool",
thinkingSignature: "reasoning_text",
},
{ type: "toolCall", id: "tool_123", name: "read", arguments: { path: "/tmp/test" } },
{ type: "text", text: "Let me read that file." },
]),
];
const result = await sanitizeGithubCopilotHistory({ messages });
const types = getAssistantContentTypes(result);