From c343132dbb3a926a8cca6e4556452ee698b4bdc9 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 22 Feb 2026 11:29:10 +0000 Subject: [PATCH] fix(agents): harden bash tool and reply directive handling --- src/agents/bash-tools.process.ts | 87 +++++++------------ .../session-transcript-repair.e2e.test.ts | 37 ++++---- .../reply/get-reply-directives-apply.ts | 54 ++++++------ 3 files changed, 75 insertions(+), 103 deletions(-) diff --git a/src/agents/bash-tools.process.ts b/src/agents/bash-tools.process.ts index dbdb6f9976a..25248bf2218 100644 --- a/src/agents/bash-tools.process.ts +++ b/src/agents/bash-tools.process.ts @@ -278,6 +278,18 @@ export function createProcessTool( }); }; + const runningSessionResult = ( + session: ProcessSession, + text: string, + ): AgentToolResult => ({ + content: [{ type: "text", text }], + details: { + status: "running", + sessionId: params.sessionId, + name: deriveSessionName(session.command), + }, + }); + switch (params.action) { case "poll": { if (!scopedSession) { @@ -452,21 +464,12 @@ export function createProcessTool( if (params.eof) { resolved.stdin.end(); } - return { - content: [ - { - type: "text", - text: `Wrote ${(params.data ?? "").length} bytes to session ${params.sessionId}${ - params.eof ? " (stdin closed)" : "" - }.`, - }, - ], - details: { - status: "running", - sessionId: params.sessionId, - name: deriveSessionName(resolved.session.command), - }, - }; + return runningSessionResult( + resolved.session, + `Wrote ${(params.data ?? "").length} bytes to session ${params.sessionId}${ + params.eof ? " (stdin closed)" : "" + }.`, + ); } case "send-keys": { @@ -491,21 +494,11 @@ export function createProcessTool( }; } await writeToStdin(resolved.stdin, data); - return { - content: [ - { - type: "text", - text: - `Sent ${data.length} bytes to session ${params.sessionId}.` + - (warnings.length ? `\nWarnings:\n- ${warnings.join("\n- ")}` : ""), - }, - ], - details: { - status: "running", - sessionId: params.sessionId, - name: deriveSessionName(resolved.session.command), - }, - }; + return runningSessionResult( + resolved.session, + `Sent ${data.length} bytes to session ${params.sessionId}.` + + (warnings.length ? `\nWarnings:\n- ${warnings.join("\n- ")}` : ""), + ); } case "submit": { @@ -514,19 +507,10 @@ export function createProcessTool( return resolved.result; } await writeToStdin(resolved.stdin, "\r"); - return { - content: [ - { - type: "text", - text: `Submitted session ${params.sessionId} (sent CR).`, - }, - ], - details: { - status: "running", - sessionId: params.sessionId, - name: deriveSessionName(resolved.session.command), - }, - }; + return runningSessionResult( + resolved.session, + `Submitted session ${params.sessionId} (sent CR).`, + ); } case "paste": { @@ -547,19 +531,10 @@ export function createProcessTool( }; } await writeToStdin(resolved.stdin, payload); - return { - content: [ - { - type: "text", - text: `Pasted ${params.text?.length ?? 0} chars to session ${params.sessionId}.`, - }, - ], - details: { - status: "running", - sessionId: params.sessionId, - name: deriveSessionName(resolved.session.command), - }, - }; + return runningSessionResult( + resolved.session, + `Pasted ${params.text?.length ?? 0} chars to session ${params.sessionId}.`, + ); } case "kill": { diff --git a/src/agents/session-transcript-repair.e2e.test.ts b/src/agents/session-transcript-repair.e2e.test.ts index 68797cfeedc..e1422f7ea40 100644 --- a/src/agents/session-transcript-repair.e2e.test.ts +++ b/src/agents/session-transcript-repair.e2e.test.ts @@ -6,6 +6,19 @@ import { repairToolUseResultPairing, } from "./session-transcript-repair.js"; +const TOOL_CALL_BLOCK_TYPES = new Set(["toolCall", "toolUse", "functionCall"]); + +function getAssistantToolCallBlocks(messages: AgentMessage[]) { + const assistant = messages[0] as Extract | undefined; + if (!assistant || !Array.isArray(assistant.content)) { + return [] as Array<{ type?: unknown; id?: unknown; name?: unknown }>; + } + return assistant.content.filter((block) => { + const type = (block as { type?: unknown }).type; + return typeof type === "string" && TOOL_CALL_BLOCK_TYPES.has(type); + }) as Array<{ type?: unknown; id?: unknown; name?: unknown }>; +} + describe("sanitizeToolUseResultPairing", () => { const buildDuplicateToolResultInput = (opts?: { middleMessage?: unknown; @@ -229,13 +242,7 @@ describe("sanitizeToolCallInputs", () => { ] as unknown as AgentMessage[]; const out = sanitizeToolCallInputs(input); - const assistant = out[0] as Extract; - const toolCalls = Array.isArray(assistant.content) - ? assistant.content.filter((block) => { - const type = (block as { type?: unknown }).type; - return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type); - }) - : []; + const toolCalls = getAssistantToolCallBlocks(out); expect(toolCalls).toHaveLength(1); expect((toolCalls[0] as { id?: unknown }).id).toBe("call_ok"); @@ -264,13 +271,7 @@ describe("sanitizeToolCallInputs", () => { ] as unknown as AgentMessage[]; const out = sanitizeToolCallInputs(input); - const assistant = out[0] as Extract; - const toolCalls = Array.isArray(assistant.content) - ? assistant.content.filter((block) => { - const type = (block as { type?: unknown }).type; - return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type); - }) - : []; + const toolCalls = getAssistantToolCallBlocks(out); expect(toolCalls).toHaveLength(1); expect((toolCalls[0] as { name?: unknown }).name).toBe("read"); @@ -288,13 +289,7 @@ describe("sanitizeToolCallInputs", () => { ] as unknown as AgentMessage[]; const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] }); - const assistant = out[0] as Extract; - const toolCalls = Array.isArray(assistant.content) - ? assistant.content.filter((block) => { - const type = (block as { type?: unknown }).type; - return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type); - }) - : []; + const toolCalls = getAssistantToolCallBlocks(out); expect(toolCalls).toHaveLength(1); expect((toolCalls[0] as { name?: unknown }).name).toBe("read"); diff --git a/src/auto-reply/reply/get-reply-directives-apply.ts b/src/auto-reply/reply/get-reply-directives-apply.ts index fe42a2ca9e0..4232171a82b 100644 --- a/src/auto-reply/reply/get-reply-directives-apply.ts +++ b/src/auto-reply/reply/get-reply-directives-apply.ts @@ -102,6 +102,31 @@ export async function applyInlineDirectiveOverrides(params: { let { directives } = params; let { provider, model } = params; let { contextTokens } = params; + const directiveModelState = { + allowedModelKeys: modelState.allowedModelKeys, + allowedModelCatalog: modelState.allowedModelCatalog, + resetModelOverride: modelState.resetModelOverride, + }; + const createDirectiveHandlingBase = () => ({ + cfg, + directives, + sessionEntry, + sessionStore, + sessionKey, + storePath, + elevatedEnabled, + elevatedAllowed, + elevatedFailures, + messageProviderKey, + defaultProvider, + defaultModel, + aliasIndex, + ...directiveModelState, + provider, + model, + initialModelLabel, + formatModelSwitchEvent, + }); let directiveAck: ReplyPayload | undefined; @@ -135,26 +160,7 @@ export async function applyInlineDirectiveOverrides(params: { }); const currentThinkLevel = resolvedDefaultThinkLevel; const directiveReply = await handleDirectiveOnly({ - cfg, - directives, - sessionEntry, - sessionStore, - sessionKey, - storePath, - elevatedEnabled, - elevatedAllowed, - elevatedFailures, - messageProviderKey, - defaultProvider, - defaultModel, - aliasIndex, - allowedModelKeys: modelState.allowedModelKeys, - allowedModelCatalog: modelState.allowedModelCatalog, - resetModelOverride: modelState.resetModelOverride, - provider, - model, - initialModelLabel, - formatModelSwitchEvent, + ...createDirectiveHandlingBase(), currentThinkLevel, currentVerboseLevel, currentReasoningLevel, @@ -222,9 +228,7 @@ export async function applyInlineDirectiveOverrides(params: { defaultProvider, defaultModel, aliasIndex, - allowedModelKeys: modelState.allowedModelKeys, - allowedModelCatalog: modelState.allowedModelCatalog, - resetModelOverride: modelState.resetModelOverride, + ...directiveModelState, provider, model, initialModelLabel, @@ -232,9 +236,7 @@ export async function applyInlineDirectiveOverrides(params: { agentCfg, modelState: { resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel, - allowedModelKeys: modelState.allowedModelKeys, - allowedModelCatalog: modelState.allowedModelCatalog, - resetModelOverride: modelState.resetModelOverride, + ...directiveModelState, }, }); directiveAck = fastLane.directiveAck;