fix(agents): harden bash tool and reply directive handling

This commit is contained in:
Peter Steinberger
2026-02-22 11:29:10 +00:00
parent a4981efae3
commit c343132dbb
3 changed files with 75 additions and 103 deletions

View File

@@ -278,6 +278,18 @@ export function createProcessTool(
}); });
}; };
/**
 * Builds the tool result returned by actions that leave the session running.
 *
 * @param session - Session whose `command` is used to derive the display name.
 * @param text - Message placed in the single text content item.
 * @returns A result with one text content entry and `details` carrying
 *   `status: "running"`, the requested session id, and the derived name.
 */
const runningSessionResult = (
  session: ProcessSession,
  text: string,
): AgentToolResult<unknown> => {
  // The id reported back is the one from the request params, not read off the session.
  const details = {
    status: "running",
    sessionId: params.sessionId,
    name: deriveSessionName(session.command),
  };
  return {
    content: [{ type: "text", text }],
    details,
  };
};
switch (params.action) { switch (params.action) {
case "poll": { case "poll": {
if (!scopedSession) { if (!scopedSession) {
@@ -452,21 +464,12 @@ export function createProcessTool(
if (params.eof) { if (params.eof) {
resolved.stdin.end(); resolved.stdin.end();
} }
return { return runningSessionResult(
content: [ resolved.session,
{ `Wrote ${(params.data ?? "").length} bytes to session ${params.sessionId}${
type: "text", params.eof ? " (stdin closed)" : ""
text: `Wrote ${(params.data ?? "").length} bytes to session ${params.sessionId}${ }.`,
params.eof ? " (stdin closed)" : "" );
}.`,
},
],
details: {
status: "running",
sessionId: params.sessionId,
name: deriveSessionName(resolved.session.command),
},
};
} }
case "send-keys": { case "send-keys": {
@@ -491,21 +494,11 @@ export function createProcessTool(
}; };
} }
await writeToStdin(resolved.stdin, data); await writeToStdin(resolved.stdin, data);
return { return runningSessionResult(
content: [ resolved.session,
{ `Sent ${data.length} bytes to session ${params.sessionId}.` +
type: "text", (warnings.length ? `\nWarnings:\n- ${warnings.join("\n- ")}` : ""),
text: );
`Sent ${data.length} bytes to session ${params.sessionId}.` +
(warnings.length ? `\nWarnings:\n- ${warnings.join("\n- ")}` : ""),
},
],
details: {
status: "running",
sessionId: params.sessionId,
name: deriveSessionName(resolved.session.command),
},
};
} }
case "submit": { case "submit": {
@@ -514,19 +507,10 @@ export function createProcessTool(
return resolved.result; return resolved.result;
} }
await writeToStdin(resolved.stdin, "\r"); await writeToStdin(resolved.stdin, "\r");
return { return runningSessionResult(
content: [ resolved.session,
{ `Submitted session ${params.sessionId} (sent CR).`,
type: "text", );
text: `Submitted session ${params.sessionId} (sent CR).`,
},
],
details: {
status: "running",
sessionId: params.sessionId,
name: deriveSessionName(resolved.session.command),
},
};
} }
case "paste": { case "paste": {
@@ -547,19 +531,10 @@ export function createProcessTool(
}; };
} }
await writeToStdin(resolved.stdin, payload); await writeToStdin(resolved.stdin, payload);
return { return runningSessionResult(
content: [ resolved.session,
{ `Pasted ${params.text?.length ?? 0} chars to session ${params.sessionId}.`,
type: "text", );
text: `Pasted ${params.text?.length ?? 0} chars to session ${params.sessionId}.`,
},
],
details: {
status: "running",
sessionId: params.sessionId,
name: deriveSessionName(resolved.session.command),
},
};
} }
case "kill": { case "kill": {

View File

@@ -6,6 +6,19 @@ import {
repairToolUseResultPairing, repairToolUseResultPairing,
} from "./session-transcript-repair.js"; } from "./session-transcript-repair.js";
/** Content-block `type` values that are counted as tool calls. */
const TOOL_CALL_BLOCK_TYPES = new Set(["toolCall", "toolUse", "functionCall"]);

/**
 * Returns the tool-call content blocks of the first message in `messages`.
 *
 * Only `messages[0]` is inspected and it is treated as an assistant message
 * without checking its role. When it is missing or its `content` is not an
 * array, an empty list is returned.
 *
 * @param messages - Messages to inspect; only the first entry is read.
 * @returns Blocks whose `type` is a string listed in `TOOL_CALL_BLOCK_TYPES`.
 */
function getAssistantToolCallBlocks(messages: AgentMessage[]) {
  type ToolCallBlock = { type?: unknown; id?: unknown; name?: unknown };

  // A block qualifies only when its `type` is a string from the known set.
  const isToolCallBlock = (candidate: unknown): boolean => {
    const kind = (candidate as { type?: unknown }).type;
    return typeof kind === "string" && TOOL_CALL_BLOCK_TYPES.has(kind);
  };

  const head = messages[0] as Extract<AgentMessage, { role: "assistant" }> | undefined;
  const content = head?.content;
  return Array.isArray(content)
    ? (content.filter(isToolCallBlock) as ToolCallBlock[])
    : ([] as ToolCallBlock[]);
}
describe("sanitizeToolUseResultPairing", () => { describe("sanitizeToolUseResultPairing", () => {
const buildDuplicateToolResultInput = (opts?: { const buildDuplicateToolResultInput = (opts?: {
middleMessage?: unknown; middleMessage?: unknown;
@@ -229,13 +242,7 @@ describe("sanitizeToolCallInputs", () => {
] as unknown as AgentMessage[]; ] as unknown as AgentMessage[];
const out = sanitizeToolCallInputs(input); const out = sanitizeToolCallInputs(input);
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>; const toolCalls = getAssistantToolCallBlocks(out);
const toolCalls = Array.isArray(assistant.content)
? assistant.content.filter((block) => {
const type = (block as { type?: unknown }).type;
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
})
: [];
expect(toolCalls).toHaveLength(1); expect(toolCalls).toHaveLength(1);
expect((toolCalls[0] as { id?: unknown }).id).toBe("call_ok"); expect((toolCalls[0] as { id?: unknown }).id).toBe("call_ok");
@@ -264,13 +271,7 @@ describe("sanitizeToolCallInputs", () => {
] as unknown as AgentMessage[]; ] as unknown as AgentMessage[];
const out = sanitizeToolCallInputs(input); const out = sanitizeToolCallInputs(input);
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>; const toolCalls = getAssistantToolCallBlocks(out);
const toolCalls = Array.isArray(assistant.content)
? assistant.content.filter((block) => {
const type = (block as { type?: unknown }).type;
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
})
: [];
expect(toolCalls).toHaveLength(1); expect(toolCalls).toHaveLength(1);
expect((toolCalls[0] as { name?: unknown }).name).toBe("read"); expect((toolCalls[0] as { name?: unknown }).name).toBe("read");
@@ -288,13 +289,7 @@ describe("sanitizeToolCallInputs", () => {
] as unknown as AgentMessage[]; ] as unknown as AgentMessage[];
const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] }); const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] });
const assistant = out[0] as Extract<AgentMessage, { role: "assistant" }>; const toolCalls = getAssistantToolCallBlocks(out);
const toolCalls = Array.isArray(assistant.content)
? assistant.content.filter((block) => {
const type = (block as { type?: unknown }).type;
return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type);
})
: [];
expect(toolCalls).toHaveLength(1); expect(toolCalls).toHaveLength(1);
expect((toolCalls[0] as { name?: unknown }).name).toBe("read"); expect((toolCalls[0] as { name?: unknown }).name).toBe("read");

View File

@@ -102,6 +102,31 @@ export async function applyInlineDirectiveOverrides(params: {
let { directives } = params; let { directives } = params;
let { provider, model } = params; let { provider, model } = params;
let { contextTokens } = params; let { contextTokens } = params;
// The three modelState fields that the directive-handling argument objects in
// this function share. The property values are read from modelState once,
// here, and the resulting object is spread where those fields are needed.
const directiveModelState = {
allowedModelKeys: modelState.allowedModelKeys,
allowedModelCatalog: modelState.allowedModelCatalog,
resetModelOverride: modelState.resetModelOverride,
};
/**
 * Assembles the argument fields common to the directive handlers.
 *
 * This is a function rather than a plain object so the enclosing scope's
 * bindings are read at call time. `directives`, `provider` and `model` are
 * declared with `let`, so each call reflects their values when it is invoked.
 *
 * @returns A fresh object with the shared fields, including the
 *   `directiveModelState` entries spread in place.
 */
const createDirectiveHandlingBase = () => {
  const base = {
    cfg,
    directives,
    sessionEntry,
    sessionStore,
    sessionKey,
    storePath,
    elevatedEnabled,
    elevatedAllowed,
    elevatedFailures,
    messageProviderKey,
    defaultProvider,
    defaultModel,
    aliasIndex,
    ...directiveModelState,
    provider,
    model,
    initialModelLabel,
    formatModelSwitchEvent,
  };
  return base;
};
let directiveAck: ReplyPayload | undefined; let directiveAck: ReplyPayload | undefined;
@@ -135,26 +160,7 @@ export async function applyInlineDirectiveOverrides(params: {
}); });
const currentThinkLevel = resolvedDefaultThinkLevel; const currentThinkLevel = resolvedDefaultThinkLevel;
const directiveReply = await handleDirectiveOnly({ const directiveReply = await handleDirectiveOnly({
cfg, ...createDirectiveHandlingBase(),
directives,
sessionEntry,
sessionStore,
sessionKey,
storePath,
elevatedEnabled,
elevatedAllowed,
elevatedFailures,
messageProviderKey,
defaultProvider,
defaultModel,
aliasIndex,
allowedModelKeys: modelState.allowedModelKeys,
allowedModelCatalog: modelState.allowedModelCatalog,
resetModelOverride: modelState.resetModelOverride,
provider,
model,
initialModelLabel,
formatModelSwitchEvent,
currentThinkLevel, currentThinkLevel,
currentVerboseLevel, currentVerboseLevel,
currentReasoningLevel, currentReasoningLevel,
@@ -222,9 +228,7 @@ export async function applyInlineDirectiveOverrides(params: {
defaultProvider, defaultProvider,
defaultModel, defaultModel,
aliasIndex, aliasIndex,
allowedModelKeys: modelState.allowedModelKeys, ...directiveModelState,
allowedModelCatalog: modelState.allowedModelCatalog,
resetModelOverride: modelState.resetModelOverride,
provider, provider,
model, model,
initialModelLabel, initialModelLabel,
@@ -232,9 +236,7 @@ export async function applyInlineDirectiveOverrides(params: {
agentCfg, agentCfg,
modelState: { modelState: {
resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel, resolveDefaultThinkingLevel: modelState.resolveDefaultThinkingLevel,
allowedModelKeys: modelState.allowedModelKeys, ...directiveModelState,
allowedModelCatalog: modelState.allowedModelCatalog,
resetModelOverride: modelState.resetModelOverride,
}, },
}); });
directiveAck = fastLane.directiveAck; directiveAck = fastLane.directiveAck;