refactor(tests): deduplicate repeated fixtures in msteams and bash tests

This commit is contained in:
Peter Steinberger
2026-02-23 17:59:46 +00:00
parent b81bce703c
commit cc7a498ace
2 changed files with 567 additions and 318 deletions

View File

@@ -17,12 +17,37 @@ const longDelayCmd = isWin ? "Start-Sleep -Milliseconds 72" : "sleep 0.072";
const POLL_INTERVAL_MS = 15;
const BACKGROUND_POLL_TIMEOUT_MS = isWin ? 8000 : 1200;
const NOTIFY_EVENT_TIMEOUT_MS = isWin ? 12_000 : 5_000;
const PROCESS_STATUS_RUNNING = "running";
const PROCESS_STATUS_COMPLETED = "completed";
const PROCESS_STATUS_FAILED = "failed";
const OUTPUT_DONE = "done";
const OUTPUT_NOPE = "nope";
const OUTPUT_EXEC_COMPLETED = "Exec completed";
const OUTPUT_EXIT_CODE_1 = "Command exited with code 1";
const COMMAND_ECHO_HELLO = "echo hello";
const SCOPE_KEY_ALPHA = "agent:alpha";
const SCOPE_KEY_BETA = "agent:beta";
const TEST_EXEC_DEFAULTS = { security: "full" as const, ask: "off" as const };
const DEFAULT_NOTIFY_SESSION_KEY = "agent:main:main";
const ECHO_HI_COMMAND = "echo hi";
let callIdCounter = 0;
const nextCallId = () => `call${++callIdCounter}`;
type ExecToolInstance = ReturnType<typeof createExecTool>;
type ProcessToolInstance = ReturnType<typeof createProcessTool>;
type ExecToolArgs = Parameters<ExecToolInstance["execute"]>[1];
type ProcessToolArgs = Parameters<ProcessToolInstance["execute"]>[1];
type ExecToolConfig = Exclude<Parameters<typeof createExecTool>[0], undefined>;
const createTestExecTool = (
defaults?: Parameters<typeof createExecTool>[0],
): ReturnType<typeof createExecTool> => createExecTool({ ...TEST_EXEC_DEFAULTS, ...defaults });
const createDisallowedElevatedExecTool = (
defaultLevel: "off" | "on",
overrides: Partial<ExecToolConfig> = {},
) =>
createTestExecTool({
elevated: { enabled: true, allowed: false, defaultLevel },
...overrides,
});
const createNotifyOnExitExecTool = (overrides: Partial<ExecToolConfig> = {}) =>
createTestExecTool({
allowBackground: true,
@@ -57,6 +82,72 @@ const readNormalizedTextContent = (content: ToolTextContent) =>
const readTrimmedLines = (content: ToolTextContent) =>
(readTextContent(content) ?? "").split("\n").map((line) => line.trim());
const readTotalLines = (details: unknown) => (details as { totalLines?: number }).totalLines;
const readProcessStatus = (details: unknown) => (details as { status?: string }).status;
const readProcessStatusOrRunning = (details: unknown) =>
readProcessStatus(details) ?? PROCESS_STATUS_RUNNING;
const expectProcessStatus = (details: unknown, expected: string) => {
expect(readProcessStatus(details)).toBe(expected);
};
const expectTextContainsValues = (
text: string,
values: string[] | undefined,
shouldContain: boolean,
) => {
if (!values) {
return;
}
for (const value of values) {
if (shouldContain) {
expect(text).toContain(value);
} else {
expect(text).not.toContain(value);
}
}
};
const expectTextContainsAll = (text: string, expected?: string[]) => {
expectTextContainsValues(text, expected, true);
};
const expectTextContainsNone = (text: string, forbidden?: string[]) => {
expectTextContainsValues(text, forbidden, false);
};
const expectTextContains = (text: string | undefined, expected?: string) => {
if (expected === undefined) {
throw new Error("expected text assertion value");
}
expectTextContainsAll(text ?? "", [expected]);
};
type ProcessSessionSummary = { sessionId: string; name?: string };
const readProcessSessions = (details: unknown) =>
(details as { sessions: ProcessSessionSummary[] }).sessions;
const expectSessionMembership = (
sessions: ProcessSessionSummary[],
sessionId: string,
shouldExist: boolean,
) => {
expect(sessions.some((session) => session.sessionId === sessionId)).toBe(shouldExist);
};
const executeExecTool = (tool: ExecToolInstance, params: ExecToolArgs) =>
tool.execute(nextCallId(), params);
const executeProcessTool = (tool: ProcessToolInstance, params: ProcessToolArgs) =>
tool.execute(nextCallId(), params);
type ProcessPollResult = { status: string; output?: string };
async function listProcessSessions(tool: ProcessToolInstance) {
const list = await executeProcessTool(tool, { action: "list" });
return readProcessSessions(list.details);
}
async function pollProcessSession(params: {
tool: ProcessToolInstance;
sessionId: string;
}): Promise<ProcessPollResult> {
const poll = await executeProcessTool(params.tool, {
action: "poll",
sessionId: params.sessionId,
});
return {
status: readProcessStatusOrRunning(poll.details),
output: readTextContent(poll.content),
};
}
function applyDefaultShellEnv() {
if (!isWin && defaultShell) {
@@ -82,20 +173,16 @@ function useCapturedShellEnv() {
}
async function waitForCompletion(sessionId: string) {
let status = "running";
let status = PROCESS_STATUS_RUNNING;
await expect
.poll(
async () => {
const poll = await processTool.execute("call-wait", {
action: "poll",
sessionId,
});
status = (poll.details as { status: string }).status;
status = (await pollProcessSession({ tool: processTool, sessionId })).status;
return status;
},
{ timeout: BACKGROUND_POLL_TIMEOUT_MS, interval: POLL_INTERVAL_MS },
)
.not.toBe("running");
.not.toBe(PROCESS_STATUS_RUNNING);
return status;
}
@@ -110,34 +197,47 @@ function hasNotifyEventForPrefix(prefix: string): boolean {
return peekSystemEvents(DEFAULT_NOTIFY_SESSION_KEY).some((event) => event.includes(prefix));
}
async function startBackgroundSession(params: {
tool: ReturnType<typeof createExecTool>;
callId: string;
command: string;
}) {
const result = await params.tool.execute(params.callId, {
async function waitForNotifyEvent(sessionId: string) {
const prefix = sessionId.slice(0, 8);
let finished = getFinishedSession(sessionId);
let hasEvent = hasNotifyEventForPrefix(prefix);
await expect
.poll(
() => {
finished = getFinishedSession(sessionId);
hasEvent = hasNotifyEventForPrefix(prefix);
return Boolean(finished && hasEvent);
},
{ timeout: NOTIFY_EVENT_TIMEOUT_MS, interval: POLL_INTERVAL_MS },
)
.toBe(true);
return {
finished: finished ?? getFinishedSession(sessionId),
hasEvent: hasEvent || hasNotifyEventForPrefix(prefix),
};
}
async function startBackgroundSession(params: { tool: ExecToolInstance; command: string }) {
const result = await executeExecTool(params.tool, {
command: params.command,
background: true,
});
expect(result.details.status).toBe("running");
expectProcessStatus(result.details, PROCESS_STATUS_RUNNING);
return requireSessionId(result.details as { sessionId?: string });
}
async function runBackgroundEchoLines(lines: string[]) {
const sessionId = await startBackgroundSession({
tool: execTool,
callId: "call1",
command: echoLines(lines),
});
await waitForCompletion(sessionId);
return sessionId;
}
async function readProcessLog(
sessionId: string,
options: { offset?: number; limit?: number } = {},
) {
return processTool.execute("call-log", {
type ProcessLogWindow = { offset?: number; limit?: number };
async function readProcessLog(sessionId: string, options: ProcessLogWindow = {}) {
return executeProcessTool(processTool, {
action: "log",
sessionId,
...options,
@@ -154,28 +254,84 @@ const createNumberedLines = (count: number) =>
Array.from({ length: count }, (_value, index) => `line-${index + 1}`);
const LONG_LOG_LINE_COUNT = 201;
async function runBackgroundAndReadProcessLog(
lines: string[],
options: { offset?: number; limit?: number } = {},
) {
async function runBackgroundAndReadProcessLog(lines: string[], options: ProcessLogWindow = {}) {
const sessionId = await runBackgroundEchoLines(lines);
return readProcessLog(sessionId, options);
}
const readLongProcessLog = (options: { offset?: number; limit?: number } = {}) =>
const readLogSlice = async (lines: string[], options: ProcessLogWindow = {}) => {
const log = await runBackgroundAndReadProcessLog(lines, options);
return {
text: readNormalizedTextContent(log.content),
totalLines: readTotalLines(log.details),
};
};
const readLongProcessLog = (options: ProcessLogWindow = {}) =>
runBackgroundAndReadProcessLog(createNumberedLines(LONG_LOG_LINE_COUNT), options);
type LongLogExpectationCase = {
label: string;
options?: ProcessLogWindow;
firstLine: string;
lastLine?: string;
mustContain?: string[];
mustNotContain?: string[];
};
type ShortLogExpectationCase = {
label: string;
lines: string[];
options: ProcessLogWindow;
expectedText: string;
expectedTotalLines: number;
};
type DisallowedElevationCase = {
label: string;
defaultLevel: "off" | "on";
overrides?: Partial<ExecToolConfig>;
requestElevated?: boolean;
expectedError?: string;
expectedOutputIncludes?: string;
};
type NotifyNoopCase = {
label: string;
notifyOnExitEmptySuccess: boolean;
};
const NOOP_NOTIFY_CASES: NotifyNoopCase[] = [
{
label: "default behavior skips no-op completion events",
notifyOnExitEmptySuccess: false,
},
{
label: "explicitly enabling no-op completion emits completion events",
notifyOnExitEmptySuccess: true,
},
];
const expectNotifyNoopEvents = (
events: string[],
notifyOnExitEmptySuccess: boolean,
label: string,
) => {
if (!notifyOnExitEmptySuccess) {
expect(events, label).toEqual([]);
return;
}
expect(events.length, label).toBeGreaterThan(0);
expect(
events.some((event) => event.includes(OUTPUT_EXEC_COMPLETED)),
label,
).toBe(true);
};
async function runBackgroundAndWaitForCompletion(params: {
tool: ReturnType<typeof createExecTool>;
callId: string;
tool: ExecToolInstance;
command: string;
}) {
const sessionId = await startBackgroundSession(params);
const status = await waitForCompletion(sessionId);
expect(status).toBe("completed");
expect(status).toBe(PROCESS_STATUS_COMPLETED);
return { sessionId };
}
beforeEach(() => {
callIdCounter = 0;
resetProcessRegistryForTests();
resetSystemEventsForTest();
});
@@ -186,38 +342,37 @@ describe("exec tool backgrounding", () => {
it(
"backgrounds after yield and can be polled",
async () => {
const result = await execTool.execute("call1", {
const result = await executeExecTool(execTool, {
command: joinCommands([yieldDelayCmd, "echo done"]),
yieldMs: 10,
});
// Timing can race here: command may already be complete before the first response.
if (result.details.status === "completed") {
if (result.details.status === PROCESS_STATUS_COMPLETED) {
const text = readTextContent(result.content) ?? "";
expect(text).toContain("done");
expectTextContains(text, OUTPUT_DONE);
return;
}
expect(result.details.status).toBe("running");
expectProcessStatus(result.details, PROCESS_STATUS_RUNNING);
const sessionId = requireSessionId(result.details as { sessionId?: string });
let output = "";
await expect
.poll(
async () => {
const poll = await processTool.execute("call2", {
action: "poll",
const pollResult = await pollProcessSession({
tool: processTool,
sessionId,
});
const status = (poll.details as { status: string }).status;
output = readTextContent(poll.content) ?? "";
return status;
output = pollResult.output ?? "";
return pollResult.status;
},
{ timeout: BACKGROUND_POLL_TIMEOUT_MS, interval: POLL_INTERVAL_MS },
)
.toBe("completed");
.toBe(PROCESS_STATUS_COMPLETED);
expect(output).toContain("done");
expectTextContains(output, OUTPUT_DONE);
},
isWin ? 15_000 : 5_000,
);
@@ -225,15 +380,12 @@ describe("exec tool backgrounding", () => {
it("supports explicit background and derives session name from the command", async () => {
const sessionId = await startBackgroundSession({
tool: execTool,
callId: "call1",
command: "echo hello",
command: COMMAND_ECHO_HELLO,
});
const list = await processTool.execute("call2", { action: "list" });
const sessions = (list.details as { sessions: Array<{ sessionId: string; name?: string }> })
.sessions;
expect(sessions.some((s) => s.sessionId === sessionId)).toBe(true);
expect(sessions.find((s) => s.sessionId === sessionId)?.name).toBe("echo hello");
const sessions = await listProcessSessions(processTool);
expectSessionMembership(sessions, sessionId, true);
expect(sessions.find((s) => s.sessionId === sessionId)?.name).toBe(COMMAND_ECHO_HELLO);
});
it("uses default timeout when timeout is omitted", async () => {
@@ -243,102 +395,119 @@ describe("exec tool backgrounding", () => {
allowBackground: false,
});
await expect(
customBash.execute("call1", {
executeExecTool(customBash, {
command: longDelayCmd,
}),
).rejects.toThrow(/timed out/i);
});
it("rejects elevated requests when not allowed", async () => {
const customBash = createTestExecTool({
elevated: { enabled: true, allowed: false, defaultLevel: "off" },
messageProvider: "telegram",
sessionKey: DEFAULT_NOTIFY_SESSION_KEY,
});
it.each<DisallowedElevationCase>([
{
label: "rejects elevated requests when not allowed",
defaultLevel: "off",
overrides: {
messageProvider: "telegram",
sessionKey: DEFAULT_NOTIFY_SESSION_KEY,
},
requestElevated: true,
expectedError: "Context: provider=telegram session=agent:main:main",
},
{
label: "does not default to elevated when not allowed",
defaultLevel: "on",
overrides: {
backgroundMs: 1000,
timeoutSec: 5,
},
expectedOutputIncludes: "hi",
},
])(
"$label",
async ({ defaultLevel, overrides, requestElevated, expectedError, expectedOutputIncludes }) => {
const customBash = createDisallowedElevatedExecTool(defaultLevel, overrides);
if (expectedError) {
await expect(
executeExecTool(customBash, {
command: ECHO_HI_COMMAND,
elevated: requestElevated,
}),
).rejects.toThrow(expectedError);
return;
}
await expect(
customBash.execute("call1", {
command: "echo hi",
elevated: true,
}),
).rejects.toThrow("Context: provider=telegram session=agent:main:main");
const result = await executeExecTool(customBash, {
command: ECHO_HI_COMMAND,
});
expectTextContains(readTextContent(result.content), expectedOutputIncludes);
},
);
it.each<ShortLogExpectationCase>([
{
label: "logs line-based slices and defaults to last lines",
lines: ["one", "two", "three"],
options: { limit: 2 },
expectedText: "two\nthree",
expectedTotalLines: 3,
},
{
label: "supports line offsets for log slices",
lines: ["alpha", "beta", "gamma"],
options: { offset: 1, limit: 1 },
expectedText: "beta",
expectedTotalLines: 3,
},
])("$label", async ({ lines, options, expectedText, expectedTotalLines }) => {
const slice = await readLogSlice(lines, options);
expect(slice.text).toBe(expectedText);
expect(slice.totalLines).toBe(expectedTotalLines);
});
it("does not default to elevated when not allowed", async () => {
const customBash = createTestExecTool({
elevated: { enabled: true, allowed: false, defaultLevel: "on" },
backgroundMs: 1000,
timeoutSec: 5,
});
const result = await customBash.execute("call1", {
command: "echo hi",
});
const text = readTextContent(result.content) ?? "";
expect(text).toContain("hi");
});
it("logs line-based slices and defaults to last lines", async () => {
const { sessionId } = await runBackgroundAndWaitForCompletion({
tool: execTool,
callId: "call1",
command: echoLines(["one", "two", "three"]),
});
const log = await readProcessLog(sessionId, { limit: 2 });
expect(readNormalizedTextContent(log.content)).toBe("two\nthree");
expect(readTotalLines(log.details)).toBe(3);
});
it("applies default tail only when no explicit log window is provided", async () => {
const snapshot = readLogSnapshot(await readLongProcessLog());
expect(snapshot.text).toContain("showing last 200 of 201 lines");
expect(snapshot.lines[0]).toBe("line-2");
expect(snapshot.text).toContain("line-2");
expect(snapshot.text).toContain("line-201");
expect(snapshot.totalLines).toBe(LONG_LOG_LINE_COUNT);
});
it("supports line offsets for log slices", async () => {
const sessionId = await runBackgroundEchoLines(["alpha", "beta", "gamma"]);
const log = await readProcessLog(sessionId, { offset: 1, limit: 1 });
expect(readNormalizedTextContent(log.content)).toBe("beta");
});
it("keeps offset-only log requests unbounded by default tail mode", async () => {
const snapshot = readLogSnapshot(await readLongProcessLog({ offset: 30 }));
expect(snapshot.lines[0]).toBe("line-31");
expect(snapshot.lines[snapshot.lines.length - 1]).toBe("line-201");
expect(snapshot.text).not.toContain("showing last 200");
it.each<LongLogExpectationCase>([
{
label: "applies default tail only when no explicit log window is provided",
firstLine: "line-2",
mustContain: ["showing last 200 of 201 lines", "line-2", "line-201"],
},
{
label: "keeps offset-only log requests unbounded by default tail mode",
options: { offset: 30 },
firstLine: "line-31",
lastLine: "line-201",
mustNotContain: ["showing last 200"],
},
])("$label", async ({ options, firstLine, lastLine, mustContain, mustNotContain }) => {
const snapshot = readLogSnapshot(await readLongProcessLog(options));
expect(snapshot.lines[0]).toBe(firstLine);
if (lastLine) {
expect(snapshot.lines[snapshot.lines.length - 1]).toBe(lastLine);
}
expect(snapshot.totalLines).toBe(LONG_LOG_LINE_COUNT);
expectTextContainsAll(snapshot.text, mustContain);
expectTextContainsNone(snapshot.text, mustNotContain);
});
it("scopes process sessions by scopeKey", async () => {
const alphaTools = createScopedToolSet("agent:alpha");
const betaTools = createScopedToolSet("agent:beta");
const alphaTools = createScopedToolSet(SCOPE_KEY_ALPHA);
const betaTools = createScopedToolSet(SCOPE_KEY_BETA);
const sessionA = await startBackgroundSession({
tool: alphaTools.exec,
callId: "call1",
command: shortDelayCmd,
});
const sessionB = await startBackgroundSession({
tool: betaTools.exec,
callId: "call2",
command: shortDelayCmd,
});
const listA = await alphaTools.process.execute("call3", { action: "list" });
const sessionsA = (listA.details as { sessions: Array<{ sessionId: string }> }).sessions;
expect(sessionsA.some((s) => s.sessionId === sessionA)).toBe(true);
expect(sessionsA.some((s) => s.sessionId === sessionB)).toBe(false);
const sessionsA = await listProcessSessions(alphaTools.process);
expectSessionMembership(sessionsA, sessionA, true);
expectSessionMembership(sessionsA, sessionB, false);
const pollB = await betaTools.process.execute("call4", {
action: "poll",
const pollB = await pollProcessSession({
tool: betaTools.process,
sessionId: sessionA,
});
const pollBDetails = pollB.details as { status?: string };
expect(pollBDetails.status).toBe("failed");
expect(pollB.status).toBe(PROCESS_STATUS_FAILED);
});
});
@@ -348,15 +517,14 @@ describe("exec exit codes", () => {
it("treats non-zero exits as completed and appends exit code", async () => {
const command = isWin
? joinCommands(["Write-Output nope", "exit 1"])
: joinCommands(["echo nope", "exit 1"]);
const result = await execTool.execute("call1", { command });
: joinCommands([`echo ${OUTPUT_NOPE}`, "exit 1"]);
const result = await executeExecTool(execTool, { command });
const resultDetails = result.details as { status?: string; exitCode?: number | null };
expect(resultDetails.status).toBe("completed");
expectProcessStatus(resultDetails, PROCESS_STATUS_COMPLETED);
expect(resultDetails.exitCode).toBe(1);
const text = readNormalizedTextContent(result.content);
expect(text).toContain("nope");
expect(text).toContain("Command exited with code 1");
expectTextContainsAll(text, [OUTPUT_NOPE, OUTPUT_EXIT_CODE_1]);
});
});
@@ -366,67 +534,30 @@ describe("exec notifyOnExit", () => {
const sessionId = await startBackgroundSession({
tool,
callId: "call1",
command: echoAfterDelay("notify"),
});
const prefix = sessionId.slice(0, 8);
let finished = getFinishedSession(sessionId);
let hasEvent = hasNotifyEventForPrefix(prefix);
await expect
.poll(
() => {
finished = getFinishedSession(sessionId);
hasEvent = hasNotifyEventForPrefix(prefix);
return Boolean(finished && hasEvent);
},
{ timeout: NOTIFY_EVENT_TIMEOUT_MS, interval: POLL_INTERVAL_MS },
)
.toBe(true);
if (!finished) {
finished = getFinishedSession(sessionId);
}
if (!hasEvent) {
hasEvent = hasNotifyEventForPrefix(prefix);
}
const { finished, hasEvent } = await waitForNotifyEvent(sessionId);
expect(finished).toBeTruthy();
expect(hasEvent).toBe(true);
});
it("handles no-op completion events based on notifyOnExitEmptySuccess", async () => {
for (const testCase of [
{
label: "default behavior skips no-op completion events",
notifyOnExitEmptySuccess: false,
},
{
label: "explicitly enabling no-op completion emits completion events",
notifyOnExitEmptySuccess: true,
},
]) {
resetSystemEventsForTest();
it.each<NotifyNoopCase>(NOOP_NOTIFY_CASES)(
"$label",
async ({ label, notifyOnExitEmptySuccess }) => {
const tool = createNotifyOnExitExecTool(
testCase.notifyOnExitEmptySuccess ? { notifyOnExitEmptySuccess: true } : {},
notifyOnExitEmptySuccess ? { notifyOnExitEmptySuccess: true } : {},
);
await runBackgroundAndWaitForCompletion({
tool,
callId: "call-noop",
command: shortDelayCmd,
});
const events = peekSystemEvents(DEFAULT_NOTIFY_SESSION_KEY);
if (!testCase.notifyOnExitEmptySuccess) {
expect(events, testCase.label).toEqual([]);
} else {
expect(events.length, testCase.label).toBeGreaterThan(0);
expect(
events.some((event) => event.includes("Exec completed")),
testCase.label,
).toBe(true);
}
}
});
expectNotifyNoopEvents(events, notifyOnExitEmptySuccess, label);
},
);
});
describe("exec PATH handling", () => {
@@ -438,7 +569,7 @@ describe("exec PATH handling", () => {
process.env.PATH = basePath;
const tool = createTestExecTool({ pathPrepend: prepend });
const result = await tool.execute("call1", {
const result = await executeExecTool(tool, {
command: isWin ? "Write-Output $env:PATH" : "echo $PATH",
});