fix(agents): stabilize sessions_spawn e2e suite

This commit is contained in:
Peter Steinberger
2026-02-15 22:39:40 +00:00
parent a948212ca7
commit 6b4590be06
4 changed files with 188 additions and 204 deletions

View File

@@ -1,6 +1,6 @@
import { beforeEach, describe, expect, it } from "vitest"; import { beforeEach, describe, expect, it } from "vitest";
import { createOpenClawTools } from "./openclaw-tools.js";
import "./test-helpers/fast-core-tools.js"; import "./test-helpers/fast-core-tools.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import { import {
getCallGatewayMock, getCallGatewayMock,
resetSessionsSpawnConfigOverride, resetSessionsSpawnConfigOverride,
@@ -9,6 +9,7 @@ import {
import { resetSubagentRegistryForTests } from "./subagent-registry.js"; import { resetSubagentRegistryForTests } from "./subagent-registry.js";
const callGatewayMock = getCallGatewayMock(); const callGatewayMock = getCallGatewayMock();
const setConfigOverride = setSessionsSpawnConfigOverride;
describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => { describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
beforeEach(() => { beforeEach(() => {

View File

@@ -1,16 +1,117 @@
import { beforeEach, describe, expect, it } from "vitest"; import { beforeEach, describe, expect, it, vi } from "vitest";
import { emitAgentEvent } from "../infra/agent-events.js"; import { emitAgentEvent } from "../infra/agent-events.js";
import { sleep } from "../utils.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import "./test-helpers/fast-core-tools.js"; import "./test-helpers/fast-core-tools.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import { import {
getCallGatewayMock, getCallGatewayMock,
resetSessionsSpawnConfigOverride, resetSessionsSpawnConfigOverride,
} from "./openclaw-tools.subagents.sessions-spawn.test-harness.js"; } from "./openclaw-tools.subagents.sessions-spawn.test-harness.js";
import { resetSubagentRegistryForTests } from "./subagent-registry.js"; import { resetSubagentRegistryForTests } from "./subagent-registry.js";
// Replace the "./pi-embedded.js" module with inert stubs for this test file:
// the run-state queries always report false, queueing a message reports false,
// and waiting for a run to end resolves to true.
vi.mock("./pi-embedded.js", () => ({
isEmbeddedPiRunActive: () => false,
isEmbeddedPiRunStreaming: () => false,
queueEmbeddedPiMessage: () => false,
waitForEmbeddedPiRunEnd: async () => true,
}));
const callGatewayMock = getCallGatewayMock(); const callGatewayMock = getCallGatewayMock();
// Shape the gateway mock casts each incoming call argument to before recording it.
type GatewayRequest = { method?: string; params?: unknown };
// Shape the gateway mock casts the params of an "agent.wait" request to.
type AgentWaitCall = { runId?: string; timeoutMs?: number };
/**
 * Installs a scripted implementation on the shared `callGatewayMock` and
 * returns handles for inspecting what the code under test sent to it.
 *
 * Scripted responses, keyed by `request.method`:
 * - "sessions.list": a single "main" session, only when `includeSessionsList` is set.
 * - "agent": `{ runId: "run-<n>", status: "accepted", acceptedAt: 1000 + n }`,
 *   where `n` counts agent calls seen by this mock.
 * - "agent.wait": the request's runId (or "run-1") merged with `agentWaitResult`,
 *   defaulting to an "ok" result spanning 1000..2000.
 * - "sessions.patch" / "sessions.delete": `{ ok: true }` after notifying the hook.
 * - "chat.history": one assistant text message "done", only when
 *   `includeChatHistory` is set.
 * - anything else (including the opt-in methods when disabled): `{}`.
 *
 * @param opts.includeSessionsList Enables the canned "sessions.list" reply.
 * @param opts.includeChatHistory Enables the canned "chat.history" reply.
 * @param opts.onAgentSubagentSpawn Called with the params of each "agent"
 *   request whose `lane` is "subagent".
 * @param opts.onSessionsPatch Called with the params of each "sessions.patch" request.
 * @param opts.onSessionsDelete Called with the params of each "sessions.delete" request.
 * @param opts.agentWaitResult Overrides the payload returned for "agent.wait".
 * @returns `calls` (every request, in order), `waitCalls` (params of every
 *   "agent.wait" request), and `getChild`, which reports the run id and
 *   session key of the most recent subagent-lane "agent" request.
 */
function setupSessionsSpawnGatewayMock(opts: {
  includeSessionsList?: boolean;
  includeChatHistory?: boolean;
  onAgentSubagentSpawn?: (params: unknown) => void;
  onSessionsPatch?: (params: unknown) => void;
  onSessionsDelete?: (params: unknown) => void;
  agentWaitResult?: { status: "ok" | "timeout"; startedAt: number; endedAt: number };
}): {
  calls: Array<GatewayRequest>;
  waitCalls: Array<AgentWaitCall>;
  getChild: () => { runId?: string; sessionKey?: string };
} {
  const recordedRequests: Array<GatewayRequest> = [];
  const recordedWaits: Array<AgentWaitCall> = [];
  let agentSeq = 0;
  let spawnedRunId: string | undefined;
  let spawnedSessionKey: string | undefined;

  callGatewayMock.mockImplementation(async (rawRequest: unknown) => {
    const req = rawRequest as GatewayRequest;
    recordedRequests.push(req);

    switch (req.method) {
      case "sessions.list": {
        if (!opts.includeSessionsList) {
          break;
        }
        const mainSession = {
          key: "main",
          lastChannel: "whatsapp",
          lastTo: "+123",
        };
        return { sessions: [mainSession] };
      }

      case "agent": {
        agentSeq += 1;
        const runId = `run-${agentSeq}`;
        const agentParams = req.params as { lane?: string; sessionKey?: string } | undefined;
        // Track only subagent-lane calls (the spawn); the main agent trigger
        // is still counted and answered but never recorded as the child.
        if (agentParams?.lane === "subagent") {
          spawnedRunId = runId;
          spawnedSessionKey = agentParams?.sessionKey ?? "";
          opts.onAgentSubagentSpawn?.(agentParams);
        }
        return {
          runId,
          status: "accepted",
          acceptedAt: 1000 + agentSeq,
        };
      }

      case "agent.wait": {
        const waitParams = req.params as AgentWaitCall | undefined;
        recordedWaits.push(waitParams ?? {});
        const outcome = opts.agentWaitResult ?? { status: "ok", startedAt: 1000, endedAt: 2000 };
        return {
          runId: waitParams?.runId ?? "run-1",
          ...outcome,
        };
      }

      case "sessions.patch": {
        opts.onSessionsPatch?.(req.params);
        return { ok: true };
      }

      case "sessions.delete": {
        opts.onSessionsDelete?.(req.params);
        return { ok: true };
      }

      case "chat.history": {
        if (!opts.includeChatHistory) {
          break;
        }
        const assistantReply = {
          role: "assistant",
          content: [{ type: "text", text: "done" }],
        };
        return { messages: [assistantReply] };
      }

      default:
        break;
    }

    // Unscripted (or opted-out) methods get an empty payload.
    return {};
  });

  return {
    calls: recordedRequests,
    waitCalls: recordedWaits,
    getChild: () => ({ runId: spawnedRunId, sessionKey: spawnedSessionKey }),
  };
}
describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => { describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
beforeEach(() => { beforeEach(() => {
resetSessionsSpawnConfigOverride(); resetSessionsSpawnConfigOverride();
@@ -19,75 +120,17 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
it("sessions_spawn runs cleanup flow after subagent completion", async () => { it("sessions_spawn runs cleanup flow after subagent completion", async () => {
resetSubagentRegistryForTests(); resetSubagentRegistryForTests();
callGatewayMock.mockReset(); callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let childRunId: string | undefined;
let childSessionKey: string | undefined;
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
let patchParams: { key?: string; label?: string } = {}; let patchParams: { key?: string; label?: string } = {};
callGatewayMock.mockImplementation(async (opts: unknown) => { const ctx = setupSessionsSpawnGatewayMock({
const request = opts as { method?: string; params?: unknown }; includeSessionsList: true,
calls.push(request); includeChatHistory: true,
if (request.method === "sessions.list") { onSessionsPatch: (params) => {
return { const rec = params as { key?: string; label?: string } | undefined;
sessions: [ if (typeof rec?.label === "string" && rec.label.trim()) {
{ patchParams = { key: rec.key, label: rec.label };
key: "main",
lastChannel: "whatsapp",
lastTo: "+123",
},
],
};
}
if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as {
message?: string;
sessionKey?: string;
lane?: string;
};
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
} }
return { },
runId,
status: "accepted",
acceptedAt: 2000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 1000,
endedAt: 2000,
};
}
if (request.method === "sessions.patch") {
const params = request.params as { key?: string; label?: string } | undefined;
patchParams = { key: params?.key, label: params?.label };
return { ok: true };
}
if (request.method === "chat.history") {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}
if (request.method === "sessions.delete") {
return { ok: true };
}
return {};
}); });
const tool = createOpenClawTools({ const tool = createOpenClawTools({
@@ -108,11 +151,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1", runId: "run-1",
}); });
if (!childRunId) { const child = ctx.getChild();
if (!child.runId) {
throw new Error("missing child runId"); throw new Error("missing child runId");
} }
emitAgentEvent({ emitAgentEvent({
runId: childRunId, runId: child.runId,
stream: "lifecycle", stream: "lifecycle",
data: { data: {
phase: "end", phase: "end",
@@ -121,18 +165,21 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
}, },
}); });
await sleep(0); vi.useFakeTimers();
await sleep(0); try {
await sleep(0); await vi.advanceTimersByTimeAsync(500);
} finally {
vi.useRealTimers();
}
const childWait = waitCalls.find((call) => call.runId === childRunId); const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
expect(childWait?.timeoutMs).toBe(1000); expect(childWait?.timeoutMs).toBe(1000);
// Cleanup should patch the label // Cleanup should patch the label
expect(patchParams.key).toBe(childSessionKey); expect(patchParams.key).toBe(child.sessionKey);
expect(patchParams.label).toBe("my-task"); expect(patchParams.label).toBe("my-task");
// Two agent calls: subagent spawn + main agent trigger // Two agent calls: subagent spawn + main agent trigger
const agentCalls = calls.filter((c) => c.method === "agent"); const agentCalls = ctx.calls.filter((c) => c.method === "agent");
expect(agentCalls).toHaveLength(2); expect(agentCalls).toHaveLength(2);
// First call: subagent spawn // First call: subagent spawn
@@ -145,62 +192,25 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(second?.message).toContain("subagent task"); expect(second?.message).toContain("subagent task");
// No direct send to external channel (main agent handles delivery) // No direct send to external channel (main agent handles delivery)
const sendCalls = calls.filter((c) => c.method === "send"); const sendCalls = ctx.calls.filter((c) => c.method === "send");
expect(sendCalls.length).toBe(0); expect(sendCalls.length).toBe(0);
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true); expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
}); });
it("sessions_spawn runs cleanup via lifecycle events", async () => { it("sessions_spawn runs cleanup via lifecycle events", async () => {
resetSubagentRegistryForTests(); resetSubagentRegistryForTests();
callGatewayMock.mockReset(); callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let deletedKey: string | undefined; let deletedKey: string | undefined;
let childRunId: string | undefined; const ctx = setupSessionsSpawnGatewayMock({
let childSessionKey: string | undefined; onAgentSubagentSpawn: (params) => {
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = []; const rec = params as { channel?: string; timeout?: number } | undefined;
expect(rec?.channel).toBe("discord");
callGatewayMock.mockImplementation(async (opts: unknown) => { expect(rec?.timeout).toBe(1);
const request = opts as { method?: string; params?: unknown }; },
calls.push(request); onSessionsDelete: (params) => {
if (request.method === "agent") { const rec = params as { key?: string } | undefined;
agentCallCount += 1; deletedKey = rec?.key;
const runId = `run-${agentCallCount}`; },
const params = request.params as {
message?: string;
sessionKey?: string;
channel?: string;
timeout?: number;
lane?: string;
};
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
expect(params?.channel).toBe("discord");
expect(params?.timeout).toBe(1);
}
return {
runId,
status: "accepted",
acceptedAt: 1000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 1000,
endedAt: 2000,
};
}
if (request.method === "sessions.delete") {
const params = request.params as { key?: string } | undefined;
deletedKey = params?.key;
return { ok: true };
}
return {};
}); });
const tool = createOpenClawTools({ const tool = createOpenClawTools({
@@ -221,13 +231,14 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1", runId: "run-1",
}); });
if (!childRunId) { const child = ctx.getChild();
if (!child.runId) {
throw new Error("missing child runId"); throw new Error("missing child runId");
} }
vi.useFakeTimers(); vi.useFakeTimers();
try { try {
emitAgentEvent({ emitAgentEvent({
runId: childRunId, runId: child.runId,
stream: "lifecycle", stream: "lifecycle",
data: { data: {
phase: "end", phase: "end",
@@ -241,10 +252,10 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
vi.useRealTimers(); vi.useRealTimers();
} }
const childWait = waitCalls.find((call) => call.runId === childRunId); const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
expect(childWait?.timeoutMs).toBe(1000); expect(childWait?.timeoutMs).toBe(1000);
const agentCalls = calls.filter((call) => call.method === "agent"); const agentCalls = ctx.calls.filter((call) => call.method === "agent");
expect(agentCalls).toHaveLength(2); expect(agentCalls).toHaveLength(2);
const first = agentCalls[0]?.params as const first = agentCalls[0]?.params as
@@ -259,7 +270,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(first?.deliver).toBe(false); expect(first?.deliver).toBe(false);
expect(first?.channel).toBe("discord"); expect(first?.channel).toBe("discord");
expect(first?.sessionKey?.startsWith("agent:main:subagent:")).toBe(true); expect(first?.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true); expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
const second = agentCalls[1]?.params as const second = agentCalls[1]?.params as
| { | {
@@ -272,7 +283,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(second?.deliver).toBe(true); expect(second?.deliver).toBe(true);
expect(second?.message).toContain("subagent task"); expect(second?.message).toContain("subagent task");
const sendCalls = calls.filter((c) => c.method === "send"); const sendCalls = ctx.calls.filter((c) => c.method === "send");
expect(sendCalls.length).toBe(0); expect(sendCalls.length).toBe(0);
expect(deletedKey?.startsWith("agent:main:subagent:")).toBe(true); expect(deletedKey?.startsWith("agent:main:subagent:")).toBe(true);
@@ -281,65 +292,19 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
it("sessions_spawn deletes session when cleanup=delete via agent.wait", async () => { it("sessions_spawn deletes session when cleanup=delete via agent.wait", async () => {
resetSubagentRegistryForTests(); resetSubagentRegistryForTests();
callGatewayMock.mockReset(); callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let deletedKey: string | undefined; let deletedKey: string | undefined;
let childRunId: string | undefined; const ctx = setupSessionsSpawnGatewayMock({
let childSessionKey: string | undefined; includeChatHistory: true,
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = []; onAgentSubagentSpawn: (params) => {
const rec = params as { channel?: string; timeout?: number } | undefined;
callGatewayMock.mockImplementation(async (opts: unknown) => { expect(rec?.channel).toBe("discord");
const request = opts as { method?: string; params?: unknown }; expect(rec?.timeout).toBe(1);
calls.push(request); },
if (request.method === "agent") { onSessionsDelete: (params) => {
agentCallCount += 1; const rec = params as { key?: string } | undefined;
const runId = `run-${agentCallCount}`; deletedKey = rec?.key;
const params = request.params as { },
message?: string; agentWaitResult: { status: "ok", startedAt: 3000, endedAt: 4000 },
sessionKey?: string;
channel?: string;
timeout?: number;
lane?: string;
};
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
expect(params?.channel).toBe("discord");
expect(params?.timeout).toBe(1);
}
return {
runId,
status: "accepted",
acceptedAt: 2000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 3000,
endedAt: 4000,
};
}
if (request.method === "chat.history") {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}
if (request.method === "sessions.delete") {
const params = request.params as { key?: string } | undefined;
deletedKey = params?.key;
return { ok: true };
}
return {};
}); });
const tool = createOpenClawTools({ const tool = createOpenClawTools({
@@ -360,16 +325,20 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1", runId: "run-1",
}); });
await sleep(0); vi.useFakeTimers();
await sleep(0); try {
await sleep(0); await vi.advanceTimersByTimeAsync(500);
} finally {
vi.useRealTimers();
}
const childWait = waitCalls.find((call) => call.runId === childRunId); const child = ctx.getChild();
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
expect(childWait?.timeoutMs).toBe(1000); expect(childWait?.timeoutMs).toBe(1000);
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true); expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
// Two agent calls: subagent spawn + main agent trigger // Two agent calls: subagent spawn + main agent trigger
const agentCalls = calls.filter((call) => call.method === "agent"); const agentCalls = ctx.calls.filter((call) => call.method === "agent");
expect(agentCalls).toHaveLength(2); expect(agentCalls).toHaveLength(2);
// First call: subagent spawn // First call: subagent spawn
@@ -382,7 +351,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(second?.deliver).toBe(true); expect(second?.deliver).toBe(true);
// No direct send to external channel (main agent handles delivery) // No direct send to external channel (main agent handles delivery)
const sendCalls = calls.filter((c) => c.method === "send"); const sendCalls = ctx.calls.filter((c) => c.method === "send");
expect(sendCalls.length).toBe(0); expect(sendCalls.length).toBe(0);
// Session should be deleted // Session should be deleted
@@ -446,9 +415,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1", runId: "run-1",
}); });
await sleep(0); vi.useFakeTimers();
await sleep(0); try {
await sleep(0); await vi.advanceTimersByTimeAsync(500);
} finally {
vi.useRealTimers();
}
const mainAgentCall = calls const mainAgentCall = calls
.filter((call) => call.method === "agent") .filter((call) => call.method === "agent")

View File

@@ -271,7 +271,9 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
modelApplied: true, modelApplied: true,
}); });
const patchCall = calls.find((call) => call.method === "sessions.patch"); const patchCall = calls.find(
(call) => call.method === "sessions.patch" && (call.params as { model?: string })?.model,
);
expect(patchCall?.params).toMatchObject({ expect(patchCall?.params).toMatchObject({
model: "opencode/claude", model: "opencode/claude",
}); });
@@ -287,7 +289,11 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
const request = opts as { method?: string; params?: unknown }; const request = opts as { method?: string; params?: unknown };
calls.push(request); calls.push(request);
if (request.method === "sessions.patch") { if (request.method === "sessions.patch") {
throw new Error("invalid model: bad-model"); const params = request.params as { model?: unknown } | undefined;
if (typeof params?.model === "string" && params.model.trim()) {
throw new Error("invalid model: bad-model");
}
return { ok: true };
} }
if (request.method === "agent") { if (request.method === "agent") {
agentCallCount += 1; agentCallCount += 1;

View File

@@ -28,6 +28,7 @@ const SessionsSpawnToolSchema = Type.Object({
model: Type.Optional(Type.String()), model: Type.Optional(Type.String()),
thinking: Type.Optional(Type.String()), thinking: Type.Optional(Type.String()),
runTimeoutSeconds: Type.Optional(Type.Number({ minimum: 0 })), runTimeoutSeconds: Type.Optional(Type.Number({ minimum: 0 })),
timeoutSeconds: Type.Optional(Type.Number({ minimum: 0 })),
cleanup: optionalStringEnum(["delete", "keep"] as const), cleanup: optionalStringEnum(["delete", "keep"] as const),
}); });
@@ -97,10 +98,14 @@ export function createSessionsSpawnTool(opts?: {
}); });
// Default to 0 (no timeout) when omitted. Sub-agent runs are long-lived // Default to 0 (no timeout) when omitted. Sub-agent runs are long-lived
// by default and should not inherit the main agent 600s timeout. // by default and should not inherit the main agent 600s timeout.
const legacyTimeoutSeconds =
typeof params.timeoutSeconds === "number" && Number.isFinite(params.timeoutSeconds)
? Math.max(0, Math.floor(params.timeoutSeconds))
: undefined;
const runTimeoutSeconds = const runTimeoutSeconds =
typeof params.runTimeoutSeconds === "number" && Number.isFinite(params.runTimeoutSeconds) typeof params.runTimeoutSeconds === "number" && Number.isFinite(params.runTimeoutSeconds)
? Math.max(0, Math.floor(params.runTimeoutSeconds)) ? Math.max(0, Math.floor(params.runTimeoutSeconds))
: 0; : (legacyTimeoutSeconds ?? 0);
let modelWarning: string | undefined; let modelWarning: string | undefined;
let modelApplied = false; let modelApplied = false;