fix(subagents): announce delivery with descendant gating, frozen result refresh, and cron retry (#35080)

Thanks @tyler6204
2026-05-14 07:48:35 +00:00 · 2026-03-05 19:20:24 -08:00
parent fa3fafdde5
commit 81b93b9ce0
32 changed files with 3440 additions and 805 deletions
--- a/src/agents/subagent-registry.lifecycle-retry-grace.e2e.test.ts
+++ b/src/agents/subagent-registry.lifecycle-retry-grace.e2e.test.ts
@@ -14,6 +14,7 @@ type LifecycleData = {
 type LifecycleEvent = {
  stream?: string;
  runId: string;
+  sessionKey?: string;
  data?: LifecycleData;
 };

@@ -35,7 +36,10 @@ const loadConfigMock = vi.fn(() => ({
 }));
 const loadRegistryMock = vi.fn(() => new Map());
 const saveRegistryMock = vi.fn(() => {});
-const announceSpy = vi.fn(async () => true);
+const announceSpy = vi.fn(async (_params?: Record<string, unknown>) => true);
+const captureCompletionReplySpy = vi.fn(
+  async (_sessionKey?: string) => undefined as string | undefined,
+);

 vi.mock("../gateway/call.js", () => ({
  callGateway: callGatewayMock,
@@ -51,6 +55,7 @@ vi.mock("../config/config.js", () => ({

 vi.mock("./subagent-announce.js", () => ({
  runSubagentAnnounceFlow: announceSpy,
+  captureSubagentCompletionReply: captureCompletionReplySpy,
 }));

 vi.mock("../plugins/hook-runner-global.js", () => ({
@@ -71,10 +76,11 @@ describe("subagent registry lifecycle error grace", () => {

  beforeEach(() => {
    vi.useFakeTimers();
+    announceSpy.mockReset().mockResolvedValue(true);
+    captureCompletionReplySpy.mockReset().mockResolvedValue(undefined);
  });

  afterEach(() => {
-    announceSpy.mockClear();
    lifecycleHandler = undefined;
    mod.resetSubagentRegistryForTests({ persist: false });
    vi.useRealTimers();
@@ -85,6 +91,34 @@ describe("subagent registry lifecycle error grace", () => {
    await Promise.resolve();
  };

+  const waitForCleanupHandledFalse = async (runId: string) => {
+    for (let attempt = 0; attempt < 40; attempt += 1) {
+      const run = mod
+        .listSubagentRunsForRequester(MAIN_REQUESTER_SESSION_KEY)
+        .find((candidate) => candidate.runId === runId);
+      if (run?.cleanupHandled === false) {
+        return;
+      }
+      await vi.advanceTimersByTimeAsync(1);
+      await flushAsync();
+    }
+    throw new Error(`run ${runId} did not reach cleanupHandled=false in time`);
+  };
+
+  const waitForCleanupCompleted = async (runId: string) => {
+    for (let attempt = 0; attempt < 40; attempt += 1) {
+      const run = mod
+        .listSubagentRunsForRequester(MAIN_REQUESTER_SESSION_KEY)
+        .find((candidate) => candidate.runId === runId);
+      if (typeof run?.cleanupCompletedAt === "number") {
+        return run;
+      }
+      await vi.advanceTimersByTimeAsync(1);
+      await flushAsync();
+    }
+    throw new Error(`run ${runId} did not complete cleanup in time`);
+  };
+
  function registerCompletionRun(runId: string, childSuffix: string, task: string) {
    mod.registerSubagentRun({
      runId,
@@ -97,10 +131,15 @@ describe("subagent registry lifecycle error grace", () => {
    });
  }

-  function emitLifecycleEvent(runId: string, data: LifecycleData) {
+  function emitLifecycleEvent(
+    runId: string,
+    data: LifecycleData,
+    options?: { sessionKey?: string },
+  ) {
    lifecycleHandler?.({
      stream: "lifecycle",
      runId,
+      sessionKey: options?.sessionKey,
      data,
    });
  }
@@ -158,4 +197,183 @@ describe("subagent registry lifecycle error grace", () => {
    expect(readFirstAnnounceOutcome()?.status).toBe("error");
    expect(readFirstAnnounceOutcome()?.error).toBe("fatal failure");
  });
+
+  it("freezes completion result at run termination across deferred announce retries", async () => {
+    // Regression guard: late lifecycle noise must never overwrite the frozen completion reply.
+    registerCompletionRun("run-freeze", "freeze", "freeze test");
+    captureCompletionReplySpy.mockResolvedValueOnce("Final answer X");
+    announceSpy.mockResolvedValueOnce(false).mockResolvedValueOnce(true);
+
+    const endedAt = Date.now();
+    emitLifecycleEvent("run-freeze", { phase: "end", endedAt });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(1);
+    const firstCall = announceSpy.mock.calls[0]?.[0] as { roundOneReply?: string } | undefined;
+    expect(firstCall?.roundOneReply).toBe("Final answer X");
+
+    await waitForCleanupHandledFalse("run-freeze");
+
+    captureCompletionReplySpy.mockResolvedValueOnce("Late reply Y");
+    emitLifecycleEvent("run-freeze", { phase: "end", endedAt: endedAt + 100 });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(2);
+    const secondCall = announceSpy.mock.calls[1]?.[0] as { roundOneReply?: string } | undefined;
+    expect(secondCall?.roundOneReply).toBe("Final answer X");
+    expect(captureCompletionReplySpy).toHaveBeenCalledTimes(1);
+  });
+
+  it("refreshes frozen completion output from later turns in the same session", async () => {
+    registerCompletionRun("run-refresh", "refresh", "refresh frozen output test");
+    captureCompletionReplySpy.mockResolvedValueOnce(
+      "Both spawned. Waiting for completion events...",
+    );
+    announceSpy.mockResolvedValueOnce(false).mockResolvedValueOnce(true);
+
+    const endedAt = Date.now();
+    emitLifecycleEvent("run-refresh", { phase: "end", endedAt });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(1);
+    const firstCall = announceSpy.mock.calls[0]?.[0] as { roundOneReply?: string } | undefined;
+    expect(firstCall?.roundOneReply).toBe("Both spawned. Waiting for completion events...");
+
+    await waitForCleanupHandledFalse("run-refresh");
+
+    const runBeforeRefresh = mod
+      .listSubagentRunsForRequester(MAIN_REQUESTER_SESSION_KEY)
+      .find((candidate) => candidate.runId === "run-refresh");
+    const firstCapturedAt = runBeforeRefresh?.frozenResultCapturedAt ?? 0;
+
+    captureCompletionReplySpy.mockResolvedValueOnce(
+      "All 3 subagents complete. Here's the final summary.",
+    );
+    emitLifecycleEvent(
+      "run-refresh-followup-turn",
+      { phase: "end", endedAt: endedAt + 200 },
+      { sessionKey: "agent:main:subagent:refresh" },
+    );
+    await flushAsync();
+
+    const runAfterRefresh = mod
+      .listSubagentRunsForRequester(MAIN_REQUESTER_SESSION_KEY)
+      .find((candidate) => candidate.runId === "run-refresh");
+    expect(runAfterRefresh?.frozenResultText).toBe(
+      "All 3 subagents complete. Here's the final summary.",
+    );
+    expect((runAfterRefresh?.frozenResultCapturedAt ?? 0) >= firstCapturedAt).toBe(true);
+
+    emitLifecycleEvent("run-refresh", { phase: "end", endedAt: endedAt + 300 });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(2);
+    const secondCall = announceSpy.mock.calls[1]?.[0] as { roundOneReply?: string } | undefined;
+    expect(secondCall?.roundOneReply).toBe("All 3 subagents complete. Here's the final summary.");
+    expect(captureCompletionReplySpy).toHaveBeenCalledTimes(2);
+  });
+
+  it("ignores silent follow-up turns when refreshing frozen completion output", async () => {
+    registerCompletionRun("run-refresh-silent", "refresh-silent", "refresh silent test");
+    captureCompletionReplySpy.mockResolvedValueOnce("All work complete, final summary");
+    announceSpy.mockResolvedValueOnce(false).mockResolvedValueOnce(true);
+
+    const endedAt = Date.now();
+    emitLifecycleEvent("run-refresh-silent", { phase: "end", endedAt });
+    await flushAsync();
+    await waitForCleanupHandledFalse("run-refresh-silent");
+
+    captureCompletionReplySpy.mockResolvedValueOnce("NO_REPLY");
+    emitLifecycleEvent(
+      "run-refresh-silent-followup-turn",
+      { phase: "end", endedAt: endedAt + 200 },
+      { sessionKey: "agent:main:subagent:refresh-silent" },
+    );
+    await flushAsync();
+
+    const runAfterSilent = mod
+      .listSubagentRunsForRequester(MAIN_REQUESTER_SESSION_KEY)
+      .find((candidate) => candidate.runId === "run-refresh-silent");
+    expect(runAfterSilent?.frozenResultText).toBe("All work complete, final summary");
+
+    emitLifecycleEvent("run-refresh-silent", { phase: "end", endedAt: endedAt + 300 });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(2);
+    const secondCall = announceSpy.mock.calls[1]?.[0] as { roundOneReply?: string } | undefined;
+    expect(secondCall?.roundOneReply).toBe("All work complete, final summary");
+    expect(captureCompletionReplySpy).toHaveBeenCalledTimes(2);
+  });
+
+  it("regression, captures frozen completion output with 100KB cap and retains it for keep-mode cleanup", async () => {
+    registerCompletionRun("run-capped", "capped", "capped result test");
+    captureCompletionReplySpy.mockResolvedValueOnce("x".repeat(120 * 1024));
+    announceSpy.mockResolvedValueOnce(true);
+
+    emitLifecycleEvent("run-capped", { phase: "end", endedAt: Date.now() });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(1);
+    const call = announceSpy.mock.calls[0]?.[0] as { roundOneReply?: string } | undefined;
+    expect(call?.roundOneReply).toContain("[truncated: frozen completion output exceeded 100KB");
+    expect(Buffer.byteLength(call?.roundOneReply ?? "", "utf8")).toBeLessThanOrEqual(100 * 1024);
+
+    const run = await waitForCleanupCompleted("run-capped");
+    expect(typeof run.frozenResultText).toBe("string");
+    expect(run.frozenResultText).toContain("[truncated: frozen completion output exceeded 100KB");
+    expect(run.frozenResultCapturedAt).toBeTypeOf("number");
+  });
+
+  it("keeps parallel child completion results frozen even when late traffic arrives", async () => {
+    // Regression guard: fan-out retries must preserve each child's first frozen result text.
+    registerCompletionRun("run-parallel-a", "parallel-a", "parallel a");
+    registerCompletionRun("run-parallel-b", "parallel-b", "parallel b");
+    captureCompletionReplySpy
+      .mockResolvedValueOnce("Final answer A")
+      .mockResolvedValueOnce("Final answer B");
+    announceSpy
+      .mockResolvedValueOnce(false)
+      .mockResolvedValueOnce(false)
+      .mockResolvedValueOnce(true)
+      .mockResolvedValueOnce(true);
+
+    const parallelEndedAt = Date.now();
+    emitLifecycleEvent("run-parallel-a", { phase: "end", endedAt: parallelEndedAt });
+    emitLifecycleEvent("run-parallel-b", { phase: "end", endedAt: parallelEndedAt + 1 });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(2);
+    await waitForCleanupHandledFalse("run-parallel-a");
+    await waitForCleanupHandledFalse("run-parallel-b");
+
+    captureCompletionReplySpy.mockResolvedValue("Late overwrite");
+
+    emitLifecycleEvent("run-parallel-a", { phase: "end", endedAt: parallelEndedAt + 100 });
+    emitLifecycleEvent("run-parallel-b", { phase: "end", endedAt: parallelEndedAt + 101 });
+    await flushAsync();
+
+    expect(announceSpy).toHaveBeenCalledTimes(4);
+
+    const callsByRun = new Map<string, Array<{ roundOneReply?: string }>>();
+    for (const call of announceSpy.mock.calls) {
+      const params = (call?.[0] ?? {}) as { childRunId?: string; roundOneReply?: string };
+      const runId = params.childRunId;
+      if (!runId) {
+        continue;
+      }
+      const existing = callsByRun.get(runId) ?? [];
+      existing.push({ roundOneReply: params.roundOneReply });
+      callsByRun.set(runId, existing);
+    }
+
+    expect(callsByRun.get("run-parallel-a")?.map((entry) => entry.roundOneReply)).toEqual([
+      "Final answer A",
+      "Final answer A",
+    ]);
+    expect(callsByRun.get("run-parallel-b")?.map((entry) => entry.roundOneReply)).toEqual([
+      "Final answer B",
+      "Final answer B",
+    ]);
+    expect(captureCompletionReplySpy).toHaveBeenCalledTimes(2);
+  });
 });