mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 17:34:35 +00:00
feat: thread-bound subagents on Discord (#21805)
* docs: thread-bound subagents plan * docs: add exact thread-bound subagent implementation touchpoints * Docs: prioritize auto thread-bound subagent flow * Docs: add ACP harness thread-binding extensions * Discord: add thread-bound session routing and auto-bind spawn flow * Subagents: add focus commands and ACP/session binding lifecycle hooks * Tests: cover thread bindings, focus commands, and ACP unbind hooks * Docs: add plugin-hook appendix for thread-bound subagents * Plugins: add subagent lifecycle hook events * Core: emit subagent lifecycle hooks and decouple Discord bindings * Discord: handle subagent bind lifecycle via plugin hooks * Subagents: unify completion finalizer and split registry modules * Add subagent lifecycle events module * Hooks: fix subagent ended context key * Discord: share thread bindings across ESM and Jiti * Subagents: add persistent sessions_spawn mode for thread-bound sessions * Subagents: clarify thread intro and persistent completion copy * test(subagents): stabilize sessions_spawn lifecycle cleanup assertions * Discord: add thread-bound session TTL with auto-unfocus * Subagents: fail session spawns when thread bind fails * Subagents: cover thread session failure cleanup paths * Session: add thread binding TTL config and /session ttl controls * Tests: align discord reaction expectations * Agent: persist sessionFile for keyed subagent sessions * Discord: normalize imports after conflict resolution * Sessions: centralize sessionFile resolve/persist helper * Discord: harden thread-bound subagent session routing * Rebase: resolve upstream/main conflicts * Subagents: move thread binding into hooks and split bindings modules * Docs: add channel-agnostic subagent routing hook plan * Agents: decouple subagent routing from Discord * Discord: refactor thread-bound subagent flows * Subagents: prevent duplicate end hooks and orphaned failed sessions * Refactor: split subagent command and provider phases * Subagents: honor hook delivery target overrides * Discord: add thread binding kill switches and refresh plan doc * Discord: fix thread bind channel resolution * Routing: centralize account id normalization * Discord: clean up thread bindings on startup failures * Discord: add startup cleanup regression tests * Docs: add long-term thread-bound subagent architecture * Docs: split session binding plan and dedupe thread-bound doc * Subagents: add channel-agnostic session binding routing * Subagents: stabilize announce completion routing tests * Subagents: cover multi-bound completion routing * Subagents: suppress lifecycle hooks on failed thread bind * tests: fix discord provider mock typing regressions * docs/protocol: sync slash command aliases and delete param models * fix: add changelog entry for Discord thread-bound subagents (#21805) (thanks @onutc) --------- Co-authored-by: Shadow <hi@shadowing.dev>
This commit is contained in:
@@ -6,36 +6,38 @@ import { type DeliveryContext, normalizeDeliveryContext } from "../utils/deliver
|
||||
import { resetAnnounceQueuesForTests } from "./subagent-announce-queue.js";
|
||||
import { runSubagentAnnounceFlow, type SubagentRunOutcome } from "./subagent-announce.js";
|
||||
import {
|
||||
loadSubagentRegistryFromDisk,
|
||||
saveSubagentRegistryToDisk,
|
||||
} from "./subagent-registry.store.js";
|
||||
SUBAGENT_ENDED_OUTCOME_KILLED,
|
||||
SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
SUBAGENT_ENDED_REASON_ERROR,
|
||||
SUBAGENT_ENDED_REASON_KILLED,
|
||||
type SubagentLifecycleEndedReason,
|
||||
} from "./subagent-lifecycle-events.js";
|
||||
import {
|
||||
resolveCleanupCompletionReason,
|
||||
resolveDeferredCleanupDecision,
|
||||
} from "./subagent-registry-cleanup.js";
|
||||
import {
|
||||
emitSubagentEndedHookOnce,
|
||||
resolveLifecycleOutcomeFromRunOutcome,
|
||||
runOutcomesEqual,
|
||||
} from "./subagent-registry-completion.js";
|
||||
import {
|
||||
countActiveDescendantRunsFromRuns,
|
||||
countActiveRunsForSessionFromRuns,
|
||||
findRunIdsByChildSessionKeyFromRuns,
|
||||
listDescendantRunsForRequesterFromRuns,
|
||||
listRunsForRequesterFromRuns,
|
||||
resolveRequesterForChildSessionFromRuns,
|
||||
} from "./subagent-registry-queries.js";
|
||||
import {
|
||||
getSubagentRunsSnapshotForRead,
|
||||
persistSubagentRunsToDisk,
|
||||
restoreSubagentRunsFromDisk,
|
||||
} from "./subagent-registry-state.js";
|
||||
import type { SubagentRunRecord } from "./subagent-registry.types.js";
|
||||
import { resolveAgentTimeoutMs } from "./timeout.js";
|
||||
|
||||
export type SubagentRunRecord = {
|
||||
runId: string;
|
||||
childSessionKey: string;
|
||||
requesterSessionKey: string;
|
||||
requesterOrigin?: DeliveryContext;
|
||||
requesterDisplayKey: string;
|
||||
task: string;
|
||||
cleanup: "delete" | "keep";
|
||||
label?: string;
|
||||
model?: string;
|
||||
runTimeoutSeconds?: number;
|
||||
createdAt: number;
|
||||
startedAt?: number;
|
||||
endedAt?: number;
|
||||
outcome?: SubagentRunOutcome;
|
||||
archiveAtMs?: number;
|
||||
cleanupCompletedAt?: number;
|
||||
cleanupHandled?: boolean;
|
||||
suppressAnnounceReason?: "steer-restart" | "killed";
|
||||
expectsCompletionMessage?: boolean;
|
||||
/** Number of times announce delivery has been attempted and returned false (deferred). */
|
||||
announceRetryCount?: number;
|
||||
/** Timestamp of the last announce retry attempt (for backoff). */
|
||||
lastAnnounceRetryAt?: number;
|
||||
};
|
||||
export type { SubagentRunRecord } from "./subagent-registry.types.js";
|
||||
|
||||
const subagentRuns = new Map<string, SubagentRunRecord>();
|
||||
let sweeper: NodeJS.Timeout | null = null;
|
||||
@@ -77,19 +79,117 @@ function logAnnounceGiveUp(entry: SubagentRunRecord, reason: "retry-limit" | "ex
|
||||
}
|
||||
|
||||
function persistSubagentRuns() {
|
||||
try {
|
||||
saveSubagentRegistryToDisk(subagentRuns);
|
||||
} catch {
|
||||
// ignore persistence failures
|
||||
}
|
||||
persistSubagentRunsToDisk(subagentRuns);
|
||||
}
|
||||
|
||||
const resumedRuns = new Set<string>();
|
||||
const endedHookInFlightRunIds = new Set<string>();
|
||||
|
||||
function suppressAnnounceForSteerRestart(entry?: SubagentRunRecord) {
|
||||
return entry?.suppressAnnounceReason === "steer-restart";
|
||||
}
|
||||
|
||||
function shouldKeepThreadBindingAfterRun(params: {
|
||||
entry: SubagentRunRecord;
|
||||
reason: SubagentLifecycleEndedReason;
|
||||
}) {
|
||||
if (params.reason === SUBAGENT_ENDED_REASON_KILLED) {
|
||||
return false;
|
||||
}
|
||||
return params.entry.spawnMode === "session";
|
||||
}
|
||||
|
||||
function shouldEmitEndedHookForRun(params: {
|
||||
entry: SubagentRunRecord;
|
||||
reason: SubagentLifecycleEndedReason;
|
||||
}) {
|
||||
return !shouldKeepThreadBindingAfterRun(params);
|
||||
}
|
||||
|
||||
async function emitSubagentEndedHookForRun(params: {
|
||||
entry: SubagentRunRecord;
|
||||
reason?: SubagentLifecycleEndedReason;
|
||||
sendFarewell?: boolean;
|
||||
accountId?: string;
|
||||
}) {
|
||||
const reason = params.reason ?? params.entry.endedReason ?? SUBAGENT_ENDED_REASON_COMPLETE;
|
||||
const outcome = resolveLifecycleOutcomeFromRunOutcome(params.entry.outcome);
|
||||
const error = params.entry.outcome?.status === "error" ? params.entry.outcome.error : undefined;
|
||||
await emitSubagentEndedHookOnce({
|
||||
entry: params.entry,
|
||||
reason,
|
||||
sendFarewell: params.sendFarewell,
|
||||
accountId: params.accountId ?? params.entry.requesterOrigin?.accountId,
|
||||
outcome,
|
||||
error,
|
||||
inFlightRunIds: endedHookInFlightRunIds,
|
||||
persist: persistSubagentRuns,
|
||||
});
|
||||
}
|
||||
|
||||
async function completeSubagentRun(params: {
|
||||
runId: string;
|
||||
endedAt?: number;
|
||||
outcome: SubagentRunOutcome;
|
||||
reason: SubagentLifecycleEndedReason;
|
||||
sendFarewell?: boolean;
|
||||
accountId?: string;
|
||||
triggerCleanup: boolean;
|
||||
}) {
|
||||
const entry = subagentRuns.get(params.runId);
|
||||
if (!entry) {
|
||||
return;
|
||||
}
|
||||
|
||||
let mutated = false;
|
||||
const endedAt = typeof params.endedAt === "number" ? params.endedAt : Date.now();
|
||||
if (entry.endedAt !== endedAt) {
|
||||
entry.endedAt = endedAt;
|
||||
mutated = true;
|
||||
}
|
||||
if (!runOutcomesEqual(entry.outcome, params.outcome)) {
|
||||
entry.outcome = params.outcome;
|
||||
mutated = true;
|
||||
}
|
||||
if (entry.endedReason !== params.reason) {
|
||||
entry.endedReason = params.reason;
|
||||
mutated = true;
|
||||
}
|
||||
|
||||
if (mutated) {
|
||||
persistSubagentRuns();
|
||||
}
|
||||
|
||||
const suppressedForSteerRestart = suppressAnnounceForSteerRestart(entry);
|
||||
const shouldEmitEndedHook =
|
||||
!suppressedForSteerRestart &&
|
||||
shouldEmitEndedHookForRun({
|
||||
entry,
|
||||
reason: params.reason,
|
||||
});
|
||||
const shouldDeferEndedHook =
|
||||
shouldEmitEndedHook &&
|
||||
params.triggerCleanup &&
|
||||
entry.expectsCompletionMessage === true &&
|
||||
!suppressedForSteerRestart;
|
||||
if (!shouldDeferEndedHook && shouldEmitEndedHook) {
|
||||
await emitSubagentEndedHookForRun({
|
||||
entry,
|
||||
reason: params.reason,
|
||||
sendFarewell: params.sendFarewell,
|
||||
accountId: params.accountId,
|
||||
});
|
||||
}
|
||||
|
||||
if (!params.triggerCleanup) {
|
||||
return;
|
||||
}
|
||||
if (suppressedForSteerRestart) {
|
||||
return;
|
||||
}
|
||||
startSubagentAnnounceCleanupFlow(params.runId, entry);
|
||||
}
|
||||
|
||||
function startSubagentAnnounceCleanupFlow(runId: string, entry: SubagentRunRecord): boolean {
|
||||
if (!beginSubagentCleanup(runId)) {
|
||||
return false;
|
||||
@@ -102,7 +202,6 @@ function startSubagentAnnounceCleanupFlow(runId: string, entry: SubagentRunRecor
|
||||
requesterOrigin,
|
||||
requesterDisplayKey: entry.requesterDisplayKey,
|
||||
task: entry.task,
|
||||
expectsCompletionMessage: entry.expectsCompletionMessage,
|
||||
timeoutMs: SUBAGENT_ANNOUNCE_TIMEOUT_MS,
|
||||
cleanup: entry.cleanup,
|
||||
waitForCompletion: false,
|
||||
@@ -110,8 +209,10 @@ function startSubagentAnnounceCleanupFlow(runId: string, entry: SubagentRunRecor
|
||||
endedAt: entry.endedAt,
|
||||
label: entry.label,
|
||||
outcome: entry.outcome,
|
||||
spawnMode: entry.spawnMode,
|
||||
expectsCompletionMessage: entry.expectsCompletionMessage,
|
||||
}).then((didAnnounce) => {
|
||||
finalizeSubagentCleanup(runId, entry.cleanup, didAnnounce);
|
||||
void finalizeSubagentCleanup(runId, entry.cleanup, didAnnounce);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
@@ -182,20 +283,13 @@ function restoreSubagentRunsOnce() {
|
||||
}
|
||||
restoreAttempted = true;
|
||||
try {
|
||||
const restored = loadSubagentRegistryFromDisk();
|
||||
if (restored.size === 0) {
|
||||
const restoredCount = restoreSubagentRunsFromDisk({
|
||||
runs: subagentRuns,
|
||||
mergeOnly: true,
|
||||
});
|
||||
if (restoredCount === 0) {
|
||||
return;
|
||||
}
|
||||
for (const [runId, entry] of restored.entries()) {
|
||||
if (!runId || !entry) {
|
||||
continue;
|
||||
}
|
||||
// Keep any newer in-memory entries.
|
||||
if (!subagentRuns.has(runId)) {
|
||||
subagentRuns.set(runId, entry);
|
||||
}
|
||||
}
|
||||
|
||||
// Resume pending work.
|
||||
ensureListener();
|
||||
if ([...subagentRuns.values()].some((entry) => entry.archiveAtMs)) {
|
||||
@@ -255,7 +349,11 @@ async function sweepSubagentRuns() {
|
||||
try {
|
||||
await callGateway({
|
||||
method: "sessions.delete",
|
||||
params: { key: entry.childSessionKey, deleteTranscript: true },
|
||||
params: {
|
||||
key: entry.childSessionKey,
|
||||
deleteTranscript: true,
|
||||
emitLifecycleHooks: false,
|
||||
},
|
||||
timeoutMs: 10_000,
|
||||
});
|
||||
} catch {
|
||||
@@ -276,93 +374,154 @@ function ensureListener() {
|
||||
}
|
||||
listenerStarted = true;
|
||||
listenerStop = onAgentEvent((evt) => {
|
||||
if (!evt || evt.stream !== "lifecycle") {
|
||||
return;
|
||||
}
|
||||
const entry = subagentRuns.get(evt.runId);
|
||||
if (!entry) {
|
||||
return;
|
||||
}
|
||||
const phase = evt.data?.phase;
|
||||
if (phase === "start") {
|
||||
const startedAt = typeof evt.data?.startedAt === "number" ? evt.data.startedAt : undefined;
|
||||
if (startedAt) {
|
||||
entry.startedAt = startedAt;
|
||||
persistSubagentRuns();
|
||||
void (async () => {
|
||||
if (!evt || evt.stream !== "lifecycle") {
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (phase !== "end" && phase !== "error") {
|
||||
return;
|
||||
}
|
||||
const endedAt = typeof evt.data?.endedAt === "number" ? evt.data.endedAt : Date.now();
|
||||
entry.endedAt = endedAt;
|
||||
if (phase === "error") {
|
||||
const entry = subagentRuns.get(evt.runId);
|
||||
if (!entry) {
|
||||
return;
|
||||
}
|
||||
const phase = evt.data?.phase;
|
||||
if (phase === "start") {
|
||||
const startedAt = typeof evt.data?.startedAt === "number" ? evt.data.startedAt : undefined;
|
||||
if (startedAt) {
|
||||
entry.startedAt = startedAt;
|
||||
persistSubagentRuns();
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (phase !== "end" && phase !== "error") {
|
||||
return;
|
||||
}
|
||||
const endedAt = typeof evt.data?.endedAt === "number" ? evt.data.endedAt : Date.now();
|
||||
const error = typeof evt.data?.error === "string" ? evt.data.error : undefined;
|
||||
entry.outcome = { status: "error", error };
|
||||
} else if (evt.data?.aborted) {
|
||||
entry.outcome = { status: "timeout" };
|
||||
} else {
|
||||
entry.outcome = { status: "ok" };
|
||||
}
|
||||
persistSubagentRuns();
|
||||
|
||||
if (suppressAnnounceForSteerRestart(entry)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!startSubagentAnnounceCleanupFlow(evt.runId, entry)) {
|
||||
return;
|
||||
}
|
||||
const outcome: SubagentRunOutcome =
|
||||
phase === "error"
|
||||
? { status: "error", error }
|
||||
: evt.data?.aborted
|
||||
? { status: "timeout" }
|
||||
: { status: "ok" };
|
||||
await completeSubagentRun({
|
||||
runId: evt.runId,
|
||||
endedAt,
|
||||
outcome,
|
||||
reason: phase === "error" ? SUBAGENT_ENDED_REASON_ERROR : SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
sendFarewell: true,
|
||||
accountId: entry.requesterOrigin?.accountId,
|
||||
triggerCleanup: true,
|
||||
});
|
||||
})();
|
||||
});
|
||||
}
|
||||
|
||||
function finalizeSubagentCleanup(runId: string, cleanup: "delete" | "keep", didAnnounce: boolean) {
|
||||
async function finalizeSubagentCleanup(
|
||||
runId: string,
|
||||
cleanup: "delete" | "keep",
|
||||
didAnnounce: boolean,
|
||||
) {
|
||||
const entry = subagentRuns.get(runId);
|
||||
if (!entry) {
|
||||
return;
|
||||
}
|
||||
if (!didAnnounce) {
|
||||
const now = Date.now();
|
||||
const retryCount = (entry.announceRetryCount ?? 0) + 1;
|
||||
entry.announceRetryCount = retryCount;
|
||||
if (didAnnounce) {
|
||||
const completionReason = resolveCleanupCompletionReason(entry);
|
||||
await emitCompletionEndedHookIfNeeded(entry, completionReason);
|
||||
completeCleanupBookkeeping({
|
||||
runId,
|
||||
entry,
|
||||
cleanup,
|
||||
completedAt: Date.now(),
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
const deferredDecision = resolveDeferredCleanupDecision({
|
||||
entry,
|
||||
now,
|
||||
activeDescendantRuns: Math.max(0, countActiveDescendantRuns(entry.childSessionKey)),
|
||||
announceExpiryMs: ANNOUNCE_EXPIRY_MS,
|
||||
maxAnnounceRetryCount: MAX_ANNOUNCE_RETRY_COUNT,
|
||||
deferDescendantDelayMs: MIN_ANNOUNCE_RETRY_DELAY_MS,
|
||||
resolveAnnounceRetryDelayMs,
|
||||
});
|
||||
|
||||
if (deferredDecision.kind === "defer-descendants") {
|
||||
entry.lastAnnounceRetryAt = now;
|
||||
|
||||
// Check if the announce has exceeded retry limits or expired (#18264).
|
||||
const endedAgo = typeof entry.endedAt === "number" ? now - entry.endedAt : 0;
|
||||
if (retryCount >= MAX_ANNOUNCE_RETRY_COUNT || endedAgo > ANNOUNCE_EXPIRY_MS) {
|
||||
// Give up: mark as completed to break the infinite retry loop.
|
||||
logAnnounceGiveUp(entry, retryCount >= MAX_ANNOUNCE_RETRY_COUNT ? "retry-limit" : "expiry");
|
||||
entry.cleanupCompletedAt = now;
|
||||
persistSubagentRuns();
|
||||
retryDeferredCompletedAnnounces(runId);
|
||||
return;
|
||||
}
|
||||
|
||||
// Allow retry on the next wake if announce was deferred or failed.
|
||||
entry.cleanupHandled = false;
|
||||
resumedRuns.delete(runId);
|
||||
persistSubagentRuns();
|
||||
if (entry.expectsCompletionMessage !== true) {
|
||||
return;
|
||||
}
|
||||
setTimeout(
|
||||
() => {
|
||||
resumeSubagentRun(runId);
|
||||
},
|
||||
resolveAnnounceRetryDelayMs(entry.announceRetryCount ?? 0),
|
||||
).unref?.();
|
||||
setTimeout(() => {
|
||||
resumeSubagentRun(runId);
|
||||
}, deferredDecision.delayMs).unref?.();
|
||||
return;
|
||||
}
|
||||
if (cleanup === "delete") {
|
||||
subagentRuns.delete(runId);
|
||||
persistSubagentRuns();
|
||||
retryDeferredCompletedAnnounces(runId);
|
||||
|
||||
if (deferredDecision.retryCount != null) {
|
||||
entry.announceRetryCount = deferredDecision.retryCount;
|
||||
entry.lastAnnounceRetryAt = now;
|
||||
}
|
||||
|
||||
if (deferredDecision.kind === "give-up") {
|
||||
const completionReason = resolveCleanupCompletionReason(entry);
|
||||
await emitCompletionEndedHookIfNeeded(entry, completionReason);
|
||||
logAnnounceGiveUp(entry, deferredDecision.reason);
|
||||
completeCleanupBookkeeping({
|
||||
runId,
|
||||
entry,
|
||||
cleanup: "keep",
|
||||
completedAt: now,
|
||||
});
|
||||
return;
|
||||
}
|
||||
entry.cleanupCompletedAt = Date.now();
|
||||
|
||||
// Allow retry on the next wake if announce was deferred or failed.
|
||||
entry.cleanupHandled = false;
|
||||
resumedRuns.delete(runId);
|
||||
persistSubagentRuns();
|
||||
retryDeferredCompletedAnnounces(runId);
|
||||
if (deferredDecision.resumeDelayMs == null) {
|
||||
return;
|
||||
}
|
||||
setTimeout(() => {
|
||||
resumeSubagentRun(runId);
|
||||
}, deferredDecision.resumeDelayMs).unref?.();
|
||||
}
|
||||
|
||||
async function emitCompletionEndedHookIfNeeded(
|
||||
entry: SubagentRunRecord,
|
||||
reason: SubagentLifecycleEndedReason,
|
||||
) {
|
||||
if (
|
||||
entry.expectsCompletionMessage === true &&
|
||||
shouldEmitEndedHookForRun({
|
||||
entry,
|
||||
reason,
|
||||
})
|
||||
) {
|
||||
await emitSubagentEndedHookForRun({
|
||||
entry,
|
||||
reason,
|
||||
sendFarewell: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function completeCleanupBookkeeping(params: {
|
||||
runId: string;
|
||||
entry: SubagentRunRecord;
|
||||
cleanup: "delete" | "keep";
|
||||
completedAt: number;
|
||||
}) {
|
||||
if (params.cleanup === "delete") {
|
||||
subagentRuns.delete(params.runId);
|
||||
persistSubagentRuns();
|
||||
retryDeferredCompletedAnnounces(params.runId);
|
||||
return;
|
||||
}
|
||||
params.entry.cleanupCompletedAt = params.completedAt;
|
||||
persistSubagentRuns();
|
||||
retryDeferredCompletedAnnounces(params.runId);
|
||||
}
|
||||
|
||||
function retryDeferredCompletedAnnounces(excludeRunId?: string) {
|
||||
@@ -475,7 +634,9 @@ export function replaceSubagentRunAfterSteer(params: {
|
||||
const now = Date.now();
|
||||
const cfg = loadConfig();
|
||||
const archiveAfterMs = resolveArchiveAfterMs(cfg);
|
||||
const archiveAtMs = archiveAfterMs ? now + archiveAfterMs : undefined;
|
||||
const spawnMode = source.spawnMode === "session" ? "session" : "run";
|
||||
const archiveAtMs =
|
||||
spawnMode === "session" ? undefined : archiveAfterMs ? now + archiveAfterMs : undefined;
|
||||
const runTimeoutSeconds = params.runTimeoutSeconds ?? source.runTimeoutSeconds ?? 0;
|
||||
const waitTimeoutMs = resolveSubagentWaitTimeoutMs(cfg, runTimeoutSeconds);
|
||||
|
||||
@@ -484,12 +645,15 @@ export function replaceSubagentRunAfterSteer(params: {
|
||||
runId: nextRunId,
|
||||
startedAt: now,
|
||||
endedAt: undefined,
|
||||
endedReason: undefined,
|
||||
endedHookEmittedAt: undefined,
|
||||
outcome: undefined,
|
||||
cleanupCompletedAt: undefined,
|
||||
cleanupHandled: false,
|
||||
suppressAnnounceReason: undefined,
|
||||
announceRetryCount: undefined,
|
||||
lastAnnounceRetryAt: undefined,
|
||||
spawnMode,
|
||||
archiveAtMs,
|
||||
runTimeoutSeconds,
|
||||
};
|
||||
@@ -516,11 +680,14 @@ export function registerSubagentRun(params: {
|
||||
model?: string;
|
||||
runTimeoutSeconds?: number;
|
||||
expectsCompletionMessage?: boolean;
|
||||
spawnMode?: "run" | "session";
|
||||
}) {
|
||||
const now = Date.now();
|
||||
const cfg = loadConfig();
|
||||
const archiveAfterMs = resolveArchiveAfterMs(cfg);
|
||||
const archiveAtMs = archiveAfterMs ? now + archiveAfterMs : undefined;
|
||||
const spawnMode = params.spawnMode === "session" ? "session" : "run";
|
||||
const archiveAtMs =
|
||||
spawnMode === "session" ? undefined : archiveAfterMs ? now + archiveAfterMs : undefined;
|
||||
const runTimeoutSeconds = params.runTimeoutSeconds ?? 0;
|
||||
const waitTimeoutMs = resolveSubagentWaitTimeoutMs(cfg, runTimeoutSeconds);
|
||||
const requesterOrigin = normalizeDeliveryContext(params.requesterOrigin);
|
||||
@@ -533,6 +700,7 @@ export function registerSubagentRun(params: {
|
||||
task: params.task,
|
||||
cleanup: params.cleanup,
|
||||
expectsCompletionMessage: params.expectsCompletionMessage,
|
||||
spawnMode,
|
||||
label: params.label,
|
||||
model: params.model,
|
||||
runTimeoutSeconds,
|
||||
@@ -543,7 +711,7 @@ export function registerSubagentRun(params: {
|
||||
});
|
||||
ensureListener();
|
||||
persistSubagentRuns();
|
||||
if (archiveAfterMs) {
|
||||
if (archiveAtMs) {
|
||||
startSweeper();
|
||||
}
|
||||
// Wait for subagent completion via gateway RPC (cross-process).
|
||||
@@ -588,22 +756,29 @@ async function waitForSubagentCompletion(runId: string, waitTimeoutMs: number) {
|
||||
mutated = true;
|
||||
}
|
||||
const waitError = typeof wait.error === "string" ? wait.error : undefined;
|
||||
entry.outcome =
|
||||
const outcome: SubagentRunOutcome =
|
||||
wait.status === "error"
|
||||
? { status: "error", error: waitError }
|
||||
: wait.status === "timeout"
|
||||
? { status: "timeout" }
|
||||
: { status: "ok" };
|
||||
mutated = true;
|
||||
if (!runOutcomesEqual(entry.outcome, outcome)) {
|
||||
entry.outcome = outcome;
|
||||
mutated = true;
|
||||
}
|
||||
if (mutated) {
|
||||
persistSubagentRuns();
|
||||
}
|
||||
if (suppressAnnounceForSteerRestart(entry)) {
|
||||
return;
|
||||
}
|
||||
if (!startSubagentAnnounceCleanupFlow(runId, entry)) {
|
||||
return;
|
||||
}
|
||||
await completeSubagentRun({
|
||||
runId,
|
||||
endedAt: entry.endedAt,
|
||||
outcome,
|
||||
reason:
|
||||
wait.status === "error" ? SUBAGENT_ENDED_REASON_ERROR : SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
sendFarewell: true,
|
||||
accountId: entry.requesterOrigin?.accountId,
|
||||
triggerCleanup: true,
|
||||
});
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
@@ -612,6 +787,7 @@ async function waitForSubagentCompletion(runId: string, waitTimeoutMs: number) {
|
||||
export function resetSubagentRegistryForTests(opts?: { persist?: boolean }) {
|
||||
subagentRuns.clear();
|
||||
resumedRuns.clear();
|
||||
endedHookInFlightRunIds.clear();
|
||||
resetAnnounceQueuesForTests();
|
||||
stopSweeper();
|
||||
restoreAttempted = false;
|
||||
@@ -640,62 +816,23 @@ export function releaseSubagentRun(runId: string) {
|
||||
}
|
||||
|
||||
function findRunIdsByChildSessionKey(childSessionKey: string): string[] {
|
||||
const key = childSessionKey.trim();
|
||||
if (!key) {
|
||||
return [];
|
||||
}
|
||||
const runIds: string[] = [];
|
||||
for (const [runId, entry] of subagentRuns.entries()) {
|
||||
if (entry.childSessionKey === key) {
|
||||
runIds.push(runId);
|
||||
}
|
||||
}
|
||||
return runIds;
|
||||
}
|
||||
|
||||
function getRunsSnapshotForRead(): Map<string, SubagentRunRecord> {
|
||||
const merged = new Map<string, SubagentRunRecord>();
|
||||
const shouldReadDisk = !(process.env.VITEST || process.env.NODE_ENV === "test");
|
||||
if (shouldReadDisk) {
|
||||
try {
|
||||
// Registry state is persisted to disk so other worker processes (for
|
||||
// example cron runners) can observe active children spawned elsewhere.
|
||||
for (const [runId, entry] of loadSubagentRegistryFromDisk().entries()) {
|
||||
merged.set(runId, entry);
|
||||
}
|
||||
} catch {
|
||||
// Ignore disk read failures and fall back to local memory state.
|
||||
}
|
||||
}
|
||||
for (const [runId, entry] of subagentRuns.entries()) {
|
||||
merged.set(runId, entry);
|
||||
}
|
||||
return merged;
|
||||
return findRunIdsByChildSessionKeyFromRuns(subagentRuns, childSessionKey);
|
||||
}
|
||||
|
||||
export function resolveRequesterForChildSession(childSessionKey: string): {
|
||||
requesterSessionKey: string;
|
||||
requesterOrigin?: DeliveryContext;
|
||||
} | null {
|
||||
const key = childSessionKey.trim();
|
||||
if (!key) {
|
||||
return null;
|
||||
}
|
||||
let best: SubagentRunRecord | undefined;
|
||||
for (const entry of getRunsSnapshotForRead().values()) {
|
||||
if (entry.childSessionKey !== key) {
|
||||
continue;
|
||||
}
|
||||
if (!best || entry.createdAt > best.createdAt) {
|
||||
best = entry;
|
||||
}
|
||||
}
|
||||
if (!best) {
|
||||
const resolved = resolveRequesterForChildSessionFromRuns(
|
||||
getSubagentRunsSnapshotForRead(subagentRuns),
|
||||
childSessionKey,
|
||||
);
|
||||
if (!resolved) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
requesterSessionKey: best.requesterSessionKey,
|
||||
requesterOrigin: normalizeDeliveryContext(best.requesterOrigin),
|
||||
requesterSessionKey: resolved.requesterSessionKey,
|
||||
requesterOrigin: normalizeDeliveryContext(resolved.requesterOrigin),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -734,6 +871,7 @@ export function markSubagentRunTerminated(params: {
|
||||
const now = Date.now();
|
||||
const reason = params.reason?.trim() || "killed";
|
||||
let updated = 0;
|
||||
const entriesByChildSessionKey = new Map<string, SubagentRunRecord>();
|
||||
for (const runId of runIds) {
|
||||
const entry = subagentRuns.get(runId);
|
||||
if (!entry) {
|
||||
@@ -744,103 +882,57 @@ export function markSubagentRunTerminated(params: {
|
||||
}
|
||||
entry.endedAt = now;
|
||||
entry.outcome = { status: "error", error: reason };
|
||||
entry.endedReason = SUBAGENT_ENDED_REASON_KILLED;
|
||||
entry.cleanupHandled = true;
|
||||
entry.cleanupCompletedAt = now;
|
||||
entry.suppressAnnounceReason = "killed";
|
||||
if (!entriesByChildSessionKey.has(entry.childSessionKey)) {
|
||||
entriesByChildSessionKey.set(entry.childSessionKey, entry);
|
||||
}
|
||||
updated += 1;
|
||||
}
|
||||
if (updated > 0) {
|
||||
persistSubagentRuns();
|
||||
for (const entry of entriesByChildSessionKey.values()) {
|
||||
void emitSubagentEndedHookOnce({
|
||||
entry,
|
||||
reason: SUBAGENT_ENDED_REASON_KILLED,
|
||||
sendFarewell: true,
|
||||
outcome: SUBAGENT_ENDED_OUTCOME_KILLED,
|
||||
error: reason,
|
||||
inFlightRunIds: endedHookInFlightRunIds,
|
||||
persist: persistSubagentRuns,
|
||||
}).catch(() => {
|
||||
// Hook failures should not break termination flow.
|
||||
});
|
||||
}
|
||||
}
|
||||
return updated;
|
||||
}
|
||||
|
||||
export function listSubagentRunsForRequester(requesterSessionKey: string): SubagentRunRecord[] {
|
||||
const key = requesterSessionKey.trim();
|
||||
if (!key) {
|
||||
return [];
|
||||
}
|
||||
return [...subagentRuns.values()].filter((entry) => entry.requesterSessionKey === key);
|
||||
return listRunsForRequesterFromRuns(subagentRuns, requesterSessionKey);
|
||||
}
|
||||
|
||||
export function countActiveRunsForSession(requesterSessionKey: string): number {
|
||||
const key = requesterSessionKey.trim();
|
||||
if (!key) {
|
||||
return 0;
|
||||
}
|
||||
let count = 0;
|
||||
for (const entry of getRunsSnapshotForRead().values()) {
|
||||
if (entry.requesterSessionKey !== key) {
|
||||
continue;
|
||||
}
|
||||
if (typeof entry.endedAt === "number") {
|
||||
continue;
|
||||
}
|
||||
count += 1;
|
||||
}
|
||||
return count;
|
||||
return countActiveRunsForSessionFromRuns(
|
||||
getSubagentRunsSnapshotForRead(subagentRuns),
|
||||
requesterSessionKey,
|
||||
);
|
||||
}
|
||||
|
||||
export function countActiveDescendantRuns(rootSessionKey: string): number {
|
||||
const root = rootSessionKey.trim();
|
||||
if (!root) {
|
||||
return 0;
|
||||
}
|
||||
const runs = getRunsSnapshotForRead();
|
||||
const pending = [root];
|
||||
const visited = new Set<string>([root]);
|
||||
let count = 0;
|
||||
while (pending.length > 0) {
|
||||
const requester = pending.shift();
|
||||
if (!requester) {
|
||||
continue;
|
||||
}
|
||||
for (const entry of runs.values()) {
|
||||
if (entry.requesterSessionKey !== requester) {
|
||||
continue;
|
||||
}
|
||||
if (typeof entry.endedAt !== "number") {
|
||||
count += 1;
|
||||
}
|
||||
const childKey = entry.childSessionKey.trim();
|
||||
if (!childKey || visited.has(childKey)) {
|
||||
continue;
|
||||
}
|
||||
visited.add(childKey);
|
||||
pending.push(childKey);
|
||||
}
|
||||
}
|
||||
return count;
|
||||
return countActiveDescendantRunsFromRuns(
|
||||
getSubagentRunsSnapshotForRead(subagentRuns),
|
||||
rootSessionKey,
|
||||
);
|
||||
}
|
||||
|
||||
export function listDescendantRunsForRequester(rootSessionKey: string): SubagentRunRecord[] {
|
||||
const root = rootSessionKey.trim();
|
||||
if (!root) {
|
||||
return [];
|
||||
}
|
||||
const runs = getRunsSnapshotForRead();
|
||||
const pending = [root];
|
||||
const visited = new Set<string>([root]);
|
||||
const descendants: SubagentRunRecord[] = [];
|
||||
while (pending.length > 0) {
|
||||
const requester = pending.shift();
|
||||
if (!requester) {
|
||||
continue;
|
||||
}
|
||||
for (const entry of runs.values()) {
|
||||
if (entry.requesterSessionKey !== requester) {
|
||||
continue;
|
||||
}
|
||||
descendants.push(entry);
|
||||
const childKey = entry.childSessionKey.trim();
|
||||
if (!childKey || visited.has(childKey)) {
|
||||
continue;
|
||||
}
|
||||
visited.add(childKey);
|
||||
pending.push(childKey);
|
||||
}
|
||||
}
|
||||
return descendants;
|
||||
return listDescendantRunsForRequesterFromRuns(
|
||||
getSubagentRunsSnapshotForRead(subagentRuns),
|
||||
rootSessionKey,
|
||||
);
|
||||
}
|
||||
|
||||
export function initSubagentRegistry() {
|
||||
|
||||
Reference in New Issue
Block a user