mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-11 06:14:34 +00:00
feat: ACP thread-bound agents (#23580)
* docs: add ACP thread-bound agents plan doc * docs: expand ACP implementation specification * feat(acp): route ACP sessions through core dispatch and lifecycle cleanup * feat(acp): add /acp commands and Discord spawn gate * ACP: add acpx runtime plugin backend * fix(subagents): defer transient lifecycle errors before announce * Agents: harden ACP sessions_spawn and tighten spawn guidance * Agents: require explicit ACP target for runtime spawns * docs: expand ACP control-plane implementation plan * ACP: harden metadata seeding and spawn guidance * ACP: centralize runtime control-plane manager and fail-closed dispatch * ACP: harden runtime manager and unify spawn helpers * Commands: route ACP sessions through ACP runtime in agent command * ACP: require persisted metadata for runtime spawns * Sessions: preserve ACP metadata when updating entries * Plugins: harden ACP backend registry across loaders * ACPX: make availability probe compatible with adapters * E2E: add manual Discord ACP plain-language smoke script * ACPX: preserve streamed spacing across Discord delivery * Docs: add ACP Discord streaming strategy * ACP: harden Discord stream buffering for thread replies * ACP: reuse shared block reply pipeline for projector * ACP: unify streaming config and adopt coalesceIdleMs * Docs: add temporary ACP production hardening plan * Docs: trim temporary ACP hardening plan goals * Docs: gate ACP thread controls by backend capabilities * ACP: add capability-gated runtime controls and /acp operator commands * Docs: remove temporary ACP hardening plan * ACP: fix spawn target validation and close cache cleanup * ACP: harden runtime dispatch and recovery paths * ACP: split ACP command/runtime internals and centralize policy * ACP: harden runtime lifecycle, validation, and observability * ACP: surface runtime and backend session IDs in thread bindings * docs: add temp plan for binding-service migration * ACP: migrate thread binding flows to SessionBindingService * ACP: address 
review feedback and preserve prompt wording * ACPX plugin: pin runtime dependency and prefer bundled CLI * Discord: complete binding-service migration cleanup and restore ACP plan * Docs: add standalone ACP agents guide * ACP: route harness intents to thread-bound ACP sessions * ACP: fix spawn thread routing and queue-owner stall * ACP: harden startup reconciliation and command bypass handling * ACP: fix dispatch bypass type narrowing * ACP: align runtime metadata to agentSessionId * ACP: normalize session identifier handling and labels * ACP: mark thread banner session ids provisional until first reply * ACP: stabilize session identity mapping and startup reconciliation * ACP: add resolved session-id notices and cwd in thread intros * Discord: prefix thread meta notices consistently * Discord: unify ACP/thread meta notices with gear prefix * Discord: split thread persona naming from meta formatting * Extensions: bump acpx plugin dependency to 0.1.9 * Agents: gate ACP prompt guidance behind acp.enabled * Docs: remove temp experiment plan docs * Docs: scope streaming plan to holy grail refactor * Docs: refactor ACP agents guide for human-first flow * Docs/Skill: add ACP feature-flag guidance and direct acpx telephone-game flow * Docs/Skill: add OpenCode and Pi to ACP harness lists * Docs/Skill: align ACP harness list with current acpx registry * Dev/Test: move ACP plain-language smoke script and mark as keep * Docs/Skill: reorder ACP harness lists with Pi first * ACP: split control-plane manager into core/types/utils modules * Docs: refresh ACP thread-bound agents plan * ACP: extract dispatch lane and split manager domains * ACP: centralize binding context and remove reverse deps * Infra: unify system message formatting * ACP: centralize error boundaries and session id rendering * ACP: enforce init concurrency cap and strict meta clear * Tests: fix ACP dispatch binding mock typing * Tests: fix Discord thread-binding mock drift and ACP request id * ACP: gate slash 
bypass and persist cleared overrides * ACPX: await pre-abort cancel before runTurn return * Extension: pin acpx runtime dependency to 0.1.11 * Docs: add pinned acpx install strategy for ACP extension * Extensions/acpx: enforce strict local pinned startup * Extensions/acpx: tighten acp-router install guidance * ACPX: retry runtime test temp-dir cleanup * Extensions/acpx: require proactive ACPX repair for thread spawns * Extensions/acpx: require restart offer after acpx reinstall * extensions/acpx: remove workspace protocol devDependency * extensions/acpx: bump pinned acpx to 0.1.13 * extensions/acpx: sync lockfile after dependency bump * ACPX: make runtime spawn Windows-safe * fix: align doctor-config-flow repair tests with default-account migration (#23580) (thanks @osolmaz)
This commit is contained in:
@@ -66,6 +66,12 @@ const MAX_ANNOUNCE_RETRY_COUNT = 3;
|
||||
*/
|
||||
const ANNOUNCE_EXPIRY_MS = 5 * 60_000; // 5 minutes
|
||||
type SubagentRunOrphanReason = "missing-session-entry" | "missing-session-id";
|
||||
/**
 * Grace period, in milliseconds, before a lifecycle `error` event is treated
 * as terminal.
 *
 * Embedded runs can emit transient lifecycle `error` events while provider/model
 * retry is still in progress. Defer terminal error cleanup briefly so a
 * subsequent lifecycle `start` / `end` can cancel premature failure announces.
 */
const LIFECYCLE_ERROR_RETRY_GRACE_MS = 15_000;
|
||||
|
||||
function resolveAnnounceRetryDelayMs(retryCount: number) {
|
||||
const boundedRetryCount = Math.max(0, Math.min(retryCount, 10));
|
||||
@@ -204,6 +210,66 @@ function reconcileOrphanedRestoredRuns() {
|
||||
|
||||
const resumedRuns = new Set<string>();
|
||||
const endedHookInFlightRunIds = new Set<string>();
|
||||
/**
 * Lifecycle `error` events whose terminal handling has been deferred, keyed by
 * run id. At most one entry exists per run.
 */
const pendingLifecycleErrorByRunId = new Map<
  string,
  {
    /** Timer that fires the deferred error completion for the run. */
    timer: NodeJS.Timeout;
    /** End timestamp captured when the error was scheduled. */
    endedAt: number;
    /** Error message captured when the error was scheduled, if any. */
    error?: string;
  }
>();
|
||||
|
||||
/**
 * Cancels the deferred lifecycle error for a single run, if one is pending.
 *
 * Stops the pending timer and removes the run's entry from
 * `pendingLifecycleErrorByRunId`. Does nothing when no entry exists for the
 * run.
 *
 * @param runId - Id of the run whose pending error should be dropped.
 */
function clearPendingLifecycleError(runId: string) {
  const pending = pendingLifecycleErrorByRunId.get(runId);
  if (!pending) {
    return;
  }
  clearTimeout(pending.timer);
  pendingLifecycleErrorByRunId.delete(runId);
}
|
||||
|
||||
/**
 * Cancels every deferred lifecycle error: stops each pending timer, then
 * empties `pendingLifecycleErrorByRunId`.
 */
function clearAllPendingLifecycleErrors() {
  for (const pending of pendingLifecycleErrorByRunId.values()) {
    clearTimeout(pending.timer);
  }
  pendingLifecycleErrorByRunId.clear();
}
|
||||
|
||||
/**
 * Defers terminal handling of a lifecycle `error` event for a run.
 *
 * Any previously scheduled error for the same run is cancelled first, so only
 * the most recent error can fire. After `LIFECYCLE_ERROR_RETRY_GRACE_MS` the
 * run is completed with an `error` outcome, unless in the meantime:
 *  - the pending entry was cleared or replaced by a newer schedule,
 *  - the run is no longer present in `subagentRuns`, or
 *  - the run is already recorded as complete / `ok`.
 *
 * @param params.runId - Id of the run that reported the error.
 * @param params.endedAt - End timestamp to record if the error becomes terminal.
 * @param params.error - Optional error message to record in the outcome.
 */
function schedulePendingLifecycleError(params: { runId: string; endedAt: number; error?: string }) {
  // Only one deferred error may exist per run; drop any earlier one.
  clearPendingLifecycleError(params.runId);
  const timer = setTimeout(() => {
    const pending = pendingLifecycleErrorByRunId.get(params.runId);
    // Ignore this firing if the entry was cleared or superseded by a newer timer.
    if (!pending || pending.timer !== timer) {
      return;
    }
    pendingLifecycleErrorByRunId.delete(params.runId);
    const entry = subagentRuns.get(params.runId);
    if (!entry) {
      return;
    }
    // A run already recorded as successful must not be overwritten with an error.
    if (entry.endedReason === SUBAGENT_ENDED_REASON_COMPLETE || entry.outcome?.status === "ok") {
      return;
    }
    // NOTE(review): fire-and-forget; a rejection from completeSubagentRun would be
    // unhandled here — confirm it handles its own failures.
    void completeSubagentRun({
      runId: params.runId,
      endedAt: pending.endedAt,
      outcome: {
        status: "error",
        error: pending.error,
      },
      reason: SUBAGENT_ENDED_REASON_ERROR,
      sendFarewell: true,
      accountId: entry.requesterOrigin?.accountId,
      triggerCleanup: true,
    });
  }, LIFECYCLE_ERROR_RETRY_GRACE_MS);
  // Where the timer supports unref (Node.js), the pending grace timer does not keep the process alive.
  timer.unref?.();
  pendingLifecycleErrorByRunId.set(params.runId, {
    timer,
    endedAt: params.endedAt,
    error: params.error,
  });
}
|
||||
|
||||
function suppressAnnounceForSteerRestart(entry?: SubagentRunRecord) {
|
||||
return entry?.suppressAnnounceReason === "steer-restart";
|
||||
@@ -256,6 +322,7 @@ async function completeSubagentRun(params: {
|
||||
accountId?: string;
|
||||
triggerCleanup: boolean;
|
||||
}) {
|
||||
clearPendingLifecycleError(params.runId);
|
||||
const entry = subagentRuns.get(params.runId);
|
||||
if (!entry) {
|
||||
return;
|
||||
@@ -491,6 +558,7 @@ async function sweepSubagentRuns() {
|
||||
if (!entry.archiveAtMs || entry.archiveAtMs > now) {
|
||||
continue;
|
||||
}
|
||||
clearPendingLifecycleError(runId);
|
||||
subagentRuns.delete(runId);
|
||||
mutated = true;
|
||||
try {
|
||||
@@ -531,6 +599,7 @@ function ensureListener() {
|
||||
}
|
||||
const phase = evt.data?.phase;
|
||||
if (phase === "start") {
|
||||
clearPendingLifecycleError(evt.runId);
|
||||
const startedAt = typeof evt.data?.startedAt === "number" ? evt.data.startedAt : undefined;
|
||||
if (startedAt) {
|
||||
entry.startedAt = startedAt;
|
||||
@@ -543,17 +612,23 @@ function ensureListener() {
|
||||
}
|
||||
const endedAt = typeof evt.data?.endedAt === "number" ? evt.data.endedAt : Date.now();
|
||||
const error = typeof evt.data?.error === "string" ? evt.data.error : undefined;
|
||||
const outcome: SubagentRunOutcome =
|
||||
phase === "error"
|
||||
? { status: "error", error }
|
||||
: evt.data?.aborted
|
||||
? { status: "timeout" }
|
||||
: { status: "ok" };
|
||||
if (phase === "error") {
|
||||
schedulePendingLifecycleError({
|
||||
runId: evt.runId,
|
||||
endedAt,
|
||||
error,
|
||||
});
|
||||
return;
|
||||
}
|
||||
clearPendingLifecycleError(evt.runId);
|
||||
const outcome: SubagentRunOutcome = evt.data?.aborted
|
||||
? { status: "timeout" }
|
||||
: { status: "ok" };
|
||||
await completeSubagentRun({
|
||||
runId: evt.runId,
|
||||
endedAt,
|
||||
outcome,
|
||||
reason: phase === "error" ? SUBAGENT_ENDED_REASON_ERROR : SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
reason: SUBAGENT_ENDED_REASON_COMPLETE,
|
||||
sendFarewell: true,
|
||||
accountId: entry.requesterOrigin?.accountId,
|
||||
triggerCleanup: true,
|
||||
@@ -661,6 +736,7 @@ function completeCleanupBookkeeping(params: {
|
||||
completedAt: number;
|
||||
}) {
|
||||
if (params.cleanup === "delete") {
|
||||
clearPendingLifecycleError(params.runId);
|
||||
subagentRuns.delete(params.runId);
|
||||
persistSubagentRuns();
|
||||
retryDeferredCompletedAnnounces(params.runId);
|
||||
@@ -774,6 +850,7 @@ export function replaceSubagentRunAfterSteer(params: {
|
||||
}
|
||||
|
||||
if (previousRunId !== nextRunId) {
|
||||
clearPendingLifecycleError(previousRunId);
|
||||
subagentRuns.delete(previousRunId);
|
||||
resumedRuns.delete(previousRunId);
|
||||
}
|
||||
@@ -935,6 +1012,7 @@ export function resetSubagentRegistryForTests(opts?: { persist?: boolean }) {
|
||||
subagentRuns.clear();
|
||||
resumedRuns.clear();
|
||||
endedHookInFlightRunIds.clear();
|
||||
clearAllPendingLifecycleErrors();
|
||||
resetAnnounceQueuesForTests();
|
||||
stopSweeper();
|
||||
restoreAttempted = false;
|
||||
@@ -953,6 +1031,7 @@ export function addSubagentRunForTests(entry: SubagentRunRecord) {
|
||||
}
|
||||
|
||||
export function releaseSubagentRun(runId: string) {
|
||||
clearPendingLifecycleError(runId);
|
||||
const didDelete = subagentRuns.delete(runId);
|
||||
if (didDelete) {
|
||||
persistSubagentRuns();
|
||||
@@ -1020,6 +1099,7 @@ export function markSubagentRunTerminated(params: {
|
||||
let updated = 0;
|
||||
const entriesByChildSessionKey = new Map<string, SubagentRunRecord>();
|
||||
for (const runId of runIds) {
|
||||
clearPendingLifecycleError(runId);
|
||||
const entry = subagentRuns.get(runId);
|
||||
if (!entry) {
|
||||
continue;
|
||||
|
||||
Reference in New Issue
Block a user