mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-07 10:31:24 +00:00
feat: add stuck loop detection and exponential backoff infrastructure for agent polling (#17118)
Merged via /review-pr -> /prepare-pr -> /merge-pr.
Prepared head SHA: eebabf679b
Co-authored-by: akramcodez <179671552+akramcodez@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
@@ -6,6 +6,17 @@ export type SessionState = {
|
||||
lastActivity: number;
|
||||
state: SessionStateValue;
|
||||
queueDepth: number;
|
||||
toolCallHistory?: ToolCallRecord[];
|
||||
toolLoopWarningBuckets?: Map<string, number>;
|
||||
commandPollCounts?: Map<string, { count: number; lastPollAt: number }>;
|
||||
};
|
||||
|
||||
/**
 * One entry in a session's tool-call history (see `SessionState.toolCallHistory`).
 *
 * NOTE(review): the field meanings below are inferred from the names and the
 * commit description (stuck-loop detection); confirm against the code that
 * writes these records.
 */
export type ToolCallRecord = {
  /** Name of the tool that was invoked. */
  toolName: string;
  /** Presumably a digest of the call arguments, used to spot repeated calls — TODO confirm. */
  argsHash: string;
  /** Optional identifier of the individual tool call. */
  toolCallId?: string;
  /** Optional; presumably a digest of the call's result, absent until a result is known — TODO confirm. */
  resultHash?: string;
  /** Numeric timestamp of the call; presumably epoch milliseconds — TODO confirm. */
  timestamp: number;
};
|
||||
|
||||
export type SessionRef = {
|
||||
|
||||
@@ -256,6 +256,42 @@ export function logRunAttempt(params: SessionRef & { runId: string; attempt: num
|
||||
markActivity();
|
||||
}
|
||||
|
||||
/**
 * Reports a detected tool loop for a session.
 *
 * Writes a single-line summary through `diag` (`error` when `level` is
 * `"critical"`, `warn` otherwise), emits a `"tool.loop"` diagnostic event
 * carrying the same fields, and finally calls `markActivity()`.
 *
 * @param params - Session reference plus the loop details:
 *   - `toolName`: tool the detection applies to.
 *   - `level`: severity; selects the log method used.
 *   - `action`: `"warn"` or `"block"`, forwarded as-is to the log line and event.
 *   - `detector`: which detector produced the report.
 *   - `count`: count reported by the detector.
 *   - `message`: free-form text, logged inside double quotes without escaping.
 *   - `pairedToolName`: optional second tool name; only added to the log line
 *     when truthy, but always forwarded to the event.
 */
export function logToolLoopAction(
  params: SessionRef & {
    toolName: string;
    level: "warning" | "critical";
    action: "warn" | "block";
    detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
    count: number;
    message: string;
    pairedToolName?: string;
  },
): void {
  const {
    sessionId,
    sessionKey,
    toolName,
    level,
    action,
    detector,
    count,
    message,
    pairedToolName,
  } = params;

  // The paired-tool segment is omitted entirely when no (or an empty) name is given.
  const pairedSuffix = pairedToolName ? ` pairedTool=${pairedToolName}` : "";
  const payload =
    `tool loop: sessionId=${sessionId ?? "unknown"} sessionKey=${sessionKey ?? "unknown"}` +
    ` tool=${toolName} level=${level} action=${action} detector=${detector}` +
    ` count=${count}${pairedSuffix} message="${message}"`;

  if (level === "critical") {
    diag.error(payload);
  } else {
    diag.warn(payload);
  }

  emitDiagnosticEvent({
    type: "tool.loop",
    sessionId,
    sessionKey,
    toolName,
    level,
    action,
    detector,
    count,
    message,
    pairedToolName,
  });
  markActivity();
}
|
||||
|
||||
export function logActiveRuns() {
|
||||
const activeSessions = Array.from(diagnosticSessionStates.entries())
|
||||
.filter(([, s]) => s.state === "processing")
|
||||
@@ -314,6 +350,16 @@ export function startDiagnosticHeartbeat() {
|
||||
queued: totalQueued,
|
||||
});
|
||||
|
||||
import("../agents/command-poll-backoff.js")
|
||||
.then(({ pruneStaleCommandPolls }) => {
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
pruneStaleCommandPolls(state);
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
diag.debug(`command-poll-backoff prune failed: ${String(err)}`);
|
||||
});
|
||||
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
const ageMs = now - state.lastActivity;
|
||||
if (state.state === "processing" && ageMs > 120_000) {
|
||||
|
||||
Reference in New Issue
Block a user