feat: add stuck loop detection and exponential backoff infrastructure for agent polling (#17118)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: eebabf679b
Co-authored-by: akramcodez <179671552+akramcodez@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
Sk Akram
2026-02-17 01:46:35 +05:30
committed by GitHub
parent 1f99d82712
commit e5eb5b3e43
11 changed files with 1769 additions and 2 deletions

View File

@@ -6,6 +6,17 @@ export type SessionState = {
lastActivity: number;
state: SessionStateValue;
queueDepth: number;
toolCallHistory?: ToolCallRecord[];
toolLoopWarningBuckets?: Map<string, number>;
commandPollCounts?: Map<string, { count: number; lastPollAt: number }>;
};
/**
 * A single recorded tool invocation, stored per session in
 * `SessionState.toolCallHistory`.
 *
 * Required fields come first; the optional ones may be absent on a record.
 */
export type ToolCallRecord = {
  /** Name of the tool that was called. */
  toolName: string;
  /** Hash string for the call arguments (hashing scheme not shown here — confirm with the producer). */
  argsHash: string;
  /** Numeric timestamp of the record (presumably epoch milliseconds — TODO confirm). */
  timestamp: number;
  /** Identifier of the specific tool call, when one is available. */
  toolCallId?: string;
  /** Hash string for the call result, when one has been recorded. */
  resultHash?: string;
};
export type SessionRef = {

View File

@@ -256,6 +256,42 @@ export function logRunAttempt(params: SessionRef & { runId: string; attempt: num
markActivity();
}
/**
 * Reports a tool-loop detection result for a session.
 *
 * Writes one diagnostic log line (error level when `level` is "critical",
 * warn level otherwise), emits a `tool.loop` diagnostic event carrying the
 * same fields, and finally marks activity.
 *
 * @param params Session reference plus the loop details: the tool involved,
 *   severity `level`, the `action` taken, which `detector` fired, the
 *   associated `count`, a human-readable `message`, and optionally the name
 *   of a paired tool (only included in the log line when truthy).
 */
export function logToolLoopAction(
  params: SessionRef & {
    toolName: string;
    level: "warning" | "critical";
    action: "warn" | "block";
    detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
    count: number;
    message: string;
    pairedToolName?: string;
  },
) {
  const {
    sessionId,
    sessionKey,
    toolName,
    level,
    action,
    detector,
    count,
    message,
    pairedToolName,
  } = params;

  // The paired-tool segment is appended directly after the count, and only
  // when a paired tool name was supplied.
  const pairedSuffix = pairedToolName ? ` pairedTool=${pairedToolName}` : "";

  // Space-separated key=value segments; missing session identifiers are
  // rendered as "unknown".
  const payload = [
    `tool loop: sessionId=${sessionId ?? "unknown"}`,
    `sessionKey=${sessionKey ?? "unknown"}`,
    `tool=${toolName}`,
    `level=${level}`,
    `action=${action}`,
    `detector=${detector}`,
    `count=${count}${pairedSuffix}`,
    `message="${message}"`,
  ].join(" ");

  // Anything that is not "critical" is logged as a warning.
  if (level !== "critical") {
    diag.warn(payload);
  } else {
    diag.error(payload);
  }

  emitDiagnosticEvent({
    type: "tool.loop",
    sessionId,
    sessionKey,
    toolName,
    level,
    action,
    detector,
    count,
    message,
    pairedToolName,
  });

  markActivity();
}
export function logActiveRuns() {
const activeSessions = Array.from(diagnosticSessionStates.entries())
.filter(([, s]) => s.state === "processing")
@@ -314,6 +350,16 @@ export function startDiagnosticHeartbeat() {
queued: totalQueued,
});
import("../agents/command-poll-backoff.js")
.then(({ pruneStaleCommandPolls }) => {
for (const [, state] of diagnosticSessionStates) {
pruneStaleCommandPolls(state);
}
})
.catch((err) => {
diag.debug(`command-poll-backoff prune failed: ${String(err)}`);
});
for (const [, state] of diagnosticSessionStates) {
const ageMs = now - state.lastActivity;
if (state.state === "processing" && ageMs > 120_000) {