Agents: infer auth-profile unavailable failover reason

This commit is contained in:
Vignesh Natarajan
2026-02-22 16:10:24 -08:00
parent 331b728b8d
commit 5c7c37a02a
9 changed files with 340 additions and 4 deletions

View File

@@ -5,6 +5,7 @@ import {
clearExpiredCooldowns,
isProfileInCooldown,
markAuthProfileFailure,
resolveProfilesUnavailableReason,
resolveProfileUnusableUntil,
} from "./usage.js";
@@ -85,6 +86,101 @@ describe("isProfileInCooldown", () => {
});
});
describe("resolveProfilesUnavailableReason", () => {
  // A profile whose disabled window is still open reports its explicit reason.
  it("prefers active disabledReason when profiles are disabled", () => {
    const now = Date.now();
    const reason = resolveProfilesUnavailableReason({
      store: makeStore({
        "anthropic:default": {
          disabledUntil: now + 60_000,
          disabledReason: "billing",
        },
      }),
      profileIds: ["anthropic:default"],
      now,
    });
    expect(reason).toBe("billing");
  });

  // With an open cooldown window, the reason with the largest count wins.
  it("uses recorded non-rate-limit failure counts for active cooldown windows", () => {
    const now = Date.now();
    const reason = resolveProfilesUnavailableReason({
      store: makeStore({
        "anthropic:default": {
          cooldownUntil: now + 60_000,
          failureCounts: { auth: 3, rate_limit: 1 },
        },
      }),
      profileIds: ["anthropic:default"],
      now,
    });
    expect(reason).toBe("auth");
  });

  // An open cooldown window without any failure counts defaults to rate_limit.
  it("falls back to rate_limit when active cooldown has no reason history", () => {
    const now = Date.now();
    const reason = resolveProfilesUnavailableReason({
      store: makeStore({
        "anthropic:default": {
          cooldownUntil: now + 60_000,
        },
      }),
      profileIds: ["anthropic:default"],
      now,
    });
    expect(reason).toBeNull === undefined ? undefined : expect(reason).toBe("rate_limit");
  });

  // Windows that ended before `now` must not contribute any reason.
  it("ignores expired windows and returns null when no profile is actively unavailable", () => {
    const now = Date.now();
    const reason = resolveProfilesUnavailableReason({
      store: makeStore({
        "anthropic:default": {
          cooldownUntil: now - 1_000,
          failureCounts: { auth: 5 },
        },
        "anthropic:backup": {
          disabledUntil: now - 500,
          disabledReason: "billing",
        },
      }),
      profileIds: ["anthropic:default", "anthropic:backup"],
      now,
    });
    expect(reason).toBeNull();
  });

  // Equal counts are resolved by the reason priority order ("auth" before "timeout").
  it("breaks ties by reason priority for equal active failure counts", () => {
    const now = Date.now();
    const reason = resolveProfilesUnavailableReason({
      store: makeStore({
        "anthropic:default": {
          cooldownUntil: now + 60_000,
          failureCounts: { timeout: 2, auth: 2 },
        },
      }),
      profileIds: ["anthropic:default"],
      now,
    });
    expect(reason).toBe("auth");
  });
});
// ---------------------------------------------------------------------------
// clearExpiredCooldowns
// ---------------------------------------------------------------------------

View File

@@ -3,6 +3,20 @@ import { normalizeProviderId } from "../model-selection.js";
import { saveAuthProfileStore, updateAuthProfileStoreWithLock } from "./store.js";
import type { AuthProfileFailureReason, AuthProfileStore, ProfileUsageStats } from "./types.js";
// Known failure reasons, listed from highest to lowest tie-break priority.
const FAILURE_REASON_PRIORITY: AuthProfileFailureReason[] = [
  "auth",
  "billing",
  "format",
  "model_not_found",
  "timeout",
  "rate_limit",
  "unknown",
];

// Membership lookup for validating reason values read from stored stats.
const FAILURE_REASON_SET = new Set<AuthProfileFailureReason>(FAILURE_REASON_PRIORITY);

// Maps each reason to its index in FAILURE_REASON_PRIORITY (lower index = higher priority).
const FAILURE_REASON_ORDER = new Map<AuthProfileFailureReason, number>();
for (const [rank, reason] of FAILURE_REASON_PRIORITY.entries()) {
  FAILURE_REASON_ORDER.set(reason, rank);
}
export function resolveProfileUnusableUntil(
stats: Pick<ProfileUsageStats, "cooldownUntil" | "disabledUntil">,
): number | null {
@@ -27,6 +41,85 @@ export function isProfileInCooldown(store: AuthProfileStore, profileId: string):
return unusableUntil ? Date.now() < unusableUntil : false;
}
/**
 * Report whether `until` denotes an unusable window that is still open at `now`.
 *
 * Only finite, positive numeric timestamps strictly later than `now` count;
 * `undefined`, `NaN`, infinities, zero and negative values are never active.
 */
function isActiveUnusableWindow(until: number | undefined, now: number): boolean {
  if (typeof until !== "number" || !Number.isFinite(until)) {
    return false;
  }
  if (until <= 0) {
    return false;
  }
  return until > now;
}
/**
 * Infer the most likely reason the candidate profiles are currently unavailable.
 *
 * Each profile with usage stats contributes to a per-reason score:
 * - An active disabled window with a recognized `disabledReason` adds a heavy
 *   fixed weight for that reason and skips cooldown analysis for the profile.
 * - Otherwise, an active cooldown window adds every recognized, positive,
 *   finite entry of `failureCounts`; when nothing usable was recorded the
 *   profile counts once toward `"rate_limit"`.
 *
 * The reason with the highest total score wins; equal scores are resolved by
 * position in `FAILURE_REASON_PRIORITY` (earlier wins).
 *
 * @param params.store - Store whose `usageStats` are inspected.
 * @param params.profileIds - Candidate profile ids to consider.
 * @param params.now - Reference time in ms; defaults to `Date.now()`.
 * @returns The inferred reason, or `null` when no listed profile has an
 *   active disabled or cooldown window.
 */
export function resolveProfilesUnavailableReason(params: {
  store: AuthProfileStore;
  profileIds: string[];
  now?: number;
}): AuthProfileFailureReason | null {
  const now = params.now ?? Date.now();
  const scores = new Map<AuthProfileFailureReason, number>();

  // Returns true only when the value was actually added, so callers can tell
  // a usable signal apart from one that was rejected (unknown reason,
  // non-positive or non-finite weight).
  const addScore = (reason: AuthProfileFailureReason, value: number): boolean => {
    if (!FAILURE_REASON_SET.has(reason) || !Number.isFinite(value) || value <= 0) {
      return false;
    }
    scores.set(reason, (scores.get(reason) ?? 0) + value);
    return true;
  };

  for (const profileId of params.profileIds) {
    const stats = params.store.usageStats?.[profileId];
    if (!stats) {
      continue;
    }

    const disabledActive = isActiveUnusableWindow(stats.disabledUntil, now);
    if (disabledActive && stats.disabledReason && FAILURE_REASON_SET.has(stats.disabledReason)) {
      // Disabled reasons are explicit and high-signal; weight heavily.
      addScore(stats.disabledReason, 1_000);
      continue;
    }

    if (!isActiveUnusableWindow(stats.cooldownUntil, now)) {
      continue;
    }

    let recordedReason = false;
    for (const [rawReason, rawCount] of Object.entries(stats.failureCounts ?? {})) {
      const count = typeof rawCount === "number" ? rawCount : 0;
      // Only treat the reason as recorded when its count really contributed;
      // a NaN/Infinity count must not suppress the rate_limit fallback below.
      if (addScore(rawReason as AuthProfileFailureReason, count)) {
        recordedReason = true;
      }
    }
    if (!recordedReason) {
      // Active cooldown without usable reason history: assume rate limiting.
      addScore("rate_limit", 1);
    }
  }

  // Pick the highest score; on ties the lower priority index wins. `best`
  // stays null when no profile contributed a score.
  let best: AuthProfileFailureReason | null = null;
  let bestScore = -1;
  let bestPriority = Number.MAX_SAFE_INTEGER;
  for (const [reason, score] of scores) {
    const priority = FAILURE_REASON_ORDER.get(reason) ?? Number.MAX_SAFE_INTEGER;
    if (score > bestScore || (score === bestScore && priority < bestPriority)) {
      best = reason;
      bestScore = score;
      bestPriority = priority;
    }
  }
  return best;
}
/**
* Return the soonest `unusableUntil` timestamp (ms epoch) among the given
* profiles, or `null` when no profile has a recorded cooldown. Note: the