fix(agents): honor explicit rate-limit cooldown probes in fallback runs

This commit is contained in:
Vignesh Natarajan
2026-03-05 20:02:36 -08:00
parent ce71fac7d6
commit d45353f95b
14 changed files with 150 additions and 25 deletions

View File

@@ -633,15 +633,39 @@ export async function runEmbeddedPiAgent(
};
try {
const autoProfileCandidates = profileCandidates.filter(
(candidate): candidate is string =>
typeof candidate === "string" && candidate.length > 0 && candidate !== lockedProfileId,
);
const allAutoProfilesInCooldown =
autoProfileCandidates.length > 0 &&
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
const unavailableReason = allAutoProfilesInCooldown
? (resolveProfilesUnavailableReason({
store: authStore,
profileIds: autoProfileCandidates,
}) ?? "rate_limit")
: null;
const allowRateLimitCooldownProbe =
params.allowRateLimitCooldownProbe === true &&
allAutoProfilesInCooldown &&
unavailableReason === "rate_limit";
let didRateLimitCooldownProbe = false;
while (profileIndex < profileCandidates.length) {
const candidate = profileCandidates[profileIndex];
if (
candidate &&
candidate !== lockedProfileId &&
isProfileInCooldown(authStore, candidate)
) {
profileIndex += 1;
continue;
const inCooldown =
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
if (inCooldown) {
if (allowRateLimitCooldownProbe && !didRateLimitCooldownProbe) {
didRateLimitCooldownProbe = true;
log.warn(
`probing cooldowned auth profile for ${provider}/${modelId} due to rate_limit unavailability`,
);
} else {
profileIndex += 1;
continue;
}
}
await applyApiKeyInfo(profileCandidates[profileIndex]);
break;

View File

@@ -113,4 +113,12 @@ export type RunEmbeddedPiAgentParams = {
streamParams?: AgentStreamParams;
ownerNumbers?: string[];
enforceFinalTag?: boolean;
/**
* Allow a single run attempt even when all auth profiles are in cooldown,
* but only for inferred `rate_limit` cooldowns.
*
* This is used by model fallback when trying sibling models on providers
* where rate limits are often model-scoped.
*/
allowRateLimitCooldownProbe?: boolean;
};