refactor(web-fetch): dedupe firecrawl fallback

This commit is contained in:
Peter Steinberger
2026-02-16 02:15:02 +00:00
parent d9ca051a1d
commit 568fd337be

View File

@@ -425,7 +425,18 @@ export async function fetchFirecrawlContent(params: {
}; };
} }
async function runWebFetch(params: { type FirecrawlRuntimeParams = {
firecrawlEnabled: boolean;
firecrawlApiKey?: string;
firecrawlBaseUrl: string;
firecrawlOnlyMainContent: boolean;
firecrawlMaxAgeMs: number;
firecrawlProxy: "auto" | "basic" | "stealth";
firecrawlStoreInCache: boolean;
firecrawlTimeoutSeconds: number;
};
type WebFetchRuntimeParams = FirecrawlRuntimeParams & {
url: string; url: string;
extractMode: ExtractMode; extractMode: ExtractMode;
maxChars: number; maxChars: number;
@@ -435,15 +446,60 @@ async function runWebFetch(params: {
cacheTtlMs: number; cacheTtlMs: number;
userAgent: string; userAgent: string;
readabilityEnabled: boolean; readabilityEnabled: boolean;
firecrawlEnabled: boolean; };
firecrawlApiKey?: string;
firecrawlBaseUrl: string; function toFirecrawlContentParams(
firecrawlOnlyMainContent: boolean; params: FirecrawlRuntimeParams & { url: string; extractMode: ExtractMode },
firecrawlMaxAgeMs: number; ): Parameters<typeof fetchFirecrawlContent>[0] | null {
firecrawlProxy: "auto" | "basic" | "stealth"; if (!params.firecrawlEnabled || !params.firecrawlApiKey) {
firecrawlStoreInCache: boolean; return null;
firecrawlTimeoutSeconds: number; }
}): Promise<Record<string, unknown>> { return {
url: params.url,
extractMode: params.extractMode,
apiKey: params.firecrawlApiKey,
baseUrl: params.firecrawlBaseUrl,
onlyMainContent: params.firecrawlOnlyMainContent,
maxAgeMs: params.firecrawlMaxAgeMs,
proxy: params.firecrawlProxy,
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
};
}
/**
 * Runs the Firecrawl fallback for a web fetch and, when Firecrawl is usable,
 * builds the web-fetch payload from its result and stores it in FETCH_CACHE.
 *
 * @param params - The web-fetch runtime parameters, plus:
 *   - `urlToFetch`: the URL handed to Firecrawl (may differ from `params.url`,
 *     which is still reported as the payload's `rawUrl`).
 *   - `finalUrlFallback` / `statusFallback`: forwarded unchanged to
 *     `buildFirecrawlWebFetchPayload`.
 *   - `cacheKey`: key under which the payload is written to `FETCH_CACHE`.
 *   - `tookMs`: time already elapsed when this helper was invoked; the
 *     duration of the Firecrawl request is added on top of it.
 * @returns The payload that was cached, or `null` when
 *   `toFirecrawlContentParams` yields no parameters (Firecrawl disabled or no
 *   API key configured).
 *
 * Errors thrown by `fetchFirecrawlContent` are not caught here and propagate
 * to the caller.
 */
async function maybeFetchFirecrawlWebFetchPayload(
  params: WebFetchRuntimeParams & {
    urlToFetch: string;
    finalUrlFallback: string;
    statusFallback: number;
    cacheKey: string;
    tookMs: number;
  },
): Promise<Record<string, unknown> | null> {
  // `extractMode` is already carried by the spread; only the URL is overridden.
  const firecrawlParams = toFirecrawlContentParams({
    ...params,
    url: params.urlToFetch,
  });
  if (!firecrawlParams) {
    return null;
  }
  // Callers compute `tookMs` before this helper awaits Firecrawl, so measure
  // the Firecrawl round trip here to keep the reported duration end-to-end.
  const firecrawlStartedAt = Date.now();
  const firecrawl = await fetchFirecrawlContent(firecrawlParams);
  const firecrawlElapsedMs = Date.now() - firecrawlStartedAt;
  const payload = buildFirecrawlWebFetchPayload({
    firecrawl,
    rawUrl: params.url,
    finalUrlFallback: params.finalUrlFallback,
    statusFallback: params.statusFallback,
    extractMode: params.extractMode,
    maxChars: params.maxChars,
    tookMs: params.tookMs + firecrawlElapsedMs,
  });
  writeCache(FETCH_CACHE, params.cacheKey, payload, params.cacheTtlMs);
  return payload;
}
async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string, unknown>> {
const cacheKey = normalizeCacheKey( const cacheKey = normalizeCacheKey(
`fetch:${params.url}:${params.extractMode}:${params.maxChars}`, `fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
); );
@@ -494,28 +550,15 @@ async function runWebFetch(params: {
if (error instanceof SsrFBlockedError) { if (error instanceof SsrFBlockedError) {
throw error; throw error;
} }
if (params.firecrawlEnabled && params.firecrawlApiKey) { const payload = await maybeFetchFirecrawlWebFetchPayload({
const firecrawl = await fetchFirecrawlContent({ ...params,
url: finalUrl, urlToFetch: finalUrl,
extractMode: params.extractMode,
apiKey: params.firecrawlApiKey,
baseUrl: params.firecrawlBaseUrl,
onlyMainContent: params.firecrawlOnlyMainContent,
maxAgeMs: params.firecrawlMaxAgeMs,
proxy: params.firecrawlProxy,
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
});
const payload = buildFirecrawlWebFetchPayload({
firecrawl,
rawUrl: params.url,
finalUrlFallback: finalUrl, finalUrlFallback: finalUrl,
statusFallback: 200, statusFallback: 200,
extractMode: params.extractMode, cacheKey,
maxChars: params.maxChars,
tookMs: Date.now() - start, tookMs: Date.now() - start,
}); });
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs); if (payload) {
return payload; return payload;
} }
throw error; throw error;
@@ -523,28 +566,15 @@ async function runWebFetch(params: {
try { try {
if (!res.ok) { if (!res.ok) {
if (params.firecrawlEnabled && params.firecrawlApiKey) { const payload = await maybeFetchFirecrawlWebFetchPayload({
const firecrawl = await fetchFirecrawlContent({ ...params,
url: params.url, urlToFetch: params.url,
extractMode: params.extractMode,
apiKey: params.firecrawlApiKey,
baseUrl: params.firecrawlBaseUrl,
onlyMainContent: params.firecrawlOnlyMainContent,
maxAgeMs: params.firecrawlMaxAgeMs,
proxy: params.firecrawlProxy,
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
});
const payload = buildFirecrawlWebFetchPayload({
firecrawl,
rawUrl: params.url,
finalUrlFallback: finalUrl, finalUrlFallback: finalUrl,
statusFallback: res.status, statusFallback: res.status,
extractMode: params.extractMode, cacheKey,
maxChars: params.maxChars,
tookMs: Date.now() - start, tookMs: Date.now() - start,
}); });
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs); if (payload) {
return payload; return payload;
} }
const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES }); const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
@@ -647,33 +677,15 @@ async function runWebFetch(params: {
} }
} }
async function tryFirecrawlFallback(params: { async function tryFirecrawlFallback(
url: string; params: FirecrawlRuntimeParams & { url: string; extractMode: ExtractMode },
extractMode: ExtractMode; ): Promise<{ text: string; title?: string } | null> {
firecrawlEnabled: boolean; const firecrawlParams = toFirecrawlContentParams(params);
firecrawlApiKey?: string; if (!firecrawlParams) {
firecrawlBaseUrl: string;
firecrawlOnlyMainContent: boolean;
firecrawlMaxAgeMs: number;
firecrawlProxy: "auto" | "basic" | "stealth";
firecrawlStoreInCache: boolean;
firecrawlTimeoutSeconds: number;
}): Promise<{ text: string; title?: string } | null> {
if (!params.firecrawlEnabled || !params.firecrawlApiKey) {
return null; return null;
} }
try { try {
const firecrawl = await fetchFirecrawlContent({ const firecrawl = await fetchFirecrawlContent(firecrawlParams);
url: params.url,
extractMode: params.extractMode,
apiKey: params.firecrawlApiKey,
baseUrl: params.firecrawlBaseUrl,
onlyMainContent: params.firecrawlOnlyMainContent,
maxAgeMs: params.firecrawlMaxAgeMs,
proxy: params.firecrawlProxy,
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
});
return { text: firecrawl.text, title: firecrawl.title }; return { text: firecrawl.text, title: firecrawl.title };
} catch { } catch {
return null; return null;