mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 11:41:24 +00:00
refactor(web-fetch): dedupe firecrawl fallback
This commit is contained in:
@@ -425,7 +425,18 @@ export async function fetchFirecrawlContent(params: {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runWebFetch(params: {
|
type FirecrawlRuntimeParams = {
|
||||||
|
firecrawlEnabled: boolean;
|
||||||
|
firecrawlApiKey?: string;
|
||||||
|
firecrawlBaseUrl: string;
|
||||||
|
firecrawlOnlyMainContent: boolean;
|
||||||
|
firecrawlMaxAgeMs: number;
|
||||||
|
firecrawlProxy: "auto" | "basic" | "stealth";
|
||||||
|
firecrawlStoreInCache: boolean;
|
||||||
|
firecrawlTimeoutSeconds: number;
|
||||||
|
};
|
||||||
|
|
||||||
|
type WebFetchRuntimeParams = FirecrawlRuntimeParams & {
|
||||||
url: string;
|
url: string;
|
||||||
extractMode: ExtractMode;
|
extractMode: ExtractMode;
|
||||||
maxChars: number;
|
maxChars: number;
|
||||||
@@ -435,15 +446,60 @@ async function runWebFetch(params: {
|
|||||||
cacheTtlMs: number;
|
cacheTtlMs: number;
|
||||||
userAgent: string;
|
userAgent: string;
|
||||||
readabilityEnabled: boolean;
|
readabilityEnabled: boolean;
|
||||||
firecrawlEnabled: boolean;
|
};
|
||||||
firecrawlApiKey?: string;
|
|
||||||
firecrawlBaseUrl: string;
|
function toFirecrawlContentParams(
|
||||||
firecrawlOnlyMainContent: boolean;
|
params: FirecrawlRuntimeParams & { url: string; extractMode: ExtractMode },
|
||||||
firecrawlMaxAgeMs: number;
|
): Parameters<typeof fetchFirecrawlContent>[0] | null {
|
||||||
firecrawlProxy: "auto" | "basic" | "stealth";
|
if (!params.firecrawlEnabled || !params.firecrawlApiKey) {
|
||||||
firecrawlStoreInCache: boolean;
|
return null;
|
||||||
firecrawlTimeoutSeconds: number;
|
}
|
||||||
}): Promise<Record<string, unknown>> {
|
return {
|
||||||
|
url: params.url,
|
||||||
|
extractMode: params.extractMode,
|
||||||
|
apiKey: params.firecrawlApiKey,
|
||||||
|
baseUrl: params.firecrawlBaseUrl,
|
||||||
|
onlyMainContent: params.firecrawlOnlyMainContent,
|
||||||
|
maxAgeMs: params.firecrawlMaxAgeMs,
|
||||||
|
proxy: params.firecrawlProxy,
|
||||||
|
storeInCache: params.firecrawlStoreInCache,
|
||||||
|
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function maybeFetchFirecrawlWebFetchPayload(
|
||||||
|
params: WebFetchRuntimeParams & {
|
||||||
|
urlToFetch: string;
|
||||||
|
finalUrlFallback: string;
|
||||||
|
statusFallback: number;
|
||||||
|
cacheKey: string;
|
||||||
|
tookMs: number;
|
||||||
|
},
|
||||||
|
): Promise<Record<string, unknown> | null> {
|
||||||
|
const firecrawlParams = toFirecrawlContentParams({
|
||||||
|
...params,
|
||||||
|
url: params.urlToFetch,
|
||||||
|
extractMode: params.extractMode,
|
||||||
|
});
|
||||||
|
if (!firecrawlParams) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const firecrawl = await fetchFirecrawlContent(firecrawlParams);
|
||||||
|
const payload = buildFirecrawlWebFetchPayload({
|
||||||
|
firecrawl,
|
||||||
|
rawUrl: params.url,
|
||||||
|
finalUrlFallback: params.finalUrlFallback,
|
||||||
|
statusFallback: params.statusFallback,
|
||||||
|
extractMode: params.extractMode,
|
||||||
|
maxChars: params.maxChars,
|
||||||
|
tookMs: params.tookMs,
|
||||||
|
});
|
||||||
|
writeCache(FETCH_CACHE, params.cacheKey, payload, params.cacheTtlMs);
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string, unknown>> {
|
||||||
const cacheKey = normalizeCacheKey(
|
const cacheKey = normalizeCacheKey(
|
||||||
`fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
|
`fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
|
||||||
);
|
);
|
||||||
@@ -494,28 +550,15 @@ async function runWebFetch(params: {
|
|||||||
if (error instanceof SsrFBlockedError) {
|
if (error instanceof SsrFBlockedError) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
if (params.firecrawlEnabled && params.firecrawlApiKey) {
|
const payload = await maybeFetchFirecrawlWebFetchPayload({
|
||||||
const firecrawl = await fetchFirecrawlContent({
|
...params,
|
||||||
url: finalUrl,
|
urlToFetch: finalUrl,
|
||||||
extractMode: params.extractMode,
|
finalUrlFallback: finalUrl,
|
||||||
apiKey: params.firecrawlApiKey,
|
statusFallback: 200,
|
||||||
baseUrl: params.firecrawlBaseUrl,
|
cacheKey,
|
||||||
onlyMainContent: params.firecrawlOnlyMainContent,
|
tookMs: Date.now() - start,
|
||||||
maxAgeMs: params.firecrawlMaxAgeMs,
|
});
|
||||||
proxy: params.firecrawlProxy,
|
if (payload) {
|
||||||
storeInCache: params.firecrawlStoreInCache,
|
|
||||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
|
||||||
});
|
|
||||||
const payload = buildFirecrawlWebFetchPayload({
|
|
||||||
firecrawl,
|
|
||||||
rawUrl: params.url,
|
|
||||||
finalUrlFallback: finalUrl,
|
|
||||||
statusFallback: 200,
|
|
||||||
extractMode: params.extractMode,
|
|
||||||
maxChars: params.maxChars,
|
|
||||||
tookMs: Date.now() - start,
|
|
||||||
});
|
|
||||||
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
throw error;
|
throw error;
|
||||||
@@ -523,28 +566,15 @@ async function runWebFetch(params: {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
if (!res.ok) {
|
if (!res.ok) {
|
||||||
if (params.firecrawlEnabled && params.firecrawlApiKey) {
|
const payload = await maybeFetchFirecrawlWebFetchPayload({
|
||||||
const firecrawl = await fetchFirecrawlContent({
|
...params,
|
||||||
url: params.url,
|
urlToFetch: params.url,
|
||||||
extractMode: params.extractMode,
|
finalUrlFallback: finalUrl,
|
||||||
apiKey: params.firecrawlApiKey,
|
statusFallback: res.status,
|
||||||
baseUrl: params.firecrawlBaseUrl,
|
cacheKey,
|
||||||
onlyMainContent: params.firecrawlOnlyMainContent,
|
tookMs: Date.now() - start,
|
||||||
maxAgeMs: params.firecrawlMaxAgeMs,
|
});
|
||||||
proxy: params.firecrawlProxy,
|
if (payload) {
|
||||||
storeInCache: params.firecrawlStoreInCache,
|
|
||||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
|
||||||
});
|
|
||||||
const payload = buildFirecrawlWebFetchPayload({
|
|
||||||
firecrawl,
|
|
||||||
rawUrl: params.url,
|
|
||||||
finalUrlFallback: finalUrl,
|
|
||||||
statusFallback: res.status,
|
|
||||||
extractMode: params.extractMode,
|
|
||||||
maxChars: params.maxChars,
|
|
||||||
tookMs: Date.now() - start,
|
|
||||||
});
|
|
||||||
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
|
const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
|
||||||
@@ -647,33 +677,15 @@ async function runWebFetch(params: {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function tryFirecrawlFallback(params: {
|
async function tryFirecrawlFallback(
|
||||||
url: string;
|
params: FirecrawlRuntimeParams & { url: string; extractMode: ExtractMode },
|
||||||
extractMode: ExtractMode;
|
): Promise<{ text: string; title?: string } | null> {
|
||||||
firecrawlEnabled: boolean;
|
const firecrawlParams = toFirecrawlContentParams(params);
|
||||||
firecrawlApiKey?: string;
|
if (!firecrawlParams) {
|
||||||
firecrawlBaseUrl: string;
|
|
||||||
firecrawlOnlyMainContent: boolean;
|
|
||||||
firecrawlMaxAgeMs: number;
|
|
||||||
firecrawlProxy: "auto" | "basic" | "stealth";
|
|
||||||
firecrawlStoreInCache: boolean;
|
|
||||||
firecrawlTimeoutSeconds: number;
|
|
||||||
}): Promise<{ text: string; title?: string } | null> {
|
|
||||||
if (!params.firecrawlEnabled || !params.firecrawlApiKey) {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const firecrawl = await fetchFirecrawlContent({
|
const firecrawl = await fetchFirecrawlContent(firecrawlParams);
|
||||||
url: params.url,
|
|
||||||
extractMode: params.extractMode,
|
|
||||||
apiKey: params.firecrawlApiKey,
|
|
||||||
baseUrl: params.firecrawlBaseUrl,
|
|
||||||
onlyMainContent: params.firecrawlOnlyMainContent,
|
|
||||||
maxAgeMs: params.firecrawlMaxAgeMs,
|
|
||||||
proxy: params.firecrawlProxy,
|
|
||||||
storeInCache: params.firecrawlStoreInCache,
|
|
||||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
|
||||||
});
|
|
||||||
return { text: firecrawl.text, title: firecrawl.title };
|
return { text: firecrawl.text, title: firecrawl.title };
|
||||||
} catch {
|
} catch {
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
Reference in New Issue
Block a user