mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 00:08:28 +00:00
refactor(web-fetch): dedupe firecrawl payload builder
This commit is contained in:
@@ -286,6 +286,43 @@ function wrapWebFetchField(value: string | undefined): string | undefined {
|
|||||||
return wrapExternalContent(value, { source: "web_fetch", includeWarning: false });
|
return wrapExternalContent(value, { source: "web_fetch", includeWarning: false });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the web_fetch result payload for a page extracted through Firecrawl.
 *
 * The extracted text, the title and the warning are passed through the
 * external-content wrappers; URLs, status and content type are emitted as-is.
 *
 * @param params.firecrawl - Result of `fetchFirecrawlContent`.
 * @param params.rawUrl - URL as originally requested; returned untouched as `url`.
 * @param params.finalUrlFallback - Used for `finalUrl` when Firecrawl reports a falsy one.
 * @param params.statusFallback - Used for `status` when Firecrawl reports null/undefined.
 * @param params.extractMode - Echoed back in the payload.
 * @param params.maxChars - Limit handed to `wrapWebFetchContent`.
 * @param params.tookMs - Elapsed time measured by the caller, echoed back.
 * @returns A plain object describing the fetch result.
 */
function buildFirecrawlWebFetchPayload(params: {
  firecrawl: Awaited<ReturnType<typeof fetchFirecrawlContent>>;
  rawUrl: string;
  finalUrlFallback: string;
  statusFallback: number;
  extractMode: ExtractMode;
  maxChars: number;
  tookMs: number;
}): Record<string, unknown> {
  const { firecrawl, rawUrl, finalUrlFallback, statusFallback, extractMode, maxChars, tookMs } =
    params;

  const body = wrapWebFetchContent(firecrawl.text, maxChars);

  // Only wrap the title when Firecrawl actually produced a non-empty one.
  let title: string | undefined = undefined;
  if (firecrawl.title) {
    title = wrapWebFetchField(firecrawl.title);
  }

  // `||` (not `??`): an empty finalUrl also falls back.
  const finalUrl = firecrawl.finalUrl || finalUrlFallback; // Keep raw
  const status = firecrawl.status ?? statusFallback;

  return {
    url: rawUrl, // Keep raw for tool chaining
    finalUrl,
    status,
    contentType: "text/markdown", // Protocol metadata, don't wrap
    title,
    extractMode,
    extractor: "firecrawl",
    externalContent: {
      untrusted: true,
      source: "web_fetch",
      wrapped: true,
    },
    truncated: body.truncated,
    length: body.wrappedLength, // Same value as wrappedLength below
    rawLength: body.rawLength, // Actual content length, not wrapped
    wrappedLength: body.wrappedLength,
    fetchedAt: new Date().toISOString(),
    tookMs,
    text: body.text,
    warning: wrapWebFetchField(firecrawl.warning),
  };
}
|
||||||
|
|
||||||
function normalizeContentType(value: string | null | undefined): string | undefined {
|
function normalizeContentType(value: string | null | undefined): string | undefined {
|
||||||
if (!value) {
|
if (!value) {
|
||||||
return undefined;
|
return undefined;
|
||||||
@@ -452,30 +489,15 @@ async function runWebFetch(params: {
|
|||||||
storeInCache: params.firecrawlStoreInCache,
|
storeInCache: params.firecrawlStoreInCache,
|
||||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
||||||
});
|
});
|
||||||
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars);
|
const payload = buildFirecrawlWebFetchPayload({
|
||||||
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
|
firecrawl,
|
||||||
const payload = {
|
rawUrl: params.url,
|
||||||
url: params.url, // Keep raw for tool chaining
|
finalUrlFallback: finalUrl,
|
||||||
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw
|
statusFallback: 200,
|
||||||
status: firecrawl.status ?? 200,
|
|
||||||
contentType: "text/markdown", // Protocol metadata, don't wrap
|
|
||||||
title: wrappedTitle,
|
|
||||||
extractMode: params.extractMode,
|
extractMode: params.extractMode,
|
||||||
extractor: "firecrawl",
|
maxChars: params.maxChars,
|
||||||
externalContent: {
|
|
||||||
untrusted: true,
|
|
||||||
source: "web_fetch",
|
|
||||||
wrapped: true,
|
|
||||||
},
|
|
||||||
truncated: wrapped.truncated,
|
|
||||||
length: wrapped.wrappedLength,
|
|
||||||
rawLength: wrapped.rawLength, // Actual content length, not wrapped
|
|
||||||
wrappedLength: wrapped.wrappedLength,
|
|
||||||
fetchedAt: new Date().toISOString(),
|
|
||||||
tookMs: Date.now() - start,
|
tookMs: Date.now() - start,
|
||||||
text: wrapped.text,
|
});
|
||||||
warning: wrapWebFetchField(firecrawl.warning),
|
|
||||||
};
|
|
||||||
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
@@ -496,30 +518,15 @@ async function runWebFetch(params: {
|
|||||||
storeInCache: params.firecrawlStoreInCache,
|
storeInCache: params.firecrawlStoreInCache,
|
||||||
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
timeoutSeconds: params.firecrawlTimeoutSeconds,
|
||||||
});
|
});
|
||||||
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars);
|
const payload = buildFirecrawlWebFetchPayload({
|
||||||
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined;
|
firecrawl,
|
||||||
const payload = {
|
rawUrl: params.url,
|
||||||
url: params.url, // Keep raw for tool chaining
|
finalUrlFallback: finalUrl,
|
||||||
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw
|
statusFallback: res.status,
|
||||||
status: firecrawl.status ?? res.status,
|
|
||||||
contentType: "text/markdown", // Protocol metadata, don't wrap
|
|
||||||
title: wrappedTitle,
|
|
||||||
extractMode: params.extractMode,
|
extractMode: params.extractMode,
|
||||||
extractor: "firecrawl",
|
maxChars: params.maxChars,
|
||||||
externalContent: {
|
|
||||||
untrusted: true,
|
|
||||||
source: "web_fetch",
|
|
||||||
wrapped: true,
|
|
||||||
},
|
|
||||||
truncated: wrapped.truncated,
|
|
||||||
length: wrapped.wrappedLength,
|
|
||||||
rawLength: wrapped.rawLength, // Actual content length, not wrapped
|
|
||||||
wrappedLength: wrapped.wrappedLength,
|
|
||||||
fetchedAt: new Date().toISOString(),
|
|
||||||
tookMs: Date.now() - start,
|
tookMs: Date.now() - start,
|
||||||
text: wrapped.text,
|
});
|
||||||
warning: wrapWebFetchField(firecrawl.warning),
|
|
||||||
};
|
|
||||||
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
||||||
return payload;
|
return payload;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user