refactor(web-fetch): dedupe firecrawl payload builder

This commit is contained in:
Peter Steinberger
2026-02-15 05:53:55 +00:00
parent 2f4b91d738
commit 485b78bb94

View File

@@ -286,6 +286,43 @@ function wrapWebFetchField(value: string | undefined): string | undefined {
return wrapExternalContent(value, { source: "web_fetch", includeWarning: false }); return wrapExternalContent(value, { source: "web_fetch", includeWarning: false });
} }
/**
 * Assembles the `web_fetch` result payload for content obtained via Firecrawl.
 *
 * The fetched text, title and warning are passed through the web_fetch
 * wrapping helpers; URLs, status and content type are emitted unwrapped.
 *
 * @param params.firecrawl - Resolved result of `fetchFirecrawlContent`.
 * @param params.rawUrl - URL as originally requested; returned untouched as `url`.
 * @param params.finalUrlFallback - Used for `finalUrl` when Firecrawl's own value is falsy.
 * @param params.statusFallback - Used for `status` when Firecrawl's own value is null/undefined.
 * @param params.extractMode - Echoed back in the payload.
 * @param params.maxChars - Limit handed to `wrapWebFetchContent`.
 * @param params.tookMs - Elapsed time supplied by the caller; echoed back.
 * @returns The payload object with `extractor` set to "firecrawl".
 */
function buildFirecrawlWebFetchPayload(params: {
  firecrawl: Awaited<ReturnType<typeof fetchFirecrawlContent>>;
  rawUrl: string;
  finalUrlFallback: string;
  statusFallback: number;
  extractMode: ExtractMode;
  maxChars: number;
  tookMs: number;
}): Record<string, unknown> {
  const { firecrawl, rawUrl, finalUrlFallback, statusFallback, extractMode, maxChars, tookMs } =
    params;

  const body = wrapWebFetchContent(firecrawl.text, maxChars);

  // Only call the field wrapper when Firecrawl supplied a non-empty title.
  let safeTitle: string | undefined;
  if (firecrawl.title) {
    safeTitle = wrapWebFetchField(firecrawl.title);
  }

  return {
    // Raw URLs are kept as-is so they can be used for tool chaining.
    url: rawUrl,
    finalUrl: firecrawl.finalUrl || finalUrlFallback,
    status: firecrawl.status ?? statusFallback,
    // Protocol metadata: intentionally not wrapped.
    contentType: "text/markdown",
    title: safeTitle,
    extractMode,
    extractor: "firecrawl",
    externalContent: {
      untrusted: true,
      source: "web_fetch",
      wrapped: true,
    },
    truncated: body.truncated,
    // `length` reports the wrapped size; `rawLength` is the actual content length.
    length: body.wrappedLength,
    rawLength: body.rawLength,
    wrappedLength: body.wrappedLength,
    fetchedAt: new Date().toISOString(),
    tookMs,
    text: body.text,
    warning: wrapWebFetchField(firecrawl.warning),
  };
}
function normalizeContentType(value: string | null | undefined): string | undefined { function normalizeContentType(value: string | null | undefined): string | undefined {
if (!value) { if (!value) {
return undefined; return undefined;
@@ -452,30 +489,15 @@ async function runWebFetch(params: {
storeInCache: params.firecrawlStoreInCache, storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds, timeoutSeconds: params.firecrawlTimeoutSeconds,
}); });
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars); const payload = buildFirecrawlWebFetchPayload({
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined; firecrawl,
const payload = { rawUrl: params.url,
url: params.url, // Keep raw for tool chaining finalUrlFallback: finalUrl,
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw statusFallback: 200,
status: firecrawl.status ?? 200,
contentType: "text/markdown", // Protocol metadata, don't wrap
title: wrappedTitle,
extractMode: params.extractMode, extractMode: params.extractMode,
extractor: "firecrawl", maxChars: params.maxChars,
externalContent: {
untrusted: true,
source: "web_fetch",
wrapped: true,
},
truncated: wrapped.truncated,
length: wrapped.wrappedLength,
rawLength: wrapped.rawLength, // Actual content length, not wrapped
wrappedLength: wrapped.wrappedLength,
fetchedAt: new Date().toISOString(),
tookMs: Date.now() - start, tookMs: Date.now() - start,
text: wrapped.text, });
warning: wrapWebFetchField(firecrawl.warning),
};
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs); writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload; return payload;
} }
@@ -496,30 +518,15 @@ async function runWebFetch(params: {
storeInCache: params.firecrawlStoreInCache, storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds, timeoutSeconds: params.firecrawlTimeoutSeconds,
}); });
const wrapped = wrapWebFetchContent(firecrawl.text, params.maxChars); const payload = buildFirecrawlWebFetchPayload({
const wrappedTitle = firecrawl.title ? wrapWebFetchField(firecrawl.title) : undefined; firecrawl,
const payload = { rawUrl: params.url,
url: params.url, // Keep raw for tool chaining finalUrlFallback: finalUrl,
finalUrl: firecrawl.finalUrl || finalUrl, // Keep raw statusFallback: res.status,
status: firecrawl.status ?? res.status,
contentType: "text/markdown", // Protocol metadata, don't wrap
title: wrappedTitle,
extractMode: params.extractMode, extractMode: params.extractMode,
extractor: "firecrawl", maxChars: params.maxChars,
externalContent: {
untrusted: true,
source: "web_fetch",
wrapped: true,
},
truncated: wrapped.truncated,
length: wrapped.wrappedLength,
rawLength: wrapped.rawLength, // Actual content length, not wrapped
wrappedLength: wrapped.wrappedLength,
fetchedAt: new Date().toISOString(),
tookMs: Date.now() - start, tookMs: Date.now() - start,
text: wrapped.text, });
warning: wrapWebFetchField(firecrawl.warning),
};
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs); writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload; return payload;
} }