feat: add configurable web_fetch maxChars cap

This commit is contained in:
Peter Steinberger
2026-02-03 17:35:51 -08:00
parent 6b4b6049b4
commit d3ba57b7d7
8 changed files with 74 additions and 3 deletions

View File

@@ -49,6 +49,20 @@ function firecrawlError(): MockResponse {
};
}
/**
 * Builds a successful (HTTP 200) mock response that carries a text body.
 *
 * @param text - Body resolved by the mock's `text()` method.
 * @param url - URL reported on the response.
 * @param contentType - Value placed in the `content-type` header.
 * @returns A `MockResponse` with `ok: true` and `status: 200`.
 */
function textResponse(
  text: string,
  url = "https://example.com/",
  contentType = "text/plain; charset=utf-8",
): MockResponse {
  const headers = makeHeaders({ "content-type": contentType });
  const response: MockResponse = {
    ok: true,
    status: 200,
    url,
    headers,
    text: () => Promise.resolve(text),
  };
  return response;
}
function errorHtmlResponse(
html: string,
status = 404,
@@ -322,6 +336,37 @@ describe("web_fetch extraction fallbacks", () => {
expect(details.extractor).toBe("firecrawl");
expect(details.text).toContain("firecrawl fallback");
});
// Checks that a requested maxChars above the configured maxCharsCap is clamped, and that
// the fetched text is returned inside the external-content wrapper.
it("wraps external content and clamps oversized maxChars", async () => {
  const cap = 10_000;
  // The mocked body is much larger than the cap, so the result is expected to be truncated.
  const oversizedBody = "a".repeat(80_000);
  const mockFetch = vi.fn((input: RequestInfo) => {
    const response = textResponse(oversizedBody, requestUrl(input));
    return Promise.resolve(response) as Promise<Response>;
  });
  // @ts-expect-error mock fetch
  global.fetch = mockFetch;

  const tool = createWebFetchTool({
    config: {
      tools: {
        web: {
          fetch: { cacheTtlMinutes: 0, firecrawl: { enabled: false }, maxCharsCap: cap },
        },
      },
    },
    sandboxed: false,
  });
  // Ask for far more characters than the configured cap permits.
  const result = await tool?.execute?.("call", {
    url: "https://example.com/large",
    maxChars: 200_000,
  });

  const payload = result?.details as { text?: string; length?: number; truncated?: boolean };
  expect(payload.text).toContain("<<<EXTERNAL_UNTRUSTED_CONTENT>>>");
  expect(payload.text).toContain("Source: Web Fetch");
  expect(payload.length).toBeLessThanOrEqual(cap);
  expect(payload.truncated).toBe(true);
});
it("strips and truncates HTML from error responses", async () => {
const long = "x".repeat(12_000);
const html =