mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 11:41:24 +00:00
feat: add configurable web_fetch maxChars cap
This commit is contained in:
@@ -95,6 +95,17 @@ function resolveFetchReadabilityEnabled(fetch?: WebFetchConfig): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Resolves the upper bound applied to `maxChars` for web fetches.
 *
 * Reads `maxCharsCap` from the fetch config. When the config is absent, the
 * key is missing, or the value is not a finite number, the default fetch
 * limit (`DEFAULT_FETCH_MAX_CHARS`) is used as the cap instead.
 *
 * @param fetch - Optional web fetch configuration.
 * @returns The configured cap, floored to an integer and never below 100.
 */
function resolveFetchMaxCharsCap(fetch?: WebFetchConfig): number {
  if (!fetch || !("maxCharsCap" in fetch)) {
    return DEFAULT_FETCH_MAX_CHARS;
  }
  const configured = fetch.maxCharsCap;
  // Rejects non-numbers as well as NaN and +/-Infinity.
  if (typeof configured !== "number" || !Number.isFinite(configured)) {
    return DEFAULT_FETCH_MAX_CHARS;
  }
  const whole = Math.floor(configured);
  return Math.max(100, whole);
}
|
||||
|
||||
function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig {
|
||||
if (!fetch || typeof fetch !== "object") {
|
||||
return undefined;
|
||||
@@ -160,9 +171,10 @@ function resolveFirecrawlMaxAgeMsOrDefault(firecrawl?: FirecrawlFetchConfig): nu
|
||||
return DEFAULT_FIRECRAWL_MAX_AGE_MS;
|
||||
}
|
||||
|
||||
/**
 * Resolves the effective `maxChars` limit for a fetch.
 *
 * @param value - Requested limit; used only when it is a finite number.
 * @param fallback - Limit used when `value` is missing or not a finite number.
 * @param cap - Upper bound for the result.
 * @returns The requested (or fallback) limit, floored to an integer, raised to
 *   at least 100, and then limited to `cap`.
 */
function resolveMaxChars(value: unknown, fallback: number, cap: number): number {
  const parsed = typeof value === "number" && Number.isFinite(value) ? value : fallback;
  const clamped = Math.max(100, Math.floor(parsed));
  // The cap is applied last, so a cap below 100 takes precedence over the floor.
  return Math.min(clamped, cap);
}
|
||||
|
||||
function resolveMaxRedirects(value: unknown, fallback: number): number {
|
||||
@@ -647,10 +659,15 @@ export function createWebFetchTool(options?: {
|
||||
const url = readStringParam(params, "url", { required: true });
|
||||
const extractMode = readStringParam(params, "extractMode") === "text" ? "text" : "markdown";
|
||||
const maxChars = readNumberParam(params, "maxChars", { integer: true });
|
||||
const maxCharsCap = resolveFetchMaxCharsCap(fetch);
|
||||
const result = await runWebFetch({
|
||||
url,
|
||||
extractMode,
|
||||
maxChars: resolveMaxChars(maxChars ?? fetch?.maxChars, DEFAULT_FETCH_MAX_CHARS),
|
||||
maxChars: resolveMaxChars(
|
||||
maxChars ?? fetch?.maxChars,
|
||||
DEFAULT_FETCH_MAX_CHARS,
|
||||
maxCharsCap,
|
||||
),
|
||||
maxRedirects: resolveMaxRedirects(fetch?.maxRedirects, DEFAULT_FETCH_MAX_REDIRECTS),
|
||||
timeoutSeconds: resolveTimeoutSeconds(fetch?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
|
||||
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
|
||||
|
||||
@@ -49,6 +49,20 @@ function firecrawlError(): MockResponse {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a successful (HTTP 200) mock response carrying a text body.
 *
 * @param text - Body resolved by the response's `text()` method.
 * @param url - URL reported by the response.
 * @param contentType - Value of the `content-type` header.
 * @returns A mock response with `ok: true` and `status: 200`.
 */
function textResponse(
  text: string,
  url = "https://example.com/",
  contentType = "text/plain; charset=utf-8",
): MockResponse {
  const headers = makeHeaders({ "content-type": contentType });
  const readBody = async () => text;
  return { ok: true, status: 200, url, headers, text: readBody };
}
|
||||
|
||||
function errorHtmlResponse(
|
||||
html: string,
|
||||
status = 404,
|
||||
@@ -322,6 +336,37 @@ describe("web_fetch extraction fallbacks", () => {
|
||||
expect(details.extractor).toBe("firecrawl");
|
||||
expect(details.text).toContain("firecrawl fallback");
|
||||
});
|
||||
|
||||
it("wraps external content and clamps oversized maxChars", async () => {
  const configuredCap = 10_000;
  // The body is larger than the cap so the result has to be truncated.
  const oversizedBody = "a".repeat(80_000);
  const mockFetch = vi.fn((input: RequestInfo) => {
    const response = textResponse(oversizedBody, requestUrl(input));
    return Promise.resolve(response) as Promise<Response>;
  });
  // @ts-expect-error mock fetch
  global.fetch = mockFetch;

  const tool = createWebFetchTool({
    config: {
      tools: {
        web: {
          fetch: {
            cacheTtlMinutes: 0,
            firecrawl: { enabled: false },
            maxCharsCap: configuredCap,
          },
        },
      },
    },
    sandboxed: false,
  });

  // Request far more characters than the configured cap allows.
  const result = await tool?.execute?.("call", {
    url: "https://example.com/large",
    maxChars: 200_000,
  });
  const details = result?.details as { text?: string; length?: number; truncated?: boolean };

  expect(details.text).toContain("<<<EXTERNAL_UNTRUSTED_CONTENT>>>");
  expect(details.text).toContain("Source: Web Fetch");
  expect(details.length).toBeLessThanOrEqual(configuredCap);
  expect(details.truncated).toBe(true);
});
|
||||
it("strips and truncates HTML from error responses", async () => {
|
||||
const long = "x".repeat(12_000);
|
||||
const html =
|
||||
|
||||
Reference in New Issue
Block a user