diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e9bd5fd9af..b6cf77d85ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ Docs: https://docs.openclaw.ai - Sessions/Agents: pass `agentId` when resolving existing transcript paths in reply runs so non-default agents and heartbeat/chat handlers no longer fail with `Session file path must be within sessions directory`. (#15141) Thanks @Goldenmonstew. - Sessions/Agents: pass `agentId` through status and usage transcript-resolution paths (auto-reply, gateway usage APIs, and session cost/log loaders) so non-default agents can resolve absolute session files without path-validation failures. (#15103) Thanks @jalehman. - Signal/Install: auto-install `signal-cli` via Homebrew on non-x64 Linux architectures, avoiding x86_64 native binary `Exec format error` failures on arm64/arm hosts. (#15443) Thanks @jogvan-k. +- Web tools/web_fetch: prefer `text/markdown` responses for Cloudflare Markdown for Agents, add `cf-markdown` extraction for markdown bodies, and redact fetched URLs in `x-markdown-tokens` debug logs to avoid leaking raw paths/query params. (#15376) Thanks @Yaxuan42. 
/**
 * Reduce a URL to a form that is safe to write to debug logs.
 *
 * Only the scheme and host (including any port) are kept. The path is
 * collapsed to `/...`, and userinfo, query string and fragment are dropped,
 * so secrets embedded in the URL are not leaked.
 *
 * @param rawUrl - The URL string to redact.
 * @returns `<origin>/...` when the URL has a non-root path, `<origin>` when
 *   the path is empty or `/`, or `"[invalid-url]"` when `rawUrl` cannot be
 *   parsed as a URL.
 */
function redactUrlForDebugLog(rawUrl: string): string {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return "[invalid-url]";
  }

  // `URL.origin` serializes to the literal string "null" for schemes with an
  // opaque origin (file:, data:, custom schemes). Fall back to scheme + host
  // so the log line stays meaningful while still hiding the path.
  let base = parsed.origin;
  if (base === "null") {
    base = parsed.host ? `${parsed.protocol}//${parsed.host}` : parsed.protocol;
  }

  const hasNonRootPath = parsed.pathname !== "" && parsed.pathname !== "/";
  return hasNonRootPath ? `${base}/...` : base;
}