refactor(web): unify proxy-guarded fetch path for web tools

This commit is contained in:
Peter Steinberger
2026-02-26 12:44:06 +01:00
parent 8bf1c9a23a
commit b74be2577f
5 changed files with 386 additions and 252 deletions

View File

@@ -1,6 +1,5 @@
import { Type } from "@sinclair/typebox"; import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js"; import type { OpenClawConfig } from "../../config/config.js";
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
import { SsrFBlockedError } from "../../infra/net/ssrf.js"; import { SsrFBlockedError } from "../../infra/net/ssrf.js";
import { logDebug } from "../../logger.js"; import { logDebug } from "../../logger.js";
import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js"; import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js";
@@ -15,6 +14,7 @@ import {
truncateText, truncateText,
type ExtractMode, type ExtractMode,
} from "./web-fetch-utils.js"; } from "./web-fetch-utils.js";
import { fetchWithWebToolsNetworkGuard } from "./web-guarded-fetch.js";
import { import {
CacheEntry, CacheEntry,
DEFAULT_CACHE_TTL_MINUTES, DEFAULT_CACHE_TTL_MINUTES,
@@ -523,11 +523,10 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string
let release: (() => Promise<void>) | null = null; let release: (() => Promise<void>) | null = null;
let finalUrl = params.url; let finalUrl = params.url;
try { try {
const result = await fetchWithSsrFGuard({ const result = await fetchWithWebToolsNetworkGuard({
url: params.url, url: params.url,
maxRedirects: params.maxRedirects, maxRedirects: params.maxRedirects,
timeoutMs: params.timeoutSeconds * 1000, timeoutSeconds: params.timeoutSeconds,
proxy: "env",
init: { init: {
headers: { headers: {
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1", Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",

View File

@@ -0,0 +1,50 @@
import {
fetchWithSsrFGuard,
type GuardedFetchOptions,
type GuardedFetchResult,
} from "../../infra/net/fetch-guard.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
/**
 * Shared SSRF policy object for web-tool requests that opt in to
 * `dangerouslyAllowPrivateNetwork`.
 *
 * NOTE(review): the name suggests this is meant only for endpoints the web
 * tools treat as trusted; the exact effect of the flag is defined by the SSRF
 * guard in infra/net — confirm there before reusing this for arbitrary URLs.
 */
export const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
dangerouslyAllowPrivateNetwork: true,
};
/**
 * Options accepted by the web-tool guarded fetch helpers in this module.
 *
 * This is `GuardedFetchOptions` without `proxy` (callers cannot choose it
 * here), extended with an optional timeout expressed in seconds.
 */
type WebToolGuardedFetchOptions = Omit<GuardedFetchOptions, "proxy"> & {
// Timeout in seconds; converted to milliseconds (x1000) by resolveTimeoutMs.
timeoutSeconds?: number;
};
/**
 * Picks the effective timeout in milliseconds from two optional inputs.
 *
 * Precedence:
 * 1. `timeoutMs`, when it is a finite number, is returned unchanged.
 * 2. Otherwise `timeoutSeconds`, when it is a finite number, is returned
 *    multiplied by 1000.
 * 3. Otherwise `undefined`.
 *
 * `NaN` and `±Infinity` are treated as "not provided" for both fields.
 *
 * @param params - Candidate timeouts in milliseconds and/or seconds.
 * @returns The resolved timeout in milliseconds, or `undefined` if neither
 *   field holds a finite number.
 */
function resolveTimeoutMs(params: {
  timeoutMs?: number;
  timeoutSeconds?: number;
}): number | undefined {
  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === "number" && Number.isFinite(candidate);

  const { timeoutMs, timeoutSeconds } = params;
  if (isFiniteNumber(timeoutMs)) {
    // An explicit millisecond value always wins over the seconds value.
    return timeoutMs;
  }
  return isFiniteNumber(timeoutSeconds) ? timeoutSeconds * 1000 : undefined;
}
/**
 * Runs `fetchWithSsrFGuard` with the settings shared by the web tools.
 *
 * All caller options are forwarded as-is, except that:
 * - `timeoutSeconds` is removed and folded into `timeoutMs` via
 *   `resolveTimeoutMs` (a finite `timeoutMs` takes precedence over
 *   `timeoutSeconds`);
 * - `proxy` is always set to `"env"`.
 *
 * @param params - Guarded fetch options (without `proxy`) plus an optional
 *   `timeoutSeconds`.
 * @returns Whatever `fetchWithSsrFGuard` resolves with.
 */
export async function fetchWithWebToolsNetworkGuard(
  params: WebToolGuardedFetchOptions,
): Promise<GuardedFetchResult> {
  const { timeoutSeconds, timeoutMs, ...guardOptions } = params;
  const effectiveTimeoutMs = resolveTimeoutMs({ timeoutMs, timeoutSeconds });
  return fetchWithSsrFGuard({
    ...guardOptions,
    timeoutMs: effectiveTimeoutMs,
    proxy: "env",
  });
}
/**
 * Performs a guarded web-tool fetch, hands the result to `run`, and always
 * awaits the guard's `release()` afterwards — whether `run` resolves or
 * throws.
 *
 * If the fetch itself rejects, `run` is never invoked and there is nothing to
 * release; the rejection propagates to the caller.
 *
 * @param params - Options forwarded to `fetchWithWebToolsNetworkGuard`.
 * @param run - Callback receiving the `response` and `finalUrl` of the fetch.
 * @returns The value `run` resolves with.
 */
export async function withWebToolsNetworkGuard<T>(
  params: WebToolGuardedFetchOptions,
  run: (result: { response: Response; finalUrl: string }) => Promise<T>,
): Promise<T> {
  const guarded = await fetchWithWebToolsNetworkGuard(params);
  const releaseGuard = guarded.release;
  try {
    const outcome = await run({
      response: guarded.response,
      finalUrl: guarded.finalUrl,
    });
    return outcome;
  } finally {
    // Release runs on both the success and the failure path.
    await releaseGuard();
  }
}

View File

@@ -33,6 +33,7 @@ describe("web_search redirect resolution hardening", () => {
timeoutMs: 5000, timeoutMs: 5000,
init: { method: "HEAD" }, init: { method: "HEAD" },
policy: { dangerouslyAllowPrivateNetwork: true }, policy: { dangerouslyAllowPrivateNetwork: true },
proxy: "env",
}), }),
); );
expect(release).toHaveBeenCalledTimes(1); expect(release).toHaveBeenCalledTimes(1);

View File

@@ -2,11 +2,14 @@ import { Type } from "@sinclair/typebox";
import { formatCliCommand } from "../../cli/command-format.js"; import { formatCliCommand } from "../../cli/command-format.js";
import type { OpenClawConfig } from "../../config/config.js"; import type { OpenClawConfig } from "../../config/config.js";
import { logVerbose } from "../../globals.js"; import { logVerbose } from "../../globals.js";
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
import { wrapWebContent } from "../../security/external-content.js"; import { wrapWebContent } from "../../security/external-content.js";
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js"; import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
import type { AnyAgentTool } from "./common.js"; import type { AnyAgentTool } from "./common.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js"; import { jsonResult, readNumberParam, readStringParam } from "./common.js";
import {
WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY,
withWebToolsNetworkGuard,
} from "./web-guarded-fetch.js";
import { import {
CacheEntry, CacheEntry,
DEFAULT_CACHE_TTL_MINUTES, DEFAULT_CACHE_TTL_MINUTES,
@@ -44,7 +47,6 @@ const BRAVE_FRESHNESS_SHORTCUTS = new Set(["pd", "pw", "pm", "py"]);
const BRAVE_FRESHNESS_RANGE = /^(\d{4}-\d{2}-\d{2})to(\d{4}-\d{2}-\d{2})$/; const BRAVE_FRESHNESS_RANGE = /^(\d{4}-\d{2}-\d{2})to(\d{4}-\d{2}-\d{2})$/;
const BRAVE_SEARCH_LANG_CODE = /^[a-z]{2}$/i; const BRAVE_SEARCH_LANG_CODE = /^[a-z]{2}$/i;
const BRAVE_UI_LANG_LOCALE = /^([a-z]{2})-([a-z]{2})$/i; const BRAVE_UI_LANG_LOCALE = /^([a-z]{2})-([a-z]{2})$/i;
const TRUSTED_NETWORK_SSRF_POLICY = { dangerouslyAllowPrivateNetwork: true } as const;
const WebSearchSchema = Type.Object({ const WebSearchSchema = Type.Object({
query: Type.String({ description: "Search query string." }), query: Type.String({ description: "Search query string." }),
@@ -599,19 +601,23 @@ function resolveGeminiModel(gemini?: GeminiConfig): string {
return fromConfig || DEFAULT_GEMINI_MODEL; return fromConfig || DEFAULT_GEMINI_MODEL;
} }
async function fetchTrustedWebSearchEndpoint(params: { async function withTrustedWebSearchEndpoint<T>(
params: {
url: string; url: string;
timeoutSeconds: number; timeoutSeconds: number;
init: RequestInit; init: RequestInit;
}): Promise<{ response: Response; release: () => Promise<void> }> { },
const { response, release } = await fetchWithSsrFGuard({ run: (response: Response) => Promise<T>,
): Promise<T> {
return withWebToolsNetworkGuard(
{
url: params.url, url: params.url,
init: params.init, init: params.init,
timeoutMs: params.timeoutSeconds * 1000, timeoutSeconds: params.timeoutSeconds,
policy: TRUSTED_NETWORK_SSRF_POLICY, policy: WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY,
proxy: "env", },
}); async ({ response }) => run(response),
return { response, release }; );
} }
async function runGeminiSearch(params: { async function runGeminiSearch(params: {
@@ -622,7 +628,8 @@ async function runGeminiSearch(params: {
}): Promise<{ content: string; citations: Array<{ url: string; title?: string }> }> { }): Promise<{ content: string; citations: Array<{ url: string; title?: string }> }> {
const endpoint = `${GEMINI_API_BASE}/models/${params.model}:generateContent`; const endpoint = `${GEMINI_API_BASE}/models/${params.model}:generateContent`;
const { response: res, release } = await fetchTrustedWebSearchEndpoint({ return withTrustedWebSearchEndpoint(
{
url: endpoint, url: endpoint,
timeoutSeconds: params.timeoutSeconds, timeoutSeconds: params.timeoutSeconds,
init: { init: {
@@ -640,12 +647,15 @@ async function runGeminiSearch(params: {
tools: [{ google_search: {} }], tools: [{ google_search: {} }],
}), }),
}, },
}); },
try { async (res) => {
if (!res.ok) { if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 }); const detailResult = await readResponseText(res, { maxBytes: 64_000 });
// Strip API key from any error detail to prevent accidental key leakage in logs // Strip API key from any error detail to prevent accidental key leakage in logs
const safeDetail = (detailResult.text || res.statusText).replace(/key=[^&\s]+/gi, "key=***"); const safeDetail = (detailResult.text || res.statusText).replace(
/key=[^&\s]+/gi,
"key=***",
);
throw new Error(`Gemini API error (${res.status}): ${safeDetail}`); throw new Error(`Gemini API error (${res.status}): ${safeDetail}`);
} }
@@ -694,9 +704,8 @@ async function runGeminiSearch(params: {
} }
return { content, citations }; return { content, citations };
} finally { },
await release(); );
}
} }
const REDIRECT_TIMEOUT_MS = 5000; const REDIRECT_TIMEOUT_MS = 5000;
@@ -707,18 +716,15 @@ const REDIRECT_TIMEOUT_MS = 5000;
*/ */
async function resolveRedirectUrl(url: string): Promise<string> { async function resolveRedirectUrl(url: string): Promise<string> {
try { try {
const { finalUrl, release } = await fetchWithSsrFGuard({ return await withWebToolsNetworkGuard(
{
url, url,
init: { method: "HEAD" }, init: { method: "HEAD" },
timeoutMs: REDIRECT_TIMEOUT_MS, timeoutMs: REDIRECT_TIMEOUT_MS,
policy: TRUSTED_NETWORK_SSRF_POLICY, policy: WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY,
proxy: "env", },
}); async ({ finalUrl }) => finalUrl || url,
try { );
return finalUrl || url;
} finally {
await release();
}
} catch { } catch {
return url; return url;
} }
@@ -892,7 +898,8 @@ async function runPerplexitySearch(params: {
body.search_recency_filter = recencyFilter; body.search_recency_filter = recencyFilter;
} }
const { response: res, release } = await fetchTrustedWebSearchEndpoint({ return withTrustedWebSearchEndpoint(
{
url: endpoint, url: endpoint,
timeoutSeconds: params.timeoutSeconds, timeoutSeconds: params.timeoutSeconds,
init: { init: {
@@ -905,8 +912,8 @@ async function runPerplexitySearch(params: {
}, },
body: JSON.stringify(body), body: JSON.stringify(body),
}, },
}); },
try { async (res) => {
if (!res.ok) { if (!res.ok) {
return await throwWebSearchApiError(res, "Perplexity"); return await throwWebSearchApiError(res, "Perplexity");
} }
@@ -916,9 +923,8 @@ async function runPerplexitySearch(params: {
const citations = data.citations ?? []; const citations = data.citations ?? [];
return { content, citations }; return { content, citations };
} finally { },
await release(); );
}
} }
async function runGrokSearch(params: { async function runGrokSearch(params: {
@@ -948,7 +954,8 @@ async function runGrokSearch(params: {
// citations are returned automatically when available — we just parse // citations are returned automatically when available — we just parse
// them from the response without requesting them explicitly (#12910). // them from the response without requesting them explicitly (#12910).
const { response: res, release } = await fetchTrustedWebSearchEndpoint({ return withTrustedWebSearchEndpoint(
{
url: XAI_API_ENDPOINT, url: XAI_API_ENDPOINT,
timeoutSeconds: params.timeoutSeconds, timeoutSeconds: params.timeoutSeconds,
init: { init: {
@@ -959,8 +966,8 @@ async function runGrokSearch(params: {
}, },
body: JSON.stringify(body), body: JSON.stringify(body),
}, },
}); },
try { async (res) => {
if (!res.ok) { if (!res.ok) {
return await throwWebSearchApiError(res, "xAI"); return await throwWebSearchApiError(res, "xAI");
} }
@@ -973,9 +980,8 @@ async function runGrokSearch(params: {
const inlineCitations = data.inline_citations; const inlineCitations = data.inline_citations;
return { content, citations, inlineCitations }; return { content, citations, inlineCitations };
} finally { },
await release(); );
}
} }
function extractKimiMessageText(message: KimiMessage | undefined): string | undefined { function extractKimiMessageText(message: KimiMessage | undefined): string | undefined {
@@ -1047,7 +1053,8 @@ async function runKimiSearch(params: {
const MAX_ROUNDS = 3; const MAX_ROUNDS = 3;
for (let round = 0; round < MAX_ROUNDS; round += 1) { for (let round = 0; round < MAX_ROUNDS; round += 1) {
const { response: res, release } = await fetchTrustedWebSearchEndpoint({ const nextResult = await withTrustedWebSearchEndpoint(
{
url: endpoint, url: endpoint,
timeoutSeconds: params.timeoutSeconds, timeoutSeconds: params.timeoutSeconds,
init: { init: {
@@ -1062,8 +1069,10 @@ async function runKimiSearch(params: {
tools: [KIMI_WEB_SEARCH_TOOL], tools: [KIMI_WEB_SEARCH_TOOL],
}), }),
}, },
}); },
try { async (
res,
): Promise<{ done: true; content: string; citations: string[] } | { done: false }> => {
if (!res.ok) { if (!res.ok) {
return await throwWebSearchApiError(res, "Kimi"); return await throwWebSearchApiError(res, "Kimi");
} }
@@ -1078,7 +1087,7 @@ async function runKimiSearch(params: {
const toolCalls = message?.tool_calls ?? []; const toolCalls = message?.tool_calls ?? [];
if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) { if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) {
return { content: text ?? "No response", citations: [...collectedCitations] }; return { done: true, content: text ?? "No response", citations: [...collectedCitations] };
} }
messages.push({ messages.push({
@@ -1108,10 +1117,15 @@ async function runKimiSearch(params: {
} }
if (!pushedToolResult) { if (!pushedToolResult) {
return { content: text ?? "No response", citations: [...collectedCitations] }; return { done: true, content: text ?? "No response", citations: [...collectedCitations] };
} }
} finally {
await release(); return { done: false };
},
);
if (nextResult.done) {
return { content: nextResult.content, citations: nextResult.citations };
} }
} }
@@ -1287,7 +1301,8 @@ async function runWebSearch(params: {
url.searchParams.set("freshness", params.freshness); url.searchParams.set("freshness", params.freshness);
} }
const { response: res, release } = await fetchTrustedWebSearchEndpoint({ const mapped = await withTrustedWebSearchEndpoint(
{
url: url.toString(), url: url.toString(),
timeoutSeconds: params.timeoutSeconds, timeoutSeconds: params.timeoutSeconds,
init: { init: {
@@ -1297,15 +1312,8 @@ async function runWebSearch(params: {
"X-Subscription-Token": params.apiKey, "X-Subscription-Token": params.apiKey,
}, },
}, },
}); },
let mapped: Array<{ async (res) => {
title: string;
url: string;
description: string;
published?: string;
siteName?: string;
}> = [];
try {
if (!res.ok) { if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 }); const detailResult = await readResponseText(res, { maxBytes: 64_000 });
const detail = detailResult.text; const detail = detailResult.text;
@@ -1314,7 +1322,7 @@ async function runWebSearch(params: {
const data = (await res.json()) as BraveSearchResponse; const data = (await res.json()) as BraveSearchResponse;
const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : []; const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : [];
mapped = results.map((entry) => { return results.map((entry) => {
const description = entry.description ?? ""; const description = entry.description ?? "";
const title = entry.title ?? ""; const title = entry.title ?? "";
const url = entry.url ?? ""; const url = entry.url ?? "";
@@ -1327,9 +1335,8 @@ async function runWebSearch(params: {
siteName: rawSiteName || undefined, siteName: rawSiteName || undefined,
}; };
}); });
} finally { },
await release(); );
}
const payload = { const payload = {
query: params.query, query: params.query,

View File

@@ -46,6 +46,29 @@ function createKimiSearchTool(kimiConfig?: { apiKey?: string; baseUrl?: string;
}); });
} }
/**
 * Test helper: builds a sandboxed web_search tool configured for the given
 * provider with a fixed placeholder API key.
 *
 * Brave keeps its key directly on the search config; every other provider
 * nests it under a key named after the provider.
 */
function createProviderSearchTool(provider: "brave" | "perplexity" | "grok" | "gemini" | "kimi") {
  const buildSearchConfig = () => {
    switch (provider) {
      case "perplexity":
        return { provider, perplexity: { apiKey: "pplx-config-test" } };
      case "grok":
        return { provider, grok: { apiKey: "xai-config-test" } };
      case "gemini":
        return { provider, gemini: { apiKey: "gemini-config-test" } };
      case "kimi":
        return { provider, kimi: { apiKey: "moonshot-config-test" } };
      default:
        return { provider, apiKey: "brave-config-test" };
    }
  };

  const search = buildSearchConfig();
  return createWebSearchTool({
    config: { tools: { web: { search } } },
    sandboxed: true,
  });
}
function parseFirstRequestBody(mockFetch: ReturnType<typeof installMockFetch>) { function parseFirstRequestBody(mockFetch: ReturnType<typeof installMockFetch>) {
const request = mockFetch.mock.calls[0]?.[1] as RequestInit | undefined; const request = mockFetch.mock.calls[0]?.[1] as RequestInit | undefined;
const requestBody = request?.body; const requestBody = request?.body;
@@ -62,6 +85,34 @@ function installPerplexitySuccessFetch() {
}); });
} }
/**
 * Test helper: returns a minimal successful response body for the given
 * search provider, suitable for feeding to a mocked fetch.
 *
 * Any provider other than brave/perplexity/grok/gemini (i.e. kimi) gets the
 * chat-completion style payload with an empty `search_results` list.
 */
function createProviderSuccessPayload(
  provider: "brave" | "perplexity" | "grok" | "gemini" | "kimi",
) {
  switch (provider) {
    case "brave":
      return { web: { results: [] } };
    case "perplexity":
      return { choices: [{ message: { content: "ok" } }], citations: [] };
    case "grok":
      return { output_text: "ok", citations: [] };
    case "gemini": {
      const candidate = {
        content: { parts: [{ text: "ok" }] },
        groundingMetadata: { groundingChunks: [] },
      };
      return { candidates: [candidate] };
    }
    default: {
      // Kimi: one round that finishes without requesting tool calls.
      const choice = {
        finish_reason: "stop",
        message: { role: "assistant", content: "ok" },
      };
      return { choices: [choice], search_results: [] };
    }
  }
}
async function executePerplexitySearch( async function executePerplexitySearch(
query: string, query: string,
options?: { options?: {
@@ -159,6 +210,32 @@ describe("web_search country and language parameters", () => {
}); });
}); });
// Verifies, per provider, that the init object handed to the mocked fetch
// carries an EnvHttpProxyAgent dispatcher when HTTP_PROXY is set.
describe("web_search provider proxy dispatch", () => {
  const originalFetch = global.fetch;
  const providers = ["brave", "perplexity", "grok", "gemini", "kimi"] as const;

  afterEach(() => {
    // Drop the stubbed env vars and restore the fetch captured at suite setup.
    vi.unstubAllEnvs();
    global.fetch = originalFetch;
  });

  it.each(providers)(
    "uses proxy-aware dispatcher for %s provider when HTTP_PROXY is configured",
    async (provider) => {
      vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890");
      const fetchMock = installMockFetch(createProviderSuccessPayload(provider));

      const searchTool = createProviderSearchTool(provider);
      expect(searchTool).not.toBeNull();
      await searchTool?.execute?.("call-1", { query: `proxy-${provider}-test` });

      // Second argument of the first fetch call is the request init.
      const firstCall = fetchMock.mock.calls[0];
      const init = firstCall?.[1] as (RequestInit & { dispatcher?: unknown }) | undefined;
      expect(init?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent);
    },
  );
});
describe("web_search perplexity baseUrl defaults", () => { describe("web_search perplexity baseUrl defaults", () => {
const priorFetch = global.fetch; const priorFetch = global.fetch;