feat(tools): add URL allowlist for web_search and web_fetch

Add optional urlAllowlist config at tools.web level that restricts which
URLs can be accessed by web tools:

- Config types (types.tools.ts): Add urlAllowlist?: string[] to tools.web
- Zod schema: Add urlAllowlist field to ToolsWebSchema
- Schema help: Add help text for the new config fields
- web_search: Filter Brave search results by allowlist (provider=brave)
- web_fetch: Block URLs not matching allowlist before fetching
- ssrf.ts: Export normalizeHostnameAllowlist and matchesHostnameAllowlist

URL matching is performed on the URL's hostname and supports:
- Exact domain match (example.com)
- Wildcard patterns (*.github.com)

When urlAllowlist is not configured (or is an empty list), all URLs are allowed (backwards compatible).

Tests: Add web-tools.url-allowlist.test.ts with 23 tests covering:
- URL allowlist resolution from config
- Wildcard pattern matching
- web_fetch error response format
- Brave search result filtering
This commit is contained in:
smartprogrammer93
2026-02-16 21:09:44 +00:00
committed by Peter Steinberger
parent e179d453c7
commit 6d2e3685d6
7 changed files with 305 additions and 14 deletions

View File

@@ -2,6 +2,7 @@ import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import type { AnyAgentTool } from "./common.js";
import { formatCliCommand } from "../../cli/command-format.js";
import { matchesHostnameAllowlist, normalizeHostnameAllowlist } from "../../infra/net/ssrf.js";
import { wrapWebContent } from "../../security/external-content.js";
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
@@ -75,6 +76,43 @@ type WebSearchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
: undefined
: undefined;
type WebConfig = NonNullable<OpenClawConfig["tools"]>["web"];
/**
 * Reads the `urlAllowlist` setting from the `tools.web` config section.
 *
 * @param web - The `tools.web` config object; may be absent.
 * @returns The configured allowlist array (same reference, not a copy) when it
 *   is a non-empty array; `undefined` when the config is missing, is not an
 *   object, has no `urlAllowlist` key, holds a non-array value, or holds an
 *   empty array.
 */
export function resolveUrlAllowlist(web?: WebConfig): string[] | undefined {
  // `typeof null` is "object", so null has to be excluded explicitly.
  if (typeof web !== "object" || web === null || !("urlAllowlist" in web)) {
    return undefined;
  }
  const configured = web.urlAllowlist;
  // An empty list is reported the same way as an unset one.
  return Array.isArray(configured) && configured.length > 0 ? configured : undefined;
}
/**
 * Drops search results whose URL hostname is not accepted by the allowlist.
 *
 * The allowlist is normalized once with `normalizeHostnameAllowlist`, and each
 * result's hostname is checked with `matchesHostnameAllowlist`.
 *
 * Entries are kept (fail-open) when:
 * - they have no `url` (missing or empty string);
 * - parsing the `url` or evaluating the match throws.
 *
 * @param results - Search result entries to filter; the array is not mutated.
 * @param allowlist - Raw allowlist patterns. An empty list disables filtering
 *   and the input array itself is returned.
 * @returns The entries that pass the allowlist check, in their original order.
 */
export function filterResultsByAllowlist(
  results: Array<{ url?: string; siteName?: string }>,
  allowlist: string[],
): Array<{ url?: string; siteName?: string }> {
  if (allowlist.length === 0) {
    return results;
  }

  const allowedHosts = normalizeHostnameAllowlist(allowlist);

  const isPermitted = (entry: { url?: string; siteName?: string }): boolean => {
    const rawUrl = entry.url;
    if (!rawUrl) {
      // Nothing to check against the allowlist, so the entry stays.
      return true;
    }
    try {
      const { hostname } = new URL(rawUrl);
      return matchesHostnameAllowlist(hostname, allowedHosts);
    } catch {
      // Unparseable URLs are passed through rather than dropped.
      return true;
    }
  };

  return results.filter((entry) => isPermitted(entry));
}
type BraveSearchResult = {
title?: string;
url?: string;
@@ -566,6 +604,7 @@ async function runWebSearch(params: {
perplexityModel?: string;
grokModel?: string;
grokInlineCitations?: boolean;
urlAllowlist?: string[];
}): Promise<Record<string, unknown>> {
const cacheKey = normalizeCacheKey(
params.provider === "brave"
@@ -688,10 +727,15 @@ async function runWebSearch(params: {
};
});
// Filter results by urlAllowlist if configured
const filteredResults = params.urlAllowlist
? filterResultsByAllowlist(mapped, params.urlAllowlist)
: mapped;
const payload = {
query: params.query,
provider: params.provider,
count: mapped.length,
count: filteredResults.length,
tookMs: Date.now() - start,
externalContent: {
untrusted: true,
@@ -699,7 +743,7 @@ async function runWebSearch(params: {
provider: params.provider,
wrapped: true,
},
results: mapped,
results: filteredResults,
};
writeCache(SEARCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
@@ -717,6 +761,7 @@ export function createWebSearchTool(options?: {
const provider = resolveSearchProvider(search);
const perplexityConfig = resolvePerplexityConfig(search);
const grokConfig = resolveGrokConfig(search);
const urlAllowlist = resolveUrlAllowlist(options?.config?.tools?.web);
const description =
provider === "perplexity"
@@ -786,6 +831,7 @@ export function createWebSearchTool(options?: {
perplexityModel: resolvePerplexityModel(perplexityConfig),
grokModel: resolveGrokModel(grokConfig),
grokInlineCitations: resolveGrokInlineCitations(grokConfig),
urlAllowlist,
});
return jsonResult(result);
},
@@ -803,4 +849,6 @@ export const __testing = {
resolveGrokModel,
resolveGrokInlineCitations,
extractGrokContent,
resolveUrlAllowlist,
filterResultsByAllowlist,
} as const;