mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 01:08:28 +00:00
feat(tools): add URL allowlist for web_search and web_fetch
Add optional urlAllowlist config at tools.web level that restricts which URLs can be accessed by web tools:

- Config types (types.tools.ts): Add urlAllowlist?: string[] to tools.web
- Zod schema: Add urlAllowlist field to ToolsWebSchema
- Schema help: Add help text for the new config fields
- web_search: Filter Brave search results by allowlist (provider=brave)
- web_fetch: Block URLs not matching allowlist before fetching
- ssrf.ts: Export normalizeHostnameAllowlist and matchesHostnameAllowlist

URL matching supports:

- Exact domain match (example.com)
- Wildcard patterns (*.github.com)

When urlAllowlist is not configured, all URLs are allowed (backwards compatible).

Tests: Add web-tools.url-allowlist.test.ts with 23 tests covering:

- URL allowlist resolution from config
- Wildcard pattern matching
- web_fetch error response format
- Brave search result filtering
This commit is contained in:
committed by
Peter Steinberger
parent
e179d453c7
commit
6d2e3685d6
@@ -2,6 +2,7 @@ import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { AnyAgentTool } from "./common.js";
|
||||
import { formatCliCommand } from "../../cli/command-format.js";
|
||||
import { matchesHostnameAllowlist, normalizeHostnameAllowlist } from "../../infra/net/ssrf.js";
|
||||
import { wrapWebContent } from "../../security/external-content.js";
|
||||
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
|
||||
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
|
||||
@@ -75,6 +76,43 @@ type WebSearchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
|
||||
: undefined
|
||||
: undefined;
|
||||
|
||||
type WebConfig = NonNullable<OpenClawConfig["tools"]>["web"];
|
||||
|
||||
/**
 * Reads the `urlAllowlist` setting out of the `tools.web` config section.
 *
 * @param web - The `tools.web` config object; may be missing entirely.
 * @returns The configured `urlAllowlist` array when it exists and contains at
 *   least one entry. Returns `undefined` when `web` is absent or not an
 *   object, when the `urlAllowlist` key is missing, when its value is not an
 *   array, or when the array is empty.
 */
export function resolveUrlAllowlist(web?: WebConfig): string[] | undefined {
  if (web && typeof web === "object" && "urlAllowlist" in web) {
    const entries = web.urlAllowlist;
    // An empty list is reported the same way as "not configured".
    if (Array.isArray(entries) && entries.length > 0) {
      return entries;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Drops search results whose URL hostname does not match the allowlist.
 *
 * @param results - Search result entries; only the `url` field is inspected.
 * @param allowlist - Raw allowlist patterns; they are passed through
 *   `normalizeHostnameAllowlist` before matching.
 * @returns `results` itself when `allowlist` is empty; otherwise a new array
 *   holding the entries that are kept. An entry is kept when it has no `url`,
 *   when its hostname satisfies `matchesHostnameAllowlist`, or when evaluating
 *   it throws (for example, `url` cannot be parsed).
 */
export function filterResultsByAllowlist(
  results: Array<{ url?: string; siteName?: string }>,
  allowlist: string[],
): Array<{ url?: string; siteName?: string }> {
  // Nothing to enforce: hand back the caller's array untouched.
  if (allowlist.length === 0) {
    return results;
  }

  const allowedHosts = normalizeHostnameAllowlist(allowlist);

  const shouldKeep = (entry: { url?: string; siteName?: string }) => {
    const rawUrl = entry.url;
    if (!rawUrl) {
      // Entries that carry no URL are not subject to filtering.
      return true;
    }
    try {
      return matchesHostnameAllowlist(new URL(rawUrl).hostname, allowedHosts);
    } catch {
      // Unparseable URLs are deliberately passed through rather than dropped.
      return true;
    }
  };

  return results.filter((entry) => shouldKeep(entry));
}
|
||||
|
||||
type BraveSearchResult = {
|
||||
title?: string;
|
||||
url?: string;
|
||||
@@ -566,6 +604,7 @@ async function runWebSearch(params: {
|
||||
perplexityModel?: string;
|
||||
grokModel?: string;
|
||||
grokInlineCitations?: boolean;
|
||||
urlAllowlist?: string[];
|
||||
}): Promise<Record<string, unknown>> {
|
||||
const cacheKey = normalizeCacheKey(
|
||||
params.provider === "brave"
|
||||
@@ -688,10 +727,15 @@ async function runWebSearch(params: {
|
||||
};
|
||||
});
|
||||
|
||||
// Filter results by urlAllowlist if configured
|
||||
const filteredResults = params.urlAllowlist
|
||||
? filterResultsByAllowlist(mapped, params.urlAllowlist)
|
||||
: mapped;
|
||||
|
||||
const payload = {
|
||||
query: params.query,
|
||||
provider: params.provider,
|
||||
count: mapped.length,
|
||||
count: filteredResults.length,
|
||||
tookMs: Date.now() - start,
|
||||
externalContent: {
|
||||
untrusted: true,
|
||||
@@ -699,7 +743,7 @@ async function runWebSearch(params: {
|
||||
provider: params.provider,
|
||||
wrapped: true,
|
||||
},
|
||||
results: mapped,
|
||||
results: filteredResults,
|
||||
};
|
||||
writeCache(SEARCH_CACHE, cacheKey, payload, params.cacheTtlMs);
|
||||
return payload;
|
||||
@@ -717,6 +761,7 @@ export function createWebSearchTool(options?: {
|
||||
const provider = resolveSearchProvider(search);
|
||||
const perplexityConfig = resolvePerplexityConfig(search);
|
||||
const grokConfig = resolveGrokConfig(search);
|
||||
const urlAllowlist = resolveUrlAllowlist(options?.config?.tools?.web);
|
||||
|
||||
const description =
|
||||
provider === "perplexity"
|
||||
@@ -786,6 +831,7 @@ export function createWebSearchTool(options?: {
|
||||
perplexityModel: resolvePerplexityModel(perplexityConfig),
|
||||
grokModel: resolveGrokModel(grokConfig),
|
||||
grokInlineCitations: resolveGrokInlineCitations(grokConfig),
|
||||
urlAllowlist,
|
||||
});
|
||||
return jsonResult(result);
|
||||
},
|
||||
@@ -803,4 +849,6 @@ export const __testing = {
|
||||
resolveGrokModel,
|
||||
resolveGrokInlineCitations,
|
||||
extractGrokContent,
|
||||
resolveUrlAllowlist,
|
||||
filterResultsByAllowlist,
|
||||
} as const;
|
||||
|
||||
Reference in New Issue
Block a user