mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 22:38:26 +00:00
feat(tools): add URL allowlist for web_search and web_fetch
Add optional urlAllowlist config at tools.web level that restricts which URLs can be accessed by web tools: - Config types (types.tools.ts): Add urlAllowlist?: string[] to tools.web - Zod schema: Add urlAllowlist field to ToolsWebSchema - Schema help: Add help text for the new config fields - web_search: Filter Brave search results by allowlist (provider=brave) - web_fetch: Block URLs not matching allowlist before fetching - ssrf.ts: Export normalizeHostnameAllowlist and matchesHostnameAllowlist URL matching supports: - Exact domain match (example.com) - Wildcard patterns (*.github.com) When urlAllowlist is not configured, all URLs are allowed (backwards compatible). Tests: Add web-tools.url-allowlist.test.ts with 23 tests covering: - URL allowlist resolution from config - Wildcard pattern matching - web_fetch error response format - Brave search result filtering
This commit is contained in:
committed by
Peter Steinberger
parent
e179d453c7
commit
6d2e3685d6
@@ -2,7 +2,11 @@ import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { AnyAgentTool } from "./common.js";
|
||||
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
|
||||
import { SsrFBlockedError } from "../../infra/net/ssrf.js";
|
||||
import {
|
||||
matchesHostnameAllowlist,
|
||||
normalizeHostnameAllowlist,
|
||||
SsrFBlockedError,
|
||||
} from "../../infra/net/ssrf.js";
|
||||
import { logDebug } from "../../logger.js";
|
||||
import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js";
|
||||
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
|
||||
@@ -68,6 +72,32 @@ type WebFetchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer
|
||||
: undefined
|
||||
: undefined;
|
||||
|
||||
type WebConfig = NonNullable<OpenClawConfig["tools"]>["web"];
|
||||
|
||||
export function resolveFetchUrlAllowlist(web?: WebConfig): string[] | undefined {
|
||||
if (!web || typeof web !== "object") {
|
||||
return undefined;
|
||||
}
|
||||
if (!("urlAllowlist" in web)) {
|
||||
return undefined;
|
||||
}
|
||||
const allowlist = web.urlAllowlist;
|
||||
if (!Array.isArray(allowlist)) {
|
||||
return undefined;
|
||||
}
|
||||
return allowlist.length > 0 ? allowlist : undefined;
|
||||
}
|
||||
|
||||
export function isUrlAllowedByAllowlist(url: string, allowlist: string[]): boolean {
|
||||
try {
|
||||
const hostname = new URL(url).hostname;
|
||||
const normalizedAllowlist = normalizeHostnameAllowlist(allowlist);
|
||||
return matchesHostnameAllowlist(hostname, normalizedAllowlist);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
type FirecrawlFetchConfig =
|
||||
| {
|
||||
enabled?: boolean;
|
||||
@@ -732,6 +762,7 @@ export function createWebFetchTool(options?: {
|
||||
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
|
||||
DEFAULT_FETCH_USER_AGENT;
|
||||
const maxResponseBytes = resolveFetchMaxResponseBytes(fetch);
|
||||
const urlAllowlist = resolveFetchUrlAllowlist(options?.config?.tools?.web);
|
||||
return {
|
||||
label: "Web Fetch",
|
||||
name: "web_fetch",
|
||||
@@ -741,6 +772,25 @@ export function createWebFetchTool(options?: {
|
||||
execute: async (_toolCallId, args) => {
|
||||
const params = args as Record<string, unknown>;
|
||||
const url = readStringParam(params, "url", { required: true });
|
||||
|
||||
// Check URL against allowlist if configured
|
||||
if (urlAllowlist && urlAllowlist.length > 0) {
|
||||
if (!isUrlAllowedByAllowlist(url, urlAllowlist)) {
|
||||
let hostname: string;
|
||||
try {
|
||||
hostname = new URL(url).hostname;
|
||||
} catch {
|
||||
hostname = url;
|
||||
}
|
||||
return jsonResult({
|
||||
error: "url_not_allowed",
|
||||
message: `URL not in allowlist. Allowed domains: ${urlAllowlist.join(", ")}`,
|
||||
blockedUrl: url,
|
||||
blockedHostname: hostname,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const extractMode = readStringParam(params, "extractMode") === "text" ? "text" : "markdown";
|
||||
const maxChars = readNumberParam(params, "maxChars", { integer: true });
|
||||
const maxCharsCap = resolveFetchMaxCharsCap(fetch);
|
||||
|
||||
Reference in New Issue
Block a user