mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 14:44:32 +00:00
feat(tools): add URL allowlist for web_search and web_fetch
Add optional urlAllowlist config at tools.web level that restricts which URLs can be accessed by web tools: - Config types (types.tools.ts): Add urlAllowlist?: string[] to tools.web - Zod schema: Add urlAllowlist field to ToolsWebSchema - Schema help: Add help text for the new config fields - web_search: Filter Brave search results by allowlist (provider=brave) - web_fetch: Block URLs not matching allowlist before fetching - ssrf.ts: Export normalizeHostnameAllowlist and matchesHostnameAllowlist URL matching supports: - Exact domain match (example.com) - Wildcard patterns (*.github.com) When urlAllowlist is not configured, all URLs are allowed (backwards compatible). Tests: Add web-tools.url-allowlist.test.ts with 23 tests covering: - URL allowlist resolution from config - Wildcard pattern matching - web_fetch error response format - Brave search result filtering
This commit is contained in:
committed by
Peter Steinberger
parent
e179d453c7
commit
6d2e3685d6
193
src/agents/tools/web-tools.url-allowlist.test.ts
Normal file
193
src/agents/tools/web-tools.url-allowlist.test.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { isUrlAllowedByAllowlist, resolveFetchUrlAllowlist } from "./web-fetch.js";
|
||||
import { filterResultsByAllowlist, resolveUrlAllowlist } from "./web-search.js";
|
||||
|
||||
describe("web-search urlAllowlist", () => {
|
||||
describe("resolveUrlAllowlist", () => {
|
||||
it("returns undefined when web config is undefined", () => {
|
||||
const result = resolveUrlAllowlist(undefined);
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns undefined when urlAllowlist is not set", () => {
|
||||
const result = resolveUrlAllowlist({ search: { enabled: true } });
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns undefined when urlAllowlist is empty array", () => {
|
||||
const result = resolveUrlAllowlist({ urlAllowlist: [], search: { enabled: true } });
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns the allowlist when configured", () => {
|
||||
const result = resolveUrlAllowlist({
|
||||
urlAllowlist: ["example.com", "*.github.com"],
|
||||
search: { enabled: true },
|
||||
});
|
||||
expect(result).toEqual(["example.com", "*.github.com"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("filterResultsByAllowlist", () => {
|
||||
const results = [
|
||||
{ url: "https://example.com/page", siteName: "example.com" },
|
||||
{ url: "https://api.github.com/user/repo", siteName: "api.github.com" },
|
||||
{ url: "https://docs.openclaw.ai/guide", siteName: "docs.openclaw.ai" },
|
||||
{ url: "https://blocked.org/page", siteName: "blocked.org" },
|
||||
{ url: undefined, siteName: "unknown" }, // entry without URL
|
||||
];
|
||||
|
||||
it("returns all results when allowlist is empty", () => {
|
||||
const result = filterResultsByAllowlist(results, []);
|
||||
expect(result).toHaveLength(5);
|
||||
});
|
||||
|
||||
it("filters results by exact domain match", () => {
|
||||
const result = filterResultsByAllowlist(results, ["example.com"]);
|
||||
expect(result).toHaveLength(2); // example.com + entry without URL
|
||||
expect(result.map((r) => r.url)).toContain("https://example.com/page");
|
||||
expect(result.map((r) => r.url)).not.toContain("https://api.github.com/user/repo");
|
||||
});
|
||||
|
||||
it("filters results by wildcard pattern", () => {
|
||||
const result = filterResultsByAllowlist(results, ["*.github.com"]);
|
||||
expect(result).toHaveLength(2); // api.github.com + entry without URL
|
||||
expect(result.map((r) => r.url)).toContain("https://api.github.com/user/repo");
|
||||
expect(result.map((r) => r.url)).not.toContain("https://example.com/page");
|
||||
});
|
||||
|
||||
it("filters results with multiple patterns", () => {
|
||||
const result = filterResultsByAllowlist(results, ["example.com", "*.github.com"]);
|
||||
expect(result).toHaveLength(3); // example.com + api.github.com + entry without URL
|
||||
expect(result.map((r) => r.url)).toContain("https://example.com/page");
|
||||
expect(result.map((r) => r.url)).toContain("https://api.github.com/user/repo");
|
||||
expect(result.map((r) => r.url)).not.toContain("https://blocked.org/page");
|
||||
});
|
||||
|
||||
it("keeps entries without URLs and entries not in blocklist", () => {
|
||||
const result = filterResultsByAllowlist(results, ["blocked.org"]);
|
||||
// With allowlist ["blocked.org"], we ONLY keep blocked.org URLs and entries without URLs
|
||||
expect(result).toHaveLength(2); // blocked.org + entry without URL
|
||||
expect(result.map((r) => r.url)).toContain("https://blocked.org/page");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("web-fetch urlAllowlist", () => {
|
||||
describe("resolveFetchUrlAllowlist", () => {
|
||||
it("returns undefined when web config is undefined", () => {
|
||||
const result = resolveFetchUrlAllowlist(undefined);
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns undefined when urlAllowlist is not set", () => {
|
||||
const result = resolveFetchUrlAllowlist({ fetch: { enabled: true } });
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns undefined when urlAllowlist is empty array", () => {
|
||||
const result = resolveFetchUrlAllowlist({ urlAllowlist: [], fetch: { enabled: true } });
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns the allowlist when configured", () => {
|
||||
const result = resolveFetchUrlAllowlist({
|
||||
urlAllowlist: ["example.com", "*.github.com"],
|
||||
fetch: { enabled: true },
|
||||
});
|
||||
expect(result).toEqual(["example.com", "*.github.com"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("isUrlAllowedByAllowlist", () => {
|
||||
it("allows any URL when allowlist is empty", () => {
|
||||
const result = isUrlAllowedByAllowlist("https://example.com/page", []);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("blocks URLs not in allowlist", () => {
|
||||
const result = isUrlAllowedByAllowlist("https://blocked.com/page", ["example.com"]);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it("allows URLs matching exact domain", () => {
|
||||
const result = isUrlAllowedByAllowlist("https://example.com/page", ["example.com"]);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("allows URLs matching wildcard pattern", () => {
|
||||
const result = isUrlAllowedByAllowlist("https://api.github.com/users", ["*.github.com"]);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("blocks URLs not matching wildcard pattern", () => {
|
||||
const result = isUrlAllowedByAllowlist("https://github.com", ["*.github.com"]);
|
||||
// Exact match "github.com" should not match "*.github.com" pattern
|
||||
// because *.github.com requires at least one subdomain
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it("allows subdomain with wildcard pattern", () => {
|
||||
const result = isUrlAllowedByAllowlist("https://docs.openclaw.ai/guide", ["*.openclaw.ai"]);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it("handles URLs without protocol", () => {
|
||||
const result = isUrlAllowedByAllowlist("not-a-url", ["example.com"]);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("web_fetch error response", () => {
|
||||
// This test verifies the error format returned when URL is blocked
|
||||
it("returns correct error format for blocked URL", () => {
|
||||
// Simulate the error response format
|
||||
const urlAllowlist = ["example.com"];
|
||||
const url = "https://blocked.com/page";
|
||||
|
||||
if (!isUrlAllowedByAllowlist(url, urlAllowlist)) {
|
||||
const hostname = new URL(url).hostname;
|
||||
const errorResponse = {
|
||||
error: "url_not_allowed",
|
||||
message: `URL not in allowlist. Allowed domains: ${urlAllowlist.join(", ")}`,
|
||||
blockedUrl: url,
|
||||
blockedHostname: hostname,
|
||||
};
|
||||
|
||||
expect(errorResponse.error).toBe("url_not_allowed");
|
||||
expect(errorResponse.message).toContain("example.com");
|
||||
expect(errorResponse.blockedUrl).toBe("https://blocked.com/page");
|
||||
expect(errorResponse.blockedHostname).toBe("blocked.com");
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("integration with config", () => {
|
||||
it("reads urlAllowlist from tools.web config", () => {
|
||||
const config: OpenClawConfig = {
|
||||
tools: {
|
||||
web: {
|
||||
urlAllowlist: ["example.com", "*.github.com"],
|
||||
search: { enabled: true },
|
||||
fetch: { enabled: true },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const searchAllowlist = resolveUrlAllowlist(config.tools?.web);
|
||||
const fetchAllowlist = resolveFetchUrlAllowlist(config.tools?.web);
|
||||
|
||||
expect(searchAllowlist).toEqual(["example.com", "*.github.com"]);
|
||||
expect(fetchAllowlist).toEqual(["example.com", "*.github.com"]);
|
||||
});
|
||||
|
||||
it("works with undefined config", () => {
|
||||
const searchAllowlist = resolveUrlAllowlist(undefined);
|
||||
const fetchAllowlist = resolveFetchUrlAllowlist(undefined);
|
||||
|
||||
expect(searchAllowlist).toBeUndefined();
|
||||
expect(fetchAllowlist).toBeUndefined();
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user