From e368c365036bc8d70a1c941d998c6b7910da9668 Mon Sep 17 00:00:00 2001 From: Ocean Vael Date: Mon, 16 Feb 2026 15:20:42 +0000 Subject: [PATCH] feat: add llms.txt discovery as default agent behavior Add automatic llms.txt awareness so agents check for /llms.txt or /.well-known/llms.txt when exploring new domains. Changes: - System prompt: new 'llms.txt Discovery' section (full mode only, when web_fetch is available) instructing agents to check for llms.txt files when visiting new domains - web_fetch tool: updated description to mention llms.txt discovery llms.txt is an emerging standard (like robots.txt for AI) that helps site owners describe how AI agents should interact with their content. Making this a default behavior helps the ecosystem adopt agent-native web experiences. Ref: https://llmstxt.org --- src/agents/system-prompt.ts | 18 ++++++++++++++++++ src/agents/tools/web-fetch.ts | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/agents/system-prompt.ts b/src/agents/system-prompt.ts index 364d1f49b4e..896a631a58a 100644 --- a/src/agents/system-prompt.ts +++ b/src/agents/system-prompt.ts @@ -147,6 +147,23 @@ function buildVoiceSection(params: { isMinimal: boolean; ttsHint?: string }) { return ["## Voice (TTS)", hint, ""]; } +function buildLlmsTxtSection(params: { isMinimal: boolean; availableTools: Set<string> }) { + if (params.isMinimal) { + return []; + } + if (!params.availableTools.has("web_fetch")) { + return []; + } + return [ + "## llms.txt Discovery", + "When exploring a new domain or website (via web_fetch or browser), check for an llms.txt file that describes how AI agents should interact with the site:", + "- Try `/llms.txt` or `/.well-known/llms.txt` at the domain root", + "- If found, follow its guidance for interacting with that site's content and APIs", + "- llms.txt is an emerging standard (like robots.txt for AI) — not all sites have one, so don't warn if missing", + "", + ]; +} + function buildDocsSection(params: { docsPath?: 
string; isMinimal: boolean; readToolName: string }) { const docsPath = params.docsPath?.trim(); if (!docsPath || params.isMinimal) { @@ -546,6 +563,7 @@ export function buildAgentSystemPrompt(params: { messageToolHints: params.messageToolHints, }), ...buildVoiceSection({ isMinimal, ttsHint: params.ttsHint }), + ...buildLlmsTxtSection({ isMinimal, availableTools }), ]; if (extraSystemPrompt) { diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts index 931d8c778f0..dcb8c409fa0 100644 --- a/src/agents/tools/web-fetch.ts +++ b/src/agents/tools/web-fetch.ts @@ -767,7 +767,7 @@ export function createWebFetchTool(options?: { label: "Web Fetch", name: "web_fetch", description: - "Fetch and extract readable content from a URL (HTML → markdown/text). Use for lightweight page access without browser automation.", + "Fetch and extract readable content from a URL (HTML → markdown/text). Use for lightweight page access without browser automation. When exploring a new domain, also check for /llms.txt or /.well-known/llms.txt — these files describe how AI agents should interact with the site.", parameters: WebFetchSchema, execute: async (_toolCallId, args) => { const params = args as Record;