mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 21:44:32 +00:00
feat: add PDF analysis tool with native provider support (#31319)
* feat: add PDF analysis tool with native provider support

New `pdf` tool for analyzing PDF documents with model-powered analysis.

Architecture:
- Native PDF path: sends raw PDF bytes directly to providers that support inline document input (Anthropic via DocumentBlockParam, Google Gemini via inlineData with application/pdf MIME type)
- Extraction fallback: for providers without native PDF support, extracts text via pdfjs-dist and rasterizes pages to images via @napi-rs/canvas, then sends through the standard vision/text completion path

Key features:
- Single PDF (`pdf` param) or multiple PDFs (`pdfs` array, up to 10)
- Page range selection (`pages` param, e.g. "1-5", "1,3,7-9")
- Model override (`model` param) and file size limits (`maxBytesMb`)
- Auto-detects provider capability and falls back gracefully
- Same security patterns as image tool (SSRF guards, sandbox support, local path roots, workspace-only policy)

Config (agents.defaults):
- pdfModel: primary/fallbacks (defaults to imageModel, then session model)
- pdfMaxBytesMb: max PDF file size (default: 10)
- pdfMaxPages: max pages to process (default: 20)

Model catalog:
- Extended ModelInputType to include "document" alongside "text"/"image"
- Added modelSupportsDocument() capability check

Files:
- src/agents/tools/pdf-tool.ts - main tool factory
- src/agents/tools/pdf-tool.helpers.ts - helpers (page range, config, etc.)
- src/agents/tools/pdf-native-providers.ts - direct API calls for Anthropic/Google
- src/agents/tools/pdf-tool.test.ts - 43 tests covering all paths
- Modified: model-catalog.ts, openclaw-tools.ts, config schema/types/labels/help

* fix: prepare pdf tool for merge (#31319) (thanks @tyler6204)
This commit is contained in:
@@ -13,6 +13,7 @@ import { createGatewayTool } from "./tools/gateway-tool.js";
|
||||
import { createImageTool } from "./tools/image-tool.js";
|
||||
import { createMessageTool } from "./tools/message-tool.js";
|
||||
import { createNodesTool } from "./tools/nodes-tool.js";
|
||||
import { createPdfTool } from "./tools/pdf-tool.js";
|
||||
import { createSessionStatusTool } from "./tools/session-status-tool.js";
|
||||
import { createSessionsHistoryTool } from "./tools/sessions-history-tool.js";
|
||||
import { createSessionsListTool } from "./tools/sessions-list-tool.js";
|
||||
@@ -84,6 +85,18 @@ export function createOpenClawTools(options?: {
|
||||
modelHasVision: options?.modelHasVision,
|
||||
})
|
||||
: null;
|
||||
const pdfTool = options?.agentDir?.trim()
|
||||
? createPdfTool({
|
||||
config: options?.config,
|
||||
agentDir: options.agentDir,
|
||||
workspaceDir,
|
||||
sandbox:
|
||||
options?.sandboxRoot && options?.sandboxFsBridge
|
||||
? { root: options.sandboxRoot, bridge: options.sandboxFsBridge }
|
||||
: undefined,
|
||||
fsPolicy: options?.fsPolicy,
|
||||
})
|
||||
: null;
|
||||
const webSearchTool = createWebSearchTool({
|
||||
config: options?.config,
|
||||
sandboxed: options?.sandboxed,
|
||||
@@ -173,6 +186,7 @@ export function createOpenClawTools(options?: {
|
||||
...(webSearchTool ? [webSearchTool] : []),
|
||||
...(webFetchTool ? [webFetchTool] : []),
|
||||
...(imageTool ? [imageTool] : []),
|
||||
...(pdfTool ? [pdfTool] : []),
|
||||
];
|
||||
|
||||
const pluginTools = resolvePluginTools({
|
||||
|
||||
Reference in New Issue
Block a user