Files
openclaw/src/agents/tools/pdf-tool.helpers.ts
Tyler Yust d0ac1b0195 feat: add PDF analysis tool with native provider support (#31319)
* feat: add PDF analysis tool with native provider support

New `pdf` tool for analyzing PDF documents with model-powered analysis.

Architecture:
- Native PDF path: sends raw PDF bytes directly to providers that support
  inline document input (Anthropic via DocumentBlockParam, Google Gemini
  via inlineData with application/pdf MIME type)
- Extraction fallback: for providers without native PDF support, extracts
  text via pdfjs-dist and rasterizes pages to images via @napi-rs/canvas,
  then sends through the standard vision/text completion path

Key features:
- Single PDF (`pdf` param) or multiple PDFs (`pdfs` array, up to 10)
- Page range selection (`pages` param, e.g. "1-5", "1,3,7-9")
- Model override (`model` param) and file size limits (`maxBytesMb`)
- Auto-detects provider capability and falls back gracefully
- Same security patterns as image tool (SSRF guards, sandbox support,
  local path roots, workspace-only policy)

Config (agents.defaults):
- pdfModel: primary/fallbacks (defaults to imageModel, then session model)
- pdfMaxBytesMb: max PDF file size (default: 10)
- pdfMaxPages: max pages to process (default: 20)

Model catalog:
- Extended ModelInputType to include "document" alongside "text"/"image"
- Added modelSupportsDocument() capability check

Files:
- src/agents/tools/pdf-tool.ts - main tool factory
- src/agents/tools/pdf-tool.helpers.ts - helpers (page range, config, etc.)
- src/agents/tools/pdf-native-providers.ts - direct API calls for Anthropic/Google
- src/agents/tools/pdf-tool.test.ts - 43 tests covering all paths
- Modified: model-catalog.ts, openclaw-tools.ts, config schema/types/labels/help

* fix: prepare pdf tool for merge (#31319) (thanks @tyler6204)
2026-03-01 22:39:12 -08:00

104 lines
3.3 KiB
TypeScript

import type { AssistantMessage } from "@mariozechner/pi-ai";
import type { OpenClawConfig } from "../../config/config.js";
import {
resolveAgentModelFallbackValues,
resolveAgentModelPrimaryValue,
} from "../../config/model-input.js";
import { extractAssistantText } from "../pi-embedded-utils.js";
/**
 * Model selection for the PDF tool: a primary model identifier and a list of
 * fallback identifiers. Either field may be absent.
 */
export type PdfModelConfig = {
  primary?: string;
  fallbacks?: string[];
};
/**
 * Provider ids that accept a PDF document as native input.
 * For a model whose provider is listed here, the tool sends the raw PDF bytes
 * through a provider-specific API call rather than extracting text/images first.
 * Entries are stored in lowercase.
 */
export const NATIVE_PDF_PROVIDERS = new Set<string>(["anthropic", "google"]);

/**
 * Report whether the given provider supports native PDF document input.
 *
 * @param provider - Provider id; compared case-insensitively, ignoring
 *   surrounding whitespace.
 * @returns `true` when the normalized id is in {@link NATIVE_PDF_PROVIDERS}.
 */
export function providerSupportsNativePdf(provider: string): boolean {
  const normalizedProvider = provider.toLowerCase().trim();
  return NATIVE_PDF_PROVIDERS.has(normalizedProvider);
}
/**
 * Parse a page selection string (e.g. "1-5", "3", "1-3,7-9") into a sorted,
 * de-duplicated array of 1-based page numbers.
 *
 * Entries are comma-separated; each entry is either a single page number or an
 * inclusive `start-end` range. Empty entries (such as those produced by a
 * trailing comma) are skipped. Pages greater than `maxPages` are dropped
 * silently instead of being rejected, so "1-100" with `maxPages = 5` yields
 * pages 1 through 5.
 *
 * @param range - Comma-separated page numbers and/or ranges.
 * @param maxPages - Highest page number allowed in the result.
 * @returns Unique page numbers in ascending order; may be empty.
 * @throws Error when an entry is not a whole number >= 1, or when a range has
 *   a start below 1 or an end smaller than its start.
 */
export function parsePageRange(range: string, maxPages: number): number[] {
  // Hoisted so the pattern is not re-created for every entry.
  const rangePattern = /^(\d+)\s*-\s*(\d+)$/;
  const pages = new Set<number>();

  for (const rawPart of range.split(",")) {
    const part = rawPart.trim();
    if (!part) {
      continue;
    }

    const dashMatch = rangePattern.exec(part);
    if (dashMatch) {
      const start = Number(dashMatch[1]);
      const end = Number(dashMatch[2]);
      // The finiteness checks guard against digit strings long enough to
      // overflow to Infinity.
      if (!Number.isFinite(start) || !Number.isFinite(end) || start < 1 || end < start) {
        throw new Error(`Invalid page range: "${part}"`);
      }
      const last = Math.min(end, maxPages);
      for (let page = start; page <= last; page++) {
        pages.add(page);
      }
      continue;
    }

    const page = Number(part);
    // Page numbers must be whole: Number.isInteger rejects NaN, Infinity and
    // fractional input such as "1.5", which a finiteness check would accept.
    if (!Number.isInteger(page) || page < 1) {
      throw new Error(`Invalid page number: "${part}"`);
    }
    if (page <= maxPages) {
      pages.add(page);
    }
  }

  return Array.from(pages).toSorted((a, b) => a - b);
}
/**
 * Reduce a PDF-analysis assistant message to its trimmed text, or throw.
 *
 * @param params.message - Assistant message returned by the model call.
 * @param params.provider - Provider id, used only in error messages.
 * @param params.model - Model id, used only in error messages.
 * @returns The assistant text with surrounding whitespace removed.
 * @throws Error when the message stopped with "error" or "aborted", when it
 *   carries a non-blank `errorMessage`, or when no non-blank text was produced.
 */
export function coercePdfAssistantText(params: {
  message: AssistantMessage;
  provider: string;
  model: string;
}): string {
  const { message, provider, model } = params;
  const modelLabel = `${provider}/${model}`;

  const stopReason = message.stopReason;
  const reportedError = message.errorMessage?.trim();
  const stoppedAbnormally = stopReason === "error" || stopReason === "aborted";

  // A failure is signalled either by the stop reason or by an error message
  // attached to an otherwise normal stop; the detail suffix is added only
  // when there is a non-blank message to show.
  if (stoppedAbnormally || reportedError) {
    const detail = reportedError ? `: ${reportedError}` : "";
    throw new Error(`PDF model failed (${modelLabel})${detail}`);
  }

  const answer = extractAssistantText(message).trim();
  if (!answer) {
    throw new Error(`PDF model returned no text (${modelLabel}).`);
  }
  return answer;
}
/**
 * Read the PDF model selection from `cfg.agents.defaults.pdfModel`.
 *
 * @param cfg - Application config; may be omitted.
 * @returns An object holding `primary` only when the resolved primary value is
 *   non-blank (stored trimmed), and `fallbacks` only when at least one
 *   fallback was resolved.
 */
export function coercePdfModelConfig(cfg?: OpenClawConfig): PdfModelConfig {
  const pdfModel = cfg?.agents?.defaults?.pdfModel;
  const trimmedPrimary = resolveAgentModelPrimaryValue(pdfModel)?.trim();
  const fallbacks = resolveAgentModelFallbackValues(pdfModel);

  const resolved: PdfModelConfig = {};
  if (trimmedPrimary) {
    resolved.primary = trimmedPrimary;
  }
  if (fallbacks.length > 0) {
    resolved.fallbacks = fallbacks;
  }
  return resolved;
}
/**
 * Pick the output token budget for a PDF tool request.
 *
 * @param modelMaxTokens - The model's own output token limit, if known.
 * @param requestedMaxTokens - Desired budget; defaults to 4096.
 * @returns The smaller of the two values when `modelMaxTokens` is a finite
 *   positive number; otherwise `requestedMaxTokens` unchanged.
 */
export function resolvePdfToolMaxTokens(
  modelMaxTokens: number | undefined,
  requestedMaxTokens = 4096,
) {
  // Only a finite, positive model limit can cap the request; undefined, NaN,
  // infinite, zero and negative limits are ignored.
  if (
    typeof modelMaxTokens === "number" &&
    Number.isFinite(modelMaxTokens) &&
    modelMaxTokens > 0
  ) {
    return Math.min(requestedMaxTokens, modelMaxTokens);
  }
  return requestedMaxTokens;
}