fix: increase image tool maxTokens from 512 to 4096 (#11770)

* increase image tool maxTokens from 512 to 4096

* fix: cap image tool tokens by model capability (#11770) (thanks @detecti1)

* docs: fix changelog attribution for #11770

---------

Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Lilo
2026-02-14 00:52:27 +08:00
committed by GitHub
parent 1c36bec970
commit 397011bd78
4 changed files with 27 additions and 7 deletions

View File

@@ -10,6 +10,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Agents/Image tool: cap image-analysis completion `maxTokens` by model capability (`min(4096, model.maxTokens)`) to avoid over-limit provider failures while still preventing truncation. (#11770) Thanks @detecti1.
- Security/Canvas: serve A2UI assets via the shared safe-open path (`openFileWithinRoot`) to close traversal/TOCTOU gaps, with traversal and symlink regression coverage. (#10525) Thanks @abdelsfane.
- Security/Gateway: breaking default-behavior change - canvas IP-based auth fallback now only accepts machine-scoped addresses (RFC1918, link-local, ULA IPv6, CGNAT); public-source IP matches now require bearer token auth. (#14661) Thanks @sumleo.
- Security/WhatsApp: enforce `0o600` on `creds.json` and `creds.json.bak` on save/backup/restore paths to reduce credential file exposure. (#10529) Thanks @abdelsfane.

View File

@@ -346,6 +346,18 @@ describe("image tool MiniMax VLM routing", () => {
});
describe("image tool response validation", () => {
it("caps image-tool max tokens by model capability", () => {
expect(__testing.resolveImageToolMaxTokens(4000)).toBe(4000);
});
it("keeps requested image-tool max tokens when model capability is higher", () => {
expect(__testing.resolveImageToolMaxTokens(8192)).toBe(4096);
});
it("falls back to requested image-tool max tokens when model capability is missing", () => {
expect(__testing.resolveImageToolMaxTokens(undefined)).toBe(4096);
});
it("rejects image-model responses with no final text", () => {
expect(() =>
__testing.coerceImageAssistantText({

View File

@@ -29,8 +29,20 @@ const ANTHROPIC_IMAGE_FALLBACK = "anthropic/claude-opus-4-5";
// Test-only seam: exposes internal helpers (data-URL decoding, assistant-text
// coercion, and the max-token cap below) to unit tests without widening the
// module's public API. `as const` keeps the object shape readonly.
export const __testing = {
  decodeDataUrl,
  coerceImageAssistantText,
  resolveImageToolMaxTokens,
} as const;
/**
 * Resolve the completion-token budget for the image-analysis call.
 *
 * The requested budget (default 4096) is capped by the model's own
 * `maxTokens` capability so providers do not reject over-limit requests.
 * A missing or unusable capability value (non-number, NaN, Infinity, or
 * non-positive) leaves the requested budget unchanged.
 *
 * @param modelMaxTokens - model capability limit, if known
 * @param requestedMaxTokens - desired budget; defaults to 4096
 * @returns `min(requestedMaxTokens, modelMaxTokens)` when the cap is usable,
 *          otherwise `requestedMaxTokens`
 */
function resolveImageToolMaxTokens(modelMaxTokens: number | undefined, requestedMaxTokens = 4096) {
  // Guard first: only a finite, positive number is a trustworthy cap.
  if (typeof modelMaxTokens === "number" && Number.isFinite(modelMaxTokens) && modelMaxTokens > 0) {
    return Math.min(requestedMaxTokens, modelMaxTokens);
  }
  return requestedMaxTokens;
}
function resolveDefaultModelRef(cfg?: OpenClawConfig): {
provider: string;
model: string;
@@ -287,7 +299,7 @@ async function runImagePrompt(params: {
const context = buildImageContext(params.prompt, params.base64, params.mimeType);
const message = await complete(model, context, {
apiKey,
maxTokens: 512,
maxTokens: resolveImageToolMaxTokens(model.maxTokens),
});
const text = coerceImageAssistantText({
message,

View File

@@ -70,12 +70,7 @@ export function hasBinary(bin: string): boolean {
const parts = pathEnv.split(path.delimiter).filter(Boolean);
const extensions =
process.platform === "win32"
? [
"",
...(process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM")
.split(";")
.filter(Boolean),
]
? ["", ...(process.env.PATHEXT ?? ".EXE;.CMD;.BAT;.COM").split(";").filter(Boolean)]
: [""];
for (const part of parts) {
for (const ext of extensions) {