fix(models): support minimax-portal coding plan vlm routing for image tool (openclaw#33953)

Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test:macmini

Co-authored-by: tars90percent <252094836+tars90percent@users.noreply.github.com>
This commit is contained in:
Tars
2026-03-08 04:30:53 +08:00
committed by GitHub
parent e554c59aac
commit dab0e97c22
15 changed files with 246 additions and 23 deletions

View File

@@ -1,8 +1,10 @@
import { describe, expect, it } from "vitest";
import {
AUTO_AUDIO_KEY_PROVIDERS,
AUTO_IMAGE_KEY_PROVIDERS,
AUTO_VIDEO_KEY_PROVIDERS,
DEFAULT_AUDIO_MODELS,
DEFAULT_IMAGE_MODELS,
} from "./defaults.js";
describe("DEFAULT_AUDIO_MODELS", () => {
@@ -22,3 +24,15 @@ describe("AUTO_VIDEO_KEY_PROVIDERS", () => {
expect(AUTO_VIDEO_KEY_PROVIDERS).toContain("moonshot");
});
});
describe("AUTO_IMAGE_KEY_PROVIDERS", () => {
it("includes minimax-portal auto key resolution", () => {
expect(AUTO_IMAGE_KEY_PROVIDERS).toContain("minimax-portal");
});
});
describe("DEFAULT_IMAGE_MODELS", () => {
it("includes the MiniMax portal vision default", () => {
expect(DEFAULT_IMAGE_MODELS["minimax-portal"]).toBe("MiniMax-VL-01");
});
});

View File

@@ -46,6 +46,7 @@ export const AUTO_IMAGE_KEY_PROVIDERS = [
"anthropic",
"google",
"minimax",
"minimax-portal",
"zai",
] as const;
export const AUTO_VIDEO_KEY_PROVIDERS = ["google", "moonshot"] as const;
@@ -54,6 +55,7 @@ export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
anthropic: "claude-opus-4-6",
google: "gemini-3-flash-preview",
minimax: "MiniMax-VL-01",
"minimax-portal": "MiniMax-VL-01",
zai: "glm-4.6v",
};
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;

View File

@@ -0,0 +1,133 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
// Spies shared with the vi.mock factories below.
// NOTE(review): the factories capture these consts; this relies on vitest
// invoking module-mock factories lazily when the mocked module is first
// imported (the tests use dynamic `await import`) — confirm if upgrading vitest.
const completeMock = vi.fn();
const minimaxUnderstandImageMock = vi.fn();
const ensureOpenClawModelsJsonMock = vi.fn(async () => {});
// Simulates a resolved OAuth credential for whichever model is requested.
const getApiKeyForModelMock = vi.fn(async () => ({
apiKey: "oauth-test",
source: "test",
mode: "oauth",
}));
// Pass-through key extraction, defaulting to the empty string.
const requireApiKeyMock = vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? "");
const setRuntimeApiKeyMock = vi.fn();
// Per-test configurable model discovery result (see beforeEach / overrides).
const discoverModelsMock = vi.fn();
// Keep the real pi-ai surface but stub `complete` so no real request is made.
vi.mock("@mariozechner/pi-ai", async (importOriginal) => {
const actual = await importOriginal<typeof import("@mariozechner/pi-ai")>();
return {
...actual,
complete: completeMock,
};
});
// Treat both "minimax" and "minimax-portal" as VLM-capable providers, with
// "MiniMax-VL-01" as the only canonical VLM model id.
vi.mock("../../agents/minimax-vlm.js", () => ({
isMinimaxVlmProvider: (provider: string) =>
provider === "minimax" || provider === "minimax-portal",
isMinimaxVlmModel: (provider: string, modelId: string) =>
(provider === "minimax" || provider === "minimax-portal") && modelId === "MiniMax-VL-01",
minimaxUnderstandImage: minimaxUnderstandImageMock,
}));
// Skip the on-disk models.json bootstrap.
vi.mock("../../agents/models-config.js", () => ({
ensureOpenClawModelsJson: ensureOpenClawModelsJsonMock,
}));
vi.mock("../../agents/model-auth.js", () => ({
getApiKeyForModel: getApiKeyForModelMock,
requireApiKey: requireApiKeyMock,
}));
// Runtime discovery returns the auth-storage spy plus the configurable lookup.
vi.mock("../../agents/pi-model-discovery-runtime.js", () => ({
discoverAuthStorage: () => ({
setRuntimeApiKey: setRuntimeApiKeyMock,
}),
discoverModels: discoverModelsMock,
}));
describe("describeImageWithModel", () => {
beforeEach(() => {
vi.clearAllMocks();
minimaxUnderstandImageMock.mockResolvedValue("portal ok");
discoverModelsMock.mockReturnValue({
find: vi.fn(() => ({
provider: "minimax-portal",
id: "MiniMax-VL-01",
input: ["text", "image"],
baseUrl: "https://api.minimax.io/anthropic",
})),
});
});
it("routes minimax-portal image models through the MiniMax VLM endpoint", async () => {
const { describeImageWithModel } = await import("./image.js");
const result = await describeImageWithModel({
cfg: {},
agentDir: "/tmp/openclaw-agent",
provider: "minimax-portal",
model: "MiniMax-VL-01",
buffer: Buffer.from("png-bytes"),
fileName: "image.png",
mime: "image/png",
prompt: "Describe the image.",
timeoutMs: 1000,
});
expect(result).toEqual({
text: "portal ok",
model: "MiniMax-VL-01",
});
expect(ensureOpenClawModelsJsonMock).toHaveBeenCalled();
expect(getApiKeyForModelMock).toHaveBeenCalled();
expect(requireApiKeyMock).toHaveBeenCalled();
expect(setRuntimeApiKeyMock).toHaveBeenCalledWith("minimax-portal", "oauth-test");
expect(minimaxUnderstandImageMock).toHaveBeenCalledWith({
apiKey: "oauth-test",
prompt: "Describe the image.",
imageDataUrl: `data:image/png;base64,${Buffer.from("png-bytes").toString("base64")}`,
modelBaseUrl: "https://api.minimax.io/anthropic",
});
expect(completeMock).not.toHaveBeenCalled();
});
it("uses generic completion for non-canonical minimax-portal image models", async () => {
discoverModelsMock.mockReturnValue({
find: vi.fn(() => ({
provider: "minimax-portal",
id: "custom-vision",
input: ["text", "image"],
baseUrl: "https://api.minimax.io/anthropic",
})),
});
completeMock.mockResolvedValue({
role: "assistant",
api: "anthropic-messages",
provider: "minimax-portal",
model: "custom-vision",
stopReason: "stop",
timestamp: Date.now(),
content: [{ type: "text", text: "generic ok" }],
});
const { describeImageWithModel } = await import("./image.js");
const result = await describeImageWithModel({
cfg: {},
agentDir: "/tmp/openclaw-agent",
provider: "minimax-portal",
model: "custom-vision",
buffer: Buffer.from("png-bytes"),
fileName: "image.png",
mime: "image/png",
prompt: "Describe the image.",
timeoutMs: 1000,
});
expect(result).toEqual({
text: "generic ok",
model: "custom-vision",
});
expect(completeMock).toHaveBeenCalledOnce();
expect(minimaxUnderstandImageMock).not.toHaveBeenCalled();
});
});

View File

@@ -1,6 +1,6 @@
import type { Api, Context, Model } from "@mariozechner/pi-ai";
import { complete } from "@mariozechner/pi-ai";
import { minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
import { isMinimaxVlmModel, minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
import { getApiKeyForModel, requireApiKey } from "../../agents/model-auth.js";
import { ensureOpenClawModelsJson } from "../../agents/models-config.js";
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
@@ -40,7 +40,7 @@ export async function describeImageWithModel(
authStorage.setRuntimeApiKey(model.provider, apiKey);
const base64 = params.buffer.toString("base64");
if (model.provider === "minimax") {
if (isMinimaxVlmModel(model.provider, model.id)) {
const text = await minimaxUnderstandImage({
apiKey,
prompt: params.prompt ?? "Describe the image.",

View File

@@ -24,4 +24,12 @@ describe("media-understanding provider registry", () => {
expect(provider?.id).toBe("moonshot");
expect(provider?.capabilities).toEqual(["image", "video"]);
});
it("registers the minimax portal provider", () => {
const registry = buildMediaUnderstandingRegistry();
const provider = getMediaUnderstandingProvider("minimax-portal", registry);
expect(provider?.id).toBe("minimax-portal");
expect(provider?.capabilities).toEqual(["image"]);
});
});

View File

@@ -4,7 +4,7 @@ import { anthropicProvider } from "./anthropic/index.js";
import { deepgramProvider } from "./deepgram/index.js";
import { googleProvider } from "./google/index.js";
import { groqProvider } from "./groq/index.js";
import { minimaxProvider } from "./minimax/index.js";
import { minimaxPortalProvider, minimaxProvider } from "./minimax/index.js";
import { mistralProvider } from "./mistral/index.js";
import { moonshotProvider } from "./moonshot/index.js";
import { openaiProvider } from "./openai/index.js";
@@ -16,6 +16,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
googleProvider,
anthropicProvider,
minimaxProvider,
minimaxPortalProvider,
moonshotProvider,
mistralProvider,
zaiProvider,

View File

@@ -6,3 +6,9 @@ export const minimaxProvider: MediaUnderstandingProvider = {
capabilities: ["image"],
describeImage: describeImageWithModel,
};
// Portal-flavoured MiniMax provider: registered under its own "minimax-portal"
// id but reusing the same image-description entry point as the direct
// "minimax" provider above.
export const minimaxPortalProvider: MediaUnderstandingProvider = {
id: "minimax-portal",
capabilities: ["image"],
describeImage: describeImageWithModel,
};