mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 03:41:22 +00:00
fix(models): support minimax-portal coding plan vlm routing for image tool (openclaw#33953)
Verified:
- pnpm install --frozen-lockfile
- pnpm build
- pnpm check
- pnpm test:macmini

Co-authored-by: tars90percent <252094836+tars90percent@users.noreply.github.com>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
AUTO_AUDIO_KEY_PROVIDERS,
|
||||
AUTO_IMAGE_KEY_PROVIDERS,
|
||||
AUTO_VIDEO_KEY_PROVIDERS,
|
||||
DEFAULT_AUDIO_MODELS,
|
||||
DEFAULT_IMAGE_MODELS,
|
||||
} from "./defaults.js";
|
||||
|
||||
describe("DEFAULT_AUDIO_MODELS", () => {
|
||||
@@ -22,3 +24,15 @@ describe("AUTO_VIDEO_KEY_PROVIDERS", () => {
|
||||
expect(AUTO_VIDEO_KEY_PROVIDERS).toContain("moonshot");
|
||||
});
|
||||
});
|
||||
|
||||
// Guards the image auto-key provider list: "minimax-portal" must be a member.
describe("AUTO_IMAGE_KEY_PROVIDERS", () => {
  it("includes minimax-portal auto key resolution", () => {
    // Widen the literal tuple so the membership check reads as a plain string lookup.
    const providerIds: readonly string[] = AUTO_IMAGE_KEY_PROVIDERS;

    expect(providerIds).toContain("minimax-portal");
  });
});
|
||||
|
||||
// Guards the default image model table: the "minimax-portal" entry must be "MiniMax-VL-01".
describe("DEFAULT_IMAGE_MODELS", () => {
  it("includes the MiniMax portal vision default", () => {
    const { "minimax-portal": portalDefault } = DEFAULT_IMAGE_MODELS;

    expect(portalDefault).toBe("MiniMax-VL-01");
  });
});
|
||||
|
||||
@@ -46,6 +46,7 @@ export const AUTO_IMAGE_KEY_PROVIDERS = [
|
||||
"anthropic",
|
||||
"google",
|
||||
"minimax",
|
||||
"minimax-portal",
|
||||
"zai",
|
||||
] as const;
|
||||
// Provider ids listed for video; `as const` keeps the literal tuple type.
// NOTE(review): presumably the providers whose API key is resolved automatically
// for video understanding — confirm against the callers of this constant.
export const AUTO_VIDEO_KEY_PROVIDERS = ["google", "moonshot"] as const;
|
||||
@@ -54,6 +55,7 @@ export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
|
||||
anthropic: "claude-opus-4-6",
|
||||
google: "gemini-3-flash-preview",
|
||||
minimax: "MiniMax-VL-01",
|
||||
"minimax-portal": "MiniMax-VL-01",
|
||||
zai: "glm-4.6v",
|
||||
};
|
||||
// Upper bound used for CLI output buffering: five times `MB`.
// NOTE(review): `MB` is declared outside this excerpt; presumably bytes per
// megabyte, which would make this 5 MB — confirm.
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;
|
||||
|
||||
133
src/media-understanding/providers/image.test.ts
Normal file
133
src/media-understanding/providers/image.test.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
/**
 * Shared mock handles for the module mocks below.
 *
 * `vi.mock()` calls are hoisted above every other statement in this file, so
 * handles referenced from their factories are created with `vi.hoisted()`.
 * That guarantees they are initialised before any factory can run, even if the
 * module under test is ever imported statically instead of lazily.
 */
const {
  completeMock,
  minimaxUnderstandImageMock,
  ensureOpenClawModelsJsonMock,
  getApiKeyForModelMock,
  requireApiKeyMock,
  setRuntimeApiKeyMock,
  discoverModelsMock,
} = vi.hoisted(() => ({
  // Generic completion entry point from "@mariozechner/pi-ai".
  completeMock: vi.fn(),
  // MiniMax VLM image-understanding call.
  minimaxUnderstandImageMock: vi.fn(),
  // Resolves without doing any work.
  ensureOpenClawModelsJsonMock: vi.fn(async () => {}),
  // Always resolves to a fixed OAuth-mode credential.
  getApiKeyForModelMock: vi.fn(async () => ({
    apiKey: "oauth-test",
    source: "test",
    mode: "oauth",
  })),
  // Returns the key carried by the auth object, or "" when it has none.
  requireApiKeyMock: vi.fn((auth: { apiKey?: string }) => auth.apiKey ?? ""),
  // Exposed through the object returned by the mocked `discoverAuthStorage`.
  setRuntimeApiKeyMock: vi.fn(),
  // Return value is configured per test.
  discoverModelsMock: vi.fn(),
}));

// Keep the real "@mariozechner/pi-ai" exports and replace only `complete`.
vi.mock("@mariozechner/pi-ai", async (importOriginal) => {
  const actual = await importOriginal<typeof import("@mariozechner/pi-ai")>();
  return {
    ...actual,
    complete: completeMock,
  };
});

// Stand-in for the MiniMax VLM helpers: both provider ids are accepted and only
// "MiniMax-VL-01" counts as a VLM model.
vi.mock("../../agents/minimax-vlm.js", () => {
  const isMinimaxVlmProvider = (provider: string) =>
    provider === "minimax" || provider === "minimax-portal";

  return {
    isMinimaxVlmProvider,
    isMinimaxVlmModel: (provider: string, modelId: string) =>
      isMinimaxVlmProvider(provider) && modelId === "MiniMax-VL-01",
    minimaxUnderstandImage: minimaxUnderstandImageMock,
  };
});

vi.mock("../../agents/models-config.js", () => ({
  ensureOpenClawModelsJson: ensureOpenClawModelsJsonMock,
}));

vi.mock("../../agents/model-auth.js", () => ({
  getApiKeyForModel: getApiKeyForModelMock,
  requireApiKey: requireApiKeyMock,
}));

vi.mock("../../agents/pi-model-discovery-runtime.js", () => ({
  discoverAuthStorage: () => ({
    setRuntimeApiKey: setRuntimeApiKeyMock,
  }),
  discoverModels: discoverModelsMock,
}));
|
||||
|
||||
// Covers how describeImageWithModel routes "minimax-portal" models: the
// "MiniMax-VL-01" id goes through the MiniMax VLM call, any other id goes
// through the generic completion call.
describe("describeImageWithModel", () => {
  const PORTAL_PROVIDER = "minimax-portal";
  const PORTAL_BASE_URL = "https://api.minimax.io/anthropic";
  const PROMPT = "Describe the image.";

  /** Makes model discovery resolve to a minimax-portal image model with the given id. */
  function stubDiscoveredModel(modelId: string): void {
    discoverModelsMock.mockReturnValue({
      find: vi.fn(() => ({
        provider: PORTAL_PROVIDER,
        id: modelId,
        input: ["text", "image"],
        baseUrl: PORTAL_BASE_URL,
      })),
    });
  }

  /** Imports the module under test on demand and describes a fixed PNG payload. */
  async function describePortalImage(modelId: string) {
    const { describeImageWithModel } = await import("./image.js");

    return describeImageWithModel({
      cfg: {},
      agentDir: "/tmp/openclaw-agent",
      provider: PORTAL_PROVIDER,
      model: modelId,
      buffer: Buffer.from("png-bytes"),
      fileName: "image.png",
      mime: "image/png",
      prompt: PROMPT,
      timeoutMs: 1000,
    });
  }

  beforeEach(() => {
    // Clear recorded calls, then restore the defaults every test starts from.
    vi.clearAllMocks();
    minimaxUnderstandImageMock.mockResolvedValue("portal ok");
    stubDiscoveredModel("MiniMax-VL-01");
  });

  it("routes minimax-portal image models through the MiniMax VLM endpoint", async () => {
    const described = await describePortalImage("MiniMax-VL-01");
    const expectedDataUrl = `data:image/png;base64,${Buffer.from("png-bytes").toString("base64")}`;

    expect(described).toEqual({
      text: "portal ok",
      model: "MiniMax-VL-01",
    });
    expect(ensureOpenClawModelsJsonMock).toHaveBeenCalled();
    expect(getApiKeyForModelMock).toHaveBeenCalled();
    expect(requireApiKeyMock).toHaveBeenCalled();
    expect(setRuntimeApiKeyMock).toHaveBeenCalledWith(PORTAL_PROVIDER, "oauth-test");
    expect(minimaxUnderstandImageMock).toHaveBeenCalledWith({
      apiKey: "oauth-test",
      prompt: PROMPT,
      imageDataUrl: expectedDataUrl,
      modelBaseUrl: PORTAL_BASE_URL,
    });
    expect(completeMock).not.toHaveBeenCalled();
  });

  it("uses generic completion for non-canonical minimax-portal image models", async () => {
    stubDiscoveredModel("custom-vision");
    completeMock.mockResolvedValue({
      role: "assistant",
      api: "anthropic-messages",
      provider: PORTAL_PROVIDER,
      model: "custom-vision",
      stopReason: "stop",
      timestamp: Date.now(),
      content: [{ type: "text", text: "generic ok" }],
    });

    const described = await describePortalImage("custom-vision");

    expect(described).toEqual({
      text: "generic ok",
      model: "custom-vision",
    });
    expect(completeMock).toHaveBeenCalledOnce();
    expect(minimaxUnderstandImageMock).not.toHaveBeenCalled();
  });
});
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { Api, Context, Model } from "@mariozechner/pi-ai";
|
||||
import { complete } from "@mariozechner/pi-ai";
|
||||
import { minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
|
||||
import { isMinimaxVlmModel, minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
|
||||
import { getApiKeyForModel, requireApiKey } from "../../agents/model-auth.js";
|
||||
import { ensureOpenClawModelsJson } from "../../agents/models-config.js";
|
||||
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
|
||||
@@ -40,7 +40,7 @@ export async function describeImageWithModel(
|
||||
authStorage.setRuntimeApiKey(model.provider, apiKey);
|
||||
|
||||
const base64 = params.buffer.toString("base64");
|
||||
if (model.provider === "minimax") {
|
||||
if (isMinimaxVlmModel(model.provider, model.id)) {
|
||||
const text = await minimaxUnderstandImage({
|
||||
apiKey,
|
||||
prompt: params.prompt ?? "Describe the image.",
|
||||
|
||||
@@ -24,4 +24,12 @@ describe("media-understanding provider registry", () => {
|
||||
expect(provider?.id).toBe("moonshot");
|
||||
expect(provider?.capabilities).toEqual(["image", "video"]);
|
||||
});
|
||||
|
||||
// The freshly built registry must expose "minimax-portal" with image-only capabilities.
it("registers the minimax portal provider", () => {
  const portal = getMediaUnderstandingProvider(
    "minimax-portal",
    buildMediaUnderstandingRegistry(),
  );

  expect(portal?.id).toBe("minimax-portal");
  expect(portal?.capabilities).toEqual(["image"]);
});
|
||||
});
|
||||
|
||||
@@ -4,7 +4,7 @@ import { anthropicProvider } from "./anthropic/index.js";
|
||||
import { deepgramProvider } from "./deepgram/index.js";
|
||||
import { googleProvider } from "./google/index.js";
|
||||
import { groqProvider } from "./groq/index.js";
|
||||
import { minimaxProvider } from "./minimax/index.js";
|
||||
import { minimaxPortalProvider, minimaxProvider } from "./minimax/index.js";
|
||||
import { mistralProvider } from "./mistral/index.js";
|
||||
import { moonshotProvider } from "./moonshot/index.js";
|
||||
import { openaiProvider } from "./openai/index.js";
|
||||
@@ -16,6 +16,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
|
||||
googleProvider,
|
||||
anthropicProvider,
|
||||
minimaxProvider,
|
||||
minimaxPortalProvider,
|
||||
moonshotProvider,
|
||||
mistralProvider,
|
||||
zaiProvider,
|
||||
|
||||
@@ -6,3 +6,9 @@ export const minimaxProvider: MediaUnderstandingProvider = {
|
||||
capabilities: ["image"],
|
||||
describeImage: describeImageWithModel,
|
||||
};
|
||||
|
||||
/**
 * Media-understanding provider entry for the `minimax-portal` provider id.
 *
 * It declares the `image` capability only and delegates image description to
 * `describeImageWithModel`.
 */
export const minimaxPortalProvider: MediaUnderstandingProvider = {
  id: "minimax-portal",
  capabilities: ["image"],
  describeImage: describeImageWithModel,
};
|
||||
|
||||
Reference in New Issue
Block a user