mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-13 06:30:35 +00:00
feat(media): add moonshot video provider and wiring
Co-authored-by: xiaoyaner0201 <xiaoyaner0201@users.noreply.github.com>
This commit is contained in:
@@ -16,4 +16,12 @@ describe("media-understanding provider registry", () => {
|
||||
|
||||
expect(provider?.id).toBe("google");
|
||||
});
|
||||
|
||||
// Checks that the registry built by buildMediaUnderstandingRegistry() resolves
// the "moonshot" id to a provider advertising image and video capabilities.
it("registers the Moonshot provider", () => {
|
||||
const registry = buildMediaUnderstandingRegistry();
|
||||
const provider = getMediaUnderstandingProvider("moonshot", registry);
|
||||
|
||||
// The lookup result may be undefined, hence the optional chaining below.
expect(provider?.id).toBe("moonshot");
|
||||
expect(provider?.capabilities).toEqual(["image", "video"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -6,6 +6,7 @@ import { googleProvider } from "./google/index.js";
|
||||
import { groqProvider } from "./groq/index.js";
|
||||
import { minimaxProvider } from "./minimax/index.js";
|
||||
import { mistralProvider } from "./mistral/index.js";
|
||||
import { moonshotProvider } from "./moonshot/index.js";
|
||||
import { openaiProvider } from "./openai/index.js";
|
||||
import { zaiProvider } from "./zai/index.js";
|
||||
|
||||
@@ -15,6 +16,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
|
||||
googleProvider,
|
||||
anthropicProvider,
|
||||
minimaxProvider,
|
||||
moonshotProvider,
|
||||
mistralProvider,
|
||||
zaiProvider,
|
||||
deepgramProvider,
|
||||
|
||||
10
src/media-understanding/providers/moonshot/index.ts
Normal file
10
src/media-understanding/providers/moonshot/index.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import type { MediaUnderstandingProvider } from "../../types.js";
|
||||
import { describeImageWithModel } from "../image.js";
|
||||
import { describeMoonshotVideo } from "./video.js";
|
||||
|
||||
/**
 * Media-understanding provider entry for Moonshot.
 *
 * Declares image and video capabilities. Image description is delegated to
 * the shared `describeImageWithModel` helper; video description uses the
 * Moonshot-specific `describeMoonshotVideo` implementation.
 */
export const moonshotProvider: MediaUnderstandingProvider = {
|
||||
// Identifier used to look this provider up in the registry.
id: "moonshot",
|
||||
capabilities: ["image", "video"],
|
||||
describeImage: describeImageWithModel,
|
||||
describeVideo: describeMoonshotVideo,
|
||||
};
|
||||
72
src/media-understanding/providers/moonshot/video.test.ts
Normal file
72
src/media-understanding/providers/moonshot/video.test.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
createRequestCaptureJsonFetch,
|
||||
installPinnedHostnameTestHooks,
|
||||
} from "../audio.test-helpers.js";
|
||||
import { describeMoonshotVideo } from "./video.js";
|
||||
|
||||
// Shared hostname-pinning hooks from the audio test helpers; installed once for this file.
installPinnedHostnameTestHooks();

describe("describeMoonshotVideo", () => {
  it("builds an OpenAI-compatible video request", async () => {
    // Shape of the JSON body we expect the provider to send.
    type CapturedBody = {
      model?: string;
      messages?: Array<{
        content?: Array<{ type?: string; text?: string; video_url?: { url?: string } }>;
      }>;
    };

    const videoBytes = Buffer.from("video-bytes");
    const cannedResponse = { choices: [{ message: { content: "video ok" } }] };
    const { fetchFn, getRequest } = createRequestCaptureJsonFetch(cannedResponse);

    const result = await describeMoonshotVideo({
      buffer: videoBytes,
      fileName: "clip.mp4",
      apiKey: "moonshot-test",
      timeoutMs: 1500,
      baseUrl: "https://api.moonshot.ai/v1/",
      model: "kimi-k2.5",
      headers: { "X-Trace": "1" },
      fetchFn,
    });
    const { url, init } = getRequest();

    // Result is taken from the canned response; the model is echoed back.
    expect(result.text).toBe("video ok");
    expect(result.model).toBe("kimi-k2.5");

    // Transport: trailing slash on the base URL must not be doubled.
    expect(url).toBe("https://api.moonshot.ai/v1/chat/completions");
    expect(init?.method).toBe("POST");
    expect(init?.signal).toBeInstanceOf(AbortSignal);

    // Headers: defaults are filled in and caller-supplied headers are kept.
    const sentHeaders = new Headers(init?.headers);
    expect(sentHeaders.get("authorization")).toBe("Bearer moonshot-test");
    expect(sentHeaders.get("content-type")).toBe("application/json");
    expect(sentHeaders.get("x-trace")).toBe("1");

    // Body: text prompt first, then the video as a base64 data URL.
    const rawBody = typeof init?.body === "string" ? init.body : "{}";
    const body = JSON.parse(rawBody) as CapturedBody;
    const parts = body.messages?.[0]?.content;
    const expectedDataUrl = `data:video/mp4;base64,${Buffer.from("video-bytes").toString("base64")}`;

    expect(body.model).toBe("kimi-k2.5");
    expect(parts?.[0]).toMatchObject({
      type: "text",
      text: "Describe the video.",
    });
    expect(parts?.[1]?.type).toBe("video_url");
    expect(parts?.[1]?.video_url?.url).toBe(expectedDataUrl);
  });

  it("falls back to reasoning_content when content is empty", async () => {
    const cannedResponse = {
      choices: [{ message: { content: "", reasoning_content: "reasoned answer" } }],
    };
    const { fetchFn } = createRequestCaptureJsonFetch(cannedResponse);

    // No model/baseUrl given, so the provider defaults apply.
    const result = await describeMoonshotVideo({
      buffer: Buffer.from("video"),
      fileName: "clip.mp4",
      apiKey: "moonshot-test",
      timeoutMs: 1000,
      fetchFn,
    });

    expect(result.text).toBe("reasoned answer");
    expect(result.model).toBe("kimi-k2.5");
  });
});
|
||||
109
src/media-understanding/providers/moonshot/video.ts
Normal file
109
src/media-understanding/providers/moonshot/video.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
import type { VideoDescriptionRequest, VideoDescriptionResult } from "../../types.js";
|
||||
import { assertOkOrThrowHttpError, fetchWithTimeoutGuarded, normalizeBaseUrl } from "../shared.js";
|
||||
|
||||
/** Fallback base URL handed to `normalizeBaseUrl` when the request supplies none. */
export const DEFAULT_MOONSHOT_VIDEO_BASE_URL = "https://api.moonshot.ai/v1";
|
||||
/** Model name used when the request's `model` is missing or blank. */
const DEFAULT_MOONSHOT_VIDEO_MODEL = "kimi-k2.5";
|
||||
/** Prompt text used when the request's `prompt` is missing or blank. */
const DEFAULT_MOONSHOT_VIDEO_PROMPT = "Describe the video.";
|
||||
|
||||
/**
 * Subset of the chat-completions response body that this module reads.
 * Every field is optional because the payload is cast from parsed JSON
 * without validation.
 */
type MoonshotVideoPayload = {
|
||||
choices?: Array<{
|
||||
message?: {
|
||||
// Either a plain string or a list of parts whose `text` fields are joined.
content?: string | Array<{ text?: string }>;
|
||||
// Used as a fallback when `content` yields no text.
reasoning_content?: string;
|
||||
};
|
||||
}>;
|
||||
};
|
||||
|
||||
/**
 * Picks the model name to send to Moonshot.
 *
 * @param model - Caller-supplied model name; may be missing or blank.
 * @returns The trimmed model name, or the default video model when the
 *   input is missing or contains only whitespace.
 */
function resolveModel(model?: string): string {
  const candidate = (model ?? "").trim();
  return candidate.length > 0 ? candidate : DEFAULT_MOONSHOT_VIDEO_MODEL;
}
|
||||
|
||||
/**
 * Picks the instruction text that accompanies the video.
 *
 * @param prompt - Caller-supplied prompt; may be missing or blank.
 * @returns The trimmed prompt, or the default prompt when the input is
 *   missing or contains only whitespace.
 */
function resolvePrompt(prompt?: string): string {
  const cleaned = prompt?.trim();
  if (cleaned) {
    return cleaned;
  }
  return DEFAULT_MOONSHOT_VIDEO_PROMPT;
}
|
||||
|
||||
/**
 * Extracts the description text from a Moonshot chat-completions payload.
 *
 * Only the first choice is inspected. Sources are tried in order:
 *   1. `message.content` as a non-blank string,
 *   2. `message.content` as an array of parts, joining each non-blank
 *      `text` with newlines,
 *   3. `message.reasoning_content` as a non-blank string.
 *
 * @param payload - Parsed response body (unvalidated JSON).
 * @returns The trimmed text, or `null` when no source yields any text.
 */
function coerceMoonshotText(payload: MoonshotVideoPayload): string | null {
  const message = payload.choices?.[0]?.message;
  if (!message) {
    return null;
  }

  const { content, reasoning_content: reasoning } = message;

  if (typeof content === "string") {
    const direct = content.trim();
    if (direct) {
      return direct;
    }
  } else if (Array.isArray(content)) {
    // The payload is unvalidated JSON, so a part may be null or a non-object
    // at runtime despite the declared type; skip such parts instead of throwing.
    const joined = content
      .map((part) => (typeof part?.text === "string" ? part.text.trim() : ""))
      .filter(Boolean)
      .join("\n");
    if (joined) {
      return joined;
    }
  }

  if (typeof reasoning === "string") {
    const fallback = reasoning.trim();
    if (fallback) {
      return fallback;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Describes a video by posting it inline to Moonshot's chat-completions
 * endpoint (`<baseUrl>/chat/completions`).
 *
 * The video bytes are base64-encoded into a `data:` URL and sent as a
 * `video_url` content part, preceded by a text part carrying the prompt.
 *
 * @param params - Request options. `model`, `prompt` and `mime` fall back to
 *   defaults when missing or blank; `baseUrl` is resolved by
 *   `normalizeBaseUrl` against the default Moonshot base URL. Caller-supplied
 *   `content-type` / `authorization` headers are kept; otherwise JSON and a
 *   bearer token built from `params.apiKey` are used.
 * @returns The extracted description text and the model name that was sent.
 * @throws Error when the response contains no usable text. Non-OK responses
 *   are handed to `assertOkOrThrowHttpError` (presumably throws; see that
 *   helper for the exact error type).
 */
export async function describeMoonshotVideo(
  params: VideoDescriptionRequest,
): Promise<VideoDescriptionResult> {
  const fetchFn = params.fetchFn ?? fetch;
  const baseUrl = normalizeBaseUrl(params.baseUrl, DEFAULT_MOONSHOT_VIDEO_BASE_URL);
  const model = resolveModel(params.model);
  // Treat a blank MIME type like a missing one (same rule as model/prompt);
  // otherwise the data URL would be emitted without a media type.
  const mime = params.mime?.trim() || "video/mp4";
  const prompt = resolvePrompt(params.prompt);
  const url = `${baseUrl}/chat/completions`;

  // Fill in defaults without overriding headers the caller set explicitly.
  const headers = new Headers(params.headers);
  if (!headers.has("content-type")) {
    headers.set("content-type", "application/json");
  }
  if (!headers.has("authorization")) {
    headers.set("authorization", `Bearer ${params.apiKey}`);
  }

  const body = {
    model,
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          {
            type: "video_url",
            video_url: {
              url: `data:${mime};base64,${params.buffer.toString("base64")}`,
            },
          },
        ],
      },
    ],
  };

  const { response: res, release } = await fetchWithTimeoutGuarded(
    url,
    {
      method: "POST",
      headers,
      body: JSON.stringify(body),
    },
    params.timeoutMs,
    fetchFn,
  );

  try {
    await assertOkOrThrowHttpError(res, "Moonshot video description failed");
    const payload = (await res.json()) as MoonshotVideoPayload;
    const text = coerceMoonshotText(payload);
    if (!text) {
      throw new Error("Moonshot video description response missing content");
    }
    return { text, model };
  } finally {
    // Always hand back whatever the guarded fetch acquired, success or failure.
    await release();
  }
}
|
||||
Reference in New Issue
Block a user