feat: Provider/Mistral full support for Mistral on OpenClaw 🇫🇷 (#23845)

* Onboard: add Mistral auth choice and CLI flags

* Onboard/Auth: add Mistral provider config defaults

* Auth choice: wire Mistral API-key flow

* Onboard non-interactive: support --mistral-api-key

* Media understanding: add Mistral Voxtral audio provider

* Changelog: note Mistral onboarding and media support

* Docs: add Mistral provider and onboarding/media references

* Tests: cover Mistral media registry/defaults and auth mapping

* Memory: add Mistral embeddings provider support

* Onboarding: refresh Mistral model metadata

* Docs: document Mistral embeddings and endpoints

* Memory: persist Mistral embedding client state in managers

* Memory: add regression tests for Mistral provider wiring

* Gateway: add live tool probe retry helper

* Gateway: cover live tool probe retry helper

* Gateway: retry malformed live tool-read probe responses

* Memory: support plain-text batch error bodies

* Tests: add Mistral Voxtral live transcription smoke

* Docs: add Mistral live audio test command

* Revert: remove Mistral live voice test and docs entry

* Onboard: re-export Mistral default model ref from models

* Changelog: credit joeVenner for Mistral work

* fix: include Mistral in auto audio key fallback

* Update CHANGELOG.md

* Update CHANGELOG.md

---------

Co-authored-by: Shakker <shakkerdroid@gmail.com>
This commit is contained in:
Vincent Koc
2026-02-22 19:03:56 -05:00
committed by GitHub
parent a66b98a9da
commit d92ba4f8aa
55 changed files with 996 additions and 66 deletions

View File

@@ -0,0 +1,14 @@
import { describe, expect, it } from "vitest";
import { AUTO_AUDIO_KEY_PROVIDERS, DEFAULT_AUDIO_MODELS } from "./defaults.js";
describe("DEFAULT_AUDIO_MODELS", () => {
  it("includes Mistral Voxtral default", () => {
    // The mistral entry of the default audio-model map must resolve to the
    // Voxtral mini "latest" alias.
    expect(DEFAULT_AUDIO_MODELS["mistral"]).toBe("voxtral-mini-latest");
  });
});
describe("AUTO_AUDIO_KEY_PROVIDERS", () => {
  it("includes mistral auto key resolution", () => {
    // Widen the readonly tuple to a string list, then check membership.
    const providers: readonly string[] = AUTO_AUDIO_KEY_PROVIDERS;
    expect(providers).toContain("mistral");
  });
});

View File

@@ -31,9 +31,16 @@ export const DEFAULT_AUDIO_MODELS: Record<string, string> = {
groq: "whisper-large-v3-turbo",
openai: "gpt-4o-mini-transcribe",
deepgram: "nova-3",
mistral: "voxtral-mini-latest",
};
// Providers whose audio transcription API key can be picked up automatically
// (from configured provider credentials) when the user has not selected an
// audio provider explicitly.
// NOTE(review): the single-line pre-change declaration was left in the diff
// rendering alongside the new one; only one declaration may exist — the
// duplicate `const` redeclaration is removed here.
export const AUTO_AUDIO_KEY_PROVIDERS = [
  "openai",
  "groq",
  "deepgram",
  "google",
  "mistral",
] as const;
export const AUTO_IMAGE_KEY_PROVIDERS = [
"openai",
"anthropic",

View File

@@ -0,0 +1,19 @@
import { describe, expect, it } from "vitest";
import { buildMediaUnderstandingRegistry, getMediaUnderstandingProvider } from "./index.js";
describe("media-understanding provider registry", () => {
  it("registers the Mistral provider", () => {
    // A freshly built registry must expose an audio-capable mistral entry.
    const reg = buildMediaUnderstandingRegistry();
    const entry = getMediaUnderstandingProvider("mistral", reg);
    expect(entry?.id).toBe("mistral");
    expect(entry?.capabilities).toEqual(["audio"]);
  });

  it("keeps provider id normalization behavior", () => {
    // Looking up "gemini" should still normalize to the google provider.
    const reg = buildMediaUnderstandingRegistry();
    const entry = getMediaUnderstandingProvider("gemini", reg);
    expect(entry?.id).toBe("google");
  });
});

View File

@@ -5,6 +5,7 @@ import { deepgramProvider } from "./deepgram/index.js";
import { googleProvider } from "./google/index.js";
import { groqProvider } from "./groq/index.js";
import { minimaxProvider } from "./minimax/index.js";
import { mistralProvider } from "./mistral/index.js";
import { openaiProvider } from "./openai/index.js";
import { zaiProvider } from "./zai/index.js";
@@ -14,6 +15,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
googleProvider,
anthropicProvider,
minimaxProvider,
mistralProvider,
zaiProvider,
deepgramProvider,
];

View File

@@ -0,0 +1,46 @@
import { describe, expect, it } from "vitest";
import {
createRequestCaptureJsonFetch,
installPinnedHostnameTestHooks,
} from "../audio.test-helpers.js";
import { mistralProvider } from "./index.js";
// Install the shared test hooks from audio.test-helpers before the suite runs;
// presumably pins hostname resolution so the capture fetch never hits the
// network — confirm in audio.test-helpers.
installPinnedHostnameTestHooks();
describe("mistralProvider", () => {
  it("has expected provider metadata", () => {
    // Identity, declared capabilities, and presence of the audio entry point.
    expect(mistralProvider.id).toBe("mistral");
    expect(mistralProvider.capabilities).toEqual(["audio"]);
    expect(mistralProvider.transcribeAudio).toBeDefined();
  });

  it("uses Mistral base URL by default", async () => {
    // Fake fetch that records the outgoing request and replies with JSON.
    const { fetchFn, getRequest } = createRequestCaptureJsonFetch({ text: "bonjour" });
    const result = await mistralProvider.transcribeAudio!({
      buffer: Buffer.from("audio-bytes"),
      fileName: "voice.ogg",
      apiKey: "test-mistral-key",
      timeoutMs: 5000,
      fetchFn,
    });
    // With no baseUrl supplied, the request must target api.mistral.ai.
    const captured = getRequest();
    expect(captured.url).toBe("https://api.mistral.ai/v1/audio/transcriptions");
    expect(result.text).toBe("bonjour");
  });

  it("allows overriding baseUrl", async () => {
    const { fetchFn, getRequest } = createRequestCaptureJsonFetch({ text: "ok" });
    // An explicit baseUrl must take precedence over the built-in default.
    await mistralProvider.transcribeAudio!({
      buffer: Buffer.from("audio"),
      fileName: "note.mp3",
      apiKey: "key",
      timeoutMs: 1000,
      baseUrl: "https://custom.mistral.example/v1",
      fetchFn,
    });
    const captured = getRequest();
    expect(captured.url).toBe("https://custom.mistral.example/v1/audio/transcriptions");
  });
});

View File

@@ -0,0 +1,14 @@
import type { MediaUnderstandingProvider } from "../../types.js";
import { transcribeOpenAiCompatibleAudio } from "../openai/audio.js";
// Mistral's OpenAI-compatible API root, used when the request carries no baseUrl.
const DEFAULT_MISTRAL_AUDIO_BASE_URL = "https://api.mistral.ai/v1";

/**
 * Media-understanding provider entry for Mistral (audio transcription).
 * Delegates to the shared OpenAI-compatible transcription helper, pointing it
 * at Mistral's API unless the caller supplied an explicit baseUrl.
 */
export const mistralProvider: MediaUnderstandingProvider = {
  id: "mistral",
  capabilities: ["audio"],
  transcribeAudio: (req) => {
    const baseUrl = req.baseUrl ?? DEFAULT_MISTRAL_AUDIO_BASE_URL;
    return transcribeOpenAiCompatibleAudio({ ...req, baseUrl });
  },
};

View File

@@ -107,4 +107,55 @@ describe("runCapability auto audio entries", () => {
expect(result.outputs[0]?.text).toBe("ok");
expect(seenModel).toBe("whisper-1");
});
// Regression for auto audio provider selection: when the config supplies an
// API key for mistral only, runCapability should route transcription to the
// mistral stub (not openai) and pick the Voxtral default model.
it("uses mistral when only mistral key is configured", async () => {
  // Captured outside the fixture callback so assertions can run after the
  // fixture has been torn down.
  let runResult: Awaited<ReturnType<typeof runCapability>> | undefined;
  await withAudioFixture("openclaw-auto-audio-mistral", async ({ ctx, media, cache }) => {
    // Two stub providers: if selection were wrong, the output text would read
    // "openai" instead of "mistral" and the assertions below would fail.
    const providerRegistry = buildProviderRegistry({
      openai: {
        id: "openai",
        capabilities: ["audio"],
        transcribeAudio: async () => ({ text: "openai", model: "gpt-4o-mini-transcribe" }),
      },
      mistral: {
        id: "mistral",
        capabilities: ["audio"],
        // Echo back the model the runner selected so the Voxtral default
        // can be asserted on.
        transcribeAudio: async (req) => ({ text: "mistral", model: req.model ?? "unknown" }),
      },
    });
    // Minimal config: a mistral API key only, audio understanding enabled.
    // Cast via unknown because the literal omits most OpenClawConfig fields.
    const cfg = {
      models: {
        providers: {
          mistral: {
            apiKey: "mistral-test-key",
            models: [],
          },
        },
      },
      tools: {
        media: {
          audio: {
            enabled: true,
          },
        },
      },
    } as unknown as OpenClawConfig;
    runResult = await runCapability({
      capability: "audio",
      cfg,
      ctx,
      attachments: cache,
      media,
      providerRegistry,
    });
  });
  if (!runResult) {
    throw new Error("Expected auto audio mistral result");
  }
  expect(runResult.decision.outcome).toBe("success");
  expect(runResult.outputs[0]?.provider).toBe("mistral");
  // NOTE(review): the model presumably comes from DEFAULT_AUDIO_MODELS.mistral
  // since the stub echoes req.model — confirm against the runner's defaulting.
  expect(runResult.outputs[0]?.model).toBe("voxtral-mini-latest");
  expect(runResult.outputs[0]?.text).toBe("mistral");
});
});