feat(agents): first-class Hugging Face Inference provider support, Together API fix, and direct-injection auth refactor [AI-assisted] (#13472)

* initial commit

* removes assessment from docs

* resolves automated review comments

* resolves lint, type, and test issues; refactors and submits

* solves: why do we have to lint the tests xD

* adds greptile fixes

* solves a type error

* solves a ci error

* refactors auths

* solves a failing test after i pulled from main lol

* solves a failing test after i pulled from main lol

* resolves token naming issue to comply with better practices when using hf / huggingface

* fixes curly-brace lint errors

* fixes failing tests for google api from main

* solves merge conflicts

* solves failing tests with a defensive check for an undefined OpenRouter API key

* fix: preserve Hugging Face auth-choice intent and token behavior (#13472) (thanks @Josephrp)

* test: resolve auth-choice cherry-pick conflict cleanup (#13472)

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
Author: Tonic
Date: 2026-02-13 16:18:16 +01:00
Committed by: GitHub
Parent: e50ce897b0
Commit: 08b7932df0
27 changed files with 1617 additions and 355 deletions

@@ -0,0 +1,44 @@
import { describe, expect, it } from "vitest";
import {
discoverHuggingfaceModels,
HUGGINGFACE_MODEL_CATALOG,
buildHuggingfaceModelDefinition,
isHuggingfacePolicyLocked,
} from "./huggingface-models.js";
describe("huggingface-models", () => {
it("buildHuggingfaceModelDefinition returns config with required fields", () => {
const entry = HUGGINGFACE_MODEL_CATALOG[0];
const def = buildHuggingfaceModelDefinition(entry);
expect(def.id).toBe(entry.id);
expect(def.name).toBe(entry.name);
expect(def.reasoning).toBe(entry.reasoning);
expect(def.input).toEqual(entry.input);
expect(def.cost).toEqual(entry.cost);
expect(def.contextWindow).toBe(entry.contextWindow);
expect(def.maxTokens).toBe(entry.maxTokens);
});
it("discoverHuggingfaceModels returns static catalog when apiKey is empty", async () => {
const models = await discoverHuggingfaceModels("");
expect(models).toHaveLength(HUGGINGFACE_MODEL_CATALOG.length);
expect(models.map((m) => m.id)).toEqual(HUGGINGFACE_MODEL_CATALOG.map((m) => m.id));
});
it("discoverHuggingfaceModels returns static catalog in test env (VITEST)", async () => {
const models = await discoverHuggingfaceModels("hf_test_token");
expect(models).toHaveLength(HUGGINGFACE_MODEL_CATALOG.length);
expect(models[0].id).toBe("deepseek-ai/DeepSeek-R1");
});
describe("isHuggingfacePolicyLocked", () => {
it("returns true for :cheapest and :fastest refs", () => {
expect(isHuggingfacePolicyLocked("huggingface/deepseek-ai/DeepSeek-R1:cheapest")).toBe(true);
expect(isHuggingfacePolicyLocked("huggingface/deepseek-ai/DeepSeek-R1:fastest")).toBe(true);
});
it("returns false for base ref and :provider refs", () => {
expect(isHuggingfacePolicyLocked("huggingface/deepseek-ai/DeepSeek-R1")).toBe(false);
expect(isHuggingfacePolicyLocked("huggingface/foo:together")).toBe(false);
});
});
});

@@ -0,0 +1,229 @@
import type { ModelDefinitionConfig } from "../config/types.models.js";
/** Hugging Face Inference Providers (router) — OpenAI-compatible chat completions. */
export const HUGGINGFACE_BASE_URL = "https://router.huggingface.co/v1";
/** Router policy suffixes: router picks backend by cost or speed; no specific provider selection. */
export const HUGGINGFACE_POLICY_SUFFIXES = ["cheapest", "fastest"] as const;
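// Ref shapes (illustrative, matching the refs exercised in the tests above):
//   huggingface/deepseek-ai/DeepSeek-R1           -> base ref, backend selectable
//   huggingface/deepseek-ai/DeepSeek-R1:cheapest  -> router picks the cheapest backend (locked)
//   huggingface/deepseek-ai/DeepSeek-R1:fastest   -> router picks the fastest backend (locked)
//   huggingface/foo:together                      -> explicit backend pin (not locked)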
/**
* True when the model ref uses :cheapest or :fastest. When true, provider choice is locked
* (router decides); do not show an interactive "prefer specific backend" option.
*/
export function isHuggingfacePolicyLocked(modelRef: string): boolean {
const ref = String(modelRef).trim();
return HUGGINGFACE_POLICY_SUFFIXES.some((s) => ref.endsWith(`:${s}`) || ref === s);
}
/** Default cost when not in static catalog (HF pricing varies by provider). */
const HUGGINGFACE_DEFAULT_COST = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
};
/** Defaults for models discovered from GET /v1/models. */
const HUGGINGFACE_DEFAULT_CONTEXT_WINDOW = 131072;
const HUGGINGFACE_DEFAULT_MAX_TOKENS = 8192;
/**
* Shape of a single model entry from GET https://router.huggingface.co/v1/models.
* Aligned with the Inference Providers API response (object, data[].id, owned_by, architecture, providers).
*/
interface HFModelEntry {
id: string;
object?: string;
created?: number;
/** Organisation that owns the model (e.g. "Qwen", "deepseek-ai"). Used for display when name/title absent. */
owned_by?: string;
/** Display name from API when present (not all responses include this). */
name?: string;
title?: string;
display_name?: string;
/** Input/output modalities; we use input_modalities for ModelDefinitionConfig.input. */
architecture?: {
input_modalities?: string[];
output_modalities?: string[];
[key: string]: unknown;
};
/** Backend providers; we use the first provider with context_length when available. */
providers?: Array<{
provider?: string;
context_length?: number;
status?: string;
pricing?: { input?: number; output?: number; [key: string]: unknown };
[key: string]: unknown;
}>;
[key: string]: unknown;
}
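// Abridged sample entry (shape per the router API; values illustrative, not from this commit):
//   { "id": "deepseek-ai/DeepSeek-R1", "owned_by": "deepseek-ai",
//     "architecture": { "input_modalities": ["text"], "output_modalities": ["text"] },
//     "providers": [{ "provider": "together", "context_length": 131072, "status": "live" }] }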
/** Response shape from GET https://router.huggingface.co/v1/models (OpenAI-style list). */
interface OpenAIListModelsResponse {
object?: string;
data?: HFModelEntry[];
}
export const HUGGINGFACE_MODEL_CATALOG: ModelDefinitionConfig[] = [
{
id: "deepseek-ai/DeepSeek-R1",
name: "DeepSeek R1",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
cost: { input: 3.0, output: 7.0, cacheRead: 3.0, cacheWrite: 3.0 },
},
{
id: "deepseek-ai/DeepSeek-V3.1",
name: "DeepSeek V3.1",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
cost: { input: 0.6, output: 1.25, cacheRead: 0.6, cacheWrite: 0.6 },
},
{
id: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
name: "Llama 3.3 70B Instruct Turbo",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
cost: { input: 0.88, output: 0.88, cacheRead: 0.88, cacheWrite: 0.88 },
},
{
id: "openai/gpt-oss-120b",
name: "GPT-OSS 120B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
},
];
export function buildHuggingfaceModelDefinition(
model: (typeof HUGGINGFACE_MODEL_CATALOG)[number],
): ModelDefinitionConfig {
return {
id: model.id,
name: model.name,
reasoning: model.reasoning,
input: model.input,
cost: model.cost,
contextWindow: model.contextWindow,
maxTokens: model.maxTokens,
};
}
/**
* Infer reasoning and display name from Hub-style model id (e.g. "deepseek-ai/DeepSeek-R1").
*/
function inferredMetaFromModelId(id: string): { name: string; reasoning: boolean } {
const base = id.split("/").pop() ?? id;
const reasoning = /r1|reason|thinking/i.test(id);
const name = base.replace(/-/g, " ").replace(/\b(\w)/g, (c) => c.toUpperCase());
return { name, reasoning };
}
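// Worked examples (follow directly from the patterns above):
//   inferredMetaFromModelId("deepseek-ai/DeepSeek-R1")
//     -> { name: "DeepSeek R1", reasoning: true }   // "R1" matches /r1/i
//   inferredMetaFromModelId("meta-llama/Llama-3.3-70B-Instruct-Turbo")
//     -> { name: "Llama 3.3 70B Instruct Turbo", reasoning: false }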
/** Prefer API-supplied display name, then owned_by/id, then inferred from id. */
function displayNameFromApiEntry(entry: HFModelEntry, inferredName: string): string {
const fromApi =
(typeof entry.name === "string" && entry.name.trim()) ||
(typeof entry.title === "string" && entry.title.trim()) ||
(typeof entry.display_name === "string" && entry.display_name.trim());
if (fromApi) {
return fromApi;
}
if (typeof entry.owned_by === "string" && entry.owned_by.trim()) {
const base = entry.id.split("/").pop() ?? entry.id;
return `${entry.owned_by.trim()}/${base}`;
}
return inferredName;
}
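// e.g. { id: "Qwen/Qwen2.5-72B-Instruct", owned_by: "Qwen" } with no name/title/display_name
//   -> "Qwen/Qwen2.5-72B-Instruct" (owned_by + base id segment); values illustrative.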
/**
* Discover chat-completion models from Hugging Face Inference Providers (GET /v1/models).
* Requires a valid HF token. Falls back to static catalog on failure or in test env.
*/
export async function discoverHuggingfaceModels(apiKey: string): Promise<ModelDefinitionConfig[]> {
if (process.env.VITEST === "true" || process.env.NODE_ENV === "test") {
return HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
}
const trimmedKey = apiKey?.trim();
if (!trimmedKey) {
return HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
}
try {
// GET https://router.huggingface.co/v1/models — response: { object, data: [{ id, owned_by,
// architecture: { input_modalities }, providers: [{ provider, context_length?, pricing? }] }] }.
// POST /v1/chat/completions requires Authorization.
const response = await fetch(`${HUGGINGFACE_BASE_URL}/models`, {
signal: AbortSignal.timeout(10_000),
headers: {
Authorization: `Bearer ${trimmedKey}`,
"Content-Type": "application/json",
},
});
if (!response.ok) {
console.warn(
`[huggingface-models] GET /v1/models failed: HTTP ${response.status}, using static catalog`,
);
return HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
}
const body = (await response.json()) as OpenAIListModelsResponse;
const data = body?.data;
if (!Array.isArray(data) || data.length === 0) {
console.warn("[huggingface-models] No models in response, using static catalog");
return HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
}
const catalogById = new Map(HUGGINGFACE_MODEL_CATALOG.map((m) => [m.id, m] as const));
const seen = new Set<string>();
const models: ModelDefinitionConfig[] = [];
for (const entry of data) {
const id = typeof entry?.id === "string" ? entry.id.trim() : "";
if (!id || seen.has(id)) {
continue;
}
seen.add(id);
const catalogEntry = catalogById.get(id);
if (catalogEntry) {
models.push(buildHuggingfaceModelDefinition(catalogEntry));
} else {
const inferred = inferredMetaFromModelId(id);
const name = displayNameFromApiEntry(entry, inferred.name);
const modalities = entry.architecture?.input_modalities;
const input: Array<"text" | "image"> =
Array.isArray(modalities) && modalities.includes("image") ? ["text", "image"] : ["text"];
const providers = Array.isArray(entry.providers) ? entry.providers : [];
const providerWithContext = providers.find(
(p) => typeof p?.context_length === "number" && p.context_length > 0,
);
const contextLength =
providerWithContext?.context_length ?? HUGGINGFACE_DEFAULT_CONTEXT_WINDOW;
models.push({
id,
name,
reasoning: inferred.reasoning,
input,
cost: HUGGINGFACE_DEFAULT_COST,
contextWindow: contextLength,
maxTokens: HUGGINGFACE_DEFAULT_MAX_TOKENS,
});
}
}
return models.length > 0
? models
: HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
} catch (error) {
console.warn(`[huggingface-models] Discovery failed: ${String(error)}, using static catalog`);
return HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
}
}
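// Minimal usage sketch (hypothetical caller; the real call site is buildHuggingfaceProvider
// later in this diff):
//   const models = await discoverHuggingfaceModels(process.env.HF_TOKEN ?? "");
//   // -> live /v1/models catalog on success; falls back to HUGGINGFACE_MODEL_CATALOG on an
//   //    empty key, test env, HTTP error, bad response shape, or thrown fetch error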

@@ -532,4 +532,79 @@ describe("getApiKeyForModel", () => {
}
}
});
it("resolveEnvApiKey('huggingface') returns HUGGINGFACE_HUB_TOKEN when set", async () => {
const prevHub = process.env.HUGGINGFACE_HUB_TOKEN;
const prevHf = process.env.HF_TOKEN;
try {
delete process.env.HF_TOKEN;
process.env.HUGGINGFACE_HUB_TOKEN = "hf_hub_xyz";
vi.resetModules();
const { resolveEnvApiKey } = await import("./model-auth.js");
const resolved = resolveEnvApiKey("huggingface");
expect(resolved?.apiKey).toBe("hf_hub_xyz");
expect(resolved?.source).toContain("HUGGINGFACE_HUB_TOKEN");
} finally {
if (prevHub === undefined) {
delete process.env.HUGGINGFACE_HUB_TOKEN;
} else {
process.env.HUGGINGFACE_HUB_TOKEN = prevHub;
}
if (prevHf === undefined) {
delete process.env.HF_TOKEN;
} else {
process.env.HF_TOKEN = prevHf;
}
}
});
it("resolveEnvApiKey('huggingface') prefers HUGGINGFACE_HUB_TOKEN over HF_TOKEN when both set", async () => {
const prevHub = process.env.HUGGINGFACE_HUB_TOKEN;
const prevHf = process.env.HF_TOKEN;
try {
process.env.HUGGINGFACE_HUB_TOKEN = "hf_hub_first";
process.env.HF_TOKEN = "hf_second";
vi.resetModules();
const { resolveEnvApiKey } = await import("./model-auth.js");
const resolved = resolveEnvApiKey("huggingface");
expect(resolved?.apiKey).toBe("hf_hub_first");
expect(resolved?.source).toContain("HUGGINGFACE_HUB_TOKEN");
} finally {
if (prevHub === undefined) {
delete process.env.HUGGINGFACE_HUB_TOKEN;
} else {
process.env.HUGGINGFACE_HUB_TOKEN = prevHub;
}
if (prevHf === undefined) {
delete process.env.HF_TOKEN;
} else {
process.env.HF_TOKEN = prevHf;
}
}
});
it("resolveEnvApiKey('huggingface') returns HF_TOKEN when only HF_TOKEN set", async () => {
const prevHub = process.env.HUGGINGFACE_HUB_TOKEN;
const prevHf = process.env.HF_TOKEN;
try {
delete process.env.HUGGINGFACE_HUB_TOKEN;
process.env.HF_TOKEN = "hf_abc123";
vi.resetModules();
const { resolveEnvApiKey } = await import("./model-auth.js");
const resolved = resolveEnvApiKey("huggingface");
expect(resolved?.apiKey).toBe("hf_abc123");
expect(resolved?.source).toContain("HF_TOKEN");
} finally {
if (prevHub === undefined) {
delete process.env.HUGGINGFACE_HUB_TOKEN;
} else {
process.env.HUGGINGFACE_HUB_TOKEN = prevHub;
}
if (prevHf === undefined) {
delete process.env.HF_TOKEN;
} else {
process.env.HF_TOKEN = prevHf;
}
}
});
});

@@ -287,6 +287,10 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
return pick("KIMI_API_KEY") ?? pick("KIMICODE_API_KEY");
}
if (normalized === "huggingface") {
return pick("HUGGINGFACE_HUB_TOKEN") ?? pick("HF_TOKEN");
}
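// e.g. with HUGGINGFACE_HUB_TOKEN=hf_a and HF_TOKEN=hf_b both set, the hub token wins
// (assuming pick() yields null/undefined for unset vars, as the ?? chains here imply).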
const envMap: Record<string, string> = {
openai: "OPENAI_API_KEY",
google: "GEMINI_API_KEY",

@@ -10,6 +10,12 @@ import {
buildCloudflareAiGatewayModelDefinition,
resolveCloudflareAiGatewayBaseUrl,
} from "./cloudflare-ai-gateway.js";
import {
discoverHuggingfaceModels,
HUGGINGFACE_BASE_URL,
HUGGINGFACE_MODEL_CATALOG,
buildHuggingfaceModelDefinition,
} from "./huggingface-models.js";
import { resolveAwsSdkEnvVarName, resolveEnvApiKey } from "./model-auth.js";
import {
buildSyntheticModelDefinition,
@@ -542,6 +548,25 @@ async function buildOllamaProvider(configuredBaseUrl?: string): Promise<Provider
};
}
async function buildHuggingfaceProvider(apiKey?: string): Promise<ProviderConfig> {
// Resolve env var name to value for discovery (GET /v1/models requires Bearer token).
// apiKey may be an env var NAME (all-caps, e.g. HUGGINGFACE_HUB_TOKEN) or a literal token;
// undefined/empty falls through to the static catalog below.
const trimmedKey = apiKey?.trim() ?? "";
const resolvedSecret =
  trimmedKey !== ""
    ? /^[A-Z][A-Z0-9_]*$/.test(trimmedKey)
      ? (process.env[trimmedKey] ?? "").trim()
      : trimmedKey
    : "";
const models =
resolvedSecret !== ""
? await discoverHuggingfaceModels(resolvedSecret)
: HUGGINGFACE_MODEL_CATALOG.map(buildHuggingfaceModelDefinition);
return {
baseUrl: HUGGINGFACE_BASE_URL,
api: "openai-completions",
models,
};
}
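// Illustrative resolution under the heuristic above (assumed inputs):
//   buildHuggingfaceProvider("HUGGINGFACE_HUB_TOKEN") -> reads process.env.HUGGINGFACE_HUB_TOKEN
//   buildHuggingfaceProvider("hf_abc123")             -> used as a literal token for discovery
//   buildHuggingfaceProvider(undefined)               -> static catalog, no /v1/models call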
function buildTogetherProvider(): ProviderConfig {
return {
baseUrl: TOGETHER_BASE_URL,
@@ -715,6 +740,17 @@ export async function resolveImplicitProviders(params: {
};
}
const huggingfaceKey =
resolveEnvApiKeyVarName("huggingface") ??
resolveApiKeyFromProfiles({ provider: "huggingface", store: authStore });
if (huggingfaceKey) {
const hfProvider = await buildHuggingfaceProvider(huggingfaceKey);
providers.huggingface = {
...hfProvider,
apiKey: huggingfaceKey,
};
}
const qianfanKey =
resolveEnvApiKeyVarName("qianfan") ??
resolveApiKeyFromProfiles({ provider: "qianfan", store: authStore });