feat(ollama): add native /api/chat provider for streaming + tool calling (#11853)

Merged via /review-pr -> /prepare-pr -> /merge-pr.

Prepared head SHA: 0a723f98e6
Co-authored-by: BrokenFinger98 <115936166+BrokenFinger98@users.noreply.github.com>
Co-authored-by: steipete <58493+steipete@users.noreply.github.com>
Reviewed-by: @steipete
This commit is contained in:
Authored by Sunwoo Yu on 2026-02-14 09:20:42 +09:00; committed by GitHub.
parent 5378583da1
commit 11702290ff
9 changed files with 760 additions and 75 deletions

View File

@@ -17,6 +17,7 @@ import {
buildHuggingfaceModelDefinition,
} from "./huggingface-models.js";
import { resolveAwsSdkEnvVarName, resolveEnvApiKey } from "./model-auth.js";
import { OLLAMA_NATIVE_BASE_URL } from "./ollama-stream.js";
import {
buildSyntheticModelDefinition,
SYNTHETIC_BASE_URL,
@@ -79,8 +80,8 @@ const QWEN_PORTAL_DEFAULT_COST = {
cacheWrite: 0,
};
const OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1";
const OLLAMA_API_BASE_URL = "http://127.0.0.1:11434";
const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
const OLLAMA_DEFAULT_COST = {
@@ -180,11 +181,6 @@ async function discoverOllamaModels(baseUrl?: string): Promise<ModelDefinitionCo
cost: OLLAMA_DEFAULT_COST,
contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
// Disable streaming by default for Ollama to avoid SDK issue #1205
// See: https://github.com/badlogic/pi-mono/issues/1205
params: {
streaming: false,
},
};
});
} catch (error) {
@@ -541,8 +537,8 @@ async function buildVeniceProvider(): Promise<ProviderConfig> {
async function buildOllamaProvider(configuredBaseUrl?: string): Promise<ProviderConfig> {
const models = await discoverOllamaModels(configuredBaseUrl);
return {
baseUrl: configuredBaseUrl ?? OLLAMA_BASE_URL,
api: "openai-completions",
baseUrl: resolveOllamaApiBase(configuredBaseUrl),
api: "ollama",
models,
};
}