diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2a8270dd154..f7679f4c5b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
 
 ### Fixes
 
+- Ollama/reasoning visibility: stop promoting native `thinking` and `reasoning` fields into final assistant text so local reasoning models no longer leak internal thoughts in normal replies. (#45330) Thanks @xi7ang.
 - Windows/gateway install: bound `schtasks` calls and fall back to the Startup-folder login item when task creation hangs, so native `openclaw gateway install` fails fast instead of wedging forever on broken Scheduled Task setups.
 - Windows/gateway auth: stop attaching device identity on local loopback shared-token and password gateway calls, so native Windows agent replies no longer log stale `device signature expired` fallback noise before succeeding.
 - Telegram/media downloads: thread the same direct or proxy transport policy into SSRF-guarded file fetches so inbound attachments keep working when Telegram falls back between env-proxy and direct networking. (#44639) Thanks @obviyus.
diff --git a/src/agents/ollama-stream.test.ts b/src/agents/ollama-stream.test.ts
index 2af5e490c7f..241c7a0f858 100644
--- a/src/agents/ollama-stream.test.ts
+++ b/src/agents/ollama-stream.test.ts
@@ -106,7 +106,7 @@ describe("buildAssistantMessage", () => {
     expect(result.usage.totalTokens).toBe(15);
   });
 
-  it("falls back to thinking when content is empty", () => {
+  it("drops thinking-only output when content is empty", () => {
     const response = {
       model: "qwen3:32b",
       created_at: "2026-01-01T00:00:00Z",
@@ -119,10 +119,10 @@
     };
     const result = buildAssistantMessage(response, modelInfo);
     expect(result.stopReason).toBe("stop");
-    expect(result.content).toEqual([{ type: "text", text: "Thinking output" }]);
+    expect(result.content).toEqual([]);
   });
 
-  it("falls back to reasoning when content and thinking are empty", () => {
+  it("drops reasoning-only output when content and thinking are empty", () => {
     const response = {
       model: "qwen3:32b",
       created_at: "2026-01-01T00:00:00Z",
@@ -135,7 +135,7 @@
     };
     const result = buildAssistantMessage(response, modelInfo);
     expect(result.stopReason).toBe("stop");
-    expect(result.content).toEqual([{ type: "text", text: "Reasoning output" }]);
+    expect(result.content).toEqual([]);
   });
 
   it("builds response with tool calls", () => {
@@ -485,7 +485,7 @@ describe("createOllamaStreamFn", () => {
     );
   });
 
-  it("accumulates thinking chunks when content is empty", async () => {
+  it("drops thinking chunks when no final content is emitted", async () => {
     await withMockNdjsonFetch(
       [
         '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"reasoned"},"done":false}',
@@ -501,7 +501,7 @@
           throw new Error("Expected done event");
         }
 
-        expect(doneEvent.message.content).toEqual([{ type: "text", text: "reasoned output" }]);
+        expect(doneEvent.message.content).toEqual([]);
       },
     );
   });
@@ -528,7 +528,7 @@ describe("createOllamaStreamFn", () => {
     );
   });
 
-  it("accumulates reasoning chunks when thinking is absent", async () => {
+  it("drops reasoning chunks when no final content is emitted", async () => {
    await withMockNdjsonFetch(
       [
         '{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
@@ -544,7 +544,7 @@
           throw new Error("Expected done event");
         }
 
-        expect(doneEvent.message.content).toEqual([{ type: "text", text: "reasoned output" }]);
+        expect(doneEvent.message.content).toEqual([]);
       },
     );
   });
diff --git a/src/agents/ollama-stream.ts b/src/agents/ollama-stream.ts
index 9d23852bb31..70a2ef33cf1 100644
--- a/src/agents/ollama-stream.ts
+++ b/src/agents/ollama-stream.ts
@@ -340,10 +340,9 @@ export function buildAssistantMessage(
 ): AssistantMessage {
   const content: (TextContent | ToolCall)[] = [];
 
-  // Ollama-native reasoning models may emit their answer in `thinking` or
-  // `reasoning` with an empty `content`. Fall back so replies are not dropped.
-  const text =
-    response.message.content || response.message.thinking || response.message.reasoning || "";
+  // Native Ollama reasoning fields are internal model output. The reply text
+  // must come from `content`; reasoning visibility is controlled elsewhere.
+  const text = response.message.content || "";
   if (text) {
     content.push({ type: "text", text });
   }
@@ -497,20 +496,12 @@ export function createOllamaStreamFn(
 
   const reader = response.body.getReader();
   let accumulatedContent = "";
-  let fallbackContent = "";
-  let sawContent = false;
   const accumulatedToolCalls: OllamaToolCall[] = [];
   let finalResponse: OllamaChatResponse | undefined;
 
   for await (const chunk of parseNdjsonStream(reader)) {
     if (chunk.message?.content) {
-      sawContent = true;
       accumulatedContent += chunk.message.content;
-    } else if (!sawContent && chunk.message?.thinking) {
-      fallbackContent += chunk.message.thinking;
-    } else if (!sawContent && chunk.message?.reasoning) {
-      // Backward compatibility for older/native variants that still use reasoning.
-      fallbackContent += chunk.message.reasoning;
     }
 
     // Ollama sends tool_calls in intermediate (done:false) chunks,
@@ -529,7 +520,7 @@
     throw new Error("Ollama API stream ended without a final response");
   }
 
-  finalResponse.message.content = accumulatedContent || fallbackContent;
+  finalResponse.message.content = accumulatedContent;
   if (accumulatedToolCalls.length > 0) {
     finalResponse.message.tool_calls = accumulatedToolCalls;
   }
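For context, a minimal sketch of the contract this patch pins down, mirroring the updated unit tests above. The `modelInfo` value, the response literal, and the `as` cast are illustrative assumptions, not the repo's real fixtures:

```ts
// Sketch of the post-change behavior of buildAssistantMessage.
// `modelInfo` is assumed to exist; in the real tests it comes from shared setup.
import { buildAssistantMessage } from "./ollama-stream";

declare const modelInfo: Parameters<typeof buildAssistantMessage>[1];

// A thinking-only response: the model put everything in `thinking`
// and left `content` empty. The exact literal shape is an assumption.
const thinkingOnly = {
  model: "qwen3:32b",
  created_at: "2026-01-01T00:00:00Z",
  message: { role: "assistant", content: "", thinking: "internal chain of thought" },
  done: true,
} as Parameters<typeof buildAssistantMessage>[0];

// Before: content === [{ type: "text", text: "internal chain of thought" }]
// After:  content === []; reasoning text never becomes the reply.
const { content } = buildAssistantMessage(thinkingOnly, modelInfo);
```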