fix(ollama): hide native reasoning-only output (#45330) Thanks @xi7ang

Co-authored-by: xi7ang <266449609+xi7ang@users.noreply.github.com>
Co-authored-by: Frank Yang <vibespecs@gmail.com>
This commit is contained in:
Frank Yang
2026-03-14 01:38:06 +08:00
committed by GitHub
parent ee1d4eb29d
commit 7778627b71
3 changed files with 13 additions and 21 deletions

View File

@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Ollama/reasoning visibility: stop promoting native `thinking` and `reasoning` fields into final assistant text so local reasoning models no longer leak internal thoughts in normal replies. (#45330) Thanks @xi7ang.
- Windows/gateway install: time-bound `schtasks` calls and fall back to the Startup-folder login item when task creation hangs, so native `openclaw gateway install` fails fast instead of wedging forever on broken Scheduled Task setups.
- Windows/gateway auth: stop attaching device identity on local loopback shared-token and password gateway calls, so native Windows agent replies no longer log stale `device signature expired` fallback noise before succeeding.
- Telegram/media downloads: thread the same direct or proxy transport policy into SSRF-guarded file fetches so inbound attachments keep working when Telegram falls back between env-proxy and direct networking. (#44639) Thanks @obviyus.

View File

@@ -106,7 +106,7 @@ describe("buildAssistantMessage", () => {
expect(result.usage.totalTokens).toBe(15);
});
it("falls back to thinking when content is empty", () => {
it("drops thinking-only output when content is empty", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
@@ -119,10 +119,10 @@ describe("buildAssistantMessage", () => {
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("stop");
expect(result.content).toEqual([{ type: "text", text: "Thinking output" }]);
expect(result.content).toEqual([]);
});
it("falls back to reasoning when content and thinking are empty", () => {
it("drops reasoning-only output when content and thinking are empty", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
@@ -135,7 +135,7 @@ describe("buildAssistantMessage", () => {
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("stop");
expect(result.content).toEqual([{ type: "text", text: "Reasoning output" }]);
expect(result.content).toEqual([]);
});
it("builds response with tool calls", () => {
@@ -485,7 +485,7 @@ describe("createOllamaStreamFn", () => {
);
});
it("accumulates thinking chunks when content is empty", async () => {
it("drops thinking chunks when no final content is emitted", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"reasoned"},"done":false}',
@@ -501,7 +501,7 @@ describe("createOllamaStreamFn", () => {
throw new Error("Expected done event");
}
expect(doneEvent.message.content).toEqual([{ type: "text", text: "reasoned output" }]);
expect(doneEvent.message.content).toEqual([]);
},
);
});
@@ -528,7 +528,7 @@ describe("createOllamaStreamFn", () => {
);
});
it("accumulates reasoning chunks when thinking is absent", async () => {
it("drops reasoning chunks when no final content is emitted", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
@@ -544,7 +544,7 @@ describe("createOllamaStreamFn", () => {
throw new Error("Expected done event");
}
expect(doneEvent.message.content).toEqual([{ type: "text", text: "reasoned output" }]);
expect(doneEvent.message.content).toEqual([]);
},
);
});

View File

@@ -340,10 +340,9 @@ export function buildAssistantMessage(
): AssistantMessage {
const content: (TextContent | ToolCall)[] = [];
// Ollama-native reasoning models may emit their answer in `thinking` or
// `reasoning` with an empty `content`. Fall back so replies are not dropped.
const text =
response.message.content || response.message.thinking || response.message.reasoning || "";
// Native Ollama reasoning fields are internal model output. The reply text
// must come from `content`; reasoning visibility is controlled elsewhere.
const text = response.message.content || "";
if (text) {
content.push({ type: "text", text });
}
@@ -497,20 +496,12 @@ export function createOllamaStreamFn(
const reader = response.body.getReader();
let accumulatedContent = "";
let fallbackContent = "";
let sawContent = false;
const accumulatedToolCalls: OllamaToolCall[] = [];
let finalResponse: OllamaChatResponse | undefined;
for await (const chunk of parseNdjsonStream(reader)) {
if (chunk.message?.content) {
sawContent = true;
accumulatedContent += chunk.message.content;
} else if (!sawContent && chunk.message?.thinking) {
fallbackContent += chunk.message.thinking;
} else if (!sawContent && chunk.message?.reasoning) {
// Backward compatibility for older/native variants that still use reasoning.
fallbackContent += chunk.message.reasoning;
}
// Ollama sends tool_calls in intermediate (done:false) chunks,
@@ -529,7 +520,7 @@ export function createOllamaStreamFn(
throw new Error("Ollama API stream ended without a final response");
}
finalResponse.message.content = accumulatedContent || fallbackContent;
finalResponse.message.content = accumulatedContent;
if (accumulatedToolCalls.length > 0) {
finalResponse.message.tool_calls = accumulatedToolCalls;
}