fix(ollama): hide native reasoning-only output (#45330) Thanks @xi7ang

Co-authored-by: xi7ang <266449609+xi7ang@users.noreply.github.com>
Co-authored-by: Frank Yang <vibespecs@gmail.com>
This commit is contained in:
Frank Yang
2026-03-14 01:38:06 +08:00
committed by GitHub
parent ee1d4eb29d
commit 7778627b71
3 changed files with 13 additions and 21 deletions

View File

@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Ollama/reasoning visibility: stop promoting native `thinking` and `reasoning` fields into final assistant text so local reasoning models no longer leak internal thoughts in normal replies. (#45330) Thanks @xi7ang.
- Windows/gateway install: time-bound `schtasks` calls and fall back to the Startup-folder login item when task creation hangs, so native `openclaw gateway install` fails fast instead of wedging forever on broken Scheduled Task setups.
- Windows/gateway auth: stop attaching device identity on local loopback shared-token and password gateway calls, so native Windows agent replies no longer log stale `device signature expired` fallback noise before succeeding.
- Telegram/media downloads: thread the same direct or proxy transport policy into SSRF-guarded file fetches so inbound attachments keep working when Telegram falls back between env-proxy and direct networking. (#44639) Thanks @obviyus.

View File

@@ -106,7 +106,7 @@ describe("buildAssistantMessage", () => {
expect(result.usage.totalTokens).toBe(15);
});
it("falls back to thinking when content is empty", () => {
it("drops thinking-only output when content is empty", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
@@ -119,10 +119,10 @@ describe("buildAssistantMessage", () => {
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("stop");
expect(result.content).toEqual([{ type: "text", text: "Thinking output" }]);
expect(result.content).toEqual([]);
});
it("falls back to reasoning when content and thinking are empty", () => {
it("drops reasoning-only output when content and thinking are empty", () => {
const response = {
model: "qwen3:32b",
created_at: "2026-01-01T00:00:00Z",
@@ -135,7 +135,7 @@ describe("buildAssistantMessage", () => {
};
const result = buildAssistantMessage(response, modelInfo);
expect(result.stopReason).toBe("stop");
expect(result.content).toEqual([{ type: "text", text: "Reasoning output" }]);
expect(result.content).toEqual([]);
});
it("builds response with tool calls", () => {
@@ -485,7 +485,7 @@ describe("createOllamaStreamFn", () => {
);
});
it("accumulates thinking chunks when content is empty", async () => {
it("drops thinking chunks when no final content is emitted", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","thinking":"reasoned"},"done":false}',
@@ -501,7 +501,7 @@ describe("createOllamaStreamFn", () => {
throw new Error("Expected done event");
}
expect(doneEvent.message.content).toEqual([{ type: "text", text: "reasoned output" }]);
expect(doneEvent.message.content).toEqual([]);
},
);
});
@@ -528,7 +528,7 @@ describe("createOllamaStreamFn", () => {
);
});
it("accumulates reasoning chunks when thinking is absent", async () => {
it("drops reasoning chunks when no final content is emitted", async () => {
await withMockNdjsonFetch(
[
'{"model":"m","created_at":"t","message":{"role":"assistant","content":"","reasoning":"reasoned"},"done":false}',
@@ -544,7 +544,7 @@ describe("createOllamaStreamFn", () => {
throw new Error("Expected done event");
}
expect(doneEvent.message.content).toEqual([{ type: "text", text: "reasoned output" }]);
expect(doneEvent.message.content).toEqual([]);
},
);
});

View File

@@ -340,10 +340,9 @@ export function buildAssistantMessage(
): AssistantMessage {
const content: (TextContent | ToolCall)[] = [];
// Ollama-native reasoning models may emit their answer in `thinking` or
// `reasoning` with an empty `content`. Fall back so replies are not dropped.
const text =
response.message.content || response.message.thinking || response.message.reasoning || "";
// Native Ollama reasoning fields are internal model output. The reply text
// must come from `content`; reasoning visibility is controlled elsewhere.
const text = response.message.content || "";
if (text) {
content.push({ type: "text", text });
}
@@ -497,20 +496,12 @@ export function createOllamaStreamFn(
const reader = response.body.getReader();
let accumulatedContent = "";
let fallbackContent = "";
let sawContent = false;
const accumulatedToolCalls: OllamaToolCall[] = [];
let finalResponse: OllamaChatResponse | undefined;
for await (const chunk of parseNdjsonStream(reader)) {
if (chunk.message?.content) {
sawContent = true;
accumulatedContent += chunk.message.content;
} else if (!sawContent && chunk.message?.thinking) {
fallbackContent += chunk.message.thinking;
} else if (!sawContent && chunk.message?.reasoning) {
// Backward compatibility for older/native variants that still use reasoning.
fallbackContent += chunk.message.reasoning;
}
// Ollama sends tool_calls in intermediate (done:false) chunks,
@@ -529,7 +520,7 @@ export function createOllamaStreamFn(
throw new Error("Ollama API stream ended without a final response");
}
finalResponse.message.content = accumulatedContent || fallbackContent;
finalResponse.message.content = accumulatedContent;
if (accumulatedToolCalls.length > 0) {
finalResponse.message.tool_calls = accumulatedToolCalls;
}