fix(agents): gate auto reasoning by effective thinking level (openclaw#24335) thanks @Kay-051

2026-05-07 07:11:39 +00:00 · 2026-02-23 14:58:26 +02:00
parent 42795b87a3
commit 9d37654a90
3 changed files with 41 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 - Agents/Reasoning: when model-default thinking is active (for example `thinking=low`), keep auto-reasoning disabled unless explicitly enabled, preventing `Reasoning:` thinking-block leakage in channel replies. (#24335, #24290) thanks @Kay-051.
 - Auto-reply/Inbound metadata: hide direct-chat `message_id`/`message_id_full` and sender metadata only from normalized chat type (not sender-id sentinels), preserving group metadata visibility and preventing sender-id spoofed direct-mode classification. (#24373) thanks @jd316.
 - Security/Exec: detect obfuscated commands before exec allowlist decisions and require explicit approval for obfuscation patterns. (#8592) Thanks @CornBrother0x and @vincentkoc.
 - Agents/Compaction: pass `agentDir` into manual `/compact` command runs so compaction auth/profile resolution stays scoped to the active agent. (#24133) thanks @Glucksberg.
--- a/src/auto-reply/reply.directive.directive-behavior.defaults-think-low-reasoning-capable-models-no.test.ts
+++ b/src/auto-reply/reply.directive.directive-behavior.defaults-think-low-reasoning-capable-models-no.test.ts
@@ -244,7 +244,7 @@ describe("directive behavior", () => {
      expect(call?.model).toBe("claude-opus-4-5");
    });
  });
-  it("defaults thinking to low for reasoning-capable models during normal replies", async () => {
+  it("defaults thinking to low for reasoning-capable models without auto-enabling reasoning", async () => {
    await withTempHome(async (home) => {
      mockEmbeddedTextResult("done");
      vi.mocked(loadModelCatalog).mockResolvedValueOnce([
@@ -269,6 +269,38 @@ describe("directive behavior", () => {
      expect(runEmbeddedPiAgent).toHaveBeenCalledOnce();
      const call = vi.mocked(runEmbeddedPiAgent).mock.calls[0]?.[0];
      expect(call?.thinkLevel).toBe("low");
      expect(call?.reasoningLevel).toBe("off");
    });
  });
  // Counterpart to the "thinking defaults to low" case: with thinking explicitly
  // configured as "off" on a reasoning-capable model, the agent run is expected
  // to receive reasoningLevel "on".
  it("keeps auto-reasoning enabled when thinking is explicitly off", async () => {
    await withTempHome(async (homeDir) => {
      mockEmbeddedTextResult("done");

      // Advertise a single reasoning-capable model for this one catalog lookup.
      vi.mocked(loadModelCatalog).mockResolvedValueOnce([
        {
          id: "claude-opus-4-5",
          name: "Opus 4.5",
          provider: "anthropic",
          reasoning: true,
        },
      ]);

      // Config pins the primary model and sets the thinking default to "off".
      const directiveConfig = makeWhatsAppDirectiveConfig(homeDir, {
        model: { primary: "anthropic/claude-opus-4-5" },
        thinkingDefault: "off",
      });

      await getReplyFromConfig(
        {
          Body: "hello",
          From: "+1004",
          To: "+2000",
        },
        {},
        directiveConfig,
      );

      // Exactly one embedded agent run; inspect the first argument it received.
      const agentRunner = vi.mocked(runEmbeddedPiAgent);
      expect(agentRunner).toHaveBeenCalledTimes(1);
      const runParams = agentRunner.mock.calls[0]?.[0];
      expect(runParams?.thinkLevel).toBe("off");
      expect(runParams?.reasoningLevel).toBe("on");
    });
  });
  it("passes elevated defaults when sender is approved", async () => {
--- a/src/auto-reply/reply/get-reply-directives.ts
+++ b/src/auto-reply/reply/get-reply-directives.ts
@@ -389,14 +389,16 @@ export async function resolveReplyDirectives(params: {
  provider = modelState.provider;
  model = modelState.model;
-  // When neither directive nor session set reasoning, default to model capability (e.g. OpenRouter with reasoning: true).
+  // When neither directive nor session set reasoning, default to model capability
-  // Skip auto-enabling when thinking is already active — the model's internal
+  // (e.g. OpenRouter with reasoning: true). Skip auto-enabling when thinking is
-  // thinking blocks would otherwise be formatted and delivered as visible
+  // active, including model-inferred defaults, or internal thinking blocks can
-  // "Reasoning:" messages, leaking internal content to the user.
+  // be emitted as visible "Reasoning:" messages.
  const reasoningExplicitlySet =
    directives.reasoningLevel !== undefined ||
    (sessionEntry?.reasoningLevel !== undefined && sessionEntry?.reasoningLevel !== null);
-  const thinkingActive = resolvedThinkLevel !== undefined && resolvedThinkLevel !== "off";
+  const effectiveThinkingForReasoning =
    resolvedThinkLevel ?? (await modelState.resolveDefaultThinkingLevel());
  const thinkingActive = effectiveThinkingForReasoning !== "off";
  if (!reasoningExplicitlySet && resolvedReasoningLevel === "off" && !thinkingActive) {
    resolvedReasoningLevel = await modelState.resolveDefaultReasoningLevel();
  }