feat(compaction): add adaptive chunk sizing, progressive fallback, and UI indicator (#1466)

* fix(ui): allow relative URLs in avatar validation

The isAvatarUrl check only accepted http://, https://, or data: URLs,
but the /avatar/{agentId} endpoint returns relative paths like /avatar/main.
This caused local file avatars to display as text instead of images.

Fixes avatar display for locally configured avatar files.

* fix(gateway): resolve local avatars to URL in HTML injection and RPC

The frontend fix alone wasn't enough because:
1. serveIndexHtml() was injecting the raw avatar filename into HTML
2. agent.identity.get RPC was returning raw filename, overwriting the
   HTML-injected value

Now both paths resolve local file avatars (*.png, *.jpg, etc.) to the
/avatar/{agentId} endpoint URL.

* feat(compaction): add adaptive chunk sizing and progressive fallback

- Add computeAdaptiveChunkRatio() to reduce chunk size for large messages
- Add isOversizedForSummary() to detect messages too large to summarize
- Add summarizeWithFallback() with progressive fallback:
  - Tries full summarization first
  - Falls back to partial summarization excluding oversized messages
  - Notes oversized messages in the summary output
- Add SAFETY_MARGIN (1.2x) buffer for token estimation inaccuracy
- Reduce MIN_CHUNK_RATIO to 0.15 for very large messages

This prevents compaction failures when conversations contain
unusually large tool outputs or responses that exceed the
summarization model's context window.

* feat(ui): add compaction indicator and improve event error handling

Compaction indicator:
- Add CompactionStatus type and handleCompactionEvent() in app-tool-stream.ts
- Show '🧹 Compacting context...' toast while active (with pulse animation)
- Show '🧹 Context compacted' briefly after completion
- Auto-clear toast after 5 seconds
- Add CSS styles for .callout.info, .callout.success, .compaction-indicator

Error handling improvements:
- Wrap onEvent callback in try/catch in gateway.ts to prevent errors
  from breaking the WebSocket message handler
- Wrap handleGatewayEvent in try/catch with console.error logging
  to isolate errors and make them visible in devtools

These changes address UI freezes during heavy agent activity by:
1. Showing users when compaction is happening
2. Preventing uncaught errors from silently breaking the event loop

* fix(control-ui): add agentId to DEFAULT_ASSISTANT_IDENTITY

TypeScript inferred the union type without agentId when falling back to
DEFAULT_ASSISTANT_IDENTITY, causing build errors at control-ui.ts:222-223.
This is commit d03c404cb4 (parent 68ea6e521b), authored by
Dave Lauer on 2026-01-23 01:32:30 -05:00 and committed via GitHub.
11 changed files with 406 additions and 8 deletions

View File

@@ -3,7 +3,15 @@ import { describe, expect, it } from "vitest";
import { __testing } from "./compaction-safeguard.js";
const { collectToolFailures, formatToolFailuresSection } = __testing;
// Pull the test-only internals exported through the `__testing` hook:
// the existing tool-failure helpers plus the new adaptive-chunking
// functions and their tuning constants.
const {
  collectToolFailures,
  formatToolFailuresSection,
  computeAdaptiveChunkRatio,
  isOversizedForSummary,
  BASE_CHUNK_RATIO,
  MIN_CHUNK_RATIO,
  SAFETY_MARGIN,
} = __testing;
describe("compaction-safeguard tool failures", () => {
it("formats tool failures with meta and summary", () => {
@@ -96,3 +104,107 @@ describe("compaction-safeguard tool failures", () => {
expect(section).toBe("");
});
});
describe("computeAdaptiveChunkRatio", () => {
  const CONTEXT_WINDOW = 200_000;

  // Factories for test messages of a given character length.
  // NOTE(review): sizes below assume the ~4-chars-per-token estimate
  // used by the implementation — confirm against compaction-safeguard.
  const userMessage = (chars: number): AgentMessage => ({
    role: "user",
    content: "x".repeat(chars),
    timestamp: Date.now(),
  });
  const assistantMessage = (chars: number): AgentMessage => ({
    role: "assistant",
    content: [{ type: "text", text: "y".repeat(chars) }],
    timestamp: Date.now(),
  });

  it("returns BASE_CHUNK_RATIO for normal messages", () => {
    // Small messages: 1000 chars (~250 tokens) each, well under 10% of context.
    const messages: AgentMessage[] = [userMessage(1000), assistantMessage(1000)];
    expect(computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW)).toBe(
      BASE_CHUNK_RATIO,
    );
  });

  it("reduces ratio when average message > 10% of context", () => {
    // Large messages: ~50K tokens each (25% of the 200K context).
    const messages: AgentMessage[] = [
      userMessage(50_000 * 4),
      assistantMessage(50_000 * 4),
    ];
    const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
    expect(ratio).toBeLessThan(BASE_CHUNK_RATIO);
    expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
  });

  it("respects MIN_CHUNK_RATIO floor", () => {
    // A message big enough that the raw computation would go below the floor.
    const messages: AgentMessage[] = [userMessage(150_000 * 4)];
    const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
    expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
  });

  it("handles empty message array", () => {
    // No messages → no reduction; the base ratio is returned unchanged.
    expect(computeAdaptiveChunkRatio([], CONTEXT_WINDOW)).toBe(BASE_CHUNK_RATIO);
  });

  it("handles single huge message", () => {
    // One massive message: result must stay clamped inside [MIN, BASE].
    const messages: AgentMessage[] = [userMessage(180_000 * 4)];
    const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
    expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
    expect(ratio).toBeLessThanOrEqual(BASE_CHUNK_RATIO);
  });
});
describe("isOversizedForSummary", () => {
  const CONTEXT_WINDOW = 200_000;

  // Build a user message with the given text body.
  const userMessage = (text: string): AgentMessage => ({
    role: "user",
    content: text,
    timestamp: Date.now(),
  });

  it("returns false for small messages", () => {
    const msg = userMessage("Hello, world!");
    expect(isOversizedForSummary(msg, CONTEXT_WINDOW)).toBe(false);
  });

  it("returns true for messages > 50% of context", () => {
    // ~120K tokens (60% of the 200K context); after the 1.2x safety
    // margin the effective size is 144K, well over the 100K (50%) cutoff.
    const msg = userMessage("x".repeat(120_000 * 4));
    expect(isOversizedForSummary(msg, CONTEXT_WINDOW)).toBe(true);
  });

  it("applies safety margin", () => {
    // Token count chosen so that tokens * SAFETY_MARGIN lands exactly at
    // 50% of the context window — the decision boundary of the check.
    // (Named in tokens: the * 4 below converts tokens to characters.)
    const halfContextTokens = (CONTEXT_WINDOW * 0.5) / SAFETY_MARGIN;
    const msg = userMessage("x".repeat(Math.floor(halfContextTokens * 4)));
    // At the boundary, token-estimation rounding may tip the result either
    // way, so only the return type is pinned here.
    const isOversized = isOversizedForSummary(msg, CONTEXT_WINDOW);
    expect(typeof isOversized).toBe("boolean");
  });
});