fix(pi): stop history image reinjection token blowup

2026-05-09 12:37:40 +00:00 · 2026-02-26 16:38:09 +01:00
parent 9a4b2266cc
commit c81e9866ff
6 changed files with 69 additions and 259 deletions
--- a/src/agents/pi-embedded-runner/run/images.ts
+++ b/src/agents/pi-embedded-runner/run/images.ts
@@ -36,8 +36,6 @@ export interface DetectedImageRef {
  type: "path" | "url";
  /** The resolved/normalized path or URL */
  resolved: string;
-  /** Index of the message this ref was found in (for history images) */
-  messageIndex?: number;
 }

 /**
@@ -268,93 +266,6 @@ export function modelSupportsImages(model: { input?: string[] }): boolean {
  return model.input?.includes("image") ?? false;
 }

-function extractTextFromMessage(message: unknown): string {
-  if (!message || typeof message !== "object") {
-    return "";
-  }
-  const content = (message as { content?: unknown }).content;
-  if (typeof content === "string") {
-    return content;
-  }
-  if (!Array.isArray(content)) {
-    return "";
-  }
-  const textParts: string[] = [];
-  for (const part of content) {
-    if (!part || typeof part !== "object") {
-      continue;
-    }
-    const record = part as Record<string, unknown>;
-    if (record.type === "text" && typeof record.text === "string") {
-      textParts.push(record.text);
-    }
-  }
-  return textParts.join("\n").trim();
-}
-
-/**
- * Extracts image references from conversation history messages.
- * Scans user messages for image paths/URLs that can be loaded.
- * Each ref includes the messageIndex so images can be injected at their original location.
- *
- * Note: Global deduplication is intentional - if the same image appears in multiple
- * messages, we only inject it at the FIRST occurrence. This is sufficient because:
- * 1. The model sees all message content including the image
- * 2. Later references to "the image" or "that picture" will work since it's in context
- * 3. Injecting duplicates would waste tokens and potentially hit size limits
- */
-function detectImagesFromHistory(messages: unknown[]): DetectedImageRef[] {
-  const allRefs: DetectedImageRef[] = [];
-  const seen = new Set<string>();
-
-  const messageHasImageContent = (msg: unknown): boolean => {
-    if (!msg || typeof msg !== "object") {
-      return false;
-    }
-    const content = (msg as { content?: unknown }).content;
-    if (!Array.isArray(content)) {
-      return false;
-    }
-    return content.some(
-      (part) =>
-        part != null && typeof part === "object" && (part as { type?: string }).type === "image",
-    );
-  };
-
-  for (let i = 0; i < messages.length; i++) {
-    const msg = messages[i];
-    if (!msg || typeof msg !== "object") {
-      continue;
-    }
-    const message = msg as { role?: string };
-    // Only scan user messages for image references
-    if (message.role !== "user") {
-      continue;
-    }
-    // Skip if message already has image content (prevents reloading each turn)
-    if (messageHasImageContent(msg)) {
-      continue;
-    }
-
-    const text = extractTextFromMessage(msg);
-    if (!text) {
-      continue;
-    }
-
-    const refs = detectImageReferences(text);
-    for (const ref of refs) {
-      const key = ref.resolved.toLowerCase();
-      if (seen.has(key)) {
-        continue;
-      }
-      seen.add(key);
-      allRefs.push({ ...ref, messageIndex: i });
-    }
-  }
-
-  return allRefs;
-}
-
 /**
 * Detects and loads images referenced in a prompt for models with vision capability.
 *
@@ -362,18 +273,14 @@ function detectImagesFromHistory(messages: unknown[]): DetectedImageRef[] {
 * loads them, and returns them as ImageContent array ready to be passed to
 * the model's prompt method.
 *
- * Also scans conversation history for images from previous turns and returns
- * them mapped by message index so they can be injected at their original location.
- *
 * @param params Configuration for image detection and loading
- * @returns Object with loaded images for current prompt and history images by message index
+ * @returns Object with loaded images for current prompt only
 */
 export async function detectAndLoadPromptImages(params: {
  prompt: string;
  workspaceDir: string;
  model: { input?: string[] };
  existingImages?: ImageContent[];
-  historyMessages?: unknown[];
  maxBytes?: number;
  maxDimensionPx?: number;
  workspaceOnly?: boolean;
@@ -381,8 +288,6 @@ export async function detectAndLoadPromptImages(params: {
 }): Promise<{
  /** Images for the current prompt (existingImages + detected in current prompt) */
  images: ImageContent[];
-  /** Images from history messages, keyed by message index */
-  historyImagesByIndex: Map<number, ImageContent[]>;
  detectedRefs: DetectedImageRef[];
  loadedCount: number;
  skippedCount: number;
@@ -391,7 +296,6 @@ export async function detectAndLoadPromptImages(params: {
  if (!modelSupportsImages(params.model)) {
    return {
      images: [],
-      historyImagesByIndex: new Map(),
      detectedRefs: [],
      loadedCount: 0,
      skippedCount: 0,
@@ -399,38 +303,20 @@ export async function detectAndLoadPromptImages(params: {
  }

  // Detect images from current prompt
-  const promptRefs = detectImageReferences(params.prompt);
-
-  // Detect images from conversation history (with message indices)
-  const historyRefs = params.historyMessages ? detectImagesFromHistory(params.historyMessages) : [];
-
-  // Deduplicate: if an image is in the current prompt, don't also load it from history.
-  // Current prompt images are passed via the `images` parameter to prompt(), while history
-  // images are injected into their original message positions. We don't want the same
-  // image loaded and sent twice (wasting tokens and potentially causing confusion).
-  const seenPaths = new Set(promptRefs.map((r) => r.resolved.toLowerCase()));
-  const uniqueHistoryRefs = historyRefs.filter((r) => !seenPaths.has(r.resolved.toLowerCase()));
-
-  const allRefs = [...promptRefs, ...uniqueHistoryRefs];
+  const allRefs = detectImageReferences(params.prompt);

  if (allRefs.length === 0) {
    return {
      images: params.existingImages ?? [],
-      historyImagesByIndex: new Map(),
      detectedRefs: [],
      loadedCount: 0,
      skippedCount: 0,
    };
  }

-  log.debug(
-    `Native image: detected ${allRefs.length} image refs (${promptRefs.length} in prompt, ${uniqueHistoryRefs.length} in history)`,
-  );
+  log.debug(`Native image: detected ${allRefs.length} image refs in prompt`);

-  // Load images for current prompt
  const promptImages: ImageContent[] = [...(params.existingImages ?? [])];
-  // Load images for history, grouped by message index
-  const historyImagesByIndex = new Map<number, ImageContent[]>();

  let loadedCount = 0;
  let skippedCount = 0;
@@ -442,18 +328,7 @@ export async function detectAndLoadPromptImages(params: {
      sandbox: params.sandbox,
    });
    if (image) {
-      if (ref.messageIndex !== undefined) {
-        // History image - add to the appropriate message index
-        const existing = historyImagesByIndex.get(ref.messageIndex);
-        if (existing) {
-          existing.push(image);
-        } else {
-          historyImagesByIndex.set(ref.messageIndex, [image]);
-        }
-      } else {
-        // Current prompt image
-        promptImages.push(image);
-      }
+      promptImages.push(image);
      loadedCount++;
      log.debug(`Native image: loaded ${ref.type} ${ref.resolved}`);
    } else {
@@ -469,21 +344,9 @@ export async function detectAndLoadPromptImages(params: {
    "prompt:images",
    imageSanitization,
  );
-  const sanitizedHistoryImagesByIndex = new Map<number, ImageContent[]>();
-  for (const [index, images] of historyImagesByIndex) {
-    const sanitized = await sanitizeImagesWithLog(
-      images,
-      `history:images:${index}`,
-      imageSanitization,
-    );
-    if (sanitized.length > 0) {
-      sanitizedHistoryImagesByIndex.set(index, sanitized);
-    }
-  }

  return {
    images: sanitizedPromptImages,
-    historyImagesByIndex: sanitizedHistoryImagesByIndex,
    detectedRefs: allRefs,
    loadedCount,
    skippedCount,