refactor(shared): dedupe chat content text extraction

This commit is contained in:
Peter Steinberger
2026-02-15 17:21:36 +00:00
parent ac3db098ab
commit b74c3d80cc
4 changed files with 71 additions and 61 deletions

View File

@@ -3,6 +3,7 @@ import type { CronDelivery, CronMessageChannel } from "../../cron/types.js";
import { loadConfig } from "../../config/config.js"; import { loadConfig } from "../../config/config.js";
import { normalizeCronJobCreate, normalizeCronJobPatch } from "../../cron/normalize.js"; import { normalizeCronJobCreate, normalizeCronJobPatch } from "../../cron/normalize.js";
import { parseAgentSessionKey } from "../../sessions/session-key-utils.js"; import { parseAgentSessionKey } from "../../sessions/session-key-utils.js";
import { extractTextFromChatContent } from "../../shared/chat-content.js";
import { isRecord, truncateUtf16Safe } from "../../utils.js"; import { isRecord, truncateUtf16Safe } from "../../utils.js";
import { resolveSessionAgentId } from "../agent-scope.js"; import { resolveSessionAgentId } from "../agent-scope.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js"; import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
@@ -69,38 +70,13 @@ function truncateText(input: string, maxLen: number) {
return `${truncated}...`; return `${truncated}...`;
} }
function normalizeContextText(raw: string) {
return raw.replace(/\s+/g, " ").trim();
}
function extractMessageText(message: ChatMessage): { role: string; text: string } | null { function extractMessageText(message: ChatMessage): { role: string; text: string } | null {
const role = typeof message.role === "string" ? message.role : ""; const role = typeof message.role === "string" ? message.role : "";
if (role !== "user" && role !== "assistant") { if (role !== "user" && role !== "assistant") {
return null; return null;
} }
const content = message.content; const text = extractTextFromChatContent(message.content);
if (typeof content === "string") { return text ? { role, text } : null;
const normalized = normalizeContextText(content);
return normalized ? { role, text: normalized } : null;
}
if (!Array.isArray(content)) {
return null;
}
const chunks: string[] = [];
for (const block of content) {
if (!block || typeof block !== "object") {
continue;
}
if ((block as { type?: unknown }).type !== "text") {
continue;
}
const text = (block as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
chunks.push(text);
}
}
const joined = normalizeContextText(chunks.join(" "));
return joined ? { role, text: joined } : null;
} }
async function buildReminderContextLines(params: { async function buildReminderContextLines(params: {

View File

@@ -27,6 +27,7 @@ import { callGateway } from "../../gateway/call.js";
import { logVerbose } from "../../globals.js"; import { logVerbose } from "../../globals.js";
import { formatTimeAgo } from "../../infra/format-time/format-relative.ts"; import { formatTimeAgo } from "../../infra/format-time/format-relative.ts";
import { parseAgentSessionKey } from "../../routing/session-key.js"; import { parseAgentSessionKey } from "../../routing/session-key.js";
import { extractTextFromChatContent } from "../../shared/chat-content.js";
import { import {
formatDurationCompact, formatDurationCompact,
formatTokenUsageDisplay, formatTokenUsageDisplay,
@@ -202,45 +203,15 @@ function buildSubagentsHelp() {
type ChatMessage = { type ChatMessage = {
role?: unknown; role?: unknown;
content?: unknown; content?: unknown;
name?: unknown;
toolName?: unknown;
}; };
function normalizeMessageText(text: string) {
return text.replace(/\s+/g, " ").trim();
}
export function extractMessageText(message: ChatMessage): { role: string; text: string } | null { export function extractMessageText(message: ChatMessage): { role: string; text: string } | null {
const role = typeof message.role === "string" ? message.role : ""; const role = typeof message.role === "string" ? message.role : "";
const shouldSanitize = role === "assistant"; const shouldSanitize = role === "assistant";
const content = message.content; const text = extractTextFromChatContent(message.content, {
if (typeof content === "string") { sanitizeText: shouldSanitize ? sanitizeTextContent : undefined,
const normalized = normalizeMessageText( });
shouldSanitize ? sanitizeTextContent(content) : content, return text ? { role, text } : null;
);
return normalized ? { role, text: normalized } : null;
}
if (!Array.isArray(content)) {
return null;
}
const chunks: string[] = [];
for (const block of content) {
if (!block || typeof block !== "object") {
continue;
}
if ((block as { type?: unknown }).type !== "text") {
continue;
}
const text = (block as { text?: unknown }).text;
if (typeof text === "string") {
const value = shouldSanitize ? sanitizeTextContent(text) : text;
if (value.trim()) {
chunks.push(value);
}
}
}
const joined = normalizeMessageText(chunks.join(" "));
return joined ? { role, text: joined } : null;
} }
function formatLogLines(messages: ChatMessage[]) { function formatLogLines(messages: ChatMessage[]) {

View File

@@ -0,0 +1,26 @@
import { describe, expect, it } from "vitest";
import { extractTextFromChatContent } from "./chat-content.js";
// Spec for extractTextFromChatContent: string input, array-of-blocks input,
// and the optional sanitizer hook.
describe("extractTextFromChatContent", () => {
  it("normalizes string content", () => {
    // Surrounding spaces and the embedded newline should collapse to single spaces.
    const result = extractTextFromChatContent(" hello\nworld ");
    expect(result).toBe("hello world");
  });

  it("extracts text blocks from array content", () => {
    // The non-text block in the middle must not contribute to the output.
    const blocks = [
      { type: "text", text: " hello " },
      { type: "image_url", image_url: "https://example.com" },
      { type: "text", text: "world" },
    ];
    const result = extractTextFromChatContent(blocks);
    expect(result).toBe("hello world");
  });

  it("applies sanitizer when provided", () => {
    // The sanitizer strips the bracketed tool-call marker before normalization.
    const stripToolCalls = (text: string) => text.replace(/\[Tool Call:[^\]]+\]\s*/g, "");
    const result = extractTextFromChatContent("Here [Tool Call: foo (ID: 1)] ok", {
      sanitizeText: stripToolCalls,
    });
    expect(result).toBe("Here ok");
  });
});

View File

@@ -0,0 +1,37 @@
/**
 * Pulls plain text out of a chat message `content` value.
 *
 * - A string is (optionally) sanitized, then whitespace-normalized.
 * - An array is scanned for non-null objects whose `type` is `"text"` and whose
 *   `text` is a string; each such text is (optionally) sanitized, texts that are
 *   blank after sanitizing are dropped, and the rest are joined with a single
 *   space and whitespace-normalized.
 * - Any other value yields `null`.
 *
 * Whitespace normalization collapses every run of whitespace to one space and
 * trims both ends.
 *
 * @param content - Raw message content of unknown shape.
 * @param opts - Optional settings; `sanitizeText` is applied to each raw text
 *   before normalization.
 * @returns The normalized text, or `null` when nothing non-empty remains.
 */
export function extractTextFromChatContent(
  content: unknown,
  opts?: { sanitizeText?: (text: string) => string },
): string | null {
  const collapseWhitespace = (raw: string): string => raw.replace(/\s+/g, " ").trim();
  const applySanitizer = (raw: string): string =>
    opts?.sanitizeText ? opts.sanitizeText(raw) : raw;

  if (typeof content === "string") {
    return collapseWhitespace(applySanitizer(content)) || null;
  }
  if (!Array.isArray(content)) {
    return null;
  }

  const pieces: string[] = [];
  for (const entry of content) {
    // Only non-null objects tagged as text blocks are considered.
    const isTextBlock =
      typeof entry === "object" &&
      entry !== null &&
      (entry as { type?: unknown }).type === "text";
    if (!isTextBlock) {
      continue;
    }
    const rawText = (entry as { text?: unknown }).text;
    if (typeof rawText === "string") {
      const cleaned = applySanitizer(rawText);
      // The un-trimmed value is kept; the trim only decides whether it is blank.
      if (cleaned.trim()) {
        pieces.push(cleaned);
      }
    }
  }

  return collapseWhitespace(pieces.join(" ")) || null;
}