refactor: unify markdown formatting pipeline

This commit is contained in:
Peter Steinberger
2026-01-15 00:12:29 +00:00
parent 0d0b77ded6
commit bd7d362d3b
16 changed files with 1245 additions and 350 deletions

View File

@@ -1,5 +1,5 @@
import { type Bot, InputFile } from "grammy";
import { chunkMarkdownText } from "../../auto-reply/chunk.js";
import { markdownToTelegramChunks, markdownToTelegramHtml } from "../format.js";
import type { ReplyPayload } from "../../auto-reply/types.js";
import type { ReplyToMode } from "../../config/config.js";
import { danger, logVerbose } from "../../globals.js";
@@ -10,7 +10,6 @@ import { isGifMedia } from "../../media/mime.js";
import { saveMediaBuffer } from "../../media/store.js";
import type { RuntimeEnv } from "../../runtime.js";
import { loadWebMedia } from "../../web/media.js";
import { markdownToTelegramHtml } from "../format.js";
import { resolveTelegramVoiceSend } from "../voice.js";
import { buildTelegramThreadParams, resolveTelegramReplyId } from "./helpers.js";
import type { TelegramContext } from "./types.js";
@@ -42,11 +41,14 @@ export async function deliverReplies(params: {
? [reply.mediaUrl]
: [];
if (mediaList.length === 0) {
for (const chunk of chunkMarkdownText(reply.text || "", textLimit)) {
await sendTelegramText(bot, chatId, chunk, runtime, {
const chunks = markdownToTelegramChunks(reply.text || "", textLimit);
for (const chunk of chunks) {
await sendTelegramText(bot, chatId, chunk.html, runtime, {
replyToMessageId:
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined,
messageThreadId,
textMode: "html",
plainText: chunk.text,
});
if (replyToId && !hasReplied) {
hasReplied = true;
@@ -155,7 +157,12 @@ async function sendTelegramText(
chatId: string,
text: string,
runtime: RuntimeEnv,
opts?: { replyToMessageId?: number; messageThreadId?: number },
opts?: {
replyToMessageId?: number;
messageThreadId?: number;
textMode?: "markdown" | "html";
plainText?: string;
},
): Promise<number | undefined> {
const threadParams = buildTelegramThreadParams(opts?.messageThreadId);
const baseParams: Record<string, unknown> = {
@@ -164,7 +171,8 @@ async function sendTelegramText(
if (threadParams) {
baseParams.message_thread_id = threadParams.message_thread_id;
}
const htmlText = markdownToTelegramHtml(text);
const textMode = opts?.textMode ?? "markdown";
const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text);
try {
const res = await bot.api.sendMessage(chatId, htmlText, {
parse_mode: "HTML",
@@ -175,7 +183,8 @@ async function sendTelegramText(
const errText = formatErrorMessage(err);
if (PARSE_ERR_RE.test(errText)) {
runtime.log?.(`telegram HTML parse failed; retrying without formatting: ${errText}`);
const res = await bot.api.sendMessage(chatId, text, {
const fallbackText = opts?.plainText ?? text;
const res = await bot.api.sendMessage(chatId, fallbackText, {
...baseParams,
});
return res.message_id;

View File

@@ -1,138 +1,68 @@
import MarkdownIt from "markdown-it";
import { chunkMarkdownIR, markdownToIR, type MarkdownLinkSpan, type MarkdownIR } from "../markdown/ir.js";
import { renderMarkdownWithMarkers } from "../markdown/render.js";
type ListState = {
type: "bullet" | "ordered";
index: number;
/**
 * One outbound Telegram message produced by chunking a Markdown document.
 *
 * Both fields describe the same chunk: `html` is what gets sent with
 * `parse_mode: "HTML"`, and `text` is the unformatted fallback used when the
 * HTML send is rejected with a parse error.
 */
export type TelegramFormattedChunk = {
/** The chunk rendered to Telegram HTML (output of `renderTelegramHtml`). */
html: string;
/** The chunk's plain text as stored on the IR chunk (`chunk.text`). */
text: string;
};
type RenderEnv = {
telegramListStack?: ListState[];
telegramLinkStack?: boolean[];
};
const md = new MarkdownIt({
html: false,
linkify: true,
breaks: false,
typographer: false,
});
md.enable("strikethrough");
const { escapeHtml } = md.utils;
function getListStack(env: RenderEnv): ListState[] {
if (!env.telegramListStack) env.telegramListStack = [];
return env.telegramListStack;
/**
 * Escapes the three characters that are significant in HTML text content:
 * `&`, `<` and `>`.
 *
 * Quotes are deliberately left untouched; use `escapeHtmlAttr` for values that
 * end up inside a double-quoted attribute.
 *
 * @param text - Raw text to embed in HTML.
 * @returns The text with `&`, `<`, `>` replaced by their named entities.
 */
function escapeHtml(text: string): string {
  const entityFor: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
  };
  // Single pass: each special character is looked up once, so an ampersand
  // that belongs to a freshly inserted entity is never escaped a second time.
  return text.replace(/[&<>]/g, (ch) => entityFor[ch] ?? ch);
}
function getLinkStack(env: RenderEnv): boolean[] {
if (!env.telegramLinkStack) env.telegramLinkStack = [];
return env.telegramLinkStack;
/**
 * Escapes text for use inside a double-quoted HTML attribute value.
 *
 * Applies `escapeHtml` first and then replaces every `"` with `&quot;`.
 *
 * @param text - Raw attribute value.
 * @returns The value with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtmlAttr(text: string): string {
  const escapedBody = escapeHtml(text);
  // escapeHtml never emits a double quote, so every quote still present here
  // came from the input and must be encoded.
  return escapedBody.split('"').join("&quot;");
}
md.renderer.rules.text = (tokens, idx) => escapeHtml(tokens[idx]?.content ?? "");
/**
 * Turns a Markdown link span into the opening/closing anchor markers used by
 * the Telegram HTML renderer (passed as `buildLink` to
 * `renderMarkdownWithMarkers`).
 *
 * @param link - Link span carrying the target `href` and its `start`/`end`
 *   offsets.
 * @param _text - Unused; kept so the function matches the `buildLink`
 *   callback shape.
 * @returns `null` when the trimmed href is empty or the span is zero-length
 *   (`start === end`); otherwise the span offsets plus `<a href="…">` /
 *   `</a>` markers with the href attribute-escaped.
 */
function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
  const target = link.href.trim();
  const coversNoText = link.start === link.end;
  if (target === "" || coversNoText) {
    return null;
  }

  const { start, end } = link;
  const open = `<a href="${escapeHtmlAttr(target)}">`;
  return { start, end, open, close: "</a>" };
}
md.renderer.rules.softbreak = () => "\n";
md.renderer.rules.hardbreak = () => "\n";
md.renderer.rules.paragraph_open = () => "";
md.renderer.rules.paragraph_close = (_tokens, _idx, _opts, env) => {
const stack = getListStack(env as RenderEnv);
return stack.length ? "" : "\n\n";
};
md.renderer.rules.heading_open = () => "";
md.renderer.rules.heading_close = () => "\n\n";
md.renderer.rules.blockquote_open = () => "";
md.renderer.rules.blockquote_close = () => "\n";
md.renderer.rules.bullet_list_open = (_tokens, _idx, _opts, env) => {
getListStack(env as RenderEnv).push({ type: "bullet", index: 0 });
return "";
};
md.renderer.rules.bullet_list_close = (_tokens, _idx, _opts, env) => {
getListStack(env as RenderEnv).pop();
return "";
};
md.renderer.rules.ordered_list_open = (tokens, idx, _opts, env) => {
const start = Number(tokens[idx]?.attrGet("start") ?? "1");
getListStack(env as RenderEnv).push({ type: "ordered", index: start - 1 });
return "";
};
md.renderer.rules.ordered_list_close = (_tokens, _idx, _opts, env) => {
getListStack(env as RenderEnv).pop();
return "";
};
md.renderer.rules.list_item_open = (_tokens, _idx, _opts, env) => {
const stack = getListStack(env as RenderEnv);
const top = stack[stack.length - 1];
if (!top) return "";
top.index += 1;
const indent = " ".repeat(Math.max(0, stack.length - 1));
const prefix = top.type === "ordered" ? `${top.index}. ` : "• ";
return `${indent}${prefix}`;
};
md.renderer.rules.list_item_close = () => "\n";
md.renderer.rules.em_open = () => "<i>";
md.renderer.rules.em_close = () => "</i>";
md.renderer.rules.strong_open = () => "<b>";
md.renderer.rules.strong_close = () => "</b>";
md.renderer.rules.s_open = () => "<s>";
md.renderer.rules.s_close = () => "</s>";
md.renderer.rules.code_inline = (tokens, idx) =>
`<code>${escapeHtml(tokens[idx]?.content ?? "")}</code>`;
md.renderer.rules.code_block = (tokens, idx) =>
`<pre><code>${escapeHtml(tokens[idx]?.content ?? "")}</code></pre>\n`;
md.renderer.rules.fence = (tokens, idx) =>
`<pre><code>${escapeHtml(tokens[idx]?.content ?? "")}</code></pre>\n`;
md.renderer.rules.link_open = (tokens, idx, _opts, env) => {
const href = tokens[idx]?.attrGet("href") ?? "";
const safeHref = escapeHtml(href);
const stack = getLinkStack(env as RenderEnv);
const hasHref = Boolean(safeHref);
stack.push(hasHref);
return hasHref ? `<a href="${safeHref}">` : "";
};
md.renderer.rules.link_close = (_tokens, _idx, _opts, env) => {
const stack = getLinkStack(env as RenderEnv);
const hasHref = stack.pop();
return hasHref ? "</a>" : "";
};
md.renderer.rules.image = (tokens, idx) => {
const alt = tokens[idx]?.content ?? "";
return escapeHtml(alt);
};
md.renderer.rules.html_block = (tokens, idx) => escapeHtml(tokens[idx]?.content ?? "");
md.renderer.rules.html_inline = (tokens, idx) => escapeHtml(tokens[idx]?.content ?? "");
md.renderer.rules.table_open = () => "";
md.renderer.rules.table_close = () => "";
md.renderer.rules.thead_open = () => "";
md.renderer.rules.thead_close = () => "";
md.renderer.rules.tbody_open = () => "";
md.renderer.rules.tbody_close = () => "";
md.renderer.rules.tr_open = () => "";
md.renderer.rules.tr_close = () => "\n";
md.renderer.rules.th_open = () => "";
md.renderer.rules.th_close = () => "\t";
md.renderer.rules.td_open = () => "";
md.renderer.rules.td_close = () => "\t";
md.renderer.rules.hr = () => "\n";
/**
 * Renders a Markdown IR document to Telegram HTML.
 *
 * Delegates to `renderMarkdownWithMarkers`, supplying the tag pair for each
 * supported style, `escapeHtml` for text, and `buildTelegramLink` for links.
 *
 * @param ir - The Markdown intermediate representation to render.
 * @returns The rendered HTML string.
 */
function renderTelegramHtml(ir: MarkdownIR): string {
  // Tag pairs for each inline/block style the renderer is asked to mark up.
  const styleMarkers = {
    bold: { open: "<b>", close: "</b>" },
    italic: { open: "<i>", close: "</i>" },
    strikethrough: { open: "<s>", close: "</s>" },
    code: { open: "<code>", close: "</code>" },
    code_block: { open: "<pre><code>", close: "</code></pre>" },
  };

  const html = renderMarkdownWithMarkers(ir, {
    styleMarkers,
    escapeText: escapeHtml,
    buildLink: buildTelegramLink,
  });
  return html;
}
/**
 * Converts a Markdown string into Telegram-compatible HTML.
 *
 * NOTE(review): this span contains two bodies back to back — a markdown-it
 * path (`md.render` followed by whitespace cleanup) and an IR path
 * (`markdownToIR` followed by `renderTelegramHtml`). As written, only the
 * first `return` is reachable. Presumably this is before/after diff residue
 * and the IR path is the intended implementation — confirm against the
 * committed file.
 *
 * @param markdown - Markdown source; a nullish value is treated as "".
 * @returns The rendered HTML string.
 */
export function markdownToTelegramHtml(markdown: string): string {
// markdown-it path: render, then tidy the whitespace the renderer leaves behind.
const env: RenderEnv = {};
const rendered = md.render(markdown ?? "", env);
return rendered
// Strip spaces/tabs that sit directly before a newline.
.replace(/[ \t]+\n/g, "\n")
.replace(/\t+\n/g, "\n")
// Collapse runs of three or more newlines into a single blank line.
.replace(/\n{3,}/g, "\n\n")
.trimEnd();
// IR path: parse to the shared Markdown IR with linkify on, headingStyle
// "none" and an empty blockquote prefix, then render that IR to HTML.
const ir = markdownToIR(markdown ?? "", {
linkify: true,
headingStyle: "none",
blockquotePrefix: "",
});
return renderTelegramHtml(ir);
}
/**
 * Parses Markdown, splits it into chunks, and renders each chunk for Telegram.
 *
 * The Markdown is converted to IR with linkify enabled, `headingStyle: "none"`
 * and an empty blockquote prefix, then split by `chunkMarkdownIR` before
 * rendering, so each chunk is rendered to HTML on its own.
 *
 * @param markdown - Markdown source; a nullish value is treated as "".
 * @param limit - Chunk limit forwarded unchanged to `chunkMarkdownIR`
 *   (presumably a maximum text length per chunk — confirm in `markdown/ir`).
 * @returns One `{ html, text }` pair per chunk, in document order.
 */
export function markdownToTelegramChunks(markdown: string, limit: number): TelegramFormattedChunk[] {
  const source = markdown ?? "";
  const parsed = markdownToIR(source, {
    linkify: true,
    headingStyle: "none",
    blockquotePrefix: "",
  });

  const formatted: TelegramFormattedChunk[] = [];
  for (const piece of chunkMarkdownIR(parsed, limit)) {
    formatted.push({
      html: renderTelegramHtml(piece),
      text: piece.text,
    });
  }
  return formatted;
}
/**
 * Convenience wrapper around `markdownToTelegramChunks` that keeps only the
 * rendered HTML of each chunk.
 *
 * @param markdown - Markdown source to format.
 * @param limit - Chunk limit forwarded to `markdownToTelegramChunks`.
 * @returns The HTML string of every chunk, in order.
 */
export function markdownToTelegramHtmlChunks(markdown: string, limit: number): string[] {
  const formattedChunks = markdownToTelegramChunks(markdown, limit);
  return formattedChunks.map(({ html }) => html);
}

View File

@@ -28,6 +28,8 @@ type TelegramSendOpts = {
maxBytes?: number;
api?: Bot["api"];
retry?: RetryConfig;
textMode?: "markdown" | "html";
plainText?: string;
/** Send audio as voice message (voice bubble) instead of audio file. Defaults to false. */
asVoice?: boolean;
/** Message ID to reply to (for threading) */
@@ -308,7 +310,8 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends");
}
const htmlText = markdownToTelegramHtml(text);
const textMode = opts.textMode ?? "markdown";
const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text);
const textParams = hasThreadParams
? {
parse_mode: "HTML" as const,
@@ -335,11 +338,12 @@ export async function sendMessageTelegram(
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
const fallbackText = opts.plainText ?? text;
return await request(
() =>
plainParams
? api.sendMessage(chatId, text, plainParams)
: api.sendMessage(chatId, text),
? api.sendMessage(chatId, fallbackText, plainParams)
: api.sendMessage(chatId, fallbackText),
"message-plain",
).catch((err2) => {
throw wrapChatNotFound(err2);