import type { MarkdownTableMode } from "../config/types.base.js";
import {
  chunkMarkdownIR,
  markdownToIR,
  type MarkdownLinkSpan,
  type MarkdownIR,
} from "../markdown/ir.js";
import { renderMarkdownWithMarkers } from "../markdown/render.js";

/** One rendered chunk: the Telegram HTML plus the plain text it was produced from. */
export type TelegramFormattedChunk = {
  html: string;
  text: string;
};

/**
 * Escapes the characters that are significant in Telegram HTML text nodes.
 * `&` is replaced first so the entities produced for `<` and `>` are not
 * escaped a second time.
 */
function escapeHtml(text: string): string {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}

/** Escapes text for use inside a double-quoted HTML attribute value. */
function escapeHtmlAttr(text: string): string {
  return escapeHtml(text).replace(/"/g, "&quot;");
}

/**
 * Builds the opening/closing anchor markers for a link span.
 *
 * @param link - Link span taken from the markdown IR.
 * @param _text - Unused; kept so the function matches the `buildLink` callback shape.
 * @returns The span offsets plus `<a>` markers, or `null` when the href is
 *   blank or the span is empty (nothing to wrap).
 */
function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
  const href = link.href.trim();
  if (!href) {
    return null;
  }
  if (link.start === link.end) {
    return null;
  }
  const safeHref = escapeHtmlAttr(href);
  return {
    start: link.start,
    end: link.end,
    open: `<a href="${safeHref}">`,
    close: "</a>",
  };
}

/** Renders a markdown IR to HTML using only tags from Telegram's HTML parse mode. */
function renderTelegramHtml(ir: MarkdownIR): string {
  return renderMarkdownWithMarkers(ir, {
    styleMarkers: {
      bold: { open: "<b>", close: "</b>" },
      italic: { open: "<i>", close: "</i>" },
      strikethrough: { open: "<s>", close: "</s>" },
      code: { open: "<code>", close: "</code>" },
      code_block: { open: "<pre><code>", close: "</code></pre>" },
    },
    escapeText: escapeHtml,
    buildLink: buildTelegramLink,
  });
}

/**
 * Converts markdown to Telegram HTML.
 *
 * @param markdown - Markdown source; a nullish value is treated as an empty string.
 * @param options - `tableMode` is forwarded to the markdown parser. `wrapFileRefs`
 *   controls the file-reference pass: wrapping happens unless it is explicitly `false`.
 * @returns The rendered HTML string.
 */
export function markdownToTelegramHtml(
  markdown: string,
  options: { tableMode?: MarkdownTableMode; wrapFileRefs?: boolean } = {},
): string {
  const ir = markdownToIR(markdown ?? "", {
    linkify: true,
    headingStyle: "none",
    blockquotePrefix: "",
    tableMode: options.tableMode,
  });
  const html = renderTelegramHtml(ir);
  // File reference wrapping is on by default; callers opt out with wrapFileRefs: false.
  if (options.wrapFileRefs !== false) {
    return wrapFileReferencesInHtml(html);
  }
  return html;
}

/**
 * File extensions that share TLDs and commonly appear in code/documentation.
 * These are wrapped in <code> tags to prevent Telegram from generating
 * spurious domain registrar previews.
 */
const FILE_EXTENSIONS_WITH_TLD = new Set([
  // High priority - commonly referenced in messages
  "md", // Markdown (Moldova)
  "go", // Go language
  "py", // Python (Paraguay)
  "pl", // Perl (Poland)
  "ai", // Adobe Illustrator (Anguilla)
  "sh", // Shell (Saint Helena)
  // Medium priority - sometimes referenced
  "io", // British Indian Ocean Territory (often used for tech projects)
  "tv", // Tuvalu (video files)
  "fm", // Federated States of Micronesia (audio)
  "am", // Armenia
  "at", // Austria
  "be", // Belgium
  "cc", // Cocos Islands
  "co", // Colombia
]);

/**
 * Wraps standalone file references (with TLD extensions) in <code> tags.
 * This prevents Telegram from treating them as URLs and generating
 * irrelevant domain registrar previews.
 *
 * Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes.
 * Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
 */
export function wrapFileReferencesInHtml(html: string): string {
  // Purpose: wrap bare file references whose extension is also a TLD (for
  // example "README.md") in <code>…</code> so Telegram does not auto-link them.
  // Input:   an HTML string; only text outside <code>, <pre> and <a> is touched.
  // Returns: the same HTML with every qualifying reference wrapped; all tags
  //          and protected content are copied through unchanged.

  // Group 1: boundary before the reference (start of segment, ">" or whitespace).
  // Group 2: path-like token ending in one of the tracked extensions.
  // Lookahead: the token must be followed by end of segment, whitespace or "<".
  // ":" is not part of the token alphabet, so "scheme://host.io" never matches.
  const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
  const filePattern = new RegExp(
    `(^|>|[\\s])([a-zA-Z0-9_.\\-/]+\\.(?:${extensionsPattern}))(?=$|[\\s<])`,
    "gi",
  );

  // Whether the scan position is currently inside a tag whose content must stay untouched.
  let inCode = false;
  let inPre = false;
  let inAnchor = false;

  // Wraps the references found in one run of text between protected-tag boundaries.
  const wrapSegment = (segment: string): string => {
    if (segment === "" || inCode || inPre || inAnchor) {
      return segment;
    }
    return segment.replace(
      filePattern,
      (whole: string, boundary: string, filename: string) =>
        // Protocol-relative URLs ("//host.io") are links, not file references.
        filename.startsWith("//") ? whole : `${boundary}<code>${filename}</code>`,
    );
  };

  // Walk the opening/closing <code>, <pre> and <a> tags; everything between two
  // consecutive tags is one text segment.
  const tagPattern = /(<\/?)(code|pre|a)\b[^>]*?>/gi;
  let result = "";
  let cursor = 0;
  let match: RegExpExecArray | null;

  while ((match = tagPattern.exec(html)) !== null) {
    const tagStart = match.index;
    const tagEnd = tagPattern.lastIndex;

    // Text before the tag is processed with the state that was in effect for it.
    result += wrapSegment(html.slice(cursor, tagStart));

    // Group 1 is "<" for an opening tag and "</" for a closing tag.
    const isOpening = match[1] !== "</";
    switch ((match[2] ?? "").toLowerCase()) {
      case "code":
        inCode = isOpening;
        break;
      case "pre":
        inPre = isOpening;
        break;
      case "a":
        inAnchor = isOpening;
        break;
    }

    // The tag itself is copied verbatim so attributes are never rewritten.
    result += html.slice(tagStart, tagEnd);
    cursor = tagEnd;
  }

  // Trailing text after the last tag (or the whole input when there were none).
  result += wrapSegment(html.slice(cursor));
  return result;
}

/**
 * Produces Telegram-ready HTML from either markdown or pre-built HTML.
 *
 * @param text - The message body, interpreted according to `options.textMode`.
 * @param options - `textMode` selects the input format (defaults to "markdown");
 *   `tableMode` is forwarded to the markdown conversion.
 * @returns HTML that has been passed through the file-reference wrapper.
 */
export function renderTelegramHtmlText(
  text: string,
  options: { textMode?: "markdown" | "html"; tableMode?: MarkdownTableMode } = {},
): string {
  const inputIsHtml = (options.textMode ?? "markdown") === "html";

  // HTML input is used as-is; markdown input is converted to HTML first.
  const html = inputIsHtml
    ? text
    : markdownToTelegramHtml(text, { tableMode: options.tableMode });

  // Both paths end with the file-reference pass, which operates on the HTML so
  // only text nodes are transformed, never attributes.
  return wrapFileReferencesInHtml(html);
}

/**
 * Parses markdown, splits the resulting IR into chunks bounded by `limit`,
 * and renders each chunk to Telegram HTML.
 *
 * @param markdown - Markdown source; a nullish value is treated as an empty string.
 * @param limit - Chunk size limit handed to `chunkMarkdownIR`.
 * @param options - `tableMode` is forwarded to the markdown parser.
 * @returns One entry per chunk with its rendered HTML and its plain text.
 */
export function markdownToTelegramChunks(
  markdown: string,
  limit: number,
  options: { tableMode?: MarkdownTableMode } = {},
): TelegramFormattedChunk[] {
  const { tableMode } = options;
  const ir = markdownToIR(markdown ?? "", {
    linkify: true,
    headingStyle: "none",
    blockquotePrefix: "",
    tableMode,
  });

  const formatted: TelegramFormattedChunk[] = [];
  for (const piece of chunkMarkdownIR(ir, limit)) {
    // Each chunk is rendered independently, then given the file-reference pass.
    const renderedHtml = renderTelegramHtml(piece);
    formatted.push({
      html: wrapFileReferencesInHtml(renderedHtml),
      text: piece.text,
    });
  }
  return formatted;
}

/**
 * Convenience wrapper around `markdownToTelegramChunks` that keeps only the
 * rendered HTML of each chunk.
 *
 * @param markdown - Markdown source to convert.
 * @param limit - Chunk size limit forwarded to `markdownToTelegramChunks`.
 * @returns The HTML strings of the chunks, in order.
 */
export function markdownToTelegramHtmlChunks(markdown: string, limit: number): string[] {
  const htmlParts: string[] = [];
  for (const { html } of markdownToTelegramChunks(markdown, limit)) {
    htmlParts.push(html);
  }
  return htmlParts;
}