fix(telegram): add escapeHtml and escapeRegex for defense in depth

Code review fixes: 1. Escape filename with escapeHtml() before inserting into <code> tags - Prevents HTML injection if regex ever matches unsafe chars - Defense in depth (current regex already limits to safe chars) 2. Escape extensions with escapeRegex() before joining into pattern - Prevents regex breakage if extensions contain metacharacters - Future-proofs against extensions like 'c++' or 'd.ts' Add tests documenting regex safety boundaries: - Filenames with special chars (&, <, >) don't match - Only [a-zA-Z0-9_.\-./] chars are captured
2026-05-24 09:54:27 +00:00 · 2026-02-05 11:47:50 +03:00
parent 8a5453e3e7
commit 5431591cfa
2 changed files with 35 additions and 5 deletions
--- a/src/telegram/format.ts
+++ b/src/telegram/format.ts
@@ -132,9 +132,14 @@ export function markdownToTelegramHtml(
 * Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes.
 * Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
 */
+/** Escape regex metacharacters in a string */
+function escapeRegex(str: string): string {
+  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
 export function wrapFileReferencesInHtml(html: string): string {
-  // Build regex pattern for all tracked extensions
-  const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
+  // Build regex pattern for all tracked extensions (escape metacharacters for safety)
+  const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).map(escapeRegex).join("|");

  // Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
  const autoLinkedAnchor = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;
@@ -142,7 +147,7 @@ export function wrapFileReferencesInHtml(html: string): string {
    if (!isAutoLinkedFileRef(`http://${label}`, label)) {
      return _match;
    }
-    return `<code>${label}</code>`;
+    return `<code>${escapeHtml(label)}</code>`;
  });
  const filePattern = new RegExp(
    `(^|>|[\\s])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionsPattern}))(?=$|[\\s<])`,
@@ -179,7 +184,7 @@ export function wrapFileReferencesInHtml(html: string): string {
      if (/https?:\/\/$/i.test(prefix)) {
        return m;
      }
-      return `${prefix}<code>${filename}</code>`;
+      return `${prefix}<code>${escapeHtml(filename)}</code>`;
    });

    // Update tag depth
@@ -208,7 +213,7 @@ export function wrapFileReferencesInHtml(html: string): string {
    if (/https?:\/\/$/i.test(prefix)) {
      return m;
    }
-    return `${prefix}<code>${filename}</code>`;
+    return `${prefix}<code>${escapeHtml(filename)}</code>`;
  });

  return result;
--- a/src/telegram/format.wrap-md.test.ts
+++ b/src/telegram/format.wrap-md.test.ts
@@ -277,4 +277,29 @@ describe("edge cases", () => {
    const input = '<a href="http://other.md">README.md</a>';
    expect(wrapFileReferencesInHtml(input)).toBe(input);
  });
+
+  it("does not match filenames with special characters", () => {
+    // The regex only matches [a-zA-Z0-9_.\\-./] so & breaks the pattern
+    const input = "R&D.md";
+    const result = wrapFileReferencesInHtml(input);
+    // Not wrapped because & is not in the allowed character class
+    expect(result).toBe(input);
+  });
+
+  it("does not match filenames containing angle brackets", () => {
+    // The regex character class [a-zA-Z0-9_.\\-./] doesn't include < >
+    // so these won't be matched and wrapped (which is correct/safe)
+    const input = "file<script>.md";
+    const result = wrapFileReferencesInHtml(input);
+    // Not wrapped because < breaks the filename pattern
+    expect(result).toBe(input);
+  });
+
+  it("wraps only the valid filename portion before HTML tags", () => {
+    // x.md is a valid filename, </code><b>bold</b> is not part of it
+    const input = "x.md</code><b>bold</b>";
+    const result = wrapFileReferencesInHtml(input);
+    // Only x.md gets wrapped, the rest passes through
+    expect(result).toBe("<code>x.md</code></code><b>bold</b>");
+  });
 });