From 5431591cfa0330dfae50681b6fd54bd6895a678c Mon Sep 17 00:00:00 2001 From: divanoli Date: Thu, 5 Feb 2026 11:47:50 +0300 Subject: [PATCH] fix(telegram): add escapeHtml and escapeRegex for defense in depth Code review fixes: 1. Escape filename with escapeHtml() before inserting into <code> tags - Prevents HTML injection if regex ever matches unsafe chars - Defense in depth (current regex already limits to safe chars) 2. Escape extensions with escapeRegex() before joining into pattern - Prevents regex breakage if extensions contain metacharacters - Future-proofs against extensions like 'c++' or 'd.ts' Add tests documenting regex safety boundaries: - Filenames with special chars (&, <, >) don't match - Only [a-zA-Z0-9_.\-./] chars are captured --- src/telegram/format.ts | 15 ++++++++++----- src/telegram/format.wrap-md.test.ts | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/telegram/format.ts b/src/telegram/format.ts index 6282e0d2828..1a0b9c0a583 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -132,9 +132,14 @@ export function markdownToTelegramHtml( * Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes. * Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
  */
+/** Backslash-escape every regex metacharacter in `str` so the result matches literally when embedded in a RegExp pattern. */
+function escapeRegex(str: string): string {
+  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
 export function wrapFileReferencesInHtml(html: string): string {
-  // Build regex pattern for all tracked extensions
-  const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
+  // Build regex pattern for all tracked extensions (escape metacharacters for safety)
+  const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).map(escapeRegex).join("|");
 
   // Safety-net: de-linkify auto-generated anchors where href="http://README.md';
     expect(wrapFileReferencesInHtml(input)).toBe(input);
   });
+
+  it("does not match filenames with special characters", () => {
+    // The file-reference regex is expected to capture only [a-zA-Z0-9_.\-./], so "&" cannot be part of a filename match
+    const input = "R&D.md";
+    const result = wrapFileReferencesInHtml(input);
+    // The input must come back unchanged, i.e. no part of "R&D.md" gets wrapped
+    expect(result).toBe(input);
+  });
+
+  it("does not match filenames containing angle brackets", () => {
+    // The regex character class [a-zA-Z0-9_.\\-./] doesn't include < >
+    // so these won't be matched and wrapped (which is correct/safe)
+    const input = "file