From 94851de4f8413bcd0a7ce8d190ecbcfeaedf755d Mon Sep 17 00:00:00 2001 From: divanoli Date: Thu, 5 Feb 2026 12:03:43 +0300 Subject: [PATCH] refactor(telegram): remove popular domain TLDs from file extension list Remove .ai, .io, .tv, .fm from FILE_EXTENSIONS_WITH_TLD because: - These are commonly used as real domains (x.ai, vercel.io, github.io) - Rarely used as actual file extensions - Users are more likely referring to websites than files Keep: md, sh, py, go, pl (common file extensions, rarely intentional domains) Keep: am, at, be, cc, co (less common as intentional domain references) Update tests to reflect the change: - Add test for supported extensions (.am, .at, .be, .cc, .co) - Add test verifying popular TLDs stay as links --- src/telegram/format.ts | 32 +++++++++++++-------------- src/telegram/format.wrap-md.test.ts | 34 ++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/src/telegram/format.ts b/src/telegram/format.ts index aec5f9f2257..a9ca7bdc6fa 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -24,24 +24,24 @@ function escapeHtmlAttr(text: string): string { * File extensions that share TLDs and commonly appear in code/documentation. * These are wrapped in tags to prevent Telegram from generating * spurious domain registrar previews. + * + * Only includes extensions that are: + * 1. Commonly used as file extensions in code/docs + * 2. 
Rarely used as intentional domain references + * + * Excluded: .ai, .io, .tv, .fm (popular domain TLDs like x.ai, vercel.io, github.io) */ const FILE_EXTENSIONS_WITH_TLD = new Set([ - // High priority - commonly referenced in messages - "md", // Markdown (Moldova) - "go", // Go language - "py", // Python (Paraguay) - "pl", // Perl (Poland) - "ai", // Adobe Illustrator (Anguilla) - "sh", // Shell (Saint Helena) - // Medium priority - sometimes referenced - "io", // Tuvalu (often used for tech projects) - "tv", // Tuvalu (video files) - "fm", // Federated States of Micronesia (audio) - "am", // Armenia - "at", // Austria - "be", // Belgium - "cc", // Cocos Islands - "co", // Colombia + "md", // Markdown (Moldova) - very common in repos + "go", // Go language - common in Go projects + "py", // Python (Paraguay) - common in Python projects + "pl", // Perl (Poland) - common in Perl projects + "sh", // Shell (Saint Helena) - common for scripts + "am", // Automake files, e.g. Makefile.am (Armenia) + "at", // Autotest test suites, e.g. testsuite.at (Austria) + "be", // Belgium - kept: less common as an intentional domain reference + "cc", // C++ source (Cocos Islands) + "co", // Colombia - kept: less common as an intentional domain reference ]); /** Detects when markdown-it linkify auto-generated a link from a bare filename (e.g. 
README.md → http://README.md) */ diff --git a/src/telegram/format.wrap-md.test.ts b/src/telegram/format.wrap-md.test.ts index 5cc41e4c414..5668b3d9661 100644 --- a/src/telegram/format.wrap-md.test.ts +++ b/src/telegram/format.wrap-md.test.ts @@ -191,12 +191,25 @@ describe("edge cases", () => { expect(result).toBe("README.md"); }); - it("wraps all TLD extensions (.ai, .io, .tv, .fm)", () => { - const result = markdownToTelegramHtml("logo.ai and app.io and video.tv and audio.fm"); - expect(result).toContain("logo.ai"); - expect(result).toContain("app.io"); - expect(result).toContain("video.tv"); - expect(result).toContain("audio.fm"); + it("wraps supported TLD extensions (.am, .at, .be, .cc, .co)", () => { + const result = markdownToTelegramHtml( + "Makefile.am and code.at and app.be and main.cc and config.co", + ); + expect(result).toContain("Makefile.am"); + expect(result).toContain("code.at"); + expect(result).toContain("app.be"); + expect(result).toContain("main.cc"); + expect(result).toContain("config.co"); + }); + + it("does not wrap popular domain TLDs (.ai, .io, .tv, .fm)", () => { + // These are commonly used as real domains (x.ai, vercel.io, github.io) + const result = markdownToTelegramHtml("Check x.ai and vercel.io and app.tv and radio.fm"); + // Should be links, not code + expect(result).toContain(''); + expect(result).toContain(''); + expect(result).toContain(''); + expect(result).toContain(''); }); it("does not wrap non-TLD extensions", () => { @@ -287,11 +300,12 @@ describe("edge cases", () => { }); it("wraps orphaned single-letter TLD patterns", () => { - const result1 = wrapFileReferencesInHtml("X.ai is cool"); - expect(result1).toContain("X.ai"); + // Use extensions still in the set (md, sh, py, go) + const result1 = wrapFileReferencesInHtml("X.md is cool"); + expect(result1).toContain("X.md"); - const result2 = wrapFileReferencesInHtml("Check R.io"); - expect(result2).toContain("R.io"); + const result2 = wrapFileReferencesInHtml("Check R.sh"); + 
expect(result2).toContain("R.sh"); }); it("does not match filenames containing angle brackets", () => {