fix(telegram): add escapeHtml and escapeRegex for defense in depth

Code review fixes:
1. Escape filename with escapeHtml() before inserting into <code> tags
   - Prevents HTML injection if regex ever matches unsafe chars
   - Defense in depth (current regex already limits to safe chars)

2. Escape extensions with escapeRegex() before joining into pattern
   - Prevents regex breakage if extensions contain metacharacters
   - Future-proofs against extensions like 'c++' or 'd.ts'

Add tests documenting regex safety boundaries:
- Filenames with special chars (&, <, >) don't match
- Only [a-zA-Z0-9_.\-./] chars are captured
This commit is contained in:
divanoli
2026-02-05 11:47:50 +03:00
parent 8a5453e3e7
commit 5431591cfa
2 changed files with 35 additions and 5 deletions

View File

@@ -132,9 +132,14 @@ export function markdownToTelegramHtml(
* Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes.
* Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
*/
/** Escape regex metacharacters in a string */
function escapeRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
export function wrapFileReferencesInHtml(html: string): string {
// Build regex pattern for all tracked extensions
const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
// Build regex pattern for all tracked extensions (escape metacharacters for safety)
const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).map(escapeRegex).join("|");
// Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
const autoLinkedAnchor = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;
@@ -142,7 +147,7 @@ export function wrapFileReferencesInHtml(html: string): string {
if (!isAutoLinkedFileRef(`http://${label}`, label)) {
return _match;
}
return `<code>${label}</code>`;
return `<code>${escapeHtml(label)}</code>`;
});
const filePattern = new RegExp(
`(^|>|[\\s])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionsPattern}))(?=$|[\\s<])`,
@@ -179,7 +184,7 @@ export function wrapFileReferencesInHtml(html: string): string {
if (/https?:\/\/$/i.test(prefix)) {
return m;
}
return `${prefix}<code>${filename}</code>`;
return `${prefix}<code>${escapeHtml(filename)}</code>`;
});
// Update tag depth
@@ -208,7 +213,7 @@ export function wrapFileReferencesInHtml(html: string): string {
if (/https?:\/\/$/i.test(prefix)) {
return m;
}
return `${prefix}<code>${filename}</code>`;
return `${prefix}<code>${escapeHtml(filename)}</code>`;
});
return result;

View File

@@ -277,4 +277,29 @@ describe("edge cases", () => {
const input = '<a href="http://other.md">README.md</a>';
expect(wrapFileReferencesInHtml(input)).toBe(input);
});
it("does not match filenames with special characters", () => {
// The regex only matches [a-zA-Z0-9_.\\-./] so & breaks the pattern
const input = "R&D.md";
const result = wrapFileReferencesInHtml(input);
// Not wrapped because & is not in the allowed character class
expect(result).toBe(input);
});
it("does not match filenames containing angle brackets", () => {
// The regex character class [a-zA-Z0-9_.\\-./] doesn't include < >
// so these won't be matched and wrapped (which is correct/safe)
const input = "file<script>.md";
const result = wrapFileReferencesInHtml(input);
// Not wrapped because < breaks the filename pattern
expect(result).toBe(input);
});
it("wraps only the valid filename portion before HTML tags", () => {
// x.md is a valid filename, </code><b>bold</b> is not part of it
const input = "x.md</code><b>bold</b>";
const result = wrapFileReferencesInHtml(input);
// Only x.md gets wrapped, the rest passes through
expect(result).toBe("<code>x.md</code></code><b>bold</b>");
});
});