mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-24 09:54:27 +00:00
fix(telegram): use regex literal and depth counters for tag tracking
Code review fixes:

1. Replace the RegExp constructor with a regex literal for autoLinkedAnchor
   - Avoids double-escaping issues with \s
   - Uses the backreference \1 to match the href=label pattern directly
2. Replace boolean toggles with depth counters for tag nesting
   - codeDepth, preDepth, and anchorDepth track nesting levels
   - Correctly handles nested tags like <pre><code>...</code></pre>
   - Prevents wrapping inside any level of protected tags

Add 4 tests for edge cases:

- Nested code tags (depth tracking)
- Multiple anchor tags in sequence
- Auto-linked anchor with backreference match
- Anchor with different href/label (no match)
This commit is contained in:
@@ -137,12 +137,9 @@ export function wrapFileReferencesInHtml(html: string): string {
|
||||
const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
|
||||
|
||||
// Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
|
||||
const autoLinkedAnchor = new RegExp(`<a\\s+href="https?://([^"]+)"\\s*>([^<]+)</a>`, "gi");
|
||||
html = html.replace(autoLinkedAnchor, (_match, href: string, label: string) => {
|
||||
if (href !== label) {
|
||||
return _match;
|
||||
}
|
||||
if (!isAutoLinkedFileRef(`http://${href}`, label)) {
|
||||
const autoLinkedAnchor = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;
|
||||
html = html.replace(autoLinkedAnchor, (_match, label: string) => {
|
||||
if (!isAutoLinkedFileRef(`http://${label}`, label)) {
|
||||
return _match;
|
||||
}
|
||||
return `<code>${label}</code>`;
|
||||
@@ -152,10 +149,10 @@ export function wrapFileReferencesInHtml(html: string): string {
|
||||
"gi",
|
||||
);
|
||||
|
||||
// Track if we're inside tags that should not be modified
|
||||
let inCode = false;
|
||||
let inPre = false;
|
||||
let inAnchor = false;
|
||||
// Track nesting depth for tags that should not be modified
|
||||
let codeDepth = 0;
|
||||
let preDepth = 0;
|
||||
let anchorDepth = 0;
|
||||
let result = "";
|
||||
let lastIndex = 0;
|
||||
|
||||
@@ -173,7 +170,7 @@ export function wrapFileReferencesInHtml(html: string): string {
|
||||
const textBefore = html.slice(lastIndex, tagStart);
|
||||
result += textBefore.replace(filePattern, (m, prefix, filename) => {
|
||||
// Skip if inside protected tags or if it's a URL
|
||||
if (inCode || inPre || inAnchor) {
|
||||
if (codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
|
||||
return m;
|
||||
}
|
||||
if (filename.startsWith("//")) {
|
||||
@@ -185,13 +182,13 @@ export function wrapFileReferencesInHtml(html: string): string {
|
||||
return `${prefix}<code>${filename}</code>`;
|
||||
});
|
||||
|
||||
// Update tag state
|
||||
// Update tag depth
|
||||
if (tagName === "code") {
|
||||
inCode = !isClosing;
|
||||
codeDepth += isClosing ? -1 : 1;
|
||||
} else if (tagName === "pre") {
|
||||
inPre = !isClosing;
|
||||
preDepth += isClosing ? -1 : 1;
|
||||
} else if (tagName === "a") {
|
||||
inAnchor = !isClosing;
|
||||
anchorDepth += isClosing ? -1 : 1;
|
||||
}
|
||||
|
||||
// Add the tag itself
|
||||
@@ -202,7 +199,7 @@ export function wrapFileReferencesInHtml(html: string): string {
|
||||
// Process remaining text
|
||||
const remainingText = html.slice(lastIndex);
|
||||
result += remainingText.replace(filePattern, (m, prefix, filename) => {
|
||||
if (inCode || inPre || inAnchor) {
|
||||
if (codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
|
||||
return m;
|
||||
}
|
||||
if (filename.startsWith("//")) {
|
||||
|
||||
@@ -250,4 +250,31 @@ describe("edge cases", () => {
|
||||
expect(result).toContain("<code>README.MD</code>");
|
||||
expect(result).toContain("<code>SCRIPT.PY</code>");
|
||||
});
|
||||
|
||||
it("handles nested code tags (depth tracking)", () => {
|
||||
// Nested <code> inside <pre> - should not wrap inner content
|
||||
const input = "<pre><code>README.md</code></pre> then script.py";
|
||||
const result = wrapFileReferencesInHtml(input);
|
||||
expect(result).toBe("<pre><code>README.md</code></pre> then <code>script.py</code>");
|
||||
});
|
||||
|
||||
it("handles multiple anchor tags in sequence", () => {
|
||||
const input =
|
||||
'<a href="https://a.com">link1</a> README.md <a href="https://b.com">link2</a> script.py';
|
||||
const result = wrapFileReferencesInHtml(input);
|
||||
expect(result).toContain("</a> <code>README.md</code> <a");
|
||||
expect(result).toContain("</a> <code>script.py</code>");
|
||||
});
|
||||
|
||||
it("handles auto-linked anchor with backreference match", () => {
|
||||
// The regex uses \1 backreference - href must equal label
|
||||
const input = '<a href="http://README.md">README.md</a>';
|
||||
expect(wrapFileReferencesInHtml(input)).toBe("<code>README.md</code>");
|
||||
});
|
||||
|
||||
it("preserves anchor when href and label differ (no backreference match)", () => {
|
||||
// Different href and label - should NOT de-linkify
|
||||
const input = '<a href="http://other.md">README.md</a>';
|
||||
expect(wrapFileReferencesInHtml(input)).toBe(input);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user