feat(filters): upgrade markdown-normalizer to v1.2.7

- Fix Issue #49: resolve greedy regex matching in consecutive emphasis
- Add LaTeX formula protection to prevent corruption of \times, \nu, etc.
- Expand i18n support to 12 languages with strict alignment
- Fix NameError in Request import during testing
2026-02-24 15:05:25 +08:00
parent 18ada2a177
commit 2da934dd92
16 changed files with 981 additions and 1009 deletions
--- a/plugins/debug/reproduce_issue_49_v2.py
+++ b/plugins/debug/reproduce_issue_49_v2.py
@@ -0,0 +1,28 @@
+import re
+
+def reproduce_bug_v2():
+    # Reproduce Issue #49 under conditions closer to what the old code actually did.
+    # The old code ran several passes in a loop, and its regex could drift out of alignment on nested or consecutive blocks.
+    text = "I **prefer** tea **to** coffee."  # NOTE(review): never used below
+    
+    # Described as a greedy regex with no lookahead assertion. NOTE(review): `.*?` is lazy, and this pattern is never used below.
+    buggy_pattern = re.compile(r"(\*\*)( +)(.*?)( +)(\*\*)")
+
+    # Simulate the "whenever ** is seen with spaces inside, try to fix it" logic.
+    # If the text is "I **prefer** tea **to**",
+    # the spaces sit between "prefer**" and "**to".
+    content = "I **prefer**  tea  **to** coffee."
+    
+    # Faulty match: the end of the first block and the start of the second are mistaken for a pair.
+    # I **prefer**  tea  **to**
+    #          ^^      ^^ 
+    #          A       B
+    # The regex wrongly treats A as the opening marker and B as the closing marker.
+    
+    bug_result = re.sub(r"\*\*( +)(.*?)( +)\*\*", r"**\2**", content)  # drops the spaces captured on both sides of group 2
+    
+    print(f"Input:  {content}")
+    print(f"Output: {bug_result}")
+
+if __name__ == "__main__":  # run the reproduction only when executed as a script
+    reproduce_bug_v2()