fix(markdown_normalizer): adopt safe-by-default strategy for escaping

- Set 'enable_escape_fix' to False by default to prevent accidental corruption
- Improve LaTeX display math identification using regex protection
- Update documentation to reflect opt-in recommendation for escape fixes
- Fix the remaining aggressive-escaping bugs from Issue #57
This commit is contained in:
fujie
2026-03-09 01:05:13 +08:00
parent 9bf31488ae
commit 2eee7c5d35
9 changed files with 230 additions and 39 deletions

View File

@@ -0,0 +1,22 @@
from plugins.filters.markdown_normalizer.markdown_normalizer import ContentNormalizer, NormalizerConfig
def test_latex_display_math_protection():
    r"""Check that the escape fix leaves ``$$\nabla$$`` untouched.

    The input is a raw string, so it holds a backslash followed by the
    letter ``n`` rather than a newline character. With
    ``enable_escape_fix`` turned on, normalizing it must not introduce a
    real newline and must give back the input unchanged.
    """
    # Raw literal; the characters are: $ $ \ n a b l a $ $
    text = r"$$\nabla$$"
    res = ContentNormalizer(NormalizerConfig(enable_escape_fix=True)).normalize(text)
    # A real newline in the output would mean the backslash-n pair inside
    # $$...$$ was rewritten into a line break.
    assert "\n" not in res, f"LaTeX display math was corrupted with a real newline: {repr(res)}"
    assert res == text, f"Expected {repr(text)}, got {repr(res)}"
if __name__ == "__main__":
    # Allows running this file directly as a script, without a test runner.
    try:
        test_latex_display_math_protection()
    except AssertionError as e:
        print(f"❌ LaTeX protection test FAILED: {e}")
        # Exit with a non-zero status so shells and CI jobs see the failure;
        # printing alone would leave the exit status at 0.
        raise SystemExit(1) from e
    else:
        print("✅ LaTeX protection test passed.")