feat(filters): upgrade markdown-normalizer to v1.2.7
- Fix Issue #49: resolve greedy regex matching in consecutive emphasis
- Add LaTeX formula protection to prevent corruption of \times, \nu, etc.
- Expand i18n support to 12 languages with strict alignment
- Fix NameError in Request import during testing
This commit is contained in:
128
tests/test_fix_emphasis_spacing.py
Normal file
128
tests/test_fix_emphasis_spacing.py
Normal file
@@ -0,0 +1,128 @@
|
||||
import unittest
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Make the markdown_normalizer plugin importable. The plugin directory is
# resolved relative to this file: one level up from this file's directory,
# then plugins/filters/markdown_normalizer.
current_dir = os.path.dirname(os.path.abspath(__file__))
plugin_dir = os.path.abspath(
    os.path.join(current_dir, "..", "plugins", "filters", "markdown_normalizer")
)
# Guard against duplicate sys.path entries if this module is imported more
# than once in the same interpreter (e.g. by different test runners).
if plugin_dir not in sys.path:
    sys.path.append(plugin_dir)
|
||||
|
||||
from markdown_normalizer import ContentNormalizer, NormalizerConfig
|
||||
|
||||
|
||||
class TestEmphasisSpacingFix(unittest.TestCase):
    """Regression tests for the emphasis-spacing fix of the markdown normalizer.

    ``setUp`` runs before every test method, so each test works with its own
    ``ContentNormalizer`` configured with ``enable_emphasis_spacing_fix=True``.
    """

    def setUp(self):
        """Build a normalizer with the emphasis spacing fix explicitly enabled."""
        self.config = NormalizerConfig(enable_emphasis_spacing_fix=True)
        self.normalizer = ContentNormalizer(self.config)

    def test_user_reported_bug(self):
        """Two bold spans on one line must keep the spaces between them.

        Case from the user report: 'When there are e.g. 2 bold parts on a
        line of text, it treats the part between them as an ill-formatted
        bold part and removes spaces'
        """
        input_text = "I **prefer** tea **to** coffee."
        # Before the fix, this might become "I **prefer**tea**to** coffee."
        # The normalizer comes from setUp, which creates a new one per test.
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            "I **prefer** tea **to** coffee.",
            "Spaces between bold parts should be preserved.",
        )

    def test_triple_bold_parts(self):
        """More than two bold spans on a single line are left untouched."""
        input_text = "The **quick** brown **fox** jumps **over** the dog."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result, input_text, "Multiple bold parts on same line should not merge."
        )

    def test_legitimate_spacing_fix(self):
        """Genuinely mis-spaced emphasis markers are still tightened."""
        test_cases = [
            ("** text **", "**text**"),
            ("**text **", "**text**"),
            ("** text**", "**text**"),
            ("__ bold __", "__bold__"),
            ("* italic *", "*italic*"),
            ("_ italic _", "_italic_"),
            ("*** bolditalic ***", "***bolditalic***"),
        ]
        for inp, expected in test_cases:
            with self.subTest(inp=inp):
                self.assertEqual(self.normalizer.normalize(inp), expected)

    def test_nested_emphasis(self):
        """Nested emphasis (italic inside bold) is tightened at every level."""
        test_cases = [
            # Italic directly inside bold, with stray spaces at both levels.
            ("** _italic _ **", "**_italic_**"),
            # Complex nesting: a mis-spaced italic span inside a longer bold span.
            ("**bold and _ italic _ parts**", "**bold and _italic_ parts**"),
        ]
        # subTest keeps the cases independent: a failure in the first one
        # no longer prevents the second one from being checked and reported.
        for inp, expected in test_cases:
            with self.subTest(inp=inp):
                self.assertEqual(self.normalizer.normalize(inp), expected)

    def test_math_operator_protection(self):
        """Spaced '*' used as a math operator (e.g. '2 * 3 * 4') is preserved."""
        input_text = "Calculations: 2 * 3 * 4 = 24"
        # The spacing around * must survive because it is an operator here.
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            input_text,
            "Math operators (single '*' with spaces) should not be treated as emphasis.",
        )

    def test_list_marker_protection(self):
        """A '*' list marker is not merged with the bold text that follows it."""
        input_text = "* **bold**"
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            input_text,
            "List marker '*' should not be merged with subsequent bold marker.",
        )

    def test_mixed_single_and_double_emphasis(self):
        """Single and double emphasis on the same line do not interfere."""
        input_text = "He is *very* **bold** today."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            input_text,
            "Mixed emphasis styles should not interfere with each other.",
        )

    def test_placeholder_protection(self):
        """Placeholders made of multiple underscores are left unchanged."""
        input_text = "Fill in the blank: ____ and ____."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result, input_text, "Placeholders like '____' should not be modified."
        )

    def test_regression_cross_block_greedy(self):
        """Guard the greedy-regex scenario that caused the reported bug."""
        # User reported case.
        input_text = "I **prefer** tea **to** coffee."
        with self.subTest(inp=input_text):
            result = self.normalizer.normalize(input_text)
            self.assertEqual(
                result, input_text, "User reported case should not have spaces removed."
            )

        # Same shape with the other bold marker; checked independently so it
        # is still reported when the case above fails.
        input_text2 = "Using __bold__ and __more bold__ here."
        with self.subTest(inp=input_text2):
            self.assertEqual(self.normalizer.normalize(input_text2), input_text2)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user