- Fix Issue #49: resolve greedy regex matching in consecutive emphasis - Add LaTeX formula protection to prevent corruption of \times, \nu, etc. - Expand i18n support to 12 languages with strict alignment - Fix NameError in Request import during testing
129 lines
4.9 KiB
Python
129 lines
4.9 KiB
Python
import unittest
|
|
import sys
|
|
import os
|
|
|
|
# Add the parent directory and plugin directory to sys.path
|
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
plugin_dir = os.path.abspath(
|
|
os.path.join(current_dir, "..", "plugins", "filters", "markdown_normalizer")
|
|
)
|
|
sys.path.append(plugin_dir)
|
|
|
|
from markdown_normalizer import ContentNormalizer, NormalizerConfig
|
|
|
|
|
|
class TestEmphasisSpacingFix(unittest.TestCase):
|
|
def setUp(self):
|
|
# Explicitly enable the priority and emphasis spacing fix
|
|
self.config = NormalizerConfig(enable_emphasis_spacing_fix=True)
|
|
self.normalizer = ContentNormalizer(self.config)
|
|
|
|
def test_user_reported_bug(self):
|
|
"""
|
|
Test case from user reported issue:
|
|
'When there are e.g. 2 bold parts on a line of text, it treats the part between them as an ill-formatted bold part and removes spaces'
|
|
"""
|
|
input_text = "I **prefer** tea **to** coffee."
|
|
# Before fix, it might become "I **prefer**tea**to** coffee."
|
|
# Use a fresh normalizer to ensure state is clean
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result,
|
|
"I **prefer** tea **to** coffee.",
|
|
"Spaces between bold parts should be preserved.",
|
|
)
|
|
|
|
def test_triple_bold_parts(self):
|
|
"""Verify handling of more than 2 bold parts on a single line"""
|
|
input_text = "The **quick** brown **fox** jumps **over** the dog."
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result, input_text, "Multiple bold parts on same line should not merge."
|
|
)
|
|
|
|
def test_legitimate_spacing_fix(self):
|
|
"""Verify it still fixes actual spacing issues"""
|
|
test_cases = [
|
|
("** text **", "**text**"),
|
|
("**text **", "**text**"),
|
|
("** text**", "**text**"),
|
|
("__ bold __", "__bold__"),
|
|
("* italic *", "*italic*"),
|
|
("_ italic _", "_italic_"),
|
|
("*** bolditalic ***", "***bolditalic***"),
|
|
]
|
|
for inp, expected in test_cases:
|
|
with self.subTest(inp=inp):
|
|
self.assertEqual(self.normalizer.normalize(inp), expected)
|
|
|
|
def test_nested_emphasis(self):
|
|
"""Test recursive handling of nested emphasis (italic inside bold)"""
|
|
# Note: ** _italic_ ** -> **_italic_**
|
|
input_text = "** _italic _ **"
|
|
expected = "**_italic_**"
|
|
self.assertEqual(self.normalizer.normalize(input_text), expected)
|
|
|
|
# Complex nesting
|
|
input_text_complex = "**bold and _ italic _ parts**"
|
|
expected_complex = "**bold and _italic_ parts**"
|
|
self.assertEqual(
|
|
self.normalizer.normalize(input_text_complex), expected_complex
|
|
)
|
|
|
|
def test_math_operator_protection(self):
|
|
"""Verify that math operators are protected (e.g., ' 2 * 3 * 4 ')"""
|
|
input_text = "Calculations: 2 * 3 * 4 = 24"
|
|
# The spacing around * should be preserved because it's an operator
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result,
|
|
input_text,
|
|
"Math operators (single '*' with spaces) should not be treated as emphasis.",
|
|
)
|
|
|
|
def test_list_marker_protection(self):
|
|
"""Verify that list markers are not merged with bold contents"""
|
|
# * **bold**
|
|
input_text = "* **bold**"
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result,
|
|
input_text,
|
|
"List marker '*' should not be merged with subsequent bold marker.",
|
|
)
|
|
|
|
def test_mixed_single_and_double_emphasis(self):
|
|
"""Verify a mix of single and double emphasis on the same line"""
|
|
input_text = "He is *very* **bold** today."
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result,
|
|
input_text,
|
|
"Mixed emphasis styles should not interfere with each other.",
|
|
)
|
|
|
|
def test_placeholder_protection(self):
|
|
"""Verify that placeholders (multiple underscores) are protected"""
|
|
input_text = "Fill in the blank: ____ and ____."
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result, input_text, "Placeholders like '____' should not be modified."
|
|
)
|
|
|
|
def test_regression_cross_block_greedy(self):
|
|
"""Special check for the greedy regex scenario that caused the bug"""
|
|
# User reported case
|
|
input_text = "I **prefer** tea **to** coffee."
|
|
result = self.normalizer.normalize(input_text)
|
|
self.assertEqual(
|
|
result, input_text, "User reported case should not have spaces removed."
|
|
)
|
|
|
|
# Another variant with different symbols
|
|
input_text2 = "Using __bold__ and __more bold__ here."
|
|
self.assertEqual(self.normalizer.normalize(input_text2), input_text2)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
unittest.main()
|