# Fu-Jie_openwebui-extensions/tests/test_fix_emphasis_spacing.py

import unittest
import sys
import os

# Make the markdown_normalizer plugin importable: resolve its directory relative
# to this file and append it (and only it) to sys.path.
current_dir = os.path.split(os.path.abspath(__file__))[0]
_plugin_segments = (os.pardir, "plugins", "filters", "markdown_normalizer")
plugin_dir = os.path.abspath(os.path.join(current_dir, *_plugin_segments))
sys.path.append(plugin_dir)

from markdown_normalizer import ContentNormalizer, NormalizerConfig


class TestEmphasisSpacingFix(unittest.TestCase):
    """Regression tests for the emphasis spacing fix of ``ContentNormalizer``.

    Every test passes a Markdown snippet through ``normalize`` and asserts either
    that stray spaces just inside emphasis markers are removed, or that text which
    merely resembles emphasis comes back unchanged.
    """

    def setUp(self):
        """Create a normalizer with the emphasis spacing fix explicitly enabled."""
        # unittest calls setUp before each test method, so every test works on
        # its own normalizer instance.
        self.config = NormalizerConfig(enable_emphasis_spacing_fix=True)
        self.normalizer = ContentNormalizer(self.config)

    def test_user_reported_bug(self):
        """
        Test case from user reported issue:
        'When there are e.g. 2 bold parts on a line of text, it treats the part between them as an ill-formatted bold part and removes spaces'
        """
        input_text = "I **prefer** tea **to** coffee."
        # Before the fix the result could become "I **prefer**tea**to** coffee."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            "I **prefer** tea **to** coffee.",
            "Spaces between bold parts should be preserved.",
        )

    def test_triple_bold_parts(self):
        """Verify handling of more than 2 bold parts on a single line."""
        input_text = "The **quick** brown **fox** jumps **over** the dog."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result, input_text, "Multiple bold parts on same line should not merge."
        )

    def test_legitimate_spacing_fix(self):
        """Verify that genuinely mis-spaced emphasis is still tightened."""
        test_cases = [
            ("** text **", "**text**"),
            ("**text **", "**text**"),
            ("** text**", "**text**"),
            ("__ bold __", "__bold__"),
            ("* italic *", "*italic*"),
            ("_ italic _", "_italic_"),
            ("*** bolditalic ***", "***bolditalic***"),
        ]
        for inp, expected in test_cases:
            with self.subTest(inp=inp):
                self.assertEqual(self.normalizer.normalize(inp), expected)

    def test_nested_emphasis(self):
        """Verify spacing is fixed for emphasis nested inside other emphasis."""
        test_cases = [
            # Italic wrapped directly by bold, with stray spaces at both levels.
            ("** _italic _ **", "**_italic_**"),
            # Mis-spaced italic in the middle of a longer bold run.
            ("**bold and _ italic _ parts**", "**bold and _italic_ parts**"),
        ]
        # subTest keeps the cases independent: a failure in the first one no
        # longer prevents the second from being checked and reported.
        for inp, expected in test_cases:
            with self.subTest(inp=inp):
                self.assertEqual(self.normalizer.normalize(inp), expected)

    def test_math_operator_protection(self):
        """Verify that math operators are protected (e.g., ' 2 * 3 * 4 ')."""
        input_text = "Calculations: 2 * 3 * 4 = 24"
        # The spacing around * should be preserved because it's an operator
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            input_text,
            "Math operators (single '*' with spaces) should not be treated as emphasis.",
        )

    def test_list_marker_protection(self):
        """Verify that list markers are not merged with bold contents."""
        # A '*' bullet followed by padding and then a bold span.
        input_text = "*   **bold**"
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            input_text,
            "List marker '*' should not be merged with subsequent bold marker.",
        )

    def test_mixed_single_and_double_emphasis(self):
        """Verify a mix of single and double emphasis on the same line."""
        input_text = "He is *very* **bold** today."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result,
            input_text,
            "Mixed emphasis styles should not interfere with each other.",
        )

    def test_placeholder_protection(self):
        """Verify that placeholders (multiple underscores) are protected."""
        input_text = "Fill in the blank: ____ and ____."
        result = self.normalizer.normalize(input_text)
        self.assertEqual(
            result, input_text, "Placeholders like '____' should not be modified."
        )

    def test_regression_cross_block_greedy(self):
        """Special check for the greedy regex scenario that caused the bug."""
        # (input, assertion message); every input must come back unchanged.
        test_cases = [
            # User reported case
            (
                "I **prefer** tea **to** coffee.",
                "User reported case should not have spaces removed.",
            ),
            # Another variant with different symbols
            ("Using __bold__ and __more bold__ here.", None),
        ]
        # subTest reports each variant separately instead of stopping at the
        # first failing assertion.
        for inp, message in test_cases:
            with self.subTest(inp=inp):
                self.assertEqual(self.normalizer.normalize(inp), inp, message)


# Run this module's tests via unittest when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()