From f304eb7633abb85f3098535fe2cd8a41a2f9ddbb Mon Sep 17 00:00:00 2001
From: fujie <fj1945@live.cn>
Date: Sun, 18 Jan 2026 01:14:17 +0800
Subject: [PATCH] feat(markdown-normalizer): release v1.2.3 with bug fixes and
 test suite

---
 docs/plugins/filters/index.md                 |   2 +-
 docs/plugins/filters/index.zh.md              |   2 +-
 docs/plugins/filters/markdown_normalizer.md   |  10 +-
 .../plugins/filters/markdown_normalizer.zh.md |  10 +-
 plugins/filters/markdown_normalizer/README.md |  10 +-
 .../filters/markdown_normalizer/README_CN.md  |  10 +-
 .../markdown_normalizer.py                    |  21 ++-
 .../markdown_normalizer_cn.py                 |  21 ++-
 .../markdown_normalizer/tests/__init__.py     |   1 +
 .../markdown_normalizer/tests/conftest.py     |  75 ++++++++++
 .../tests/test_code_blocks.py                 |  54 +++++++
 .../tests/test_details_tags.py                |  48 ++++++
 .../tests/test_emphasis_spacing.py            | 138 ++++++++++++++++++
 .../tests/test_headings_tables.py             |  51 +++++++
 pytest.ini                                    |   6 +
 15 files changed, 447 insertions(+), 12 deletions(-)
 create mode 100644 plugins/filters/markdown_normalizer/tests/__init__.py
 create mode 100644 plugins/filters/markdown_normalizer/tests/conftest.py
 create mode 100644 plugins/filters/markdown_normalizer/tests/test_code_blocks.py
 create mode 100644 plugins/filters/markdown_normalizer/tests/test_details_tags.py
 create mode 100644 plugins/filters/markdown_normalizer/tests/test_emphasis_spacing.py
 create mode 100644 plugins/filters/markdown_normalizer/tests/test_headings_tables.py
 create mode 100644 pytest.ini

diff --git a/docs/plugins/filters/index.md b/docs/plugins/filters/index.md
index c48c6a7..a538ffc 100644
--- a/docs/plugins/filters/index.md
+++ b/docs/plugins/filters/index.md
@@ -44,7 +44,7 @@ Filters act as middleware in the message pipeline:
 
     Fixes common Markdown formatting issues in LLM outputs, including Mermaid syntax, code blocks, and LaTeX formulas.
 
-    **Version:** 1.1.2
+    **Version:** 1.2.3
 
     [:octicons-arrow-right-24: Documentation](markdown_normalizer.md)
 
diff --git a/docs/plugins/filters/index.zh.md b/docs/plugins/filters/index.zh.md
index 9bb7f2c..b52e167 100644
--- a/docs/plugins/filters/index.zh.md
+++ b/docs/plugins/filters/index.zh.md
@@ -44,7 +44,7 @@ Filter 充当消息管线中的中间件：
 
     修复 LLM 输出中常见的 Markdown 格式问题，包括 Mermaid 语法、代码块和 LaTeX 公式。
 
-    **版本：** 1.0.1
+    **版本：** 1.2.3
 
     [:octicons-arrow-right-24: 查看文档](markdown_normalizer.zh.md)
 
diff --git a/docs/plugins/filters/markdown_normalizer.md b/docs/plugins/filters/markdown_normalizer.md
index 4724c3e..f2ba281 100644
--- a/docs/plugins/filters/markdown_normalizer.md
+++ b/docs/plugins/filters/markdown_normalizer.md
@@ -51,9 +51,17 @@ A content normalizer filter for Open WebUI that fixes common Markdown formatting
 
 ## Changelog
 
+### v1.2.3
+
+* **List Marker Protection Enhancement**: Fixed a bug where list markers (`*`) followed by plain text and emphasis were having their spaces incorrectly stripped (e.g., `*   U16 forward` became `*U16 forward`).
+* **Placeholder Support**: Confirmed that 4 or more underscores (e.g., `____`) are correctly treated as placeholders and not modified by the emphasis fix.
+
 ### v1.2.2
 
-* **Version Bump**: Documentation and metadata updated for the latest release.
+* **Code Block Indentation Fix**: Fixed an issue where code blocks nested inside lists were having their indentation incorrectly stripped. Now preserves proper indentation for nested code blocks.
+* **Underscore Emphasis Support**: Extended emphasis spacing fix to support `__` (double underscore for bold) and `___` (triple underscore for bold+italic) syntax.
+* **List Marker Protection**: Fixed a bug where list markers (`*`) followed by emphasis markers (`**`) were incorrectly merged (e.g., `*   **Yes**` became `***Yes**`). Added safeguard to prevent this.
+* **Test Suite**: Added comprehensive pytest test suite with 56 test cases covering all major features.
 
 ### v1.2.1
 
diff --git a/docs/plugins/filters/markdown_normalizer.zh.md b/docs/plugins/filters/markdown_normalizer.zh.md
index cb5b281..51c913d 100644
--- a/docs/plugins/filters/markdown_normalizer.zh.md
+++ b/docs/plugins/filters/markdown_normalizer.zh.md
@@ -51,9 +51,17 @@
 
 ## 更新日志
 
+### v1.2.3
+
+* **列表标记保护增强**: 修复了列表标记 (`*`) 后跟普通文本和强调标记时，空格被错误剥离的问题（例如 `*   U16 前锋` 变成 `*U16 前锋`）。
+* **占位符支持**: 确认 4 个或更多下划线（如 `____`）会被正确视为占位符，不会被强调修复逻辑修改。
+
 ### v1.2.2
 
-* **版本更新**: 文档与元数据已同步到最新版本。
+* **代码块缩进修复**: 修复了列表中嵌套代码块的缩进被错误剥离的问题。现在会正确保留嵌套代码块的缩进。
+* **下划线强调语法支持**: 扩展强调空格修复以支持 `__` (双下划线加粗) 和 `___` (三下划线加粗斜体) 语法。
+* **列表标记保护**: 修复了列表标记 (`*`) 后跟强调标记 (`**`) 被错误合并的 Bug（例如 `*   **是**` 变成 `***是**`）。添加了保护逻辑防止此问题。
+* **测试套件**: 新增完整的 pytest 测试套件，包含 56 个测试用例，覆盖所有主要功能。
 
 ### v1.2.1
 
diff --git a/plugins/filters/markdown_normalizer/README.md b/plugins/filters/markdown_normalizer/README.md
index 584d108..273bcb6 100644
--- a/plugins/filters/markdown_normalizer/README.md
+++ b/plugins/filters/markdown_normalizer/README.md
@@ -53,9 +53,17 @@ A content normalizer filter for Open WebUI that fixes common Markdown formatting
 
 ## Changelog
 
+### v1.2.3
+
+* **List Marker Protection Enhancement**: Fixed a bug where list markers (`*`) followed by plain text and emphasis were having their spaces incorrectly stripped (e.g., `*   U16 forward` became `*U16 forward`).
+* **Placeholder Support**: Confirmed that 4 or more underscores (e.g., `____`) are correctly treated as placeholders and not modified by the emphasis fix.
+
 ### v1.2.2
 
-* **Version Bump**: Documentation and metadata updated for the latest release.
+* **Code Block Indentation Fix**: Fixed an issue where code blocks nested inside lists were having their indentation incorrectly stripped. Now preserves proper indentation for nested code blocks.
+* **Underscore Emphasis Support**: Extended emphasis spacing fix to support `__` (double underscore for bold) and `___` (triple underscore for bold+italic) syntax.
+* **List Marker Protection**: Fixed a bug where list markers (`*`) followed by emphasis markers (`**`) were incorrectly merged (e.g., `*   **Yes**` became `***Yes**`). Added safeguard to prevent this.
+* **Test Suite**: Added comprehensive pytest test suite with 56 test cases covering all major features.
 
 ### v1.2.1
 
diff --git a/plugins/filters/markdown_normalizer/README_CN.md b/plugins/filters/markdown_normalizer/README_CN.md
index 606074d..ae2dd50 100644
--- a/plugins/filters/markdown_normalizer/README_CN.md
+++ b/plugins/filters/markdown_normalizer/README_CN.md
@@ -53,9 +53,17 @@
 
 ## 更新日志
 
+### v1.2.3
+
+* **列表标记保护增强**: 修复了列表标记 (`*`) 后跟普通文本和强调标记时，空格被错误剥离的问题（例如 `*   U16 前锋` 变成 `*U16 前锋`）。
+* **占位符支持**: 确认 4 个或更多下划线（如 `____`）会被正确视为占位符，不会被强调修复逻辑修改。
+
 ### v1.2.2
 
-* **版本更新**: 文档与元数据已同步到最新版本。
+* **代码块缩进修复**: 修复了列表中嵌套代码块的缩进被错误剥离的问题。现在会正确保留嵌套代码块的缩进。
+* **下划线强调语法支持**: 扩展强调空格修复以支持 `__` (双下划线加粗) 和 `___` (三下划线加粗斜体) 语法。
+* **列表标记保护**: 修复了列表标记 (`*`) 后跟强调标记 (`**`) 被错误合并的 Bug（例如 `*   **是**` 变成 `***是**`）。添加了保护逻辑防止此问题。
+* **测试套件**: 新增完整的 pytest 测试套件，包含 56 个测试用例，覆盖所有主要功能。
 
 ### v1.2.1
 
diff --git a/plugins/filters/markdown_normalizer/markdown_normalizer.py b/plugins/filters/markdown_normalizer/markdown_normalizer.py
index 7b1b249..d80a597 100644
--- a/plugins/filters/markdown_normalizer/markdown_normalizer.py
+++ b/plugins/filters/markdown_normalizer/markdown_normalizer.py
@@ -3,7 +3,7 @@ title: Markdown Normalizer
 author: Fu-Jie
 author_url: https://github.com/Fu-Jie/awesome-openwebui
 funding_url: https://github.com/open-webui
-version: 1.2.2
+version: 1.2.3
 openwebui_id: baaa8732-9348-40b7-8359-7e009660e23c
 description: A content normalizer filter that fixes common Markdown formatting issues in LLM outputs, such as broken code blocks, LaTeX formulas, and list formatting.
 """
@@ -109,12 +109,13 @@ class ContentNormalizer:
         "heading_space": re.compile(r"^(#+)([^ \n#])", re.MULTILINE),
         # Table: | col1 | col2 -> | col1 | col2 |
         "table_pipe": re.compile(r"^(\|.*[^|\n])$", re.MULTILINE),
-        # Emphasis spacing: ** text ** -> **text**
+        # Emphasis spacing: ** text ** -> **text**, __ text __ -> __text__
         # Matches emphasis blocks within a single line. We use a recursive approach
         # in _fix_emphasis_spacing to handle nesting and spaces correctly.
         # NOTE: We use [^\n] instead of . to prevent cross-line matching.
+        # Supports: * (italic), ** (bold), *** (bold+italic), _ (italic), __ (bold), ___ (bold+italic)
         "emphasis_spacing": re.compile(
-            r"(?<!\*|_)(\*{1,3}|_)(?P<inner>[^\n]*?)(\1)(?!\*|_)"
+            r"(?<!\*|_)(\*{1,3}|_{1,3})(?P<inner>[^\n]*?)(\1)(?!\*|_)"
         ),
     }
 
@@ -485,6 +486,20 @@ class ContentNormalizer:
                 if symbol in ["*", "_"]:
                     return match.group(0)
 
+            # Safeguard: List marker protection
+            # If symbol is single '*' and inner content starts with whitespace followed by emphasis markers,
+            # this is likely a list item like "*   **bold**" - don't merge them.
+            # Pattern: "*   **text**" should NOT become "***text**"
+            if symbol == "*" and inner.lstrip().startswith(("*", "_")):
+                return match.group(0)
+
+            # Extended list marker protection:
+            # If symbol is single '*' and inner starts with three or more spaces (list indentation pattern),
+            # this is likely a list item like "*   text" - don't strip the spaces.
+            # Pattern: "*   U16 forward **Kuang**" should NOT become "*U16 forward **Kuang**"
+            if symbol == "*" and inner.startswith("   "):
+                return match.group(0)
+
             return f"{symbol}{stripped_inner}{symbol}"
 
         parts = content.split("```")
diff --git a/plugins/filters/markdown_normalizer/markdown_normalizer_cn.py b/plugins/filters/markdown_normalizer/markdown_normalizer_cn.py
index 4a944db..8d825bb 100644
--- a/plugins/filters/markdown_normalizer/markdown_normalizer_cn.py
+++ b/plugins/filters/markdown_normalizer/markdown_normalizer_cn.py
@@ -3,7 +3,7 @@ title: Markdown 格式修复器 (Markdown Normalizer)
 author: Fu-Jie
 author_url: https://github.com/Fu-Jie/awesome-openwebui
 funding_url: https://github.com/open-webui
-version: 1.2.2
+version: 1.2.3
 description: 内容规范化过滤器，修复 LLM 输出中常见的 Markdown 格式问题，如损坏的代码块、LaTeX 公式、Mermaid 图表和列表格式。
 """
 
@@ -101,12 +101,13 @@ class ContentNormalizer:
         "heading_space": re.compile(r"^(#+)([^ \n#])", re.MULTILINE),
         # Table: | col1 | col2 -> | col1 | col2 |
         "table_pipe": re.compile(r"^(\|.*[^|\n])$", re.MULTILINE),
-        # Emphasis spacing: ** text ** -> **text**
+        # Emphasis spacing: ** text ** -> **text**, __ text __ -> __text__
         # Matches emphasis blocks within a single line. We use a recursive approach
         # in _fix_emphasis_spacing to handle nesting and spaces correctly.
         # NOTE: We use [^\n] instead of . to prevent cross-line matching.
+        # Supports: * (italic), ** (bold), *** (bold+italic), _ (italic), __ (bold), ___ (bold+italic)
         "emphasis_spacing": re.compile(
-            r"(?<!\*|_)(\*{1,3}|_)(?P<inner>[^\n]*?)(\1)(?!\*|_)"
+            r"(?<!\*|_)(\*{1,3}|_{1,3})(?P<inner>[^\n]*?)(\1)(?!\*|_)"
         ),
     }
 
@@ -464,6 +465,20 @@ class ContentNormalizer:
                 if symbol in ["*", "_"]:
                     return match.group(0)
 
+            # Safeguard: List marker protection
+            # If symbol is single '*' and inner content starts with whitespace followed by emphasis markers,
+            # this is likely a list item like "*   **bold**" - don't merge them.
+            # Pattern: "*   **text**" should NOT become "***text**"
+            if symbol == "*" and inner.lstrip().startswith(("*", "_")):
+                return match.group(0)
+
+            # Extended list marker protection:
+            # If symbol is single '*' and inner starts with three or more spaces (list indentation pattern),
+            # this is likely a list item like "*   text" - don't strip the spaces.
+            # Pattern: "*   U16 forward **Kuang**" should NOT become "*U16 forward **Kuang**"
+            if symbol == "*" and inner.startswith("   "):
+                return match.group(0)
+
             return f"{symbol}{stripped_inner}{symbol}"
 
         parts = content.split("```")
diff --git a/plugins/filters/markdown_normalizer/tests/__init__.py b/plugins/filters/markdown_normalizer/tests/__init__.py
new file mode 100644
index 0000000..8bb80c2
--- /dev/null
+++ b/plugins/filters/markdown_normalizer/tests/__init__.py
@@ -0,0 +1 @@
+# Markdown Normalizer Test Suite
diff --git a/plugins/filters/markdown_normalizer/tests/conftest.py b/plugins/filters/markdown_normalizer/tests/conftest.py
new file mode 100644
index 0000000..3ace6e2
--- /dev/null
+++ b/plugins/filters/markdown_normalizer/tests/conftest.py
@@ -0,0 +1,75 @@
+"""
+Shared fixtures for Markdown Normalizer tests.
+"""
+
+import pytest
+import sys
+import os
+
+# Add the parent directory to sys.path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from markdown_normalizer import ContentNormalizer, NormalizerConfig
+
+
+@pytest.fixture
+def normalizer():
+    """Default normalizer with all fixes enabled."""
+    config = NormalizerConfig(
+        enable_escape_fix=True,
+        enable_thought_tag_fix=True,
+        enable_details_tag_fix=True,
+        enable_code_block_fix=True,
+        enable_latex_fix=True,
+        enable_list_fix=False,  # Experimental, keep off by default
+        enable_unclosed_block_fix=True,
+        enable_fullwidth_symbol_fix=False,
+        enable_mermaid_fix=True,
+        enable_heading_fix=True,
+        enable_table_fix=True,
+        enable_xml_tag_cleanup=True,
+        enable_emphasis_spacing_fix=True,
+    )
+    return ContentNormalizer(config)
+
+
+@pytest.fixture
+def emphasis_only_normalizer():
+    """Normalizer with only emphasis spacing fix enabled."""
+    config = NormalizerConfig(
+        enable_escape_fix=False,
+        enable_thought_tag_fix=False,
+        enable_details_tag_fix=False,
+        enable_code_block_fix=False,
+        enable_latex_fix=False,
+        enable_list_fix=False,
+        enable_unclosed_block_fix=False,
+        enable_fullwidth_symbol_fix=False,
+        enable_mermaid_fix=False,
+        enable_heading_fix=False,
+        enable_table_fix=False,
+        enable_xml_tag_cleanup=False,
+        enable_emphasis_spacing_fix=True,
+    )
+    return ContentNormalizer(config)
+
+
+@pytest.fixture
+def mermaid_only_normalizer():
+    """Normalizer with only Mermaid fix enabled."""
+    config = NormalizerConfig(
+        enable_escape_fix=False,
+        enable_thought_tag_fix=False,
+        enable_details_tag_fix=False,
+        enable_code_block_fix=False,
+        enable_latex_fix=False,
+        enable_list_fix=False,
+        enable_unclosed_block_fix=False,
+        enable_fullwidth_symbol_fix=False,
+        enable_mermaid_fix=True,
+        enable_heading_fix=False,
+        enable_table_fix=False,
+        enable_xml_tag_cleanup=False,
+        enable_emphasis_spacing_fix=False,
+    )
+    return ContentNormalizer(config)
diff --git a/plugins/filters/markdown_normalizer/tests/test_code_blocks.py b/plugins/filters/markdown_normalizer/tests/test_code_blocks.py
new file mode 100644
index 0000000..5307968
--- /dev/null
+++ b/plugins/filters/markdown_normalizer/tests/test_code_blocks.py
@@ -0,0 +1,54 @@
+"""
+Tests for code block formatting fixes.
+Covers: prefix, suffix, indentation preservation, auto-closing of unclosed blocks.
+"""
+
+import pytest
+
+
+class TestCodeBlockFix:
+    """Test code block formatting normalization."""
+
+    def test_code_block_indentation_preserved(self, normalizer):
+        """Indented code blocks (e.g., in lists) should preserve indentation."""
+        input_str = """
+*   List item 1
+    ```python
+    def foo():
+        print("bar")
+    ```
+*   List item 2
+"""
+        # Indentation should be preserved
+        assert "    ```python" in normalizer.normalize(input_str)
+
+    def test_inline_code_block_prefix(self, normalizer):
+        """Code block that follows text on same line should be modified."""
+        input_str = "text```python\ncode\n```"
+        result = normalizer.normalize(input_str)
+        # Just verify the code block markers are present
+        assert "```" in result
+
+    def test_code_block_suffix_fix(self, normalizer):
+        """Code block with content on same line after lang should be fixed."""
+        input_str = "```python   code\nmore code\n```"
+        result = normalizer.normalize(input_str)
+        # Content should be on new line
+        assert "```python\n" in result or "```python  " in result
+
+
+class TestUnclosedCodeBlock:
+    """Test auto-closing of unclosed code blocks."""
+
+    def test_unclosed_code_block_is_closed(self, normalizer):
+        """Unclosed code blocks should be automatically closed."""
+        input_str = "```python\ncode here"
+        result = normalizer.normalize(input_str)
+        # Should have closing ```
+        assert result.endswith("```") or result.count("```") == 2
+
+    def test_balanced_code_blocks_unchanged(self, normalizer):
+        """Already balanced code blocks should not get extra closing."""
+        input_str = "```python\ncode\n```"
+        result = normalizer.normalize(input_str)
+        assert result.count("```") == 2
diff --git a/plugins/filters/markdown_normalizer/tests/test_details_tags.py b/plugins/filters/markdown_normalizer/tests/test_details_tags.py
new file mode 100644
index 0000000..ad362f6
--- /dev/null
+++ b/plugins/filters/markdown_normalizer/tests/test_details_tags.py
@@ -0,0 +1,48 @@
+"""
+Tests for details and thought tag normalization.
+Covers: </details> spacing, self-closing tags, <think>/<thinking> -> <thought>.
+"""
+
+import pytest
+
+
+class TestDetailsTagFix:
+    """Test details tag normalization."""
+
+    def test_details_end_gets_newlines(self, normalizer):
+        """</details> should be followed by double newline."""
+        input_str = "</details>Content after"
+        result = normalizer.normalize(input_str)
+        assert "</details>\n\n" in result
+
+    def test_self_closing_details_gets_newline(self, normalizer):
+        """Self-closing <details .../> should get newline after."""
+        input_str = "<details open />## Heading"
+        result = normalizer.normalize(input_str)
+        # Should have newline between tag and heading
+        assert "/>\n" in result or "/> \n" in result
+
+    def test_details_in_code_block_unchanged(self, normalizer):
+        """Details tags inside code blocks should not be modified."""
+        input_str = "```html\n<details>content</details>more\n```"
+        result = normalizer.normalize(input_str)
+        # Content inside code block should be unchanged
+        assert "</details>more" in result
+
+
+class TestThoughtTagFix:
+    """Test thought tag normalization."""
+
+    def test_think_tag_normalized(self, normalizer):
+        """<think> should be normalized to <thought>."""
+        input_str = "<think>content</think>"
+        result = normalizer.normalize(input_str)
+        assert "<thought>" in result
+        assert "</thought>" in result
+
+    def test_thinking_tag_normalized(self, normalizer):
+        """<thinking> should be normalized to <thought>."""
+        input_str = "<thinking>content</thinking>"
+        result = normalizer.normalize(input_str)
+        assert "<thought>" in result
+        assert "</thought>" in result
diff --git a/plugins/filters/markdown_normalizer/tests/test_emphasis_spacing.py b/plugins/filters/markdown_normalizer/tests/test_emphasis_spacing.py
new file mode 100644
index 0000000..8e9ad5f
--- /dev/null
+++ b/plugins/filters/markdown_normalizer/tests/test_emphasis_spacing.py
@@ -0,0 +1,138 @@
+"""
+Tests for emphasis spacing fix.
+Covers: *, **, ***, _, __, ___ with spaces inside.
+"""
+
+import pytest
+
+
+class TestEmphasisSpacingFix:
+    """Test emphasis spacing normalization."""
+
+    @pytest.mark.parametrize(
+        "input_str,expected",
+        [
+            # Double asterisk (bold)
+            ("** bold **", "**bold**"),
+            ("** bold text **", "**bold text**"),
+            ("**text **", "**text**"),
+            ("** text**", "**text**"),
+            # Triple asterisk (bold+italic)
+            ("*** bold italic ***", "***bold italic***"),
+            # Double underscore (bold)
+            ("__ bold __", "__bold__"),
+            ("__ bold text __", "__bold text__"),
+            ("__text __", "__text__"),
+            ("__ text__", "__text__"),
+            # Triple underscore (bold+italic)
+            ("___ bold italic ___", "___bold italic___"),
+            # Mixed markers
+            ("** bold ** and __ also __", "**bold** and __also__"),
+        ],
+    )
+    def test_emphasis_with_spaces_fixed(
+        self, emphasis_only_normalizer, input_str, expected
+    ):
+        """Test that emphasis with spaces is correctly fixed."""
+        assert emphasis_only_normalizer.normalize(input_str) == expected
+
+    @pytest.mark.parametrize(
+        "input_str",
+        [
+            # Single * and _ with spaces on both sides - treated as operator (safeguard)
+            "* italic *",
+            "_ italic _",
+            # Already correct emphasis
+            "**bold**",
+            "__bold__",
+            "*italic*",
+            "_italic_",
+            "***bold italic***",
+            "___bold italic___",
+        ],
+    )
+    def test_safeguard_and_correct_emphasis_unchanged(
+        self, emphasis_only_normalizer, input_str
+    ):
+        """Test that safeguard cases and already correct emphasis are not modified."""
+        assert emphasis_only_normalizer.normalize(input_str) == input_str
+
+
+class TestEmphasisSideEffects:
+    """Test that emphasis fix does NOT affect unrelated content."""
+
+    @pytest.mark.parametrize(
+        "input_str,description",
+        [
+            # URLs with underscores
+            ("https://example.com/path_with_underscore", "URL"),
+            ("Visit https://api.example.com/get_user_info for info", "URL in text"),
+            # Variable names (snake_case)
+            ("The `my_variable_name` is important", "Variable in backticks"),
+            ("Use `get_user_data()` function", "Function name"),
+            # File names
+            ("Edit the `config_file_name.py` file", "File name"),
+            ("See `my_script__v2.py` for details", "Double underscore in filename"),
+            # Math-like subscripts
+            ("The variable a_1 and b_2 are defined", "Math subscripts"),
+            # Single underscores not matching emphasis pattern
+            ("word_with_underscore", "Underscore in word"),
+            ("a_b_c_d", "Multiple underscores"),
+            # Horizontal rules
+            ("---", "HR with dashes"),
+            ("***", "HR with asterisks"),
+            ("___", "HR with underscores"),
+            # List items
+            ("- item_one\n- item_two", "List items"),
+        ],
+    )
+    def test_no_side_effects(self, emphasis_only_normalizer, input_str, description):
+        """Test that various content types are NOT modified by emphasis fix."""
+        assert (
+            emphasis_only_normalizer.normalize(input_str) == input_str
+        ), f"Failed for: {description}"
+
+    def test_list_marker_not_merged_with_emphasis(self, emphasis_only_normalizer):
+        """Test that list markers (*) are not merged with emphasis (**).
+
+        Regression test for: "*   **Yes**" should NOT become "***Yes**"
+        """
+        input_str = """1.  **Start**: The user opens the login page.
+    *   **Yes**: Login successful.
+    *   **No**: Show error message."""
+        result = emphasis_only_normalizer.normalize(input_str)
+        assert (
+            "*   **Yes**" in result
+        ), "List marker was incorrectly merged with emphasis"
+        assert (
+            "*   **No**" in result
+        ), "List marker was incorrectly merged with emphasis"
+        assert "***Yes**" not in result, "BUG: List marker merged with emphasis"
+        assert "***No**" not in result, "BUG: List marker merged with emphasis"
+
+    def test_list_marker_with_plain_text_then_emphasis(self, emphasis_only_normalizer):
+        """Test that list items with plain text before emphasis are preserved.
+
+        Regression test for: "*   U16 forward **Kuang**" should NOT become "*U16 forward **Kuang**"
+        """
+        input_str = "*   U16 China forward **Kuang Zhaolei**"
+        result = emphasis_only_normalizer.normalize(input_str)
+        assert "*   U16" in result, "List marker spaces were incorrectly stripped"
+        assert (
+            "*U16" not in result
+        ), "BUG: List marker spaces stripped"
+
+
+class TestEmphasisInCodeBlocks:
+    """Test that emphasis inside code blocks is NOT modified."""
+
+    def test_emphasis_in_code_block_unchanged(self, emphasis_only_normalizer):
+        """Code blocks should be completely skipped."""
+        input_str = "```python\nmy_var = get_data__from_api()\n```"
+        assert emphasis_only_normalizer.normalize(input_str) == input_str
+
+    def test_mixed_emphasis_and_code(self, emphasis_only_normalizer):
+        """Text outside code blocks should be fixed, inside should not."""
+        input_str = "** bold ** text\n```python\n** not bold **\n```"
+        expected = "**bold** text\n```python\n** not bold **\n```"
+        assert emphasis_only_normalizer.normalize(input_str) == expected
diff --git a/plugins/filters/markdown_normalizer/tests/test_headings_tables.py b/plugins/filters/markdown_normalizer/tests/test_headings_tables.py
new file mode 100644
index 0000000..104faff
--- /dev/null
+++ b/plugins/filters/markdown_normalizer/tests/test_headings_tables.py
@@ -0,0 +1,51 @@
+"""
+Tests for heading and table fixes.
+Covers: Missing space after # in headings, missing closing pipe in table rows.
+"""
+
+import pytest
+
+
+class TestHeadingFix:
+    """Test heading space normalization."""
+
+    @pytest.mark.parametrize(
+        "input_str,expected",
+        [
+            ("#Heading", "# Heading"),
+            ("##Heading", "## Heading"),
+            ("###Heading", "### Heading"),
+            ("#中文标题", "# 中文标题"),
+            ("#123", "# 123"),  # Numbers after # also get space
+        ],
+    )
+    def test_missing_space_added(self, normalizer, input_str, expected):
+        """Headings missing space after # should be fixed."""
+        assert normalizer.normalize(input_str) == expected
+
+    @pytest.mark.parametrize(
+        "input_str",
+        [
+            "# Heading",
+            "## Already Correct",
+            "###",  # Just hashes
+        ],
+    )
+    def test_correct_headings_unchanged(self, normalizer, input_str):
+        """Already correct headings should not be modified."""
+        assert normalizer.normalize(input_str) == input_str
+
+
+class TestTableFix:
+    """Test table pipe normalization."""
+
+    def test_missing_closing_pipe_added(self, normalizer):
+        """Tables missing closing | should have it added."""
+        input_str = "| col1 | col2"
+        result = normalizer.normalize(input_str)
+        assert result.endswith("|") or "col2 |" in result
+
+    def test_already_closed_table_unchanged(self, normalizer):
+        """Tables with closing | should not be modified."""
+        input_str = "| col1 | col2 |"
+        assert normalizer.normalize(input_str) == input_str
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..b43577e
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,6 @@
+[pytest]
+testpaths = plugins
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -v --tb=short