Compare commits

...

5 Commits

Author SHA1 Message Date
fujie
9bf31488ae fix(release): correct indentation in Python script for plugin metadata extraction 2026-03-08 20:03:16 +08:00
fujie
ef86a2c3c4 fix(ci): fix EOF here-doc indentation 2026-03-08 19:52:39 +08:00
fujie
b4c6d23dfb fix(ci): fix here-doc syntax error in release workflow 2026-03-08 19:49:50 +08:00
fujie
6102851e55 fix(markdown_normalizer): enhance reliability and code protection
- Fix error fallback mechanism to guarantee 100% rollback to original text on failure
- Improve escape character cleanup to protect inline code blocks from unwanted modification
- Fix 'enable_escape_fix_in_code_blocks' configuration to correctly apply to code blocks when enabled
- Change 'show_debug_log' default to False to reduce console noise and improve privacy
- Update READMEs and docs, bumped version to 1.2.8
2026-03-08 19:48:17 +08:00
fujie
79c1fde217 fix(release): enforce single plugin update per release and improve version tagging 2026-03-08 19:42:13 +08:00
12 changed files with 566 additions and 290 deletions

View File

@@ -5,13 +5,13 @@
# Triggers:
# - Push to main branch when plugins are modified (auto-release)
# - Manual trigger (workflow_dispatch) with custom release notes
# - Push of version tags (v*)
# - Push of plugin version tags (<plugin>-v*)
#
# What it does:
# 1. Detects plugin version changes compared to the last release
# 2. Generates release notes with updated plugin information
# 3. Creates a GitHub Release with plugin files as downloadable assets
# 4. Supports multiple plugin updates in a single release
# 4. Enforces one plugin creation/update per release
name: Plugin Release
@@ -28,13 +28,14 @@ on:
- 'plugins/**/v*_CN.md'
- 'docs/plugins/**/*.md'
tags:
- '*-v*'
- 'v*'
# Manual trigger with inputs
workflow_dispatch:
inputs:
version:
description: 'Release version (e.g., v1.0.0). Leave empty for auto-generated version.'
description: 'Release tag (e.g., markdown-normalizer-v1.2.8). Leave empty for auto-generated tag.'
required: false
type: string
release_title:
@@ -65,7 +66,9 @@ jobs:
outputs:
has_changes: ${{ steps.detect.outputs.has_changes }}
changed_plugins: ${{ steps.detect.outputs.changed_plugins }}
changed_plugin_titles: ${{ steps.detect.outputs.changed_plugin_titles }}
changed_plugin_title: ${{ steps.detect.outputs.changed_plugin_title }}
changed_plugin_slug: ${{ steps.detect.outputs.changed_plugin_slug }}
changed_plugin_version: ${{ steps.detect.outputs.changed_plugin_version }}
changed_plugin_count: ${{ steps.detect.outputs.changed_plugin_count }}
release_notes: ${{ steps.detect.outputs.release_notes }}
has_doc_changes: ${{ steps.detect.outputs.has_doc_changes }}
@@ -95,12 +98,12 @@ jobs:
run: |
# Always compare against the most recent previously released version.
CURRENT_TAG=""
if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
CURRENT_TAG="${GITHUB_REF#refs/tags/}"
echo "Current tag event detected: $CURRENT_TAG"
fi
PREVIOUS_RELEASE_TAG=$(git tag --sort=-creatordate | grep -E '^v' | grep -Fxv "$CURRENT_TAG" | head -n1 || true)
PREVIOUS_RELEASE_TAG=$(git tag --sort=-creatordate | grep -Fxv "$CURRENT_TAG" | head -n1 || true)
if [ -n "$PREVIOUS_RELEASE_TAG" ]; then
echo "Comparing with previous release tag: $PREVIOUS_RELEASE_TAG"
@@ -162,22 +165,80 @@ jobs:
# Only trigger release if there are actual version changes, not just doc changes
echo "has_changes=false" >> $GITHUB_OUTPUT
echo "changed_plugins=" >> $GITHUB_OUTPUT
echo "changed_plugin_titles=" >> $GITHUB_OUTPUT
echo "changed_plugin_title=" >> $GITHUB_OUTPUT
echo "changed_plugin_slug=" >> $GITHUB_OUTPUT
echo "changed_plugin_version=" >> $GITHUB_OUTPUT
echo "changed_plugin_count=0" >> $GITHUB_OUTPUT
else
echo "has_changes=true" >> $GITHUB_OUTPUT
# Extract changed plugin file paths and titles using Python
python3 -c "import json; data = json.load(open('changes.json', 'r')); get_title = lambda plugin: ((plugin.get('data', {}).get('function', {}).get('meta', {}).get('manifest', {}).get('title')) or plugin.get('title') or '').strip(); files = []; [files.append(plugin['file_path']) for plugin in data.get('added', []) if plugin.get('file_path')]; [files.append(update['current']['file_path']) for update in data.get('updated', []) if update.get('current', {}).get('file_path')]; print('\\n'.join(files))" > changed_files.txt
python3 -c "import json; data = json.load(open('changes.json', 'r')); get_title = lambda plugin: ((plugin.get('data', {}).get('function', {}).get('meta', {}).get('manifest', {}).get('title')) or plugin.get('title') or '').strip(); titles = []; [titles.append(title) for plugin in data.get('added', []) for title in [get_title(plugin)] if title and title not in titles]; [titles.append(title) for update in data.get('updated', []) for title in [get_title(update.get('current', {}))] if title and title not in titles]; print(', '.join(titles)); open('changed_plugin_count.txt', 'w').write(str(len(titles)))" > changed_plugin_titles.txt
# Extract changed plugin metadata and enforce a single-plugin release.
python3 <<'PY'
import json
import sys
from pathlib import Path
def get_plugin_meta(plugin):
    """Extract release metadata for one plugin entry of changes.json.

    Args:
        plugin: Mapping describing a plugin. Title and version are read from
            ``data.function.meta.manifest`` first, then from the top-level
            ``title`` / ``version`` keys. ``None`` is treated as an empty entry.

    Returns:
        dict with the string keys ``title``, ``slug``, ``version`` and
        ``file_path``. Missing values are empty strings. ``slug`` is the name
        of the directory containing ``file_path`` with ``_`` replaced by ``-``.
    """
    plugin = plugin or {}

    # Walk the nested manifest path defensively: a key that is present but
    # null must behave like a missing key instead of raising AttributeError.
    manifest = plugin
    for key in ('data', 'function', 'meta', 'manifest'):
        manifest = manifest.get(key) or {}

    def _text(value):
        # Coerce to str so a non-string value (e.g. a numeric version) does
        # not crash on .strip(); falsy values become ''.
        return str(value).strip() if value else ''

    title = _text(manifest.get('title') or plugin.get('title'))
    version = _text(manifest.get('version') or plugin.get('version'))
    file_path = _text(plugin.get('file_path'))
    slug = Path(file_path).parent.name.replace('_', '-').strip() if file_path else ''
    return {
        'title': title,
        'slug': slug,
        'version': version,
        'file_path': file_path,
    }


def collect_plugins(data):
    """Return de-duplicated metadata for every added or updated plugin.

    Entries are taken from ``data['added']`` and from the ``current`` value of
    each item in ``data['updated']``, in that order. Two entries are the same
    plugin when they share a file path (or a title, when no path is present).
    Entries that have neither are skipped.
    """
    entries = list(data.get('added') or [])
    entries.extend((update or {}).get('current') or {} for update in data.get('updated') or [])

    plugins = []
    seen_keys = set()
    for entry in entries:
        meta = get_plugin_meta(entry)
        key = meta['file_path'] or meta['title']
        if key and key not in seen_keys:
            plugins.append(meta)
            seen_keys.add(key)
    return plugins


def main():
    """Write the changed-plugin files consumed by the following shell steps.

    Reads ``changes.json`` from the working directory and always writes
    ``changed_files.txt`` and ``changed_plugin_count.txt``. Exits with status 1
    when more than one plugin changed; otherwise also writes the
    ``changed_plugin_title.txt`` / ``_slug.txt`` / ``_version.txt`` files
    (empty when nothing changed).
    """
    # read_text opens and closes the file; json.load(open(...)) left it open.
    data = json.loads(Path('changes.json').read_text(encoding='utf-8'))
    plugins = collect_plugins(data)

    Path('changed_files.txt').write_text(
        '\n'.join(meta['file_path'] for meta in plugins if meta['file_path']),
        encoding='utf-8',
    )
    Path('changed_plugin_count.txt').write_text(str(len(plugins)), encoding='utf-8')

    if len(plugins) > 1:
        print('Error: release workflow only supports one plugin creation/update per release.', file=sys.stderr)
        for meta in plugins:
            print(
                f"- {meta['title'] or 'Unknown'} v{meta['version'] or '?'} ({meta['file_path'] or 'unknown path'})",
                file=sys.stderr,
            )
        sys.exit(1)

    selected = plugins[0] if plugins else {'title': '', 'slug': '', 'version': ''}
    for field in ('title', 'slug', 'version'):
        Path(f'changed_plugin_{field}.txt').write_text(selected[field], encoding='utf-8')


# A script fed to `python3` on stdin runs as __main__, so the heredoc still
# executes main(); the guard only keeps the helpers importable for testing.
if __name__ == '__main__':
    main()
PY
echo "changed_plugins<<EOF" >> $GITHUB_OUTPUT
cat changed_files.txt >> $GITHUB_OUTPUT
echo "" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "changed_plugin_titles=$(cat changed_plugin_titles.txt)" >> $GITHUB_OUTPUT
echo "changed_plugin_title=$(cat changed_plugin_title.txt)" >> $GITHUB_OUTPUT
echo "changed_plugin_slug=$(cat changed_plugin_slug.txt)" >> $GITHUB_OUTPUT
echo "changed_plugin_version=$(cat changed_plugin_version.txt)" >> $GITHUB_OUTPUT
echo "changed_plugin_count=$(cat changed_plugin_count.txt)" >> $GITHUB_OUTPUT
fi
@@ -191,7 +252,7 @@ jobs:
release:
needs: check-changes
if: needs.check-changes.outputs.has_changes == 'true' || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/v')
if: needs.check-changes.outputs.has_changes == 'true' || github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
env:
LANG: en_US.UTF-8
@@ -219,53 +280,34 @@ jobs:
id: version
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CHANGED_PLUGIN_SLUG: ${{ needs.check-changes.outputs.changed_plugin_slug }}
CHANGED_PLUGIN_VERSION: ${{ needs.check-changes.outputs.changed_plugin_version }}
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ] && [ -n "${{ github.event.inputs.version }}" ]; then
VERSION="${{ github.event.inputs.version }}"
elif [[ "${{ github.ref }}" == refs/tags/v* ]]; then
elif [[ "${{ github.ref }}" == refs/tags/* ]]; then
VERSION="${GITHUB_REF#refs/tags/}"
elif [ -n "$CHANGED_PLUGIN_SLUG" ] && [ -n "$CHANGED_PLUGIN_VERSION" ]; then
VERSION="${CHANGED_PLUGIN_SLUG}-v${CHANGED_PLUGIN_VERSION}"
else
# Auto-generate version based on date and daily release count
TODAY=$(date +'%Y.%m.%d')
TODAY_PREFIX="v${TODAY}-"
# Count existing releases with today's date prefix
# grep -c exits with status 1 when the count is 0, so we use || true to avoid script failure
EXISTING_COUNT=$(gh release list --limit 100 2>/dev/null | grep -c "^${TODAY_PREFIX}" || true)
# Clean up output (handle potential newlines or fallback issues)
EXISTING_COUNT=$(echo "$EXISTING_COUNT" | tr -cd '0-9')
if [ -z "$EXISTING_COUNT" ]; then EXISTING_COUNT=0; fi
NEXT_NUM=$((EXISTING_COUNT + 1))
VERSION="${TODAY_PREFIX}${NEXT_NUM}"
# Final fallback to ensure VERSION is never empty
if [ -z "$VERSION" ]; then
VERSION="v$(date +'%Y.%m.%d-%H%M%S')"
fi
echo "Error: failed to determine plugin-scoped release tag." >&2
exit 1
fi
echo "version=$VERSION" >> $GITHUB_OUTPUT
echo "Release version: $VERSION"
echo "Release tag: $VERSION"
- name: Build release metadata
id: meta
env:
VERSION: ${{ steps.version.outputs.version }}
INPUT_TITLE: ${{ github.event.inputs.release_title }}
CHANGED_PLUGIN_TITLES: ${{ needs.check-changes.outputs.changed_plugin_titles }}
CHANGED_PLUGIN_COUNT: ${{ needs.check-changes.outputs.changed_plugin_count }}
CHANGED_PLUGIN_TITLE: ${{ needs.check-changes.outputs.changed_plugin_title }}
CHANGED_PLUGIN_VERSION: ${{ needs.check-changes.outputs.changed_plugin_version }}
run: |
if [ -n "$INPUT_TITLE" ]; then
RELEASE_NAME="$INPUT_TITLE"
elif [ "$CHANGED_PLUGIN_COUNT" = "1" ] && [ -n "$CHANGED_PLUGIN_TITLES" ]; then
RELEASE_NAME="$CHANGED_PLUGIN_TITLES $VERSION"
elif [ -n "$CHANGED_PLUGIN_TITLES" ] && [ "$CHANGED_PLUGIN_COUNT" = "2" ]; then
RELEASE_NAME="$VERSION - $CHANGED_PLUGIN_TITLES"
elif [ -n "$CHANGED_PLUGIN_TITLES" ] && [ "${CHANGED_PLUGIN_COUNT:-0}" -gt 2 ]; then
FIRST_PLUGIN=$(echo "$CHANGED_PLUGIN_TITLES" | cut -d',' -f1 | xargs)
RELEASE_NAME="$VERSION - $FIRST_PLUGIN and $CHANGED_PLUGIN_COUNT plugin updates"
elif [ -n "$CHANGED_PLUGIN_TITLE" ] && [ -n "$CHANGED_PLUGIN_VERSION" ]; then
RELEASE_NAME="$CHANGED_PLUGIN_TITLE v$CHANGED_PLUGIN_VERSION"
else
RELEASE_NAME="$VERSION"
fi
@@ -391,12 +433,22 @@ jobs:
VERSION: ${{ steps.version.outputs.version }}
TITLE: ${{ github.event.inputs.release_title }}
NOTES: ${{ github.event.inputs.release_notes }}
CHANGED_PLUGIN_TITLE: ${{ needs.check-changes.outputs.changed_plugin_title }}
CHANGED_PLUGIN_VERSION: ${{ needs.check-changes.outputs.changed_plugin_version }}
DETECTED_CHANGES: ${{ needs.check-changes.outputs.release_notes }}
COMMITS: ${{ steps.commits.outputs.commits }}
DOC_FILES: ${{ needs.check-changes.outputs.changed_doc_files }}
run: |
> release_notes.md
if [ -n "$CHANGED_PLUGIN_TITLE" ] && [ -n "$CHANGED_PLUGIN_VERSION" ]; then
echo "# $CHANGED_PLUGIN_TITLE v$CHANGED_PLUGIN_VERSION" >> release_notes.md
echo "" >> release_notes.md
elif [ -n "$TITLE" ]; then
echo "# $TITLE" >> release_notes.md
echo "" >> release_notes.md
fi
# 1. Release notes from v*.md files (highest priority, shown first)
if [ -n "$DOC_FILES" ]; then
RELEASE_NOTE_FILES=$(echo "$DOC_FILES" | grep -E '^plugins/.*/v[^/]*\.md$' | grep -v '_CN\.md$' || true)
@@ -404,7 +456,7 @@ jobs:
while IFS= read -r file; do
[ -z "$file" ] && continue
if [ -f "$file" ]; then
python3 -c "import json, pathlib, re; file_path = pathlib.Path(r'''$file'''); plugin_versions_path = pathlib.Path('plugin_versions.json'); text = file_path.read_text(encoding='utf-8'); plugin_dir = file_path.parent.as_posix(); plugins = json.loads(plugin_versions_path.read_text(encoding='utf-8')) if plugin_versions_path.exists() else []; plugin_title = next((plugin.get('title', '').strip() for plugin in plugins if plugin.get('file_path', '').startswith(plugin_dir + '/')), ''); text = re.sub(r'^#\\s+(v[0-9][^\\n]*Release Notes)\\s*$', '# ' + plugin_title + ' ' + r'\\1', text, count=1, flags=re.MULTILINE) if plugin_title else text; print(text.rstrip())" >> release_notes.md
python3 -c "import pathlib, re; file_path = pathlib.Path(r'''$file'''); text = file_path.read_text(encoding='utf-8'); text = re.sub(r'^#\\s+.+?(?:\\r?\\n)+', '', text, count=1, flags=re.MULTILINE); print(text.lstrip().rstrip())" >> release_notes.md
echo "" >> release_notes.md
fi
done <<< "$RELEASE_NOTE_FILES"
@@ -412,7 +464,7 @@ jobs:
fi
# 2. Plugin version changes detected by script
if [ -n "$TITLE" ]; then
if [ -z "$CHANGED_PLUGIN_TITLE" ] && [ -z "$CHANGED_PLUGIN_VERSION" ] && [ -n "$TITLE" ]; then
echo "## $TITLE" >> release_notes.md
echo "" >> release_notes.md
fi
@@ -464,12 +516,12 @@ jobs:
📚 [Documentation](https://fu-jie.github.io/openwebui-extensions/)
🐛 [Report Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)
EOF
echo "=== Release Notes ==="
cat release_notes.md
- name: Create Git Tag
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
if: ${{ !startsWith(github.ref, 'refs/tags/') }}
run: |
VERSION="${{ steps.version.outputs.version }}"

104
ISSUE_57_ANALYSIS_REPORT.md Normal file
View File

@@ -0,0 +1,104 @@
# Markdown Normalizer 插件可靠性修复分析报告 (Issue #57)
## 1. 问题背景
根据 Issue #57 报告,`Markdown Normalizer` 在 v1.2.7 版本中存在数项严重影响可靠性的 Bug,包括错误回滚失效、对内联技术内容的过度转义、配置项不生效以及调试日志潜在的隐私风险。
## 2. 核心处理流程图 (v1.2.8)
以下流程展示了插件如何在确保“不损坏原始内容”的前提下进行智能修复:
```mermaid
graph TD
Start([开始处理内容]) --> Cache[1. 内存中存入原始快照 Snapshot]
Cache --> Logic{进入修复流程}
subgraph "分层保护逻辑 (Context-Aware)"
Logic --> Block[识别并锁定 ``` 代码块]
Block --> Inline[识别并锁定 ` 行内代码]
Inline --> Math[识别并锁定 $ LaTeX 公式]
Math --> Clean[仅对非锁定区域执行转义清理]
end
Clean --> Others[执行其他规则: Thought/Details/Table等]
Others --> Check{运行是否报错?}
Check -- 否 (成功) --> Success[返回修复后的内容]
Check -- 是 (失败) --> Rollback[触发回滚: 丢弃所有修改]
Rollback --> Original[返回步骤1存储的原始快照]
Success --> End([输出结果])
Original --> End
```
## 3. 修复项详细说明
### 3.1 错误回滚机制修复 (Reliability: Error Fallback)
- **问题**:在 `normalize` 流程中,如果某个清理器抛出异常,返回的是已被部分修改的 `content`,导致输出内容损坏。
- **技术实现**:
```python
def normalize(self, content: str) -> str:
original_content = content # 1. 流程开始前缓存原始快照
try:
# ... 执行一系列清理步骤 ...
return content
except Exception as e:
# 2. 任何步骤失败,立即记录日志并回滚
logger.error(f"Content normalization failed: {e}", exc_info=True)
return original_content # 确保返回的是原始快照
```
- **验证结果**:通过模拟 `RuntimeError` 验证,插件现在能 100% 回滚至原始状态。
### 3.2 上下文感知的转义保护 (Context-Aware Escaping)
- **问题**:全局替换导致正文中包含在 `` ` `` 内的代码片段(如正则、Windows 路径)被破坏。
- **技术实现**:
重构后的 `_fix_escape_characters` 采用了 **“分词保护策略”**,通过多层嵌套分割来确保仅在非代码上下文中进行清理:
```python
def _fix_escape_characters(self, content: str) -> str:
# 层级 1: 以 ``` 分隔代码块
parts = content.split("```")
for i in range(len(parts)):
is_code_block = (i % 2 != 0)
if is_code_block and not self.config.enable_escape_fix_in_code_blocks:
continue # 默认跳过代码块
if not is_code_block:
# 层级 2: 在非代码块正文中,以 ` 分隔内联代码
inline_parts = parts[i].split("`")
for k in range(0, len(inline_parts), 2): # 仅处理非内联代码部分
# 层级 3: 在非内联代码中,以 $ 分隔 LaTeX 公式
sub_parts = inline_parts[k].split("$")
for j in range(0, len(sub_parts), 2):
# 最终:仅在确认为“纯文本”的部分执行 clean_text
sub_parts[j] = clean_text(sub_parts[j])
inline_parts[k] = "$".join(sub_parts)
parts[i] = "`".join(inline_parts)
else:
parts[i] = clean_text(parts[i])
return "```".join(parts)
```
- **验证结果**:测试用例 `Regex: [\n\r]` 和 `C:\Windows` 在正文中保持原样,而普通文本中的 `\\n` 被正确转换。
### 3.3 配置项激活 (Configuration Enforcement)
- **问题**:`enable_escape_fix_in_code_blocks` 开关在代码中被定义但未被逻辑引用。
- **修复方案**:在 `_fix_escape_characters` 处理流程中加入对该开关的判断。
- **验证结果**:当开关关闭(默认)时,代码块内容保持不变;开启时,代码块内执行转义修复。
### 3.4 默认日志策略调整 (Privacy & Performance)
- **问题**:`show_debug_log` 默认为 `True`,且会将原始内容打印到浏览器控制台。
- **修复方案**:将默认值改为 `False`。
- **验证结果**:新安装或默认配置下不再主动输出全量日志,仅在用户显式开启时用于调试。
## 4. 综合测试覆盖
已建立 `comprehensive_test_markdown_normalizer.py` 测试脚本,覆盖以下场景:
1. **异常抛出回滚**:确保插件“不破坏”原始内容。
2. **内联代码保护**:验证正则和路径字符串的完整性。
3. **代码块开关控制**:验证配置项的有效性。
4. **LaTeX 命令回归测试**:确保 `\times`, `\theta` 等命令不被误触。
5. **复杂嵌套结构**:验证包含 Thought 标签、列表、内联代码及代码块的混合文本处理。
## 5. 结论
`Markdown Normalizer v1.2.8` 已解决 Issue #57 提出的所有核心可靠性问题。插件现在具备“不损坏内容”的防御性编程能力,并能更智能地感知 Markdown 上下文。
---
**报告日期**:2026-03-08
**修复版本**:v1.2.8

View File

@@ -52,7 +52,7 @@ Filters act as middleware in the message pipeline:
Fixes common Markdown formatting issues in LLM outputs, including Mermaid syntax, code blocks, and LaTeX formulas.
**Version:** 1.2.7
**Version:** 1.2.8
[:octicons-arrow-right-24: Documentation](markdown_normalizer.md)

View File

@@ -52,7 +52,7 @@ Filter 充当消息管线中的中间件:
修复 LLM 输出中常见的 Markdown 格式问题,包括 Mermaid 语法、代码块和 LaTeX 公式。
**版本:** 1.2.7
**版本:** 1.2.8
[:octicons-arrow-right-24: 查看文档](markdown_normalizer.zh.md)

View File

@@ -1,81 +1,87 @@
# Markdown Normalizer Filter
**Author:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **Version:** 1.2.7 | **Project:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **License:** MIT
**Author:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **Version:** 1.2.8 | **Project:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **License:** MIT
A content normalizer filter for Open WebUI that fixes common Markdown formatting issues in LLM outputs. It ensures that code blocks, LaTeX formulas, Mermaid diagrams, and other Markdown elements are rendered correctly.
A powerful, context-aware content normalizer filter for Open WebUI designed to fix common Markdown formatting issues in LLM outputs. It ensures that code blocks, LaTeX formulas, Mermaid diagrams, and other structural Markdown elements are rendered flawlessly, without destroying valid technical content.
> 🏆 **Featured by OpenWebUI Official** — Recommended in the official OpenWebUI Community Newsletter: [January 28, 2026](https://openwebui.com/blog/newsletter-january-28-2026)
> 🏆 **Featured by OpenWebUI Official** — This plugin was recommended in the official OpenWebUI Community Newsletter: [January 28, 2026](https://openwebui.com/blog/newsletter-january-28-2026)
## 🔥 What's New in v1.2.7
[English](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README.md) | [简体中文](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README_CN.md)
* **LaTeX Formula Protection**: Enhanced escape character cleaning to protect LaTeX commands like `\times`, `\nu`, and `\theta` from being corrupted.
* **Expanded i18n Support**: Now supports 12 languages with automatic detection and fallback.
* **Valves Optimization**: Optimized configuration descriptions to be English-only for better consistency.
* **Bug Fixes**:
* Resolved [Issue #49](https://github.com/Fu-Jie/openwebui-extensions/issues/49): Fixed a bug where consecutive bold parts on the same line caused spaces between them to be removed.
* Fixed a `NameError` in the plugin code that caused test collection failures.
---
## 🚀 Why do you need this plugin? (What does it do?)
Language Models (LLMs) often generate malformed Markdown due to tokenization artifacts, aggressive escaping, or hallucinated formatting. If you've ever seen:
- A `mermaid` diagram fail to render because of missing quotes around labels.
- A SQL block stuck on a single line because `\n` was output literally instead of a real newline.
- A `<details>` block break the entire chat rendering because of missing newlines.
- A LaTeX formula fail because the LLM used `\[` instead of `$$`.
**This plugin automatically intercepts the LLM's raw output, analyzes its structure, and surgically repairs these formatting errors in real-time before they reach your browser.**
## ✨ Comprehensive Feature List
### 1. Advanced Structural Protections (Context-Aware)
Before making any changes, the plugin builds a semantic map of the text to protect your technical content:
- **Code Block Protection**: Skips formatting inside ` ``` ` code blocks by default to protect code logic.
- **Inline Code Protection**: Recognizes `` `code` `` snippets and protects regular expressions and file paths (e.g., `C:\Windows`) from being incorrectly unescaped.
- **LaTeX Protection**: Identifies inline (`$`) and block (`$$`) formulas to prevent modifying critical math commands like `\times`, `\theta`, or `\nu`.
### 2. Auto-Healing Transformations
- **Details Tag Normalization**: `<details>` blocks (often used for Chain of Thought) require strict spacing to render correctly. The plugin automatically injects blank lines after `</details>` and self-closing `<details />` tags.
- **Mermaid Syntax Fixer**: One of the most common LLM errors is omitting quotes in Mermaid diagrams (e.g., `A --> B(Some text)`). This plugin parses the Mermaid syntax and auto-quotes labels and citations to guarantee the graph renders.
- **Emphasis Spacing Fix**: Fixes formatting-breaking extra spaces inside bold/italic markers (e.g., `** text **` becomes `**text**`) while cleverly ignoring math expressions like `2 * 3 * 4`.
- **Intelligent Escape Character Cleanup**: Removes excessive literal `\n` and `\t` generated by some models and converts them to actual structural newlines (only in safe text areas).
- **LaTeX Standardization**: Automatically upgrades old-school LaTeX delimiters (`\[...\]` and `\(...\)`) to modern Markdown standards (`$$...$$` and `$ ... $`).
- **Thought Tag Unification**: Standardizes various model thought outputs (`<think>`, `<thinking>`) into a unified `<thought>` tag.
- **Broken Code Block Repair**: Fixes indentation issues, repairs mangled language prefixes (e.g., ` ```python`), and automatically closes unclosed code blocks if a generation was cut off.
- **List & Table Formatting**: Injects missing newlines to repair broken numbered lists and adds missing closing pipes (`|`) to tables.
- **XML Artifact Cleanup**: Silently removes leftover `<antArtifact>` or `<antThinking>` tags often leaked by Claude models.
### 3. Reliability & Safety
- **100% Rollback Guarantee**: If any normalization logic fails or crashes, the plugin catches the error and silently returns the exact original text, ensuring your chat never breaks.
## 🔥 What's New in v1.2.8
* **Reliability Enhancement**: Complete error fallback mechanism. Guarantees 0% data loss during processing.
* **Inline Code Protection**: Upgraded escaping logic to protect inline code blocks (`` `...` ``).
* **Code Block Escaping Control**: The `enable_escape_fix_in_code_blocks` Valve now correctly targets broken newlines inside code blocks (perfect for fixing flat SQL queries) when enabled.
* **Privacy Optimization**: `show_debug_log` now defaults to `False` to prevent console noise.
## 🌐 Multilingual Support
Supports automatic interface and status switching for the following languages:
The plugin UI and status notifications automatically switch based on your language:
`English`, `简体中文`, `繁體中文 (香港)`, `繁體中文 (台灣)`, `한국어`, `日本語`, `Français`, `Deutsch`, `Español`, `Italiano`, `Tiếng Việt`, `Bahasa Indonesia`.
## ✨ Core Features
* **Details Tag Normalization**: Ensures proper spacing for `<details>` tags (used for thought chains). Adds a blank line after `</details>` and ensures a newline after self-closing `<details />` tags to prevent rendering issues.
* **Emphasis Spacing Fix**: Fixes extra spaces inside emphasis markers (e.g., `** text **` -> `**text**`) which can cause rendering failures. Includes safeguards to protect math expressions (e.g., `2 * 3 * 4`) and list variables.
* **Mermaid Syntax Fix**: Automatically fixes common Mermaid syntax errors, such as unquoted node labels (including multi-line labels and citations) and unclosed subgraphs. **New in v1.1.2**: Comprehensive protection for edge labels (text on connecting lines) across all link types (solid, dotted, thick).
* **Frontend Console Debugging**: Supports printing structured debug logs directly to the browser console (F12) for easier troubleshooting.
* **Code Block Formatting**: Fixes broken code block prefixes, suffixes, and indentation.
* **LaTeX Normalization**: Standardizes LaTeX formula delimiters (`\[` -> `$$`, `\(` -> `$`).
* **Thought Tag Normalization**: Unifies thought tags (`<think>`, `<thinking>` -> `<thought>`).
* **Escape Character Fix**: Cleans up excessive escape characters (`\\n`, `\\t`).
* **List Formatting**: Ensures proper newlines in list items.
* **Heading Fix**: Adds missing spaces in headings (`#Heading` -> `# Heading`).
* **Table Fix**: Adds missing closing pipes in tables.
* **XML Cleanup**: Removes leftover XML artifacts.
## How to Use 🛠️
1. Install the plugin in Open WebUI.
2. Enable the filter globally or for specific models.
3. Configure the enabled fixes in the **Valves** settings.
4. (Optional) **Show Debug Log** is enabled by default in Valves. This prints structured logs to the browser console (F12).
> [!WARNING]
> As this is an initial version, some "negative fixes" might occur (e.g., breaking valid Markdown). If you encounter issues, please check the console logs, copy the "Original" vs "Normalized" content, and submit an issue.
2. Enable the filter globally or assign it to specific models (highly recommended for models with poor formatting).
3. Tune the specific fixes you want via the **Valves** settings.
## Configuration (Valves) ⚙️
| Parameter | Default | Description |
| :--- | :--- | :--- |
| `priority` | `50` | Filter priority. Higher runs later (recommended after other filters). |
| `enable_escape_fix` | `True` | Fix excessive escape characters (`\n`, `\t`, etc.). |
| `enable_escape_fix_in_code_blocks` | `False` | Apply escape fix inside code blocks (may affect valid code). |
| `enable_thought_tag_fix` | `True` | Normalize thought tags (`</thought>`). |
| `enable_details_tag_fix` | `True` | Normalize `<details>` tags and add safe spacing. |
| `enable_code_block_fix` | `True` | Fix code block formatting (indentation/newlines). |
| `enable_latex_fix` | `True` | Normalize LaTeX delimiters (`\[` -> `$$`, `\(` -> `$`). |
| `priority` | `50` | Filter priority. Higher runs later (recommended to run this after all other content filters). |
| `enable_escape_fix` | `True` | Convert excessive literal escape characters (`\n`, `\t`) to real spacing. |
| `enable_escape_fix_in_code_blocks` | `False` | **Pro-tip**: Turn this ON if your SQL/HTML code blocks are constantly printing on a single line. Turn OFF for Python/C++. |
| `enable_thought_tag_fix` | `True` | Normalize `<think>` tags. |
| `enable_details_tag_fix` | `True` | Normalize `<details>` spacing. |
| `enable_code_block_fix` | `True` | Fix code block indentation and newlines. |
| `enable_latex_fix` | `True` | Standardize LaTeX delimiters (`\[` -> `$$`). |
| `enable_list_fix` | `False` | Fix list item newlines (experimental). |
| `enable_unclosed_block_fix` | `True` | Auto-close unclosed code blocks. |
| `enable_fullwidth_symbol_fix` | `False` | Fix full-width symbols in code blocks. |
| `enable_mermaid_fix` | `True` | Fix common Mermaid syntax errors. |
| `enable_heading_fix` | `True` | Fix missing space in headings. |
| `enable_table_fix` | `True` | Fix missing closing pipe in tables. |
| `enable_xml_tag_cleanup` | `True` | Cleanup leftover XML tags. |
| `enable_emphasis_spacing_fix` | `False` | Fix extra spaces in emphasis. |
| `show_status` | `True` | Show status notification when fixes are applied. |
| `show_debug_log` | `True` | Print debug logs to browser console (F12). |
| `enable_mermaid_fix` | `True` | Fix common Mermaid syntax errors (auto-quoting). |
| `enable_heading_fix` | `True` | Add missing space after heading hashes (`#Title` -> `# Title`). |
| `enable_table_fix` | `True` | Add missing closing pipe in tables. |
| `enable_xml_tag_cleanup` | `True` | Remove leftover XML artifacts. |
| `enable_emphasis_spacing_fix` | `False` | Fix extra spaces in emphasis formatting. |
| `show_status` | `True` | Show UI status notification when a fix is actively applied. |
| `show_debug_log` | `False` | Print detailed before/after diffs to browser console (F12). |
## ⭐ Support
If this plugin has been useful, a star on [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) is a big motivation for me. Thank you for the support.
If this plugin saves your day, a star on [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) is a big motivation for me. Thank you!
## 🧩 Others
### Troubleshooting ❓
* **Submit an Issue**: If you encounter any problems, please submit an issue on GitHub: [OpenWebUI Extensions Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)
### Changelog
See the full history on GitHub: [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions)
* **Troubleshooting**: Encountering "negative fixes"? Enable `show_debug_log`, check your console, and submit an issue on GitHub: [OpenWebUI Extensions Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)

View File

@@ -1,81 +1,87 @@
# Markdown 格式化过滤器 (Markdown Normalizer)
**作者:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **Version:** 1.2.7 | **项目:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **许可证:** MIT
**作者:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **版本:** 1.2.8 | **项目:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **许可证:** MIT
这是一个用于 Open WebUI 的内容格式化过滤器,旨在修复 LLM 输出中常见的 Markdown 格式问题。它能确保代码块、LaTeX 公式、Mermaid 图表和其他 Markdown 元素被正确渲染
这是一个强大的、具备上下文感知的 Markdown 内容规范化过滤器,专为 Open WebUI 设计,旨在实时修复大语言模型 (LLM) 输出中常见的格式错乱问题。它能确保代码块、LaTeX 公式、Mermaid 图表以及其他结构化元素被完美渲染,同时**绝不破坏**你原有的有效技术内容(如代码、正则、路径)
> 🏆 **OpenWebUI 官方推荐** — 获得 OpenWebUI 社区 Newsletter 官方推荐:[2026 年 1 月 28 日](https://openwebui.com/blog/newsletter-january-28-2026)
> 🏆 **OpenWebUI 官方推荐** — 本插件获得 OpenWebUI 社区 Newsletter 官方推荐:[2026 年 1 月 28 日](https://openwebui.com/blog/newsletter-january-28-2026)
## 🔥 最新更新 v1.2.7
[English](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README.md) | [简体中文](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README_CN.md)
* **LaTeX 公式保护**: 增强了转义字符清理逻辑,自动保护 `$ $``$$ $$` 内的 LaTeX 命令(如 `\times``\nu``\theta`),防止渲染失效。
* **扩展国际化 (i18n) 支持**: 现已支持 12 种语言,具备自动探测与回退机制。
* **配置项优化**: 将 Valves 配置项的描述统一为英文,保持界面一致性。
* **修复 Bug**:
* 修复了 [Issue #49](https://github.com/Fu-Jie/openwebui-extensions/issues/49):解决了当同一行存在多个加粗部分时,由于正则匹配过于贪婪导致中间内容丢失空格的问题。
* 修复了插件代码中的 `NameError` 错误,确保测试脚本能正常运行
---
## 🚀 为什么你需要这个插件?(它能解决什么问题?)
由于分词 (Tokenization) 伪影、过度转义或格式幻觉,LLM 经常会生成破损的 Markdown。如果你遇到过以下情况:
- `mermaid` 图表因为节点标签缺少双引号而渲染失败、白屏
- LLM 输出的 SQL 语句挤在一行,因为本该换行的地方输出了字面量 `\n`
- 复杂的 `<details>` (思维链展开块) 因为缺少换行符导致整个聊天界面排版崩塌。
- LaTeX 数学公式无法显示,因为模型使用了旧版的 `\[` 而不是 Markdown 支持的 `$$`
**本插件会自动拦截 LLM 返回的原始数据,实时分析其文本结构,并像外科手术一样精准修复这些排版错误,然后再将其展示在你的浏览器中。**
## ✨ 核心功能与修复能力全景
### 1. 高级结构保护 (上下文感知)
在执行任何修改前,插件会为整个文本建立语义地图,确保技术性内容不被误伤:
- **代码块保护**:默认跳过 ` ``` ` 内部的内容,保护所有编程逻辑。
- **行内代码保护**:识别 `` `代码` `` 片段,防止正则表达式(如 `[\n\r]`)或文件路径(如 `C:\Windows`)被错误地去转义。
- **LaTeX 公式保护**:识别行内 (`$`) 和块级 (`$$`) 公式,防止诸如 `\times`, `\theta` 等核心数学命令被意外破坏。
### 2. 自动治愈转换 (Auto-Healing)
- **Details 标签排版修复**`<details>` 块要求极为严格的空行才能正确渲染内部内容。插件会自动在 `</details>` 以及自闭合 `<details />` 标签后注入安全的换行符。
- **Mermaid 语法急救**:自动修复最常见的 Mermaid 错误——为未加引号的节点标签(如 `A --> B(Some text)`)自动补充双引号,甚至支持多行标签和引用,确保拓扑图 100% 渲染。
- **强调语法间距修复**:修复加粗/斜体语法内部多余的空格(如 `** 文本 **` 变为 `**文本**`,否则 OpenWebUI 无法加粗),同时智能忽略数学算式(如 `2 * 3 * 4`)。
- **智能转义字符清理**:将模型过度转义生成的字面量 `\n``\t` 转化为真正的换行和缩进(仅在安全的纯文本区域执行)。
- **LaTeX 现代化转换**:自动将旧式的 LaTeX 定界符(`\[...\]``\(...\)`)升级为现代 Markdown 标准(`$$...$$``$ ... $`)。
- **思维标签大一统**:无论模型输出的是 `<think>` 还是 `<thinking>`,统一标准化为 `<thought>` 标签。
- **残缺代码块修复**:修复乱码的语言前缀(例如 ` ```python`),调整缩进,并在模型回答被截断时,自动补充闭合的 ` ``` `
- **列表与表格急救**:为粘连的编号列表注入换行,为残缺的 Markdown 表格补充末尾的闭合管道符(`|`)。
- **XML 伪影消除**:静默移除 Claude 模型经常泄露的 `<antArtifact>``<antThinking>` 残留标签。
### 3. 绝对的可靠性与安全 (100% Rollback)
- **无损回滚机制**:如果在修复过程中发生任何意外错误或崩溃,插件会立即捕获异常,并静默返回**绝对原始**的文本,确保你的对话永远不会因插件报错而丢失。
## 🔥 最新更新 v1.2.8
* **可靠性增强**:修复了错误回滚机制。当规范化过程中发生意外错误时,插件现在会正确返回原始文本,而不是返回被部分修改的损坏内容。
* **内联代码保护**:优化了转义字符清理逻辑,现在会保护内联代码块(`` `...` ``)不被错误转义,防止破坏有效的代码片段。
* **配置项修复**`enable_escape_fix_in_code_blocks` 配置项现在能正确作用于代码块了。**在代码块内修复换行符(比如修复 SQL只需在设置中开启此选项即可。**
* **隐私与日志优化**:将 `show_debug_log` 默认值修改为 `False`,避免将可能敏感的内容自动输出到浏览器控制台,并减少不必要的日志噪音。
## 🌐 多语言支持 (i18n)
支持以下语言的界面状态自动切换:
界面状态提示气泡会根据你的浏览器语言自动切换:
`English`, `简体中文`, `繁體中文 (香港)`, `繁體中文 (台灣)`, `한국어`, `日本語`, `Français`, `Deutsch`, `Español`, `Italiano`, `Tiếng Việt`, `Bahasa Indonesia`
## ✨ 核心特性
* **Details 标签规范化**: 确保 `<details>` 标签(常用于思维链)有正确的间距。在 `</details>` 后添加空行,并在自闭合 `<details />` 标签后添加换行,防止渲染问题。
* **强调空格修复**: 修复强调标记内部的多余空格(例如 `** 文本 **` -> `**文本**`),这会导致 Markdown 渲染失败。包含保护机制,防止误修改数学表达式(如 `2 * 3 * 4`)或列表变量。
* **Mermaid 语法修复**: 自动修复常见的 Mermaid 语法错误,如未加引号的节点标签(支持多行标签和引用标记)和未闭合的子图 (Subgraph)。**v1.1.2 新增**: 全面保护各种类型的连线标签(实线、虚线、粗线),防止被误修改。
* **前端控制台调试**: 支持将结构化的调试日志直接打印到浏览器控制台 (F12),方便排查问题。
* **代码块格式化**: 修复破损的代码块前缀、后缀和缩进问题。
* **LaTeX 规范化**: 标准化 LaTeX 公式定界符 (`\[` -> `$$`, `\(` -> `$`)。
* **思维标签规范化**: 统一思维链标签 (`<think>`, `<thinking>` -> `<thought>`)。
* **转义字符修复**: 清理过度的转义字符 (`\\n`, `\\t`)。
* **列表格式化**: 确保列表项有正确的换行。
* **标题修复**: 修复标题中缺失的空格 (`#标题` -> `# 标题`)。
* **表格修复**: 修复表格中缺失的闭合管道符。
* **XML 清理**: 移除残留的 XML 标签。
## 使用方法
## 使用方法 🛠️
1. 在 Open WebUI 中安装此插件。
2. 全局启用或为特定模型启用此过滤器。
3. 在 **Valves** 设置中配置需要启用的修复项。
4. (可选) **显示调试日志 (Show Debug Log)** 在 Valves 中默认开启。这会将结构化的日志打印到浏览器控制台 (F12)。
> [!WARNING]
> 由于这是初版,可能会出现“负向修复”的情况(例如破坏了原本正确的格式)。如果您遇到问题,请务必查看控制台日志,复制“原始 (Original)”与“规范化 (Normalized)”的内容对比,并提交 Issue 反馈。
2. 全局启用或为特定模型启用此过滤器(强烈建议为格式输出不稳定的模型启用)
3. 在 **Valves (配置参数)** 设置中微调你需要的修复项。
## 配置参数 (Valves) ⚙️
| 参数 | 默认值 | 描述 |
| :--- | :--- | :--- |
| `priority` | `50` | 过滤器优先级。数值越大越靠后(建议在其他过滤器之后运行)。 |
| `enable_escape_fix` | `True` | 修复过度的转义字符(`\n`, `\t`)。 |
| `enable_escape_fix_in_code_blocks` | `False` | 在代码块内应用转义修复(可能影响有效代码)。 |
| `enable_thought_tag_fix` | `True` | 规范化思维标签`</thought>`。 |
| `enable_details_tag_fix` | `True` | 规范化 `<details>` 标签并添加安全间距。 |
| `enable_code_block_fix` | `True` | 修复代码块格式(缩进/换行)。 |
| `enable_latex_fix` | `True` | 规范化 LaTeX 定界符(`\[` -> `$$`, `\(` -> `$`)。 |
| `priority` | `50` | 过滤器优先级。数值越大越靠后(建议在其他内容过滤器之后运行)。 |
| `enable_escape_fix` | `True` | 修复过度的转义字符(将字面量 `\n` 转换为实际换行)。 |
| `enable_escape_fix_in_code_blocks` | `False` | **高阶技巧**:如果你的 SQL 或 HTML 代码块总是挤在一行,**请开启此项**。如果你经常写 Python/C++,建议保持关闭。 |
| `enable_thought_tag_fix` | `True` | 规范化思维标签`<thought>`。 |
| `enable_details_tag_fix` | `True` | 修复 `<details>` 标签的排版间距。 |
| `enable_code_block_fix` | `True` | 修复代码块前缀、缩进换行。 |
| `enable_latex_fix` | `True` | 规范化 LaTeX 定界符(`\[` -> `$$`)。 |
| `enable_list_fix` | `False` | 修复列表项换行(实验性)。 |
| `enable_unclosed_block_fix` | `True` | 自动闭合未闭合的代码块。 |
| `enable_fullwidth_symbol_fix` | `False` | 修复代码块中的全角符号。 |
| `enable_mermaid_fix` | `True` | 修复常见 Mermaid 语法错误。 |
| `enable_heading_fix` | `True` | 修复标题中缺失的空格。 |
| `enable_unclosed_block_fix` | `True` | 自动闭合被截断的代码块。 |
| `enable_mermaid_fix` | `True` | 修复常见 Mermaid 语法错误(如自动加引号)。 |
| `enable_heading_fix` | `True` | 修复标题中缺失的空格 (`#Title` -> `# Title`)。 |
| `enable_table_fix` | `True` | 修复表格中缺失的闭合管道符。 |
| `enable_xml_tag_cleanup` | `True` | 清理残留的 XML 标签。 |
| `enable_emphasis_spacing_fix` | `False` | 修复强调语法的多余空格。 |
| `show_status` | `True` | 应用修复时显示状态通知。 |
| `show_debug_log` | `True` | 在浏览器控制台打印调试日志。 |
| `enable_xml_tag_cleanup` | `True` | 清理残留的 XML 分析标签。 |
| `enable_emphasis_spacing_fix` | `False` | 修复强调语法(加粗/斜体)内部的多余空格。 |
| `show_status` | `True` | 当触发任何修复规则时,在页面底部显示提示气泡。 |
| `show_debug_log` | `False` | 在浏览器控制台 (F12) 打印修改前后的详细对比日志。 |
## ⭐ 支持
如果这个插件拯救了你的排版,欢迎到 [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) 点个 Star,这是我持续改进的最大动力。感谢支持!
如果这个插件对你有帮助,欢迎到 [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) 点个 Star,这将是我持续改进的动力,感谢支持。
## 其他
### 故障排除 (Troubleshooting) ❓
* **提交 Issue**: 如果遇到任何问题,请在 GitHub 上提交 Issue[OpenWebUI Extensions Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)
### 更新日志
完整历史请查看 GitHub 项目: [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions)
## 🧩 其他
* **故障排除**:遇到“负向修复”(即原本正常的排版被修坏了)?请开启 `show_debug_log`,在 F12 控制台复制出原始文本,并在 GitHub 提交 Issue[提交 Issue](https://github.com/Fu-Jie/openwebui-extensions/issues)

View File

@@ -1,81 +1,87 @@
# Markdown Normalizer Filter
**Author:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **Version:** 1.2.7 | **Project:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **License:** MIT
**Author:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **Version:** 1.2.8 | **Project:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **License:** MIT
A content normalizer filter for Open WebUI that fixes common Markdown formatting issues in LLM outputs. It ensures that code blocks, LaTeX formulas, Mermaid diagrams, and other Markdown elements are rendered correctly.
A powerful, context-aware content normalizer filter for Open WebUI designed to fix common Markdown formatting issues in LLM outputs. It ensures that code blocks, LaTeX formulas, Mermaid diagrams, and other structural Markdown elements are rendered flawlessly, without destroying valid technical content.
> 🏆 **Featured by OpenWebUI Official** — This plugin was recommended in the official OpenWebUI Community Newsletter: [January 28, 2026](https://openwebui.com/blog/newsletter-january-28-2026)
## 🔥 What's New in v1.2.7
[English](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README.md) | [简体中文](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README_CN.md)
* **LaTeX Formula Protection**: Enhanced escape character cleaning to protect LaTeX commands like `\times`, `\nu`, and `\theta` from being corrupted.
* **Expanded i18n Support**: Now supports 12 languages with automatic detection and fallback.
* **Valves Optimization**: Optimized configuration descriptions to be English-only for better consistency.
* **Bug Fixes**:
* Resolved [Issue #49](https://github.com/Fu-Jie/openwebui-extensions/issues/49): Fixed a bug where consecutive bold parts on the same line caused spaces between them to be removed.
* Fixed a `NameError` in the plugin code that caused test collection failures.
---
## 🚀 Why do you need this plugin? (What does it do?)
Language Models (LLMs) often generate malformed Markdown due to tokenization artifacts, aggressive escaping, or hallucinated formatting. If you've ever seen:
- A `mermaid` diagram fail to render because of missing quotes around labels.
- A SQL block stuck on a single line because `\n` was output literally instead of a real newline.
- A `<details>` block break the entire chat rendering because of missing newlines.
- A LaTeX formula fail because the LLM used `\[` instead of `$$`.
**This plugin automatically intercepts the LLM's raw output, analyzes its structure, and surgically repairs these formatting errors in real-time before they reach your browser.**
## ✨ Comprehensive Feature List
### 1. Advanced Structural Protections (Context-Aware)
Before making any changes, the plugin builds a semantic map of the text to protect your technical content:
- **Code Block Protection**: Skips formatting inside ` ``` ` code blocks by default to protect code logic.
- **Inline Code Protection**: Recognizes `` `code` `` snippets and protects regular expressions and file paths (e.g., `C:\Windows`) from being incorrectly unescaped.
- **LaTeX Protection**: Identifies inline (`$`) and block (`$$`) formulas to prevent modifying critical math commands like `\times`, `\theta`, or `\nu`.
### 2. Auto-Healing Transformations
- **Details Tag Normalization**: `<details>` blocks (often used for Chain of Thought) require strict spacing to render correctly. The plugin automatically injects blank lines after `</details>` and self-closing `<details />` tags.
- **Mermaid Syntax Fixer**: One of the most common LLM errors is omitting quotes in Mermaid diagrams (e.g., `A --> B(Some text)`). This plugin parses the Mermaid syntax and auto-quotes labels and citations to guarantee the graph renders.
- **Emphasis Spacing Fix**: Fixes formatting-breaking extra spaces inside bold/italic markers (e.g., `** text **` becomes `**text**`) while cleverly ignoring math expressions like `2 * 3 * 4`.
- **Intelligent Escape Character Cleanup**: Removes excessive literal `\n` and `\t` generated by some models and converts them to actual structural newlines (only in safe text areas).
- **LaTeX Standardization**: Automatically upgrades old-school LaTeX delimiters (`\[...\]` and `\(...\)`) to modern Markdown standards (`$$...$$` and `$ ... $`).
- **Thought Tag Unification**: Standardizes various model thought outputs (`<think>`, `<thinking>`) into a unified `<thought>` tag.
- **Broken Code Block Repair**: Fixes indentation issues, repairs mangled language prefixes (e.g., ` ```python`), and automatically closes unclosed code blocks if a generation was cut off.
- **List & Table Formatting**: Injects missing newlines to repair broken numbered lists and adds missing closing pipes (`|`) to tables.
- **XML Artifact Cleanup**: Silently removes leftover `<antArtifact>` or `<antThinking>` tags often leaked by Claude models.
### 3. Reliability & Safety
- **100% Rollback Guarantee**: If any normalization logic fails or crashes, the plugin catches the error and silently returns the exact original text, ensuring your chat never breaks.
## 🔥 What's New in v1.2.8
* **Reliability Enhancement**: Fixed the error fallback mechanism. If normalization fails, the plugin now returns the original text unchanged instead of partially modified content, so nothing is lost.
* **Inline Code Protection**: Upgraded escaping logic to protect inline code blocks (`` `...` ``).
* **Code Block Escaping Control**: The `enable_escape_fix_in_code_blocks` Valve now correctly targets broken newlines inside code blocks (perfect for fixing flat SQL queries) when enabled.
* **Privacy Optimization**: `show_debug_log` now defaults to `False`, which keeps potentially sensitive chat content out of the browser console and reduces log noise.
## 🌐 Multilingual Support
Supports automatic interface and status switching for the following languages:
The plugin UI and status notifications automatically switch based on your language:
`English`, `简体中文`, `繁體中文 (香港)`, `繁體中文 (台灣)`, `한국어`, `日本語`, `Français`, `Deutsch`, `Español`, `Italiano`, `Tiếng Việt`, `Bahasa Indonesia`.
## ✨ Core Features
* **Details Tag Normalization**: Ensures proper spacing for `<details>` tags (used for thought chains). Adds a blank line after `</details>` and ensures a newline after self-closing `<details />` tags to prevent rendering issues.
* **Emphasis Spacing Fix**: Fixes extra spaces inside emphasis markers (e.g., `** text **` -> `**text**`) which can cause rendering failures. Includes safeguards to protect math expressions (e.g., `2 * 3 * 4`) and list variables.
* **Mermaid Syntax Fix**: Automatically fixes common Mermaid syntax errors, such as unquoted node labels (including multi-line labels and citations) and unclosed subgraphs. **New in v1.1.2**: Comprehensive protection for edge labels (text on connecting lines) across all link types (solid, dotted, thick).
* **Frontend Console Debugging**: Supports printing structured debug logs directly to the browser console (F12) for easier troubleshooting.
* **Code Block Formatting**: Fixes broken code block prefixes, suffixes, and indentation.
* **LaTeX Normalization**: Standardizes LaTeX formula delimiters (`\[` -> `$$`, `\(` -> `$`).
* **Thought Tag Normalization**: Unifies thought tags (`<think>`, `<thinking>` -> `<thought>`).
* **Escape Character Fix**: Cleans up excessive escape characters (`\\n`, `\\t`).
* **List Formatting**: Ensures proper newlines in list items.
* **Heading Fix**: Adds missing spaces in headings (`#Heading` -> `# Heading`).
* **Table Fix**: Adds missing closing pipes in tables.
* **XML Cleanup**: Removes leftover XML artifacts.
## How to Use 🛠️
1. Install the plugin in Open WebUI.
2. Enable the filter globally or for specific models.
3. Configure the enabled fixes in the **Valves** settings.
4. (Optional) **Show Debug Log** is enabled by default in Valves. This prints structured logs to the browser console (F12).
> [!WARNING]
> As this is an initial version, some "negative fixes" might occur (e.g., breaking valid Markdown). If you encounter issues, please check the console logs, copy the "Original" vs "Normalized" content, and submit an issue.
2. Enable the filter globally or assign it to specific models (highly recommended for models with poor formatting).
3. Tune the specific fixes you want via the **Valves** settings.
## Configuration (Valves) ⚙️
| Parameter | Default | Description |
| :--- | :--- | :--- |
| `priority` | `50` | Filter priority. Higher runs later (recommended after other filters). |
| `enable_escape_fix` | `True` | Fix excessive escape characters (`\n`, `\t`, etc.). |
| `enable_escape_fix_in_code_blocks` | `False` | Apply escape fix inside code blocks (may affect valid code). |
| `enable_thought_tag_fix` | `True` | Normalize thought tags (`</thought>`). |
| `enable_details_tag_fix` | `True` | Normalize `<details>` tags and add safe spacing. |
| `enable_code_block_fix` | `True` | Fix code block formatting (indentation/newlines). |
| `enable_latex_fix` | `True` | Normalize LaTeX delimiters (`\[` -> `$$`, `\(` -> `$`). |
| `priority` | `50` | Filter priority. Higher runs later (recommended to run this after all other content filters). |
| `enable_escape_fix` | `True` | Convert excessive literal escape characters (`\n`, `\t`) to real spacing. |
| `enable_escape_fix_in_code_blocks` | `False` | **Pro-tip**: Turn this ON if your SQL/HTML code blocks are constantly printing on a single line. Turn OFF for Python/C++. |
| `enable_thought_tag_fix` | `True` | Normalize `<think>` tags. |
| `enable_details_tag_fix` | `True` | Normalize `<details>` spacing. |
| `enable_code_block_fix` | `True` | Fix code block indentation and newlines. |
| `enable_latex_fix` | `True` | Standardize LaTeX delimiters (`\[` -> `$$`). |
| `enable_list_fix` | `False` | Fix list item newlines (experimental). |
| `enable_unclosed_block_fix` | `True` | Auto-close unclosed code blocks. |
| `enable_fullwidth_symbol_fix` | `False` | Fix full-width symbols in code blocks. |
| `enable_mermaid_fix` | `True` | Fix common Mermaid syntax errors. |
| `enable_heading_fix` | `True` | Fix missing space in headings. |
| `enable_table_fix` | `True` | Fix missing closing pipe in tables. |
| `enable_xml_tag_cleanup` | `True` | Cleanup leftover XML tags. |
| `enable_emphasis_spacing_fix` | `False` | Fix extra spaces in emphasis. |
| `show_status` | `True` | Show status notification when fixes are applied. |
| `show_debug_log` | `True` | Print debug logs to browser console (F12). |
| `enable_mermaid_fix` | `True` | Fix common Mermaid syntax errors (auto-quoting). |
| `enable_heading_fix` | `True` | Add missing space after heading hashes (`#Title` -> `# Title`). |
| `enable_table_fix` | `True` | Add missing closing pipe in tables. |
| `enable_xml_tag_cleanup` | `True` | Remove leftover XML artifacts. |
| `enable_emphasis_spacing_fix` | `False` | Fix extra spaces in emphasis formatting. |
| `show_status` | `True` | Show UI status notification when a fix is actively applied. |
| `show_debug_log` | `False` | Print detailed before/after diffs to browser console (F12). |
## ⭐ Support
If this plugin has been useful, a star on [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) is a big motivation for me. Thank you for the support.
If this plugin saves your day, a star on [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) is a big motivation for me. Thank you!
## 🧩 Others
### Troubleshooting ❓
* **Submit an Issue**: If you encounter any problems, please submit an issue on GitHub: [OpenWebUI Extensions Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)
### Changelog
See the full history on GitHub: [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions)
* **Troubleshooting**: Encountering "negative fixes"? Enable `show_debug_log`, check your console, and submit an issue on GitHub: [OpenWebUI Extensions Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)

View File

@@ -1,81 +1,87 @@
# Markdown 格式化过滤器 (Markdown Normalizer)
**作者:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **版本:** 1.2.7 | **项目:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **许可证:** MIT
**作者:** [Fu-Jie](https://github.com/Fu-Jie/openwebui-extensions) | **版本:** 1.2.8 | **项目:** [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) | **许可证:** MIT
这是一个用于 Open WebUI 的内容格式化过滤器,旨在修复 LLM 输出中常见的 Markdown 格式问题。它能确保代码块、LaTeX 公式、Mermaid 图表和其他 Markdown 元素被正确渲染
这是一个强大的、具备上下文感知的 Markdown 内容规范化过滤器,专为 Open WebUI 设计,旨在实时修复大语言模型 (LLM) 输出中常见的格式错乱问题。它能确保代码块、LaTeX 公式、Mermaid 图表以及其他结构化元素被完美渲染,同时**绝不破坏**你原有的有效技术内容(如代码、正则、路径)
> 🏆 **OpenWebUI 官方推荐** — 本插件获得 OpenWebUI 社区 Newsletter 官方推荐:[2026 年 1 月 28 日](https://openwebui.com/blog/newsletter-january-28-2026)
## 🔥 最新更新 v1.2.7
[English](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README.md) | [简体中文](https://github.com/Fu-Jie/openwebui-extensions/blob/main/plugins/filters/markdown_normalizer/README_CN.md)
* **LaTeX 公式保护**: 增强了转义字符清理逻辑,自动保护 `$ $``$$ $$` 内的 LaTeX 命令(如 `\times``\nu``\theta`),防止渲染失效。
* **扩展国际化 (i18n) 支持**: 现已支持 12 种语言,具备自动探测与回退机制。
* **配置项优化**: 将 Valves 配置项的描述统一为英文,保持界面一致性。
* **修复 Bug**:
* 修复了 [Issue #49](https://github.com/Fu-Jie/openwebui-extensions/issues/49):解决了当同一行存在多个加粗部分时,由于正则匹配过于贪婪导致中间内容丢失空格的问题。
* 修复了插件代码中的 `NameError` 错误,确保测试脚本能正常运行
---
## 🚀 为什么你需要这个插件?(它能解决什么问题?)
由于分词 (Tokenization) 伪影、过度转义或格式幻觉,LLM 经常会生成破损的 Markdown。如果你遇到过以下情况:
- `mermaid` 图表因为节点标签缺少双引号而渲染失败、白屏
- LLM 输出的 SQL 语句挤在一行,因为本该换行的地方输出了字面量 `\n`
- 复杂的 `<details>` (思维链展开块) 因为缺少换行符导致整个聊天界面排版崩塌。
- LaTeX 数学公式无法显示,因为模型使用了旧版的 `\[` 而不是 Markdown 支持的 `$$`
**本插件会自动拦截 LLM 返回的原始数据,实时分析其文本结构,并像外科手术一样精准修复这些排版错误,然后再将其展示在你的浏览器中。**
## ✨ 核心功能与修复能力全景
### 1. 高级结构保护 (上下文感知)
在执行任何修改前,插件会为整个文本建立语义地图,确保技术性内容不被误伤:
- **代码块保护**:默认跳过 ` ``` ` 内部的内容,保护所有编程逻辑。
- **行内代码保护**:识别 `` `代码` `` 片段,防止正则表达式(如 `[\n\r]`)或文件路径(如 `C:\Windows`)被错误地去转义。
- **LaTeX 公式保护**:识别行内 (`$`) 和块级 (`$$`) 公式,防止诸如 `\times`, `\theta` 等核心数学命令被意外破坏。
### 2. 自动治愈转换 (Auto-Healing)
- **Details 标签排版修复**`<details>` 块要求极为严格的空行才能正确渲染内部内容。插件会自动在 `</details>` 以及自闭合 `<details />` 标签后注入安全的换行符。
- **Mermaid 语法急救**:自动修复最常见的 Mermaid 错误——为未加引号的节点标签(如 `A --> B(Some text)`)自动补充双引号,甚至支持多行标签和引用,确保拓扑图 100% 渲染。
- **强调语法间距修复**:修复加粗/斜体语法内部多余的空格(如 `** 文本 **` 变为 `**文本**`,否则 OpenWebUI 无法加粗),同时智能忽略数学算式(如 `2 * 3 * 4`)。
- **智能转义字符清理**:将模型过度转义生成的字面量 `\n``\t` 转化为真正的换行和缩进(仅在安全的纯文本区域执行)。
- **LaTeX 现代化转换**:自动将旧式的 LaTeX 定界符(`\[...\]``\(...\)`)升级为现代 Markdown 标准(`$$...$$``$ ... $`)。
- **思维标签大一统**:无论模型输出的是 `<think>` 还是 `<thinking>`,统一标准化为 `<thought>` 标签。
- **残缺代码块修复**:修复乱码的语言前缀(例如 ` ```python`),调整缩进,并在模型回答被截断时,自动补充闭合的 ` ``` `
- **列表与表格急救**:为粘连的编号列表注入换行,为残缺的 Markdown 表格补充末尾的闭合管道符(`|`)。
- **XML 伪影消除**:静默移除 Claude 模型经常泄露的 `<antArtifact>``<antThinking>` 残留标签。
### 3. 绝对的可靠性与安全 (100% Rollback)
- **无损回滚机制**:如果在修复过程中发生任何意外错误或崩溃,插件会立即捕获异常,并静默返回**绝对原始**的文本,确保你的对话永远不会因插件报错而丢失。
## 🔥 最新更新 v1.2.8
* **可靠性增强**:修复了错误回滚机制。当规范化过程中发生意外错误时,插件现在会正确返回原始文本,而不是返回被部分修改的损坏内容。
* **内联代码保护**:优化了转义字符清理逻辑,现在会保护内联代码块(`` `...` ``)不被错误转义,防止破坏有效的代码片段。
* **配置项修复**`enable_escape_fix_in_code_blocks` 配置项现在能正确作用于代码块了。**在代码块内修复换行符(比如修复 SQL只需在设置中开启此选项即可。**
* **隐私与日志优化**:将 `show_debug_log` 默认值修改为 `False`,避免将可能敏感的内容自动输出到浏览器控制台,并减少不必要的日志噪音。
## 🌐 多语言支持 (i18n)
支持以下语言的界面状态自动切换:
界面状态提示气泡会根据你的浏览器语言自动切换:
`English`, `简体中文`, `繁體中文 (香港)`, `繁體中文 (台灣)`, `한국어`, `日本語`, `Français`, `Deutsch`, `Español`, `Italiano`, `Tiếng Việt`, `Bahasa Indonesia`
## ✨ 核心特性
* **Details 标签规范化**: 确保 `<details>` 标签(常用于思维链)有正确的间距。在 `</details>` 后添加空行,并在自闭合 `<details />` 标签后添加换行,防止渲染问题。
* **强调空格修复**: 修复强调标记内部的多余空格(例如 `** 文本 **` -> `**文本**`),这会导致 Markdown 渲染失败。包含保护机制,防止误修改数学表达式(如 `2 * 3 * 4`)或列表变量。
* **Mermaid 语法修复**: 自动修复常见的 Mermaid 语法错误,如未加引号的节点标签(支持多行标签和引用标记)和未闭合的子图 (Subgraph)。**v1.1.2 新增**: 全面保护各种类型的连线标签(实线、虚线、粗线),防止被误修改。
* **前端控制台调试**: 支持将结构化的调试日志直接打印到浏览器控制台 (F12),方便排查问题。
* **代码块格式化**: 修复破损的代码块前缀、后缀和缩进问题。
* **LaTeX 规范化**: 标准化 LaTeX 公式定界符 (`\[` -> `$$`, `\(` -> `$`)。
* **思维标签规范化**: 统一思维链标签 (`<think>`, `<thinking>` -> `<thought>`)。
* **转义字符修复**: 清理过度的转义字符 (`\\n`, `\\t`)。
* **列表格式化**: 确保列表项有正确的换行。
* **标题修复**: 修复标题中缺失的空格 (`#标题` -> `# 标题`)。
* **表格修复**: 修复表格中缺失的闭合管道符。
* **XML 清理**: 移除残留的 XML 标签。
## 使用方法
## 使用方法 🛠️
1. 在 Open WebUI 中安装此插件。
2. 全局启用或为特定模型启用此过滤器。
3. 在 **Valves** 设置中配置需要启用的修复项。
4. (可选) **显示调试日志 (Show Debug Log)** 在 Valves 中默认开启。这会将结构化的日志打印到浏览器控制台 (F12)。
> [!WARNING]
> 由于这是初版,可能会出现“负向修复”的情况(例如破坏了原本正确的格式)。如果您遇到问题,请务必查看控制台日志,复制“原始 (Original)”与“规范化 (Normalized)”的内容对比,并提交 Issue 反馈。
2. 全局启用或为特定模型启用此过滤器(强烈建议为格式输出不稳定的模型启用)
3. 在 **Valves (配置参数)** 设置中微调你需要的修复项。
## 配置参数 (Valves) ⚙️
| 参数 | 默认值 | 描述 |
| :--- | :--- | :--- |
| `priority` | `50` | 过滤器优先级。数值越大越靠后(建议在其他过滤器之后运行)。 |
| `enable_escape_fix` | `True` | 修复过度的转义字符(`\n`, `\t`)。 |
| `enable_escape_fix_in_code_blocks` | `False` | 在代码块内应用转义修复(可能影响有效代码)。 |
| `enable_thought_tag_fix` | `True` | 规范化思维标签`</thought>`。 |
| `enable_details_tag_fix` | `True` | 规范化 `<details>` 标签并添加安全间距。 |
| `enable_code_block_fix` | `True` | 修复代码块格式(缩进/换行)。 |
| `enable_latex_fix` | `True` | 规范化 LaTeX 定界符(`\[` -> `$$`, `\(` -> `$`)。 |
| `priority` | `50` | 过滤器优先级。数值越大越靠后(建议在其他内容过滤器之后运行)。 |
| `enable_escape_fix` | `True` | 修复过度的转义字符(将字面量 `\n` 转换为实际换行)。 |
| `enable_escape_fix_in_code_blocks` | `False` | **高阶技巧**:如果你的 SQL 或 HTML 代码块总是挤在一行,**请开启此项**。如果你经常写 Python/C++,建议保持关闭。 |
| `enable_thought_tag_fix` | `True` | 规范化思维标签`<thought>`。 |
| `enable_details_tag_fix` | `True` | 修复 `<details>` 标签的排版间距。 |
| `enable_code_block_fix` | `True` | 修复代码块前缀、缩进换行。 |
| `enable_latex_fix` | `True` | 规范化 LaTeX 定界符(`\[` -> `$$`)。 |
| `enable_list_fix` | `False` | 修复列表项换行(实验性)。 |
| `enable_unclosed_block_fix` | `True` | 自动闭合未闭合的代码块。 |
| `enable_fullwidth_symbol_fix` | `False` | 修复代码块中的全角符号。 |
| `enable_mermaid_fix` | `True` | 修复常见 Mermaid 语法错误。 |
| `enable_heading_fix` | `True` | 修复标题中缺失的空格。 |
| `enable_unclosed_block_fix` | `True` | 自动闭合被截断的代码块。 |
| `enable_mermaid_fix` | `True` | 修复常见 Mermaid 语法错误(如自动加引号)。 |
| `enable_heading_fix` | `True` | 修复标题中缺失的空格 (`#Title` -> `# Title`)。 |
| `enable_table_fix` | `True` | 修复表格中缺失的闭合管道符。 |
| `enable_xml_tag_cleanup` | `True` | 清理残留的 XML 标签。 |
| `enable_emphasis_spacing_fix` | `False` | 修复强调语法的多余空格。 |
| `show_status` | `True` | 应用修复时显示状态通知。 |
| `show_debug_log` | `True` | 在浏览器控制台打印调试日志。 |
| `enable_xml_tag_cleanup` | `True` | 清理残留的 XML 分析标签。 |
| `enable_emphasis_spacing_fix` | `False` | 修复强调语法(加粗/斜体)内部的多余空格。 |
| `show_status` | `True` | 当触发任何修复规则时,在页面底部显示提示气泡。 |
| `show_debug_log` | `False` | 在浏览器控制台 (F12) 打印修改前后的详细对比日志。 |
## ⭐ 支持
如果这个插件拯救了你的排版,欢迎到 [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) 点个 Star,这是我持续改进的最大动力。感谢支持!
如果这个插件对你有帮助,欢迎到 [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions) 点个 Star,这将是我持续改进的动力,感谢支持。
## 其他
### 故障排除 (Troubleshooting) ❓
* **提交 Issue**: 如果遇到任何问题,请在 GitHub 上提交 Issue[OpenWebUI Extensions Issues](https://github.com/Fu-Jie/openwebui-extensions/issues)
### 更新日志
完整历史请查看 GitHub 项目: [OpenWebUI Extensions](https://github.com/Fu-Jie/openwebui-extensions)
## 🧩 其他
* **故障排除**:遇到“负向修复”(即原本正常的排版被修坏了)?请开启 `show_debug_log`,在 F12 控制台复制出原始文本,并在 GitHub 提交 Issue[提交 Issue](https://github.com/Fu-Jie/openwebui-extensions/issues)

View File

@@ -3,7 +3,7 @@ title: Markdown Normalizer
author: Fu-Jie
author_url: https://github.com/Fu-Jie/openwebui-extensions
funding_url: https://github.com/open-webui
version: 1.2.7
version: 1.2.8
openwebui_id: baaa8732-9348-40b7-8359-7e009660e23c
description: A content normalizer filter that fixes common Markdown formatting issues in LLM outputs, such as broken code blocks, LaTeX formulas, and list formatting. Including LaTeX command protection.
"""
@@ -456,28 +456,45 @@ class ContentNormalizer:
except Exception as e:
# Production safeguard: return original content on error
logger.error(f"Content normalization failed: {e}", exc_info=True)
return content
return original_content
def _fix_escape_characters(self, content: str) -> str:
"""Fix excessive escape characters while protecting LaTeX and code blocks."""
"""Fix excessive escape characters while protecting LaTeX, code blocks, and inline code."""
def clean_text(text: str) -> str:
# Only fix \n and double backslashes, skip \t as it's dangerous for LaTeX (\times, \theta)
# First handle literal escaped newlines
text = text.replace("\\r\\n", "\n")
text = text.replace("\\n", "\n")
# Then handle double backslashes that are not followed by n or r
# (which would have been part of an escaped newline handled above)
# Use regex to replace \\ with \ only if not followed by n or r
# But wait, \n is already \n (actual newline) here.
# So we can safely replace all remaining \\ with \
text = text.replace("\\\\", "\\")
return text
# 1. Protect code blocks
# 1. Protect block code
parts = content.split("```")
for i in range(0, len(parts), 2): # Even indices are text
# 2. Protect LaTeX formulas within text
# Split by $ to find inline/block math
sub_parts = parts[i].split("$")
for j in range(0, len(sub_parts), 2): # Even indices are non-math text
sub_parts[j] = clean_text(sub_parts[j])
parts[i] = "$".join(sub_parts)
for i in range(0, len(parts)):
is_code_block = (i % 2 != 0)
if is_code_block and not self.config.enable_escape_fix_in_code_blocks:
continue
if not is_code_block:
# 2. Protect inline code
inline_parts = parts[i].split("`")
for k in range(0, len(inline_parts), 2): # Even indices are non-inline-code text
# 3. Protect LaTeX formulas within text
# Split by $ to find inline/block math
sub_parts = inline_parts[k].split("$")
for j in range(0, len(sub_parts), 2): # Even indices are non-math text
sub_parts[j] = clean_text(sub_parts[j])
inline_parts[k] = "$".join(sub_parts)
parts[i] = "`".join(inline_parts)
else:
# Inside code block and enable_escape_fix_in_code_blocks is True
parts[i] = clean_text(parts[i])
return "```".join(parts)
@@ -767,7 +784,7 @@ class Filter:
description="Show status notification when fixes are applied.",
)
show_debug_log: bool = Field(
default=True,
default=False,
description="Print debug logs to browser console (F12).",
)

View File

@@ -0,0 +1,13 @@
# v1.2.8 Release Notes
This release focuses on significantly improving the reliability and safety of the Markdown Normalizer filter, ensuring that it never corrupts valid technical content and gracefully handles unexpected errors.
## Bug Fixes
- **Error Fallback Mechanism**: Fixed an issue where the plugin could return partially modified or broken text if an error occurred during normalization. It now guarantees a 100% rollback to the original text upon any failure.
- **Inline Code Protection**: Refined the escape character fixing logic to accurately identify and protect inline code blocks (`` `...` ``). This prevents valid technical strings, such as regular expressions (`[\n\r]`) and Windows file paths (`C:\Windows`), from being unintentionally modified.
- **Code Block Escaping Control**: Fixed a bug where the `enable_escape_fix_in_code_blocks` Valve setting was ignored. The setting now correctly applies, allowing users to optionally fix broken newlines inside code blocks (e.g., repairing flat SQL queries) when enabled.
## New Features
- **Privacy & Log Optimization**: The `show_debug_log` Valve now defaults to `False` instead of `True`. This prevents sensitive chat content from automatically printing to the browser console and reduces unnecessary log noise for general users.

View File

@@ -0,0 +1,13 @@
# v1.2.8 版本发布说明
本次更新重点在于大幅提升 Markdown Normalizer 插件的可靠性与安全性,确保它在任何情况下都不会损坏有效的技术内容,并能优雅地处理各种意外错误。
## 问题修复
- **错误回滚机制 (Error Fallback)**:修复了规范化过程中如果发生错误会导致返回残缺或损坏文本的问题。现在,插件在遇到任何异常失败时,保证 100% 回滚并返回原始文本,确保对话内容不丢失。
- **内联代码保护 (Inline Code Protection)**:优化了转义字符的修复逻辑,现在能够精准识别并保护内联代码块(`` `...` ``)。这防止了像正则表达式(`[\n\r]`)和 Windows 文件路径(`C:\Windows`)这样的有效技术字符串被意外修改。
- **代码块转义控制修复 (Code Block Escaping Control)**:修复了 `enable_escape_fix_in_code_blocks` 配置项失效的 Bug。现在该选项可以正常生效当开启时用户可以借此修复代码块内部例如 SQL 查询语句)因错误转义导致挤在一行的问题。
## 新功能
- **隐私与日志优化 (Privacy & Log Optimization)**`show_debug_log` 的默认值从 `True` 更改为了 `False`。这避免了将可能包含敏感信息的对话内容自动打印到浏览器控制台,并减少了普通用户的日志噪音。

View File

@@ -0,0 +1,53 @@
from plugins.filters.markdown_normalizer.markdown_normalizer import ContentNormalizer, NormalizerConfig
def test_error_rollback():
    """Issue 57-1: a raising cleaner must leave the text untouched.

    Registers a custom cleaner that always raises, runs ``normalize`` and
    checks that the returned string equals the input exactly.
    """
    original = "Content that should NOT be modified on error."

    def broken_cleaner(text):
        # Stand-in for a normalization step that crashes part-way through.
        raise RuntimeError("Plugin Crash Simulation")

    normalizer = ContentNormalizer(
        NormalizerConfig(custom_cleaners=[broken_cleaner])
    )

    assert normalizer.normalize(original) == original
def test_inline_code_protection():
    """Issue 57-2: backslashes inside inline code spans must survive.

    The input carries a regex and a Windows path wrapped in backticks, plus
    an escaped newline sequence in plain text at the very end.
    """
    normalizer = ContentNormalizer(NormalizerConfig(enable_escape_fix=True))
    source_text = "Regex: `[\\\\n\\\\r]` and Path: `C:\\\\\\\\Windows` and Normal: \\\\n"

    result = normalizer.normalize(source_text)

    # Both backtick-delimited spans have to come back exactly as written.
    assert "`[\\\\n\\\\r]`" in result
    assert "`C:\\\\\\\\Windows`" in result
    # The literal above holds no real newline, so finding one in the output
    # means the trailing plain-text escape sequence was converted.
    assert "\n" in result
def test_code_block_escape_control():
    """Issue 57-3: exercise the ``enable_escape_fix_in_code_blocks`` valve.

    The same fenced Python snippet, containing an escaped newline sequence
    inside a ``print`` call, is normalized with the valve off and then on.
    """
    fenced = "```python\nprint('\\\\n')\n```"

    # Valve off (the default): fenced code must be returned unchanged.
    passthrough = ContentNormalizer(
        NormalizerConfig(enable_escape_fix_in_code_blocks=False)
    )
    assert passthrough.normalize(fenced) == fenced

    # Valve on: no backslash-n pair may remain in the fenced segment.
    rewriting = ContentNormalizer(
        NormalizerConfig(enable_escape_fix_in_code_blocks=True)
    )
    fixed = rewriting.normalize(fenced)
    assert "\n" in fixed
    # Index 1 of the split is the text between the opening and closing fence.
    fenced_segment = fixed.split("```")[1]
    assert "\\n" not in fenced_segment
def test_latex_protection():
    """Regression: the escape fix must not touch LaTeX inside ``$...$``.

    The inline formula uses commands whose names begin with ``t`` and ``n``;
    the plain text after it ends with an escaped newline sequence.
    """
    normalizer = ContentNormalizer(NormalizerConfig(enable_escape_fix=True))
    source_text = "Math: $\\\\times \\\\theta \\\\nu$ and Normal: \\\\n"

    result = normalizer.normalize(source_text)

    # The formula must be carried over verbatim.
    assert "$\\\\times \\\\theta \\\\nu$" in result
    # The input literal has no real newline, so one appearing here shows the
    # trailing plain-text escape sequence was still converted.
    assert "\n" in result
if __name__ == "__main__":
    # Lets the file be run directly as a script: every check executes in
    # declaration order and the first failing assert aborts the run.
    for check in (
        test_error_rollback,
        test_inline_code_protection,
        test_code_block_escape_control,
        test_latex_protection,
    ):
        check()
    print("All tests passed!")