Update context enhancement filter

2026-01-10 18:47:32 +08:00
parent 7085e794a3
commit 06fdfee182
1 changed file with 4 additions and 275 deletions
--- a/plugins/filters/context_enhancement_filter/context_enhancement_filter.py
+++ b/plugins/filters/context_enhancement_filter/context_enhancement_filter.py
@@ -1,12 +1,9 @@
 """
 title: Context & Model Enhancement Filter
-author: Fu-Jie
-author_url: https://github.com/Fu-Jie
-funding_url: https://github.com/Fu-Jie/awesome-openwebui
-version: 0.2
+version: 0.3

 description:
-    一个功能全面的 Filter 插件，用于增强请求上下文和优化模型功能。提供四大核心功能：
+    一个专注于增强请求上下文和优化模型功能的 Filter 插件。提供三大核心功能：

    1. 环境变量注入：在每条用户消息前自动注入用户环境变量（用户名、时间、时区、语言等）
       - 支持纯文本、图片、多模态消息
@@ -24,222 +21,24 @@ description:
       - 动态模型重定向
       - 智能化的模型识别和适配

-    4. 智能内容规范化：生产级的内容清洗与修复系统
-       - 智能修复损坏的代码块（前缀、后缀、缩进）
-       - 规范化 LaTeX 公式格式（行内/块级）
-       - 优化思维链标签（</thought>）格式
-       - 自动闭合未结束的代码块
-       - 智能列表格式修复
-       - 清理冗余的 XML 标签
-       - 可配置的规则系统
-
 features:
    - 自动化环境变量管理
    - 智能模型功能适配
    - 异步状态反馈
    - 幂等性保证
    - 多模型支持
-    - 智能内容清洗与规范化
 """

 from pydantic import BaseModel, Field
-from typing import Optional, List, Callable
+from typing import Optional
 import re
 import logging
-from dataclasses import dataclass, field
+import asyncio


 # 配置日志
 logger = logging.getLogger(__name__)

-@dataclass
-class NormalizerConfig:
-    """规范化配置类,用于动态启用/禁用特定规则"""
-    enable_escape_fix: bool = True          # 修复转义字符
-    enable_thought_tag_fix: bool = True     # 修复思考链标签
-    enable_code_block_fix: bool = True      # 修复代码块格式
-    enable_latex_fix: bool = True           # 修复 LaTeX 公式格式
-    enable_list_fix: bool = False            # 修复列表换行
-    enable_unclosed_block_fix: bool = True  # 修复未闭合代码块
-    enable_fullwidth_symbol_fix: bool = False # 修复代码内的全角符号
-    enable_xml_tag_cleanup: bool = True     # 清理 XML 残留标签
-    
-    # 自定义清理函数列表（高级扩展用）
-    custom_cleaners: List[Callable[[str], str]] = field(default_factory=list)
-
-class ContentNormalizer:
-    """LLM 输出内容规范化器 - 生产级实现"""
-    
-    # --- 1. 预编译正则表达式（性能优化） ---
-    _PATTERNS = {
-        # 代码块前缀：如果 ``` 前面不是行首也不是换行符
-        'code_block_prefix': re.compile(r'(?<!^)(?<!\n)(```)', re.MULTILINE),
-        
-        # 代码块后缀：匹配 ```语言名 后面紧跟非空白字符(没有换行)
-        # 匹配 ```python code 这种情况，但不匹配 ```python 或 ```python\n
-        'code_block_suffix': re.compile(r'(```[\w\+\-\.]*)[ \t]+([^\n\r])'),
-        
-        # 代码块缩进：行首的空白字符 + ```
-        'code_block_indent': re.compile(r'^[ \t]+(```)', re.MULTILINE),
-        
-        # 思考链标签：</thought> 后可能跟空格或换行
-        'thought_tag': re.compile(r'</thought>[ \t]*\n*'),
-        
-        # LaTeX 块级公式：\[ ... \]
-        'latex_bracket_block': re.compile(r'\\\[(.+?)\\\]', re.DOTALL),
-        # LaTeX 行内公式：\( ... \)
-        'latex_paren_inline': re.compile(r'\\\((.+?)\\\)'),
-        
-        # 列表项：非换行符 + 数字 + 点 + 空格 (e.g. "Text1. Item")
-        'list_item': re.compile(r'([^\n])(\d+\. )'),
-        
-        # XML 残留标签 (如 Claude 的 artifacts)
-        'xml_artifacts': re.compile(r'</?(?:antArtifact|antThinking|artifact)[^>]*>', re.IGNORECASE),
-    }
-    
-    def __init__(self, config: Optional[NormalizerConfig] = None):
-        self.config = config or NormalizerConfig()
-        self.applied_fixes = []
-    
-    def normalize(self, content: str) -> str:
-        """主入口：按顺序应用所有规范化规则"""
-        self.applied_fixes = []
-        if not content:
-            return content
-        
-        try:
-            # 1. 转义字符修复（必须最先执行，否则影响后续正则）
-            if self.config.enable_escape_fix:
-                original = content
-                content = self._fix_escape_characters(content)
-                if content != original:
-                    self.applied_fixes.append("修复转义字符")
-            
-            # 2. 思考链标签规范化
-            if self.config.enable_thought_tag_fix:
-                original = content
-                content = self._fix_thought_tags(content)
-                if content != original:
-                    self.applied_fixes.append("规范化思考链")
-            
-            # 3. 代码块格式修复
-            if self.config.enable_code_block_fix:
-                original = content
-                content = self._fix_code_blocks(content)
-                if content != original:
-                    self.applied_fixes.append("修复代码块格式")
-            
-            # 4. LaTeX 公式规范化
-            if self.config.enable_latex_fix:
-                original = content
-                content = self._fix_latex_formulas(content)
-                if content != original:
-                    self.applied_fixes.append("规范化 LaTeX 公式")
-            
-            # 5. 列表格式修复
-            if self.config.enable_list_fix:
-                original = content
-                content = self._fix_list_formatting(content)
-                if content != original:
-                    self.applied_fixes.append("修复列表格式")
-            
-            # 6. 未闭合代码块检测与修复
-            if self.config.enable_unclosed_block_fix:
-                original = content
-                content = self._fix_unclosed_code_blocks(content)
-                if content != original:
-                    self.applied_fixes.append("闭合未结束代码块")
-            
-            # 7. 全角符号转半角（仅代码块内）
-            if self.config.enable_fullwidth_symbol_fix:
-                original = content
-                content = self._fix_fullwidth_symbols_in_code(content)
-                if content != original:
-                    self.applied_fixes.append("全角符号转半角")
-            
-            # 8. XML 标签残留清理
-            if self.config.enable_xml_tag_cleanup:
-                original = content
-                content = self._cleanup_xml_tags(content)
-                if content != original:
-                    self.applied_fixes.append("清理 XML 标签")
-            
-            # 9. 执行自定义清理函数
-            for cleaner in self.config.custom_cleaners:
-                original = content
-                content = cleaner(content)
-                if content != original:
-                    self.applied_fixes.append("执行自定义清理")
-            
-            return content
-            
-        except Exception as e:
-            # 生产环境保底机制：如果清洗过程报错，返回原始内容，避免阻断服务
-            logger.error(f"内容规范化失败: {e}", exc_info=True)
-            return content
-    
-    def _fix_escape_characters(self, content: str) -> str:
-        """修复过度转义的字符"""
-        # 注意：先处理具体的转义序列，再处理通用的双反斜杠
-        content = content.replace("\\r\\n", "\n")
-        content = content.replace("\\n", "\n")
-        content = content.replace("\\t", "\t")
-        # 修复过度转义的反斜杠 (例如路径 C:\\Users)
-        content = content.replace("\\\\", "\\")
-        return content
-    
-    def _fix_thought_tags(self, content: str) -> str:
-        """规范化 </thought> 标签，统一为空两行"""
-        return self._PATTERNS['thought_tag'].sub("</thought>\n\n", content)
-    
-    def _fix_code_blocks(self, content: str) -> str:
-        """修复代码块格式（独占行、换行、去缩进）"""
-        # C: 移除代码块前的缩进（必须先执行，否则影响下面的判断）
-        content = self._PATTERNS['code_block_indent'].sub(r"\1", content)
-        # A: 确保 ``` 前有换行
-        content = self._PATTERNS['code_block_prefix'].sub(r"\n\1", content)
-        # B: 确保 ```语言标识 后有换行
-        content = self._PATTERNS['code_block_suffix'].sub(r"\1\n\2", content)
-        return content
-    
-    def _fix_latex_formulas(self, content: str) -> str:
-        """规范化 LaTeX 公式：\[ -> $$ (块级), \( -> $ (行内)"""
-        content = self._PATTERNS['latex_bracket_block'].sub(r"$$\1$$", content)
-        content = self._PATTERNS['latex_paren_inline'].sub(r"$\1$", content)
-        return content
-    
-    def _fix_list_formatting(self, content: str) -> str:
-        """修复列表项缺少换行的问题 (如 'text1. item' -> 'text\\n1. item')"""
-        return self._PATTERNS['list_item'].sub(r"\1\n\2", content)
-    
-    def _fix_unclosed_code_blocks(self, content: str) -> str:
-        """检测并修复未闭合的代码块"""
-        if content.count("```") % 2 != 0:
-            logger.warning("检测到未闭合的代码块，自动补全")
-            content += "\n```"
-        return content
-    
-    def _fix_fullwidth_symbols_in_code(self, content: str) -> str:
-        """在代码块内将全角符号转为半角（精细化操作）"""
-        # 常见误用的全角符号映射
-        FULLWIDTH_MAP = {
-            '，': ',', '。': '.', '（': '(', '）': ')',
-            '【': '[', '】': ']', '；': ';', '：': ':',
-            '？': '?', '！': '!', '“': '"', '”': '"',
-            '‘': "'", '’': "'",
-        }
-        
-        parts = content.split("```")
-        # 代码块内容位于索引 1, 3, 5... (奇数位)
-        for i in range(1, len(parts), 2):
-            for full, half in FULLWIDTH_MAP.items():
-                parts[i] = parts[i].replace(full, half)
-        
-        return "```".join(parts)
-    
-    def _cleanup_xml_tags(self, content: str) -> str:
-        """移除无关的 XML 标签"""
-        return self._PATTERNS['xml_artifacts'].sub("", content)

 class Filter:
    class Valves(BaseModel):
@@ -349,13 +148,9 @@ class Filter:
                body["model"] = body["model"] + "-search"
                features["web_search"] = False
                search_enabled_for_model = True
-            if user_email == "yi204o@qq.com":
-                features["web_search"] = False

        # 如果启用了模型本身的搜索能力，发送状态提示
        if search_enabled_for_model and __event_emitter__:
-            import asyncio
-
            try:
                asyncio.create_task(
                    self._emit_search_status(__event_emitter__, model_name)
@@ -464,8 +259,6 @@ class Filter:

            # 环境变量注入成功后，发送状态提示给用户
            if env_injected and __event_emitter__:
-                import asyncio
-
                try:
                    # 如果在异步环境中，使用 await
                    asyncio.create_task(self._emit_env_status(__event_emitter__))
@@ -506,67 +299,3 @@ class Filter:
            )
        except Exception as e:
            print(f"发送搜索状态提示时出错: {e}")
-
-    async def _emit_normalization_status(self, __event_emitter__, applied_fixes: List[str] = None):
-        """
-        发送内容规范化完成的状态提示
-        """
-        description = "✓ 内容已自动规范化"
-        if applied_fixes:
-            description += f"：{', '.join(applied_fixes)}"
-
-        try:
-            await __event_emitter__(
-                {
-                    "type": "status",
-                    "data": {
-                        "description": description,
-                        "done": True,
-                    },
-                }
-            )
-        except Exception as e:
-            print(f"发送规范化状态提示时出错: {e}")
-
-    def _contains_html(self, content: str) -> bool:
-        """
-        检测内容是否包含 HTML 标签
-        """
-        # 匹配常见的 HTML 标签
-        pattern = r"<\s*/?\s*(?:html|head|body|div|span|p|br|hr|ul|ol|li|table|thead|tbody|tfoot|tr|td|th|img|a|b|i|strong|em|code|pre|blockquote|h[1-6]|script|style|form|input|button|label|select|option|iframe|link|meta|title)\b"
-        return bool(re.search(pattern, content, re.IGNORECASE))
-
-    def outlet(self, body: dict, __user__: Optional[dict] = None, __event_emitter__=None) -> dict:
-        """
-        处理传出响应体，通过修改最后一条助手消息的内容。
-        使用 ContentNormalizer 进行全面的内容规范化。
-        """
-        if "messages" in body and body["messages"]:
-            last = body["messages"][-1]
-            content = last.get("content", "") or ""
-            
-            if last.get("role") == "assistant" and isinstance(content, str):
-                # 如果包含 HTML，跳过规范化，为了防止错误格式化
-                if self._contains_html(content):
-                    return body
-
-                # 初始化规范化器
-                normalizer = ContentNormalizer()
-                
-                # 执行规范化
-                new_content = normalizer.normalize(content)
-                
-                # 更新内容
-                if new_content != content:
-                    last["content"] = new_content
-                    # 如果内容发生了改变，发送状态提示
-                    if __event_emitter__:
-                        import asyncio
-                        try:
-                            # 传入 applied_fixes
-                            asyncio.create_task(self._emit_normalization_status(__event_emitter__, normalizer.applied_fixes))
-                        except RuntimeError:
-                            # 假如不在循环中，则忽略
-                            pass
-        
-        return body