Update context enhancement filter
This commit is contained in:
@@ -1,12 +1,9 @@
|
|||||||
"""
|
"""
|
||||||
title: Context & Model Enhancement Filter
|
title: Context & Model Enhancement Filter
|
||||||
author: Fu-Jie
|
version: 0.3
|
||||||
author_url: https://github.com/Fu-Jie
|
|
||||||
funding_url: https://github.com/Fu-Jie/awesome-openwebui
|
|
||||||
version: 0.2
|
|
||||||
|
|
||||||
description:
|
description:
|
||||||
一个功能全面的 Filter 插件,用于增强请求上下文和优化模型功能。提供四大核心功能:
|
一个专注于增强请求上下文和优化模型功能的 Filter 插件。提供三大核心功能:
|
||||||
|
|
||||||
1. 环境变量注入:在每条用户消息前自动注入用户环境变量(用户名、时间、时区、语言等)
|
1. 环境变量注入:在每条用户消息前自动注入用户环境变量(用户名、时间、时区、语言等)
|
||||||
- 支持纯文本、图片、多模态消息
|
- 支持纯文本、图片、多模态消息
|
||||||
@@ -24,222 +21,24 @@ description:
|
|||||||
- 动态模型重定向
|
- 动态模型重定向
|
||||||
- 智能化的模型识别和适配
|
- 智能化的模型识别和适配
|
||||||
|
|
||||||
4. 智能内容规范化:生产级的内容清洗与修复系统
|
|
||||||
- 智能修复损坏的代码块(前缀、后缀、缩进)
|
|
||||||
- 规范化 LaTeX 公式格式(行内/块级)
|
|
||||||
- 优化思维链标签(</thought>)格式
|
|
||||||
- 自动闭合未结束的代码块
|
|
||||||
- 智能列表格式修复
|
|
||||||
- 清理冗余的 XML 标签
|
|
||||||
- 可配置的规则系统
|
|
||||||
|
|
||||||
features:
|
features:
|
||||||
- 自动化环境变量管理
|
- 自动化环境变量管理
|
||||||
- 智能模型功能适配
|
- 智能模型功能适配
|
||||||
- 异步状态反馈
|
- 异步状态反馈
|
||||||
- 幂等性保证
|
- 幂等性保证
|
||||||
- 多模型支持
|
- 多模型支持
|
||||||
- 智能内容清洗与规范化
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import Optional, List, Callable
|
from typing import Optional
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass, field
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
# 配置日志
|
# 配置日志
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class NormalizerConfig:
|
|
||||||
"""规范化配置类,用于动态启用/禁用特定规则"""
|
|
||||||
enable_escape_fix: bool = True # 修复转义字符
|
|
||||||
enable_thought_tag_fix: bool = True # 修复思考链标签
|
|
||||||
enable_code_block_fix: bool = True # 修复代码块格式
|
|
||||||
enable_latex_fix: bool = True # 修复 LaTeX 公式格式
|
|
||||||
enable_list_fix: bool = False # 修复列表换行
|
|
||||||
enable_unclosed_block_fix: bool = True # 修复未闭合代码块
|
|
||||||
enable_fullwidth_symbol_fix: bool = False # 修复代码内的全角符号
|
|
||||||
enable_xml_tag_cleanup: bool = True # 清理 XML 残留标签
|
|
||||||
|
|
||||||
# 自定义清理函数列表(高级扩展用)
|
|
||||||
custom_cleaners: List[Callable[[str], str]] = field(default_factory=list)
|
|
||||||
|
|
||||||
class ContentNormalizer:
|
|
||||||
"""LLM 输出内容规范化器 - 生产级实现"""
|
|
||||||
|
|
||||||
# --- 1. 预编译正则表达式(性能优化) ---
|
|
||||||
_PATTERNS = {
|
|
||||||
# 代码块前缀:如果 ``` 前面不是行首也不是换行符
|
|
||||||
'code_block_prefix': re.compile(r'(?<!^)(?<!\n)(```)', re.MULTILINE),
|
|
||||||
|
|
||||||
# 代码块后缀:匹配 ```语言名 后面紧跟非空白字符(没有换行)
|
|
||||||
# 匹配 ```python code 这种情况,但不匹配 ```python 或 ```python\n
|
|
||||||
'code_block_suffix': re.compile(r'(```[\w\+\-\.]*)[ \t]+([^\n\r])'),
|
|
||||||
|
|
||||||
# 代码块缩进:行首的空白字符 + ```
|
|
||||||
'code_block_indent': re.compile(r'^[ \t]+(```)', re.MULTILINE),
|
|
||||||
|
|
||||||
# 思考链标签:</thought> 后可能跟空格或换行
|
|
||||||
'thought_tag': re.compile(r'</thought>[ \t]*\n*'),
|
|
||||||
|
|
||||||
# LaTeX 块级公式:\[ ... \]
|
|
||||||
'latex_bracket_block': re.compile(r'\\\[(.+?)\\\]', re.DOTALL),
|
|
||||||
# LaTeX 行内公式:\( ... \)
|
|
||||||
'latex_paren_inline': re.compile(r'\\\((.+?)\\\)'),
|
|
||||||
|
|
||||||
# 列表项:非换行符 + 数字 + 点 + 空格 (e.g. "Text1. Item")
|
|
||||||
'list_item': re.compile(r'([^\n])(\d+\. )'),
|
|
||||||
|
|
||||||
# XML 残留标签 (如 Claude 的 artifacts)
|
|
||||||
'xml_artifacts': re.compile(r'</?(?:antArtifact|antThinking|artifact)[^>]*>', re.IGNORECASE),
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, config: Optional[NormalizerConfig] = None):
|
|
||||||
self.config = config or NormalizerConfig()
|
|
||||||
self.applied_fixes = []
|
|
||||||
|
|
||||||
def normalize(self, content: str) -> str:
|
|
||||||
"""主入口:按顺序应用所有规范化规则"""
|
|
||||||
self.applied_fixes = []
|
|
||||||
if not content:
|
|
||||||
return content
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 1. 转义字符修复(必须最先执行,否则影响后续正则)
|
|
||||||
if self.config.enable_escape_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_escape_characters(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("修复转义字符")
|
|
||||||
|
|
||||||
# 2. 思考链标签规范化
|
|
||||||
if self.config.enable_thought_tag_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_thought_tags(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("规范化思考链")
|
|
||||||
|
|
||||||
# 3. 代码块格式修复
|
|
||||||
if self.config.enable_code_block_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_code_blocks(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("修复代码块格式")
|
|
||||||
|
|
||||||
# 4. LaTeX 公式规范化
|
|
||||||
if self.config.enable_latex_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_latex_formulas(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("规范化 LaTeX 公式")
|
|
||||||
|
|
||||||
# 5. 列表格式修复
|
|
||||||
if self.config.enable_list_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_list_formatting(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("修复列表格式")
|
|
||||||
|
|
||||||
# 6. 未闭合代码块检测与修复
|
|
||||||
if self.config.enable_unclosed_block_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_unclosed_code_blocks(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("闭合未结束代码块")
|
|
||||||
|
|
||||||
# 7. 全角符号转半角(仅代码块内)
|
|
||||||
if self.config.enable_fullwidth_symbol_fix:
|
|
||||||
original = content
|
|
||||||
content = self._fix_fullwidth_symbols_in_code(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("全角符号转半角")
|
|
||||||
|
|
||||||
# 8. XML 标签残留清理
|
|
||||||
if self.config.enable_xml_tag_cleanup:
|
|
||||||
original = content
|
|
||||||
content = self._cleanup_xml_tags(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("清理 XML 标签")
|
|
||||||
|
|
||||||
# 9. 执行自定义清理函数
|
|
||||||
for cleaner in self.config.custom_cleaners:
|
|
||||||
original = content
|
|
||||||
content = cleaner(content)
|
|
||||||
if content != original:
|
|
||||||
self.applied_fixes.append("执行自定义清理")
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
# 生产环境保底机制:如果清洗过程报错,返回原始内容,避免阻断服务
|
|
||||||
logger.error(f"内容规范化失败: {e}", exc_info=True)
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _fix_escape_characters(self, content: str) -> str:
|
|
||||||
"""修复过度转义的字符"""
|
|
||||||
# 注意:先处理具体的转义序列,再处理通用的双反斜杠
|
|
||||||
content = content.replace("\\r\\n", "\n")
|
|
||||||
content = content.replace("\\n", "\n")
|
|
||||||
content = content.replace("\\t", "\t")
|
|
||||||
# 修复过度转义的反斜杠 (例如路径 C:\\Users)
|
|
||||||
content = content.replace("\\\\", "\\")
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _fix_thought_tags(self, content: str) -> str:
|
|
||||||
"""规范化 </thought> 标签,统一为空两行"""
|
|
||||||
return self._PATTERNS['thought_tag'].sub("</thought>\n\n", content)
|
|
||||||
|
|
||||||
def _fix_code_blocks(self, content: str) -> str:
|
|
||||||
"""修复代码块格式(独占行、换行、去缩进)"""
|
|
||||||
# C: 移除代码块前的缩进(必须先执行,否则影响下面的判断)
|
|
||||||
content = self._PATTERNS['code_block_indent'].sub(r"\1", content)
|
|
||||||
# A: 确保 ``` 前有换行
|
|
||||||
content = self._PATTERNS['code_block_prefix'].sub(r"\n\1", content)
|
|
||||||
# B: 确保 ```语言标识 后有换行
|
|
||||||
content = self._PATTERNS['code_block_suffix'].sub(r"\1\n\2", content)
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _fix_latex_formulas(self, content: str) -> str:
|
|
||||||
"""规范化 LaTeX 公式:\[ -> $$ (块级), \( -> $ (行内)"""
|
|
||||||
content = self._PATTERNS['latex_bracket_block'].sub(r"$$\1$$", content)
|
|
||||||
content = self._PATTERNS['latex_paren_inline'].sub(r"$\1$", content)
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _fix_list_formatting(self, content: str) -> str:
|
|
||||||
"""修复列表项缺少换行的问题 (如 'text1. item' -> 'text\\n1. item')"""
|
|
||||||
return self._PATTERNS['list_item'].sub(r"\1\n\2", content)
|
|
||||||
|
|
||||||
def _fix_unclosed_code_blocks(self, content: str) -> str:
|
|
||||||
"""检测并修复未闭合的代码块"""
|
|
||||||
if content.count("```") % 2 != 0:
|
|
||||||
logger.warning("检测到未闭合的代码块,自动补全")
|
|
||||||
content += "\n```"
|
|
||||||
return content
|
|
||||||
|
|
||||||
def _fix_fullwidth_symbols_in_code(self, content: str) -> str:
|
|
||||||
"""在代码块内将全角符号转为半角(精细化操作)"""
|
|
||||||
# 常见误用的全角符号映射
|
|
||||||
FULLWIDTH_MAP = {
|
|
||||||
',': ',', '。': '.', '(': '(', ')': ')',
|
|
||||||
'【': '[', '】': ']', ';': ';', ':': ':',
|
|
||||||
'?': '?', '!': '!', '"': '"', '"': '"',
|
|
||||||
''': "'", ''': "'",
|
|
||||||
}
|
|
||||||
|
|
||||||
parts = content.split("```")
|
|
||||||
# 代码块内容位于索引 1, 3, 5... (奇数位)
|
|
||||||
for i in range(1, len(parts), 2):
|
|
||||||
for full, half in FULLWIDTH_MAP.items():
|
|
||||||
parts[i] = parts[i].replace(full, half)
|
|
||||||
|
|
||||||
return "```".join(parts)
|
|
||||||
|
|
||||||
def _cleanup_xml_tags(self, content: str) -> str:
|
|
||||||
"""移除无关的 XML 标签"""
|
|
||||||
return self._PATTERNS['xml_artifacts'].sub("", content)
|
|
||||||
|
|
||||||
class Filter:
|
class Filter:
|
||||||
class Valves(BaseModel):
|
class Valves(BaseModel):
|
||||||
@@ -349,13 +148,9 @@ class Filter:
|
|||||||
body["model"] = body["model"] + "-search"
|
body["model"] = body["model"] + "-search"
|
||||||
features["web_search"] = False
|
features["web_search"] = False
|
||||||
search_enabled_for_model = True
|
search_enabled_for_model = True
|
||||||
if user_email == "yi204o@qq.com":
|
|
||||||
features["web_search"] = False
|
|
||||||
|
|
||||||
# 如果启用了模型本身的搜索能力,发送状态提示
|
# 如果启用了模型本身的搜索能力,发送状态提示
|
||||||
if search_enabled_for_model and __event_emitter__:
|
if search_enabled_for_model and __event_emitter__:
|
||||||
import asyncio
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
self._emit_search_status(__event_emitter__, model_name)
|
self._emit_search_status(__event_emitter__, model_name)
|
||||||
@@ -464,8 +259,6 @@ class Filter:
|
|||||||
|
|
||||||
# 环境变量注入成功后,发送状态提示给用户
|
# 环境变量注入成功后,发送状态提示给用户
|
||||||
if env_injected and __event_emitter__:
|
if env_injected and __event_emitter__:
|
||||||
import asyncio
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 如果在异步环境中,使用 await
|
# 如果在异步环境中,使用 await
|
||||||
asyncio.create_task(self._emit_env_status(__event_emitter__))
|
asyncio.create_task(self._emit_env_status(__event_emitter__))
|
||||||
@@ -506,67 +299,3 @@ class Filter:
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"发送搜索状态提示时出错: {e}")
|
print(f"发送搜索状态提示时出错: {e}")
|
||||||
|
|
||||||
async def _emit_normalization_status(self, __event_emitter__, applied_fixes: List[str] = None):
|
|
||||||
"""
|
|
||||||
发送内容规范化完成的状态提示
|
|
||||||
"""
|
|
||||||
description = "✓ 内容已自动规范化"
|
|
||||||
if applied_fixes:
|
|
||||||
description += f":{', '.join(applied_fixes)}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
await __event_emitter__(
|
|
||||||
{
|
|
||||||
"type": "status",
|
|
||||||
"data": {
|
|
||||||
"description": description,
|
|
||||||
"done": True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"发送规范化状态提示时出错: {e}")
|
|
||||||
|
|
||||||
def _contains_html(self, content: str) -> bool:
|
|
||||||
"""
|
|
||||||
检测内容是否包含 HTML 标签
|
|
||||||
"""
|
|
||||||
# 匹配常见的 HTML 标签
|
|
||||||
pattern = r"<\s*/?\s*(?:html|head|body|div|span|p|br|hr|ul|ol|li|table|thead|tbody|tfoot|tr|td|th|img|a|b|i|strong|em|code|pre|blockquote|h[1-6]|script|style|form|input|button|label|select|option|iframe|link|meta|title)\b"
|
|
||||||
return bool(re.search(pattern, content, re.IGNORECASE))
|
|
||||||
|
|
||||||
def outlet(self, body: dict, __user__: Optional[dict] = None, __event_emitter__=None) -> dict:
|
|
||||||
"""
|
|
||||||
处理传出响应体,通过修改最后一条助手消息的内容。
|
|
||||||
使用 ContentNormalizer 进行全面的内容规范化。
|
|
||||||
"""
|
|
||||||
if "messages" in body and body["messages"]:
|
|
||||||
last = body["messages"][-1]
|
|
||||||
content = last.get("content", "") or ""
|
|
||||||
|
|
||||||
if last.get("role") == "assistant" and isinstance(content, str):
|
|
||||||
# 如果包含 HTML,跳过规范化,为了防止错误格式化
|
|
||||||
if self._contains_html(content):
|
|
||||||
return body
|
|
||||||
|
|
||||||
# 初始化规范化器
|
|
||||||
normalizer = ContentNormalizer()
|
|
||||||
|
|
||||||
# 执行规范化
|
|
||||||
new_content = normalizer.normalize(content)
|
|
||||||
|
|
||||||
# 更新内容
|
|
||||||
if new_content != content:
|
|
||||||
last["content"] = new_content
|
|
||||||
# 如果内容发生了改变,发送状态提示
|
|
||||||
if __event_emitter__:
|
|
||||||
import asyncio
|
|
||||||
try:
|
|
||||||
# 传入 applied_fixes
|
|
||||||
asyncio.create_task(self._emit_normalization_status(__event_emitter__, normalizer.applied_fixes))
|
|
||||||
except RuntimeError:
|
|
||||||
# 假如不在循环中,则忽略
|
|
||||||
pass
|
|
||||||
|
|
||||||
return body
|
|
||||||
|
|||||||
Reference in New Issue
Block a user