From 0ef4d67d097c6b995278069a72921ef1c72ee73b Mon Sep 17 00:00:00 2001 From: fujie Date: Sat, 20 Dec 2025 14:59:55 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E4=BA=86=E4=B8=AD?= =?UTF-8?q?=E8=8B=B1=E6=96=87=E5=8A=A8=E4=BD=9C=E6=8F=92=E4=BB=B6=E6=A8=A1?= =?UTF-8?q?=E6=9D=BF=EF=BC=8C=E6=9B=B4=E6=96=B0=E4=BA=86=E6=91=98=E8=A6=81?= =?UTF-8?q?=E5=92=8C=E6=99=BA=E8=83=BD=E6=80=9D=E7=BB=B4=E5=AF=BC=E5=9B=BE?= =?UTF-8?q?=E6=8F=92=E4=BB=B6=EF=BC=8C=E5=B9=B6=E7=AE=80=E5=8C=96=E4=BA=86?= =?UTF-8?q?=E5=BC=82=E6=AD=A5=E4=B8=8A=E4=B8=8B=E6=96=87=E5=8E=8B=E7=BC=A9?= =?UTF-8?q?=E6=8F=92=E4=BB=B6=E7=9A=84=E6=A8=A1=E5=9E=8B=E9=98=88=E5=80=BC?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugins/actions/ACTION_PLUGIN_TEMPLATE.py | 274 +++++++++++++ plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py | 277 +++++++++++++ .../actions/smart-mind-map/smart_mind_map.py | 11 +- plugins/actions/smart-mind-map/思维导图.py | 12 +- plugins/actions/summary/summary.py | 11 +- plugins/actions/summary/精读.py | 20 +- .../async-context-compression/README_CN.md | 2 +- .../async_context_compression.py | 375 +----------------- .../异步上下文压缩.py | 169 +++++--- 9 files changed, 703 insertions(+), 448 deletions(-) create mode 100644 plugins/actions/ACTION_PLUGIN_TEMPLATE.py create mode 100644 plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py diff --git a/plugins/actions/ACTION_PLUGIN_TEMPLATE.py b/plugins/actions/ACTION_PLUGIN_TEMPLATE.py new file mode 100644 index 0000000..2b9eff7 --- /dev/null +++ b/plugins/actions/ACTION_PLUGIN_TEMPLATE.py @@ -0,0 +1,274 @@ +""" +title: [Plugin Name] (e.g., Smart Mind Map) +author: [Your Name] +author_url: [Your URL] +funding_url: [Funding URL] +version: 0.1.0 +icon_url: [Data URI or URL for Icon] +description: [Brief description of what the plugin does] +requirements: [List of dependencies, e.g., jinja2, markdown] +""" + +from pydantic import BaseModel, Field +from typing import 
Optional, Dict, Any, List, Callable, Awaitable +import logging +import re +import json +from fastapi import Request +from datetime import datetime +import pytz + +# Import OpenWebUI utilities +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +# Setup logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# ================================================================= +# Constants & Prompts +# ================================================================= + +SYSTEM_PROMPT = """ +[Insert System Prompt Here] +You are a helpful assistant... +Please output in [JSON/Markdown] format... +""" + +USER_PROMPT_TEMPLATE = """ +[Insert User Prompt Template Here] +User Context: +Name: {user_name} +Time: {current_date_time_str} + +Content to process: +{content} +""" + +# HTML Template for rendering the result in the chat +HTML_TEMPLATE = """ + + + + + + [Plugin Title] + + + +
+

[Result Title]

+
{result_content}
+
+ + +""" + + +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, + description="Whether to show operation status updates in the chat interface.", + ) + LLM_MODEL_ID: str = Field( + default="", + description="Built-in LLM Model ID used for processing. If empty, uses the current conversation's model.", + ) + MIN_TEXT_LENGTH: int = Field( + default=50, + description="Minimum text length required for processing (characters).", + ) + # Add other configuration fields as needed + # MAX_TEXT_LENGTH: int = Field(default=2000, description="...") + + def __init__(self): + self.valves = self.Valves() + + def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]: + """Extracts user context information.""" + if isinstance(__user__, (list, tuple)): + user_data = __user__[0] if __user__ else {} + elif isinstance(__user__, dict): + user_data = __user__ + else: + user_data = {} + + return { + "user_id": user_data.get("id", "unknown_user"), + "user_name": user_data.get("name", "User"), + "user_language": user_data.get("language", "en-US"), + } + + def _get_current_time_context(self) -> Dict[str, str]: + """Gets current time context.""" + try: + # Default to a specific timezone or system time + tz = pytz.timezone("Asia/Shanghai") # Change as needed + now = datetime.now(tz) + except Exception: + now = datetime.now() + + return { + "current_date_time_str": now.strftime("%Y-%m-%d %H:%M:%S"), + "current_weekday": now.strftime("%A"), + "current_year": now.strftime("%Y"), + "current_timezone_str": str(now.tzinfo) if now.tzinfo else "Unknown", + } + + def _process_llm_output(self, llm_output: str) -> Any: + """ + Process the raw output from the LLM. + Override this method to parse JSON, extract Markdown, etc. 
+ """ + # Example: Extract JSON + # try: + # start = llm_output.find('{') + # end = llm_output.rfind('}') + 1 + # if start != -1 and end != -1: + # return json.loads(llm_output[start:end]) + # except Exception: + # pass + return llm_output.strip() + + async def _emit_status( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + description: str, + done: bool = False, + ): + """Emits a status update event.""" + if self.valves.show_status and emitter: + await emitter( + {"type": "status", "data": {"description": description, "done": done}} + ) + + async def _emit_notification( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + content: str, + type: str = "info", + ): + """Emits a notification event (info, success, warning, error).""" + if emitter: + await emitter( + {"type": "notification", "data": {"type": type, "content": content}} + ) + + async def _emit_message( + self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str + ): + """Emits a message event (appends to current message).""" + if emitter: + await emitter({"type": "message", "data": {"content": content}}) + + async def _emit_replace( + self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str + ): + """Emits a replace event (replaces current message).""" + if emitter: + await emitter({"type": "replace", "data": {"content": content}}) + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Callable[[Any], Awaitable[None]]] = None, + __event_call__: Optional[Callable[[Any], Awaitable[Any]]] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + logger.info(f"Action: {__name__} started") + + # 1. Context Setup + user_context = self._get_user_context(__user__) + time_context = self._get_current_time_context() + + # 2. 
Input Validation + messages = body.get("messages", []) + if not messages or not messages[-1].get("content"): + return body # Or handle error + + original_content = messages[-1]["content"] + + if len(original_content) < self.valves.MIN_TEXT_LENGTH: + warning_msg = f"Text too short ({len(original_content)} chars). Minimum required: {self.valves.MIN_TEXT_LENGTH}." + await self._emit_notification(__event_emitter__, warning_msg, "warning") + return body # Or return a message indicating failure + + # 3. Status Notification (Start) + await self._emit_status(__event_emitter__, "Processing...", done=False) + + try: + # 4. Prepare Prompt + formatted_prompt = USER_PROMPT_TEMPLATE.format( + user_name=user_context["user_name"], + current_date_time_str=time_context["current_date_time_str"], + content=original_content, + # Add other context variables + ) + + # 5. Determine Model + target_model = self.valves.LLM_MODEL_ID + if not target_model: + target_model = body.get("model") + # Note: No hardcoded fallback here, relies on system/user context + + # 6. Call LLM + user_obj = Users.get_user_by_id(user_context["user_id"]) + + payload = { + "model": target_model, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": formatted_prompt}, + ], + "stream": False, + # "temperature": 0.5, + } + + llm_response = await generate_chat_completion( + __request__, payload, user_obj + ) + + if not llm_response or "choices" not in llm_response: + raise ValueError("Invalid LLM response") + + assistant_content = llm_response["choices"][0]["message"]["content"] + + # 7. Process Output + processed_data = self._process_llm_output(assistant_content) + + # 8. Generate HTML/Result + # Example: simple string replacement + final_html = HTML_TEMPLATE.replace("{result_content}", str(processed_data)) + final_html = final_html.replace( + "{user_language}", user_context["user_language"] + ) + + # 9. 
Inject Result + html_embed_tag = f"```html\n{final_html}\n```" + body["messages"][-1]["content"] += f"\n\n{html_embed_tag}" + + # 10. Status Notification (Success) + await self._emit_status( + __event_emitter__, "Completed successfully!", done=True + ) + await self._emit_notification( + __event_emitter__, "Action completed successfully.", "success" + ) + + except Exception as e: + logger.error(f"Action failed: {e}", exc_info=True) + error_msg = f"Error: {str(e)}" + + # Append error to chat (optional) + body["messages"][-1]["content"] += f"\n\n❌ **Error**: {error_msg}" + + return body diff --git a/plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py b/plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py new file mode 100644 index 0000000..ca70588 --- /dev/null +++ b/plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py @@ -0,0 +1,277 @@ +""" +title: [插件名称] (例如: 智能思维导图) +author: [作者姓名] +author_url: [作者主页链接] +funding_url: [赞助链接] +version: 0.1.0 +icon_url: [图标 URL 或 Data URI] +description: [简短描述插件的功能] +requirements: [依赖列表, 例如: jinja2, markdown] +""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any, List, Callable, Awaitable +import logging +import re +import json +from fastapi import Request +from datetime import datetime +import pytz + +# 导入 OpenWebUI 工具函数 +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +# 设置日志 +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# ================================================================= +# 常量与提示词 (Constants & Prompts) +# ================================================================= + +SYSTEM_PROMPT = """ +[在此处插入系统提示词] +你是一个有用的助手... +请以 [JSON/Markdown] 格式输出... +""" + +USER_PROMPT_TEMPLATE = """ +[在此处插入用户提示词模板] +用户上下文: +姓名: {user_name} +时间: {current_date_time_str} + +待处理内容: +{content} +""" + +# 用于在聊天中渲染结果的 HTML 模板 +HTML_TEMPLATE = """ + + + + + + [插件标题] + + + +
+

[结果标题]

+
{result_content}
+
+ + +""" + + +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, + description="是否在聊天界面显示操作状态更新。", + ) + LLM_MODEL_ID: str = Field( + default="", + description="用于处理的内置 LLM 模型 ID。如果为空,则使用当前对话的模型。", + ) + MIN_TEXT_LENGTH: int = Field( + default=50, + description="处理所需的最小文本长度(字符数)。", + ) + # 根据需要添加其他配置字段 + # MAX_TEXT_LENGTH: int = Field(default=2000, description="...") + + def __init__(self): + self.valves = self.Valves() + + def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]: + """提取用户上下文信息。""" + if isinstance(__user__, (list, tuple)): + user_data = __user__[0] if __user__ else {} + elif isinstance(__user__, dict): + user_data = __user__ + else: + user_data = {} + + return { + "user_id": user_data.get("id", "unknown_user"), + "user_name": user_data.get("name", "用户"), + "user_language": user_data.get("language", "zh-CN"), + } + + def _get_current_time_context(self) -> Dict[str, str]: + """获取当前时间上下文。""" + try: + # 默认为特定时区或系统时间 + tz = pytz.timezone("Asia/Shanghai") # 根据需要修改 + now = datetime.now(tz) + except Exception: + now = datetime.now() + + return { + "current_date_time_str": now.strftime("%Y-%m-%d %H:%M:%S"), + "current_weekday": now.strftime("%A"), + "current_year": now.strftime("%Y"), + "current_timezone_str": str(now.tzinfo) if now.tzinfo else "Unknown", + } + + def _process_llm_output(self, llm_output: str) -> Any: + """ + 处理 LLM 的原始输出。 + 重写此方法以解析 JSON、提取 Markdown 等。 + """ + # 示例: 提取 JSON + # try: + # start = llm_output.find('{') + # end = llm_output.rfind('}') + 1 + # if start != -1 and end != -1: + # return json.loads(llm_output[start:end]) + # except Exception: + # pass + return llm_output.strip() + + async def _emit_status( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + description: str, + done: bool = False, + ): + """发送状态更新事件。""" + if self.valves.show_status and emitter: + await emitter( + {"type": "status", "data": {"description": description, "done": done}} + ) + + 
async def _emit_notification( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + content: str, + type: str = "info", + ): + """发送通知事件 (info, success, warning, error)。""" + if emitter: + await emitter( + {"type": "notification", "data": {"type": type, "content": content}} + ) + + async def _emit_message( + self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str + ): + """发送消息追加事件 (追加到当前消息)。""" + if emitter: + await emitter({"type": "message", "data": {"content": content}}) + + async def _emit_replace( + self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str + ): + """发送消息替换事件 (替换当前消息)。""" + if emitter: + await emitter({"type": "replace", "data": {"content": content}}) + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Callable[[Any], Awaitable[None]]] = None, + __event_call__: Optional[Callable[[Any], Awaitable[Any]]] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + logger.info(f"Action: {__name__} started") + + # 1. 上下文设置 + user_context = self._get_user_context(__user__) + time_context = self._get_current_time_context() + + # 2. 输入验证 + messages = body.get("messages", []) + if not messages or not messages[-1].get("content"): + return body # 或者处理错误 + + original_content = messages[-1]["content"] + + if len(original_content) < self.valves.MIN_TEXT_LENGTH: + warning_msg = f"文本过短 ({len(original_content)} 字符)。最少需要: {self.valves.MIN_TEXT_LENGTH}。" + await self._emit_notification(__event_emitter__, warning_msg, "warning") + return body # 或者返回失败消息 + + # 3. 状态通知 (开始) + await self._emit_status(__event_emitter__, "正在处理...", done=False) + + try: + # 4. 准备提示词 + formatted_prompt = USER_PROMPT_TEMPLATE.format( + user_name=user_context["user_name"], + current_date_time_str=time_context["current_date_time_str"], + content=original_content, + # 添加其他上下文变量 + ) + + # 5. 
确定模型 + target_model = self.valves.LLM_MODEL_ID + if not target_model: + target_model = body.get("model") + # 注意: 这里没有硬编码的回退,依赖于系统/用户上下文 + + # 6. 调用 LLM + user_obj = Users.get_user_by_id(user_context["user_id"]) + + payload = { + "model": target_model, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": formatted_prompt}, + ], + "stream": False, + # "temperature": 0.5, + } + + llm_response = await generate_chat_completion( + __request__, payload, user_obj + ) + + if not llm_response or "choices" not in llm_response: + raise ValueError("无效的 LLM 响应") + + assistant_content = llm_response["choices"][0]["message"]["content"] + + # 7. 处理输出 + processed_data = self._process_llm_output(assistant_content) + + # 8. 生成 HTML/结果 + # 示例: 简单的字符串替换 + final_html = HTML_TEMPLATE.replace("{result_content}", str(processed_data)) + final_html = final_html.replace( + "{user_language}", user_context["user_language"] + ) + + # 9. 注入结果 + html_embed_tag = f"```html\n{final_html}\n```" + body["messages"][-1]["content"] += f"\n\n{html_embed_tag}" + + # 10. 
状态通知 (成功) + await self._emit_status(__event_emitter__, "处理完成!", done=True) + await self._emit_notification( + __event_emitter__, "操作成功完成。", "success" + ) + + except Exception as e: + logger.error(f"Action failed: {e}", exc_info=True) + error_msg = f"错误: {str(e)}" + + # 将错误附加到聊天中 (可选) + body["messages"][-1]["content"] += f"\n\n❌ **错误**: {error_msg}" + + await self._emit_status(__event_emitter__, "处理失败。", done=True) + await self._emit_notification( + __event_emitter__, "操作失败,请检查日志。", "error" + ) + + return body diff --git a/plugins/actions/smart-mind-map/smart_mind_map.py b/plugins/actions/smart-mind-map/smart_mind_map.py index 70ef2f8..0aa447a 100644 --- a/plugins/actions/smart-mind-map/smart_mind_map.py +++ b/plugins/actions/smart-mind-map/smart_mind_map.py @@ -362,8 +362,8 @@ class Action: description="Whether to show action status updates in the chat interface.", ) LLM_MODEL_ID: str = Field( - default="gemini-2.5-flash", - description="Built-in LLM model ID for text analysis.", + default="", + description="Built-in LLM model ID for text analysis. 
If empty, uses the current conversation's model.", ) MIN_TEXT_LENGTH: int = Field( default=100, @@ -514,8 +514,13 @@ class Action: long_text_content=long_text_content, ) + # Determine model to use + target_model = self.valves.LLM_MODEL_ID + if not target_model: + target_model = body.get("model") + llm_payload = { - "model": self.valves.LLM_MODEL_ID, + "model": target_model, "messages": [ {"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT}, {"role": "user", "content": formatted_user_prompt}, diff --git a/plugins/actions/smart-mind-map/思维导图.py b/plugins/actions/smart-mind-map/思维导图.py index 2804ee3..454f851 100644 --- a/plugins/actions/smart-mind-map/思维导图.py +++ b/plugins/actions/smart-mind-map/思维导图.py @@ -363,8 +363,8 @@ class Action: default=True, description="是否在聊天界面显示操作状态更新。" ) LLM_MODEL_ID: str = Field( - default="gemini-2.5-flash", - description="用于文本分析的内置LLM模型ID。", + default="", + description="用于文本分析的内置LLM模型ID。如果为空,则使用当前对话的模型。", ) MIN_TEXT_LENGTH: int = Field( default=100, description="进行思维导图分析所需的最小文本长度(字符数)。" @@ -514,13 +514,17 @@ class Action: long_text_content=long_text_content, ) + # 确定使用的模型 + target_model = self.valves.LLM_MODEL_ID + if not target_model: + target_model = body.get("model") + llm_payload = { - "model": self.valves.LLM_MODEL_ID, + "model": target_model, "messages": [ {"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT}, {"role": "user", "content": formatted_user_prompt}, ], - "temperature": 0.5, "stream": False, } user_obj = Users.get_user_by_id(user_id) diff --git a/plugins/actions/summary/summary.py b/plugins/actions/summary/summary.py index 92d9485..c1b42ac 100644 --- a/plugins/actions/summary/summary.py +++ b/plugins/actions/summary/summary.py @@ -281,8 +281,8 @@ class Action: description="Whether to show operation status updates in the chat interface.", ) LLM_MODEL_ID: str = Field( - default="gemini-2.5-flash", - description="Built-in LLM Model ID used for text analysis.", + default="", + description="Built-in LLM 
Model ID used for text analysis. If empty, uses the current conversation's model.", ) MIN_TEXT_LENGTH: int = Field( default=200, @@ -451,8 +451,13 @@ class Action: long_text_content=original_content, ) + # Determine model to use + target_model = self.valves.LLM_MODEL_ID + if not target_model: + target_model = body.get("model") + llm_payload = { - "model": self.valves.LLM_MODEL_ID, + "model": target_model, "messages": [ {"role": "system", "content": SYSTEM_PROMPT_READING_ASSISTANT}, {"role": "user", "content": formatted_user_prompt}, diff --git a/plugins/actions/summary/精读.py b/plugins/actions/summary/精读.py index e9e80ed..a366f1a 100644 --- a/plugins/actions/summary/精读.py +++ b/plugins/actions/summary/精读.py @@ -277,11 +277,12 @@ class Action: default=True, description="是否在聊天界面显示操作状态更新。" ) LLM_MODEL_ID: str = Field( - default="gemini-2.5-flash", - description="用于文本分析的内置LLM模型ID。", + default="", + description="用于文本分析的内置LLM模型ID。如果为空,则使用当前对话的模型。", ) MIN_TEXT_LENGTH: int = Field( - default=200, description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。" + default=200, + description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。", ) RECOMMENDED_MIN_LENGTH: int = Field( default=500, description="建议的最小文本长度,以获得最佳分析效果。" @@ -395,7 +396,7 @@ class Action: {"role": "assistant", "content": f"⚠️ {short_text_message}"} ] } - + # Recommend for longer texts if len(original_content) < self.valves.RECOMMENDED_MIN_LENGTH: if __event_emitter__: @@ -439,8 +440,13 @@ class Action: long_text_content=original_content, ) + # 确定使用的模型 + target_model = self.valves.LLM_MODEL_ID + if not target_model: + target_model = body.get("model") + llm_payload = { - "model": self.valves.LLM_MODEL_ID, + "model": target_model, "messages": [ {"role": "system", "content": SYSTEM_PROMPT_READING_ASSISTANT}, {"role": "user", "content": formatted_user_prompt}, @@ -452,7 +458,9 @@ class Action: if not user_obj: raise ValueError(f"无法获取用户对象, 用户ID: {user_id}") - llm_response = await generate_chat_completion(__request__, llm_payload, user_obj) + 
llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj + ) assistant_response_content = llm_response["choices"][0]["message"][ "content" ] diff --git a/plugins/filters/async-context-compression/README_CN.md b/plugins/filters/async-context-compression/README_CN.md index 172ad5e..4983838 100644 --- a/plugins/filters/async-context-compression/README_CN.md +++ b/plugins/filters/async-context-compression/README_CN.md @@ -65,7 +65,7 @@ | 参数 | 默认值 | 描述 | | :--- | :--- | :--- | -| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`, `deepseek-v3`)。如果留空,将尝试使用当前对话的模型。 | +| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`)。如果留空,将尝试使用当前对话的模型。 | | `max_summary_tokens` | `16384` | 生成摘要时允许的最大 Token 数。 | | `summary_temperature` | `0.1` | 控制摘要生成的随机性,较低的值结果更稳定。 | diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 79d9058..0678b74 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -383,360 +383,7 @@ class Filter: description="Hard limit for context. 
Exceeding this value will force removal of earliest messages (Global Default)", ) model_thresholds: dict = Field( - default={ - # Groq - "groq-openai/gpt-oss-20b": { - "max_context_tokens": 8000, - "compression_threshold_tokens": 5600, - }, - "groq-openai/gpt-oss-120b": { - "max_context_tokens": 8000, - "compression_threshold_tokens": 5600, - }, - # Qwen (ModelScope / CF) - "modelscope-Qwen/Qwen3-Coder-480B-A35B-Instruct": { - "max_context_tokens": 256000, - "compression_threshold_tokens": 179200, - }, - "cfchatqwen-qwen3-max-search": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "modelscope-Qwen/Qwen3-235B-A22B-Thinking-2507": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-max": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-vl-plus-thinking": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-coder-plus-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "cfchatqwen-qwen3-vl-plus": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-coder-plus": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "cfchatqwen-qwen3-omni-flash-thinking": { - "max_context_tokens": 65536, - "compression_threshold_tokens": 45875, - }, - "cfchatqwen-qwen3-omni-flash": { - "max_context_tokens": 65536, - "compression_threshold_tokens": 45875, - }, - "cfchatqwen-qwen3-next-80b-a3b-thinking": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "modelscope-Qwen/Qwen3-VL-235B-A22B-Instruct": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-next-80b-a3b-thinking-search": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-next-80b-a3b": { - 
"max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-235b-a22b-thinking-search": { - "max_context_tokens": 131072, - "compression_threshold_tokens": 91750, - }, - "cfchatqwen-qwen3-235b-a22b": { - "max_context_tokens": 131072, - "compression_threshold_tokens": 91750, - }, - "cfchatqwen-qwen3-235b-a22b-thinking": { - "max_context_tokens": 131072, - "compression_threshold_tokens": 91750, - }, - "cfchatqwen-qwen3-coder-flash-search": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-coder-flash": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-max-2025-10-30": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-max-2025-10-30-thinking": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-max-2025-10-30-thinking-search": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "modelscope-Qwen/Qwen3-235B-A22B-Instruct-2507": { - "max_context_tokens": 262144, - "compression_threshold_tokens": 183500, - }, - "cfchatqwen-qwen3-vl-30b-a3b": { - "max_context_tokens": 131072, - "compression_threshold_tokens": 91750, - }, - "cfchatqwen-qwen3-vl-30b-a3b-thinking": { - "max_context_tokens": 131072, - "compression_threshold_tokens": 91750, - }, - # Gemini - "gemini-2.5-pro-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.5-flash-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.5-flash": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.5-flash-lite": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.5-flash-lite-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - 
"gemini-2.5-pro": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.0-flash-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.0-flash": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.0-flash-exp": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-2.0-flash-lite": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "copilot-gemini-2.5-pro": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "gemini-pro-latest": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-3-pro-preview": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "gemini-pro-latest-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-flash-latest": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-flash-latest-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-flash-lite-latest-search": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-flash-lite-latest": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - "gemini-robotics-er-1.5-preview": { - "max_context_tokens": 1048576, - "compression_threshold_tokens": 734000, - }, - # DeepSeek - "modelscope-deepseek-ai/DeepSeek-V3.1": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfdeepseek-deepseek-search": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "openrouter-deepseek/deepseek-r1-0528:free": { - "max_context_tokens": 163840, - "compression_threshold_tokens": 114688, - }, - "modelscope-deepseek-ai/DeepSeek-V3.2-Exp": { - "max_context_tokens": 128000, - 
"compression_threshold_tokens": 89600, - }, - "cfdeepseek-deepseek-r1-search": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfdeepseek-deepseek-r1": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "openrouter-deepseek/deepseek-chat-v3.1:free": { - "max_context_tokens": 163800, - "compression_threshold_tokens": 114660, - }, - "modelscope-deepseek-ai/DeepSeek-R1-0528": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfdeepseek-deepseek": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - # Kimi (Moonshot) - "cfkimi-kimi-k2-search": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfkimi-kimi-k1.5-search": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfkimi-kimi-k1.5-thinking-search": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfkimi-kimi-research": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "openrouter-moonshotai/kimi-k2:free": { - "max_context_tokens": 32768, - "compression_threshold_tokens": 22937, - }, - "cfkimi-kimi-k2": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "cfkimi-kimi-k1.5": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - # GPT / OpenAI - "gpt-4.1": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "gpt-4o": { - "max_context_tokens": 64000, - "compression_threshold_tokens": 44800, - }, - "gpt-5": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "github-gpt-4.1": { - "max_context_tokens": 7500, - "compression_threshold_tokens": 5250, - }, - "gpt-5-mini": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "gpt-5.1": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - 
"gpt-5.1-codex": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "gpt-5.1-codex-mini": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "gpt-5-codex": { - "max_context_tokens": 200000, - "compression_threshold_tokens": 140000, - }, - "github-gpt-4.1-mini": { - "max_context_tokens": 7500, - "compression_threshold_tokens": 5250, - }, - "openrouter-openai/gpt-oss-20b:free": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - # Claude / Anthropic - "claude-sonnet-4.5": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "claude-haiku-4.5": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "copilot-claude-opus-41": { - "max_context_tokens": 80000, - "compression_threshold_tokens": 56000, - }, - "copilot-claude-sonnet-4": { - "max_context_tokens": 80000, - "compression_threshold_tokens": 56000, - }, - # Other / OpenRouter / OSWE - "oswe-vscode-insiders": { - "max_context_tokens": 256000, - "compression_threshold_tokens": 179200, - }, - "modelscope-MiniMax/MiniMax-M2": { - "max_context_tokens": 204800, - "compression_threshold_tokens": 143360, - }, - "oswe-vscode-prime": { - "max_context_tokens": 200000, - "compression_threshold_tokens": 140000, - }, - "grok-code-fast-1": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "copilot-auto": { - "max_context_tokens": 128000, - "compression_threshold_tokens": 89600, - }, - "modelscope-ZhipuAI/GLM-4.6": { - "max_context_tokens": 32000, - "compression_threshold_tokens": 22400, - }, - "openrouter-x-ai/grok-4.1-fast:free": { - "max_context_tokens": 2000000, - "compression_threshold_tokens": 1400000, - }, - "openrouter-qwen/qwen3-coder:free": { - "max_context_tokens": 262000, - "compression_threshold_tokens": 183400, - }, - "openrouter-qwen/qwen3-235b-a22b:free": { - "max_context_tokens": 40960, - "compression_threshold_tokens": 28672, - }, - }, 
+ default={}, description="Threshold override configuration for specific models. Only includes models requiring special configuration.", ) @@ -847,7 +494,7 @@ class Filter: return record.summary return None - def _count_tokens(self, text: str, model: str = "gpt-3.5-turbo") -> int: + def _count_tokens(self, text: str) -> int: """Counts the number of tokens in the text.""" if not text: return 0 @@ -866,9 +513,7 @@ class Filter: # Fallback strategy: Rough estimation (1 token ≈ 4 chars) return len(text) // 4 - def _calculate_messages_tokens( - self, messages: List[Dict], model: str = "gpt-3.5-turbo" - ) -> int: + def _calculate_messages_tokens(self, messages: List[Dict]) -> int: """Calculates the total tokens for a list of messages.""" total_tokens = 0 for msg in messages: @@ -879,9 +524,9 @@ class Filter: for part in content: if isinstance(part, dict) and part.get("type") == "text": text_content += part.get("text", "") - total_tokens += self._count_tokens(text_content, model) + total_tokens += self._count_tokens(text_content) else: - total_tokens += self._count_tokens(str(content), model) + total_tokens += self._count_tokens(str(content)) return total_tokens def _get_model_thresholds(self, model_id: str) -> Dict[str, int]: @@ -1101,7 +746,7 @@ class Filter: # Calculate Token count in a background thread current_tokens = await asyncio.to_thread( - self._calculate_messages_tokens, messages, model + self._calculate_messages_tokens, messages ) if self.valves.debug_mode: @@ -1197,7 +842,7 @@ class Filter: # Calculate current total Tokens (using summary model for counting) total_tokens = await asyncio.to_thread( - self._calculate_messages_tokens, messages, summary_model_id + self._calculate_messages_tokens, messages ) if total_tokens > max_context_tokens: @@ -1213,9 +858,7 @@ class Filter: while removed_tokens < excess_tokens and middle_messages: msg_to_remove = middle_messages.pop(0) - msg_tokens = self._count_tokens( - str(msg_to_remove.get("content", "")), 
summary_model_id - ) + msg_tokens = self._count_tokens(str(msg_to_remove.get("content", ""))) removed_tokens += msg_tokens removed_count += 1 @@ -1269,7 +912,7 @@ class Filter: { "type": "status", "data": { - "description": f"Context summary updated (Saved {len(middle_messages)} messages)", + "description": f"Context summary updated (Compressed {len(middle_messages)} messages)", "done": True, }, } diff --git a/plugins/filters/async-context-compression/异步上下文压缩.py b/plugins/filters/async-context-compression/异步上下文压缩.py index 6ab2772..4d30129 100644 --- a/plugins/filters/async-context-compression/异步上下文压缩.py +++ b/plugins/filters/async-context-compression/异步上下文压缩.py @@ -367,20 +367,26 @@ class Filter: ) # Token 相关参数 compression_threshold_tokens: int = Field( - default=64000, ge=0, description="当上下文总 Token 数超过此值时,触发压缩 (全局默认值)" + default=64000, + ge=0, + description="当上下文总 Token 数超过此值时,触发压缩 (全局默认值)", ) max_context_tokens: int = Field( - default=128000, ge=0, description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)" + default=128000, + ge=0, + description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)", ) model_thresholds: dict = Field( default={}, - description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。" + description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。", ) - + keep_first: int = Field( default=1, ge=0, description="始终保留最初的 N 条消息。设置为 0 则不保留。" ) - keep_last: int = Field(default=6, ge=0, description="始终保留最近的 N 条完整消息。") + keep_last: int = Field( + default=6, ge=0, description="始终保留最近的 N 条完整消息。" + ) summary_model: str = Field( default=None, description="用于生成摘要的模型 ID。留空则使用当前对话的模型。用于匹配 model_thresholds 中的配置。", @@ -404,15 +410,15 @@ class Filter: session = self._SessionLocal() try: # 查找现有记录 - existing = ( - session.query(ChatSummary).filter_by(chat_id=chat_id).first() - ) + existing = session.query(ChatSummary).filter_by(chat_id=chat_id).first() if existing: # [优化] 乐观锁检查:只有进度向前推进时才更新 if compressed_count <= existing.compressed_message_count: if self.valves.debug_mode: - print(f"[存储] 跳过更新:新进度 ({compressed_count}) 不大于现有进度 
({existing.compressed_message_count})") + print( + f"[存储] 跳过更新:新进度 ({compressed_count}) 不大于现有进度 ({existing.compressed_message_count})" + ) return # 更新现有记录 @@ -471,11 +477,11 @@ class Filter: return record.summary return None - def _count_tokens(self, text: str, model: str = "gpt-3.5-turbo") -> int: + def _count_tokens(self, text: str) -> int: """计算文本的 Token 数量""" if not text: return 0 - + if tiktoken: try: # 统一使用 o200k_base 编码 (适配最新模型) @@ -484,11 +490,13 @@ class Filter: except Exception as e: if self.valves.debug_mode: print(f"[Token计数] tiktoken 错误: {e},回退到字符估算") - + # 回退策略:粗略估算 (1 token ≈ 4 chars) return len(text) // 4 - def _calculate_messages_tokens(self, messages: List[Dict], model: str = "gpt-3.5-turbo") -> int: + def _calculate_messages_tokens( + self, messages: List[Dict] + ) -> int: """计算消息列表的总 Token 数""" total_tokens = 0 for msg in messages: @@ -499,14 +507,14 @@ class Filter: for part in content: if isinstance(part, dict) and part.get("type") == "text": text_content += part.get("text", "") - total_tokens += self._count_tokens(text_content, model) + total_tokens += self._count_tokens(text_content) else: - total_tokens += self._count_tokens(str(content), model) + total_tokens += self._count_tokens(str(content)) return total_tokens def _get_model_thresholds(self, model_id: str) -> Dict[str, int]: """获取特定模型的阈值配置 - + 优先级: 1. 如果 model_thresholds 中存在该模型ID的配置,使用该配置 2. 
否则使用全局参数 compression_threshold_tokens 和 max_context_tokens @@ -516,14 +524,14 @@ class Filter: if self.valves.debug_mode: print(f"[配置] 使用模型特定配置: {model_id}") return self.valves.model_thresholds[model_id] - + # 使用全局默认配置 if self.valves.debug_mode: print(f"[配置] 模型 {model_id} 未在 model_thresholds 中,使用全局参数") - + return { "compression_threshold_tokens": self.valves.compression_threshold_tokens, - "max_context_tokens": self.valves.max_context_tokens + "max_context_tokens": self.valves.max_context_tokens, } def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict: @@ -584,36 +592,36 @@ class Filter: # 记录原始消息的目标压缩进度,供 outlet 使用 # 目标是压缩到倒数第 keep_last 条之前 target_compressed_count = max(0, len(messages) - self.valves.keep_last) - + # [优化] 简单的状态清理检查 if chat_id in self.temp_state: - if self.valves.debug_mode: + if self.valves.debug_mode: print(f"[Inlet] ⚠️ 覆盖未消费的旧状态 (Chat ID: {chat_id})") - + self.temp_state[chat_id] = target_compressed_count - + if self.valves.debug_mode: print(f"[Inlet] 记录目标压缩进度: {target_compressed_count}") # 加载摘要记录 summary_record = await asyncio.to_thread(self._load_summary_record, chat_id) - + final_messages = [] - + if summary_record: # 存在摘要,构建视图:[Head] + [Summary Message] + [Tail] # Tail 是从上次压缩点之后的所有消息 compressed_count = summary_record.compressed_message_count - + # 确保 compressed_count 合理 if compressed_count > len(messages): compressed_count = max(0, len(messages) - self.valves.keep_last) - + # 1. 头部消息 (Keep First) head_messages = [] if self.valves.keep_first > 0: - head_messages = messages[:self.valves.keep_first] - + head_messages = messages[: self.valves.keep_first] + # 2. 摘要消息 (作为 User 消息插入) summary_content = ( f"【系统提示:以下是历史对话的摘要,仅供参考上下文,请勿对摘要内容进行回复,直接回答后续的最新问题】\n\n" @@ -622,14 +630,14 @@ class Filter: f"以下是最近的对话:" ) summary_msg = {"role": "user", "content": summary_content} - + # 3. 
尾部消息 (Tail) - 从上次压缩点开始的所有消息 # 注意:这里必须确保不重复包含头部消息 start_index = max(compressed_count, self.valves.keep_first) tail_messages = messages[start_index:] - + final_messages = head_messages + [summary_msg] + tail_messages - + # 发送状态通知 if __event_emitter__: await __event_emitter__( @@ -641,15 +649,17 @@ class Filter: }, } ) - + if self.valves.debug_mode: - print(f"[Inlet] 应用摘要: Head({len(head_messages)}) + Summary + Tail({len(tail_messages)})") + print( + f"[Inlet] 应用摘要: Head({len(head_messages)}) + Summary + Tail({len(tail_messages)})" + ) else: # 没有摘要,使用原始消息 final_messages = messages body["messages"] = final_messages - + if self.valves.debug_mode: print(f"[Inlet] 最终发送: {len(body['messages'])} 条消息") print(f"{'='*60}\n") @@ -701,17 +711,19 @@ class Filter: """ try: messages = body.get("messages", []) - + # 获取当前模型的阈值配置 thresholds = self._get_model_thresholds(model) - compression_threshold_tokens = thresholds.get("compression_threshold_tokens", self.valves.compression_threshold_tokens) + compression_threshold_tokens = thresholds.get( + "compression_threshold_tokens", self.valves.compression_threshold_tokens + ) if self.valves.debug_mode: print(f"\n[🔍 后台计算] 开始 Token 计数...") # 在后台线程中计算 Token 数 current_tokens = await asyncio.to_thread( - self._calculate_messages_tokens, messages, model + self._calculate_messages_tokens, messages ) if self.valves.debug_mode: @@ -762,18 +774,22 @@ class Filter: if target_compressed_count is None: target_compressed_count = max(0, len(messages) - self.valves.keep_last) if self.valves.debug_mode: - print(f"[🤖 异步摘要任务] ⚠️ 无法获取 inlet 状态,使用当前消息数估算进度: {target_compressed_count}") - + print( + f"[🤖 异步摘要任务] ⚠️ 无法获取 inlet 状态,使用当前消息数估算进度: {target_compressed_count}" + ) + # 2. 
确定待压缩的消息范围 (Middle) start_index = self.valves.keep_first end_index = len(messages) - self.valves.keep_last if self.valves.keep_last == 0: end_index = len(messages) - + # 确保索引有效 if start_index >= end_index: if self.valves.debug_mode: - print(f"[🤖 异步摘要任务] 中间消息为空 (Start: {start_index}, End: {end_index}),跳过") + print( + f"[🤖 异步摘要任务] 中间消息为空 (Start: {start_index}, End: {end_index}),跳过" + ) return middle_messages = messages[start_index:end_index] @@ -784,36 +800,48 @@ class Filter: # 3. 检查 Token 上限并截断 (Max Context Truncation) # [优化] 使用摘要模型(如果有)的阈值来决定能处理多少中间消息 # 这样可以用长窗口模型(如 gemini-flash)来压缩超过当前模型窗口的历史记录 - summary_model_id = self.valves.summary_model or body.get("model", "gpt-3.5-turbo") - + summary_model_id = self.valves.summary_model or body.get("model") + thresholds = self._get_model_thresholds(summary_model_id) # 注意:这里使用的是摘要模型的最大上下文限制 - max_context_tokens = thresholds.get("max_context_tokens", self.valves.max_context_tokens) - + max_context_tokens = thresholds.get( + "max_context_tokens", self.valves.max_context_tokens + ) + if self.valves.debug_mode: - print(f"[🤖 异步摘要任务] 使用模型 {summary_model_id} 的上限: {max_context_tokens} Tokens") - + print( + f"[🤖 异步摘要任务] 使用模型 {summary_model_id} 的上限: {max_context_tokens} Tokens" + ) + # 计算当前总 Token (使用摘要模型进行计数) - total_tokens = await asyncio.to_thread(self._calculate_messages_tokens, messages, summary_model_id) - + total_tokens = await asyncio.to_thread( + self._calculate_messages_tokens, messages + ) + if total_tokens > max_context_tokens: excess_tokens = total_tokens - max_context_tokens if self.valves.debug_mode: - print(f"[🤖 异步摘要任务] ⚠️ 总 Token ({total_tokens}) 超过摘要模型上限 ({max_context_tokens}),需要移除约 {excess_tokens} Token") - + print( + f"[🤖 异步摘要任务] ⚠️ 总 Token ({total_tokens}) 超过摘要模型上限 ({max_context_tokens}),需要移除约 {excess_tokens} Token" + ) + # 从 middle_messages 头部开始移除 removed_tokens = 0 removed_count = 0 - + while removed_tokens < excess_tokens and middle_messages: msg_to_remove = middle_messages.pop(0) - msg_tokens 
= self._count_tokens(str(msg_to_remove.get("content", "")), summary_model_id) + msg_tokens = self._count_tokens( + str(msg_to_remove.get("content", "")) + ) removed_tokens += msg_tokens removed_count += 1 - + if self.valves.debug_mode: - print(f"[🤖 异步摘要任务] 已移除 {removed_count} 条消息,共 {removed_tokens} Token") - + print( + f"[🤖 异步摘要任务] 已移除 {removed_count} 条消息,共 {removed_tokens} Token" + ) + if not middle_messages: if self.valves.debug_mode: print(f"[🤖 异步摘要任务] 截断后中间消息为空,跳过摘要生成") @@ -824,7 +852,7 @@ class Filter: # 5. 调用 LLM 生成新摘要 # 注意:这里不再传入 previous_summary,因为旧摘要(如果有)已经包含在 middle_messages 里了 - + # 发送开始生成摘要的状态通知 if __event_emitter__: await __event_emitter__( @@ -837,13 +865,17 @@ class Filter: } ) - new_summary = await self._call_summary_llm(None, conversation_text, body, user_data) + new_summary = await self._call_summary_llm( + None, conversation_text, body, user_data + ) # 6. 保存新摘要 if self.valves.debug_mode: print("[优化] 正在后台线程中保存摘要,以避免阻塞事件循环。") - - await asyncio.to_thread(self._save_summary, chat_id, new_summary, target_compressed_count) + + await asyncio.to_thread( + self._save_summary, chat_id, new_summary, target_compressed_count + ) # 发送完成状态通知 if __event_emitter__: @@ -851,7 +883,7 @@ class Filter: { "type": "status", "data": { - "description": f"上下文摘要已更新 (节省 {len(middle_messages)} 条消息)", + "description": f"上下文摘要已更新 (已压缩 {len(middle_messages)} 条消息)", "done": True, }, } @@ -859,11 +891,14 @@ class Filter: if self.valves.debug_mode: print(f"[🤖 异步摘要任务] ✅ 完成!新摘要长度: {len(new_summary)} 字符") - print(f"[🤖 异步摘要任务] 进度更新: 已压缩至原始第 {target_compressed_count} 条消息") + print( + f"[🤖 异步摘要任务] 进度更新: 已压缩至原始第 {target_compressed_count} 条消息" + ) except Exception as e: print(f"[🤖 异步摘要任务] ❌ 错误: {str(e)}") import traceback + traceback.print_exc() def _format_messages_for_summary(self, messages: list) -> str: @@ -893,7 +928,11 @@ class Filter: return "\n\n".join(formatted) async def _call_summary_llm( - self, previous_summary: Optional[str], new_conversation_text: str, body: dict, 
user_data: dict + self, + previous_summary: Optional[str], + new_conversation_text: str, + body: dict, + user_data: dict, ) -> str: """ 使用 Open WebUI 内置方法调用 LLM 生成摘要 @@ -960,7 +999,7 @@ class Filter: if self.valves.debug_mode: print("[优化] 正在后台线程中获取用户对象,以避免阻塞事件循环。") user = await asyncio.to_thread(Users.get_user_by_id, user_id) - + if not user: raise ValueError(f"无法找到用户: {user_id}")