feat: 添加了中英文动作插件模板,更新了摘要和智能思维导图插件,并简化了异步上下文压缩插件的模型阈值配置。
This commit is contained in:
274
plugins/actions/ACTION_PLUGIN_TEMPLATE.py
Normal file
274
plugins/actions/ACTION_PLUGIN_TEMPLATE.py
Normal file
@@ -0,0 +1,274 @@
|
||||
"""
|
||||
title: [Plugin Name] (e.g., Smart Mind Map)
|
||||
author: [Your Name]
|
||||
author_url: [Your URL]
|
||||
funding_url: [Funding URL]
|
||||
version: 0.1.0
|
||||
icon_url: [Data URI or URL for Icon]
|
||||
description: [Brief description of what the plugin does]
|
||||
requirements: [List of dependencies, e.g., jinja2, markdown]
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, Dict, Any, List, Callable, Awaitable
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
from fastapi import Request
|
||||
from datetime import datetime
|
||||
import pytz
|
||||
|
||||
# Import OpenWebUI utilities
|
||||
from open_webui.utils.chat import generate_chat_completion
|
||||
from open_webui.models.users import Users
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =================================================================
# Constants & Prompts
# =================================================================

# System prompt sent as the first message of every LLM call.
SYSTEM_PROMPT = """
[Insert System Prompt Here]
You are a helpful assistant...
Please output in [JSON/Markdown] format...
"""

# Per-request user prompt; placeholders are filled with str.format() in action().
USER_PROMPT_TEMPLATE = """
[Insert User Prompt Template Here]
User Context:
Name: {user_name}
Time: {current_date_time_str}

Content to process:
{content}
"""

# HTML Template for rendering the result in the chat
# NOTE: filled via str.replace() (not str.format()) in action(), so the
# literal `{ }` braces in the CSS below are safe to keep as-is.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="{user_language}">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>[Plugin Title]</title>
<style>
/* Add your CSS styles here */
body { font-family: sans-serif; padding: 20px; }
.container { border: 1px solid #ccc; padding: 20px; border-radius: 8px; }
</style>
</head>
<body>
<div class="container">
<h1>[Result Title]</h1>
<div id="content">{result_content}</div>
</div>
</body>
</html>
"""
|
||||
|
||||
|
||||
class Action:
    """Reusable OpenWebUI action-plugin template.

    Copy this file, fill in SYSTEM_PROMPT / USER_PROMPT_TEMPLATE /
    HTML_TEMPLATE, and override ``_process_llm_output`` to parse the
    model's reply. OpenWebUI invokes ``action`` with the chat ``body``
    plus the framework-injected double-underscore keyword arguments.
    """

    class Valves(BaseModel):
        # Admin-configurable settings surfaced in the OpenWebUI settings UI.
        show_status: bool = Field(
            default=True,
            description="Whether to show operation status updates in the chat interface.",
        )
        LLM_MODEL_ID: str = Field(
            default="",
            description="Built-in LLM Model ID used for processing. If empty, uses the current conversation's model.",
        )
        MIN_TEXT_LENGTH: int = Field(
            default=50,
            description="Minimum text length required for processing (characters).",
        )
        # Add other configuration fields as needed
        # MAX_TEXT_LENGTH: int = Field(default=2000, description="...")

    def __init__(self):
        self.valves = self.Valves()

    def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]:
        """Extract id/name/language from the framework-supplied user object.

        ``__user__`` may arrive as a dict, a one-element list/tuple, or
        ``None`` depending on how the framework calls us, so normalize
        before reading fields.
        """
        if isinstance(__user__, (list, tuple)):
            user_data = __user__[0] if __user__ else {}
        elif isinstance(__user__, dict):
            user_data = __user__
        else:
            user_data = {}

        return {
            "user_id": user_data.get("id", "unknown_user"),
            "user_name": user_data.get("name", "User"),
            "user_language": user_data.get("language", "en-US"),
        }

    def _get_current_time_context(self) -> Dict[str, str]:
        """Return the current wall-clock time as formatted strings.

        Falls back to naive system time if the configured timezone
        cannot be resolved (e.g. pytz database issue).
        """
        try:
            # Default to a specific timezone or system time
            tz = pytz.timezone("Asia/Shanghai")  # Change as needed
            now = datetime.now(tz)
        except Exception:
            now = datetime.now()

        return {
            "current_date_time_str": now.strftime("%Y-%m-%d %H:%M:%S"),
            "current_weekday": now.strftime("%A"),
            "current_year": now.strftime("%Y"),
            "current_timezone_str": str(now.tzinfo) if now.tzinfo else "Unknown",
        }

    def _process_llm_output(self, llm_output: str) -> Any:
        """
        Process the raw output from the LLM.
        Override this method to parse JSON, extract Markdown, etc.
        """
        # Example: Extract JSON
        # try:
        #     start = llm_output.find('{')
        #     end = llm_output.rfind('}') + 1
        #     if start != -1 and end != -1:
        #         return json.loads(llm_output[start:end])
        # except Exception:
        #     pass
        return llm_output.strip()

    async def _emit_status(
        self,
        emitter: Optional[Callable[[Any], Awaitable[None]]],
        description: str,
        done: bool = False,
    ):
        """Emit a status update event (respects the show_status valve)."""
        if self.valves.show_status and emitter:
            await emitter(
                {"type": "status", "data": {"description": description, "done": done}}
            )

    async def _emit_notification(
        self,
        emitter: Optional[Callable[[Any], Awaitable[None]]],
        content: str,
        type: str = "info",
    ):
        """Emit a toast notification event (info, success, warning, error)."""
        if emitter:
            await emitter(
                {"type": "notification", "data": {"type": type, "content": content}}
            )

    async def _emit_message(
        self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str
    ):
        """Emit a message event (appends to the current chat message)."""
        if emitter:
            await emitter({"type": "message", "data": {"content": content}})

    async def _emit_replace(
        self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str
    ):
        """Emit a replace event (replaces the current chat message)."""
        if emitter:
            await emitter({"type": "replace", "data": {"content": content}})

    async def action(
        self,
        body: dict,
        __user__: Optional[Dict[str, Any]] = None,
        __event_emitter__: Optional[Callable[[Any], Awaitable[None]]] = None,
        __event_call__: Optional[Callable[[Any], Awaitable[Any]]] = None,
        __request__: Optional[Request] = None,
    ) -> Optional[dict]:
        """Main entry point called by OpenWebUI.

        Validates the last message, calls the configured LLM with the
        template prompts, renders the processed output as embedded HTML,
        and appends it to the last message. Always returns ``body``.
        """
        # NOTE(review): __name__ here is the module name, not the action
        # title — confirm this is the intended log label.
        logger.info(f"Action: {__name__} started")

        # 1. Context Setup
        user_context = self._get_user_context(__user__)
        time_context = self._get_current_time_context()

        # 2. Input Validation
        messages = body.get("messages", [])
        if not messages or not messages[-1].get("content"):
            return body  # Or handle error

        original_content = messages[-1]["content"]

        if len(original_content) < self.valves.MIN_TEXT_LENGTH:
            warning_msg = f"Text too short ({len(original_content)} chars). Minimum required: {self.valves.MIN_TEXT_LENGTH}."
            await self._emit_notification(__event_emitter__, warning_msg, "warning")
            return body  # Or return a message indicating failure

        # 3. Status Notification (Start)
        await self._emit_status(__event_emitter__, "Processing...", done=False)

        try:
            # 4. Prepare Prompt
            formatted_prompt = USER_PROMPT_TEMPLATE.format(
                user_name=user_context["user_name"],
                current_date_time_str=time_context["current_date_time_str"],
                content=original_content,
                # Add other context variables
            )

            # 5. Determine Model
            target_model = self.valves.LLM_MODEL_ID
            if not target_model:
                target_model = body.get("model")
            # Note: No hardcoded fallback here, relies on system/user context

            # 6. Call LLM
            user_obj = Users.get_user_by_id(user_context["user_id"])
            # Guard against an unknown user id; generate_chat_completion
            # needs a real user object.
            if not user_obj:
                raise ValueError(
                    f"Unable to resolve user object for id: {user_context['user_id']}"
                )

            payload = {
                "model": target_model,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": formatted_prompt},
                ],
                "stream": False,
                # "temperature": 0.5,
            }

            llm_response = await generate_chat_completion(
                __request__, payload, user_obj
            )

            if not llm_response or "choices" not in llm_response:
                raise ValueError("Invalid LLM response")

            assistant_content = llm_response["choices"][0]["message"]["content"]

            # 7. Process Output
            processed_data = self._process_llm_output(assistant_content)

            # 8. Generate HTML/Result
            # str.replace (not str.format) so literal CSS braces in the
            # template stay intact.
            final_html = HTML_TEMPLATE.replace("{result_content}", str(processed_data))
            final_html = final_html.replace(
                "{user_language}", user_context["user_language"]
            )

            # 9. Inject Result
            html_embed_tag = f"```html\n{final_html}\n```"
            body["messages"][-1]["content"] += f"\n\n{html_embed_tag}"

            # 10. Status Notification (Success)
            await self._emit_status(
                __event_emitter__, "Completed successfully!", done=True
            )
            await self._emit_notification(
                __event_emitter__, "Action completed successfully.", "success"
            )

        except Exception as e:
            logger.error(f"Action failed: {e}", exc_info=True)
            error_msg = f"Error: {str(e)}"

            # Append error to chat (optional)
            body["messages"][-1]["content"] += f"\n\n❌ **Error**: {error_msg}"

            # Mirror the success path: close the status spinner and notify
            # the user. Without done=True the UI spinner would stay stuck
            # on "Processing..." after a failure.
            await self._emit_status(__event_emitter__, "Processing failed.", done=True)
            await self._emit_notification(
                __event_emitter__, "Action failed. Check logs for details.", "error"
            )

        return body
|
||||
277
plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py
Normal file
277
plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""
|
||||
title: [插件名称] (例如: 智能思维导图)
|
||||
author: [作者姓名]
|
||||
author_url: [作者主页链接]
|
||||
funding_url: [赞助链接]
|
||||
version: 0.1.0
|
||||
icon_url: [图标 URL 或 Data URI]
|
||||
description: [简短描述插件的功能]
|
||||
requirements: [依赖列表, 例如: jinja2, markdown]
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, Dict, Any, List, Callable, Awaitable
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
from fastapi import Request
|
||||
from datetime import datetime
|
||||
import pytz
|
||||
|
||||
# 导入 OpenWebUI 工具函数
|
||||
from open_webui.utils.chat import generate_chat_completion
|
||||
from open_webui.models.users import Users
|
||||
|
||||
# 设置日志
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =================================================================
# Constants & Prompts
# =================================================================

# System prompt sent as the first message of every LLM call.
SYSTEM_PROMPT = """
[在此处插入系统提示词]
你是一个有用的助手...
请以 [JSON/Markdown] 格式输出...
"""

# Per-request user prompt; placeholders are filled with str.format() in action().
USER_PROMPT_TEMPLATE = """
[在此处插入用户提示词模板]
用户上下文:
姓名: {user_name}
时间: {current_date_time_str}

待处理内容:
{content}
"""

# HTML template used to render the result in the chat.
# NOTE: filled via str.replace() (not str.format()) in action(), so the
# literal `{ }` braces in the CSS below are safe to keep as-is.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="{user_language}">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>[插件标题]</title>
<style>
/* 在此处添加 CSS 样式 */
body { font-family: sans-serif; padding: 20px; }
.container { border: 1px solid #ccc; padding: 20px; border-radius: 8px; }
</style>
</head>
<body>
<div class="container">
<h1>[结果标题]</h1>
<div id="content">{result_content}</div>
</div>
</body>
</html>
"""
|
||||
|
||||
|
||||
class Action:
    """Reusable OpenWebUI action-plugin template (Chinese-localized variant).

    Copy this file, fill in SYSTEM_PROMPT / USER_PROMPT_TEMPLATE /
    HTML_TEMPLATE, and override ``_process_llm_output`` to parse the
    model's reply. OpenWebUI invokes ``action`` with the chat ``body``
    plus the framework-injected double-underscore keyword arguments.
    """

    class Valves(BaseModel):
        # Admin-configurable settings surfaced in the OpenWebUI settings UI.
        show_status: bool = Field(
            default=True,
            description="是否在聊天界面显示操作状态更新。",
        )
        LLM_MODEL_ID: str = Field(
            default="",
            description="用于处理的内置 LLM 模型 ID。如果为空,则使用当前对话的模型。",
        )
        MIN_TEXT_LENGTH: int = Field(
            default=50,
            description="处理所需的最小文本长度(字符数)。",
        )
        # Add other configuration fields as needed
        # MAX_TEXT_LENGTH: int = Field(default=2000, description="...")

    def __init__(self):
        self.valves = self.Valves()

    def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]:
        """Extract id/name/language from the framework-supplied user object.

        ``__user__`` may arrive as a dict, a one-element list/tuple, or
        ``None``, so normalize before reading fields.
        """
        if isinstance(__user__, (list, tuple)):
            user_data = __user__[0] if __user__ else {}
        elif isinstance(__user__, dict):
            user_data = __user__
        else:
            user_data = {}

        return {
            "user_id": user_data.get("id", "unknown_user"),
            "user_name": user_data.get("name", "用户"),
            "user_language": user_data.get("language", "zh-CN"),
        }

    def _get_current_time_context(self) -> Dict[str, str]:
        """Return the current wall-clock time as formatted strings."""
        try:
            # Default to a specific timezone or system time
            tz = pytz.timezone("Asia/Shanghai")  # Change as needed
            now = datetime.now(tz)
        except Exception:
            # Fall back to naive system time if the timezone lookup fails.
            now = datetime.now()

        return {
            "current_date_time_str": now.strftime("%Y-%m-%d %H:%M:%S"),
            "current_weekday": now.strftime("%A"),
            "current_year": now.strftime("%Y"),
            "current_timezone_str": str(now.tzinfo) if now.tzinfo else "Unknown",
        }

    def _process_llm_output(self, llm_output: str) -> Any:
        """
        Process the raw output from the LLM.
        Override this method to parse JSON, extract Markdown, etc.
        """
        # Example: extract JSON
        # try:
        #     start = llm_output.find('{')
        #     end = llm_output.rfind('}') + 1
        #     if start != -1 and end != -1:
        #         return json.loads(llm_output[start:end])
        # except Exception:
        #     pass
        return llm_output.strip()

    async def _emit_status(
        self,
        emitter: Optional[Callable[[Any], Awaitable[None]]],
        description: str,
        done: bool = False,
    ):
        """Emit a status update event (respects the show_status valve)."""
        if self.valves.show_status and emitter:
            await emitter(
                {"type": "status", "data": {"description": description, "done": done}}
            )

    async def _emit_notification(
        self,
        emitter: Optional[Callable[[Any], Awaitable[None]]],
        content: str,
        type: str = "info",
    ):
        """Emit a toast notification event (info, success, warning, error)."""
        if emitter:
            await emitter(
                {"type": "notification", "data": {"type": type, "content": content}}
            )

    async def _emit_message(
        self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str
    ):
        """Emit a message event (appends to the current chat message)."""
        if emitter:
            await emitter({"type": "message", "data": {"content": content}})

    async def _emit_replace(
        self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str
    ):
        """Emit a replace event (replaces the current chat message)."""
        if emitter:
            await emitter({"type": "replace", "data": {"content": content}})

    async def action(
        self,
        body: dict,
        __user__: Optional[Dict[str, Any]] = None,
        __event_emitter__: Optional[Callable[[Any], Awaitable[None]]] = None,
        __event_call__: Optional[Callable[[Any], Awaitable[Any]]] = None,
        __request__: Optional[Request] = None,
    ) -> Optional[dict]:
        """Main entry point called by OpenWebUI.

        Validates the last message, calls the configured LLM with the
        template prompts, renders the processed output as embedded HTML,
        and appends it to the last message. Always returns ``body``.
        """
        # NOTE(review): __name__ is the module name, not the action
        # title — confirm this is the intended log label.
        logger.info(f"Action: {__name__} started")

        # 1. Context setup
        user_context = self._get_user_context(__user__)
        time_context = self._get_current_time_context()

        # 2. Input validation
        messages = body.get("messages", [])
        if not messages or not messages[-1].get("content"):
            return body  # Or handle the error

        original_content = messages[-1]["content"]

        if len(original_content) < self.valves.MIN_TEXT_LENGTH:
            warning_msg = f"文本过短 ({len(original_content)} 字符)。最少需要: {self.valves.MIN_TEXT_LENGTH}。"
            await self._emit_notification(__event_emitter__, warning_msg, "warning")
            return body  # Or return a failure message

        # 3. Status notification (start)
        await self._emit_status(__event_emitter__, "正在处理...", done=False)

        try:
            # 4. Prepare the prompt
            formatted_prompt = USER_PROMPT_TEMPLATE.format(
                user_name=user_context["user_name"],
                current_date_time_str=time_context["current_date_time_str"],
                content=original_content,
                # Add other context variables
            )

            # 5. Determine the model
            target_model = self.valves.LLM_MODEL_ID
            if not target_model:
                target_model = body.get("model")
            # Note: no hardcoded fallback here; relies on system/user context

            # 6. Call the LLM
            user_obj = Users.get_user_by_id(user_context["user_id"])

            payload = {
                "model": target_model,
                "messages": [
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": formatted_prompt},
                ],
                "stream": False,
                # "temperature": 0.5,
            }

            llm_response = await generate_chat_completion(
                __request__, payload, user_obj
            )

            if not llm_response or "choices" not in llm_response:
                raise ValueError("无效的 LLM 响应")

            assistant_content = llm_response["choices"][0]["message"]["content"]

            # 7. Process the output
            processed_data = self._process_llm_output(assistant_content)

            # 8. Generate the HTML/result
            # Simple str.replace (not str.format) so literal CSS braces in
            # the template stay intact.
            final_html = HTML_TEMPLATE.replace("{result_content}", str(processed_data))
            final_html = final_html.replace(
                "{user_language}", user_context["user_language"]
            )

            # 9. Inject the result
            html_embed_tag = f"```html\n{final_html}\n```"
            body["messages"][-1]["content"] += f"\n\n{html_embed_tag}"

            # 10. Status notification (success)
            await self._emit_status(__event_emitter__, "处理完成!", done=True)
            await self._emit_notification(
                __event_emitter__, "操作成功完成。", "success"
            )

        except Exception as e:
            logger.error(f"Action failed: {e}", exc_info=True)
            error_msg = f"错误: {str(e)}"

            # Append the error to the chat (optional)
            body["messages"][-1]["content"] += f"\n\n❌ **错误**: {error_msg}"

            # Close the status spinner and notify the user of the failure.
            await self._emit_status(__event_emitter__, "处理失败。", done=True)
            await self._emit_notification(
                __event_emitter__, "操作失败,请检查日志。", "error"
            )

        return body
|
||||
@@ -362,8 +362,8 @@ class Action:
|
||||
description="Whether to show action status updates in the chat interface.",
|
||||
)
|
||||
LLM_MODEL_ID: str = Field(
|
||||
default="gemini-2.5-flash",
|
||||
description="Built-in LLM model ID for text analysis.",
|
||||
default="",
|
||||
description="Built-in LLM model ID for text analysis. If empty, uses the current conversation's model.",
|
||||
)
|
||||
MIN_TEXT_LENGTH: int = Field(
|
||||
default=100,
|
||||
@@ -514,8 +514,13 @@ class Action:
|
||||
long_text_content=long_text_content,
|
||||
)
|
||||
|
||||
# Determine model to use
|
||||
target_model = self.valves.LLM_MODEL_ID
|
||||
if not target_model:
|
||||
target_model = body.get("model")
|
||||
|
||||
llm_payload = {
|
||||
"model": self.valves.LLM_MODEL_ID,
|
||||
"model": target_model,
|
||||
"messages": [
|
||||
{"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT},
|
||||
{"role": "user", "content": formatted_user_prompt},
|
||||
|
||||
@@ -363,8 +363,8 @@ class Action:
|
||||
default=True, description="是否在聊天界面显示操作状态更新。"
|
||||
)
|
||||
LLM_MODEL_ID: str = Field(
|
||||
default="gemini-2.5-flash",
|
||||
description="用于文本分析的内置LLM模型ID。",
|
||||
default="",
|
||||
description="用于文本分析的内置LLM模型ID。如果为空,则使用当前对话的模型。",
|
||||
)
|
||||
MIN_TEXT_LENGTH: int = Field(
|
||||
default=100, description="进行思维导图分析所需的最小文本长度(字符数)。"
|
||||
@@ -514,13 +514,17 @@ class Action:
|
||||
long_text_content=long_text_content,
|
||||
)
|
||||
|
||||
# 确定使用的模型
|
||||
target_model = self.valves.LLM_MODEL_ID
|
||||
if not target_model:
|
||||
target_model = body.get("model")
|
||||
|
||||
llm_payload = {
|
||||
"model": self.valves.LLM_MODEL_ID,
|
||||
"model": target_model,
|
||||
"messages": [
|
||||
{"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT},
|
||||
{"role": "user", "content": formatted_user_prompt},
|
||||
],
|
||||
"temperature": 0.5,
|
||||
"stream": False,
|
||||
}
|
||||
user_obj = Users.get_user_by_id(user_id)
|
||||
|
||||
@@ -281,8 +281,8 @@ class Action:
|
||||
description="Whether to show operation status updates in the chat interface.",
|
||||
)
|
||||
LLM_MODEL_ID: str = Field(
|
||||
default="gemini-2.5-flash",
|
||||
description="Built-in LLM Model ID used for text analysis.",
|
||||
default="",
|
||||
description="Built-in LLM Model ID used for text analysis. If empty, uses the current conversation's model.",
|
||||
)
|
||||
MIN_TEXT_LENGTH: int = Field(
|
||||
default=200,
|
||||
@@ -451,8 +451,13 @@ class Action:
|
||||
long_text_content=original_content,
|
||||
)
|
||||
|
||||
# Determine model to use
|
||||
target_model = self.valves.LLM_MODEL_ID
|
||||
if not target_model:
|
||||
target_model = body.get("model")
|
||||
|
||||
llm_payload = {
|
||||
"model": self.valves.LLM_MODEL_ID,
|
||||
"model": target_model,
|
||||
"messages": [
|
||||
{"role": "system", "content": SYSTEM_PROMPT_READING_ASSISTANT},
|
||||
{"role": "user", "content": formatted_user_prompt},
|
||||
|
||||
@@ -277,11 +277,12 @@ class Action:
|
||||
default=True, description="是否在聊天界面显示操作状态更新。"
|
||||
)
|
||||
LLM_MODEL_ID: str = Field(
|
||||
default="gemini-2.5-flash",
|
||||
description="用于文本分析的内置LLM模型ID。",
|
||||
default="",
|
||||
description="用于文本分析的内置LLM模型ID。如果为空,则使用当前对话的模型。",
|
||||
)
|
||||
MIN_TEXT_LENGTH: int = Field(
|
||||
default=200, description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。"
|
||||
default=200,
|
||||
description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。",
|
||||
)
|
||||
RECOMMENDED_MIN_LENGTH: int = Field(
|
||||
default=500, description="建议的最小文本长度,以获得最佳分析效果。"
|
||||
@@ -395,7 +396,7 @@ class Action:
|
||||
{"role": "assistant", "content": f"⚠️ {short_text_message}"}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# Recommend for longer texts
|
||||
if len(original_content) < self.valves.RECOMMENDED_MIN_LENGTH:
|
||||
if __event_emitter__:
|
||||
@@ -439,8 +440,13 @@ class Action:
|
||||
long_text_content=original_content,
|
||||
)
|
||||
|
||||
# 确定使用的模型
|
||||
target_model = self.valves.LLM_MODEL_ID
|
||||
if not target_model:
|
||||
target_model = body.get("model")
|
||||
|
||||
llm_payload = {
|
||||
"model": self.valves.LLM_MODEL_ID,
|
||||
"model": target_model,
|
||||
"messages": [
|
||||
{"role": "system", "content": SYSTEM_PROMPT_READING_ASSISTANT},
|
||||
{"role": "user", "content": formatted_user_prompt},
|
||||
@@ -452,7 +458,9 @@ class Action:
|
||||
if not user_obj:
|
||||
raise ValueError(f"无法获取用户对象, 用户ID: {user_id}")
|
||||
|
||||
llm_response = await generate_chat_completion(__request__, llm_payload, user_obj)
|
||||
llm_response = await generate_chat_completion(
|
||||
__request__, llm_payload, user_obj
|
||||
)
|
||||
assistant_response_content = llm_response["choices"][0]["message"][
|
||||
"content"
|
||||
]
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
|
||||
| 参数 | 默认值 | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`, `deepseek-v3`)。如果留空,将尝试使用当前对话的模型。 |
|
||||
| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`)。如果留空,将尝试使用当前对话的模型。 |
|
||||
| `max_summary_tokens` | `16384` | 生成摘要时允许的最大 Token 数。 |
|
||||
| `summary_temperature` | `0.1` | 控制摘要生成的随机性,较低的值结果更稳定。 |
|
||||
|
||||
|
||||
@@ -383,360 +383,7 @@ class Filter:
|
||||
description="Hard limit for context. Exceeding this value will force removal of earliest messages (Global Default)",
|
||||
)
|
||||
model_thresholds: dict = Field(
|
||||
default={
|
||||
# Groq
|
||||
"groq-openai/gpt-oss-20b": {
|
||||
"max_context_tokens": 8000,
|
||||
"compression_threshold_tokens": 5600,
|
||||
},
|
||||
"groq-openai/gpt-oss-120b": {
|
||||
"max_context_tokens": 8000,
|
||||
"compression_threshold_tokens": 5600,
|
||||
},
|
||||
# Qwen (ModelScope / CF)
|
||||
"modelscope-Qwen/Qwen3-Coder-480B-A35B-Instruct": {
|
||||
"max_context_tokens": 256000,
|
||||
"compression_threshold_tokens": 179200,
|
||||
},
|
||||
"cfchatqwen-qwen3-max-search": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"modelscope-Qwen/Qwen3-235B-A22B-Thinking-2507": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-max": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-vl-plus-thinking": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-coder-plus-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"cfchatqwen-qwen3-vl-plus": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-coder-plus": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"cfchatqwen-qwen3-omni-flash-thinking": {
|
||||
"max_context_tokens": 65536,
|
||||
"compression_threshold_tokens": 45875,
|
||||
},
|
||||
"cfchatqwen-qwen3-omni-flash": {
|
||||
"max_context_tokens": 65536,
|
||||
"compression_threshold_tokens": 45875,
|
||||
},
|
||||
"cfchatqwen-qwen3-next-80b-a3b-thinking": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"modelscope-Qwen/Qwen3-VL-235B-A22B-Instruct": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-next-80b-a3b-thinking-search": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-next-80b-a3b": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-235b-a22b-thinking-search": {
|
||||
"max_context_tokens": 131072,
|
||||
"compression_threshold_tokens": 91750,
|
||||
},
|
||||
"cfchatqwen-qwen3-235b-a22b": {
|
||||
"max_context_tokens": 131072,
|
||||
"compression_threshold_tokens": 91750,
|
||||
},
|
||||
"cfchatqwen-qwen3-235b-a22b-thinking": {
|
||||
"max_context_tokens": 131072,
|
||||
"compression_threshold_tokens": 91750,
|
||||
},
|
||||
"cfchatqwen-qwen3-coder-flash-search": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-coder-flash": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-max-2025-10-30": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-max-2025-10-30-thinking": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-max-2025-10-30-thinking-search": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"modelscope-Qwen/Qwen3-235B-A22B-Instruct-2507": {
|
||||
"max_context_tokens": 262144,
|
||||
"compression_threshold_tokens": 183500,
|
||||
},
|
||||
"cfchatqwen-qwen3-vl-30b-a3b": {
|
||||
"max_context_tokens": 131072,
|
||||
"compression_threshold_tokens": 91750,
|
||||
},
|
||||
"cfchatqwen-qwen3-vl-30b-a3b-thinking": {
|
||||
"max_context_tokens": 131072,
|
||||
"compression_threshold_tokens": 91750,
|
||||
},
|
||||
# Gemini
|
||||
"gemini-2.5-pro-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.5-flash-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.5-flash": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.5-flash-lite": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.5-flash-lite-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.5-pro": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.0-flash-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.0-flash": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.0-flash-exp": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-2.0-flash-lite": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"copilot-gemini-2.5-pro": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gemini-pro-latest": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-3-pro-preview": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gemini-pro-latest-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-flash-latest": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-flash-latest-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-flash-lite-latest-search": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-flash-lite-latest": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
"gemini-robotics-er-1.5-preview": {
|
||||
"max_context_tokens": 1048576,
|
||||
"compression_threshold_tokens": 734000,
|
||||
},
|
||||
# DeepSeek
|
||||
"modelscope-deepseek-ai/DeepSeek-V3.1": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfdeepseek-deepseek-search": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"openrouter-deepseek/deepseek-r1-0528:free": {
|
||||
"max_context_tokens": 163840,
|
||||
"compression_threshold_tokens": 114688,
|
||||
},
|
||||
"modelscope-deepseek-ai/DeepSeek-V3.2-Exp": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfdeepseek-deepseek-r1-search": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfdeepseek-deepseek-r1": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"openrouter-deepseek/deepseek-chat-v3.1:free": {
|
||||
"max_context_tokens": 163800,
|
||||
"compression_threshold_tokens": 114660,
|
||||
},
|
||||
"modelscope-deepseek-ai/DeepSeek-R1-0528": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfdeepseek-deepseek": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
# Kimi (Moonshot)
|
||||
"cfkimi-kimi-k2-search": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfkimi-kimi-k1.5-search": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfkimi-kimi-k1.5-thinking-search": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfkimi-kimi-research": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"openrouter-moonshotai/kimi-k2:free": {
|
||||
"max_context_tokens": 32768,
|
||||
"compression_threshold_tokens": 22937,
|
||||
},
|
||||
"cfkimi-kimi-k2": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"cfkimi-kimi-k1.5": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
# GPT / OpenAI
|
||||
"gpt-4.1": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gpt-4o": {
|
||||
"max_context_tokens": 64000,
|
||||
"compression_threshold_tokens": 44800,
|
||||
},
|
||||
"gpt-5": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"github-gpt-4.1": {
|
||||
"max_context_tokens": 7500,
|
||||
"compression_threshold_tokens": 5250,
|
||||
},
|
||||
"gpt-5-mini": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gpt-5.1": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gpt-5.1-codex": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gpt-5.1-codex-mini": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"gpt-5-codex": {
|
||||
"max_context_tokens": 200000,
|
||||
"compression_threshold_tokens": 140000,
|
||||
},
|
||||
"github-gpt-4.1-mini": {
|
||||
"max_context_tokens": 7500,
|
||||
"compression_threshold_tokens": 5250,
|
||||
},
|
||||
"openrouter-openai/gpt-oss-20b:free": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
# Claude / Anthropic
|
||||
"claude-sonnet-4.5": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"claude-haiku-4.5": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"copilot-claude-opus-41": {
|
||||
"max_context_tokens": 80000,
|
||||
"compression_threshold_tokens": 56000,
|
||||
},
|
||||
"copilot-claude-sonnet-4": {
|
||||
"max_context_tokens": 80000,
|
||||
"compression_threshold_tokens": 56000,
|
||||
},
|
||||
# Other / OpenRouter / OSWE
|
||||
"oswe-vscode-insiders": {
|
||||
"max_context_tokens": 256000,
|
||||
"compression_threshold_tokens": 179200,
|
||||
},
|
||||
"modelscope-MiniMax/MiniMax-M2": {
|
||||
"max_context_tokens": 204800,
|
||||
"compression_threshold_tokens": 143360,
|
||||
},
|
||||
"oswe-vscode-prime": {
|
||||
"max_context_tokens": 200000,
|
||||
"compression_threshold_tokens": 140000,
|
||||
},
|
||||
"grok-code-fast-1": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"copilot-auto": {
|
||||
"max_context_tokens": 128000,
|
||||
"compression_threshold_tokens": 89600,
|
||||
},
|
||||
"modelscope-ZhipuAI/GLM-4.6": {
|
||||
"max_context_tokens": 32000,
|
||||
"compression_threshold_tokens": 22400,
|
||||
},
|
||||
"openrouter-x-ai/grok-4.1-fast:free": {
|
||||
"max_context_tokens": 2000000,
|
||||
"compression_threshold_tokens": 1400000,
|
||||
},
|
||||
"openrouter-qwen/qwen3-coder:free": {
|
||||
"max_context_tokens": 262000,
|
||||
"compression_threshold_tokens": 183400,
|
||||
},
|
||||
"openrouter-qwen/qwen3-235b-a22b:free": {
|
||||
"max_context_tokens": 40960,
|
||||
"compression_threshold_tokens": 28672,
|
||||
},
|
||||
},
|
||||
default={},
|
||||
description="Threshold override configuration for specific models. Only includes models requiring special configuration.",
|
||||
)
|
||||
|
||||
@@ -847,7 +494,7 @@ class Filter:
|
||||
return record.summary
|
||||
return None
|
||||
|
||||
def _count_tokens(self, text: str, model: str = "gpt-3.5-turbo") -> int:
|
||||
def _count_tokens(self, text: str) -> int:
|
||||
"""Counts the number of tokens in the text."""
|
||||
if not text:
|
||||
return 0
|
||||
@@ -866,9 +513,7 @@ class Filter:
|
||||
# Fallback strategy: Rough estimation (1 token ≈ 4 chars)
|
||||
return len(text) // 4
|
||||
|
||||
def _calculate_messages_tokens(
|
||||
self, messages: List[Dict], model: str = "gpt-3.5-turbo"
|
||||
) -> int:
|
||||
def _calculate_messages_tokens(self, messages: List[Dict]) -> int:
|
||||
"""Calculates the total tokens for a list of messages."""
|
||||
total_tokens = 0
|
||||
for msg in messages:
|
||||
@@ -879,9 +524,9 @@ class Filter:
|
||||
for part in content:
|
||||
if isinstance(part, dict) and part.get("type") == "text":
|
||||
text_content += part.get("text", "")
|
||||
total_tokens += self._count_tokens(text_content, model)
|
||||
total_tokens += self._count_tokens(text_content)
|
||||
else:
|
||||
total_tokens += self._count_tokens(str(content), model)
|
||||
total_tokens += self._count_tokens(str(content))
|
||||
return total_tokens
|
||||
|
||||
def _get_model_thresholds(self, model_id: str) -> Dict[str, int]:
|
||||
@@ -1101,7 +746,7 @@ class Filter:
|
||||
|
||||
# Calculate Token count in a background thread
|
||||
current_tokens = await asyncio.to_thread(
|
||||
self._calculate_messages_tokens, messages, model
|
||||
self._calculate_messages_tokens, messages
|
||||
)
|
||||
|
||||
if self.valves.debug_mode:
|
||||
@@ -1197,7 +842,7 @@ class Filter:
|
||||
|
||||
# Calculate current total Tokens (using summary model for counting)
|
||||
total_tokens = await asyncio.to_thread(
|
||||
self._calculate_messages_tokens, messages, summary_model_id
|
||||
self._calculate_messages_tokens, messages
|
||||
)
|
||||
|
||||
if total_tokens > max_context_tokens:
|
||||
@@ -1213,9 +858,7 @@ class Filter:
|
||||
|
||||
while removed_tokens < excess_tokens and middle_messages:
|
||||
msg_to_remove = middle_messages.pop(0)
|
||||
msg_tokens = self._count_tokens(
|
||||
str(msg_to_remove.get("content", "")), summary_model_id
|
||||
)
|
||||
msg_tokens = self._count_tokens(str(msg_to_remove.get("content", "")))
|
||||
removed_tokens += msg_tokens
|
||||
removed_count += 1
|
||||
|
||||
@@ -1269,7 +912,7 @@ class Filter:
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"description": f"Context summary updated (Saved {len(middle_messages)} messages)",
|
||||
"description": f"Context summary updated (Compressed {len(middle_messages)} messages)",
|
||||
"done": True,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -367,20 +367,26 @@ class Filter:
|
||||
)
|
||||
# Token 相关参数
|
||||
compression_threshold_tokens: int = Field(
|
||||
default=64000, ge=0, description="当上下文总 Token 数超过此值时,触发压缩 (全局默认值)"
|
||||
default=64000,
|
||||
ge=0,
|
||||
description="当上下文总 Token 数超过此值时,触发压缩 (全局默认值)",
|
||||
)
|
||||
max_context_tokens: int = Field(
|
||||
default=128000, ge=0, description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)"
|
||||
default=128000,
|
||||
ge=0,
|
||||
description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)",
|
||||
)
|
||||
model_thresholds: dict = Field(
|
||||
default={},
|
||||
description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。"
|
||||
description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。",
|
||||
)
|
||||
|
||||
|
||||
keep_first: int = Field(
|
||||
default=1, ge=0, description="始终保留最初的 N 条消息。设置为 0 则不保留。"
|
||||
)
|
||||
keep_last: int = Field(default=6, ge=0, description="始终保留最近的 N 条完整消息。")
|
||||
keep_last: int = Field(
|
||||
default=6, ge=0, description="始终保留最近的 N 条完整消息。"
|
||||
)
|
||||
summary_model: str = Field(
|
||||
default=None,
|
||||
description="用于生成摘要的模型 ID。留空则使用当前对话的模型。用于匹配 model_thresholds 中的配置。",
|
||||
@@ -404,15 +410,15 @@ class Filter:
|
||||
session = self._SessionLocal()
|
||||
try:
|
||||
# 查找现有记录
|
||||
existing = (
|
||||
session.query(ChatSummary).filter_by(chat_id=chat_id).first()
|
||||
)
|
||||
existing = session.query(ChatSummary).filter_by(chat_id=chat_id).first()
|
||||
|
||||
if existing:
|
||||
# [优化] 乐观锁检查:只有进度向前推进时才更新
|
||||
if compressed_count <= existing.compressed_message_count:
|
||||
if self.valves.debug_mode:
|
||||
print(f"[存储] 跳过更新:新进度 ({compressed_count}) 不大于现有进度 ({existing.compressed_message_count})")
|
||||
print(
|
||||
f"[存储] 跳过更新:新进度 ({compressed_count}) 不大于现有进度 ({existing.compressed_message_count})"
|
||||
)
|
||||
return
|
||||
|
||||
# 更新现有记录
|
||||
@@ -471,11 +477,11 @@ class Filter:
|
||||
return record.summary
|
||||
return None
|
||||
|
||||
def _count_tokens(self, text: str, model: str = "gpt-3.5-turbo") -> int:
|
||||
def _count_tokens(self, text: str) -> int:
|
||||
"""计算文本的 Token 数量"""
|
||||
if not text:
|
||||
return 0
|
||||
|
||||
|
||||
if tiktoken:
|
||||
try:
|
||||
# 统一使用 o200k_base 编码 (适配最新模型)
|
||||
@@ -484,11 +490,13 @@ class Filter:
|
||||
except Exception as e:
|
||||
if self.valves.debug_mode:
|
||||
print(f"[Token计数] tiktoken 错误: {e},回退到字符估算")
|
||||
|
||||
|
||||
# 回退策略:粗略估算 (1 token ≈ 4 chars)
|
||||
return len(text) // 4
|
||||
|
||||
def _calculate_messages_tokens(self, messages: List[Dict], model: str = "gpt-3.5-turbo") -> int:
|
||||
def _calculate_messages_tokens(
|
||||
self, messages: List[Dict]
|
||||
) -> int:
|
||||
"""计算消息列表的总 Token 数"""
|
||||
total_tokens = 0
|
||||
for msg in messages:
|
||||
@@ -499,14 +507,14 @@ class Filter:
|
||||
for part in content:
|
||||
if isinstance(part, dict) and part.get("type") == "text":
|
||||
text_content += part.get("text", "")
|
||||
total_tokens += self._count_tokens(text_content, model)
|
||||
total_tokens += self._count_tokens(text_content)
|
||||
else:
|
||||
total_tokens += self._count_tokens(str(content), model)
|
||||
total_tokens += self._count_tokens(str(content))
|
||||
return total_tokens
|
||||
|
||||
def _get_model_thresholds(self, model_id: str) -> Dict[str, int]:
|
||||
"""获取特定模型的阈值配置
|
||||
|
||||
|
||||
优先级:
|
||||
1. 如果 model_thresholds 中存在该模型ID的配置,使用该配置
|
||||
2. 否则使用全局参数 compression_threshold_tokens 和 max_context_tokens
|
||||
@@ -516,14 +524,14 @@ class Filter:
|
||||
if self.valves.debug_mode:
|
||||
print(f"[配置] 使用模型特定配置: {model_id}")
|
||||
return self.valves.model_thresholds[model_id]
|
||||
|
||||
|
||||
# 使用全局默认配置
|
||||
if self.valves.debug_mode:
|
||||
print(f"[配置] 模型 {model_id} 未在 model_thresholds 中,使用全局参数")
|
||||
|
||||
|
||||
return {
|
||||
"compression_threshold_tokens": self.valves.compression_threshold_tokens,
|
||||
"max_context_tokens": self.valves.max_context_tokens
|
||||
"max_context_tokens": self.valves.max_context_tokens,
|
||||
}
|
||||
|
||||
def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict:
|
||||
@@ -584,36 +592,36 @@ class Filter:
|
||||
# 记录原始消息的目标压缩进度,供 outlet 使用
|
||||
# 目标是压缩到倒数第 keep_last 条之前
|
||||
target_compressed_count = max(0, len(messages) - self.valves.keep_last)
|
||||
|
||||
|
||||
# [优化] 简单的状态清理检查
|
||||
if chat_id in self.temp_state:
|
||||
if self.valves.debug_mode:
|
||||
if self.valves.debug_mode:
|
||||
print(f"[Inlet] ⚠️ 覆盖未消费的旧状态 (Chat ID: {chat_id})")
|
||||
|
||||
|
||||
self.temp_state[chat_id] = target_compressed_count
|
||||
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"[Inlet] 记录目标压缩进度: {target_compressed_count}")
|
||||
|
||||
# 加载摘要记录
|
||||
summary_record = await asyncio.to_thread(self._load_summary_record, chat_id)
|
||||
|
||||
|
||||
final_messages = []
|
||||
|
||||
|
||||
if summary_record:
|
||||
# 存在摘要,构建视图:[Head] + [Summary Message] + [Tail]
|
||||
# Tail 是从上次压缩点之后的所有消息
|
||||
compressed_count = summary_record.compressed_message_count
|
||||
|
||||
|
||||
# 确保 compressed_count 合理
|
||||
if compressed_count > len(messages):
|
||||
compressed_count = max(0, len(messages) - self.valves.keep_last)
|
||||
|
||||
|
||||
# 1. 头部消息 (Keep First)
|
||||
head_messages = []
|
||||
if self.valves.keep_first > 0:
|
||||
head_messages = messages[:self.valves.keep_first]
|
||||
|
||||
head_messages = messages[: self.valves.keep_first]
|
||||
|
||||
# 2. 摘要消息 (作为 User 消息插入)
|
||||
summary_content = (
|
||||
f"【系统提示:以下是历史对话的摘要,仅供参考上下文,请勿对摘要内容进行回复,直接回答后续的最新问题】\n\n"
|
||||
@@ -622,14 +630,14 @@ class Filter:
|
||||
f"以下是最近的对话:"
|
||||
)
|
||||
summary_msg = {"role": "user", "content": summary_content}
|
||||
|
||||
|
||||
# 3. 尾部消息 (Tail) - 从上次压缩点开始的所有消息
|
||||
# 注意:这里必须确保不重复包含头部消息
|
||||
start_index = max(compressed_count, self.valves.keep_first)
|
||||
tail_messages = messages[start_index:]
|
||||
|
||||
|
||||
final_messages = head_messages + [summary_msg] + tail_messages
|
||||
|
||||
|
||||
# 发送状态通知
|
||||
if __event_emitter__:
|
||||
await __event_emitter__(
|
||||
@@ -641,15 +649,17 @@ class Filter:
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"[Inlet] 应用摘要: Head({len(head_messages)}) + Summary + Tail({len(tail_messages)})")
|
||||
print(
|
||||
f"[Inlet] 应用摘要: Head({len(head_messages)}) + Summary + Tail({len(tail_messages)})"
|
||||
)
|
||||
else:
|
||||
# 没有摘要,使用原始消息
|
||||
final_messages = messages
|
||||
|
||||
body["messages"] = final_messages
|
||||
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"[Inlet] 最终发送: {len(body['messages'])} 条消息")
|
||||
print(f"{'='*60}\n")
|
||||
@@ -701,17 +711,19 @@ class Filter:
|
||||
"""
|
||||
try:
|
||||
messages = body.get("messages", [])
|
||||
|
||||
|
||||
# 获取当前模型的阈值配置
|
||||
thresholds = self._get_model_thresholds(model)
|
||||
compression_threshold_tokens = thresholds.get("compression_threshold_tokens", self.valves.compression_threshold_tokens)
|
||||
compression_threshold_tokens = thresholds.get(
|
||||
"compression_threshold_tokens", self.valves.compression_threshold_tokens
|
||||
)
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"\n[🔍 后台计算] 开始 Token 计数...")
|
||||
|
||||
# 在后台线程中计算 Token 数
|
||||
current_tokens = await asyncio.to_thread(
|
||||
self._calculate_messages_tokens, messages, model
|
||||
self._calculate_messages_tokens, messages
|
||||
)
|
||||
|
||||
if self.valves.debug_mode:
|
||||
@@ -762,18 +774,22 @@ class Filter:
|
||||
if target_compressed_count is None:
|
||||
target_compressed_count = max(0, len(messages) - self.valves.keep_last)
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] ⚠️ 无法获取 inlet 状态,使用当前消息数估算进度: {target_compressed_count}")
|
||||
|
||||
print(
|
||||
f"[🤖 异步摘要任务] ⚠️ 无法获取 inlet 状态,使用当前消息数估算进度: {target_compressed_count}"
|
||||
)
|
||||
|
||||
# 2. 确定待压缩的消息范围 (Middle)
|
||||
start_index = self.valves.keep_first
|
||||
end_index = len(messages) - self.valves.keep_last
|
||||
if self.valves.keep_last == 0:
|
||||
end_index = len(messages)
|
||||
|
||||
|
||||
# 确保索引有效
|
||||
if start_index >= end_index:
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] 中间消息为空 (Start: {start_index}, End: {end_index}),跳过")
|
||||
print(
|
||||
f"[🤖 异步摘要任务] 中间消息为空 (Start: {start_index}, End: {end_index}),跳过"
|
||||
)
|
||||
return
|
||||
|
||||
middle_messages = messages[start_index:end_index]
|
||||
@@ -784,36 +800,48 @@ class Filter:
|
||||
# 3. 检查 Token 上限并截断 (Max Context Truncation)
|
||||
# [优化] 使用摘要模型(如果有)的阈值来决定能处理多少中间消息
|
||||
# 这样可以用长窗口模型(如 gemini-flash)来压缩超过当前模型窗口的历史记录
|
||||
summary_model_id = self.valves.summary_model or body.get("model", "gpt-3.5-turbo")
|
||||
|
||||
summary_model_id = self.valves.summary_model or body.get("model")
|
||||
|
||||
thresholds = self._get_model_thresholds(summary_model_id)
|
||||
# 注意:这里使用的是摘要模型的最大上下文限制
|
||||
max_context_tokens = thresholds.get("max_context_tokens", self.valves.max_context_tokens)
|
||||
|
||||
max_context_tokens = thresholds.get(
|
||||
"max_context_tokens", self.valves.max_context_tokens
|
||||
)
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] 使用模型 {summary_model_id} 的上限: {max_context_tokens} Tokens")
|
||||
|
||||
print(
|
||||
f"[🤖 异步摘要任务] 使用模型 {summary_model_id} 的上限: {max_context_tokens} Tokens"
|
||||
)
|
||||
|
||||
# 计算当前总 Token (使用摘要模型进行计数)
|
||||
total_tokens = await asyncio.to_thread(self._calculate_messages_tokens, messages, summary_model_id)
|
||||
|
||||
total_tokens = await asyncio.to_thread(
|
||||
self._calculate_messages_tokens, messages, summary_model_id
|
||||
)
|
||||
|
||||
if total_tokens > max_context_tokens:
|
||||
excess_tokens = total_tokens - max_context_tokens
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] ⚠️ 总 Token ({total_tokens}) 超过摘要模型上限 ({max_context_tokens}),需要移除约 {excess_tokens} Token")
|
||||
|
||||
print(
|
||||
f"[🤖 异步摘要任务] ⚠️ 总 Token ({total_tokens}) 超过摘要模型上限 ({max_context_tokens}),需要移除约 {excess_tokens} Token"
|
||||
)
|
||||
|
||||
# 从 middle_messages 头部开始移除
|
||||
removed_tokens = 0
|
||||
removed_count = 0
|
||||
|
||||
|
||||
while removed_tokens < excess_tokens and middle_messages:
|
||||
msg_to_remove = middle_messages.pop(0)
|
||||
msg_tokens = self._count_tokens(str(msg_to_remove.get("content", "")), summary_model_id)
|
||||
msg_tokens = self._count_tokens(
|
||||
str(msg_to_remove.get("content", ""))
|
||||
)
|
||||
removed_tokens += msg_tokens
|
||||
removed_count += 1
|
||||
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] 已移除 {removed_count} 条消息,共 {removed_tokens} Token")
|
||||
|
||||
print(
|
||||
f"[🤖 异步摘要任务] 已移除 {removed_count} 条消息,共 {removed_tokens} Token"
|
||||
)
|
||||
|
||||
if not middle_messages:
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] 截断后中间消息为空,跳过摘要生成")
|
||||
@@ -824,7 +852,7 @@ class Filter:
|
||||
|
||||
# 5. 调用 LLM 生成新摘要
|
||||
# 注意:这里不再传入 previous_summary,因为旧摘要(如果有)已经包含在 middle_messages 里了
|
||||
|
||||
|
||||
# 发送开始生成摘要的状态通知
|
||||
if __event_emitter__:
|
||||
await __event_emitter__(
|
||||
@@ -837,13 +865,17 @@ class Filter:
|
||||
}
|
||||
)
|
||||
|
||||
new_summary = await self._call_summary_llm(None, conversation_text, body, user_data)
|
||||
new_summary = await self._call_summary_llm(
|
||||
None, conversation_text, body, user_data
|
||||
)
|
||||
|
||||
# 6. 保存新摘要
|
||||
if self.valves.debug_mode:
|
||||
print("[优化] 正在后台线程中保存摘要,以避免阻塞事件循环。")
|
||||
|
||||
await asyncio.to_thread(self._save_summary, chat_id, new_summary, target_compressed_count)
|
||||
|
||||
await asyncio.to_thread(
|
||||
self._save_summary, chat_id, new_summary, target_compressed_count
|
||||
)
|
||||
|
||||
# 发送完成状态通知
|
||||
if __event_emitter__:
|
||||
@@ -851,7 +883,7 @@ class Filter:
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"description": f"上下文摘要已更新 (节省 {len(middle_messages)} 条消息)",
|
||||
"description": f"上下文摘要已更新 (已压缩 {len(middle_messages)} 条消息)",
|
||||
"done": True,
|
||||
},
|
||||
}
|
||||
@@ -859,11 +891,14 @@ class Filter:
|
||||
|
||||
if self.valves.debug_mode:
|
||||
print(f"[🤖 异步摘要任务] ✅ 完成!新摘要长度: {len(new_summary)} 字符")
|
||||
print(f"[🤖 异步摘要任务] 进度更新: 已压缩至原始第 {target_compressed_count} 条消息")
|
||||
print(
|
||||
f"[🤖 异步摘要任务] 进度更新: 已压缩至原始第 {target_compressed_count} 条消息"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"[🤖 异步摘要任务] ❌ 错误: {str(e)}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
|
||||
def _format_messages_for_summary(self, messages: list) -> str:
|
||||
@@ -893,7 +928,11 @@ class Filter:
|
||||
return "\n\n".join(formatted)
|
||||
|
||||
async def _call_summary_llm(
|
||||
self, previous_summary: Optional[str], new_conversation_text: str, body: dict, user_data: dict
|
||||
self,
|
||||
previous_summary: Optional[str],
|
||||
new_conversation_text: str,
|
||||
body: dict,
|
||||
user_data: dict,
|
||||
) -> str:
|
||||
"""
|
||||
使用 Open WebUI 内置方法调用 LLM 生成摘要
|
||||
@@ -960,7 +999,7 @@ class Filter:
|
||||
if self.valves.debug_mode:
|
||||
print("[优化] 正在后台线程中获取用户对象,以避免阻塞事件循环。")
|
||||
user = await asyncio.to_thread(Users.get_user_by_id, user_id)
|
||||
|
||||
|
||||
if not user:
|
||||
raise ValueError(f"无法找到用户: {user_id}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user