feat: 更新插件作者信息并将深度阅读插件本地化为英文

This commit is contained in:
fujie
2025-12-20 14:27:37 +08:00
parent eaa6319991
commit 39eb7d00ee
11 changed files with 353 additions and 479 deletions

View File

@@ -1,9 +1,11 @@
"""
title: 导出到Excel
title: Export to Excel
author: Fu-Jie
description: 从最后一条AI回答消息中提取Markdown表格到Excel文件并在浏览器中触发下载。支持多表并自动根据标题命名
icon_url: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48IS0tIFVwbG9hZGVkIHRvOiBTVkcgUmVwbywgd3d3LnN2Z3JlcG8uY29tLCBHZW5lcmF0b3I6IFNWRyBSZXBvIE1peGVyIFRvb2xzIC0tPgo8c3ZnIHdpZHRoPSI4MDBweCIgaGVpZ2h0PSI4MDBweCIgdmlld0JveD0iMCAtMS4yNyAxMTAuMDM3IDExMC4wMzciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHBhdGggZD0iTTU3LjU1IDBoNy40MjV2MTBjMTIuNTEzIDAgMjUuMDI1LjAyNSAzNy41MzctLjAzOCAyLjExMy4wODcgNC40MzgtLjA2MiA2LjI3NSAxLjIgMS4yODcgMS44NSAxLjEzOCA0LjIgMS4yMjUgNi4zMjUtLjA2MiAyMS43LS4wMzcgNDMuMzg4LS4wMjQgNjUuMDc1LS4wNjIgMy42MzguMzM3IDcuMzUtLjQyNSAxMC45MzgtLjUgMi42LTMuNjI1IDIuNjYyLTUuNzEzIDIuNzUtMTIuOTUuMDM3LTI1LjkxMi0uMDI1LTM4Ljg3NSAwdjExLjI1aC03Ljc2M2MtMTkuMDUtMy40NjMtMzguMTM4LTYuNjYyLTU3LjIxMi0xMFYxMC4wMTNDMTkuMTg4IDYuNjc1IDM4LjM3NSAzLjM4OCA1Ny41NSAweiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik02NC45NzUgMTMuNzVoNDEuMjVWOTIuNWgtNDEuMjVWODVoMTB2LTguNzVoLTEwdi01aDEwVjYyLjVoLTEwdi01aDEwdi04Ljc1aC0xMHYtNWgxMFYzNWgtMTB2LTVoMTB2LTguNzVoLTEwdi03LjV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAyMS4yNWgxNy41VjMwaC0xNy41di04Ljc1eiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik0zNy4wMjUgMzIuOTYyYzIuODI1LS4yIDUuNjYzLS4zNzUgOC41LS41MTJhMjYwNy4zNDQgMjYwNy4zNDQgMCAwIDEtMTAuMDg3IDIwLjQ4N2MzLjQzOCA3IDYuOTQ5IDEzLjk1IDEwLjM5OSAyMC45NSBhNzE2LjI4IDcxNi4yOCAwIDAgMS05LjAyNC0uNTc1Yy0yLjEyNS01LjIxMy00LjcxMy0xMC4yNS02LjIzOC0xNS43Yy0xLjY5OSA1LjA3NS00LjEyNSA5Ljg2Mi02LjA3NCAxNC44MzgtMi43MzgtLjAzOC01LjQ3Ni0uMTUtOC4yMTMtLjI2M0MxOS41IDY1LjkgMjIuNiA1OS41NjIgMjUuOTEyIDUzLjMxMmMtMi44MTItNi40MzgtNS45LTEyLjc1LTguOC0xOS4xNSAyLjc1LS4xNjMgNS41LS4zMjUgOC4yNS0uNDc1IDEuODYyIDQuODg4IDMuODk5IDkuNzEyIDUuNDM4IDE0LjcyNSAxLjY0OS01LjMxMiA0LjExMi0xMC4zMTIgNi4yMjUtMTUuNDV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAzNWgxNy41djguNzVoLTE3LjVWMzV6TTc5Ljk3NSA0OC43NWgxNy41djguNzVoLTE3LjV2LTguNzV6TTc5Ljk3NSA2Mi41aDE3LjV2OC43NWgtMTcuNVY2Mi41ek03OS45NzUgNzYuMjVoMTcuNVY4NWgtMTcuNXYtOC43NXoiIGZpbGw9IiMyMDcyNDUiLz48L3N2Zz4=
version: 0.1.0
author_url: https://github.com/Fu-Jie
funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.3.3
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDAgMCAwIDEtMiAySDZhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4=
description: Exports the current chat history to an Excel (.xlsx) file, with automatic header extraction.
"""
import os
@@ -37,17 +39,17 @@ class Action:
print(f"action:{__name__}")
if isinstance(__user__, (list, tuple)):
user_language = (
__user__[0].get("language", "zh-CN") if __user__ else "zh-CN"
__user__[0].get("language", "en-US") if __user__ else "en-US"
)
user_name = __user__[0].get("name", "用户") if __user__[0] else "用户"
user_name = __user__[0].get("name", "User") if __user__[0] else "User"
user_id = (
__user__[0]["id"]
if __user__ and "id" in __user__[0]
else "unknown_user"
)
elif isinstance(__user__, dict):
user_language = __user__.get("language", "zh-CN")
user_name = __user__.get("name", "用户")
user_language = __user__.get("language", "en-US")
user_name = __user__.get("name", "User")
user_id = __user__.get("id", "unknown_user")
if __event_emitter__:
@@ -56,7 +58,7 @@ class Action:
await __event_emitter__(
{
"type": "status",
"data": {"description": "正在保存到文件...", "done": False},
"data": {"description": "Saving to file...", "done": False},
}
)
@@ -65,18 +67,18 @@ class Action:
tables = self.extract_tables_from_message(message_content)
if not tables:
raise HTTPException(status_code=400, detail="未找到任何表格。")
raise HTTPException(status_code=400, detail="No tables found.")
# 获取动态文件名和sheet名称
# Get dynamic filename and sheet names
workbook_name, sheet_names = self.generate_names_from_content(
message_content, tables
)
# 使用优化后的文件名生成逻辑
# Use optimized filename generation logic
current_datetime = datetime.datetime.now()
formatted_date = current_datetime.strftime("%Y%m%d")
# 如果没找到标题则使用 user_yyyymmdd 格式
# If no title found, use user_yyyymmdd format
if not workbook_name:
workbook_name = f"{user_name}_{formatted_date}"
@@ -87,10 +89,10 @@ class Action:
os.makedirs(os.path.dirname(excel_file_path), exist_ok=True)
# 保存表格到Excel使用符合中国规范的格式化功能
# Save tables to Excel (using enhanced formatting)
self.save_tables_to_excel_enhanced(tables, excel_file_path, sheet_names)
# 触发文件下载
# Trigger file download
if __event_call__:
with open(excel_file_path, "rb") as file:
file_content = file.read()
@@ -121,7 +123,7 @@ class Action:
URL.revokeObjectURL(url);
document.body.removeChild(a);
}} catch (error) {{
console.error('触发下载时出错:', error);
console.error('Error triggering download:', error);
}}
"""
},
@@ -130,15 +132,15 @@ class Action:
await __event_emitter__(
{
"type": "status",
"data": {"description": "输出已保存", "done": True},
"data": {"description": "File saved", "done": True},
}
)
# 清理临时文件
# Clean up temp file
if os.path.exists(excel_file_path):
os.remove(excel_file_path)
return {"message": "下载事件已触发"}
return {"message": "Download triggered"}
except HTTPException as e:
print(f"Error processing tables: {str(e.detail)}")
@@ -146,13 +148,13 @@ class Action:
{
"type": "status",
"data": {
"description": f"保存文件时出错: {e.detail}",
"description": f"Error saving file: {e.detail}",
"done": True,
},
}
)
await self._send_notification(
__event_emitter__, "error", "没有找到可以导出的表格!"
__event_emitter__, "error", "No tables found to export!"
)
raise e
except Exception as e:
@@ -161,22 +163,22 @@ class Action:
{
"type": "status",
"data": {
"description": f"保存文件时出错: {str(e)}",
"description": f"Error saving file: {str(e)}",
"done": True,
},
}
)
await self._send_notification(
__event_emitter__, "error", "没有找到可以导出的表格!"
__event_emitter__, "error", "No tables found to export!"
)
def extract_tables_from_message(self, message: str) -> List[Dict]:
"""
从消息文本中提取Markdown表格及位置信息
返回结构: [{
"data": 表格数据,
"start_line": 起始行号,
"end_line": 结束行号
Extract Markdown tables and their positions from message text
Returns structure: [{
"data": table data,
"start_line": start line number,
"end_line": end line number
}]
"""
table_row_pattern = r"^\s*\|.*\|.*\s*$"
@@ -190,17 +192,17 @@ class Action:
current_line += 1
if re.search(table_row_pattern, row):
if start_line is None:
start_line = current_line # 记录表格起始行
start_line = current_line # Record table start line
# 处理表格行
# Process table row
cells = [cell.strip() for cell in row.strip().strip("|").split("|")]
# 跳过分隔行
# Skip separator row
is_separator_row = all(re.fullmatch(r"[:\-]+", cell) for cell in cells)
if not is_separator_row:
current_table.append(cells)
elif current_table:
# 表格结束
# Table ends
tables.append(
{
"data": current_table,
@@ -211,7 +213,7 @@ class Action:
current_table = []
start_line = None
# 处理最后一个表格
# Process the last table
if current_table:
tables.append(
{
@@ -225,106 +227,106 @@ class Action:
def generate_names_from_content(self, content: str, tables: List[Dict]) -> tuple:
    """Derive a workbook name and one worksheet name per table from Markdown headers.

    Only Markdown headers (``#`` through ``######``) are considered; ordinary
    paragraphs are ignored.

    Naming rules:
    - Sheet name: the closest header above the table, passed through
      ``clean_sheet_name``. If there is no such header, or nothing usable is
      left after cleaning, the default ``Sheet{n}`` (1-based) is used.
    - Sheet names are made unique (case-insensitively) by appending ``_2``,
      ``_3``, ... while staying within the 31-character limit, so two tables
      under the same header no longer end up with the same sheet name.
    - Single table: the workbook takes the sheet name when that name came
      from a header; otherwise it is left empty.
    - Multiple tables: the workbook takes the first header in the document.
    - An empty workbook name tells the caller to apply its own default.

    Args:
        content: Full message text containing the tables.
        tables: Table records; each must provide a 1-based ``"start_line"``.

    Returns:
        A ``(workbook_name, sheet_names)`` tuple where ``sheet_names`` has one
        entry per element of ``tables``.
    """
    header_re = re.compile(r"^#{1,6}\s+")

    # 1. Collect every h1-h6 header with its 0-based line number (ascending).
    all_headers = [
        {"text": header_re.sub("", line).strip(), "line_num": line_num}
        for line_num, line in enumerate(content.split("\n"))
        if header_re.match(line)
    ]

    sheet_names = []
    taken = set()  # case-folded names already assigned
    first_name_from_header = False

    # 2. Pick a sheet name for each table.
    for i, table in enumerate(tables):
        table_start_line = table["start_line"] - 1  # Convert to 0-based index

        # Headers are in document order, so the last one seen before the
        # table's first line is the closest header above it.
        closest_header_text = ""
        for header in all_headers:
            if header["line_num"] >= table_start_line:
                break
            closest_header_text = header["text"]

        base_name = (
            self.clean_sheet_name(closest_header_text)
            if closest_header_text
            else ""
        )
        from_header = bool(base_name)
        if not from_header:
            # No header, or the header consisted only of characters that
            # were cleaned away: fall back to the default name.
            base_name = f"Sheet{i+1}"
        if i == 0:
            first_name_from_header = from_header

        # Resolve collisions; trim the base so base + suffix fits in 31 chars.
        candidate = base_name
        counter = 2
        while candidate.casefold() in taken:
            suffix = f"_{counter}"
            candidate = base_name[: 31 - len(suffix)] + suffix
            counter += 1
        taken.add(candidate.casefold())
        sheet_names.append(candidate)

    # 3. Determine the workbook name based on the number of tables.
    workbook_name = ""
    if len(tables) == 1:
        # Reuse the sheet name only when it was derived from a real header.
        if first_name_from_header:
            workbook_name = sheet_names[0]
    elif len(tables) > 1 and all_headers:
        # The first collected header is the first header in the document.
        workbook_name = all_headers[0]["text"]

    # 4. Clean the workbook name (an empty result defers to the caller's default).
    workbook_name = self.clean_filename(workbook_name) if workbook_name else ""

    return workbook_name, sheet_names
def clean_filename(self, name: str) -> str:
    """Return ``name`` with characters that are illegal in file names removed.

    Strips the reserved characters backslash, ``/ * ? : " < > |`` as well as
    ASCII control characters (0x00-0x1F, e.g. tab or NUL), then trims
    surrounding whitespace.

    Args:
        name: Candidate file name without extension.

    Returns:
        The cleaned name; may be empty if nothing usable remains.
    """
    # Control characters are included because they are not valid in file
    # names on Windows and can sneak in from pasted header text.
    return re.sub(r'[\\/*?:"<>|\x00-\x1f]', "", name).strip()
def clean_sheet_name(self, name: str) -> str:
    """Return ``name`` made safe for use as an Excel worksheet name.

    Removes the characters that are not allowed in sheet names (backslash,
    ``/ * ? [ ] :``), trims surrounding whitespace and apostrophes (a sheet
    name may not start or end with an apostrophe) and limits the result to
    31 characters.

    Args:
        name: Candidate sheet name, e.g. the text of a Markdown header.

    Returns:
        The cleaned name; may be empty if nothing usable remains.
    """
    edge_junk = re.compile(r"^[\s']+|[\s']+$")

    cleaned = re.sub(r"[\\/*?\[\]:]", "", name)
    cleaned = edge_junk.sub("", cleaned)
    # Trim again after cutting: the 31-character cut can expose a trailing
    # space or apostrophe that was previously in the middle of the name.
    return edge_junk.sub("", cleaned[:31])
# ======================== 符合中国规范的格式化功能 ========================
# ======================== Enhanced Formatting ========================
def calculate_text_width(self, text: str) -> float:
    """Estimate the display width of ``text`` in character-cell units.

    Wide characters count as 2 units, everything else as 1. A character is
    treated as wide when it lies in the CJK Unified Ideographs block
    (U+4E00-U+9FFF) or the CJK Symbols and Punctuation block
    (U+3000-U+303F), or when Unicode classifies it as East Asian Wide or
    Fullwidth (this covers fullwidth punctuation, kana, Hangul, etc.).

    Args:
        text: Value to measure; converted with ``str()`` before measuring.

    Returns:
        The width in units; 0 for empty or otherwise falsy input.
    """
    import unicodedata  # local import: only this method needs it

    if not text:
        return 0

    width = 0
    for char in str(text):
        is_wide = (
            "\u4e00" <= char <= "\u9fff"
            or "\u3000" <= char <= "\u303f"
            or unicodedata.east_asian_width(char) in ("W", "F")
        )
        width += 2 if is_wide else 1

    return width
def calculate_text_height(self, text: str, max_width: int = 50) -> int:
"""
计算文本显示所需的行数
根据换行符和文本长度计算
Calculate required lines for text display
Based on newlines and text length
"""
if not text:
return 1
text = str(text)
# 计算换行符导致的行数
# Calculate lines from newlines
explicit_lines = text.count("\n") + 1
# 计算因文本长度超出而需要的额外行数
# Calculate extra lines from wrapping
text_width = self.calculate_text_width(text.replace("\n", ""))
wrapped_lines = max(
1, int(text_width / max_width) + (1 if text_width % max_width > 0 else 0)
@@ -334,7 +336,7 @@ class Action:
def get_column_letter(self, col_index: int) -> str:
"""
将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...)
Convert column index to Excel column letter (A, B, C, ..., AA, AB, ...)
"""
result = ""
while col_index >= 0:
@@ -344,44 +346,42 @@ class Action:
def determine_content_type(self, header: str, values: list) -> str:
"""
根据表头和内容智能判断数据类型,符合中国官方表格规范
返回: 'number', 'date', 'sequence', 'text'
Intelligently determine data type based on header and content
Returns: 'number', 'date', 'sequence', 'text'
"""
header_lower = str(header).lower().strip()
# 检查表头关键词
# Check header keywords
number_keywords = [
"数量",
"金额",
"价格",
"费用",
"成本",
"收入",
"支出",
"总计",
"小计",
"百分比",
"quantity",
"amount",
"price",
"cost",
"revenue",
"expense",
"total",
"subtotal",
"percentage",
"%",
"比例",
"",
"数值",
"分数",
"成绩",
"得分",
"ratio",
"rate",
"value",
"score",
"points",
]
date_keywords = ["日期", "时间", "年份", "月份", "时刻", "date", "time"]
date_keywords = ["date", "time", "year", "month", "moment"]
sequence_keywords = [
"序号",
"编号",
"号码",
"排序",
"次序",
"顺序",
"no",
"no.",
"id",
"编码",
"index",
"rank",
"order",
"sequence",
"code",
]
# 检查表头
# Check header
for keyword in number_keywords:
if keyword in header_lower:
return "number"
@@ -394,13 +394,13 @@ class Action:
if keyword in header_lower:
return "sequence"
# 检查数据内容
# Check data content
if not values:
return "text"
sample_values = [
str(v).strip() for v in values[:10] if str(v).strip()
] # 取前10个非空值作为样本
] # Use first 10 non-empty values as sample
if not sample_values:
return "text"
@@ -409,22 +409,17 @@ class Action:
sequence_count = 0
for value in sample_values:
# 检查是否为数字
# Check if number
try:
float(
value.replace(",", "")
.replace("", "")
.replace("%", "")
.replace("", "")
)
float(value.replace(",", "").replace("%", ""))
numeric_count += 1
continue
except ValueError:
pass
# 检查是否为日期格式
# Check if date format
date_patterns = [
r"\d{4}[-/]\d{1,2}[-/]\d{1,2}日?",
r"\d{4}[-/]\d{1,2}[-/]\d{1,2}",
r"\d{1,2}[-/]\d{1,2}[-/]\d{4}",
r"\d{4}\d{2}\d{2}",
]
@@ -433,15 +428,15 @@ class Action:
date_count += 1
break
# 检查是否为序号格式
# Check if sequence format
if (
re.match(r"^\d+$", value) and len(value) <= 4
): # 纯数字且不超过4位可能是序号
): # Pure digits and <= 4 chars, likely sequence
sequence_count += 1
total_count = len(sample_values)
# 根据比例判断类型
# Determine type based on ratio
if numeric_count / total_count >= 0.7:
return "number"
elif date_count / total_count >= 0.7:
@@ -451,27 +446,17 @@ class Action:
else:
return "text"
def get_column_letter(self, col_index: int) -> str:
"""
将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...)
"""
result = ""
while col_index >= 0:
result = chr(65 + col_index % 26) + result
col_index = col_index // 26 - 1
return result
def save_tables_to_excel_enhanced(
self, tables: List[Dict], file_path: str, sheet_names: List[str]
):
"""
符合中国官方表格规范的Excel保存功能
Enhanced Excel saving function with standard formatting
"""
try:
with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer:
workbook = writer.book
# 定义表头样式 - 居中对齐(符合中国规范)
# Define header style - Center aligned
header_format = workbook.add_format(
{
"bold": True,
@@ -479,62 +464,62 @@ class Action:
"font_color": "white",
"bg_color": "#00abbd",
"border": 1,
"align": "center", # 表头居中
"align": "center",
"valign": "vcenter",
"text_wrap": True,
}
)
# 文本单元格样式 - 左对齐
# Text cell style - Left aligned
text_format = workbook.add_format(
{
"border": 1,
"align": "left", # 文本左对齐
"align": "left",
"valign": "vcenter",
"text_wrap": True,
}
)
# 数值单元格样式 - 右对齐
# Number cell style - Right aligned
number_format = workbook.add_format(
{"border": 1, "align": "right", "valign": "vcenter"} # 数值右对齐
{"border": 1, "align": "right", "valign": "vcenter"}
)
# 整数格式 - 右对齐
# Integer format - Right aligned
integer_format = workbook.add_format(
{
"num_format": "0",
"border": 1,
"align": "right", # 整数右对齐
"align": "right",
"valign": "vcenter",
}
)
# 小数格式 - 右对齐
# Decimal format - Right aligned
decimal_format = workbook.add_format(
{
"num_format": "0.00",
"border": 1,
"align": "right", # 小数右对齐
"align": "right",
"valign": "vcenter",
}
)
# 日期格式 - 居中对齐
# Date format - Center aligned
date_format = workbook.add_format(
{
"border": 1,
"align": "center", # 日期居中对齐
"align": "center",
"valign": "vcenter",
"text_wrap": True,
}
)
# 序号格式 - 居中对齐
# Sequence format - Center aligned
sequence_format = workbook.add_format(
{
"border": 1,
"align": "center", # 序号居中对齐
"align": "center",
"valign": "vcenter",
}
)
@@ -548,12 +533,12 @@ class Action:
print(f"Processing table {i+1} with {len(table_data)} rows")
# 获取sheet名称
# Get sheet name
sheet_name = (
sheet_names[i] if i < len(sheet_names) else f"{i+1}"
sheet_names[i] if i < len(sheet_names) else f"Sheet{i+1}"
)
# 创建DataFrame
# Create DataFrame
headers = [
str(cell).strip()
for cell in table_data[0]
@@ -561,7 +546,7 @@ class Action:
]
if not headers:
print(f"Warning: No valid headers found for table {i+1}")
headers = [f"{j+1}" for j in range(len(table_data[0]))]
headers = [f"Col{j+1}" for j in range(len(table_data[0]))]
data_rows = []
if len(table_data) > 1:
@@ -580,14 +565,14 @@ class Action:
print(f"DataFrame created with columns: {list(df.columns)}")
# 修复pandas FutureWarning - 使用try-except替代errors='ignore'
# Fix pandas FutureWarning
for col in df.columns:
try:
df[col] = pd.to_numeric(df[col])
except (ValueError, TypeError):
pass
# 先写入数据(不包含表头)
# Write data first (without header)
df.to_excel(
writer,
sheet_name=sheet_name,
@@ -597,8 +582,8 @@ class Action:
)
worksheet = writer.sheets[sheet_name]
# 应用符合中国规范的格式化
self.apply_chinese_standard_formatting(
# Apply enhanced formatting
self.apply_enhanced_formatting(
worksheet,
df,
headers,
@@ -620,7 +605,7 @@ class Action:
print(f"Error saving Excel file: {str(e)}")
raise
def apply_chinese_standard_formatting(
def apply_enhanced_formatting(
self,
worksheet,
df,
@@ -635,24 +620,24 @@ class Action:
sequence_format,
):
"""
应用符合中国官方表格规范的格式化
- 表头: 居中对齐
- 数值: 右对齐
- 文本: 左对齐
- 日期: 居中对齐
- 序号: 居中对齐
Apply enhanced formatting
- Header: Center aligned
- Number: Right aligned
- Text: Left aligned
- Date: Center aligned
- Sequence: Center aligned
"""
try:
# 1. 写入表头(居中对齐)
print(f"Writing headers with Chinese standard alignment: {headers}")
# 1. Write headers (Center aligned)
print(f"Writing headers with enhanced alignment: {headers}")
for col_idx, header in enumerate(headers):
if header and str(header).strip():
worksheet.write(0, col_idx, str(header).strip(), header_format)
else:
default_header = f"{col_idx+1}"
default_header = f"Col{col_idx+1}"
worksheet.write(0, col_idx, default_header, header_format)
# 2. 分析每列的数据类型并应用相应格式
# 2. Analyze column types
column_types = {}
for col_idx, column in enumerate(headers):
if col_idx < len(df.columns):
@@ -666,14 +651,14 @@ class Action:
else:
column_types[col_idx] = "text"
# 3. 写入并格式化数据(根据类型使用不同对齐方式)
# 3. Write and format data
for row_idx, row in df.iterrows():
for col_idx, value in enumerate(row):
content_type = column_types.get(col_idx, "text")
# 根据内容类型选择格式
# Select format based on content type
if content_type == "number":
# 数值类型 - 右对齐
# Number - Right aligned
if pd.api.types.is_numeric_dtype(df.iloc[:, col_idx]):
if pd.api.types.is_integer_dtype(df.iloc[:, col_idx]):
current_format = integer_format
@@ -691,49 +676,45 @@ class Action:
current_format = number_format
elif content_type == "date":
# 日期类型 - 居中对齐
# Date - Center aligned
current_format = date_format
elif content_type == "sequence":
# 序号类型 - 居中对齐
# Sequence - Center aligned
current_format = sequence_format
else:
# 文本类型 - 左对齐
# Text - Left aligned
current_format = text_format
worksheet.write(row_idx + 1, col_idx, value, current_format)
# 4. 自动调整列宽
# 4. Auto-adjust column width
for col_idx, column in enumerate(headers):
col_letter = self.get_column_letter(col_idx)
# 计算表头宽度
# Calculate header width
header_width = self.calculate_text_width(str(column))
# 计算数据列的最大宽度
# Calculate max data width
max_data_width = 0
if not df.empty and col_idx < len(df.columns):
for value in df.iloc[:, col_idx]:
value_width = self.calculate_text_width(str(value))
max_data_width = max(max_data_width, value_width)
# 基础宽度:取表头和数据的最大宽度
# Base width
base_width = max(header_width, max_data_width)
# 根据内容类型调整宽度
# Adjust width based on type
content_type = column_types.get(col_idx, "text")
if content_type == "sequence":
# 序号列通常比较窄
optimal_width = max(8, min(15, base_width + 2))
elif content_type == "number":
# 数值列需要额外空间显示数字
optimal_width = max(12, min(25, base_width + 3))
elif content_type == "date":
# 日期列需要固定宽度
optimal_width = max(15, min(20, base_width + 2))
else:
# 文本列根据内容调整
if base_width <= 10:
optimal_width = base_width + 3
elif base_width <= 20:
@@ -744,13 +725,11 @@ class Action:
worksheet.set_column(f"{col_letter}:{col_letter}", optimal_width)
# 5. 自动调整行高
# 设置表头行高为35点
# 5. Auto-adjust row height
worksheet.set_row(0, 35)
# 设置数据行行高
for row_idx, row in df.iterrows():
max_row_height = 20 # 中国表格规范建议的最小行高
max_row_height = 20
for col_idx, value in enumerate(row):
if col_idx < len(headers):
@@ -764,26 +743,24 @@ class Action:
col_width = 15
cell_lines = self.calculate_text_height(str(value), col_width)
cell_height = cell_lines * 20 # 每行20点高度符合中国规范
cell_height = cell_lines * 20
max_row_height = max(max_row_height, cell_height)
final_height = min(120, max_row_height)
worksheet.set_row(row_idx + 1, final_height)
print(f"Successfully applied Chinese standard formatting")
print(f"Successfully applied enhanced formatting")
except Exception as e:
print(f"Warning: Failed to apply Chinese standard formatting: {str(e)}")
# 降级到基础格式化
print(f"Warning: Failed to apply enhanced formatting: {str(e)}")
self.apply_basic_formatting_fallback(worksheet, df)
def apply_basic_formatting_fallback(self, worksheet, df):
"""
基础格式化降级方案
Basic formatting fallback
"""
try:
# 基础列宽调整
for i, column in enumerate(df.columns):
column_width = (
max(
@@ -798,7 +775,5 @@ class Action:
f"{col_letter}:{col_letter}", min(60, max(10, column_width))
)
print("Applied basic formatting fallback")
except Exception as e:
print(f"Warning: Even basic formatting failed: {str(e)}")
print(f"Error in basic formatting: {str(e)}")

View File

@@ -1,8 +1,8 @@
"""
title: 导出为 Excel
author: Antigravity
author_url: https://github.com/open-webui
funding_url: https://github.com/open-webui
author: Fu-Jie
author_url: https://github.com/Fu-Jie
funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.3.3
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDIgMCAwIDEtMiAyaC02YTIgMiAwIDAgMS0yLTJ2LTVhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4=
description: 将当前对话历史导出为 Excel (.xlsx) 文件支持自动提取表头

View File

@@ -1,11 +1,11 @@
"""
title: 闪记卡 (Flash Card)
author: Antigravity
author_url: https://github.com/open-webui
funding_url: https://github.com/open-webui
title: Flash Card
author: Fu-Jie
author_url: https://github.com/Fu-Jie
funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.2.1
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg==
description: 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类。
description: Quickly generates beautiful flashcards from text, extracting key points and categories.
"""
from pydantic import BaseModel, Field
@@ -24,20 +24,23 @@ class Action:
class Valves(BaseModel):
model_id: str = Field(
default="",
description="用于生成卡片内容的模型 ID。如果为空则使用当前模型。",
description="Model ID used for generating card content. If empty, uses the current model.",
)
min_text_length: int = Field(
default=50, description="生成闪记卡所需的最小文本长度(字符数)。"
default=50,
description="Minimum text length required to generate a flashcard (characters).",
)
max_text_length: int = Field(
default=2000,
description="建议的最大文本长度。超过此长度建议使用深度分析工具。",
description="Recommended maximum text length. For longer texts, deep analysis tools are recommended.",
)
language: str = Field(
default="zh", description="卡片内容的目标语言 (例如 'zh', 'en')。"
default="en",
description="Target language for card content (e.g., 'en', 'zh').",
)
show_status: bool = Field(
default=True, description="是否在聊天界面显示状态更新。"
default=True,
description="Whether to show status updates in the chat interface.",
)
def __init__(self):
@@ -72,7 +75,7 @@ class Action:
"type": "notification",
"data": {
"type": "warning",
"content": f"文本过短({text_length}字符),建议至少{self.valves.min_text_length}字符。",
"content": f"Text too short ({text_length} chars), recommended at least {self.valves.min_text_length} chars.",
},
}
)
@@ -85,7 +88,7 @@ class Action:
"type": "notification",
"data": {
"type": "info",
"content": f"文本较长({text_length}字符),建议使用'墨海拾贝'进行深度分析。",
"content": f"Text quite long ({text_length} chars), consider using 'Deep Reading' for deep analysis.",
},
}
)
@@ -97,7 +100,7 @@ class Action:
"type": "notification",
"data": {
"type": "info",
"content": "正在生成闪记卡...",
"content": "Generating Flash Card...",
},
}
)
@@ -110,29 +113,29 @@ class Action:
model = self.valves.model_id if self.valves.model_id else body.get("model")
system_prompt = f"""
你是一个闪记卡生成专家,专注于创建适合学习和记忆的知识卡片。你的任务是将文本提炼成简洁、易记的学习卡片。
You are a Flash Card Generation Expert, specializing in creating knowledge cards suitable for learning and memorization. Your task is to distill text into concise, easy-to-remember flashcards.
请提取以下字段,并以 JSON 格式返回:
1. "title": 创建一个简短、精准的标题6-12 字),突出核心概念
2. "summary": 用一句话总结核心要义20-40 字),要通俗易懂、便于记忆
3. "key_points": 列出 3-5 个关键记忆点(每个 10-20 字)
- 每个要点应该是独立的知识点
- 使用简洁、口语化的表达
- 避免冗长的句子
4. "tags": 列出 2-4 个分类标签(每个 2-5 字)
5. "category": 选择一个主分类(如:概念、技能、事实、方法等)
Please extract the following fields and return them in JSON format:
1. "title": Create a short, precise title (3-8 words), highlighting the core concept.
2. "summary": Summarize the core essence in one sentence (10-25 words), making it easy to understand and remember.
3. "key_points": List 3-5 key memory points (5-15 words each).
- Each point should be an independent knowledge unit.
- Use concise, conversational expression.
- Avoid long sentences.
4. "tags": List 2-4 classification tags (1-3 words each).
5. "category": Choose a main category (e.g., Concept, Skill, Fact, Method, etc.).
目标语言: {self.valves.language}
Target Language: {self.valves.language}
重要原则:
- **极简主义**: 每个要点都要精炼到极致
- **记忆优先**: 内容要便于记忆和回忆
- **核心聚焦**: 只提取最核心的知识点
- **口语化**: 使用通俗易懂的语言
- 只返回 JSON 对象,不要包含 markdown 格式
Important Principles:
- **Minimalism**: Refine each point to the extreme.
- **Memory First**: Content should be easy to memorize and recall.
- **Core Focus**: Extract only the most core knowledge points.
- **Conversational**: Use easy-to-understand language.
- Return ONLY the JSON object, do not include markdown formatting.
"""
prompt = f"请将以下文本提炼成一张学习记忆卡片:\n\n{target_message}"
prompt = f"Please refine the following text into a learning flashcard:\n\n{target_message}"
payload = {
"model": model,
@@ -163,7 +166,7 @@ class Action:
"type": "notification",
"data": {
"type": "error",
"content": "生成卡片数据失败,请重试。",
"content": "Failed to generate card data, please try again.",
},
}
)
@@ -173,11 +176,6 @@ class Action:
html_card = self.generate_html_card(card_data)
# 3. Append to message
# We append it to the user message so it shows up as part of the interaction
# Or we can append it to the assistant response if we were a Pipe, but this is an Action.
# Actions usually modify the input or trigger a side effect.
# To show the card, we can append it to the message content.
html_embed_tag = f"```html\n{html_card}\n```"
body["messages"][-1]["content"] += f"\n\n{html_embed_tag}"
@@ -187,7 +185,7 @@ class Action:
"type": "notification",
"data": {
"type": "success",
"content": "闪记卡生成成功!",
"content": "Flash Card generated successfully!",
},
}
)
@@ -202,7 +200,7 @@ class Action:
"type": "notification",
"data": {
"type": "error",
"content": f"生成知识卡片时出错: {str(e)}",
"content": f"Error generating knowledge card: {str(e)}",
},
}
)
@@ -519,7 +517,7 @@ class Action:
# Enhanced HTML structure
html = f"""<!DOCTYPE html>
<html lang="zh-CN">
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -530,20 +528,20 @@ class Action:
<div class="knowledge-card">
<div class="card-inner">
<div class="card-header">
<div class="card-category">{data.get('category', '通用知识')}</div>
<h2 class="card-title">{data.get('title', '知识卡片')}</h2>
<div class="card-category">{data.get('category', 'General Knowledge')}</div>
<h2 class="card-title">{data.get('title', 'Flash Card')}</h2>
</div>
<div class="card-body">
<div class="card-summary">
{data.get('summary', '')}
</div>
<div class="card-section-title">核心要点</div>
<div class="card-section-title">Key Points</div>
<ul class="card-points">
{''.join([f'<li>{point}</li>' for point in data.get('key_points', [])])}
</ul>
</div>
<div class="card-footer">
<span class="card-tag-label">标签</span>
<span class="card-tag-label">Tags</span>
{''.join([f'<span class="card-tag">#{tag}</span>' for tag in data.get('tags', [])])}
</div>
</div>

View File

@@ -1,11 +1,11 @@
"""
title: Flash Card
author: Antigravity
author_url: https://github.com/open-webui
funding_url: https://github.com/open-webui
title: 闪记卡 (Flash Card)
author: Fu-Jie
author_url: https://github.com/Fu-Jie
funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.2.1
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg==
description: Quickly generates beautiful flashcards from text, extracting key points and categories.
description: 快速将文本提炼为精美的学习记忆卡片支持核心要点提取与分类
"""
from pydantic import BaseModel, Field

View File

@@ -2,7 +2,7 @@
title: Smart Mind Map
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSIxMC41IiB4Mj0iNiIgeTI9IjYiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSIxMC41IiB4Mj0iMTgiIHkyPSI2Ii8+CiAgPGNpcmNsZSBjeD0iMTkiIGN5PSI1IiByPSIxLjUiLz4KICA8bGluZSB4MT0iMTAuNSIgeTE9IjEzLjUiIHgyPSI2IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iMTkiIHI9IjEuNSIvPgogIDxsaW5lIHgxPSIxMy41IiB5MT0iMTMuNSIgeDI9IjE4IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSIxOSIgY3k9IjE5IiByPSIxLjUiLz4KPC9zdmc+
version: 0.7.3
description: 智能分析长文本并生成交互式思维导图,支持 SVG/Markdown 导出。
description: Intelligently analyzes long texts and generates interactive mind maps, supporting SVG/Markdown export.
"""
from pydantic import BaseModel, Field

View File

@@ -1,8 +1,8 @@
"""
title: Deep Reading & Summary
author: Antigravity
author_url: https://github.com/open-webui
funding_url: https://github.com/open-webui
author: Fu-Jie
author_url: https://github.com/Fu-Jie
funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.1.0
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0yIDNIMGEyIDIgMCAwIDAgMiAyIi8+PHBhdGggZD0iTTIyIDNIMjBhMiAyIDAgMCAwLTIgMiIvPjxwYXRoIGQ9Ik0yIDdoMjB2MTRhMiAyIDAgMCAxLTIgMmgtMTZhMiAyIDAgMCAxLTItMnYtMTQiLz48cGF0aCBkPSJNMTEgMTJ2NiIvPjxwYXRoIGQ9Ik0xNiAxMnY2Ii8+PHBhdGggZD0iTTYgMTJ2NiIvPjwvc3ZnPg==
description: Provides deep reading analysis and summarization for long texts.
@@ -28,69 +28,69 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
# =================================================================
# 内部 LLM 提示词设计
# Internal LLM Prompts
# =================================================================
SYSTEM_PROMPT_READING_ASSISTANT = """
你是一个专业的深度文本分析专家,擅长精读长篇文本并提炼精华。你的任务是进行全面、深入的分析。
You are a professional Deep Text Analysis Expert, specializing in reading long texts and extracting the essence. Your task is to conduct a comprehensive and in-depth analysis.
请提供以下内容:
1. **详细摘要**:用 2-3 段话全面总结文本的核心内容,确保准确性和完整性。不要过于简略,要让读者充分理解文本主旨。
2. **关键信息点**:列出 5-8 个最重要的事实、观点或论据。每个信息点应该:
- 具体且有深度
- 包含必要的细节和背景
- 使用 Markdown 列表格式
3. **行动建议**:从文本中识别并提炼出具体的、可执行的行动项。每个建议应该:
- 明确且可操作
- 包含执行的优先级或时间建议
- 如果没有明确的行动项,可以提供学习建议或思考方向
Please provide the following:
1. **Detailed Summary**: Summarize the core content of the text in 2-3 paragraphs, ensuring accuracy and completeness. Do not be too brief; ensure the reader fully understands the main idea.
2. **Key Information Points**: List 5-8 most important facts, viewpoints, or arguments. Each point should:
- Be specific and insightful
- Include necessary details and context
- Use Markdown list format
3. **Actionable Advice**: Identify and refine specific, actionable items from the text. Each suggestion should:
- Be clear and actionable
- Include execution priority or timing suggestions
- If there are no clear action items, provide learning suggestions or thinking directions
请严格遵循以下指导原则:
- **语言**:所有输出必须使用用户指定的语言。
- **格式**:请严格按照以下 Markdown 格式输出,确保每个部分都有明确的标题:
## 摘要
[这里是详细的摘要内容2-3段话可以使用 Markdown 进行**加粗**或*斜体*强调重点]
Please strictly follow these guidelines:
- **Language**: All output must be in the user's specified language.
- **Format**: Please strictly follow the Markdown format below, ensuring each section has a clear header:
## Summary
[Detailed summary content here, 2-3 paragraphs, use Markdown **bold** or *italic* to emphasize key points]
## 关键信息点
- [关键点1包含具体细节和背景]
- [关键点2包含具体细节和背景]
- [关键点3包含具体细节和背景]
- [至少5个最多8个关键点]
## Key Information Points
- [Key Point 1: Include specific details and context]
- [Key Point 2: Include specific details and context]
- [Key Point 3: Include specific details and context]
- [At least 5, at most 8 key points]
## 行动建议
- [行动项1具体、可执行包含优先级]
- [行动项2具体、可执行包含优先级]
- [如果没有明确行动项,提供学习建议或思考方向]
- **深度优先**:分析要深入、全面,不要浮于表面。
- **行动导向**:重点关注可执行的建议和下一步行动。
- **只输出分析结果**:不要包含任何额外的寒暄、解释或引导性文字。
## Actionable Advice
- [Action Item 1: Specific, actionable, include priority]
- [Action Item 2: Specific, actionable, include priority]
- [If no clear action items, provide learning suggestions or thinking directions]
- **Depth First**: Analysis should be deep and comprehensive, not superficial.
- **Action Oriented**: Focus on actionable suggestions and next steps.
- **Analysis Results Only**: Do not include any extra pleasantries, explanations, or leading text.
"""
USER_PROMPT_GENERATE_SUMMARY = """
请对以下长篇文本进行深度分析,提供:
1. 详细的摘要2-3段话全面概括文本内容
2. 关键信息点列表5-8个包含具体细节
3. 可执行的行动建议(具体、明确,包含优先级)
Please conduct a deep analysis of the following long text, providing:
1. Detailed Summary (2-3 paragraphs, comprehensive overview)
2. Key Information Points List (5-8 items, including specific details)
3. Actionable Advice (Specific, clear, including priority)
---
**用户上下文信息:**
用户姓名: {user_name}
当前日期时间: {current_date_time_str}
当前星期: {current_weekday}
当前时区: {current_timezone_str}
用户语言: {user_language}
**User Context:**
User Name: {user_name}
Current Date/Time: {current_date_time_str}
Weekday: {current_weekday}
Timezone: {current_timezone_str}
User Language: {user_language}
---
**长篇文本内容:**
**Long Text Content:**
```
{long_text_content}
```
请进行深入、全面的分析,重点关注可执行的行动建议。
Please conduct a deep and comprehensive analysis, focusing on actionable advice.
"""
# =================================================================
# 前端 HTML 模板 (Jinja2 语法)
# Frontend HTML Template (Jinja2 Syntax)
# =================================================================
HTML_TEMPLATE = """
@@ -99,7 +99,7 @@ HTML_TEMPLATE = """
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>精读:深度分析报告</title>
<title>Deep Reading: Deep Analysis Report</title>
<style>
:root {
--primary-color: #4285f4;
@@ -245,29 +245,29 @@ HTML_TEMPLATE = """
<body>
<div class="container">
<div class="header">
<h1>📖 精读:深度分析报告</h1>
<h1>📖 Deep Reading: Deep Analysis Report</h1>
</div>
<div class="user-context">
<span><strong>用户:</strong> {{ user_name }}</span>
<span><strong>分析时间:</strong> {{ current_date_time_str }}</span>
<span><strong>星期:</strong> {{ current_weekday }}</span>
<span><strong>User:</strong> {{ user_name }}</span>
<span><strong>Analysis Time:</strong> {{ current_date_time_str }}</span>
<span><strong>Weekday:</strong> {{ current_weekday }}</span>
</div>
<div class="content">
<div class="section summary-section">
<h2><span class="icon">📝</span>详细摘要</h2>
<h2><span class="icon">📝</span>Detailed Summary</h2>
<div class="html-content">{{ summary_html | safe }}</div>
</div>
<div class="section keypoints-section">
<h2><span class="icon">💡</span>关键信息点</h2>
<h2><span class="icon">💡</span>Key Information Points</h2>
<div class="html-content">{{ keypoints_html | safe }}</div>
</div>
<div class="section actions-section">
<h2><span class="icon">🎯</span>行动建议</h2>
<h2><span class="icon">🎯</span>Actionable Advice</h2>
<div class="html-content">{{ actions_html | safe }}</div>
</div>
</div>
<div class="footer">
<p>&copy; {{ current_year }} 精读 - 深度文本分析服务</p>
<p>&copy; {{ current_year }} Deep Reading - Deep Text Analysis Service</p>
</div>
</div>
</body>
@@ -277,18 +277,20 @@ HTML_TEMPLATE = """
class Action:
class Valves(BaseModel):
show_status: bool = Field(
default=True, description="是否在聊天界面显示操作状态更新。"
default=True,
description="Whether to show operation status updates in the chat interface.",
)
LLM_MODEL_ID: str = Field(
default="gemini-2.5-flash",
description="用于文本分析的内置LLM模型ID。",
description="Built-in LLM Model ID used for text analysis.",
)
MIN_TEXT_LENGTH: int = Field(
default=200,
description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。",
description="Minimum text length required for deep analysis (characters). Recommended 200+.",
)
RECOMMENDED_MIN_LENGTH: int = Field(
default=500, description="建议的最小文本长度,以获得最佳分析效果。"
default=500,
description="Recommended minimum text length for best analysis results.",
)
def __init__(self):
@@ -296,16 +298,20 @@ class Action:
def _process_llm_output(self, llm_output: str) -> Dict[str, str]:
"""
解析LLMMarkdown输出,将其转换为HTML片段。
Parse LLM Markdown output and convert to HTML fragments.
"""
summary_match = re.search(
r"##\s*摘要\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL
r"##\s*Summary\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL | re.IGNORECASE
)
keypoints_match = re.search(
r"##\s*关键信息点\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL
r"##\s*Key Information Points\s*\n(.*?)(?=\n##|$)",
llm_output,
re.DOTALL | re.IGNORECASE,
)
actions_match = re.search(
r"##\s*行动建议\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL
r"##\s*Actionable Advice\s*\n(.*?)(?=\n##|$)",
llm_output,
re.DOTALL | re.IGNORECASE,
)
summary_md = summary_match.group(1).strip() if summary_match else ""
@@ -314,24 +320,26 @@ class Action:
if not any([summary_md, keypoints_md, actions_md]):
summary_md = llm_output.strip()
logger.warning("LLM输出未遵循预期的Markdown格式。将整个输出视为摘要。")
logger.warning(
"LLM output did not follow expected Markdown format. Treating entire output as summary."
)
# 使用 'nl2br' 扩展将换行符 \n 转换为 <br>
# Use 'nl2br' extension to convert newlines \n to <br>
md_extensions = ["nl2br"]
summary_html = (
markdown.markdown(summary_md, extensions=md_extensions)
if summary_md
else '<p class="no-content">未能提取摘要信息。</p>'
else '<p class="no-content">Failed to extract summary.</p>'
)
keypoints_html = (
markdown.markdown(keypoints_md, extensions=md_extensions)
if keypoints_md
else '<p class="no-content">未能提取关键信息点。</p>'
else '<p class="no-content">Failed to extract key information points.</p>'
)
actions_html = (
markdown.markdown(actions_md, extensions=md_extensions)
if actions_md
else '<p class="no-content">暂无明确的行动建议。</p>'
else '<p class="no-content">No explicit actionable advice.</p>'
)
return {
@@ -342,7 +350,7 @@ class Action:
def _build_html(self, context: dict) -> str:
"""
使用 Jinja2 模板和上下文数据构建最终的HTML内容。
Build final HTML content using Jinja2 template and context data.
"""
template = Template(HTML_TEMPLATE)
return template.render(context)
@@ -354,39 +362,39 @@ class Action:
__event_emitter__: Optional[Any] = None,
__request__: Optional[Request] = None,
) -> Optional[dict]:
logger.info("Action: 精读启动 (v2.0.0 - Deep Reading)")
logger.info("Action: Deep Reading Started (v2.0.0)")
if isinstance(__user__, (list, tuple)):
user_language = (
__user__[0].get("language", "zh-CN") if __user__ else "zh-CN"
__user__[0].get("language", "en-US") if __user__ else "en-US"
)
user_name = __user__[0].get("name", "用户") if __user__[0] else "用户"
user_name = __user__[0].get("name", "User") if __user__[0] else "User"
user_id = (
__user__[0]["id"]
if __user__ and "id" in __user__[0]
else "unknown_user"
)
elif isinstance(__user__, dict):
user_language = __user__.get("language", "zh-CN")
user_name = __user__.get("name", "用户")
user_language = __user__.get("language", "en-US")
user_name = __user__.get("name", "User")
user_id = __user__.get("id", "unknown_user")
now = datetime.now()
current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S")
current_weekday = now.strftime("%A")
current_year = now.strftime("%Y")
current_timezone_str = "未知时区"
current_timezone_str = "Unknown Timezone"
original_content = ""
try:
messages = body.get("messages", [])
if not messages or not messages[-1].get("content"):
raise ValueError("无法获取有效的用户消息内容。")
raise ValueError("Unable to get valid user message content.")
original_content = messages[-1]["content"]
if len(original_content) < self.valves.MIN_TEXT_LENGTH:
short_text_message = f"文本内容过短({len(original_content)}字符),建议至少{self.valves.MIN_TEXT_LENGTH}字符以获得有效的深度分析。\n\n💡 提示:对于短文本,建议使用'⚡ 闪记卡'进行快速提炼。"
short_text_message = f"Text content too short ({len(original_content)} chars), recommended at least {self.valves.MIN_TEXT_LENGTH} chars for effective deep analysis.\n\n💡 Tip: For short texts, consider using '⚡ Flash Card' for quick refinement."
if __event_emitter__:
await __event_emitter__(
{
@@ -408,7 +416,7 @@ class Action:
"type": "notification",
"data": {
"type": "info",
"content": f"文本长度为{len(original_content)}字符。建议{self.valves.RECOMMENDED_MIN_LENGTH}字符以上可获得更好的分析效果。",
"content": f"Text length is {len(original_content)} chars. Recommended {self.valves.RECOMMENDED_MIN_LENGTH}+ chars for best analysis results.",
},
}
)
@@ -419,7 +427,7 @@ class Action:
"type": "notification",
"data": {
"type": "info",
"content": "📖 精读已启动,正在进行深度分析...",
"content": "📖 Deep Reading started, analyzing deeply...",
},
}
)
@@ -428,7 +436,7 @@ class Action:
{
"type": "status",
"data": {
"description": "📖 精读: 深入分析文本,提炼精华...",
"description": "📖 Deep Reading: Analyzing text, extracting essence...",
"done": False,
},
}
@@ -454,7 +462,7 @@ class Action:
user_obj = Users.get_user_by_id(user_id)
if not user_obj:
raise ValueError(f"无法获取用户对象, 用户ID: {user_id}")
raise ValueError(f"Unable to get user object, User ID: {user_id}")
llm_response = await generate_chat_completion(
__request__, llm_payload, user_obj
@@ -482,7 +490,10 @@ class Action:
await __event_emitter__(
{
"type": "status",
"data": {"description": "📖 精读: 分析完成!", "done": True},
"data": {
"description": "📖 Deep Reading: Analysis complete!",
"done": True,
},
}
)
await __event_emitter__(
@@ -490,18 +501,18 @@ class Action:
"type": "notification",
"data": {
"type": "success",
"content": f"📖 精读完成,{user_name}!深度分析报告已生成。",
"content": f"📖 Deep Reading complete, {user_name}! Deep analysis report generated.",
},
}
)
except Exception as e:
error_message = f"精读处理失败: {str(e)}"
logger.error(f"精读错误: {error_message}", exc_info=True)
user_facing_error = f"抱歉, 精读在处理时遇到错误: {str(e)}\n请检查Open WebUI后端日志获取更多详情。"
error_message = f"Deep Reading processing failed: {str(e)}"
logger.error(f"Deep Reading Error: {error_message}", exc_info=True)
user_facing_error = f"Sorry, Deep Reading encountered an error while processing: {str(e)}.\nPlease check Open WebUI backend logs for more details."
body["messages"][-1][
"content"
] = f"{original_content}\n\n❌ **错误:** {user_facing_error}"
] = f"{original_content}\n\n❌ **Error:** {user_facing_error}"
if __event_emitter__:
if self.valves.show_status:
@@ -509,7 +520,7 @@ class Action:
{
"type": "status",
"data": {
"description": "精读: 处理失败。",
"description": "Deep Reading: Processing failed.",
"done": True,
},
}
@@ -519,7 +530,7 @@ class Action:
"type": "notification",
"data": {
"type": "error",
"content": f"精读处理失败, {user_name}!",
"content": f"Deep Reading processing failed, {user_name}!",
},
}
)

View File

@@ -16,6 +16,8 @@
- ✅ **灵活保留策略**: 可自由配置保留对话头部和尾部的消息数量,确保关键信息和上下文的连贯性。
- ✅ **智能注入**: 将生成的历史摘要智能地注入到新的上下文中。
详细的工作原理和流程请参考 [工作流程指南](WORKFLOW_GUIDE_CN.md)。
---
## 安装与配置
@@ -49,16 +51,51 @@
您可以在过滤器的设置中调整以下参数:
### 核心参数
| 参数 | 默认值 | 描述 |
| :--- | :--- | :--- |
| `priority` | `10` | 过滤器执行顺序,数值越小越先执行。 |
| `compression_threshold` | `15` | 当总消息数达到此值时,将在后台触发摘要生成。 |
| `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示。 |
| `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保上下文连贯。 |
| `summary_model` | `None` | 用于生成摘要的模型。**强烈建议**配置一个快速、经济的兼容模型(如 `gemini-2.5-flash`)。如果留空,将尝试使用当前对话的模型,但这可能因模型不兼容(如 Pipe 模型)而失败。 |
| `max_summary_tokens` | `4000` | 生成摘要时允许的最大 Token 数。 |
| `summary_temperature` | `0.3` | 控制摘要生成的随机性,较低的值结果更稳定。 |
| `debug_mode` | `true` | 是否在日志中打印详细的调试信息。生产环境建议设为 `false`。 |
| `compression_threshold_tokens` | `64000` | **(重要)** 当上下文总 Token 数超过此值时,将在后台触发摘要生成。建议设置为模型最大上下文窗口的 50%-70%。 |
| `max_context_tokens` | `128000` | **(重要)** 上下文的硬性上限。如果超过此值,将强制移除最早的消息(保留受保护消息除外)。防止 Token 溢出。 |
| `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示或环境变量,建议至少保留 1 条。 |
| `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保最近对话的连贯性。 |
### 摘要生成配置
| 参数 | 默认值 | 描述 |
| :--- | :--- | :--- |
| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`, `deepseek-v3`)。如果留空,将尝试使用当前对话的模型。 |
| `max_summary_tokens` | `16384` | 生成摘要时允许的最大 Token 数。 |
| `summary_temperature` | `0.1` | 控制摘要生成的随机性,较低的值结果更稳定。 |
### 高级配置
#### `model_thresholds` (模型特定阈值)
这是一个字典配置,允许您为特定的模型 ID 覆盖全局的 `compression_threshold_tokens` 和 `max_context_tokens`。这对于混合使用不同上下文窗口大小的模型非常有用。
**默认配置为空(`{}`),仅需为需要特殊阈值的模型添加条目(如 GPT-4, Claude 3.5, Gemini 1.5/2.0, Qwen 2.5/3, DeepSeek V3 等);未列出的模型将使用全局阈值。**
**配置示例:**
```json
{
"gpt-4": {
"compression_threshold_tokens": 8000,
"max_context_tokens": 32000
},
"gemini-2.5-flash": {
"compression_threshold_tokens": 734000,
"max_context_tokens": 1048576
}
}
```
#### `debug_mode`
- **默认值**: `true`
- **描述**: 是否在 Open WebUI 的控制台日志中打印详细的调试信息(如 Token 计数、压缩进度、数据库操作等)。生产环境建议设为 `false`。
---
@@ -68,10 +105,10 @@
- **解决**:请确认 `DATABASE_URL` 环境变量已正确设置,并且数据库服务运行正常。
- **问题:摘要未生成**
- **解决**:检查 `compression_threshold` 是否已达到,并确认 `summary_model` 配置正确。查看日志以获取详细错误。
- **解决**:检查 `compression_threshold_tokens` 是否已达到,并确认 `summary_model` 配置正确。查看日志以获取详细错误。
- **问题:初始的系统提示丢失**
- **解决**:确保 `keep_first` 的值大于 0,以保留包含重要信息的初始消息。
- **问题:压缩效果不明显**
- **解决**:尝试适当提高 `compression_threshold`,或减少 `keep_first` / `keep_last` 的值。
- **解决**:尝试适当提高 `compression_threshold_tokens`,或减少 `keep_first` / `keep_last` 的值。

View File

@@ -373,109 +373,7 @@ class Filter:
default=128000, ge=0, description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)"
)
model_thresholds: dict = Field(
default={
# Groq
"groq-openai/gpt-oss-20b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600},
"groq-openai/gpt-oss-120b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600},
# Qwen (ModelScope / CF)
"modelscope-Qwen/Qwen3-Coder-480B-A35B-Instruct": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200},
"cfchatqwen-qwen3-max-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"modelscope-Qwen/Qwen3-235B-A22B-Thinking-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-vl-plus-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-coder-plus-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"cfchatqwen-qwen3-vl-plus": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-coder-plus": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"cfchatqwen-qwen3-omni-flash-thinking": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875},
"cfchatqwen-qwen3-omni-flash": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875},
"cfchatqwen-qwen3-next-80b-a3b-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"modelscope-Qwen/Qwen3-VL-235B-A22B-Instruct": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-next-80b-a3b-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-next-80b-a3b": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-235b-a22b-thinking-search": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-235b-a22b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-235b-a22b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-coder-flash-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-coder-flash": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max-2025-10-30": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max-2025-10-30-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max-2025-10-30-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"modelscope-Qwen/Qwen3-235B-A22B-Instruct-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-vl-30b-a3b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-vl-30b-a3b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
# Gemini
"gemini-2.5-pro-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash-lite-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-pro": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash-exp": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"copilot-gemini-2.5-pro": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gemini-pro-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-3-pro-preview": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gemini-pro-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-lite-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-lite-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-robotics-er-1.5-preview": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
# DeepSeek
"modelscope-deepseek-ai/DeepSeek-V3.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"openrouter-deepseek/deepseek-r1-0528:free": {"max_context_tokens": 163840, "compression_threshold_tokens": 114688},
"modelscope-deepseek-ai/DeepSeek-V3.2-Exp": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek-r1-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek-r1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"openrouter-deepseek/deepseek-chat-v3.1:free": {"max_context_tokens": 163800, "compression_threshold_tokens": 114660},
"modelscope-deepseek-ai/DeepSeek-R1-0528": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
# Kimi (Moonshot)
"cfkimi-kimi-k2-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-k1.5-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-k1.5-thinking-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-research": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"openrouter-moonshotai/kimi-k2:free": {"max_context_tokens": 32768, "compression_threshold_tokens": 22937},
"cfkimi-kimi-k2": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-k1.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
# GPT / OpenAI
"gpt-4.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-4o": {"max_context_tokens": 64000, "compression_threshold_tokens": 44800},
"gpt-5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"github-gpt-4.1": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250},
"gpt-5-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5.1-codex": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5.1-codex-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5-codex": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000},
"github-gpt-4.1-mini": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250},
"openrouter-openai/gpt-oss-20b:free": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
# Claude / Anthropic
"claude-sonnet-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"claude-haiku-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"copilot-claude-opus-41": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000},
"copilot-claude-sonnet-4": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000},
# Other / OpenRouter / OSWE
"oswe-vscode-insiders": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200},
"modelscope-MiniMax/MiniMax-M2": {"max_context_tokens": 204800, "compression_threshold_tokens": 143360},
"oswe-vscode-prime": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000},
"grok-code-fast-1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"copilot-auto": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"modelscope-ZhipuAI/GLM-4.6": {"max_context_tokens": 32000, "compression_threshold_tokens": 22400},
"openrouter-x-ai/grok-4.1-fast:free": {"max_context_tokens": 2000000, "compression_threshold_tokens": 1400000},
"openrouter-qwen/qwen3-coder:free": {"max_context_tokens": 262000, "compression_threshold_tokens": 183400},
"openrouter-qwen/qwen3-235b-a22b:free": {"max_context_tokens": 40960, "compression_threshold_tokens": 28672},
},
default={},
description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。"
)

View File

@@ -1,45 +0,0 @@
需求文档:异步上下文压缩插件优化 (Async Context Compression Optimization)
1. 核心目标 将现有的基于消息数量的压缩逻辑升级为基于 Token 数量的压缩逻辑,并引入递归摘要机制,以更精准地控制上下文窗口,提高摘要质量,并防止历史信息丢失。
2. 功能需求
Token 计数与阈值控制
引入 tiktoken: 使用 tiktoken 库进行精确的 Token 计数。如果环境不支持,则回退到字符估算 (1 token ≈ 4 chars)。
新配置参数 (Valves):
compression_threshold_tokens (默认: 64000): 当上下文总 Token 数超过此值时,触发压缩(生成摘要)。
max_context_tokens (默认: 128000): 上下文的硬性上限。如果超过此值,强制移除最早的消息(保留受保护消息除外)。
model_thresholds (字典): 支持针对不同模型 ID 配置不同的阈值。例如:{'gpt-4': {'compression_threshold_tokens': 8000, ...}}。
废弃旧参数: compression_threshold (基于消息数) 将被标记为废弃,优先使用 Token 阈值。
递归摘要 (Recursive Summarization)
机制: 在生成新摘要时,必须读取并包含上一次的摘要。
逻辑: 新摘要 = LLM(上一次摘要 + 新产生的对话消息)。
目的: 防止随着对话进行,最早期的摘要信息被丢弃,确保长期记忆的连续性。
消息保护与修剪策略
保护机制: keep_first (保留头部 N 条) 和 keep_last (保留尾部 N 条) 的消息绝对不参与压缩,也不被移除。
修剪逻辑: 当触发 max_context_tokens 限制时,优先移除 keep_first 之后、keep_last 之前的最早消息。
优化的提示词 (Prompt Engineering)
目标: 去除无用信息(寒暄、重复),保留关键信号(事实、代码、决策)。
指令:
提炼与净化: 明确要求移除噪音。
关键保留: 强调代码片段必须逐字保留。
合并与更新: 明确指示将新信息合并到旧摘要中。
语言一致性: 输出语言必须与对话语言保持一致。
3. 实现细节
文件:
async_context_compression.py
类:
Filter
关键方法:
_count_tokens(text): 实现 Token 计数。
_calculate_messages_tokens(messages): 计算消息列表总 Token。
_generate_summary_async(...)
: 修改为加载旧摘要,并传入 LLM。
_call_summary_llm(...)
: 更新 Prompt,接受 previous_summary 和 new_messages。
inlet(...)
:
使用 compression_threshold_tokens 判断是否注入摘要。
实现 max_context_tokens 的强制修剪逻辑。
outlet(...)
: 使用 compression_threshold_tokens 判断是否触发后台摘要任务。