feat: 更新插件作者信息并将深度阅读插件本地化为英文

This commit is contained in:
fujie
2025-12-20 14:27:37 +08:00
parent eaa6319991
commit 39eb7d00ee
11 changed files with 353 additions and 479 deletions

View File

@@ -1,9 +1,11 @@
""" """
title: 导出到Excel title: Export to Excel
author: Fu-Jie author: Fu-Jie
description: 从最后一条AI回答消息中提取Markdown表格到Excel文件并在浏览器中触发下载。支持多表并自动根据标题命名 author_url: https://github.com/Fu-Jie
icon_url: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48IS0tIFVwbG9hZGVkIHRvOiBTVkcgUmVwbywgd3d3LnN2Z3JlcG8uY29tLCBHZW5lcmF0b3I6IFNWRyBSZXBvIE1peGVyIFRvb2xzIC0tPgo8c3ZnIHdpZHRoPSI4MDBweCIgaGVpZ2h0PSI4MDBweCIgdmlld0JveD0iMCAtMS4yNyAxMTAuMDM3IDExMC4wMzciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHBhdGggZD0iTTU3LjU1IDBoNy40MjV2MTBjMTIuNTEzIDAgMjUuMDI1LjAyNSAzNy41MzctLjAzOCAyLjExMy4wODcgNC40MzgtLjA2MiA2LjI3NSAxLjIgMS4yODcgMS44NSAxLjEzOCA0LjIgMS4yMjUgNi4zMjUtLjA2MiAyMS43LS4wMzcgNDMuMzg4LS4wMjQgNjUuMDc1LS4wNjIgMy42MzguMzM3IDcuMzUtLjQyNSAxMC45MzgtLjUgMi42LTMuNjI1IDIuNjYyLTUuNzEzIDIuNzUtMTIuOTUuMDM3LTI1LjkxMi0uMDI1LTM4Ljg3NSAwdjExLjI1aC03Ljc2M2MtMTkuMDUtMy40NjMtMzguMTM4LTYuNjYyLTU3LjIxMi0xMFYxMC4wMTNDMTkuMTg4IDYuNjc1IDM4LjM3NSAzLjM4OCA1Ny41NSAweiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik02NC45NzUgMTMuNzVoNDEuMjVWOTIuNWgtNDEuMjVWODVoMTB2LTguNzVoLTEwdi01aDEwVjYyLjVoLTEwdi01aDEwdi04Ljc1aC0xMHYtNWgxMFYzNWgtMTB2LTVoMTB2LTguNzVoLTEwdi03LjV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAyMS4yNWgxNy41VjMwaC0xNy41di04Ljc1eiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik0zNy4wMjUgMzIuOTYyYzIuODI1LS4yIDUuNjYzLS4zNzUgOC41LS41MTJhMjYwNy4zNDQgMjYwNy4zNDQgMCAwIDEtMTAuMDg3IDIwLjQ4N2MzLjQzOCA3IDYuOTQ5IDEzLjk1IDEwLjM5OSAyMC45NSBhNzE2LjI4IDcxNi4yOCAwIDAgMS05LjAyNC0uNTc1Yy0yLjEyNS01LjIxMy00LjcxMy0xMC4yNS02LjIzOC0xNS43Yy0xLjY5OSA1LjA3NS00LjEyNSA5Ljg2Mi02LjA3NCAxNC44MzgtMi43MzgtLjAzOC01LjQ3Ni0uMTUtOC4yMTMtLjI2M0MxOS41IDY1LjkgMjIuNiA1OS41NjIgMjUuOTEyIDUzLjMxMmMtMi44MTItNi40MzgtNS45LTEyLjc1LTguOC0xOS4xNSAyLjc1LS4xNjMgNS41LS4zMjUgOC4yNS0uNDc1IDEuODYyIDQuODg4IDMuODk5IDkuNzEyIDUuNDM4IDE0LjcyNSAxLjY0OS01LjMxMiA0LjExMi0xMC4zMTIgNi4yMjUtMTUuNDV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAzNWgxNy41djguNzVoLTE3LjVWMzV6TTc5Ljk3NSA0OC43NWgxNy41djguNzVoLTE3LjV2LTguNzV6TTc5Ljk3NSA2Mi41aDE3LjV2OC43NWgtMTcuNVY2Mi41ek03OS45NzUgNzYuMjVoMTcuNVY4NWgtMTcuNXYtOC43NXoiIGZpbGw9IiMyMDcyNDUiLz48L3N2Zz4= funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.1.0 version: 0.3.3
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDAgMCAwIDEtMiAySDZhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4=
description: Exports the current chat history to an Excel (.xlsx) file, with automatic header extraction.
""" """
import os import os
@@ -37,17 +39,17 @@ class Action:
print(f"action:{__name__}") print(f"action:{__name__}")
if isinstance(__user__, (list, tuple)): if isinstance(__user__, (list, tuple)):
user_language = ( user_language = (
__user__[0].get("language", "zh-CN") if __user__ else "zh-CN" __user__[0].get("language", "en-US") if __user__ else "en-US"
) )
user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" user_name = __user__[0].get("name", "User") if __user__[0] else "User"
user_id = ( user_id = (
__user__[0]["id"] __user__[0]["id"]
if __user__ and "id" in __user__[0] if __user__ and "id" in __user__[0]
else "unknown_user" else "unknown_user"
) )
elif isinstance(__user__, dict): elif isinstance(__user__, dict):
user_language = __user__.get("language", "zh-CN") user_language = __user__.get("language", "en-US")
user_name = __user__.get("name", "用户") user_name = __user__.get("name", "User")
user_id = __user__.get("id", "unknown_user") user_id = __user__.get("id", "unknown_user")
if __event_emitter__: if __event_emitter__:
@@ -56,7 +58,7 @@ class Action:
await __event_emitter__( await __event_emitter__(
{ {
"type": "status", "type": "status",
"data": {"description": "正在保存到文件...", "done": False}, "data": {"description": "Saving to file...", "done": False},
} }
) )
@@ -65,18 +67,18 @@ class Action:
tables = self.extract_tables_from_message(message_content) tables = self.extract_tables_from_message(message_content)
if not tables: if not tables:
raise HTTPException(status_code=400, detail="未找到任何表格。") raise HTTPException(status_code=400, detail="No tables found.")
# 获取动态文件名和sheet名称 # Get dynamic filename and sheet names
workbook_name, sheet_names = self.generate_names_from_content( workbook_name, sheet_names = self.generate_names_from_content(
message_content, tables message_content, tables
) )
# 使用优化后的文件名生成逻辑 # Use optimized filename generation logic
current_datetime = datetime.datetime.now() current_datetime = datetime.datetime.now()
formatted_date = current_datetime.strftime("%Y%m%d") formatted_date = current_datetime.strftime("%Y%m%d")
# 如果没找到标题则使用 user_yyyymmdd 格式 # If no title found, use user_yyyymmdd format
if not workbook_name: if not workbook_name:
workbook_name = f"{user_name}_{formatted_date}" workbook_name = f"{user_name}_{formatted_date}"
@@ -87,10 +89,10 @@ class Action:
os.makedirs(os.path.dirname(excel_file_path), exist_ok=True) os.makedirs(os.path.dirname(excel_file_path), exist_ok=True)
# 保存表格到Excel使用符合中国规范的格式化功能 # Save tables to Excel (using enhanced formatting)
self.save_tables_to_excel_enhanced(tables, excel_file_path, sheet_names) self.save_tables_to_excel_enhanced(tables, excel_file_path, sheet_names)
# 触发文件下载 # Trigger file download
if __event_call__: if __event_call__:
with open(excel_file_path, "rb") as file: with open(excel_file_path, "rb") as file:
file_content = file.read() file_content = file.read()
@@ -121,7 +123,7 @@ class Action:
URL.revokeObjectURL(url); URL.revokeObjectURL(url);
document.body.removeChild(a); document.body.removeChild(a);
}} catch (error) {{ }} catch (error) {{
console.error('触发下载时出错:', error); console.error('Error triggering download:', error);
}} }}
""" """
}, },
@@ -130,15 +132,15 @@ class Action:
await __event_emitter__( await __event_emitter__(
{ {
"type": "status", "type": "status",
"data": {"description": "输出已保存", "done": True}, "data": {"description": "File saved", "done": True},
} }
) )
# 清理临时文件 # Clean up temp file
if os.path.exists(excel_file_path): if os.path.exists(excel_file_path):
os.remove(excel_file_path) os.remove(excel_file_path)
return {"message": "下载事件已触发"} return {"message": "Download triggered"}
except HTTPException as e: except HTTPException as e:
print(f"Error processing tables: {str(e.detail)}") print(f"Error processing tables: {str(e.detail)}")
@@ -146,13 +148,13 @@ class Action:
{ {
"type": "status", "type": "status",
"data": { "data": {
"description": f"保存文件时出错: {e.detail}", "description": f"Error saving file: {e.detail}",
"done": True, "done": True,
}, },
} }
) )
await self._send_notification( await self._send_notification(
__event_emitter__, "error", "没有找到可以导出的表格!" __event_emitter__, "error", "No tables found to export!"
) )
raise e raise e
except Exception as e: except Exception as e:
@@ -161,22 +163,22 @@ class Action:
{ {
"type": "status", "type": "status",
"data": { "data": {
"description": f"保存文件时出错: {str(e)}", "description": f"Error saving file: {str(e)}",
"done": True, "done": True,
}, },
} }
) )
await self._send_notification( await self._send_notification(
__event_emitter__, "error", "没有找到可以导出的表格!" __event_emitter__, "error", "No tables found to export!"
) )
def extract_tables_from_message(self, message: str) -> List[Dict]: def extract_tables_from_message(self, message: str) -> List[Dict]:
""" """
从消息文本中提取Markdown表格及位置信息 Extract Markdown tables and their positions from message text
返回结构: [{ Returns structure: [{
"data": 表格数据, "data": table data,
"start_line": 起始行号, "start_line": start line number,
"end_line": 结束行号 "end_line": end line number
}] }]
""" """
table_row_pattern = r"^\s*\|.*\|.*\s*$" table_row_pattern = r"^\s*\|.*\|.*\s*$"
@@ -190,17 +192,17 @@ class Action:
current_line += 1 current_line += 1
if re.search(table_row_pattern, row): if re.search(table_row_pattern, row):
if start_line is None: if start_line is None:
start_line = current_line # 记录表格起始行 start_line = current_line # Record table start line
# 处理表格行 # Process table row
cells = [cell.strip() for cell in row.strip().strip("|").split("|")] cells = [cell.strip() for cell in row.strip().strip("|").split("|")]
# 跳过分隔行 # Skip separator row
is_separator_row = all(re.fullmatch(r"[:\-]+", cell) for cell in cells) is_separator_row = all(re.fullmatch(r"[:\-]+", cell) for cell in cells)
if not is_separator_row: if not is_separator_row:
current_table.append(cells) current_table.append(cells)
elif current_table: elif current_table:
# 表格结束 # Table ends
tables.append( tables.append(
{ {
"data": current_table, "data": current_table,
@@ -211,7 +213,7 @@ class Action:
current_table = [] current_table = []
start_line = None start_line = None
# 处理最后一个表格 # Process the last table
if current_table: if current_table:
tables.append( tables.append(
{ {
@@ -225,106 +227,106 @@ class Action:
def generate_names_from_content(self, content: str, tables: List[Dict]) -> tuple: def generate_names_from_content(self, content: str, tables: List[Dict]) -> tuple:
""" """
根据内容生成工作簿名称和sheet名称 Generate workbook name and sheet names based on content
- 忽略非空段落,只使用 markdown 标题 (h1-h6) - Ignore non-empty paragraphs, only use markdown headers (h1-h6).
- 单表格: 使用最近的标题作为工作簿和工作表名。 - Single table: Use the closest header as workbook and sheet name.
- 多表格: 使用文档第一个标题作为工作簿名,各表格最近的标题作为工作表名。 - Multiple tables: Use the first header in the document as workbook name, and closest header for each table as sheet name.
- 默认命名: - Default naming:
- 工作簿: 在主流程中处理 (user_yyyymmdd.xlsx) - Workbook: Handled in main flow (user_yyyymmdd.xlsx).
- 工作表: 表1, 表2, ... - Sheet: Sheet1, Sheet2, ...
""" """
lines = content.split("\n") lines = content.split("\n")
workbook_name = "" workbook_name = ""
sheet_names = [] sheet_names = []
all_headers = [] all_headers = []
# 1. 查找文档中所有 h1-h6 标题及其位置 # 1. Find all h1-h6 headers and their positions
for i, line in enumerate(lines): for i, line in enumerate(lines):
if re.match(r"^#{1,6}\s+", line): if re.match(r"^#{1,6}\s+", line):
all_headers.append( all_headers.append(
{"text": re.sub(r"^#{1,6}\s+", "", line).strip(), "line_num": i} {"text": re.sub(r"^#{1,6}\s+", "", line).strip(), "line_num": i}
) )
# 2. 为每个表格生成 sheet 名称 # 2. Generate sheet name for each table
for i, table in enumerate(tables): for i, table in enumerate(tables):
table_start_line = table["start_line"] - 1 # 转换为 0-based 索引 table_start_line = table["start_line"] - 1 # Convert to 0-based index
closest_header_text = None closest_header_text = None
# 查找当前表格上方最近的标题 # Find closest header above current table
candidate_headers = [ candidate_headers = [
h for h in all_headers if h["line_num"] < table_start_line h for h in all_headers if h["line_num"] < table_start_line
] ]
if candidate_headers: if candidate_headers:
# 找到候选标题中行号最大的,即为最接近的 # Find the header with the largest line number among candidates
closest_header = max(candidate_headers, key=lambda x: x["line_num"]) closest_header = max(candidate_headers, key=lambda x: x["line_num"])
closest_header_text = closest_header["text"] closest_header_text = closest_header["text"]
if closest_header_text: if closest_header_text:
# 清理并添加找到的标题 # Clean and add found header
sheet_names.append(self.clean_sheet_name(closest_header_text)) sheet_names.append(self.clean_sheet_name(closest_header_text))
else: else:
# 如果找不到标题,使用默认名称 "表{i+1}" # If no header found, use default name "Sheet{i+1}"
sheet_names.append(f"{i+1}") sheet_names.append(f"Sheet{i+1}")
# 3. 根据表格数量确定工作簿名称 # 3. Determine workbook name based on table count
if len(tables) == 1: if len(tables) == 1:
# 单个表格: 使用其工作表名作为工作簿名 (前提是该名称不是默认的 "表1") # Single table: Use its sheet name as workbook name (if not default "Sheet1")
if sheet_names[0] != "1": if sheet_names[0] != "Sheet1":
workbook_name = sheet_names[0] workbook_name = sheet_names[0]
elif len(tables) > 1: elif len(tables) > 1:
# 多个表格: 使用文档中的第一个标题作为工作簿名 # Multiple tables: Use the first header in the document as workbook name
if all_headers: if all_headers:
# 找到所有标题中行号最小的,即为第一个标题 # Find header with smallest line number
first_header = min(all_headers, key=lambda x: x["line_num"]) first_header = min(all_headers, key=lambda x: x["line_num"])
workbook_name = first_header["text"] workbook_name = first_header["text"]
# 4. 清理工作簿名称 (如果为空,主流程会使用默认名称) # 4. Clean workbook name (if empty, main flow will use default name)
workbook_name = self.clean_filename(workbook_name) if workbook_name else "" workbook_name = self.clean_filename(workbook_name) if workbook_name else ""
return workbook_name, sheet_names return workbook_name, sheet_names
def clean_filename(self, name: str) -> str: def clean_filename(self, name: str) -> str:
"""清理文件名中的非法字符""" """Clean illegal characters in filename"""
return re.sub(r'[\\/*?:"<>|]', "", name).strip() return re.sub(r'[\\/*?:"<>|]', "", name).strip()
def clean_sheet_name(self, name: str) -> str: def clean_sheet_name(self, name: str) -> str:
"""清理sheet名称(限制31字符,去除非法字符)""" """Clean sheet name (limit 31 chars, remove illegal chars)"""
name = re.sub(r"[\\/*?[\]:]", "", name).strip() name = re.sub(r"[\\/*?[\]:]", "", name).strip()
return name[:31] if len(name) > 31 else name return name[:31] if len(name) > 31 else name
# ======================== 符合中国规范的格式化功能 ======================== # ======================== Enhanced Formatting ========================
def calculate_text_width(self, text: str) -> float: def calculate_text_width(self, text: str) -> float:
""" """
计算文本显示宽度,考虑中英文字符差异 Calculate text display width, considering CJK characters
中文字符按2个单位计算英文字符按1个单位计算 CJK characters count as 2 units, others as 1 unit
""" """
if not text: if not text:
return 0 return 0
width = 0 width = 0
for char in str(text): for char in str(text):
# 判断是否为中文字符(包括中文标点) # Check if CJK character
if "\u4e00" <= char <= "\u9fff" or "\u3000" <= char <= "\u303f": if "\u4e00" <= char <= "\u9fff" or "\u3000" <= char <= "\u303f":
width += 2 # 中文字符占2个单位宽度 width += 2
else: else:
width += 1 # 英文字符占1个单位宽度 width += 1
return width return width
def calculate_text_height(self, text: str, max_width: int = 50) -> int: def calculate_text_height(self, text: str, max_width: int = 50) -> int:
""" """
计算文本显示所需的行数 Calculate required lines for text display
根据换行符和文本长度计算 Based on newlines and text length
""" """
if not text: if not text:
return 1 return 1
text = str(text) text = str(text)
# 计算换行符导致的行数 # Calculate lines from newlines
explicit_lines = text.count("\n") + 1 explicit_lines = text.count("\n") + 1
# 计算因文本长度超出而需要的额外行数 # Calculate extra lines from wrapping
text_width = self.calculate_text_width(text.replace("\n", "")) text_width = self.calculate_text_width(text.replace("\n", ""))
wrapped_lines = max( wrapped_lines = max(
1, int(text_width / max_width) + (1 if text_width % max_width > 0 else 0) 1, int(text_width / max_width) + (1 if text_width % max_width > 0 else 0)
@@ -334,7 +336,7 @@ class Action:
def get_column_letter(self, col_index: int) -> str: def get_column_letter(self, col_index: int) -> str:
""" """
将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...) Convert column index to Excel column letter (A, B, C, ..., AA, AB, ...)
""" """
result = "" result = ""
while col_index >= 0: while col_index >= 0:
@@ -344,44 +346,42 @@ class Action:
def determine_content_type(self, header: str, values: list) -> str: def determine_content_type(self, header: str, values: list) -> str:
""" """
根据表头和内容智能判断数据类型,符合中国官方表格规范 Intelligently determine data type based on header and content
返回: 'number', 'date', 'sequence', 'text' Returns: 'number', 'date', 'sequence', 'text'
""" """
header_lower = str(header).lower().strip() header_lower = str(header).lower().strip()
# 检查表头关键词 # Check header keywords
number_keywords = [ number_keywords = [
"数量", "quantity",
"金额", "amount",
"价格", "price",
"费用", "cost",
"成本", "revenue",
"收入", "expense",
"支出", "total",
"总计", "subtotal",
"小计", "percentage",
"百分比",
"%", "%",
"比例", "ratio",
"", "rate",
"数值", "value",
"分数", "score",
"成绩", "points",
"得分",
] ]
date_keywords = ["日期", "时间", "年份", "月份", "时刻", "date", "time"] date_keywords = ["date", "time", "year", "month", "moment"]
sequence_keywords = [ sequence_keywords = [
"序号", "no",
"编号", "no.",
"号码",
"排序",
"次序",
"顺序",
"id", "id",
"编码", "index",
"rank",
"order",
"sequence",
"code",
] ]
# 检查表头 # Check header
for keyword in number_keywords: for keyword in number_keywords:
if keyword in header_lower: if keyword in header_lower:
return "number" return "number"
@@ -394,13 +394,13 @@ class Action:
if keyword in header_lower: if keyword in header_lower:
return "sequence" return "sequence"
# 检查数据内容 # Check data content
if not values: if not values:
return "text" return "text"
sample_values = [ sample_values = [
str(v).strip() for v in values[:10] if str(v).strip() str(v).strip() for v in values[:10] if str(v).strip()
] # 取前10个非空值作为样本 ] # Use first 10 non-empty values as sample
if not sample_values: if not sample_values:
return "text" return "text"
@@ -409,22 +409,17 @@ class Action:
sequence_count = 0 sequence_count = 0
for value in sample_values: for value in sample_values:
# 检查是否为数字 # Check if number
try: try:
float( float(value.replace(",", "").replace("%", ""))
value.replace(",", "")
.replace("", "")
.replace("%", "")
.replace("", "")
)
numeric_count += 1 numeric_count += 1
continue continue
except ValueError: except ValueError:
pass pass
# 检查是否为日期格式 # Check if date format
date_patterns = [ date_patterns = [
r"\d{4}[-/]\d{1,2}[-/]\d{1,2}日?", r"\d{4}[-/]\d{1,2}[-/]\d{1,2}",
r"\d{1,2}[-/]\d{1,2}[-/]\d{4}", r"\d{1,2}[-/]\d{1,2}[-/]\d{4}",
r"\d{4}\d{2}\d{2}", r"\d{4}\d{2}\d{2}",
] ]
@@ -433,15 +428,15 @@ class Action:
date_count += 1 date_count += 1
break break
# 检查是否为序号格式 # Check if sequence format
if ( if (
re.match(r"^\d+$", value) and len(value) <= 4 re.match(r"^\d+$", value) and len(value) <= 4
): # 纯数字且不超过4位可能是序号 ): # Pure digits and <= 4 chars, likely sequence
sequence_count += 1 sequence_count += 1
total_count = len(sample_values) total_count = len(sample_values)
# 根据比例判断类型 # Determine type based on ratio
if numeric_count / total_count >= 0.7: if numeric_count / total_count >= 0.7:
return "number" return "number"
elif date_count / total_count >= 0.7: elif date_count / total_count >= 0.7:
@@ -451,27 +446,17 @@ class Action:
else: else:
return "text" return "text"
def get_column_letter(self, col_index: int) -> str:
"""
将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...)
"""
result = ""
while col_index >= 0:
result = chr(65 + col_index % 26) + result
col_index = col_index // 26 - 1
return result
def save_tables_to_excel_enhanced( def save_tables_to_excel_enhanced(
self, tables: List[Dict], file_path: str, sheet_names: List[str] self, tables: List[Dict], file_path: str, sheet_names: List[str]
): ):
""" """
符合中国官方表格规范的Excel保存功能 Enhanced Excel saving function with standard formatting
""" """
try: try:
with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer: with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer:
workbook = writer.book workbook = writer.book
# 定义表头样式 - 居中对齐(符合中国规范) # Define header style - Center aligned
header_format = workbook.add_format( header_format = workbook.add_format(
{ {
"bold": True, "bold": True,
@@ -479,62 +464,62 @@ class Action:
"font_color": "white", "font_color": "white",
"bg_color": "#00abbd", "bg_color": "#00abbd",
"border": 1, "border": 1,
"align": "center", # 表头居中 "align": "center",
"valign": "vcenter", "valign": "vcenter",
"text_wrap": True, "text_wrap": True,
} }
) )
# 文本单元格样式 - 左对齐 # Text cell style - Left aligned
text_format = workbook.add_format( text_format = workbook.add_format(
{ {
"border": 1, "border": 1,
"align": "left", # 文本左对齐 "align": "left",
"valign": "vcenter", "valign": "vcenter",
"text_wrap": True, "text_wrap": True,
} }
) )
# 数值单元格样式 - 右对齐 # Number cell style - Right aligned
number_format = workbook.add_format( number_format = workbook.add_format(
{"border": 1, "align": "right", "valign": "vcenter"} # 数值右对齐 {"border": 1, "align": "right", "valign": "vcenter"}
) )
# 整数格式 - 右对齐 # Integer format - Right aligned
integer_format = workbook.add_format( integer_format = workbook.add_format(
{ {
"num_format": "0", "num_format": "0",
"border": 1, "border": 1,
"align": "right", # 整数右对齐 "align": "right",
"valign": "vcenter", "valign": "vcenter",
} }
) )
# 小数格式 - 右对齐 # Decimal format - Right aligned
decimal_format = workbook.add_format( decimal_format = workbook.add_format(
{ {
"num_format": "0.00", "num_format": "0.00",
"border": 1, "border": 1,
"align": "right", # 小数右对齐 "align": "right",
"valign": "vcenter", "valign": "vcenter",
} }
) )
# 日期格式 - 居中对齐 # Date format - Center aligned
date_format = workbook.add_format( date_format = workbook.add_format(
{ {
"border": 1, "border": 1,
"align": "center", # 日期居中对齐 "align": "center",
"valign": "vcenter", "valign": "vcenter",
"text_wrap": True, "text_wrap": True,
} }
) )
# 序号格式 - 居中对齐 # Sequence format - Center aligned
sequence_format = workbook.add_format( sequence_format = workbook.add_format(
{ {
"border": 1, "border": 1,
"align": "center", # 序号居中对齐 "align": "center",
"valign": "vcenter", "valign": "vcenter",
} }
) )
@@ -548,12 +533,12 @@ class Action:
print(f"Processing table {i+1} with {len(table_data)} rows") print(f"Processing table {i+1} with {len(table_data)} rows")
# 获取sheet名称 # Get sheet name
sheet_name = ( sheet_name = (
sheet_names[i] if i < len(sheet_names) else f"{i+1}" sheet_names[i] if i < len(sheet_names) else f"Sheet{i+1}"
) )
# 创建DataFrame # Create DataFrame
headers = [ headers = [
str(cell).strip() str(cell).strip()
for cell in table_data[0] for cell in table_data[0]
@@ -561,7 +546,7 @@ class Action:
] ]
if not headers: if not headers:
print(f"Warning: No valid headers found for table {i+1}") print(f"Warning: No valid headers found for table {i+1}")
headers = [f"{j+1}" for j in range(len(table_data[0]))] headers = [f"Col{j+1}" for j in range(len(table_data[0]))]
data_rows = [] data_rows = []
if len(table_data) > 1: if len(table_data) > 1:
@@ -580,14 +565,14 @@ class Action:
print(f"DataFrame created with columns: {list(df.columns)}") print(f"DataFrame created with columns: {list(df.columns)}")
# 修复pandas FutureWarning - 使用try-except替代errors='ignore' # Fix pandas FutureWarning
for col in df.columns: for col in df.columns:
try: try:
df[col] = pd.to_numeric(df[col]) df[col] = pd.to_numeric(df[col])
except (ValueError, TypeError): except (ValueError, TypeError):
pass pass
# 先写入数据(不包含表头) # Write data first (without header)
df.to_excel( df.to_excel(
writer, writer,
sheet_name=sheet_name, sheet_name=sheet_name,
@@ -597,8 +582,8 @@ class Action:
) )
worksheet = writer.sheets[sheet_name] worksheet = writer.sheets[sheet_name]
# 应用符合中国规范的格式化 # Apply enhanced formatting
self.apply_chinese_standard_formatting( self.apply_enhanced_formatting(
worksheet, worksheet,
df, df,
headers, headers,
@@ -620,7 +605,7 @@ class Action:
print(f"Error saving Excel file: {str(e)}") print(f"Error saving Excel file: {str(e)}")
raise raise
def apply_chinese_standard_formatting( def apply_enhanced_formatting(
self, self,
worksheet, worksheet,
df, df,
@@ -635,24 +620,24 @@ class Action:
sequence_format, sequence_format,
): ):
""" """
应用符合中国官方表格规范的格式化 Apply enhanced formatting
- 表头: 居中对齐 - Header: Center aligned
- 数值: 右对齐 - Number: Right aligned
- 文本: 左对齐 - Text: Left aligned
- 日期: 居中对齐 - Date: Center aligned
- 序号: 居中对齐 - Sequence: Center aligned
""" """
try: try:
# 1. 写入表头(居中对齐) # 1. Write headers (Center aligned)
print(f"Writing headers with Chinese standard alignment: {headers}") print(f"Writing headers with enhanced alignment: {headers}")
for col_idx, header in enumerate(headers): for col_idx, header in enumerate(headers):
if header and str(header).strip(): if header and str(header).strip():
worksheet.write(0, col_idx, str(header).strip(), header_format) worksheet.write(0, col_idx, str(header).strip(), header_format)
else: else:
default_header = f"{col_idx+1}" default_header = f"Col{col_idx+1}"
worksheet.write(0, col_idx, default_header, header_format) worksheet.write(0, col_idx, default_header, header_format)
# 2. 分析每列的数据类型并应用相应格式 # 2. Analyze column types
column_types = {} column_types = {}
for col_idx, column in enumerate(headers): for col_idx, column in enumerate(headers):
if col_idx < len(df.columns): if col_idx < len(df.columns):
@@ -666,14 +651,14 @@ class Action:
else: else:
column_types[col_idx] = "text" column_types[col_idx] = "text"
# 3. 写入并格式化数据(根据类型使用不同对齐方式) # 3. Write and format data
for row_idx, row in df.iterrows(): for row_idx, row in df.iterrows():
for col_idx, value in enumerate(row): for col_idx, value in enumerate(row):
content_type = column_types.get(col_idx, "text") content_type = column_types.get(col_idx, "text")
# 根据内容类型选择格式 # Select format based on content type
if content_type == "number": if content_type == "number":
# 数值类型 - 右对齐 # Number - Right aligned
if pd.api.types.is_numeric_dtype(df.iloc[:, col_idx]): if pd.api.types.is_numeric_dtype(df.iloc[:, col_idx]):
if pd.api.types.is_integer_dtype(df.iloc[:, col_idx]): if pd.api.types.is_integer_dtype(df.iloc[:, col_idx]):
current_format = integer_format current_format = integer_format
@@ -691,49 +676,45 @@ class Action:
current_format = number_format current_format = number_format
elif content_type == "date": elif content_type == "date":
# 日期类型 - 居中对齐 # Date - Center aligned
current_format = date_format current_format = date_format
elif content_type == "sequence": elif content_type == "sequence":
# 序号类型 - 居中对齐 # Sequence - Center aligned
current_format = sequence_format current_format = sequence_format
else: else:
# 文本类型 - 左对齐 # Text - Left aligned
current_format = text_format current_format = text_format
worksheet.write(row_idx + 1, col_idx, value, current_format) worksheet.write(row_idx + 1, col_idx, value, current_format)
# 4. 自动调整列宽 # 4. Auto-adjust column width
for col_idx, column in enumerate(headers): for col_idx, column in enumerate(headers):
col_letter = self.get_column_letter(col_idx) col_letter = self.get_column_letter(col_idx)
# 计算表头宽度 # Calculate header width
header_width = self.calculate_text_width(str(column)) header_width = self.calculate_text_width(str(column))
# 计算数据列的最大宽度 # Calculate max data width
max_data_width = 0 max_data_width = 0
if not df.empty and col_idx < len(df.columns): if not df.empty and col_idx < len(df.columns):
for value in df.iloc[:, col_idx]: for value in df.iloc[:, col_idx]:
value_width = self.calculate_text_width(str(value)) value_width = self.calculate_text_width(str(value))
max_data_width = max(max_data_width, value_width) max_data_width = max(max_data_width, value_width)
# 基础宽度:取表头和数据的最大宽度 # Base width
base_width = max(header_width, max_data_width) base_width = max(header_width, max_data_width)
# 根据内容类型调整宽度 # Adjust width based on type
content_type = column_types.get(col_idx, "text") content_type = column_types.get(col_idx, "text")
if content_type == "sequence": if content_type == "sequence":
# 序号列通常比较窄
optimal_width = max(8, min(15, base_width + 2)) optimal_width = max(8, min(15, base_width + 2))
elif content_type == "number": elif content_type == "number":
# 数值列需要额外空间显示数字
optimal_width = max(12, min(25, base_width + 3)) optimal_width = max(12, min(25, base_width + 3))
elif content_type == "date": elif content_type == "date":
# 日期列需要固定宽度
optimal_width = max(15, min(20, base_width + 2)) optimal_width = max(15, min(20, base_width + 2))
else: else:
# 文本列根据内容调整
if base_width <= 10: if base_width <= 10:
optimal_width = base_width + 3 optimal_width = base_width + 3
elif base_width <= 20: elif base_width <= 20:
@@ -744,13 +725,11 @@ class Action:
worksheet.set_column(f"{col_letter}:{col_letter}", optimal_width) worksheet.set_column(f"{col_letter}:{col_letter}", optimal_width)
# 5. 自动调整行高 # 5. Auto-adjust row height
# 设置表头行高为35点
worksheet.set_row(0, 35) worksheet.set_row(0, 35)
# 设置数据行行高
for row_idx, row in df.iterrows(): for row_idx, row in df.iterrows():
max_row_height = 20 # 中国表格规范建议的最小行高 max_row_height = 20
for col_idx, value in enumerate(row): for col_idx, value in enumerate(row):
if col_idx < len(headers): if col_idx < len(headers):
@@ -764,26 +743,24 @@ class Action:
col_width = 15 col_width = 15
cell_lines = self.calculate_text_height(str(value), col_width) cell_lines = self.calculate_text_height(str(value), col_width)
cell_height = cell_lines * 20 # 每行20点高度符合中国规范 cell_height = cell_lines * 20
max_row_height = max(max_row_height, cell_height) max_row_height = max(max_row_height, cell_height)
final_height = min(120, max_row_height) final_height = min(120, max_row_height)
worksheet.set_row(row_idx + 1, final_height) worksheet.set_row(row_idx + 1, final_height)
print(f"Successfully applied Chinese standard formatting") print(f"Successfully applied enhanced formatting")
except Exception as e: except Exception as e:
print(f"Warning: Failed to apply Chinese standard formatting: {str(e)}") print(f"Warning: Failed to apply enhanced formatting: {str(e)}")
# 降级到基础格式化
self.apply_basic_formatting_fallback(worksheet, df) self.apply_basic_formatting_fallback(worksheet, df)
def apply_basic_formatting_fallback(self, worksheet, df): def apply_basic_formatting_fallback(self, worksheet, df):
""" """
基础格式化降级方案 Basic formatting fallback
""" """
try: try:
# 基础列宽调整
for i, column in enumerate(df.columns): for i, column in enumerate(df.columns):
column_width = ( column_width = (
max( max(
@@ -798,7 +775,5 @@ class Action:
f"{col_letter}:{col_letter}", min(60, max(10, column_width)) f"{col_letter}:{col_letter}", min(60, max(10, column_width))
) )
print("Applied basic formatting fallback")
except Exception as e: except Exception as e:
print(f"Warning: Even basic formatting failed: {str(e)}") print(f"Error in basic formatting: {str(e)}")

View File

@@ -1,8 +1,8 @@
""" """
title: 导出为 Excel title: 导出为 Excel
author: Antigravity author: Fu-Jie
author_url: https://github.com/open-webui author_url: https://github.com/Fu-Jie
funding_url: https://github.com/open-webui funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.3.3 version: 0.3.3
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDIgMCAwIDEtMiAyaC02YTIgMiAwIDAgMS0yLTJ2LTVhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4= icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDIgMCAwIDEtMiAyaC02YTIgMiAwIDAgMS0yLTJ2LTVhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4=
description: 将当前对话历史导出为 Excel (.xlsx) 文件支持自动提取表头 description: 将当前对话历史导出为 Excel (.xlsx) 文件支持自动提取表头

View File

@@ -1,11 +1,11 @@
""" """
title: 闪记卡 (Flash Card) title: Flash Card
author: Antigravity author: Fu-Jie
author_url: https://github.com/open-webui author_url: https://github.com/Fu-Jie
funding_url: https://github.com/open-webui funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.2.1 version: 0.2.1
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg== icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg==
description: 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类。 description: Quickly generates beautiful flashcards from text, extracting key points and categories.
""" """
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
@@ -24,20 +24,23 @@ class Action:
class Valves(BaseModel): class Valves(BaseModel):
model_id: str = Field( model_id: str = Field(
default="", default="",
description="用于生成卡片内容的模型 ID。如果为空则使用当前模型。", description="Model ID used for generating card content. If empty, uses the current model.",
) )
min_text_length: int = Field( min_text_length: int = Field(
default=50, description="生成闪记卡所需的最小文本长度(字符数)。" default=50,
description="Minimum text length required to generate a flashcard (characters).",
) )
max_text_length: int = Field( max_text_length: int = Field(
default=2000, default=2000,
description="建议的最大文本长度。超过此长度建议使用深度分析工具。", description="Recommended maximum text length. For longer texts, deep analysis tools are recommended.",
) )
language: str = Field( language: str = Field(
default="zh", description="卡片内容的目标语言 (例如 'zh', 'en')。" default="en",
description="Target language for card content (e.g., 'en', 'zh').",
) )
show_status: bool = Field( show_status: bool = Field(
default=True, description="是否在聊天界面显示状态更新。" default=True,
description="Whether to show status updates in the chat interface.",
) )
def __init__(self): def __init__(self):
@@ -72,7 +75,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "warning", "type": "warning",
"content": f"文本过短({text_length}字符),建议至少{self.valves.min_text_length}字符。", "content": f"Text too short ({text_length} chars), recommended at least {self.valves.min_text_length} chars.",
}, },
} }
) )
@@ -85,7 +88,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "info", "type": "info",
"content": f"文本较长({text_length}字符),建议使用'墨海拾贝'进行深度分析。", "content": f"Text quite long ({text_length} chars), consider using 'Deep Reading' for deep analysis.",
}, },
} }
) )
@@ -97,7 +100,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "info", "type": "info",
"content": "正在生成闪记卡...", "content": "Generating Flash Card...",
}, },
} }
) )
@@ -110,29 +113,29 @@ class Action:
model = self.valves.model_id if self.valves.model_id else body.get("model") model = self.valves.model_id if self.valves.model_id else body.get("model")
system_prompt = f""" system_prompt = f"""
你是一个闪记卡生成专家,专注于创建适合学习和记忆的知识卡片。你的任务是将文本提炼成简洁、易记的学习卡片。 You are a Flash Card Generation Expert, specializing in creating knowledge cards suitable for learning and memorization. Your task is to distill text into concise, easy-to-remember flashcards.
请提取以下字段,并以 JSON 格式返回: Please extract the following fields and return them in JSON format:
1. "title": 创建一个简短、精准的标题6-12 字),突出核心概念 1. "title": Create a short, precise title (3-8 words), highlighting the core concept.
2. "summary": 用一句话总结核心要义20-40 字),要通俗易懂、便于记忆 2. "summary": Summarize the core essence in one sentence (10-25 words), making it easy to understand and remember.
3. "key_points": 列出 3-5 个关键记忆点(每个 10-20 字) 3. "key_points": List 3-5 key memory points (5-15 words each).
- 每个要点应该是独立的知识点 - Each point should be an independent knowledge unit.
- 使用简洁、口语化的表达 - Use concise, conversational expression.
- 避免冗长的句子 - Avoid long sentences.
4. "tags": 列出 2-4 个分类标签(每个 2-5 字) 4. "tags": List 2-4 classification tags (1-3 words each).
5. "category": 选择一个主分类(如:概念、技能、事实、方法等) 5. "category": Choose a main category (e.g., Concept, Skill, Fact, Method, etc.).
目标语言: {self.valves.language} Target Language: {self.valves.language}
重要原则: Important Principles:
- **极简主义**: 每个要点都要精炼到极致 - **Minimalism**: Refine each point to the extreme.
- **记忆优先**: 内容要便于记忆和回忆 - **Memory First**: Content should be easy to memorize and recall.
- **核心聚焦**: 只提取最核心的知识点 - **Core Focus**: Extract only the most core knowledge points.
- **口语化**: 使用通俗易懂的语言 - **Conversational**: Use easy-to-understand language.
- 只返回 JSON 对象,不要包含 markdown 格式 - Return ONLY the JSON object, do not include markdown formatting.
""" """
prompt = f"请将以下文本提炼成一张学习记忆卡片:\n\n{target_message}" prompt = f"Please refine the following text into a learning flashcard:\n\n{target_message}"
payload = { payload = {
"model": model, "model": model,
@@ -163,7 +166,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "error", "type": "error",
"content": "生成卡片数据失败,请重试。", "content": "Failed to generate card data, please try again.",
}, },
} }
) )
@@ -173,11 +176,6 @@ class Action:
html_card = self.generate_html_card(card_data) html_card = self.generate_html_card(card_data)
# 3. Append to message # 3. Append to message
# We append it to the user message so it shows up as part of the interaction
# Or we can append it to the assistant response if we were a Pipe, but this is an Action.
# Actions usually modify the input or trigger a side effect.
# To show the card, we can append it to the message content.
html_embed_tag = f"```html\n{html_card}\n```" html_embed_tag = f"```html\n{html_card}\n```"
body["messages"][-1]["content"] += f"\n\n{html_embed_tag}" body["messages"][-1]["content"] += f"\n\n{html_embed_tag}"
@@ -187,7 +185,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "success", "type": "success",
"content": "闪记卡生成成功!", "content": "Flash Card generated successfully!",
}, },
} }
) )
@@ -202,7 +200,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "error", "type": "error",
"content": f"生成知识卡片时出错: {str(e)}", "content": f"Error generating knowledge card: {str(e)}",
}, },
} }
) )
@@ -519,7 +517,7 @@ class Action:
# Enhanced HTML structure # Enhanced HTML structure
html = f"""<!DOCTYPE html> html = f"""<!DOCTYPE html>
<html lang="zh-CN"> <html lang="en">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -530,20 +528,20 @@ class Action:
<div class="knowledge-card"> <div class="knowledge-card">
<div class="card-inner"> <div class="card-inner">
<div class="card-header"> <div class="card-header">
<div class="card-category">{data.get('category', '通用知识')}</div> <div class="card-category">{data.get('category', 'General Knowledge')}</div>
<h2 class="card-title">{data.get('title', '知识卡片')}</h2> <h2 class="card-title">{data.get('title', 'Flash Card')}</h2>
</div> </div>
<div class="card-body"> <div class="card-body">
<div class="card-summary"> <div class="card-summary">
{data.get('summary', '')} {data.get('summary', '')}
</div> </div>
<div class="card-section-title">核心要点</div> <div class="card-section-title">Key Points</div>
<ul class="card-points"> <ul class="card-points">
{''.join([f'<li>{point}</li>' for point in data.get('key_points', [])])} {''.join([f'<li>{point}</li>' for point in data.get('key_points', [])])}
</ul> </ul>
</div> </div>
<div class="card-footer"> <div class="card-footer">
<span class="card-tag-label">标签</span> <span class="card-tag-label">Tags</span>
{''.join([f'<span class="card-tag">#{tag}</span>' for tag in data.get('tags', [])])} {''.join([f'<span class="card-tag">#{tag}</span>' for tag in data.get('tags', [])])}
</div> </div>
</div> </div>

View File

@@ -1,11 +1,11 @@
""" """
title: Flash Card title: 闪记卡 (Flash Card)
author: Antigravity author: Fu-Jie
author_url: https://github.com/open-webui author_url: https://github.com/Fu-Jie
funding_url: https://github.com/open-webui funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.2.1 version: 0.2.1
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg== icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg==
description: Quickly generates beautiful flashcards from text, extracting key points and categories. description: 快速将文本提炼为精美的学习记忆卡片支持核心要点提取与分类
""" """
from pydantic import BaseModel, Field from pydantic import BaseModel, Field

View File

@@ -2,7 +2,7 @@
title: Smart Mind Map title: Smart Mind Map
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSIxMC41IiB4Mj0iNiIgeTI9IjYiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSIxMC41IiB4Mj0iMTgiIHkyPSI2Ii8+CiAgPGNpcmNsZSBjeD0iMTkiIGN5PSI1IiByPSIxLjUiLz4KICA8bGluZSB4MT0iMTAuNSIgeTE9IjEzLjUiIHgyPSI2IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iMTkiIHI9IjEuNSIvPgogIDxsaW5lIHgxPSIxMy41IiB5MT0iMTMuNSIgeDI9IjE4IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSIxOSIgY3k9IjE5IiByPSIxLjUiLz4KPC9zdmc+ icon_url: 
data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSIxMC41IiB4Mj0iNiIgeTI9IjYiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSIxMC41IiB4Mj0iMTgiIHkyPSI2Ii8+CiAgPGNpcmNsZSBjeD0iMTkiIGN5PSI1IiByPSIxLjUiLz4KICA8bGluZSB4MT0iMTAuNSIgeTE9IjEzLjUiIHgyPSI2IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iMTkiIHI9IjEuNSIvPgogIDxsaW5lIHgxPSIxMy41IiB5MT0iMTMuNSIgeDI9IjE4IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSIxOSIgY3k9IjE5IiByPSIxLjUiLz4KPC9zdmc+
version: 0.7.3 version: 0.7.3
description: 智能分析长文本并生成交互式思维导图,支持 SVG/Markdown 导出。 description: Intelligently analyzes long texts and generates interactive mind maps, supporting SVG/Markdown export.
""" """
from pydantic import BaseModel, Field from pydantic import BaseModel, Field

View File

@@ -1,8 +1,8 @@
""" """
title: Deep Reading & Summary title: Deep Reading & Summary
author: Antigravity author: Fu-Jie
author_url: https://github.com/open-webui author_url: https://github.com/Fu-Jie
funding_url: https://github.com/open-webui funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.1.0 version: 0.1.0
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0yIDNIMGEyIDIgMCAwIDAgMiAyIi8+PHBhdGggZD0iTTIyIDNIMjBhMiAyIDAgMCAwLTIgMiIvPjxwYXRoIGQ9Ik0yIDdoMjB2MTRhMiAyIDAgMCAxLTIgMmgtMTZhMiAyIDAgMCAxLTItMnYtMTQiLz48cGF0aCBkPSJNMTEgMTJ2NiIvPjxwYXRoIGQ9Ik0xNiAxMnY2Ii8+PHBhdGggZD0iTTYgMTJ2NiIvPjwvc3ZnPg== icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0yIDNIMGEyIDIgMCAwIDAgMiAyIi8+PHBhdGggZD0iTTIyIDNIMjBhMiAyIDAgMCAwLTIgMiIvPjxwYXRoIGQ9Ik0yIDdoMjB2MTRhMiAyIDAgMCAxLTIgMmgtMTZhMiAyIDAgMCAxLTItMnYtMTQiLz48cGF0aCBkPSJNMTEgMTJ2NiIvPjxwYXRoIGQ9Ik0xNiAxMnY2Ii8+PHBhdGggZD0iTTYgMTJ2NiIvPjwvc3ZnPg==
description: Provides deep reading analysis and summarization for long texts. description: Provides deep reading analysis and summarization for long texts.
@@ -28,69 +28,69 @@ logging.basicConfig(
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ================================================================= # =================================================================
# 内部 LLM 提示词设计 # Internal LLM Prompts
# ================================================================= # =================================================================
SYSTEM_PROMPT_READING_ASSISTANT = """ SYSTEM_PROMPT_READING_ASSISTANT = """
你是一个专业的深度文本分析专家,擅长精读长篇文本并提炼精华。你的任务是进行全面、深入的分析。 You are a professional Deep Text Analysis Expert, specializing in reading long texts and extracting the essence. Your task is to conduct a comprehensive and in-depth analysis.
请提供以下内容: Please provide the following:
1. **详细摘要**:用 2-3 段话全面总结文本的核心内容,确保准确性和完整性。不要过于简略,要让读者充分理解文本主旨。 1. **Detailed Summary**: Summarize the core content of the text in 2-3 paragraphs, ensuring accuracy and completeness. Do not be too brief; ensure the reader fully understands the main idea.
2. **关键信息点**:列出 5-8 个最重要的事实、观点或论据。每个信息点应该: 2. **Key Information Points**: List 5-8 most important facts, viewpoints, or arguments. Each point should:
- 具体且有深度 - Be specific and insightful
- 包含必要的细节和背景 - Include necessary details and context
- 使用 Markdown 列表格式 - Use Markdown list format
3. **行动建议**:从文本中识别并提炼出具体的、可执行的行动项。每个建议应该: 3. **Actionable Advice**: Identify and refine specific, actionable items from the text. Each suggestion should:
- 明确且可操作 - Be clear and actionable
- 包含执行的优先级或时间建议 - Include execution priority or timing suggestions
- 如果没有明确的行动项,可以提供学习建议或思考方向 - If there are no clear action items, provide learning suggestions or thinking directions
请严格遵循以下指导原则: Please strictly follow these guidelines:
- **语言**:所有输出必须使用用户指定的语言。 - **Language**: All output must be in the user's specified language.
- **格式**:请严格按照以下 Markdown 格式输出,确保每个部分都有明确的标题: - **Format**: Please strictly follow the Markdown format below, ensuring each section has a clear header:
## 摘要 ## Summary
[这里是详细的摘要内容2-3段话可以使用 Markdown 进行**加粗**或*斜体*强调重点] [Detailed summary content here, 2-3 paragraphs, use Markdown **bold** or *italic* to emphasize key points]
## 关键信息点 ## Key Information Points
- [关键点1包含具体细节和背景] - [Key Point 1: Include specific details and context]
- [关键点2包含具体细节和背景] - [Key Point 2: Include specific details and context]
- [关键点3包含具体细节和背景] - [Key Point 3: Include specific details and context]
- [至少5个最多8个关键点] - [At least 5, at most 8 key points]
## 行动建议 ## Actionable Advice
- [行动项1具体、可执行包含优先级] - [Action Item 1: Specific, actionable, include priority]
- [行动项2具体、可执行包含优先级] - [Action Item 2: Specific, actionable, include priority]
- [如果没有明确行动项,提供学习建议或思考方向] - [If no clear action items, provide learning suggestions or thinking directions]
- **深度优先**:分析要深入、全面,不要浮于表面。 - **Depth First**: Analysis should be deep and comprehensive, not superficial.
- **行动导向**:重点关注可执行的建议和下一步行动。 - **Action Oriented**: Focus on actionable suggestions and next steps.
- **只输出分析结果**:不要包含任何额外的寒暄、解释或引导性文字。 - **Analysis Results Only**: Do not include any extra pleasantries, explanations, or leading text.
""" """
USER_PROMPT_GENERATE_SUMMARY = """ USER_PROMPT_GENERATE_SUMMARY = """
请对以下长篇文本进行深度分析,提供: Please conduct a deep analysis of the following long text, providing:
1. 详细的摘要2-3段话全面概括文本内容 1. Detailed Summary (2-3 paragraphs, comprehensive overview)
2. 关键信息点列表5-8个包含具体细节 2. Key Information Points List (5-8 items, including specific details)
3. 可执行的行动建议(具体、明确,包含优先级) 3. Actionable Advice (Specific, clear, including priority)
--- ---
**用户上下文信息:** **User Context:**
用户姓名: {user_name} User Name: {user_name}
当前日期时间: {current_date_time_str} Current Date/Time: {current_date_time_str}
当前星期: {current_weekday} Weekday: {current_weekday}
当前时区: {current_timezone_str} Timezone: {current_timezone_str}
用户语言: {user_language} User Language: {user_language}
--- ---
**长篇文本内容:** **Long Text Content:**
``` ```
{long_text_content} {long_text_content}
``` ```
请进行深入、全面的分析,重点关注可执行的行动建议。 Please conduct a deep and comprehensive analysis, focusing on actionable advice.
""" """
# ================================================================= # =================================================================
# 前端 HTML 模板 (Jinja2 语法) # Frontend HTML Template (Jinja2 Syntax)
# ================================================================= # =================================================================
HTML_TEMPLATE = """ HTML_TEMPLATE = """
@@ -99,7 +99,7 @@ HTML_TEMPLATE = """
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>精读:深度分析报告</title> <title>Deep Reading: Deep Analysis Report</title>
<style> <style>
:root { :root {
--primary-color: #4285f4; --primary-color: #4285f4;
@@ -245,29 +245,29 @@ HTML_TEMPLATE = """
<body> <body>
<div class="container"> <div class="container">
<div class="header"> <div class="header">
<h1>📖 精读:深度分析报告</h1> <h1>📖 Deep Reading: Deep Analysis Report</h1>
</div> </div>
<div class="user-context"> <div class="user-context">
<span><strong>用户:</strong> {{ user_name }}</span> <span><strong>User:</strong> {{ user_name }}</span>
<span><strong>分析时间:</strong> {{ current_date_time_str }}</span> <span><strong>Analysis Time:</strong> {{ current_date_time_str }}</span>
<span><strong>星期:</strong> {{ current_weekday }}</span> <span><strong>Weekday:</strong> {{ current_weekday }}</span>
</div> </div>
<div class="content"> <div class="content">
<div class="section summary-section"> <div class="section summary-section">
<h2><span class="icon">📝</span>详细摘要</h2> <h2><span class="icon">📝</span>Detailed Summary</h2>
<div class="html-content">{{ summary_html | safe }}</div> <div class="html-content">{{ summary_html | safe }}</div>
</div> </div>
<div class="section keypoints-section"> <div class="section keypoints-section">
<h2><span class="icon">💡</span>关键信息点</h2> <h2><span class="icon">💡</span>Key Information Points</h2>
<div class="html-content">{{ keypoints_html | safe }}</div> <div class="html-content">{{ keypoints_html | safe }}</div>
</div> </div>
<div class="section actions-section"> <div class="section actions-section">
<h2><span class="icon">🎯</span>行动建议</h2> <h2><span class="icon">🎯</span>Actionable Advice</h2>
<div class="html-content">{{ actions_html | safe }}</div> <div class="html-content">{{ actions_html | safe }}</div>
</div> </div>
</div> </div>
<div class="footer"> <div class="footer">
<p>&copy; {{ current_year }} 精读 - 深度文本分析服务</p> <p>&copy; {{ current_year }} Deep Reading - Deep Text Analysis Service</p>
</div> </div>
</div> </div>
</body> </body>
@@ -277,18 +277,20 @@ HTML_TEMPLATE = """
class Action: class Action:
class Valves(BaseModel): class Valves(BaseModel):
show_status: bool = Field( show_status: bool = Field(
default=True, description="是否在聊天界面显示操作状态更新。" default=True,
description="Whether to show operation status updates in the chat interface.",
) )
LLM_MODEL_ID: str = Field( LLM_MODEL_ID: str = Field(
default="gemini-2.5-flash", default="gemini-2.5-flash",
description="用于文本分析的内置LLM模型ID。", description="Built-in LLM Model ID used for text analysis.",
) )
MIN_TEXT_LENGTH: int = Field( MIN_TEXT_LENGTH: int = Field(
default=200, default=200,
description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。", description="Minimum text length required for deep analysis (characters). Recommended 200+.",
) )
RECOMMENDED_MIN_LENGTH: int = Field( RECOMMENDED_MIN_LENGTH: int = Field(
default=500, description="建议的最小文本长度,以获得最佳分析效果。" default=500,
description="Recommended minimum text length for best analysis results.",
) )
def __init__(self): def __init__(self):
@@ -296,16 +298,20 @@ class Action:
def _process_llm_output(self, llm_output: str) -> Dict[str, str]: def _process_llm_output(self, llm_output: str) -> Dict[str, str]:
""" """
解析LLMMarkdown输出,将其转换为HTML片段。 Parse LLM Markdown output and convert to HTML fragments.
""" """
summary_match = re.search( summary_match = re.search(
r"##\s*摘要\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL r"##\s*Summary\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL | re.IGNORECASE
) )
keypoints_match = re.search( keypoints_match = re.search(
r"##\s*关键信息点\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL r"##\s*Key Information Points\s*\n(.*?)(?=\n##|$)",
llm_output,
re.DOTALL | re.IGNORECASE,
) )
actions_match = re.search( actions_match = re.search(
r"##\s*行动建议\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL r"##\s*Actionable Advice\s*\n(.*?)(?=\n##|$)",
llm_output,
re.DOTALL | re.IGNORECASE,
) )
summary_md = summary_match.group(1).strip() if summary_match else "" summary_md = summary_match.group(1).strip() if summary_match else ""
@@ -314,24 +320,26 @@ class Action:
if not any([summary_md, keypoints_md, actions_md]): if not any([summary_md, keypoints_md, actions_md]):
summary_md = llm_output.strip() summary_md = llm_output.strip()
logger.warning("LLM输出未遵循预期的Markdown格式。将整个输出视为摘要。") logger.warning(
"LLM output did not follow expected Markdown format. Treating entire output as summary."
)
# 使用 'nl2br' 扩展将换行符 \n 转换为 <br> # Use 'nl2br' extension to convert newlines \n to <br>
md_extensions = ["nl2br"] md_extensions = ["nl2br"]
summary_html = ( summary_html = (
markdown.markdown(summary_md, extensions=md_extensions) markdown.markdown(summary_md, extensions=md_extensions)
if summary_md if summary_md
else '<p class="no-content">未能提取摘要信息。</p>' else '<p class="no-content">Failed to extract summary.</p>'
) )
keypoints_html = ( keypoints_html = (
markdown.markdown(keypoints_md, extensions=md_extensions) markdown.markdown(keypoints_md, extensions=md_extensions)
if keypoints_md if keypoints_md
else '<p class="no-content">未能提取关键信息点。</p>' else '<p class="no-content">Failed to extract key information points.</p>'
) )
actions_html = ( actions_html = (
markdown.markdown(actions_md, extensions=md_extensions) markdown.markdown(actions_md, extensions=md_extensions)
if actions_md if actions_md
else '<p class="no-content">暂无明确的行动建议。</p>' else '<p class="no-content">No explicit actionable advice.</p>'
) )
return { return {
@@ -342,7 +350,7 @@ class Action:
def _build_html(self, context: dict) -> str: def _build_html(self, context: dict) -> str:
""" """
使用 Jinja2 模板和上下文数据构建最终的HTML内容。 Build final HTML content using Jinja2 template and context data.
""" """
template = Template(HTML_TEMPLATE) template = Template(HTML_TEMPLATE)
return template.render(context) return template.render(context)
@@ -354,39 +362,39 @@ class Action:
__event_emitter__: Optional[Any] = None, __event_emitter__: Optional[Any] = None,
__request__: Optional[Request] = None, __request__: Optional[Request] = None,
) -> Optional[dict]: ) -> Optional[dict]:
logger.info("Action: 精读启动 (v2.0.0 - Deep Reading)") logger.info("Action: Deep Reading Started (v2.0.0)")
if isinstance(__user__, (list, tuple)): if isinstance(__user__, (list, tuple)):
user_language = ( user_language = (
__user__[0].get("language", "zh-CN") if __user__ else "zh-CN" __user__[0].get("language", "en-US") if __user__ else "en-US"
) )
user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" user_name = __user__[0].get("name", "User") if __user__[0] else "User"
user_id = ( user_id = (
__user__[0]["id"] __user__[0]["id"]
if __user__ and "id" in __user__[0] if __user__ and "id" in __user__[0]
else "unknown_user" else "unknown_user"
) )
elif isinstance(__user__, dict): elif isinstance(__user__, dict):
user_language = __user__.get("language", "zh-CN") user_language = __user__.get("language", "en-US")
user_name = __user__.get("name", "用户") user_name = __user__.get("name", "User")
user_id = __user__.get("id", "unknown_user") user_id = __user__.get("id", "unknown_user")
now = datetime.now() now = datetime.now()
current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S") current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S")
current_weekday = now.strftime("%A") current_weekday = now.strftime("%A")
current_year = now.strftime("%Y") current_year = now.strftime("%Y")
current_timezone_str = "未知时区" current_timezone_str = "Unknown Timezone"
original_content = "" original_content = ""
try: try:
messages = body.get("messages", []) messages = body.get("messages", [])
if not messages or not messages[-1].get("content"): if not messages or not messages[-1].get("content"):
raise ValueError("无法获取有效的用户消息内容。") raise ValueError("Unable to get valid user message content.")
original_content = messages[-1]["content"] original_content = messages[-1]["content"]
if len(original_content) < self.valves.MIN_TEXT_LENGTH: if len(original_content) < self.valves.MIN_TEXT_LENGTH:
short_text_message = f"文本内容过短({len(original_content)}字符),建议至少{self.valves.MIN_TEXT_LENGTH}字符以获得有效的深度分析。\n\n💡 提示:对于短文本,建议使用'⚡ 闪记卡'进行快速提炼。" short_text_message = f"Text content too short ({len(original_content)} chars), recommended at least {self.valves.MIN_TEXT_LENGTH} chars for effective deep analysis.\n\n💡 Tip: For short texts, consider using '⚡ Flash Card' for quick refinement."
if __event_emitter__: if __event_emitter__:
await __event_emitter__( await __event_emitter__(
{ {
@@ -408,7 +416,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "info", "type": "info",
"content": f"文本长度为{len(original_content)}字符。建议{self.valves.RECOMMENDED_MIN_LENGTH}字符以上可获得更好的分析效果。", "content": f"Text length is {len(original_content)} chars. Recommended {self.valves.RECOMMENDED_MIN_LENGTH}+ chars for best analysis results.",
}, },
} }
) )
@@ -419,7 +427,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "info", "type": "info",
"content": "📖 精读已启动,正在进行深度分析...", "content": "📖 Deep Reading started, analyzing deeply...",
}, },
} }
) )
@@ -428,7 +436,7 @@ class Action:
{ {
"type": "status", "type": "status",
"data": { "data": {
"description": "📖 精读: 深入分析文本,提炼精华...", "description": "📖 Deep Reading: Analyzing text, extracting essence...",
"done": False, "done": False,
}, },
} }
@@ -454,7 +462,7 @@ class Action:
user_obj = Users.get_user_by_id(user_id) user_obj = Users.get_user_by_id(user_id)
if not user_obj: if not user_obj:
raise ValueError(f"无法获取用户对象, 用户ID: {user_id}") raise ValueError(f"Unable to get user object, User ID: {user_id}")
llm_response = await generate_chat_completion( llm_response = await generate_chat_completion(
__request__, llm_payload, user_obj __request__, llm_payload, user_obj
@@ -482,7 +490,10 @@ class Action:
await __event_emitter__( await __event_emitter__(
{ {
"type": "status", "type": "status",
"data": {"description": "📖 精读: 分析完成!", "done": True}, "data": {
"description": "📖 Deep Reading: Analysis complete!",
"done": True,
},
} }
) )
await __event_emitter__( await __event_emitter__(
@@ -490,18 +501,18 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "success", "type": "success",
"content": f"📖 精读完成,{user_name}!深度分析报告已生成。", "content": f"📖 Deep Reading complete, {user_name}! Deep analysis report generated.",
}, },
} }
) )
except Exception as e: except Exception as e:
error_message = f"精读处理失败: {str(e)}" error_message = f"Deep Reading processing failed: {str(e)}"
logger.error(f"精读错误: {error_message}", exc_info=True) logger.error(f"Deep Reading Error: {error_message}", exc_info=True)
user_facing_error = f"抱歉, 精读在处理时遇到错误: {str(e)}\n请检查Open WebUI后端日志获取更多详情。" user_facing_error = f"Sorry, Deep Reading encountered an error while processing: {str(e)}.\nPlease check Open WebUI backend logs for more details."
body["messages"][-1][ body["messages"][-1][
"content" "content"
] = f"{original_content}\n\n❌ **错误:** {user_facing_error}" ] = f"{original_content}\n\n❌ **Error:** {user_facing_error}"
if __event_emitter__: if __event_emitter__:
if self.valves.show_status: if self.valves.show_status:
@@ -509,7 +520,7 @@ class Action:
{ {
"type": "status", "type": "status",
"data": { "data": {
"description": "精读: 处理失败。", "description": "Deep Reading: Processing failed.",
"done": True, "done": True,
}, },
} }
@@ -519,7 +530,7 @@ class Action:
"type": "notification", "type": "notification",
"data": { "data": {
"type": "error", "type": "error",
"content": f"精读处理失败, {user_name}!", "content": f"Deep Reading processing failed, {user_name}!",
}, },
} }
) )

View File

@@ -16,6 +16,8 @@
- ✅ **灵活保留策略**: 可自由配置保留对话头部和尾部的消息数量,确保关键信息和上下文的连贯性。 - ✅ **灵活保留策略**: 可自由配置保留对话头部和尾部的消息数量,确保关键信息和上下文的连贯性。
- ✅ **智能注入**: 将生成的历史摘要智能地注入到新的上下文中。 - ✅ **智能注入**: 将生成的历史摘要智能地注入到新的上下文中。
详细的工作原理和流程请参考 [工作流程指南](WORKFLOW_GUIDE_CN.md)。
--- ---
## 安装与配置 ## 安装与配置
@@ -49,16 +51,51 @@
您可以在过滤器的设置中调整以下参数: 您可以在过滤器的设置中调整以下参数:
### 核心参数
| 参数 | 默认值 | 描述 | | 参数 | 默认值 | 描述 |
| :--- | :--- | :--- | | :--- | :--- | :--- |
| `priority` | `10` | 过滤器执行顺序,数值越小越先执行。 | | `priority` | `10` | 过滤器执行顺序,数值越小越先执行。 |
| `compression_threshold` | `15` | 当总消息数达到此值时,将在后台触发摘要生成。 | | `compression_threshold_tokens` | `64000` | **(重要)** 当上下文总 Token 数超过此值时,将在后台触发摘要生成。建议设置为模型最大上下文窗口的 50%-70%。 |
| `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示。 | | `max_context_tokens` | `128000` | **(重要)** 上下文的硬性上限。如果超过此值,将强制移除最早的消息(保留受保护消息除外)。防止 Token 溢出。 |
| `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保上下文连贯。 | | `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示或环境变量,建议至少保留 1 条。 |
| `summary_model` | `None` | 用于生成摘要的模型。**强烈建议**配置一个快速、经济的兼容模型(如 `gemini-2.5-flash`)。如果留空,将尝试使用当前对话的模型,但这可能因模型不兼容(如 Pipe 模型)而失败。 | | `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保最近对话的连贯性。 |
| `max_summary_tokens` | `4000` | 生成摘要时允许的最大 Token 数。 |
| `summary_temperature` | `0.3` | 控制摘要生成的随机性,较低的值结果更稳定。 | ### 摘要生成配置
| `debug_mode` | `true` | 是否在日志中打印详细的调试信息。生产环境建议设为 `false`。 |
| 参数 | 默认值 | 描述 |
| :--- | :--- | :--- |
| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`, `deepseek-v3`)。如果留空,将尝试使用当前对话的模型。 |
| `max_summary_tokens` | `16384` | 生成摘要时允许的最大 Token 数。 |
| `summary_temperature` | `0.1` | 控制摘要生成的随机性,较低的值结果更稳定。 |
### 高级配置
#### `model_thresholds` (模型特定阈值)
这是一个字典配置,允许您为特定的模型 ID 覆盖全局的 `compression_threshold_tokens``max_context_tokens`。这对于混合使用不同上下文窗口大小的模型非常有用。
**默认配置包含了主流模型(如 GPT-4, Claude 3.5, Gemini 1.5/2.0, Qwen 2.5/3, DeepSeek V3 等)的推荐阈值。**
**配置示例:**
```json
{
"gpt-4": {
"compression_threshold_tokens": 8000,
"max_context_tokens": 32000
},
"gemini-2.5-flash": {
"compression_threshold_tokens": 734000,
"max_context_tokens": 1048576
}
}
```
#### `debug_mode`
- **默认值**: `true`
- **描述**: 是否在 Open WebUI 的控制台日志中打印详细的调试信息(如 Token 计数、压缩进度、数据库操作等)。生产环境建议设为 `false`
--- ---
@@ -68,10 +105,10 @@
- **解决**:请确认 `DATABASE_URL` 环境变量已正确设置,并且数据库服务运行正常。 - **解决**:请确认 `DATABASE_URL` 环境变量已正确设置,并且数据库服务运行正常。
- **问题:摘要未生成** - **问题:摘要未生成**
- **解决**:检查 `compression_threshold` 是否已达到,并确认 `summary_model` 配置正确。查看日志以获取详细错误。 - **解决**:检查 `compression_threshold_tokens` 是否已达到,并确认 `summary_model` 配置正确。查看日志以获取详细错误。
- **问题:初始的系统提示丢失** - **问题:初始的系统提示丢失**
- **解决**:确保 `keep_first` 的值大于 0以保留包含重要信息的初始消息。 - **解决**:确保 `keep_first` 的值大于 0以保留包含重要信息的初始消息。
- **问题:压缩效果不明显** - **问题:压缩效果不明显**
- **解决**:尝试适当提高 `compression_threshold`,或减少 `keep_first` / `keep_last` 的值。 - **解决**:尝试适当提高 `compression_threshold_tokens`,或减少 `keep_first` / `keep_last` 的值。

View File

@@ -373,109 +373,7 @@ class Filter:
        default=128000, ge=0, description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)"
    )
    model_thresholds: dict = Field(
default={ default={},
# Groq
"groq-openai/gpt-oss-20b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600},
"groq-openai/gpt-oss-120b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600},
# Qwen (ModelScope / CF)
"modelscope-Qwen/Qwen3-Coder-480B-A35B-Instruct": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200},
"cfchatqwen-qwen3-max-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"modelscope-Qwen/Qwen3-235B-A22B-Thinking-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-vl-plus-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-coder-plus-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"cfchatqwen-qwen3-vl-plus": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-coder-plus": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"cfchatqwen-qwen3-omni-flash-thinking": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875},
"cfchatqwen-qwen3-omni-flash": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875},
"cfchatqwen-qwen3-next-80b-a3b-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"modelscope-Qwen/Qwen3-VL-235B-A22B-Instruct": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-next-80b-a3b-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-next-80b-a3b": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-235b-a22b-thinking-search": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-235b-a22b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-235b-a22b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-coder-flash-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-coder-flash": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max-2025-10-30": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max-2025-10-30-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-max-2025-10-30-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"modelscope-Qwen/Qwen3-235B-A22B-Instruct-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
"cfchatqwen-qwen3-vl-30b-a3b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
"cfchatqwen-qwen3-vl-30b-a3b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
# Gemini
"gemini-2.5-pro-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-flash-lite-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.5-pro": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash-exp": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-2.0-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"copilot-gemini-2.5-pro": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gemini-pro-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-3-pro-preview": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gemini-pro-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-lite-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-flash-lite-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
"gemini-robotics-er-1.5-preview": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
# DeepSeek
"modelscope-deepseek-ai/DeepSeek-V3.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"openrouter-deepseek/deepseek-r1-0528:free": {"max_context_tokens": 163840, "compression_threshold_tokens": 114688},
"modelscope-deepseek-ai/DeepSeek-V3.2-Exp": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek-r1-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek-r1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"openrouter-deepseek/deepseek-chat-v3.1:free": {"max_context_tokens": 163800, "compression_threshold_tokens": 114660},
"modelscope-deepseek-ai/DeepSeek-R1-0528": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfdeepseek-deepseek": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
# Kimi (Moonshot)
"cfkimi-kimi-k2-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-k1.5-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-k1.5-thinking-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-research": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"openrouter-moonshotai/kimi-k2:free": {"max_context_tokens": 32768, "compression_threshold_tokens": 22937},
"cfkimi-kimi-k2": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"cfkimi-kimi-k1.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
# GPT / OpenAI
"gpt-4.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-4o": {"max_context_tokens": 64000, "compression_threshold_tokens": 44800},
"gpt-5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"github-gpt-4.1": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250},
"gpt-5-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5.1-codex": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5.1-codex-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"gpt-5-codex": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000},
"github-gpt-4.1-mini": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250},
"openrouter-openai/gpt-oss-20b:free": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
# Claude / Anthropic
"claude-sonnet-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"claude-haiku-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"copilot-claude-opus-41": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000},
"copilot-claude-sonnet-4": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000},
# Other / OpenRouter / OSWE
"oswe-vscode-insiders": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200},
"modelscope-MiniMax/MiniMax-M2": {"max_context_tokens": 204800, "compression_threshold_tokens": 143360},
"oswe-vscode-prime": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000},
"grok-code-fast-1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"copilot-auto": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
"modelscope-ZhipuAI/GLM-4.6": {"max_context_tokens": 32000, "compression_threshold_tokens": 22400},
"openrouter-x-ai/grok-4.1-fast:free": {"max_context_tokens": 2000000, "compression_threshold_tokens": 1400000},
"openrouter-qwen/qwen3-coder:free": {"max_context_tokens": 262000, "compression_threshold_tokens": 183400},
"openrouter-qwen/qwen3-235b-a22b:free": {"max_context_tokens": 40960, "compression_threshold_tokens": 28672},
},
        description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。"
    )

View File

@@ -1,45 +0,0 @@
需求文档:异步上下文压缩插件优化 (Async Context Compression Optimization)
1. 核心目标 将现有的基于消息数量的压缩逻辑升级为基于 Token 数量的压缩逻辑,并引入递归摘要机制,以更精准地控制上下文窗口,提高摘要质量,并防止历史信息丢失。
2. 功能需求
Token 计数与阈值控制
引入 tiktoken: 使用 tiktoken 库进行精确的 Token 计数。如果环境不支持,则回退到字符估算 (1 token ≈ 4 chars)。
新配置参数 (Valves):
compression_threshold_tokens (默认: 64000): 当上下文总 Token 数超过此值时,触发压缩(生成摘要)。
max_context_tokens (默认: 128000): 上下文的硬性上限。如果超过此值,强制移除最早的消息(保留受保护消息除外)。
model_thresholds (字典): 支持针对不同模型 ID 配置不同的阈值。例如:{'gpt-4': {'compression_threshold_tokens': 8000, ...}}。
废弃旧参数: compression_threshold (基于消息数) 将被标记为废弃,优先使用 Token 阈值。
递归摘要 (Recursive Summarization)
机制: 在生成新摘要时,必须读取并包含上一次的摘要。
逻辑: 新摘要 = LLM(上一次摘要 + 新产生的对话消息)。
目的: 防止随着对话进行,最早期的摘要信息被丢弃,确保长期记忆的连续性。
消息保护与修剪策略
保护机制: keep_first (保留头部 N 条) 和 keep_last (保留尾部 N 条) 的消息绝对不参与压缩,也不被移除。
修剪逻辑: 当触发 max_context_tokens 限制时,优先移除 keep_first 之后、keep_last 之前的最早消息。
优化的提示词 (Prompt Engineering)
目标: 去除无用信息(寒暄、重复),保留关键信号(事实、代码、决策)。
指令:
提炼与净化: 明确要求移除噪音。
关键保留: 强调代码片段必须逐字保留。
合并与更新: 明确指示将新信息合并到旧摘要中。
语言一致性: 输出语言必须与对话语言保持一致。
3. 实现细节
文件:
async_context_compression.py
类:
Filter
关键方法:
_count_tokens(text): 实现 Token 计数。
_calculate_messages_tokens(messages): 计算消息列表总 Token。
_generate_summary_async(...): 修改为加载旧摘要,并传入 LLM。
_call_summary_llm(...): 更新 Prompt接受 previous_summary 和 new_messages。
inlet(...): 使用 compression_threshold_tokens 判断是否注入摘要实现 max_context_tokens 的强制修剪逻辑。
outlet(...): 使用 compression_threshold_tokens 判断是否触发后台摘要任务。