feat: 更新插件作者信息并将深度阅读插件本地化为英文
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
"""
|
||||
title: 导出到Excel
|
||||
title: Export to Excel
|
||||
author: Fu-Jie
|
||||
description: 从最后一条AI回答消息中提取Markdown表格到Excel文件,并在浏览器中触发下载。支持多表并自动根据标题命名
|
||||
icon_url: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48IS0tIFVwbG9hZGVkIHRvOiBTVkcgUmVwbywgd3d3LnN2Z3JlcG8uY29tLCBHZW5lcmF0b3I6IFNWRyBSZXBvIE1peGVyIFRvb2xzIC0tPgo8c3ZnIHdpZHRoPSI4MDBweCIgaGVpZ2h0PSI4MDBweCIgdmlld0JveD0iMCAtMS4yNyAxMTAuMDM3IDExMC4wMzciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHBhdGggZD0iTTU3LjU1IDBoNy40MjV2MTBjMTIuNTEzIDAgMjUuMDI1LjAyNSAzNy41MzctLjAzOCAyLjExMy4wODcgNC40MzgtLjA2MiA2LjI3NSAxLjIgMS4yODcgMS44NSAxLjEzOCA0LjIgMS4yMjUgNi4zMjUtLjA2MiAyMS43LS4wMzcgNDMuMzg4LS4wMjQgNjUuMDc1LS4wNjIgMy42MzguMzM3IDcuMzUtLjQyNSAxMC45MzgtLjUgMi42LTMuNjI1IDIuNjYyLTUuNzEzIDIuNzUtMTIuOTUuMDM3LTI1LjkxMi0uMDI1LTM4Ljg3NSAwdjExLjI1aC03Ljc2M2MtMTkuMDUtMy40NjMtMzguMTM4LTYuNjYyLTU3LjIxMi0xMFYxMC4wMTNDMTkuMTg4IDYuNjc1IDM4LjM3NSAzLjM4OCA1Ny41NSAweiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik02NC45NzUgMTMuNzVoNDEuMjVWOTIuNWgtNDEuMjVWODVoMTB2LTguNzVoLTEwdi01aDEwVjYyLjVoLTEwdi01aDEwdi04Ljc1aC0xMHYtNWgxMFYzNWgtMTB2LTVoMTB2LTguNzVoLTEwdi03LjV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAyMS4yNWgxNy41VjMwaC0xNy41di04Ljc1eiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik0zNy4wMjUgMzIuOTYyYzIuODI1LS4yIDUuNjYzLS4zNzUgOC41LS41MTJhMjYwNy4zNDQgMjYwNy4zNDQgMCAwIDEtMTAuMDg3IDIwLjQ4N2MzLjQzOCA3IDYuOTQ5IDEzLjk1IDEwLjM5OSAyMC45NSBhNzE2LjI4IDcxNi4yOCAwIDAgMS05LjAyNC0uNTc1Yy0yLjEyNS01LjIxMy00LjcxMy0xMC4yNS02LjIzOC0xNS43Yy0xLjY5OSA1LjA3NS00LjEyNSA5Ljg2Mi02LjA3NCAxNC44MzgtMi43MzgtLjAzOC01LjQ3Ni0uMTUtOC4yMTMtLjI2M0MxOS41IDY1LjkgMjIuNiA1OS41NjIgMjUuOTEyIDUzLjMxMmMtMi44MTItNi40MzgtNS45LTEyLjc1LTguOC0xOS4xNSAyLjc1LS4xNjMgNS41LS4zMjUgOC4yNS0uNDc1IDEuODYyIDQuODg4IDMuODk5IDkuNzEyIDUuNDM4IDE0LjcyNSAxLjY0OS01LjMxMiA0LjExMi0xMC4zMTIgNi4yMjUtMTUuNDV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAzNWgxNy41djguNzVoLTE3LjVWMzV6TTc5Ljk3NSA0OC43NWgxNy41djguNzVoLTE3LjV2LTguNzV6TTc5Ljk3NSA2Mi41aDE3LjV2OC43NWgtMTcuNVY2Mi41ek03OS45NzUgNzYuMjVoMTcuNVY4NWgtMTcuNXYtOC43NXoiIGZpbGw9IiMyMDcyNDUiLz48L3N2Zz4=
|
||||
version: 0.1.0
|
||||
author_url: https://github.com/Fu-Jie
|
||||
funding_url: https://github.com/Fu-Jie/awesome-openwebui
|
||||
version: 0.3.3
|
||||
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDAgMCAwIDEtMiAySDZhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4=
|
||||
description: Extracts Markdown tables from the last AI response message into an Excel (.xlsx) file and triggers a browser download. Supports multiple tables, with automatic naming based on headings.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -37,17 +39,17 @@ class Action:
|
||||
print(f"action:{__name__}")
|
||||
if isinstance(__user__, (list, tuple)):
|
||||
user_language = (
|
||||
__user__[0].get("language", "zh-CN") if __user__ else "zh-CN"
|
||||
__user__[0].get("language", "en-US") if __user__ else "en-US"
|
||||
)
|
||||
user_name = __user__[0].get("name", "用户") if __user__[0] else "用户"
|
||||
user_name = __user__[0].get("name", "User") if __user__[0] else "User"
|
||||
user_id = (
|
||||
__user__[0]["id"]
|
||||
if __user__ and "id" in __user__[0]
|
||||
else "unknown_user"
|
||||
)
|
||||
elif isinstance(__user__, dict):
|
||||
user_language = __user__.get("language", "zh-CN")
|
||||
user_name = __user__.get("name", "用户")
|
||||
user_language = __user__.get("language", "en-US")
|
||||
user_name = __user__.get("name", "User")
|
||||
user_id = __user__.get("id", "unknown_user")
|
||||
|
||||
if __event_emitter__:
|
||||
@@ -56,7 +58,7 @@ class Action:
|
||||
await __event_emitter__(
|
||||
{
|
||||
"type": "status",
|
||||
"data": {"description": "正在保存到文件...", "done": False},
|
||||
"data": {"description": "Saving to file...", "done": False},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -65,18 +67,18 @@ class Action:
|
||||
tables = self.extract_tables_from_message(message_content)
|
||||
|
||||
if not tables:
|
||||
raise HTTPException(status_code=400, detail="未找到任何表格。")
|
||||
raise HTTPException(status_code=400, detail="No tables found.")
|
||||
|
||||
# 获取动态文件名和sheet名称
|
||||
# Get dynamic filename and sheet names
|
||||
workbook_name, sheet_names = self.generate_names_from_content(
|
||||
message_content, tables
|
||||
)
|
||||
|
||||
# 使用优化后的文件名生成逻辑
|
||||
# Use optimized filename generation logic
|
||||
current_datetime = datetime.datetime.now()
|
||||
formatted_date = current_datetime.strftime("%Y%m%d")
|
||||
|
||||
# 如果没找到标题则使用 user_yyyymmdd 格式
|
||||
# If no title found, use user_yyyymmdd format
|
||||
if not workbook_name:
|
||||
workbook_name = f"{user_name}_{formatted_date}"
|
||||
|
||||
@@ -87,10 +89,10 @@ class Action:
|
||||
|
||||
os.makedirs(os.path.dirname(excel_file_path), exist_ok=True)
|
||||
|
||||
# 保存表格到Excel(使用符合中国规范的格式化功能)
|
||||
# Save tables to Excel (using enhanced formatting)
|
||||
self.save_tables_to_excel_enhanced(tables, excel_file_path, sheet_names)
|
||||
|
||||
# 触发文件下载
|
||||
# Trigger file download
|
||||
if __event_call__:
|
||||
with open(excel_file_path, "rb") as file:
|
||||
file_content = file.read()
|
||||
@@ -121,7 +123,7 @@ class Action:
|
||||
URL.revokeObjectURL(url);
|
||||
document.body.removeChild(a);
|
||||
}} catch (error) {{
|
||||
console.error('触发下载时出错:', error);
|
||||
console.error('Error triggering download:', error);
|
||||
}}
|
||||
"""
|
||||
},
|
||||
@@ -130,15 +132,15 @@ class Action:
|
||||
await __event_emitter__(
|
||||
{
|
||||
"type": "status",
|
||||
"data": {"description": "输出已保存", "done": True},
|
||||
"data": {"description": "File saved", "done": True},
|
||||
}
|
||||
)
|
||||
|
||||
# 清理临时文件
|
||||
# Clean up temp file
|
||||
if os.path.exists(excel_file_path):
|
||||
os.remove(excel_file_path)
|
||||
|
||||
return {"message": "下载事件已触发"}
|
||||
return {"message": "Download triggered"}
|
||||
|
||||
except HTTPException as e:
|
||||
print(f"Error processing tables: {str(e.detail)}")
|
||||
@@ -146,13 +148,13 @@ class Action:
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"description": f"保存文件时出错: {e.detail}",
|
||||
"description": f"Error saving file: {e.detail}",
|
||||
"done": True,
|
||||
},
|
||||
}
|
||||
)
|
||||
await self._send_notification(
|
||||
__event_emitter__, "error", "没有找到可以导出的表格!"
|
||||
__event_emitter__, "error", "No tables found to export!"
|
||||
)
|
||||
raise e
|
||||
except Exception as e:
|
||||
@@ -161,22 +163,22 @@ class Action:
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"description": f"保存文件时出错: {str(e)}",
|
||||
"description": f"Error saving file: {str(e)}",
|
||||
"done": True,
|
||||
},
|
||||
}
|
||||
)
|
||||
await self._send_notification(
|
||||
__event_emitter__, "error", "没有找到可以导出的表格!"
|
||||
__event_emitter__, "error", "No tables found to export!"
|
||||
)
|
||||
|
||||
def extract_tables_from_message(self, message: str) -> List[Dict]:
|
||||
"""
|
||||
从消息文本中提取Markdown表格及位置信息
|
||||
返回结构: [{
|
||||
"data": 表格数据,
|
||||
"start_line": 起始行号,
|
||||
"end_line": 结束行号
|
||||
Extract Markdown tables and their positions from message text
|
||||
Returns structure: [{
|
||||
"data": table data,
|
||||
"start_line": start line number,
|
||||
"end_line": end line number
|
||||
}]
|
||||
"""
|
||||
table_row_pattern = r"^\s*\|.*\|.*\s*$"
|
||||
@@ -190,17 +192,17 @@ class Action:
|
||||
current_line += 1
|
||||
if re.search(table_row_pattern, row):
|
||||
if start_line is None:
|
||||
start_line = current_line # 记录表格起始行
|
||||
start_line = current_line # Record table start line
|
||||
|
||||
# 处理表格行
|
||||
# Process table row
|
||||
cells = [cell.strip() for cell in row.strip().strip("|").split("|")]
|
||||
|
||||
# 跳过分隔行
|
||||
# Skip separator row
|
||||
is_separator_row = all(re.fullmatch(r"[:\-]+", cell) for cell in cells)
|
||||
if not is_separator_row:
|
||||
current_table.append(cells)
|
||||
elif current_table:
|
||||
# 表格结束
|
||||
# Table ends
|
||||
tables.append(
|
||||
{
|
||||
"data": current_table,
|
||||
@@ -211,7 +213,7 @@ class Action:
|
||||
current_table = []
|
||||
start_line = None
|
||||
|
||||
# 处理最后一个表格
|
||||
# Process the last table
|
||||
if current_table:
|
||||
tables.append(
|
||||
{
|
||||
@@ -225,106 +227,106 @@ class Action:
|
||||
|
||||
def generate_names_from_content(self, content: str, tables: List[Dict]) -> tuple:
|
||||
"""
|
||||
根据内容生成工作簿名称和sheet名称
|
||||
- 忽略非空段落,只使用 markdown 标题 (h1-h6)。
|
||||
- 单表格: 使用最近的标题作为工作簿和工作表名。
|
||||
- 多表格: 使用文档第一个标题作为工作簿名,各表格最近的标题作为工作表名。
|
||||
- 默认命名:
|
||||
- 工作簿: 在主流程中处理 (user_yyyymmdd.xlsx)。
|
||||
- 工作表: 表1, 表2, ...
|
||||
Generate workbook name and sheet names based on content
|
||||
- Ignore ordinary text paragraphs; only Markdown headings (h1-h6) are used as name sources.
|
||||
- Single table: Use the closest header as workbook and sheet name.
|
||||
- Multiple tables: Use the first header in the document as workbook name, and closest header for each table as sheet name.
|
||||
- Default naming:
|
||||
- Workbook: Handled in main flow (user_yyyymmdd.xlsx).
|
||||
- Sheet: Sheet1, Sheet2, ...
|
||||
"""
|
||||
lines = content.split("\n")
|
||||
workbook_name = ""
|
||||
sheet_names = []
|
||||
all_headers = []
|
||||
|
||||
# 1. 查找文档中所有 h1-h6 标题及其位置
|
||||
# 1. Find all h1-h6 headers and their positions
|
||||
for i, line in enumerate(lines):
|
||||
if re.match(r"^#{1,6}\s+", line):
|
||||
all_headers.append(
|
||||
{"text": re.sub(r"^#{1,6}\s+", "", line).strip(), "line_num": i}
|
||||
)
|
||||
|
||||
# 2. 为每个表格生成 sheet 名称
|
||||
# 2. Generate sheet name for each table
|
||||
for i, table in enumerate(tables):
|
||||
table_start_line = table["start_line"] - 1 # 转换为 0-based 索引
|
||||
table_start_line = table["start_line"] - 1 # Convert to 0-based index
|
||||
closest_header_text = None
|
||||
|
||||
# 查找当前表格上方最近的标题
|
||||
# Find closest header above current table
|
||||
candidate_headers = [
|
||||
h for h in all_headers if h["line_num"] < table_start_line
|
||||
]
|
||||
if candidate_headers:
|
||||
# 找到候选标题中行号最大的,即为最接近的
|
||||
# Find the header with the largest line number among candidates
|
||||
closest_header = max(candidate_headers, key=lambda x: x["line_num"])
|
||||
closest_header_text = closest_header["text"]
|
||||
|
||||
if closest_header_text:
|
||||
# 清理并添加找到的标题
|
||||
# Clean and add found header
|
||||
sheet_names.append(self.clean_sheet_name(closest_header_text))
|
||||
else:
|
||||
# 如果找不到标题,使用默认名称 "表{i+1}"
|
||||
sheet_names.append(f"表{i+1}")
|
||||
# If no header found, use default name "Sheet{i+1}"
|
||||
sheet_names.append(f"Sheet{i+1}")
|
||||
|
||||
# 3. 根据表格数量确定工作簿名称
|
||||
# 3. Determine workbook name based on table count
|
||||
if len(tables) == 1:
|
||||
# 单个表格: 使用其工作表名作为工作簿名 (前提是该名称不是默认的 "表1")
|
||||
if sheet_names[0] != "表1":
|
||||
# Single table: Use its sheet name as workbook name (if not default "Sheet1")
|
||||
if sheet_names[0] != "Sheet1":
|
||||
workbook_name = sheet_names[0]
|
||||
elif len(tables) > 1:
|
||||
# 多个表格: 使用文档中的第一个标题作为工作簿名
|
||||
# Multiple tables: Use the first header in the document as workbook name
|
||||
if all_headers:
|
||||
# 找到所有标题中行号最小的,即为第一个标题
|
||||
# Find header with smallest line number
|
||||
first_header = min(all_headers, key=lambda x: x["line_num"])
|
||||
workbook_name = first_header["text"]
|
||||
|
||||
# 4. 清理工作簿名称 (如果为空,主流程会使用默认名称)
|
||||
# 4. Clean workbook name (if empty, main flow will use default name)
|
||||
workbook_name = self.clean_filename(workbook_name) if workbook_name else ""
|
||||
|
||||
return workbook_name, sheet_names
|
||||
|
||||
def clean_filename(self, name: str) -> str:
|
||||
"""清理文件名中的非法字符"""
|
||||
"""Clean illegal characters in filename"""
|
||||
return re.sub(r'[\\/*?:"<>|]', "", name).strip()
|
||||
|
||||
def clean_sheet_name(self, name: str) -> str:
|
||||
"""清理sheet名称(限制31字符,去除非法字符)"""
|
||||
"""Clean sheet name (limit 31 chars, remove illegal chars)"""
|
||||
name = re.sub(r"[\\/*?[\]:]", "", name).strip()
|
||||
return name[:31] if len(name) > 31 else name
|
||||
|
||||
# ======================== 符合中国规范的格式化功能 ========================
|
||||
# ======================== Enhanced Formatting ========================
|
||||
|
||||
def calculate_text_width(self, text: str) -> float:
|
||||
"""
|
||||
计算文本显示宽度,考虑中英文字符差异
|
||||
中文字符按2个单位计算,英文字符按1个单位计算
|
||||
Calculate text display width, considering CJK characters
|
||||
CJK characters count as 2 units, others as 1 unit
|
||||
"""
|
||||
if not text:
|
||||
return 0
|
||||
|
||||
width = 0
|
||||
for char in str(text):
|
||||
# 判断是否为中文字符(包括中文标点)
|
||||
# Check if CJK character
|
||||
if "\u4e00" <= char <= "\u9fff" or "\u3000" <= char <= "\u303f":
|
||||
width += 2 # 中文字符占2个单位宽度
|
||||
width += 2
|
||||
else:
|
||||
width += 1 # 英文字符占1个单位宽度
|
||||
width += 1
|
||||
|
||||
return width
|
||||
|
||||
def calculate_text_height(self, text: str, max_width: int = 50) -> int:
|
||||
"""
|
||||
计算文本显示所需的行数
|
||||
根据换行符和文本长度计算
|
||||
Calculate required lines for text display
|
||||
Based on newlines and text length
|
||||
"""
|
||||
if not text:
|
||||
return 1
|
||||
|
||||
text = str(text)
|
||||
# 计算换行符导致的行数
|
||||
# Calculate lines from newlines
|
||||
explicit_lines = text.count("\n") + 1
|
||||
|
||||
# 计算因文本长度超出而需要的额外行数
|
||||
# Calculate extra lines from wrapping
|
||||
text_width = self.calculate_text_width(text.replace("\n", ""))
|
||||
wrapped_lines = max(
|
||||
1, int(text_width / max_width) + (1 if text_width % max_width > 0 else 0)
|
||||
@@ -334,7 +336,7 @@ class Action:
|
||||
|
||||
def get_column_letter(self, col_index: int) -> str:
|
||||
"""
|
||||
将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...)
|
||||
Convert column index to Excel column letter (A, B, C, ..., AA, AB, ...)
|
||||
"""
|
||||
result = ""
|
||||
while col_index >= 0:
|
||||
@@ -344,44 +346,42 @@ class Action:
|
||||
|
||||
def determine_content_type(self, header: str, values: list) -> str:
|
||||
"""
|
||||
根据表头和内容智能判断数据类型,符合中国官方表格规范
|
||||
返回: 'number', 'date', 'sequence', 'text'
|
||||
Intelligently determine data type based on header and content
|
||||
Returns: 'number', 'date', 'sequence', 'text'
|
||||
"""
|
||||
header_lower = str(header).lower().strip()
|
||||
|
||||
# 检查表头关键词
|
||||
# Check header keywords
|
||||
number_keywords = [
|
||||
"数量",
|
||||
"金额",
|
||||
"价格",
|
||||
"费用",
|
||||
"成本",
|
||||
"收入",
|
||||
"支出",
|
||||
"总计",
|
||||
"小计",
|
||||
"百分比",
|
||||
"quantity",
|
||||
"amount",
|
||||
"price",
|
||||
"cost",
|
||||
"revenue",
|
||||
"expense",
|
||||
"total",
|
||||
"subtotal",
|
||||
"percentage",
|
||||
"%",
|
||||
"比例",
|
||||
"率",
|
||||
"数值",
|
||||
"分数",
|
||||
"成绩",
|
||||
"得分",
|
||||
"ratio",
|
||||
"rate",
|
||||
"value",
|
||||
"score",
|
||||
"points",
|
||||
]
|
||||
date_keywords = ["日期", "时间", "年份", "月份", "时刻", "date", "time"]
|
||||
date_keywords = ["date", "time", "year", "month", "moment"]
|
||||
sequence_keywords = [
|
||||
"序号",
|
||||
"编号",
|
||||
"号码",
|
||||
"排序",
|
||||
"次序",
|
||||
"顺序",
|
||||
"no",
|
||||
"no.",
|
||||
"id",
|
||||
"编码",
|
||||
"index",
|
||||
"rank",
|
||||
"order",
|
||||
"sequence",
|
||||
"code",
|
||||
]
|
||||
|
||||
# 检查表头
|
||||
# Check header
|
||||
for keyword in number_keywords:
|
||||
if keyword in header_lower:
|
||||
return "number"
|
||||
@@ -394,13 +394,13 @@ class Action:
|
||||
if keyword in header_lower:
|
||||
return "sequence"
|
||||
|
||||
# 检查数据内容
|
||||
# Check data content
|
||||
if not values:
|
||||
return "text"
|
||||
|
||||
sample_values = [
|
||||
str(v).strip() for v in values[:10] if str(v).strip()
|
||||
] # 取前10个非空值作为样本
|
||||
] # Use first 10 non-empty values as sample
|
||||
if not sample_values:
|
||||
return "text"
|
||||
|
||||
@@ -409,22 +409,17 @@ class Action:
|
||||
sequence_count = 0
|
||||
|
||||
for value in sample_values:
|
||||
# 检查是否为数字
|
||||
# Check if number
|
||||
try:
|
||||
float(
|
||||
value.replace(",", "")
|
||||
.replace(",", "")
|
||||
.replace("%", "")
|
||||
.replace("%", "")
|
||||
)
|
||||
float(value.replace(",", "").replace("%", ""))
|
||||
numeric_count += 1
|
||||
continue
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# 检查是否为日期格式
|
||||
# Check if date format
|
||||
date_patterns = [
|
||||
r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}日?",
|
||||
r"\d{4}[-/]\d{1,2}[-/]\d{1,2}",
|
||||
r"\d{1,2}[-/]\d{1,2}[-/]\d{4}",
|
||||
r"\d{4}\d{2}\d{2}",
|
||||
]
|
||||
@@ -433,15 +428,15 @@ class Action:
|
||||
date_count += 1
|
||||
break
|
||||
|
||||
# 检查是否为序号格式
|
||||
# Check if sequence format
|
||||
if (
|
||||
re.match(r"^\d+$", value) and len(value) <= 4
|
||||
): # 纯数字且不超过4位,可能是序号
|
||||
): # Pure digits and <= 4 chars, likely sequence
|
||||
sequence_count += 1
|
||||
|
||||
total_count = len(sample_values)
|
||||
|
||||
# 根据比例判断类型
|
||||
# Determine type based on ratio
|
||||
if numeric_count / total_count >= 0.7:
|
||||
return "number"
|
||||
elif date_count / total_count >= 0.7:
|
||||
@@ -451,27 +446,17 @@ class Action:
|
||||
else:
|
||||
return "text"
|
||||
|
||||
def get_column_letter(self, col_index: int) -> str:
|
||||
"""
|
||||
将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...)
|
||||
"""
|
||||
result = ""
|
||||
while col_index >= 0:
|
||||
result = chr(65 + col_index % 26) + result
|
||||
col_index = col_index // 26 - 1
|
||||
return result
|
||||
|
||||
def save_tables_to_excel_enhanced(
|
||||
self, tables: List[Dict], file_path: str, sheet_names: List[str]
|
||||
):
|
||||
"""
|
||||
符合中国官方表格规范的Excel保存功能
|
||||
Enhanced Excel saving function with standard formatting
|
||||
"""
|
||||
try:
|
||||
with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer:
|
||||
workbook = writer.book
|
||||
|
||||
# 定义表头样式 - 居中对齐(符合中国规范)
|
||||
# Define header style - Center aligned
|
||||
header_format = workbook.add_format(
|
||||
{
|
||||
"bold": True,
|
||||
@@ -479,62 +464,62 @@ class Action:
|
||||
"font_color": "white",
|
||||
"bg_color": "#00abbd",
|
||||
"border": 1,
|
||||
"align": "center", # 表头居中
|
||||
"align": "center",
|
||||
"valign": "vcenter",
|
||||
"text_wrap": True,
|
||||
}
|
||||
)
|
||||
|
||||
# 文本单元格样式 - 左对齐
|
||||
# Text cell style - Left aligned
|
||||
text_format = workbook.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"align": "left", # 文本左对齐
|
||||
"align": "left",
|
||||
"valign": "vcenter",
|
||||
"text_wrap": True,
|
||||
}
|
||||
)
|
||||
|
||||
# 数值单元格样式 - 右对齐
|
||||
# Number cell style - Right aligned
|
||||
number_format = workbook.add_format(
|
||||
{"border": 1, "align": "right", "valign": "vcenter"} # 数值右对齐
|
||||
{"border": 1, "align": "right", "valign": "vcenter"}
|
||||
)
|
||||
|
||||
# 整数格式 - 右对齐
|
||||
# Integer format - Right aligned
|
||||
integer_format = workbook.add_format(
|
||||
{
|
||||
"num_format": "0",
|
||||
"border": 1,
|
||||
"align": "right", # 整数右对齐
|
||||
"align": "right",
|
||||
"valign": "vcenter",
|
||||
}
|
||||
)
|
||||
|
||||
# 小数格式 - 右对齐
|
||||
# Decimal format - Right aligned
|
||||
decimal_format = workbook.add_format(
|
||||
{
|
||||
"num_format": "0.00",
|
||||
"border": 1,
|
||||
"align": "right", # 小数右对齐
|
||||
"align": "right",
|
||||
"valign": "vcenter",
|
||||
}
|
||||
)
|
||||
|
||||
# 日期格式 - 居中对齐
|
||||
# Date format - Center aligned
|
||||
date_format = workbook.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"align": "center", # 日期居中对齐
|
||||
"align": "center",
|
||||
"valign": "vcenter",
|
||||
"text_wrap": True,
|
||||
}
|
||||
)
|
||||
|
||||
# 序号格式 - 居中对齐
|
||||
# Sequence format - Center aligned
|
||||
sequence_format = workbook.add_format(
|
||||
{
|
||||
"border": 1,
|
||||
"align": "center", # 序号居中对齐
|
||||
"align": "center",
|
||||
"valign": "vcenter",
|
||||
}
|
||||
)
|
||||
@@ -548,12 +533,12 @@ class Action:
|
||||
|
||||
print(f"Processing table {i+1} with {len(table_data)} rows")
|
||||
|
||||
# 获取sheet名称
|
||||
# Get sheet name
|
||||
sheet_name = (
|
||||
sheet_names[i] if i < len(sheet_names) else f"表{i+1}"
|
||||
sheet_names[i] if i < len(sheet_names) else f"Sheet{i+1}"
|
||||
)
|
||||
|
||||
# 创建DataFrame
|
||||
# Create DataFrame
|
||||
headers = [
|
||||
str(cell).strip()
|
||||
for cell in table_data[0]
|
||||
@@ -561,7 +546,7 @@ class Action:
|
||||
]
|
||||
if not headers:
|
||||
print(f"Warning: No valid headers found for table {i+1}")
|
||||
headers = [f"列{j+1}" for j in range(len(table_data[0]))]
|
||||
headers = [f"Col{j+1}" for j in range(len(table_data[0]))]
|
||||
|
||||
data_rows = []
|
||||
if len(table_data) > 1:
|
||||
@@ -580,14 +565,14 @@ class Action:
|
||||
|
||||
print(f"DataFrame created with columns: {list(df.columns)}")
|
||||
|
||||
# 修复pandas FutureWarning - 使用try-except替代errors='ignore'
|
||||
# Avoid the pandas FutureWarning: use try/except instead of errors='ignore'
|
||||
for col in df.columns:
|
||||
try:
|
||||
df[col] = pd.to_numeric(df[col])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
# 先写入数据(不包含表头)
|
||||
# Write data first (without header)
|
||||
df.to_excel(
|
||||
writer,
|
||||
sheet_name=sheet_name,
|
||||
@@ -597,8 +582,8 @@ class Action:
|
||||
)
|
||||
worksheet = writer.sheets[sheet_name]
|
||||
|
||||
# 应用符合中国规范的格式化
|
||||
self.apply_chinese_standard_formatting(
|
||||
# Apply enhanced formatting
|
||||
self.apply_enhanced_formatting(
|
||||
worksheet,
|
||||
df,
|
||||
headers,
|
||||
@@ -620,7 +605,7 @@ class Action:
|
||||
print(f"Error saving Excel file: {str(e)}")
|
||||
raise
|
||||
|
||||
def apply_chinese_standard_formatting(
|
||||
def apply_enhanced_formatting(
|
||||
self,
|
||||
worksheet,
|
||||
df,
|
||||
@@ -635,24 +620,24 @@ class Action:
|
||||
sequence_format,
|
||||
):
|
||||
"""
|
||||
应用符合中国官方表格规范的格式化
|
||||
- 表头: 居中对齐
|
||||
- 数值: 右对齐
|
||||
- 文本: 左对齐
|
||||
- 日期: 居中对齐
|
||||
- 序号: 居中对齐
|
||||
Apply enhanced formatting
|
||||
- Header: Center aligned
|
||||
- Number: Right aligned
|
||||
- Text: Left aligned
|
||||
- Date: Center aligned
|
||||
- Sequence: Center aligned
|
||||
"""
|
||||
try:
|
||||
# 1. 写入表头(居中对齐)
|
||||
print(f"Writing headers with Chinese standard alignment: {headers}")
|
||||
# 1. Write headers (Center aligned)
|
||||
print(f"Writing headers with enhanced alignment: {headers}")
|
||||
for col_idx, header in enumerate(headers):
|
||||
if header and str(header).strip():
|
||||
worksheet.write(0, col_idx, str(header).strip(), header_format)
|
||||
else:
|
||||
default_header = f"列{col_idx+1}"
|
||||
default_header = f"Col{col_idx+1}"
|
||||
worksheet.write(0, col_idx, default_header, header_format)
|
||||
|
||||
# 2. 分析每列的数据类型并应用相应格式
|
||||
# 2. Analyze column types
|
||||
column_types = {}
|
||||
for col_idx, column in enumerate(headers):
|
||||
if col_idx < len(df.columns):
|
||||
@@ -666,14 +651,14 @@ class Action:
|
||||
else:
|
||||
column_types[col_idx] = "text"
|
||||
|
||||
# 3. 写入并格式化数据(根据类型使用不同对齐方式)
|
||||
# 3. Write and format data
|
||||
for row_idx, row in df.iterrows():
|
||||
for col_idx, value in enumerate(row):
|
||||
content_type = column_types.get(col_idx, "text")
|
||||
|
||||
# 根据内容类型选择格式
|
||||
# Select format based on content type
|
||||
if content_type == "number":
|
||||
# 数值类型 - 右对齐
|
||||
# Number - Right aligned
|
||||
if pd.api.types.is_numeric_dtype(df.iloc[:, col_idx]):
|
||||
if pd.api.types.is_integer_dtype(df.iloc[:, col_idx]):
|
||||
current_format = integer_format
|
||||
@@ -691,49 +676,45 @@ class Action:
|
||||
current_format = number_format
|
||||
|
||||
elif content_type == "date":
|
||||
# 日期类型 - 居中对齐
|
||||
# Date - Center aligned
|
||||
current_format = date_format
|
||||
|
||||
elif content_type == "sequence":
|
||||
# 序号类型 - 居中对齐
|
||||
# Sequence - Center aligned
|
||||
current_format = sequence_format
|
||||
|
||||
else:
|
||||
# 文本类型 - 左对齐
|
||||
# Text - Left aligned
|
||||
current_format = text_format
|
||||
|
||||
worksheet.write(row_idx + 1, col_idx, value, current_format)
|
||||
|
||||
# 4. 自动调整列宽
|
||||
# 4. Auto-adjust column width
|
||||
for col_idx, column in enumerate(headers):
|
||||
col_letter = self.get_column_letter(col_idx)
|
||||
|
||||
# 计算表头宽度
|
||||
# Calculate header width
|
||||
header_width = self.calculate_text_width(str(column))
|
||||
|
||||
# 计算数据列的最大宽度
|
||||
# Calculate max data width
|
||||
max_data_width = 0
|
||||
if not df.empty and col_idx < len(df.columns):
|
||||
for value in df.iloc[:, col_idx]:
|
||||
value_width = self.calculate_text_width(str(value))
|
||||
max_data_width = max(max_data_width, value_width)
|
||||
|
||||
# 基础宽度:取表头和数据的最大宽度
|
||||
# Base width
|
||||
base_width = max(header_width, max_data_width)
|
||||
|
||||
# 根据内容类型调整宽度
|
||||
# Adjust width based on type
|
||||
content_type = column_types.get(col_idx, "text")
|
||||
if content_type == "sequence":
|
||||
# 序号列通常比较窄
|
||||
optimal_width = max(8, min(15, base_width + 2))
|
||||
elif content_type == "number":
|
||||
# 数值列需要额外空间显示数字
|
||||
optimal_width = max(12, min(25, base_width + 3))
|
||||
elif content_type == "date":
|
||||
# 日期列需要固定宽度
|
||||
optimal_width = max(15, min(20, base_width + 2))
|
||||
else:
|
||||
# 文本列根据内容调整
|
||||
if base_width <= 10:
|
||||
optimal_width = base_width + 3
|
||||
elif base_width <= 20:
|
||||
@@ -744,13 +725,11 @@ class Action:
|
||||
|
||||
worksheet.set_column(f"{col_letter}:{col_letter}", optimal_width)
|
||||
|
||||
# 5. 自动调整行高
|
||||
# 设置表头行高为35点
|
||||
# 5. Auto-adjust row height
|
||||
worksheet.set_row(0, 35)
|
||||
|
||||
# 设置数据行行高
|
||||
for row_idx, row in df.iterrows():
|
||||
max_row_height = 20 # 中国表格规范建议的最小行高
|
||||
max_row_height = 20
|
||||
|
||||
for col_idx, value in enumerate(row):
|
||||
if col_idx < len(headers):
|
||||
@@ -764,26 +743,24 @@ class Action:
|
||||
col_width = 15
|
||||
|
||||
cell_lines = self.calculate_text_height(str(value), col_width)
|
||||
cell_height = cell_lines * 20 # 每行20点高度,符合中国规范
|
||||
cell_height = cell_lines * 20
|
||||
|
||||
max_row_height = max(max_row_height, cell_height)
|
||||
|
||||
final_height = min(120, max_row_height)
|
||||
worksheet.set_row(row_idx + 1, final_height)
|
||||
|
||||
print(f"Successfully applied Chinese standard formatting")
|
||||
print(f"Successfully applied enhanced formatting")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to apply Chinese standard formatting: {str(e)}")
|
||||
# 降级到基础格式化
|
||||
print(f"Warning: Failed to apply enhanced formatting: {str(e)}")
|
||||
self.apply_basic_formatting_fallback(worksheet, df)
|
||||
|
||||
def apply_basic_formatting_fallback(self, worksheet, df):
|
||||
"""
|
||||
基础格式化降级方案
|
||||
Basic formatting fallback
|
||||
"""
|
||||
try:
|
||||
# 基础列宽调整
|
||||
for i, column in enumerate(df.columns):
|
||||
column_width = (
|
||||
max(
|
||||
@@ -798,7 +775,5 @@ class Action:
|
||||
f"{col_letter}:{col_letter}", min(60, max(10, column_width))
|
||||
)
|
||||
|
||||
print("Applied basic formatting fallback")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Warning: Even basic formatting failed: {str(e)}")
|
||||
print(f"Error in basic formatting: {str(e)}")
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""
|
||||
title: 导出为 Excel
|
||||
author: Antigravity
|
||||
author_url: https://github.com/open-webui
|
||||
funding_url: https://github.com/open-webui
|
||||
author: Fu-Jie
|
||||
author_url: https://github.com/Fu-Jie
|
||||
funding_url: https://github.com/Fu-Jie/awesome-openwebui
|
||||
version: 0.3.3
|
||||
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDIgMCAwIDEtMiAyaC02YTIgMiAwIDAgMS0yLTJ2LTVhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4=
|
||||
description: 将当前对话历史导出为 Excel (.xlsx) 文件,支持自动提取表头。
|
||||
@@ -1,11 +1,11 @@
|
||||
"""
|
||||
title: 闪记卡 (Flash Card)
|
||||
author: Antigravity
|
||||
author_url: https://github.com/open-webui
|
||||
funding_url: https://github.com/open-webui
|
||||
title: Flash Card
|
||||
author: Fu-Jie
|
||||
author_url: https://github.com/Fu-Jie
|
||||
funding_url: https://github.com/Fu-Jie/awesome-openwebui
|
||||
version: 0.2.1
|
||||
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg==
|
||||
description: 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类。
|
||||
description: Quickly generates beautiful flashcards from text, extracting key points and categories.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -24,20 +24,23 @@ class Action:
|
||||
class Valves(BaseModel):
|
||||
model_id: str = Field(
|
||||
default="",
|
||||
description="用于生成卡片内容的模型 ID。如果为空,则使用当前模型。",
|
||||
description="Model ID used for generating card content. If empty, uses the current model.",
|
||||
)
|
||||
min_text_length: int = Field(
|
||||
default=50, description="生成闪记卡所需的最小文本长度(字符数)。"
|
||||
default=50,
|
||||
description="Minimum text length required to generate a flashcard (characters).",
|
||||
)
|
||||
max_text_length: int = Field(
|
||||
default=2000,
|
||||
description="建议的最大文本长度。超过此长度建议使用深度分析工具。",
|
||||
description="Recommended maximum text length. For longer texts, deep analysis tools are recommended.",
|
||||
)
|
||||
language: str = Field(
|
||||
default="zh", description="卡片内容的目标语言 (例如 'zh', 'en')。"
|
||||
default="en",
|
||||
description="Target language for card content (e.g., 'en', 'zh').",
|
||||
)
|
||||
show_status: bool = Field(
|
||||
default=True, description="是否在聊天界面显示状态更新。"
|
||||
default=True,
|
||||
description="Whether to show status updates in the chat interface.",
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
@@ -72,7 +75,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "warning",
|
||||
"content": f"文本过短({text_length}字符),建议至少{self.valves.min_text_length}字符。",
|
||||
"content": f"Text too short ({text_length} chars), recommended at least {self.valves.min_text_length} chars.",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -85,7 +88,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "info",
|
||||
"content": f"文本较长({text_length}字符),建议使用'墨海拾贝'进行深度分析。",
|
||||
"content": f"Text quite long ({text_length} chars), consider using 'Deep Reading' for deep analysis.",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -97,7 +100,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "info",
|
||||
"content": "⚡ 正在生成闪记卡...",
|
||||
"content": "⚡ Generating Flash Card...",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -110,29 +113,29 @@ class Action:
|
||||
model = self.valves.model_id if self.valves.model_id else body.get("model")
|
||||
|
||||
system_prompt = f"""
|
||||
你是一个闪记卡生成专家,专注于创建适合学习和记忆的知识卡片。你的任务是将文本提炼成简洁、易记的学习卡片。
|
||||
You are a Flash Card Generation Expert, specializing in creating knowledge cards suitable for learning and memorization. Your task is to distill text into concise, easy-to-remember flashcards.
|
||||
|
||||
请提取以下字段,并以 JSON 格式返回:
|
||||
1. "title": 创建一个简短、精准的标题(6-12 字),突出核心概念
|
||||
2. "summary": 用一句话总结核心要义(20-40 字),要通俗易懂、便于记忆
|
||||
3. "key_points": 列出 3-5 个关键记忆点(每个 10-20 字)
|
||||
- 每个要点应该是独立的知识点
|
||||
- 使用简洁、口语化的表达
|
||||
- 避免冗长的句子
|
||||
4. "tags": 列出 2-4 个分类标签(每个 2-5 字)
|
||||
5. "category": 选择一个主分类(如:概念、技能、事实、方法等)
|
||||
Please extract the following fields and return them in JSON format:
|
||||
1. "title": Create a short, precise title (3-8 words), highlighting the core concept.
|
||||
2. "summary": Summarize the core essence in one sentence (10-25 words), making it easy to understand and remember.
|
||||
3. "key_points": List 3-5 key memory points (5-15 words each).
|
||||
- Each point should be an independent knowledge unit.
|
||||
- Use concise, conversational expression.
|
||||
- Avoid long sentences.
|
||||
4. "tags": List 2-4 classification tags (1-3 words each).
|
||||
5. "category": Choose a main category (e.g., Concept, Skill, Fact, Method, etc.).
|
||||
|
||||
目标语言: {self.valves.language}
|
||||
Target Language: {self.valves.language}
|
||||
|
||||
重要原则:
|
||||
- **极简主义**: 每个要点都要精炼到极致
|
||||
- **记忆优先**: 内容要便于记忆和回忆
|
||||
- **核心聚焦**: 只提取最核心的知识点
|
||||
- **口语化**: 使用通俗易懂的语言
|
||||
- 只返回 JSON 对象,不要包含 markdown 格式
|
||||
Important Principles:
|
||||
- **Minimalism**: Refine each point to the extreme.
|
||||
- **Memory First**: Content should be easy to memorize and recall.
|
||||
- **Core Focus**: Extract only the most core knowledge points.
|
||||
- **Conversational**: Use easy-to-understand language.
|
||||
- Return ONLY the JSON object, do not include markdown formatting.
|
||||
"""
|
||||
|
||||
prompt = f"请将以下文本提炼成一张学习记忆卡片:\n\n{target_message}"
|
||||
prompt = f"Please refine the following text into a learning flashcard:\n\n{target_message}"
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
@@ -163,7 +166,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "error",
|
||||
"content": "生成卡片数据失败,请重试。",
|
||||
"content": "Failed to generate card data, please try again.",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -173,11 +176,6 @@ class Action:
|
||||
html_card = self.generate_html_card(card_data)
|
||||
|
||||
# 3. Append to message
|
||||
# We append it to the user message so it shows up as part of the interaction
|
||||
# Or we can append it to the assistant response if we were a Pipe, but this is an Action.
|
||||
# Actions usually modify the input or trigger a side effect.
|
||||
# To show the card, we can append it to the message content.
|
||||
|
||||
html_embed_tag = f"```html\n{html_card}\n```"
|
||||
body["messages"][-1]["content"] += f"\n\n{html_embed_tag}"
|
||||
|
||||
@@ -187,7 +185,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "success",
|
||||
"content": "⚡ 闪记卡生成成功!",
|
||||
"content": "⚡ Flash Card generated successfully!",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -202,7 +200,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "error",
|
||||
"content": f"生成知识卡片时出错: {str(e)}",
|
||||
"content": f"Error generating knowledge card: {str(e)}",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -519,7 +517,7 @@ class Action:
|
||||
|
||||
# Enhanced HTML structure
|
||||
html = f"""<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
@@ -530,20 +528,20 @@ class Action:
|
||||
<div class="knowledge-card">
|
||||
<div class="card-inner">
|
||||
<div class="card-header">
|
||||
<div class="card-category">{data.get('category', '通用知识')}</div>
|
||||
<h2 class="card-title">{data.get('title', '知识卡片')}</h2>
|
||||
<div class="card-category">{data.get('category', 'General Knowledge')}</div>
|
||||
<h2 class="card-title">{data.get('title', 'Flash Card')}</h2>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="card-summary">
|
||||
{data.get('summary', '')}
|
||||
</div>
|
||||
<div class="card-section-title">核心要点</div>
|
||||
<div class="card-section-title">Key Points</div>
|
||||
<ul class="card-points">
|
||||
{''.join([f'<li>{point}</li>' for point in data.get('key_points', [])])}
|
||||
</ul>
|
||||
</div>
|
||||
<div class="card-footer">
|
||||
<span class="card-tag-label">标签</span>
|
||||
<span class="card-tag-label">Tags</span>
|
||||
{''.join([f'<span class="card-tag">#{tag}</span>' for tag in data.get('tags', [])])}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
"""
|
||||
title: Flash Card
|
||||
author: Antigravity
|
||||
author_url: https://github.com/open-webui
|
||||
funding_url: https://github.com/open-webui
|
||||
title: 闪记卡 (Flash Card)
|
||||
author: Fu-Jie
|
||||
author_url: https://github.com/Fu-Jie
|
||||
funding_url: https://github.com/Fu-Jie/awesome-openwebui
|
||||
version: 0.2.1
|
||||
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg==
|
||||
description: Quickly generates beautiful flashcards from text, extracting key points and categories.
|
||||
description: 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类。
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -2,7 +2,7 @@
|
||||
title: Smart Mind Map
|
||||
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSIxMC41IiB4Mj0iNiIgeTI9IjYiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSIxMC41IiB4Mj0iMTgiIHkyPSI2Ii8+CiAgPGNpcmNsZSBjeD0iMTkiIGN5PSI1IiByPSIxLjUiLz4KICA8bGluZSB4MT0iMTAuNSIgeTE9IjEzLjUiIHgyPSI2IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iMTkiIHI9IjEuNSIvPgogIDxsaW5lIHgxPSIxMy41IiB5MT0iMTMuNSIgeDI9IjE4IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSIxOSIgY3k9IjE5IiByPSIxLjUiLz4KPC9zdmc+
|
||||
version: 0.7.3
|
||||
description: 智能分析长文本并生成交互式思维导图,支持 SVG/Markdown 导出。
|
||||
description: Intelligently analyzes long texts and generates interactive mind maps, supporting SVG/Markdown export.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""
|
||||
title: Deep Reading & Summary
|
||||
author: Antigravity
|
||||
author_url: https://github.com/open-webui
|
||||
funding_url: https://github.com/open-webui
|
||||
author: Fu-Jie
|
||||
author_url: https://github.com/Fu-Jie
|
||||
funding_url: https://github.com/Fu-Jie/awesome-openwebui
|
||||
version: 0.1.0
|
||||
icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0yIDNIMGEyIDIgMCAwIDAgMiAyIi8+PHBhdGggZD0iTTIyIDNIMjBhMiAyIDAgMCAwLTIgMiIvPjxwYXRoIGQ9Ik0yIDdoMjB2MTRhMiAyIDAgMCAxLTIgMmgtMTZhMiAyIDAgMCAxLTItMnYtMTQiLz48cGF0aCBkPSJNMTEgMTJ2NiIvPjxwYXRoIGQ9Ik0xNiAxMnY2Ii8+PHBhdGggZD0iTTYgMTJ2NiIvPjwvc3ZnPg==
|
||||
description: Provides deep reading analysis and summarization for long texts.
|
||||
@@ -28,69 +28,69 @@ logging.basicConfig(
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =================================================================
|
||||
# 内部 LLM 提示词设计
|
||||
# Internal LLM Prompts
|
||||
# =================================================================
|
||||
|
||||
SYSTEM_PROMPT_READING_ASSISTANT = """
|
||||
你是一个专业的深度文本分析专家,擅长精读长篇文本并提炼精华。你的任务是进行全面、深入的分析。
|
||||
You are a professional Deep Text Analysis Expert, specializing in reading long texts and extracting the essence. Your task is to conduct a comprehensive and in-depth analysis.
|
||||
|
||||
请提供以下内容:
|
||||
1. **详细摘要**:用 2-3 段话全面总结文本的核心内容,确保准确性和完整性。不要过于简略,要让读者充分理解文本主旨。
|
||||
2. **关键信息点**:列出 5-8 个最重要的事实、观点或论据。每个信息点应该:
|
||||
- 具体且有深度
|
||||
- 包含必要的细节和背景
|
||||
- 使用 Markdown 列表格式
|
||||
3. **行动建议**:从文本中识别并提炼出具体的、可执行的行动项。每个建议应该:
|
||||
- 明确且可操作
|
||||
- 包含执行的优先级或时间建议
|
||||
- 如果没有明确的行动项,可以提供学习建议或思考方向
|
||||
Please provide the following:
|
||||
1. **Detailed Summary**: Summarize the core content of the text in 2-3 paragraphs, ensuring accuracy and completeness. Do not be too brief; ensure the reader fully understands the main idea.
|
||||
2. **Key Information Points**: List 5-8 most important facts, viewpoints, or arguments. Each point should:
|
||||
- Be specific and insightful
|
||||
- Include necessary details and context
|
||||
- Use Markdown list format
|
||||
3. **Actionable Advice**: Identify and refine specific, actionable items from the text. Each suggestion should:
|
||||
- Be clear and actionable
|
||||
- Include execution priority or timing suggestions
|
||||
- If there are no clear action items, provide learning suggestions or thinking directions
|
||||
|
||||
请严格遵循以下指导原则:
|
||||
- **语言**:所有输出必须使用用户指定的语言。
|
||||
- **格式**:请严格按照以下 Markdown 格式输出,确保每个部分都有明确的标题:
|
||||
## 摘要
|
||||
[这里是详细的摘要内容,2-3段话,可以使用 Markdown 进行**加粗**或*斜体*强调重点]
|
||||
Please strictly follow these guidelines:
|
||||
- **Language**: All output must be in the user's specified language.
|
||||
- **Format**: Please strictly follow the Markdown format below, ensuring each section has a clear header:
|
||||
## Summary
|
||||
[Detailed summary content here, 2-3 paragraphs, use Markdown **bold** or *italic* to emphasize key points]
|
||||
|
||||
## 关键信息点
|
||||
- [关键点1:包含具体细节和背景]
|
||||
- [关键点2:包含具体细节和背景]
|
||||
- [关键点3:包含具体细节和背景]
|
||||
- [至少5个,最多8个关键点]
|
||||
## Key Information Points
|
||||
- [Key Point 1: Include specific details and context]
|
||||
- [Key Point 2: Include specific details and context]
|
||||
- [Key Point 3: Include specific details and context]
|
||||
- [At least 5, at most 8 key points]
|
||||
|
||||
## 行动建议
|
||||
- [行动项1:具体、可执行,包含优先级]
|
||||
- [行动项2:具体、可执行,包含优先级]
|
||||
- [如果没有明确行动项,提供学习建议或思考方向]
|
||||
- **深度优先**:分析要深入、全面,不要浮于表面。
|
||||
- **行动导向**:重点关注可执行的建议和下一步行动。
|
||||
- **只输出分析结果**:不要包含任何额外的寒暄、解释或引导性文字。
|
||||
## Actionable Advice
|
||||
- [Action Item 1: Specific, actionable, include priority]
|
||||
- [Action Item 2: Specific, actionable, include priority]
|
||||
- [If no clear action items, provide learning suggestions or thinking directions]
|
||||
- **Depth First**: Analysis should be deep and comprehensive, not superficial.
|
||||
- **Action Oriented**: Focus on actionable suggestions and next steps.
|
||||
- **Analysis Results Only**: Do not include any extra pleasantries, explanations, or leading text.
|
||||
"""
|
||||
|
||||
USER_PROMPT_GENERATE_SUMMARY = """
|
||||
请对以下长篇文本进行深度分析,提供:
|
||||
1. 详细的摘要(2-3段话,全面概括文本内容)
|
||||
2. 关键信息点列表(5-8个,包含具体细节)
|
||||
3. 可执行的行动建议(具体、明确,包含优先级)
|
||||
Please conduct a deep analysis of the following long text, providing:
|
||||
1. Detailed Summary (2-3 paragraphs, comprehensive overview)
|
||||
2. Key Information Points List (5-8 items, including specific details)
|
||||
3. Actionable Advice (Specific, clear, including priority)
|
||||
|
||||
---
|
||||
**用户上下文信息:**
|
||||
用户姓名: {user_name}
|
||||
当前日期时间: {current_date_time_str}
|
||||
当前星期: {current_weekday}
|
||||
当前时区: {current_timezone_str}
|
||||
用户语言: {user_language}
|
||||
**User Context:**
|
||||
User Name: {user_name}
|
||||
Current Date/Time: {current_date_time_str}
|
||||
Weekday: {current_weekday}
|
||||
Timezone: {current_timezone_str}
|
||||
User Language: {user_language}
|
||||
---
|
||||
|
||||
**长篇文本内容:**
|
||||
**Long Text Content:**
|
||||
```
|
||||
{long_text_content}
|
||||
```
|
||||
|
||||
请进行深入、全面的分析,重点关注可执行的行动建议。
|
||||
Please conduct a deep and comprehensive analysis, focusing on actionable advice.
|
||||
"""
|
||||
|
||||
# =================================================================
|
||||
# 前端 HTML 模板 (Jinja2 语法)
|
||||
# Frontend HTML Template (Jinja2 Syntax)
|
||||
# =================================================================
|
||||
|
||||
HTML_TEMPLATE = """
|
||||
@@ -99,7 +99,7 @@ HTML_TEMPLATE = """
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>精读:深度分析报告</title>
|
||||
<title>Deep Reading: Deep Analysis Report</title>
|
||||
<style>
|
||||
:root {
|
||||
--primary-color: #4285f4;
|
||||
@@ -245,29 +245,29 @@ HTML_TEMPLATE = """
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<h1>📖 精读:深度分析报告</h1>
|
||||
<h1>📖 Deep Reading: Deep Analysis Report</h1>
|
||||
</div>
|
||||
<div class="user-context">
|
||||
<span><strong>用户:</strong> {{ user_name }}</span>
|
||||
<span><strong>分析时间:</strong> {{ current_date_time_str }}</span>
|
||||
<span><strong>星期:</strong> {{ current_weekday }}</span>
|
||||
<span><strong>User:</strong> {{ user_name }}</span>
|
||||
<span><strong>Analysis Time:</strong> {{ current_date_time_str }}</span>
|
||||
<span><strong>Weekday:</strong> {{ current_weekday }}</span>
|
||||
</div>
|
||||
<div class="content">
|
||||
<div class="section summary-section">
|
||||
<h2><span class="icon">📝</span>详细摘要</h2>
|
||||
<h2><span class="icon">📝</span>Detailed Summary</h2>
|
||||
<div class="html-content">{{ summary_html | safe }}</div>
|
||||
</div>
|
||||
<div class="section keypoints-section">
|
||||
<h2><span class="icon">💡</span>关键信息点</h2>
|
||||
<h2><span class="icon">💡</span>Key Information Points</h2>
|
||||
<div class="html-content">{{ keypoints_html | safe }}</div>
|
||||
</div>
|
||||
<div class="section actions-section">
|
||||
<h2><span class="icon">🎯</span>行动建议</h2>
|
||||
<h2><span class="icon">🎯</span>Actionable Advice</h2>
|
||||
<div class="html-content">{{ actions_html | safe }}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer">
|
||||
<p>© {{ current_year }} 精读 - 深度文本分析服务</p>
|
||||
<p>© {{ current_year }} Deep Reading - Deep Text Analysis Service</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
@@ -277,18 +277,20 @@ HTML_TEMPLATE = """
|
||||
class Action:
|
||||
class Valves(BaseModel):
|
||||
show_status: bool = Field(
|
||||
default=True, description="是否在聊天界面显示操作状态更新。"
|
||||
default=True,
|
||||
description="Whether to show operation status updates in the chat interface.",
|
||||
)
|
||||
LLM_MODEL_ID: str = Field(
|
||||
default="gemini-2.5-flash",
|
||||
description="用于文本分析的内置LLM模型ID。",
|
||||
description="Built-in LLM Model ID used for text analysis.",
|
||||
)
|
||||
MIN_TEXT_LENGTH: int = Field(
|
||||
default=200,
|
||||
description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。",
|
||||
description="Minimum text length required for deep analysis (characters). Recommended 200+.",
|
||||
)
|
||||
RECOMMENDED_MIN_LENGTH: int = Field(
|
||||
default=500, description="建议的最小文本长度,以获得最佳分析效果。"
|
||||
default=500,
|
||||
description="Recommended minimum text length for best analysis results.",
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
@@ -296,16 +298,20 @@ class Action:
|
||||
|
||||
def _process_llm_output(self, llm_output: str) -> Dict[str, str]:
|
||||
"""
|
||||
解析LLM的Markdown输出,将其转换为HTML片段。
|
||||
Parse LLM Markdown output and convert to HTML fragments.
|
||||
"""
|
||||
summary_match = re.search(
|
||||
r"##\s*摘要\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL
|
||||
r"##\s*Summary\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL | re.IGNORECASE
|
||||
)
|
||||
keypoints_match = re.search(
|
||||
r"##\s*关键信息点\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL
|
||||
r"##\s*Key Information Points\s*\n(.*?)(?=\n##|$)",
|
||||
llm_output,
|
||||
re.DOTALL | re.IGNORECASE,
|
||||
)
|
||||
actions_match = re.search(
|
||||
r"##\s*行动建议\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL
|
||||
r"##\s*Actionable Advice\s*\n(.*?)(?=\n##|$)",
|
||||
llm_output,
|
||||
re.DOTALL | re.IGNORECASE,
|
||||
)
|
||||
|
||||
summary_md = summary_match.group(1).strip() if summary_match else ""
|
||||
@@ -314,24 +320,26 @@ class Action:
|
||||
|
||||
if not any([summary_md, keypoints_md, actions_md]):
|
||||
summary_md = llm_output.strip()
|
||||
logger.warning("LLM输出未遵循预期的Markdown格式。将整个输出视为摘要。")
|
||||
logger.warning(
|
||||
"LLM output did not follow expected Markdown format. Treating entire output as summary."
|
||||
)
|
||||
|
||||
# 使用 'nl2br' 扩展将换行符 \n 转换为 <br>
|
||||
# Use 'nl2br' extension to convert newlines \n to <br>
|
||||
md_extensions = ["nl2br"]
|
||||
summary_html = (
|
||||
markdown.markdown(summary_md, extensions=md_extensions)
|
||||
if summary_md
|
||||
else '<p class="no-content">未能提取摘要信息。</p>'
|
||||
else '<p class="no-content">Failed to extract summary.</p>'
|
||||
)
|
||||
keypoints_html = (
|
||||
markdown.markdown(keypoints_md, extensions=md_extensions)
|
||||
if keypoints_md
|
||||
else '<p class="no-content">未能提取关键信息点。</p>'
|
||||
else '<p class="no-content">Failed to extract key information points.</p>'
|
||||
)
|
||||
actions_html = (
|
||||
markdown.markdown(actions_md, extensions=md_extensions)
|
||||
if actions_md
|
||||
else '<p class="no-content">暂无明确的行动建议。</p>'
|
||||
else '<p class="no-content">No explicit actionable advice.</p>'
|
||||
)
|
||||
|
||||
return {
|
||||
@@ -342,7 +350,7 @@ class Action:
|
||||
|
||||
def _build_html(self, context: dict) -> str:
|
||||
"""
|
||||
使用 Jinja2 模板和上下文数据构建最终的HTML内容。
|
||||
Build final HTML content using Jinja2 template and context data.
|
||||
"""
|
||||
template = Template(HTML_TEMPLATE)
|
||||
return template.render(context)
|
||||
@@ -354,39 +362,39 @@ class Action:
|
||||
__event_emitter__: Optional[Any] = None,
|
||||
__request__: Optional[Request] = None,
|
||||
) -> Optional[dict]:
|
||||
logger.info("Action: 精读启动 (v2.0.0 - Deep Reading)")
|
||||
logger.info("Action: Deep Reading Started (v2.0.0)")
|
||||
|
||||
if isinstance(__user__, (list, tuple)):
|
||||
user_language = (
|
||||
__user__[0].get("language", "zh-CN") if __user__ else "zh-CN"
|
||||
__user__[0].get("language", "en-US") if __user__ else "en-US"
|
||||
)
|
||||
user_name = __user__[0].get("name", "用户") if __user__[0] else "用户"
|
||||
user_name = __user__[0].get("name", "User") if __user__[0] else "User"
|
||||
user_id = (
|
||||
__user__[0]["id"]
|
||||
if __user__ and "id" in __user__[0]
|
||||
else "unknown_user"
|
||||
)
|
||||
elif isinstance(__user__, dict):
|
||||
user_language = __user__.get("language", "zh-CN")
|
||||
user_name = __user__.get("name", "用户")
|
||||
user_language = __user__.get("language", "en-US")
|
||||
user_name = __user__.get("name", "User")
|
||||
user_id = __user__.get("id", "unknown_user")
|
||||
|
||||
now = datetime.now()
|
||||
current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S")
|
||||
current_weekday = now.strftime("%A")
|
||||
current_year = now.strftime("%Y")
|
||||
current_timezone_str = "未知时区"
|
||||
current_timezone_str = "Unknown Timezone"
|
||||
|
||||
original_content = ""
|
||||
try:
|
||||
messages = body.get("messages", [])
|
||||
if not messages or not messages[-1].get("content"):
|
||||
raise ValueError("无法获取有效的用户消息内容。")
|
||||
raise ValueError("Unable to get valid user message content.")
|
||||
|
||||
original_content = messages[-1]["content"]
|
||||
|
||||
if len(original_content) < self.valves.MIN_TEXT_LENGTH:
|
||||
short_text_message = f"文本内容过短({len(original_content)}字符),建议至少{self.valves.MIN_TEXT_LENGTH}字符以获得有效的深度分析。\n\n💡 提示:对于短文本,建议使用'⚡ 闪记卡'进行快速提炼。"
|
||||
short_text_message = f"Text content too short ({len(original_content)} chars), recommended at least {self.valves.MIN_TEXT_LENGTH} chars for effective deep analysis.\n\n💡 Tip: For short texts, consider using '⚡ Flash Card' for quick refinement."
|
||||
if __event_emitter__:
|
||||
await __event_emitter__(
|
||||
{
|
||||
@@ -408,7 +416,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "info",
|
||||
"content": f"文本长度为{len(original_content)}字符。建议{self.valves.RECOMMENDED_MIN_LENGTH}字符以上可获得更好的分析效果。",
|
||||
"content": f"Text length is {len(original_content)} chars. Recommended {self.valves.RECOMMENDED_MIN_LENGTH}+ chars for best analysis results.",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -419,7 +427,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "info",
|
||||
"content": "📖 精读已启动,正在进行深度分析...",
|
||||
"content": "📖 Deep Reading started, analyzing deeply...",
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -428,7 +436,7 @@ class Action:
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"description": "📖 精读: 深入分析文本,提炼精华...",
|
||||
"description": "📖 Deep Reading: Analyzing text, extracting essence...",
|
||||
"done": False,
|
||||
},
|
||||
}
|
||||
@@ -454,7 +462,7 @@ class Action:
|
||||
|
||||
user_obj = Users.get_user_by_id(user_id)
|
||||
if not user_obj:
|
||||
raise ValueError(f"无法获取用户对象, 用户ID: {user_id}")
|
||||
raise ValueError(f"Unable to get user object, User ID: {user_id}")
|
||||
|
||||
llm_response = await generate_chat_completion(
|
||||
__request__, llm_payload, user_obj
|
||||
@@ -482,7 +490,10 @@ class Action:
|
||||
await __event_emitter__(
|
||||
{
|
||||
"type": "status",
|
||||
"data": {"description": "📖 精读: 分析完成!", "done": True},
|
||||
"data": {
|
||||
"description": "📖 Deep Reading: Analysis complete!",
|
||||
"done": True,
|
||||
},
|
||||
}
|
||||
)
|
||||
await __event_emitter__(
|
||||
@@ -490,18 +501,18 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "success",
|
||||
"content": f"📖 精读完成,{user_name}!深度分析报告已生成。",
|
||||
"content": f"📖 Deep Reading complete, {user_name}! Deep analysis report generated.",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
error_message = f"精读处理失败: {str(e)}"
|
||||
logger.error(f"精读错误: {error_message}", exc_info=True)
|
||||
user_facing_error = f"抱歉, 精读在处理时遇到错误: {str(e)}。\n请检查Open WebUI后端日志获取更多详情。"
|
||||
error_message = f"Deep Reading processing failed: {str(e)}"
|
||||
logger.error(f"Deep Reading Error: {error_message}", exc_info=True)
|
||||
user_facing_error = f"Sorry, Deep Reading encountered an error while processing: {str(e)}.\nPlease check Open WebUI backend logs for more details."
|
||||
body["messages"][-1][
|
||||
"content"
|
||||
] = f"{original_content}\n\n❌ **错误:** {user_facing_error}"
|
||||
] = f"{original_content}\n\n❌ **Error:** {user_facing_error}"
|
||||
|
||||
if __event_emitter__:
|
||||
if self.valves.show_status:
|
||||
@@ -509,7 +520,7 @@ class Action:
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"description": "精读: 处理失败。",
|
||||
"description": "Deep Reading: Processing failed.",
|
||||
"done": True,
|
||||
},
|
||||
}
|
||||
@@ -519,7 +530,7 @@ class Action:
|
||||
"type": "notification",
|
||||
"data": {
|
||||
"type": "error",
|
||||
"content": f"精读处理失败, {user_name}!",
|
||||
"content": f"Deep Reading processing failed, {user_name}!",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
- ✅ **灵活保留策略**: 可自由配置保留对话头部和尾部的消息数量,确保关键信息和上下文的连贯性。
|
||||
- ✅ **智能注入**: 将生成的历史摘要智能地注入到新的上下文中。
|
||||
|
||||
详细的工作原理和流程请参考 [工作流程指南](WORKFLOW_GUIDE_CN.md)。
|
||||
|
||||
---
|
||||
|
||||
## 安装与配置
|
||||
@@ -49,16 +51,51 @@
|
||||
|
||||
您可以在过滤器的设置中调整以下参数:
|
||||
|
||||
### 核心参数
|
||||
|
||||
| 参数 | 默认值 | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `priority` | `10` | 过滤器执行顺序,数值越小越先执行。 |
|
||||
| `compression_threshold` | `15` | 当总消息数达到此值时,将在后台触发摘要生成。 |
|
||||
| `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示。 |
|
||||
| `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保上下文连贯。 |
|
||||
| `summary_model` | `None` | 用于生成摘要的模型。**强烈建议**配置一个快速、经济的兼容模型(如 `gemini-2.5-flash`)。如果留空,将尝试使用当前对话的模型,但这可能因模型不兼容(如 Pipe 模型)而失败。 |
|
||||
| `max_summary_tokens` | `4000` | 生成摘要时允许的最大 Token 数。 |
|
||||
| `summary_temperature` | `0.3` | 控制摘要生成的随机性,较低的值结果更稳定。 |
|
||||
| `debug_mode` | `true` | 是否在日志中打印详细的调试信息。生产环境建议设为 `false`。 |
|
||||
| `compression_threshold_tokens` | `64000` | **(重要)** 当上下文总 Token 数超过此值时,将在后台触发摘要生成。建议设置为模型最大上下文窗口的 50%-70%。 |
|
||||
| `max_context_tokens` | `128000` | **(重要)** 上下文的硬性上限。如果超过此值,将强制移除最早的消息(受保护的消息除外),以防止 Token 溢出。 |
|
||||
| `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示或环境变量,建议至少保留 1 条。 |
|
||||
| `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保最近对话的连贯性。 |
|
||||
|
||||
### 摘要生成配置
|
||||
|
||||
| 参数 | 默认值 | 描述 |
|
||||
| :--- | :--- | :--- |
|
||||
| `summary_model` | `None` | 用于生成摘要的模型 ID。**强烈建议**配置一个快速、经济且上下文窗口较大的模型(如 `gemini-2.5-flash`, `deepseek-v3`)。如果留空,将尝试使用当前对话的模型。 |
|
||||
| `max_summary_tokens` | `16384` | 生成摘要时允许的最大 Token 数。 |
|
||||
| `summary_temperature` | `0.1` | 控制摘要生成的随机性,较低的值结果更稳定。 |
|
||||
|
||||
### 高级配置
|
||||
|
||||
#### `model_thresholds` (模型特定阈值)
|
||||
|
||||
这是一个字典配置,允许您为特定的模型 ID 覆盖全局的 `compression_threshold_tokens` 和 `max_context_tokens`。这对于混合使用不同上下文窗口大小的模型非常有用。
|
||||
|
||||
**默认配置包含了主流模型(如 GPT-4, Claude 3.5, Gemini 1.5/2.0, Qwen 2.5/3, DeepSeek V3 等)的推荐阈值。**
|
||||
|
||||
**配置示例:**
|
||||
|
||||
```json
|
||||
{
|
||||
"gpt-4": {
|
||||
"compression_threshold_tokens": 8000,
|
||||
"max_context_tokens": 32000
|
||||
},
|
||||
"gemini-2.5-flash": {
|
||||
"compression_threshold_tokens": 734000,
|
||||
"max_context_tokens": 1048576
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### `debug_mode`
|
||||
|
||||
- **默认值**: `true`
|
||||
- **描述**: 是否在 Open WebUI 的控制台日志中打印详细的调试信息(如 Token 计数、压缩进度、数据库操作等)。生产环境建议设为 `false`。
|
||||
|
||||
---
|
||||
|
||||
@@ -68,10 +105,10 @@
|
||||
- **解决**:请确认 `DATABASE_URL` 环境变量已正确设置,并且数据库服务运行正常。
|
||||
|
||||
- **问题:摘要未生成**
|
||||
- **解决**:检查 `compression_threshold` 是否已达到,并确认 `summary_model` 配置正确。查看日志以获取详细错误。
|
||||
- **解决**:检查上下文总 Token 数是否已达到 `compression_threshold_tokens`,并确认 `summary_model` 配置正确。查看日志以获取详细错误。
|
||||
|
||||
- **问题:初始的系统提示丢失**
|
||||
- **解决**:确保 `keep_first` 的值大于 0,以保留包含重要信息的初始消息。
|
||||
|
||||
- **问题:压缩效果不明显**
|
||||
- **解决**:尝试适当提高 `compression_threshold`,或减少 `keep_first` / `keep_last` 的值。
|
||||
- **解决**:尝试适当提高 `compression_threshold_tokens`,或减少 `keep_first` / `keep_last` 的值。
|
||||
@@ -373,109 +373,7 @@ class Filter:
|
||||
default=128000, ge=0, description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)"
|
||||
)
|
||||
model_thresholds: dict = Field(
|
||||
default={
|
||||
# Groq
|
||||
"groq-openai/gpt-oss-20b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600},
|
||||
"groq-openai/gpt-oss-120b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600},
|
||||
|
||||
# Qwen (ModelScope / CF)
|
||||
"modelscope-Qwen/Qwen3-Coder-480B-A35B-Instruct": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200},
|
||||
"cfchatqwen-qwen3-max-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"modelscope-Qwen/Qwen3-235B-A22B-Thinking-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-max": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-vl-plus-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-coder-plus-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"cfchatqwen-qwen3-vl-plus": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-coder-plus": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"cfchatqwen-qwen3-omni-flash-thinking": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875},
|
||||
"cfchatqwen-qwen3-omni-flash": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875},
|
||||
"cfchatqwen-qwen3-next-80b-a3b-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"modelscope-Qwen/Qwen3-VL-235B-A22B-Instruct": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-next-80b-a3b-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-next-80b-a3b": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-235b-a22b-thinking-search": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
|
||||
"cfchatqwen-qwen3-235b-a22b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
|
||||
"cfchatqwen-qwen3-235b-a22b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
|
||||
"cfchatqwen-qwen3-coder-flash-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-coder-flash": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-max-2025-10-30": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-max-2025-10-30-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-max-2025-10-30-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"modelscope-Qwen/Qwen3-235B-A22B-Instruct-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500},
|
||||
"cfchatqwen-qwen3-vl-30b-a3b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
|
||||
"cfchatqwen-qwen3-vl-30b-a3b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750},
|
||||
|
||||
# Gemini
|
||||
"gemini-2.5-pro-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.5-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.5-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.5-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.5-flash-lite-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.5-pro": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.0-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.0-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.0-flash-exp": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-2.0-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"copilot-gemini-2.5-pro": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gemini-pro-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-3-pro-preview": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gemini-pro-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-flash-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-flash-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-flash-lite-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-flash-lite-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
"gemini-robotics-er-1.5-preview": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000},
|
||||
|
||||
# DeepSeek
|
||||
"modelscope-deepseek-ai/DeepSeek-V3.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfdeepseek-deepseek-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"openrouter-deepseek/deepseek-r1-0528:free": {"max_context_tokens": 163840, "compression_threshold_tokens": 114688},
|
||||
"modelscope-deepseek-ai/DeepSeek-V3.2-Exp": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfdeepseek-deepseek-r1-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfdeepseek-deepseek-r1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"openrouter-deepseek/deepseek-chat-v3.1:free": {"max_context_tokens": 163800, "compression_threshold_tokens": 114660},
|
||||
"modelscope-deepseek-ai/DeepSeek-R1-0528": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfdeepseek-deepseek": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
|
||||
# Kimi (Moonshot)
|
||||
"cfkimi-kimi-k2-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfkimi-kimi-k1.5-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfkimi-kimi-k1.5-thinking-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfkimi-kimi-research": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"openrouter-moonshotai/kimi-k2:free": {"max_context_tokens": 32768, "compression_threshold_tokens": 22937},
|
||||
"cfkimi-kimi-k2": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"cfkimi-kimi-k1.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
|
||||
# GPT / OpenAI
|
||||
"gpt-4.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gpt-4o": {"max_context_tokens": 64000, "compression_threshold_tokens": 44800},
|
||||
"gpt-5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"github-gpt-4.1": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250},
|
||||
"gpt-5-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gpt-5.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gpt-5.1-codex": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gpt-5.1-codex-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"gpt-5-codex": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000},
|
||||
"github-gpt-4.1-mini": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250},
|
||||
"openrouter-openai/gpt-oss-20b:free": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
|
||||
# Claude / Anthropic
|
||||
"claude-sonnet-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"claude-haiku-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"copilot-claude-opus-41": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000},
|
||||
"copilot-claude-sonnet-4": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000},
|
||||
|
||||
# Other / OpenRouter / OSWE
|
||||
"oswe-vscode-insiders": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200},
|
||||
"modelscope-MiniMax/MiniMax-M2": {"max_context_tokens": 204800, "compression_threshold_tokens": 143360},
|
||||
"oswe-vscode-prime": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000},
|
||||
"grok-code-fast-1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"copilot-auto": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600},
|
||||
"modelscope-ZhipuAI/GLM-4.6": {"max_context_tokens": 32000, "compression_threshold_tokens": 22400},
|
||||
"openrouter-x-ai/grok-4.1-fast:free": {"max_context_tokens": 2000000, "compression_threshold_tokens": 1400000},
|
||||
"openrouter-qwen/qwen3-coder:free": {"max_context_tokens": 262000, "compression_threshold_tokens": 183400},
|
||||
"openrouter-qwen/qwen3-235b-a22b:free": {"max_context_tokens": 40960, "compression_threshold_tokens": 28672},
|
||||
},
|
||||
default={},
|
||||
description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。"
|
||||
)
|
||||
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
需求文档:异步上下文压缩插件优化 (Async Context Compression Optimization)
|
||||
1. 核心目标:将现有的基于消息数量的压缩逻辑升级为基于 Token 数量的压缩逻辑,并引入递归摘要机制,以更精准地控制上下文窗口,提高摘要质量,并防止历史信息丢失。
|
||||
|
||||
2. 功能需求
|
||||
|
||||
Token 计数与阈值控制
|
||||
引入 tiktoken: 使用 tiktoken 库进行精确的 Token 计数。如果环境不支持,则回退到字符估算 (1 token ≈ 4 chars)。
|
||||
新配置参数 (Valves):
|
||||
compression_threshold_tokens (默认: 64000): 当上下文总 Token 数超过此值时,触发压缩(生成摘要)。
|
||||
max_context_tokens (默认: 128000): 上下文的硬性上限。如果超过此值,强制移除最早的消息(受保护的消息除外)。
|
||||
model_thresholds (字典): 支持针对不同模型 ID 配置不同的阈值。例如:{'gpt-4': {'compression_threshold_tokens': 8000, ...}}。
|
||||
废弃旧参数: compression_threshold (基于消息数) 将被标记为废弃,优先使用 Token 阈值。
|
||||
递归摘要 (Recursive Summarization)
|
||||
机制: 在生成新摘要时,必须读取并包含上一次的摘要。
|
||||
逻辑: 新摘要 = LLM(上一次摘要 + 新产生的对话消息)。
|
||||
目的: 防止随着对话进行,最早期的摘要信息被丢弃,确保长期记忆的连续性。
|
||||
消息保护与修剪策略
|
||||
保护机制: keep_first (保留头部 N 条) 和 keep_last (保留尾部 N 条) 的消息绝对不参与压缩,也不被移除。
|
||||
修剪逻辑: 当触发 max_context_tokens 限制时,优先移除 keep_first 之后、keep_last 之前的最早消息。
|
||||
优化的提示词 (Prompt Engineering)
|
||||
目标: 去除无用信息(寒暄、重复),保留关键信号(事实、代码、决策)。
|
||||
指令:
|
||||
提炼与净化: 明确要求移除噪音。
|
||||
关键保留: 强调代码片段必须逐字保留。
|
||||
合并与更新: 明确指示将新信息合并到旧摘要中。
|
||||
语言一致性: 输出语言必须与对话语言保持一致。
|
||||
3. 实现细节
|
||||
|
||||
文件:
|
||||
async_context_compression.py
|
||||
类:
|
||||
Filter
|
||||
关键方法:
|
||||
_count_tokens(text): 实现 Token 计数。
|
||||
_calculate_messages_tokens(messages): 计算消息列表总 Token。
|
||||
_generate_summary_async(...)
|
||||
: 修改为加载旧摘要,并传入 LLM。
|
||||
_call_summary_llm(...)
|
||||
: 更新 Prompt,接受 previous_summary 和 new_messages。
|
||||
inlet(...)
|
||||
:
|
||||
使用 compression_threshold_tokens 判断是否注入摘要。
|
||||
实现 max_context_tokens 的强制修剪逻辑。
|
||||
outlet(...)
|
||||
: 使用 compression_threshold_tokens 判断是否触发后台摘要任务。
|
||||
Reference in New Issue
Block a user