更新导出为 Word 功能，优化文件命名逻辑，增加聊天标题提取，完善文档说明

2025-12-30 14:45:51 +08:00
parent 0f635013c7
commit 1bf1d7ac23
4 changed files with 224 additions and 25 deletions
--- a/plugins/actions/export_to_docx/README.md
+++ b/plugins/actions/export_to_docx/README.md
@@ -1,13 +1,13 @@
 # Export to Word

-Export current conversation from Markdown to Word (.docx) file with proper Chinese and English encoding.
+Export current conversation from Markdown to Word (.docx) with proper Chinese and English encoding and smarter filenames.

 ## Features

 - **One-Click Export**: Adds an "Export to Word" action button to the chat.
 - **Markdown Conversion**: Converts Markdown syntax to Word formatting (headings, bold, italic, code, tables, lists).
 - **Multi-language Support**: Properly handles both Chinese and English text without garbled characters.
-- **Auto Title Extraction**: Automatically uses the first heading as the filename.
+- **Smarter Filenames**: Prefers chat title (from body or chat_id lookup) → first Markdown h1/h2 → user + date.

 ## Supported Markdown Syntax

@@ -32,6 +32,17 @@ Export current conversation from Markdown to Word (.docx) file with proper Chine
 2. In any chat, click the "Export to Word" button.
 3. The .docx file will be automatically downloaded to your device.

+
+### Notes
+
+- Title detection only considers h1/h2 headings.
+- If the request carries `chat_id` (in the body or metadata) and the body does not include a chat title, the plugin fetches the chat title from the database.
+- Default fonts: Times New Roman (en), SimSun/SimHei (zh), Consolas (code).
+
+### Requirements
+
+- python-docx==1.1.2 (already declared in the plugin docstring; make sure it is installed in your environment).
+
 ## Font Configuration

 - **English Text**: Times New Roman
--- a/plugins/actions/export_to_docx/README_CN.md
+++ b/plugins/actions/export_to_docx/README_CN.md
@@ -1,13 +1,13 @@
 # 导出为 Word

-将当前对话内容从 Markdown 转换并导出为 Word (.docx) 文件，支持中英文无乱码。
+将当前对话内容从 Markdown 转换并导出为 Word (.docx) 文件，支持中英文无乱码，且具备更智能的文件命名。

 ## 功能特点

 - **一键导出**：在聊天界面添加"导出为 Word"动作按钮。
 - **Markdown 转换**：将 Markdown 语法转换为 Word 格式（标题、粗体、斜体、代码、表格、列表）。
 - **多语言支持**：正确处理中文和英文文本，无乱码问题。
-- **自动提取标题**：自动使用第一个标题作为文件名。
+- **更智能的文件名**：优先使用对话标题（来自请求体或基于 chat_id 查询），其次 Markdown 一级/二级标题，最后用户+日期。

 ## 支持的 Markdown 语法

@@ -32,6 +32,16 @@
 2. 在任意对话中，点击"导出为 Word"按钮。
 3. .docx 文件将自动下载到你的设备。

+### 说明
+
+- 标题检测仅考虑一级/二级标题（h1/h2）。
+- 若请求体或 metadata 提供 `chat_id`，当请求体中缺少对话标题时会从数据库查询对话标题。
+- 默认字体：英文 Times New Roman，中文宋体/黑体，代码 Consolas。
+
+### 依赖
+
+- python-docx==1.1.2（已在插件文档字符串中声明，请确保环境已安装）。
+
 ## 字体配置

 - **英文文本**：Times New Roman
--- a/plugins/actions/export_to_docx/export_to_word.py
+++ b/plugins/actions/export_to_docx/export_to_word.py
@@ -4,7 +4,7 @@ author: Fu-Jie
 author_url: https://github.com/Fu-Jie
 funding_url: https://github.com/Fu-Jie/awesome-openwebui
 version: 0.1.0
-icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAySDZhMiAyIDAgMCAwLTIgMnYxNmEyIDIgMCAwIDAgMiAyaDEyYTIgMiAwIDAgMCAyLTJWOFoiLz48cGF0aCBkPSJNMTQgMnY2aDYiLz48cGF0aCBkPSJNMTYgMTNoLTIuNWEyIDIgMCAwIDAgMCA0SDEyIi8+PHBhdGggZD0iTTggMTNoMiIvPjxwYXRoIGQ9Ik04IDE3aDIiLz48L3N2Zz4=
+icon_url: data:image/svg+xml;base64,PHN2ZwogIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICB3aWR0aD0iMjQiCiAgaGVpZ2h0PSIyNCIKICB2aWV3Qm94PSIwIDAgMjQgMjQiCiAgZmlsbD0ibm9uZSIKICBzdHJva2U9ImN1cnJlbnRDb2xvciIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNNiAyMmEyIDIgMCAwIDEtMi0yVjRhMiAyIDAgMCAxIDItMmg4YTIuNCAyLjQgMCAwIDEgMS43MDQuNzA2bDMuNTg4IDMuNTg4QTIuNCAyLjQgMCAwIDEgMjAgOHYxMmEyIDIgMCAwIDEtMiAyeiIgLz4KICA8cGF0aCBkPSJNMTQgMnY1YTEgMSAwIDAgMCAxIDFoNSIgLz4KICA8cGF0aCBkPSJNMTAgOUg4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxM0g4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxN0g4IiAvPgo8L3N2Zz4K
 requirements: python-docx==1.1.2
 description: Export current conversation from Markdown to Word (.docx) file with proper Chinese and English encoding.
 """
@@ -14,6 +14,8 @@ import re
 import base64
 import datetime
 import io
+import asyncio
+import logging
 from typing import Optional, Callable, Awaitable, Any, List, Tuple
 from docx import Document
 from docx.shared import Pt, Inches, RGBColor, Cm
@@ -22,6 +24,14 @@ from docx.enum.table import WD_TABLE_ALIGNMENT
 from docx.enum.style import WD_STYLE_TYPE
 from docx.oxml.ns import qn
 from docx.oxml import OxmlElement
+from open_webui.models.chats import Chats
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)


 class Action:
@@ -39,8 +49,9 @@ class Action:
        __user__=None,
        __event_emitter__=None,
        __event_call__: Optional[Callable[[Any], Awaitable[None]]] = None,
+        __metadata__: Optional[dict] = None,
    ):
-        print(f"action:{__name__}")
+        logger.info(f"action:{__name__}")

        # Parse user info
        if isinstance(__user__, (list, tuple)):
@@ -80,18 +91,27 @@ class Action:
                    )
                    return

-                # Generate filename
+                # Generate filename (prefer chat title; fetch via chat_id if missing; then markdown title; then fallback)
+                chat_id = self.extract_chat_id(body, __metadata__)
+                chat_title = self.extract_chat_title(body)
+                if not chat_title and chat_id:
+                    chat_title = await self.fetch_chat_title(chat_id, user_id)
                title = self.extract_title(message_content)
                current_datetime = datetime.datetime.now()
                formatted_date = current_datetime.strftime("%Y%m%d")

-                if title:
+                if chat_title:
+                    filename = f"{self.clean_filename(chat_title)}.docx"
+                elif title:
                    filename = f"{self.clean_filename(title)}.docx"
                else:
                    filename = f"{user_name}_{formatted_date}.docx"

-                # Create Word document
-                doc = self.markdown_to_docx(message_content)
+                # Create Word document; if no h1 exists, inject chat title as h1
+                has_h1 = bool(re.search(r"^#\s+.+$", message_content, re.MULTILINE))
+                doc = self.markdown_to_docx(
+                    message_content, top_heading=chat_title, has_h1=has_h1
+                )

                # Save to memory
                doc_buffer = io.BytesIO()
@@ -148,7 +168,7 @@ class Action:
                return {"message": "Download triggered"}

            except Exception as e:
-                print(f"Error exporting to Word: {str(e)}")
+                logger.exception(f"Error exporting to Word: {str(e)}")
                await __event_emitter__(
                    {
                        "type": "status",
@@ -165,20 +185,85 @@ class Action:
                )

    def extract_title(self, content: str) -> str:
-        """Extract title from Markdown content"""
+        """Extract title from Markdown h1/h2 only"""
        lines = content.split("\n")
        for line in lines:
-            # Match h1-h3 headings
-            match = re.match(r"^#{1,3}\s+(.+)$", line.strip())
+            # Match h1-h2 headings only
+            match = re.match(r"^#{1,2}\s+(.+)$", line.strip())
            if match:
                return match.group(1).strip()
        return ""

+    def extract_chat_title(self, body: dict) -> str:
+        """Extract chat title from common payload fields."""
+        if not isinstance(body, dict):
+            return ""
+
+        candidates = []
+
+        for key in ("chat", "conversation"):
+            if isinstance(body.get(key), dict):
+                candidates.append(body.get(key, {}).get("title", ""))
+
+        for key in ("title", "chat_title"):
+            value = body.get(key)
+            if isinstance(value, str):
+                candidates.append(value)
+
+        for candidate in candidates:
+            if candidate and isinstance(candidate, str):
+                return candidate.strip()
+        return ""
+
+    def extract_chat_id(self, body: dict, metadata: Optional[dict]) -> str:
+        """Extract chat_id from body or metadata"""
+        if isinstance(body, dict):
+            chat_id = body.get("chat_id") or body.get("id")
+            if isinstance(chat_id, str) and chat_id.strip():
+                return chat_id.strip()
+
+            for key in ("chat", "conversation"):
+                nested = body.get(key)
+                if isinstance(nested, dict):
+                    nested_id = nested.get("id") or nested.get("chat_id")
+                    if isinstance(nested_id, str) and nested_id.strip():
+                        return nested_id.strip()
+        if isinstance(metadata, dict):
+            chat_id = metadata.get("chat_id")
+            if isinstance(chat_id, str) and chat_id.strip():
+                return chat_id.strip()
+        return ""
+
+    async def fetch_chat_title(self, chat_id: str, user_id: str = "") -> str:
+        """Fetch chat title from database by chat_id"""
+        if not chat_id:
+            return ""
+
+        def _load_chat():
+            if user_id:
+                return Chats.get_chat_by_id_and_user_id(id=chat_id, user_id=user_id)
+            return Chats.get_chat_by_id(chat_id)
+
+        try:
+            chat = await asyncio.to_thread(_load_chat)
+        except Exception as exc:
+            logger.warning(f"Failed to load chat {chat_id}: {exc}")
+            return ""
+
+        if not chat:
+            return ""
+
+        data = getattr(chat, "chat", {}) or {}
+        title = data.get("title") or getattr(chat, "title", "")
+        return title.strip() if isinstance(title, str) else ""
+
    def clean_filename(self, name: str) -> str:
        """Clean illegal characters from filename"""
        return re.sub(r'[\\/*?:"<>|]', "", name).strip()[:50]

-    def markdown_to_docx(self, markdown_text: str) -> Document:
+    def markdown_to_docx(
+        self, markdown_text: str, top_heading: str = "", has_h1: bool = False
+    ) -> Document:
        """
        Convert Markdown text to Word document
        Supports: headings, paragraphs, bold, italic, code blocks, lists, tables, links
@@ -188,6 +273,10 @@ class Action:
        # Set default fonts
        self.set_document_default_font(doc)

+        # If there is no h1 in content, prepend chat title as h1 when provided
+        if top_heading and not has_h1:
+            self.add_heading(doc, top_heading, 1)
+
        lines = markdown_text.split("\n")
        i = 0
        in_code_block = False
--- a/plugins/actions/export_to_docx/导出为Word.py
+++ b/plugins/actions/export_to_docx/导出为Word.py
@@ -4,7 +4,7 @@ author: Fu-Jie
 author_url: https://github.com/Fu-Jie
 funding_url: https://github.com/Fu-Jie/awesome-openwebui
 version: 0.1.0
-icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAySDZhMiAyIDAgMCAwLTIgMnYxNmEyIDIgMCAwIDAgMiAyaDEyYTIgMiAwIDAgMCAyLTJWOFoiLz48cGF0aCBkPSJNMTQgMnY2aDYiLz48cGF0aCBkPSJNMTYgMTNoLTIuNWEyIDIgMCAwIDAgMCA0SDEyIi8+PHBhdGggZD0iTTggMTNoMiIvPjxwYXRoIGQ9Ik04IDE3aDIiLz48L3N2Zz4=
+icon_url: data:image/svg+xml;base64,PHN2ZwogIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICB3aWR0aD0iMjQiCiAgaGVpZ2h0PSIyNCIKICB2aWV3Qm94PSIwIDAgMjQgMjQiCiAgZmlsbD0ibm9uZSIKICBzdHJva2U9ImN1cnJlbnRDb2xvciIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNNiAyMmEyIDIgMCAwIDEtMi0yVjRhMiAyIDAgMCAxIDItMmg4YTIuNCAyLjQgMCAwIDEgMS43MDQuNzA2bDMuNTg4IDMuNTg4QTIuNCAyLjQgMCAwIDEgMjAgOHYxMmEyIDIgMCAwIDEtMiAyeiIgLz4KICA8cGF0aCBkPSJNMTQgMnY1YTEgMSAwIDAgMCAxIDFoNSIgLz4KICA8cGF0aCBkPSJNMTAgOUg4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxM0g4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxN0g4IiAvPgo8L3N2Zz4K
 requirements: python-docx==1.1.2
 description: 将当前对话内容从 Markdown 转换并导出为 Word (.docx) 文件，支持中英文无乱码。
 """
@@ -14,6 +14,8 @@ import re
 import base64
 import datetime
 import io
+import asyncio
+import logging
 from typing import Optional, Callable, Awaitable, Any, List, Tuple
 from docx import Document
 from docx.shared import Pt, Inches, RGBColor, Cm
@@ -22,6 +24,14 @@ from docx.enum.table import WD_TABLE_ALIGNMENT
 from docx.enum.style import WD_STYLE_TYPE
 from docx.oxml.ns import qn
 from docx.oxml import OxmlElement
+from open_webui.models.chats import Chats
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)


 class Action:
@@ -39,8 +49,9 @@ class Action:
        __user__=None,
        __event_emitter__=None,
        __event_call__: Optional[Callable[[Any], Awaitable[None]]] = None,
+        __metadata__: Optional[dict] = None,
    ):
-        print(f"action:{__name__}")
+        logger.info(f"action:{__name__}")

        # 解析用户信息
        if isinstance(__user__, (list, tuple)):
@@ -77,18 +88,27 @@ class Action:
                    )
                    return

-                # 生成文件名
+                # 生成文件名（优先对话标题；若缺失则通过 chat_id 查询；再到 Markdown 标题；最后用户+日期）
+                chat_id = self.extract_chat_id(body, __metadata__)
+                chat_title = self.extract_chat_title(body)
+                if not chat_title and chat_id:
+                    chat_title = await self.fetch_chat_title(chat_id, user_id)
                title = self.extract_title(message_content)
                current_datetime = datetime.datetime.now()
                formatted_date = current_datetime.strftime("%Y%m%d")

-                if title:
+                if chat_title:
+                    filename = f"{self.clean_filename(chat_title)}.docx"
+                elif title:
                    filename = f"{self.clean_filename(title)}.docx"
                else:
                    filename = f"{user_name}_{formatted_date}.docx"

-                # 创建 Word 文档
-                doc = self.markdown_to_docx(message_content)
+                # 创建 Word 文档；若正文无一级标题，使用对话标题作为一级标题
+                has_h1 = bool(re.search(r"^#\s+.+$", message_content, re.MULTILINE))
+                doc = self.markdown_to_docx(
+                    message_content, top_heading=chat_title, has_h1=has_h1
+                )

                # 保存到内存
                doc_buffer = io.BytesIO()
@@ -160,20 +180,85 @@ class Action:
                )

    def extract_title(self, content: str) -> str:
-        """从 Markdown 内容中提取标题"""
+        """从 Markdown 内容提取一级/二级标题"""
        lines = content.split("\n")
        for line in lines:
-            # 匹配 h1-h3 标题
-            match = re.match(r"^#{1,3}\s+(.+)$", line.strip())
+            # 仅匹配 h1-h2 标题
+            match = re.match(r"^#{1,2}\s+(.+)$", line.strip())
            if match:
                return match.group(1).strip()
        return ""

+    def extract_chat_title(self, body: dict) -> str:
+        """从请求体中提取会话标题"""
+        if not isinstance(body, dict):
+            return ""
+
+        candidates = []
+
+        for key in ("chat", "conversation"):
+            if isinstance(body.get(key), dict):
+                candidates.append(body.get(key, {}).get("title", ""))
+
+        for key in ("title", "chat_title"):
+            value = body.get(key)
+            if isinstance(value, str):
+                candidates.append(value)
+
+        for candidate in candidates:
+            if candidate and isinstance(candidate, str):
+                return candidate.strip()
+        return ""
+
+    def extract_chat_id(self, body: dict, metadata: Optional[dict]) -> str:
+        """从 body 或 metadata 中提取 chat_id"""
+        if isinstance(body, dict):
+            chat_id = body.get("chat_id") or body.get("id")
+            if isinstance(chat_id, str) and chat_id.strip():
+                return chat_id.strip()
+
+            for key in ("chat", "conversation"):
+                nested = body.get(key)
+                if isinstance(nested, dict):
+                    nested_id = nested.get("id") or nested.get("chat_id")
+                    if isinstance(nested_id, str) and nested_id.strip():
+                        return nested_id.strip()
+        if isinstance(metadata, dict):
+            chat_id = metadata.get("chat_id")
+            if isinstance(chat_id, str) and chat_id.strip():
+                return chat_id.strip()
+        return ""
+
+    async def fetch_chat_title(self, chat_id: str, user_id: str = "") -> str:
+        """根据 chat_id 从数据库获取标题"""
+        if not chat_id:
+            return ""
+
+        def _load_chat():
+            if user_id:
+                return Chats.get_chat_by_id_and_user_id(id=chat_id, user_id=user_id)
+            return Chats.get_chat_by_id(chat_id)
+
+        try:
+            chat = await asyncio.to_thread(_load_chat)
+        except Exception as exc:
+            logger.warning(f"加载聊天 {chat_id} 失败: {exc}")
+            return ""
+
+        if not chat:
+            return ""
+
+        data = getattr(chat, "chat", {}) or {}
+        title = data.get("title") or getattr(chat, "title", "")
+        return title.strip() if isinstance(title, str) else ""
+
    def clean_filename(self, name: str) -> str:
        """清理文件名中的非法字符"""
        return re.sub(r'[\\/*?:"<>|]', "", name).strip()[:50]

-    def markdown_to_docx(self, markdown_text: str) -> Document:
+    def markdown_to_docx(
+        self, markdown_text: str, top_heading: str = "", has_h1: bool = False
+    ) -> Document:
        """
        将 Markdown 文本转换为 Word 文档
        支持：标题、段落、粗体、斜体、代码块、列表、表格、链接
@@ -183,6 +268,10 @@ class Action:
        # 设置默认中文字体
        self.set_document_default_font(doc)

+        # 若正文无一级标题且有对话标题，则作为一级标题写入
+        if top_heading and not has_h1:
+            self.add_heading(doc, top_heading, 1)
+
        lines = markdown_text.split("\n")
        i = 0
        in_code_block = False