From 278a6ce12c8a16dfa42a62020cbcc6d7f789aa99 Mon Sep 17 00:00:00 2001 From: Jeff fu Date: Tue, 30 Dec 2025 13:29:44 +0800 Subject: [PATCH] Add export to Word functionality with Markdown support - Implemented a new plugin to export current conversation content from Markdown to Word (.docx) format. - Added README_CN.md for Chinese documentation, detailing features, supported Markdown syntax, usage instructions, font configurations, and author information. - Developed export_to_word.py to handle the conversion process, including title extraction, document creation, and file download triggering. - Included support for various Markdown elements such as headings, lists, tables, and inline formatting. - Ensured proper handling of Chinese and English text without encoding issues. --- plugins/actions/export_to_docx/README.md | 48 ++ plugins/actions/export_to_docx/README_CN.md | 48 ++ .../actions/export_to_docx/export_to_word.py | 574 ++++++++++++++++++ plugins/actions/export_to_docx/导出为Word.py | 574 ++++++++++++++++++ 4 files changed, 1244 insertions(+) create mode 100644 plugins/actions/export_to_docx/README.md create mode 100644 plugins/actions/export_to_docx/README_CN.md create mode 100644 plugins/actions/export_to_docx/export_to_word.py create mode 100644 plugins/actions/export_to_docx/导出为Word.py diff --git a/plugins/actions/export_to_docx/README.md b/plugins/actions/export_to_docx/README.md new file mode 100644 index 0000000..b007818 --- /dev/null +++ b/plugins/actions/export_to_docx/README.md @@ -0,0 +1,48 @@ +# Export to Word + +Export current conversation from Markdown to Word (.docx) file with proper Chinese and English encoding. + +## Features + +- **One-Click Export**: Adds an "Export to Word" action button to the chat. +- **Markdown Conversion**: Converts Markdown syntax to Word formatting (headings, bold, italic, code, tables, lists). +- **Multi-language Support**: Properly handles both Chinese and English text without garbled characters. +- **Auto Title Extraction**: Automatically uses the first heading as the filename. + +## Supported Markdown Syntax + +| Syntax | Word Result | +| :---------------------------------- | :----------------------------- | +| `# Heading 1` to `###### Heading 6` | Heading levels 1-6 | +| `**bold**` or `__bold__` | Bold text | +| `*italic*` or `_italic_` | Italic text | +| `***bold italic***` | Bold + Italic | +| `` `inline code` `` | Monospace with gray background | +| ` ``` code block ``` ` | Code block with indentation | +| `[link](url)` | Blue underlined link text | +| `~~strikethrough~~` | Strikethrough text | +| `- item` or `* item` | Bullet list | +| `1. item` | Numbered list | +| Markdown tables | Table with grid | +| `---` or `***` | Horizontal rule | + +## Usage + +1. Install the plugin. +2. In any chat, click the "Export to Word" button. +3. The .docx file will be automatically downloaded to your device. + +## Font Configuration + +- **English Text**: Times New Roman +- **Chinese Text**: SimSun (宋体) for body, SimHei (黑体) for headings +- **Code**: Consolas + +## Author + +Fu-Jie +GitHub: [Fu-Jie/awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui) + +## License + +MIT License diff --git a/plugins/actions/export_to_docx/README_CN.md b/plugins/actions/export_to_docx/README_CN.md new file mode 100644 index 0000000..a0a448e --- /dev/null +++ b/plugins/actions/export_to_docx/README_CN.md @@ -0,0 +1,48 @@ +# 导出为 Word + +将当前对话内容从 Markdown 转换并导出为 Word (.docx) 文件,支持中英文无乱码。 + +## 功能特点 + +- **一键导出**:在聊天界面添加"导出为 Word"动作按钮。 +- **Markdown 转换**:将 Markdown 语法转换为 Word 格式(标题、粗体、斜体、代码、表格、列表)。 +- **多语言支持**:正确处理中文和英文文本,无乱码问题。 +- **自动提取标题**:自动使用第一个标题作为文件名。 + +## 支持的 Markdown 语法 + +| 语法 | Word 效果 | +| :-------------------------- | :------------------ | +| `# 标题1` 到 `###### 标题6` | 标题级别 1-6 | +| `**粗体**` 或 `__粗体__` | 粗体文本 | +| `*斜体*` 或 `_斜体_` | 斜体文本 | +| `***粗斜体***` | 粗体 + 斜体 | +| `` `行内代码` `` | 等宽字体 + 灰色背景 | +| ` ``` 代码块 ``` ` | 带缩进的代码块 | +| `[链接](url)` | 蓝色下划线链接文本 | +| `~~删除线~~` | 删除线文本 | +| `- 项目` 或 `* 项目` | 无序列表 | +| `1. 项目` | 有序列表 | +| Markdown 表格 | 带边框表格 | +| `---` 或 `***` | 水平分割线 | + +## 使用方法 + +1. 安装插件。 +2. 在任意对话中,点击"导出为 Word"按钮。 +3. .docx 文件将自动下载到你的设备。 + +## 字体配置 + +- **英文文本**:Times New Roman +- **中文文本**:宋体(正文)、黑体(标题) +- **代码**:Consolas + +## 作者 + +Fu-Jie +GitHub: [Fu-Jie/awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui) + +## 许可证 + +MIT License diff --git a/plugins/actions/export_to_docx/export_to_word.py b/plugins/actions/export_to_docx/export_to_word.py new file mode 100644 index 0000000..290791a --- /dev/null +++ b/plugins/actions/export_to_docx/export_to_word.py @@ -0,0 +1,574 @@ +""" +title: Export to Word +author: Fu-Jie +author_url: https://github.com/Fu-Jie +funding_url: https://github.com/Fu-Jie/awesome-openwebui +version: 0.1.0 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAySDZhMiAyIDAgMCAwLTIgMnYxNmEyIDIgMCAwIDAgMiAyaDEyYTIgMiAwIDAgMCAyLTJWOFoiLz48cGF0aCBkPSJNMTQgMnY2aDYiLz48cGF0aCBkPSJNMTYgMTNoLTIuNWEyIDIgMCAwIDAgMCA0SDEyIi8+PHBhdGggZD0iTTggMTNoMiIvPjxwYXRoIGQ9Ik04IDE3aDIiLz48L3N2Zz4= +requirements: python-docx==1.1.2 +description: Export current conversation from Markdown to Word (.docx) file with proper Chinese and English encoding. +""" + +import os +import re +import base64 +import datetime +import io +from typing import Optional, Callable, Awaitable, Any, List, Tuple +from docx import Document +from docx.shared import Pt, Inches, RGBColor, Cm +from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING +from docx.enum.table import WD_TABLE_ALIGNMENT +from docx.enum.style import WD_STYLE_TYPE +from docx.oxml.ns import qn +from docx.oxml import OxmlElement + + +class Action: + def __init__(self): + pass + + async def _send_notification(self, emitter: Callable, type: str, content: str): + await emitter( + {"type": "notification", "data": {"type": type, "content": content}} + ) + + async def action( + self, + body: dict, + __user__=None, + __event_emitter__=None, + __event_call__: Optional[Callable[[Any], Awaitable[None]]] = None, + ): + print(f"action:{__name__}") + + # Parse user info + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "en-US") if __user__ else "en-US" + ) + user_name = __user__[0].get("name", "User") if __user__[0] else "User" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "en-US") + user_name = __user__.get("name", "User") + user_id = __user__.get("id", "unknown_user") + + if __event_emitter__: + last_assistant_message = body["messages"][-1] + + await __event_emitter__( + { + "type": "status", + "data": { + "description": "Converting to Word document...", + "done": False, + }, + } + ) + + try: + message_content = last_assistant_message["content"] + + if not message_content or not message_content.strip(): + await self._send_notification( + __event_emitter__, "error", "No content found to export!" + ) + return + + # Generate filename + title = self.extract_title(message_content) + current_datetime = datetime.datetime.now() + formatted_date = current_datetime.strftime("%Y%m%d") + + if title: + filename = f"{self.clean_filename(title)}.docx" + else: + filename = f"{user_name}_{formatted_date}.docx" + + # Create Word document + doc = self.markdown_to_docx(message_content) + + # Save to memory + doc_buffer = io.BytesIO() + doc.save(doc_buffer) + doc_buffer.seek(0) + file_content = doc_buffer.read() + base64_blob = base64.b64encode(file_content).decode("utf-8") + + # Trigger file download + if __event_call__: + await __event_call__( + { + "type": "execute", + "data": { + "code": f""" + try {{ + const base64Data = "{base64_blob}"; + const binaryData = atob(base64Data); + const arrayBuffer = new Uint8Array(binaryData.length); + for (let i = 0; i < binaryData.length; i++) {{ + arrayBuffer[i] = binaryData.charCodeAt(i); + }} + const blob = new Blob([arrayBuffer], {{ type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }}); + const filename = "{filename}"; + + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.style.display = "none"; + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + URL.revokeObjectURL(url); + document.body.removeChild(a); + }} catch (error) {{ + console.error('Error triggering download:', error); + }} + """ + }, + } + ) + + await __event_emitter__( + { + "type": "status", + "data": {"description": "Word document exported", "done": True}, + } + ) + + await self._send_notification( + __event_emitter__, "success", f"Successfully exported to {filename}" + ) + + return {"message": "Download triggered"} + + except Exception as e: + print(f"Error exporting to Word: {str(e)}") + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"Export failed: {str(e)}", + "done": True, + }, + } + ) + await self._send_notification( + __event_emitter__, + "error", + f"Error exporting Word document: {str(e)}", + ) + + def extract_title(self, content: str) -> str: + """Extract title from Markdown content""" + lines = content.split("\n") + for line in lines: + # Match h1-h3 headings + match = re.match(r"^#{1,3}\s+(.+)$", line.strip()) + if match: + return match.group(1).strip() + return "" + + def clean_filename(self, name: str) -> str: + """Clean illegal characters from filename""" + return re.sub(r'[\\/*?:"<>|]', "", name).strip()[:50] + + def markdown_to_docx(self, markdown_text: str) -> Document: + """ + Convert Markdown text to Word document + Supports: headings, paragraphs, bold, italic, code blocks, lists, tables, links + """ + doc = Document() + + # Set default fonts + self.set_document_default_font(doc) + + lines = markdown_text.split("\n") + i = 0 + in_code_block = False + code_block_content = [] + code_block_lang = "" + in_list = False + list_items = [] + list_type = None # 'ordered' or 'unordered' + + while i < len(lines): + line = lines[i] + + # Handle code blocks + if line.strip().startswith("```"): + if not in_code_block: + # Process pending list first + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + in_code_block = True + code_block_lang = line.strip()[3:].strip() + code_block_content = [] + else: + # End code block + in_code_block = False + self.add_code_block( + doc, "\n".join(code_block_content), code_block_lang + ) + code_block_content = [] + code_block_lang = "" + i += 1 + continue + + if in_code_block: + code_block_content.append(line) + i += 1 + continue + + # Handle tables + if line.strip().startswith("|") and line.strip().endswith("|"): + # Process pending list first + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + table_lines = [] + while i < len(lines) and lines[i].strip().startswith("|"): + table_lines.append(lines[i]) + i += 1 + self.add_table(doc, table_lines) + continue + + # Handle headings + header_match = re.match(r"^(#{1,6})\s+(.+)$", line.strip()) + if header_match: + # Process pending list first + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + level = len(header_match.group(1)) + text = header_match.group(2) + self.add_heading(doc, text, level) + i += 1 + continue + + # Handle unordered lists + unordered_match = re.match(r"^(\s*)[-*+]\s+(.+)$", line) + if unordered_match: + if not in_list or list_type != "unordered": + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = True + list_type = "unordered" + indent = len(unordered_match.group(1)) // 2 + list_items.append((indent, unordered_match.group(2))) + i += 1 + continue + + # Handle ordered lists + ordered_match = re.match(r"^(\s*)\d+[.)]\s+(.+)$", line) + if ordered_match: + if not in_list or list_type != "ordered": + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = True + list_type = "ordered" + indent = len(ordered_match.group(1)) // 2 + list_items.append((indent, ordered_match.group(2))) + i += 1 + continue + + # Handle horizontal rules + if re.match(r"^[-*_]{3,}$", line.strip()): + # Process pending list first + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + self.add_horizontal_rule(doc) + i += 1 + continue + + # Handle empty lines + if not line.strip(): + # End list + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + i += 1 + continue + + # Handle normal paragraphs + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + self.add_paragraph(doc, line) + i += 1 + + # Process remaining list + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + + return doc + + def set_document_default_font(self, doc: Document): + """Set document default fonts for both Chinese and English""" + # Set Normal style + style = doc.styles["Normal"] + font = style.font + font.name = "Times New Roman" # English font + font.size = Pt(11) + + # Set Chinese font + style._element.rPr.rFonts.set(qn("w:eastAsia"), "SimSun") + + # Set paragraph format + paragraph_format = style.paragraph_format + paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE + paragraph_format.space_after = Pt(6) + + def add_heading(self, doc: Document, text: str, level: int): + """Add heading""" + # Word heading levels start from 0, Markdown from 1 + heading_level = min(level, 9) # Word supports up to Heading 9 + heading = doc.add_heading(level=heading_level) + + # Parse and add formatted text + self.add_formatted_text(heading, text) + + # Set Chinese font + for run in heading.runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimHei") + + def add_paragraph(self, doc: Document, text: str): + """Add paragraph with inline formatting support""" + paragraph = doc.add_paragraph() + self.add_formatted_text(paragraph, text) + + # Set Chinese font + for run in paragraph.runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimSun") + + def add_formatted_text(self, paragraph, text: str): + """ + Parse Markdown inline formatting and add to paragraph + Supports: bold, italic, inline code, links, strikethrough + """ + # Define formatting patterns + patterns = [ + # Bold italic ***text*** or ___text___ + (r"\*\*\*(.+?)\*\*\*|___(.+?)___", {"bold": True, "italic": True}), + # Bold **text** or __text__ + (r"\*\*(.+?)\*\*|__(.+?)__", {"bold": True}), + # Italic *text* or _text_ + ( + r"(? 1 else None + ), + } + ) + + # Sort by position + all_matches.sort(key=lambda x: x["start"]) + + # Remove overlapping matches + filtered_matches = [] + last_end = 0 + for m in all_matches: + if m["start"] >= last_end: + filtered_matches.append(m) + last_end = m["end"] + + # Build final text + pos = 0 + for match in filtered_matches: + # Add plain text before match + if match["start"] > pos: + plain_text = text[pos : match["start"]] + if plain_text: + paragraph.add_run(plain_text) + + # Add formatted text + style = match["style"] + run_text = match["text"] + + if style.get("link"): + # Link handling + run = paragraph.add_run(run_text) + run.font.color.rgb = RGBColor(0, 0, 255) + run.font.underline = True + elif style.get("code"): + # Inline code + run = paragraph.add_run(run_text) + run.font.name = "Consolas" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimHei") + run.font.size = Pt(10) + # Add background color + shading = OxmlElement("w:shd") + shading.set(qn("w:fill"), "E8E8E8") + run._element.rPr.append(shading) + else: + run = paragraph.add_run(run_text) + if style.get("bold"): + run.bold = True + if style.get("italic"): + run.italic = True + if style.get("strike"): + run.font.strike = True + + pos = match["end"] + + # Add remaining plain text + if pos < len(text): + paragraph.add_run(text[pos:]) + + def add_code_block(self, doc: Document, code: str, language: str = ""): + """Add code block""" + paragraph = doc.add_paragraph() + paragraph.paragraph_format.left_indent = Cm(0.5) + paragraph.paragraph_format.space_before = Pt(6) + paragraph.paragraph_format.space_after = Pt(6) + + # Set code block font + run = paragraph.add_run(code) + run.font.name = "Consolas" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimHei") + run.font.size = Pt(10) + + # Add light gray background + shading = OxmlElement("w:shd") + shading.set(qn("w:fill"), "F5F5F5") + paragraph._element.pPr.append(shading) + + def add_table(self, doc: Document, table_lines: List[str]): + """Add table""" + if len(table_lines) < 2: + return + + # Parse table data + rows = [] + for line in table_lines: + cells = [cell.strip() for cell in line.strip().strip("|").split("|")] + # Skip separator row + if all(re.fullmatch(r"[-:]+", cell) for cell in cells): + continue + rows.append(cells) + + if not rows: + return + + # Determine column count + num_cols = max(len(row) for row in rows) + + # Create table + table = doc.add_table(rows=len(rows), cols=num_cols) + table.style = "Table Grid" + table.alignment = WD_TABLE_ALIGNMENT.CENTER + + # Fill table + for row_idx, row_data in enumerate(rows): + row = table.rows[row_idx] + for col_idx, cell_text in enumerate(row_data): + if col_idx < num_cols: + cell = row.cells[col_idx] + # Clear default paragraph + cell.paragraphs[0].clear() + self.add_formatted_text(cell.paragraphs[0], cell_text) + + # Set cell font + for run in cell.paragraphs[0].runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimSun") + run.font.size = Pt(10) + + # Bold header + if row_idx == 0: + for run in cell.paragraphs[0].runs: + run.bold = True + + # Center align cells + for row in table.rows: + for cell in row.cells: + cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER + + def add_list_to_doc( + self, doc: Document, items: List[Tuple[int, str]], list_type: str + ): + """Add list""" + for indent, text in items: + paragraph = doc.add_paragraph() + + if list_type == "unordered": + # Unordered list with bullets + paragraph.style = "List Bullet" + else: + # Ordered list with numbers + paragraph.style = "List Number" + + # Set indent + paragraph.paragraph_format.left_indent = Cm(0.5 * (indent + 1)) + + # Add formatted text + self.add_formatted_text(paragraph, text) + + # Set font + for run in paragraph.runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimSun") + + def add_horizontal_rule(self, doc: Document): + """Add horizontal rule""" + paragraph = doc.add_paragraph() + paragraph.paragraph_format.space_before = Pt(12) + paragraph.paragraph_format.space_after = Pt(12) + + # Add bottom border as horizontal rule + pPr = paragraph._element.get_or_add_pPr() + pBdr = OxmlElement("w:pBdr") + bottom = OxmlElement("w:bottom") + bottom.set(qn("w:val"), "single") + bottom.set(qn("w:sz"), "6") + bottom.set(qn("w:space"), "1") + bottom.set(qn("w:color"), "auto") + pBdr.append(bottom) + pPr.append(pBdr) diff --git a/plugins/actions/export_to_docx/导出为Word.py b/plugins/actions/export_to_docx/导出为Word.py new file mode 100644 index 0000000..7fa1ae8 --- /dev/null +++ b/plugins/actions/export_to_docx/导出为Word.py @@ -0,0 +1,574 @@ +""" +title: 导出为 Word +author: Fu-Jie +author_url: https://github.com/Fu-Jie +funding_url: https://github.com/Fu-Jie/awesome-openwebui +version: 0.1.0 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAySDZhMiAyIDAgMCAwLTIgMnYxNmEyIDIgMCAwIDAgMiAyaDEyYTIgMiAwIDAgMCAyLTJWOFoiLz48cGF0aCBkPSJNMTQgMnY2aDYiLz48cGF0aCBkPSJNMTYgMTNoLTIuNWEyIDIgMCAwIDAgMCA0SDEyIi8+PHBhdGggZD0iTTggMTNoMiIvPjxwYXRoIGQ9Ik04IDE3aDIiLz48L3N2Zz4= +requirements: python-docx==1.1.2 +description: 将当前对话内容从 Markdown 转换并导出为 Word (.docx) 文件,支持中英文无乱码。 +""" + +import os +import re +import base64 +import datetime +import io +from typing import Optional, Callable, Awaitable, Any, List, Tuple +from docx import Document +from docx.shared import Pt, Inches, RGBColor, Cm +from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING +from docx.enum.table import WD_TABLE_ALIGNMENT +from docx.enum.style import WD_STYLE_TYPE +from docx.oxml.ns import qn +from docx.oxml import OxmlElement + + +class Action: + def __init__(self): + pass + + async def _send_notification(self, emitter: Callable, type: str, content: str): + await emitter( + {"type": "notification", "data": {"type": type, "content": content}} + ) + + async def action( + self, + body: dict, + __user__=None, + __event_emitter__=None, + __event_call__: Optional[Callable[[Any], Awaitable[None]]] = None, + ): + print(f"action:{__name__}") + + # 解析用户信息 + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "zh-CN") if __user__ else "zh-CN" + ) + user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "zh-CN") + user_name = __user__.get("name", "用户") + user_id = __user__.get("id", "unknown_user") + + if __event_emitter__: + last_assistant_message = body["messages"][-1] + + await __event_emitter__( + { + "type": "status", + "data": {"description": "正在转换为 Word 文档...", "done": False}, + } + ) + + try: + message_content = last_assistant_message["content"] + + if not message_content or not message_content.strip(): + await self._send_notification( + __event_emitter__, "error", "没有找到可导出的内容!" + ) + return + + # 生成文件名 + title = self.extract_title(message_content) + current_datetime = datetime.datetime.now() + formatted_date = current_datetime.strftime("%Y%m%d") + + if title: + filename = f"{self.clean_filename(title)}.docx" + else: + filename = f"{user_name}_{formatted_date}.docx" + + # 创建 Word 文档 + doc = self.markdown_to_docx(message_content) + + # 保存到内存 + doc_buffer = io.BytesIO() + doc.save(doc_buffer) + doc_buffer.seek(0) + file_content = doc_buffer.read() + base64_blob = base64.b64encode(file_content).decode("utf-8") + + # 触发文件下载 + if __event_call__: + await __event_call__( + { + "type": "execute", + "data": { + "code": f""" + try {{ + const base64Data = "{base64_blob}"; + const binaryData = atob(base64Data); + const arrayBuffer = new Uint8Array(binaryData.length); + for (let i = 0; i < binaryData.length; i++) {{ + arrayBuffer[i] = binaryData.charCodeAt(i); + }} + const blob = new Blob([arrayBuffer], {{ type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }}); + const filename = "{filename}"; + + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.style.display = "none"; + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + URL.revokeObjectURL(url); + document.body.removeChild(a); + }} catch (error) {{ + console.error('触发下载时出错:', error); + }} + """ + }, + } + ) + + await __event_emitter__( + { + "type": "status", + "data": {"description": "Word 文档已导出", "done": True}, + } + ) + + await self._send_notification( + __event_emitter__, "success", f"已成功导出为 {filename}" + ) + + return {"message": "下载事件已触发"} + + except Exception as e: + print(f"Error exporting to Word: {str(e)}") + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"导出失败: {str(e)}", + "done": True, + }, + } + ) + await self._send_notification( + __event_emitter__, "error", f"导出 Word 文档时出错: {str(e)}" + ) + + def extract_title(self, content: str) -> str: + """从 Markdown 内容中提取标题""" + lines = content.split("\n") + for line in lines: + # 匹配 h1-h3 标题 + match = re.match(r"^#{1,3}\s+(.+)$", line.strip()) + if match: + return match.group(1).strip() + return "" + + def clean_filename(self, name: str) -> str: + """清理文件名中的非法字符""" + return re.sub(r'[\\/*?:"<>|]', "", name).strip()[:50] + + def markdown_to_docx(self, markdown_text: str) -> Document: + """ + 将 Markdown 文本转换为 Word 文档 + 支持:标题、段落、粗体、斜体、代码块、列表、表格、链接 + """ + doc = Document() + + # 设置默认中文字体 + self.set_document_default_font(doc) + + lines = markdown_text.split("\n") + i = 0 + in_code_block = False + code_block_content = [] + code_block_lang = "" + in_list = False + list_items = [] + list_type = None # 'ordered' or 'unordered' + + while i < len(lines): + line = lines[i] + + # 处理代码块 + if line.strip().startswith("```"): + if not in_code_block: + # 先处理之前积累的列表 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + in_code_block = True + code_block_lang = line.strip()[3:].strip() + code_block_content = [] + else: + # 代码块结束 + in_code_block = False + self.add_code_block( + doc, "\n".join(code_block_content), code_block_lang + ) + code_block_content = [] + code_block_lang = "" + i += 1 + continue + + if in_code_block: + code_block_content.append(line) + i += 1 + continue + + # 处理表格 + if line.strip().startswith("|") and line.strip().endswith("|"): + # 先处理之前积累的列表 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + table_lines = [] + while i < len(lines) and lines[i].strip().startswith("|"): + table_lines.append(lines[i]) + i += 1 + self.add_table(doc, table_lines) + continue + + # 处理标题 + header_match = re.match(r"^(#{1,6})\s+(.+)$", line.strip()) + if header_match: + # 先处理之前积累的列表 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + level = len(header_match.group(1)) + text = header_match.group(2) + self.add_heading(doc, text, level) + i += 1 + continue + + # 处理无序列表 + unordered_match = re.match(r"^(\s*)[-*+]\s+(.+)$", line) + if unordered_match: + if not in_list or list_type != "unordered": + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = True + list_type = "unordered" + indent = len(unordered_match.group(1)) // 2 + list_items.append((indent, unordered_match.group(2))) + i += 1 + continue + + # 处理有序列表 + ordered_match = re.match(r"^(\s*)\d+[.)]\s+(.+)$", line) + if ordered_match: + if not in_list or list_type != "ordered": + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = True + list_type = "ordered" + indent = len(ordered_match.group(1)) // 2 + list_items.append((indent, ordered_match.group(2))) + i += 1 + continue + + # 处理水平分割线 + if re.match(r"^[-*_]{3,}$", line.strip()): + # 先处理之前积累的列表 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + self.add_horizontal_rule(doc) + i += 1 + continue + + # 处理空行 + if not line.strip(): + # 列表结束 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + i += 1 + continue + + # 处理普通段落 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + list_items = [] + in_list = False + + self.add_paragraph(doc, line) + i += 1 + + # 处理剩余的列表 + if in_list and list_items: + self.add_list_to_doc(doc, list_items, list_type) + + return doc + + def set_document_default_font(self, doc: Document): + """设置文档默认字体,确保中英文都正常显示""" + # 设置正文样式 + style = doc.styles["Normal"] + font = style.font + font.name = "Times New Roman" # 英文字体 + font.size = Pt(11) + + # 设置中文字体 + style._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + + # 设置段落格式 + paragraph_format = style.paragraph_format + paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE + paragraph_format.space_after = Pt(6) + + def add_heading(self, doc: Document, text: str, level: int): + """添加标题""" + # Word 标题级别从 0 开始,Markdown 从 1 开始 + heading_level = min(level, 9) # Word 最多支持 Heading 9 + heading = doc.add_heading(level=heading_level) + + # 解析并添加格式化文本 + self.add_formatted_text(heading, text) + + # 设置中文字体 + for run in heading.runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "黑体") + + def add_paragraph(self, doc: Document, text: str): + """添加段落,支持内联格式""" + paragraph = doc.add_paragraph() + self.add_formatted_text(paragraph, text) + + # 设置中文字体 + for run in paragraph.runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + + def add_formatted_text(self, paragraph, text: str): + """ + 解析 Markdown 内联格式并添加到段落 + 支持:粗体、斜体、行内代码、链接、删除线 + """ + # 定义格式化模式 + patterns = [ + # 粗斜体 ***text*** 或 ___text___ + (r"\*\*\*(.+?)\*\*\*|___(.+?)___", {"bold": True, "italic": True}), + # 粗体 **text** 或 __text__ + (r"\*\*(.+?)\*\*|__(.+?)__", {"bold": True}), + # 斜体 *text* 或 _text_ + ( + r"(? 1 else None + ), + } + ) + + # 按位置排序 + all_matches.sort(key=lambda x: x["start"]) + + # 移除重叠的匹配 + filtered_matches = [] + last_end = 0 + for m in all_matches: + if m["start"] >= last_end: + filtered_matches.append(m) + last_end = m["end"] + + # 构建最终文本 + pos = 0 + for match in filtered_matches: + # 添加匹配前的普通文本 + if match["start"] > pos: + plain_text = text[pos : match["start"]] + if plain_text: + paragraph.add_run(plain_text) + + # 添加格式化文本 + style = match["style"] + run_text = match["text"] + + if style.get("link"): + # 链接处理 + run = paragraph.add_run(run_text) + run.font.color.rgb = RGBColor(0, 0, 255) + run.font.underline = True + elif style.get("code"): + # 行内代码 + run = paragraph.add_run(run_text) + run.font.name = "Consolas" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimHei") + run.font.size = Pt(10) + # 添加背景色 + shading = OxmlElement("w:shd") + shading.set(qn("w:fill"), "E8E8E8") + run._element.rPr.append(shading) + else: + run = paragraph.add_run(run_text) + if style.get("bold"): + run.bold = True + if style.get("italic"): + run.italic = True + if style.get("strike"): + run.font.strike = True + + pos = match["end"] + + # 添加剩余的普通文本 + if pos < len(text): + paragraph.add_run(text[pos:]) + + def add_code_block(self, doc: Document, code: str, language: str = ""): + """添加代码块""" + # 添加代码块段落 + paragraph = doc.add_paragraph() + paragraph.paragraph_format.left_indent = Cm(0.5) + paragraph.paragraph_format.space_before = Pt(6) + paragraph.paragraph_format.space_after = Pt(6) + + # 设置代码块背景 + run = paragraph.add_run(code) + run.font.name = "Consolas" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimHei") + run.font.size = Pt(10) + + # 添加浅灰色背景 + shading = OxmlElement("w:shd") + shading.set(qn("w:fill"), "F5F5F5") + paragraph._element.pPr.append(shading) + + def add_table(self, doc: Document, table_lines: List[str]): + """添加表格""" + if len(table_lines) < 2: + return + + # 解析表格数据 + rows = [] + for line in table_lines: + cells = [cell.strip() for cell in line.strip().strip("|").split("|")] + # 跳过分隔行 + if all(re.fullmatch(r"[-:]+", cell) for cell in cells): + continue + rows.append(cells) + + if not rows: + return + + # 确定列数 + num_cols = max(len(row) for row in rows) + + # 创建表格 + table = doc.add_table(rows=len(rows), cols=num_cols) + table.style = "Table Grid" + table.alignment = WD_TABLE_ALIGNMENT.CENTER + + # 填充表格 + for row_idx, row_data in enumerate(rows): + row = table.rows[row_idx] + for col_idx, cell_text in enumerate(row_data): + if col_idx < num_cols: + cell = row.cells[col_idx] + # 清除默认段落 + cell.paragraphs[0].clear() + self.add_formatted_text(cell.paragraphs[0], cell_text) + + # 设置单元格字体 + for run in cell.paragraphs[0].runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + run.font.size = Pt(10) + + # 表头加粗 + if row_idx == 0: + for run in cell.paragraphs[0].runs: + run.bold = True + + # 设置表格列宽度自适应 + for row in table.rows: + for cell in row.cells: + cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER + + def add_list_to_doc( + self, doc: Document, items: List[Tuple[int, str]], list_type: str + ): + """添加列表""" + for indent, text in items: + paragraph = doc.add_paragraph() + + if list_type == "unordered": + # 无序列表使用项目符号 + paragraph.style = "List Bullet" + else: + # 有序列表使用编号 + paragraph.style = "List Number" + + # 设置缩进 + paragraph.paragraph_format.left_indent = Cm(0.5 * (indent + 1)) + + # 添加格式化文本 + self.add_formatted_text(paragraph, text) + + # 设置字体 + for run in paragraph.runs: + run.font.name = "Times New Roman" + run._element.rPr.rFonts.set(qn("w:eastAsia"), "宋体") + + def add_horizontal_rule(self, doc: Document): + """添加水平分割线""" + paragraph = doc.add_paragraph() + paragraph.paragraph_format.space_before = Pt(12) + paragraph.paragraph_format.space_after = Pt(12) + + # 添加底部边框作为分割线 + pPr = paragraph._element.get_or_add_pPr() + pBdr = OxmlElement("w:pBdr") + bottom = OxmlElement("w:bottom") + bottom.set(qn("w:val"), "single") + bottom.set(qn("w:sz"), "6") + bottom.set(qn("w:space"), "1") + bottom.set(qn("w:color"), "auto") + pBdr.append(bottom) + pPr.append(pBdr)