From f6369a1591500f9ac3bf5179427a8f9a42840d70 Mon Sep 17 00:00:00 2001 From: fujie Date: Sat, 7 Feb 2026 18:14:02 +0800 Subject: [PATCH] feat: release export-to-docx v0.4.4 w/ formatting & font fixes --- docs/plugins/actions/export-to-word.md | 12 +- docs/plugins/actions/export-to-word.zh.md | 13 +- docs/plugins/actions/index.md | 18 +- docs/plugins/actions/index.zh.md | 16 +- plugins/actions/export_to_docx/README.md | 11 +- plugins/actions/export_to_docx/README_CN.md | 11 +- .../actions/export_to_docx/export_to_word.py | 322 ++++++++++++++---- .../export_to_docx/export_to_word_cn.py | 321 +++++++++++++---- 8 files changed, 553 insertions(+), 171 deletions(-) diff --git a/docs/plugins/actions/export-to-word.md b/docs/plugins/actions/export-to-word.md index 6daa969..20ef282 100644 --- a/docs/plugins/actions/export-to-word.md +++ b/docs/plugins/actions/export-to-word.md @@ -1,7 +1,7 @@ # Export to Word Action -v0.4.3 +v0.4.4 Export conversation to Word (.docx) with **syntax highlighting**, **native math equations**, **Mermaid diagrams**, **citations**, and **enhanced table formatting**. @@ -53,11 +53,17 @@ You can configure the following settings via the **Valves** button in the plugin | `MATH_ENABLE` | Enable LaTeX math block conversion. | `True` | | `MATH_INLINE_DOLLAR_ENABLE` | Enable inline `$ ... $` math conversion. | `True` | -## 🔥 What's New in v0.4.3 +## 🔥 What's New in v0.4.4 + +- 🧹 **Content Cleanup**: Enhanced stripping of `
<details>` blocks (often used for tool calls/thinking process) to ensure a clean final document. +- 📄 **Standard Document Formatting**: Applied professional document formatting standards for titles and headings (centered title, bold, optimized font sizes and spacing), including GB/T compliance for Chinese content. +- 🔠 **Font Rendering Fix**: Fixed an issue where CJK characters would fall back to MS Gothic in Word; now correctly uses the configured Asian font (e.g., SimSun). +- ⚙️ **Title Alignment**: Added `TITLE_ALIGNMENT` valve to configure document title alignment (left, center, right). ### User-Level Configuration (UserValves) Users can override the following settings in their personal settings: + - `TITLE_SOURCE` - `UI_LANGUAGE` - `FONT_LATIN`, `FONT_ASIAN`, `FONT_CODE` @@ -120,4 +126,4 @@ Users can override the following settings in their personal settings: ## Source Code [:fontawesome-brands-github: View on GitHub](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/actions/export_to_docx){ .md-button } -**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 0.4.3 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 0.4.4 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) diff --git a/docs/plugins/actions/export-to-word.zh.md b/docs/plugins/actions/export-to-word.zh.md index 643b122..447bd21 100644 --- a/docs/plugins/actions/export-to-word.zh.md +++ b/docs/plugins/actions/export-to-word.zh.md @@ -1,7 +1,7 @@ # Export to Word(导出为 Word) Action -v0.4.3 +v0.4.4 将当前对话导出为完美格式的 Word 文档,支持**代码语法高亮**、**原生数学公式**、**Mermaid 图表**、**引用资料**以及**增强表格**渲染。 @@ -53,9 +53,17 @@ Export to Word 插件会把聊天消息从 Markdown 转成精致的 Word 文档 | `启用数学公式` | 启用 LaTeX 数学公式块转换。 | `True` | | `启用行内公式` | 启用行内 `$ ... $` 数学公式转换。 | `True` | +## 🔥 v0.4.4 更新内容 + +- 🧹 **内容清理加强**: 增强了对 `
<details>` 块(通常包含工具调用或思考过程)的清理,确保最终文档整洁。 +- 📄 **文档格式标准化**: 采用了专业的文档排版标准(兼容中文 GB/T 规范),标题居中加粗,各级标题使用标准字号和间距。 +- 🔠 **字体渲染修复**: 修复了 CJK 字符在 Word 中回退到 MS Gothic 的问题;现在正确使用配置的中文字体(例如宋体)。 +- ⚙️ **标题对齐配置**: 新增 `标题对齐方式` Valve,支持配置文档标题的对齐方式(左对齐、居中、右对齐)。 + ### 用户级配置 (UserValves) 用户可以在个人设置中覆盖以下配置: + - `文档标题来源` - `界面语言` - `英文字体`, `中文字体`, `代码字体` @@ -117,4 +125,5 @@ Export to Word 插件会把聊天消息从 Markdown 转成精致的 Word 文档 ## 源码 -[:fontawes**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 0.4.3 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui)/tree/main/plugins/actions/export_to_docx){ .md-button } +[:fontawesome-brands-github: View on GitHub](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/actions/export_to_docx){ .md-button } +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 0.4.4 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) diff --git a/docs/plugins/actions/index.md b/docs/plugins/actions/index.md index 643d53f..0fb06b7 100644 --- a/docs/plugins/actions/index.md +++ b/docs/plugins/actions/index.md @@ -17,7 +17,7 @@ Actions are interactive plugins that:
-- :material-brain:{ .lg .middle } **Smart Mind Map** +- :material-brain:{ .lg .middle } **Smart Mind Map** --- @@ -27,7 +27,7 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](smart-mind-map.md) -- :material-chart-bar:{ .lg .middle } **Smart Infographic** +- :material-chart-bar:{ .lg .middle } **Smart Infographic** --- @@ -37,7 +37,7 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](smart-infographic.md) -- :material-card-text:{ .lg .middle } **Flash Card** +- :material-card-text:{ .lg .middle } **Flash Card** --- @@ -47,7 +47,7 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](flash-card.md) -- :material-file-excel:{ .lg .middle } **Export to Excel** +- :material-file-excel:{ .lg .middle } **Export to Excel** --- @@ -57,17 +57,17 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](export-to-excel.md) -- :material-file-word-box:{ .lg .middle } **Export to Word (Enhanced Formatting)** +- :material-file-word-box:{ .lg .middle } **Export to Word (Enhanced Formatting)** --- Export the current conversation to a formatted Word doc with **syntax highlighting**, **native math equations**, **Mermaid diagrams**, **citations**, and **enhanced table formatting**. - - **Version:** 0.4.2 + + **Version:** 0.4.4 [:octicons-arrow-right-24: Documentation](export-to-word.md) -- :material-brain:{ .lg .middle } **Deep Dive** +- :material-brain:{ .lg .middle } **Deep Dive** --- @@ -77,8 +77,6 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](deep-dive.md) - -
--- diff --git a/docs/plugins/actions/index.zh.md b/docs/plugins/actions/index.zh.md index 7ee074b..d41ab5b 100644 --- a/docs/plugins/actions/index.zh.md +++ b/docs/plugins/actions/index.zh.md @@ -17,7 +17,7 @@ Actions 是交互式插件,能够:
-- :material-brain:{ .lg .middle } **Smart Mind Map** +- :material-brain:{ .lg .middle } **Smart Mind Map** --- @@ -27,7 +27,7 @@ Actions 是交互式插件,能够: [:octicons-arrow-right-24: 查看文档](smart-mind-map.md) -- :material-chart-bar:{ .lg .middle } **Smart Infographic** +- :material-chart-bar:{ .lg .middle } **Smart Infographic** --- @@ -37,7 +37,7 @@ Actions 是交互式插件,能够: [:octicons-arrow-right-24: 查看文档](smart-infographic.md) -- :material-card-text:{ .lg .middle } **Flash Card(闪记卡)** +- :material-card-text:{ .lg .middle } **Flash Card(闪记卡)** --- @@ -47,7 +47,7 @@ Actions 是交互式插件,能够: [:octicons-arrow-right-24: 查看文档](flash-card.md) -- :material-file-excel:{ .lg .middle } **Export to Excel** +- :material-file-excel:{ .lg .middle } **Export to Excel** --- @@ -57,17 +57,17 @@ Actions 是交互式插件,能够: [:octicons-arrow-right-24: 查看文档](export-to-excel.md) -- :material-file-word-box:{ .lg .middle } **Word 导出 (格式增强)** +- :material-file-word-box:{ .lg .middle } **Word 导出 (格式增强)** --- 将当前对话导出为完美格式的 Word 文档,支持**代码语法高亮**、**原生数学公式**、**Mermaid 图表**、**引用资料**以及**增强表格**渲染。 - **版本:** 0.4.2 + **版本:** 0.4.4 [:octicons-arrow-right-24: 查看文档](export-to-word.md) -- :material-brain:{ .lg .middle } **精读 (Deep Dive)** +- :material-brain:{ .lg .middle } **精读 (Deep Dive)** --- @@ -77,8 +77,6 @@ Actions 是交互式插件,能够: [:octicons-arrow-right-24: 查看文档](deep-dive.zh.md) - -
--- diff --git a/plugins/actions/export_to_docx/README.md b/plugins/actions/export_to_docx/README.md index efd1e38..ae13103 100644 --- a/plugins/actions/export_to_docx/README.md +++ b/plugins/actions/export_to_docx/README.md @@ -1,14 +1,15 @@ # 📝 Export to Word (Enhanced) -**Author:** [Fu-Jie](https://github.com/Fu-Jie/awesome-openwebui) | **Version:** 0.4.3 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) | **License:** MIT +**Author:** [Fu-Jie](https://github.com/Fu-Jie/awesome-openwebui) | **Version:** 0.4.4 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) | **License:** MIT Export conversation to Word (.docx) with **syntax highlighting**, **native math equations**, **Mermaid diagrams**, **citations**, and **enhanced table formatting**. -## 🔥 What's New in v0.4.3 +## 🔥 What's New in v0.4.4 -- ✨ **S3 Object Storage Support**: Direct access to images stored in S3/MinIO via boto3, bypassing API layer for faster exports. -- 🔧 **Multi-level File Fallback**: 6-level fallback mechanism for file retrieval (DB → S3 → Local → URL → API → Attributes). -- 🛡️ **Improved Error Handling**: Better logging and error messages for file retrieval failures. +- 🧹 **Content Cleanup**: Enhanced stripping of `
<details>` blocks (often used for tool calls/thinking process) to ensure a clean final document. +- 📄 **Standard Document Formatting**: Applied professional document formatting standards for titles and headings (centered title, bold, optimized font sizes and spacing), including GB/T compliance for Chinese content. +- 🔠 **Font Rendering Fix**: Fixed an issue where CJK characters would fall back to MS Gothic in Word; now correctly uses the configured Asian font (e.g., SimSun). +- ⚙️ **Title Alignment**: Added `TITLE_ALIGNMENT` valve to configure document title alignment (left, center, right). ## ✨ Key Features diff --git a/plugins/actions/export_to_docx/README_CN.md b/plugins/actions/export_to_docx/README_CN.md index 6138c12..e45e7d6 100644 --- a/plugins/actions/export_to_docx/README_CN.md +++ b/plugins/actions/export_to_docx/README_CN.md @@ -1,14 +1,15 @@ # 📝 导出为 Word (增强版) -**Author:** [Fu-Jie](https://github.com/Fu-Jie/awesome-openwebui) | **Version:** 0.4.3 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) | **许可证:** MIT +**Author:** [Fu-Jie](https://github.com/Fu-Jie/awesome-openwebui) | **Version:** 0.4.4 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) | **许可证:** MIT 将对话导出为 Word (.docx),支持**代码语法高亮**、**原生数学公式**、**Mermaid 图表**、**引用参考**和**增强表格格式**。 -## 🔥 v0.4.3 更新内容 +## 🔥 v0.4.4 更新内容 -- ✨ **S3 对象存储支持**: 通过 boto3 直连 S3/MinIO,绕过 API 层,导出速度更快。 -- 🔧 **多级文件回退**: 6 级文件获取机制(数据库 → S3 → 本地 → URL → API → 属性)。 -- 🛡️ **错误处理优化**: 更完善的日志记录和错误提示,便于调试文件访问问题。 +- 🧹 **内容清理加强**: 增强了对 `
` 块(通常包含工具调用或思考过程)的清理,确保最终文档整洁。 +- 📄 **文档格式标准化**: 采用了专业的文档排版标准(兼容中文 GB/T 规范),标题居中加粗,各级标题使用标准字号和间距。 +- 🔠 **字体渲染修复**: 修复了 CJK 字符在 Word 中回退到 MS Gothic 的问题;现在正确使用配置的中文字体(例如宋体)。 +- ⚙️ **标题对齐配置**: 新增 `标题对齐方式` Valve,支持配置文档标题的对齐方式(左对齐、居中、右对齐)。 ## ✨ 核心特性 diff --git a/plugins/actions/export_to_docx/export_to_word.py b/plugins/actions/export_to_docx/export_to_word.py index 56fb1ed..df6aa42 100644 --- a/plugins/actions/export_to_docx/export_to_word.py +++ b/plugins/actions/export_to_docx/export_to_word.py @@ -1,9 +1,9 @@ """ -title: Export to Word (Enhanced) +title: Export to Word Enhanced author: Fu-Jie author_url: https://github.com/Fu-Jie/awesome-openwebui funding_url: https://github.com/open-webui -version: 0.4.3 +version: 0.4.4 openwebui_id: fca6a315-2a45-42cc-8c96-55cbc85f87f2 icon_url: data:image/svg+xml;base64,PHN2ZwogIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICB3aWR0aD0iMjQiCiAgaGVpZ2h0PSIyNCIKICB2aWV3Qm94PSIwIDAgMjQgMjQiCiAgZmlsbD0ibm9uZSIKICBzdHJva2U9ImN1cnJlbnRDb2xvciIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNNiAyMmEyIDIgMCAwIDEtMi0yVjRhMiAyIDAgMCAxIDItMmg4YTIuNCAyLjQgMCAwIDEgMS43MDQuNzA2bDMuNTg4IDMuNTg4QTIuNCAyLjQgMCAwIDEgMjAgOHYxMmEyIDIgMCAwIDEtMiAyeiIgLz4KICA8cGF0aCBkPSJNMTQgMnY1YTEgMSAwIDAgMCAxIDFoNSIgLz4KICA8cGF0aCBkPSJNMTAgOUg4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxM0g4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxN0g4IiAvPgo8L3N2Zz4K requirements: python-docx, Pygments, latex2mathml, mathml2omml @@ -101,9 +101,8 @@ _TRANSPARENT_1PX_PNG = base64.b64decode( _ASVG_NS = "http://schemas.microsoft.com/office/drawing/2016/SVG/main" nsmap.setdefault("asvg", _ASVG_NS) -_REASONING_DETAILS_RE = re.compile( - r"]*\btype\s*=\s*(?:\"reasoning\"|'reasoning'|reasoning)[^>]*>.*?", - re.IGNORECASE | re.DOTALL, +_ALL_DETAILS_RE = re.compile( + r"]*>.*?", re.IGNORECASE | re.DOTALL ) _THINK_RE = re.compile(r"]*>.*?", re.IGNORECASE | re.DOTALL) _ANALYSIS_RE = re.compile( @@ -178,6 +177,12 @@ class Action: description="Font for code 
blocks and inline code (e.g., 'Consolas', 'Courier New', 'Monaco')", ) + # Title alignment + TITLE_ALIGNMENT: str = Field( + default="center", + description="Title alignment: 'left', 'center', or 'right'", + ) + # Table styling TABLE_HEADER_COLOR: str = Field( default="F2F2F2", @@ -242,60 +247,60 @@ class Action: ) class UserValves(BaseModel): - TITLE_SOURCE: str = Field( - default="chat_title", + TITLE_SOURCE: Optional[str] = Field( + default=None, description="Title Source: 'chat_title' (Chat Title), 'ai_generated' (AI Generated), 'markdown_title' (Markdown Title)", ) - UI_LANGUAGE: str = Field( - default="en", + UI_LANGUAGE: Optional[str] = Field( + default=None, description="UI language for export messages. Options: 'en' (English), 'zh' (Chinese)", ) - FONT_LATIN: str = Field( - default="Times New Roman", + FONT_LATIN: Optional[str] = Field( + default=None, description="Font for Latin characters (e.g., 'Times New Roman', 'Calibri', 'Arial')", ) - FONT_ASIAN: str = Field( - default="SimSun", + FONT_ASIAN: Optional[str] = Field( + default=None, description="Font for Asian characters (e.g., 'SimSun', 'Microsoft YaHei', 'PingFang SC')", ) - FONT_CODE: str = Field( - default="Consolas", + FONT_CODE: Optional[str] = Field( + default=None, description="Font for code blocks and inline code (e.g., 'Consolas', 'Courier New', 'Monaco')", ) - TABLE_HEADER_COLOR: str = Field( - default="F2F2F2", + TABLE_HEADER_COLOR: Optional[str] = Field( + default=None, description="Table header background color (hex, without #)", ) - TABLE_ZEBRA_COLOR: str = Field( - default="FBFBFB", + TABLE_ZEBRA_COLOR: Optional[str] = Field( + default=None, description="Table zebra stripe background color for alternate rows (hex, without #)", ) - MERMAID_PNG_SCALE: float = Field( - default=3.0, + MERMAID_PNG_SCALE: Optional[float] = Field( + default=None, description="PNG render resolution multiplier (higher = clearer, larger file)", ) - MERMAID_DISPLAY_SCALE: float = Field( - default=1.0, + 
MERMAID_DISPLAY_SCALE: Optional[float] = Field( + default=None, description="Diagram width relative to available page width (<=1 recommended)", ) - MERMAID_OPTIMIZE_LAYOUT: bool = Field( - default=False, + MERMAID_OPTIMIZE_LAYOUT: Optional[bool] = Field( + default=None, description="Optimize Mermaid layout: convert LR to TD for graph/flowchart", ) - MERMAID_BACKGROUND: str = Field( - default="", + MERMAID_BACKGROUND: Optional[str] = Field( + default=None, description="Mermaid background color. Empty = transparent (recommended for Word dark mode). Used only for optional PNG fill.", ) - MERMAID_CAPTIONS_ENABLE: bool = Field( - default=True, + MERMAID_CAPTIONS_ENABLE: Optional[bool] = Field( + default=None, description="Add figure captions under Mermaid images/charts", ) - MATH_ENABLE: bool = Field( - default=True, + MATH_ENABLE: Optional[bool] = Field( + default=None, description="Enable LaTeX math block conversion (\\\\[...\\\\] and $$...$$) into Word equations", ) - MATH_INLINE_DOLLAR_ENABLE: bool = Field( - default=True, + MATH_INLINE_DOLLAR_ENABLE: Optional[bool] = Field( + default=None, description="Enable inline $...$ math conversion into Word equations (conservative parsing to reduce false positives)", ) @@ -449,13 +454,21 @@ class Action: user_id = __user__.get("id", "unknown_user") # Apply UserValves if present - if __user__ and "valves" in __user__: - # Update self.valves with user-specific values - # Note: This assumes per-request instantiation or that we are okay with modifying the singleton. - # Given the plugin architecture, we'll update it for this execution. 
- for key, value in __user__["valves"].model_dump().items(): - if hasattr(self.valves, key): - setattr(self.valves, key, value) + if __user__: + # Robustly parse UserValves whether it's a dict or Pydantic model + raw_valves = __user__.get("valves", {}) + if isinstance(raw_valves, self.UserValves): + user_valves = raw_valves + elif isinstance(raw_valves, dict): + user_valves = self.UserValves(**raw_valves) + else: + user_valves = None + + if user_valves: + for key, value in user_valves.model_dump(exclude_unset=True).items(): + # Only override if the value is not None (and explicitly set) + if hasattr(self.valves, key) and value is not None: + setattr(self.valves, key, value) # Get user language from Valves configuration self._user_lang = self._get_lang_key(self.valves.UI_LANGUAGE) @@ -492,6 +505,37 @@ class Action: try: message_content = last_assistant_message["content"] if isinstance(message_content, str): + if __event_emitter__ and self.valves.SHOW_DEBUG_LOG: + debug_data = {} + for name, regex in [ + ("Details Block", _ALL_DETAILS_RE), + ("Think Block", _THINK_RE), + ("Analysis Block", _ANALYSIS_RE), + ]: + matches = regex.findall(message_content) + if matches: + debug_data[name] = [ + (m[:200] + "...") if len(m) > 200 else m + for m in matches + ] + if debug_data: + await self._emit_debug_log( + __event_emitter__, + "Context Stripping Analysis", + debug_data, + ) + + # Log font configuration + await self._emit_debug_log( + __event_emitter__, + "Font Configuration", + { + "Latin Font": self.valves.FONT_LATIN, + "Asian Font": self.valves.FONT_ASIAN, + "Code Font": self.valves.FONT_CODE, + }, + ) + message_content = self._strip_reasoning_blocks(message_content) if not message_content or not message_content.strip(): @@ -1107,30 +1151,7 @@ class Action: if not isinstance(name, str): return "" - def _is_emoji_codepoint(codepoint: int) -> bool: - # Common emoji ranges + flag regional indicators. 
- return ( - 0x1F000 <= codepoint <= 0x1FAFF - or 0x1F1E6 <= codepoint <= 0x1F1FF - or 0x2600 <= codepoint <= 0x26FF - or 0x2700 <= codepoint <= 0x27BF - or 0x2300 <= codepoint <= 0x23FF - or 0x2B00 <= codepoint <= 0x2BFF - ) - - def _is_emoji_modifier(codepoint: int) -> bool: - # VS15/VS16, ZWJ, keycap, skin tones, and tag characters used in some emoji sequences. - return ( - codepoint in (0x200D, 0xFE0E, 0xFE0F, 0x20E3) - or 0x1F3FB <= codepoint <= 0x1F3FF - or 0xE0020 <= codepoint <= 0xE007F - ) - - without_emoji = "".join( - ch - for ch in name - if not (_is_emoji_codepoint(ord(ch)) or _is_emoji_modifier(ord(ch))) - ) + without_emoji = self._remove_emojis(name) cleaned = re.sub(r'[\\/*?:"<>|]', "", without_emoji) cleaned = re.sub(r"\s+", " ", cleaned).strip().strip(".") return cleaned[:50].strip() @@ -1498,7 +1519,10 @@ class Action: # If there is no h1 in content, prepend chat title as h1 when provided if top_heading and not has_h1: - self.add_heading(doc, top_heading, 1) + # Remove emojis from title for a professional look + clean_title = self._remove_emojis(top_heading) + # Use Title style (level 0) for the main document title + self.add_heading(doc, clean_title, 0) lines = markdown_text.split("\n") i = 0 @@ -1758,7 +1782,7 @@ class Action: cur = text for _ in range(10): prev = cur - cur = _REASONING_DETAILS_RE.sub("", cur) + cur = _ALL_DETAILS_RE.sub("", cur) cur = _THINK_RE.sub("", cur) cur = _ANALYSIS_RE.sub("", cur) if cur == prev: @@ -2242,14 +2266,155 @@ class Action: font = style.font font.name = self.valves.FONT_LATIN font.size = Pt(11) - # Set Asian font - style._element.rPr.rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) + + # Ensure rPr element exists + rPr = style._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + + # Set Latin and Asian fonts explicitly + rFonts.set(qn("w:ascii"), self.valves.FONT_LATIN) + rFonts.set(qn("w:hAnsi"), self.valves.FONT_LATIN) + rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) + + # Set language 
to zh-CN to prevent MS Gothic fallback (Japanese font) + # Even for English interface, we want to prioritize Chinese glyphs over Japanese for CJK + lang = rPr.find(qn("w:lang")) + if lang is None: + lang = OxmlElement("w:lang") + rPr.append(lang) + lang.set(qn("w:val"), "en-US") + lang.set(qn("w:eastAsia"), "zh-CN") + + logger.info( + f"[Font Config] Latin: {self.valves.FONT_LATIN}, Asian: {self.valves.FONT_ASIAN}" + ) # Set paragraph format paragraph_format = style.paragraph_format paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE paragraph_format.space_after = Pt(6) + # Configure Title style (used for document title) + # Standard format: 22pt (二号), bold, centered, 24pt space after + if "Title" in doc.styles: + title_style = doc.styles["Title"] + title_font = title_style.font + title_font.name = self.valves.FONT_LATIN + title_font.size = Pt(22) # 二号字体 + title_font.bold = True + title_font.color.rgb = RGBColor(0, 0, 0) + + # Set paragraph format: alignment based on configuration + title_pf = title_style.paragraph_format + alignment_map = { + "left": WD_ALIGN_PARAGRAPH.LEFT, + "center": WD_ALIGN_PARAGRAPH.CENTER, + "right": WD_ALIGN_PARAGRAPH.RIGHT, + } + title_pf.alignment = alignment_map.get( + self.valves.TITLE_ALIGNMENT.lower(), WD_ALIGN_PARAGRAPH.CENTER + ) + title_pf.space_before = Pt(0) + title_pf.space_after = Pt(24) + + t_rPr = title_style._element.get_or_add_rPr() + t_rFonts = t_rPr.get_or_add_rFonts() + t_rFonts.set(qn("w:ascii"), self.valves.FONT_LATIN) + t_rFonts.set(qn("w:hAnsi"), self.valves.FONT_LATIN) + t_rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) + + # Set language to zh-CN + t_lang = t_rPr.find(qn("w:lang")) + if t_lang is None: + t_lang = OxmlElement("w:lang") + t_rPr.append(t_lang) + t_lang.set(qn("w:val"), "en-US") + t_lang.set(qn("w:eastAsia"), "zh-CN") + + # Standard heading sizes based on Chinese document standards: + # Heading 1: 16pt (三号), bold, space before 24pt, space after 12pt + # Heading 2: 15pt (小三), bold, 
space before 18pt, space after 6pt + # Heading 3: 14pt (四号), bold, space before 12pt, space after 6pt + # Heading 4-9: 12pt (小四), bold, gradually reduced spacing + heading_formats = { + 1: {"size": 16, "space_before": 24, "space_after": 12}, + 2: {"size": 15, "space_before": 18, "space_after": 6}, + 3: {"size": 14, "space_before": 12, "space_after": 6}, + 4: {"size": 12, "space_before": 12, "space_after": 6}, + 5: {"size": 12, "space_before": 6, "space_after": 6}, + 6: {"size": 11, "space_before": 6, "space_after": 3}, + 7: {"size": 11, "space_before": 6, "space_after": 3}, + 8: {"size": 10.5, "space_before": 6, "space_after": 3}, + 9: {"size": 10.5, "space_before": 6, "space_after": 3}, + } + + # Apply font settings to Heading 1-9 + for i in range(1, 10): + style_id = f"Heading {i}" + if style_id in doc.styles: + heading_style = doc.styles[style_id] + heading_font = heading_style.font + heading_font.name = self.valves.FONT_LATIN + heading_font.color.rgb = RGBColor(0, 0, 0) + + # Apply standard formatting + fmt = heading_formats.get( + i, {"size": 11, "space_before": 6, "space_after": 3} + ) + heading_font.size = Pt(fmt["size"]) + heading_font.bold = True + + heading_pf = heading_style.paragraph_format + heading_pf.space_before = Pt(fmt["space_before"]) + heading_pf.space_after = Pt(fmt["space_after"]) + + # Ensure rPr exists + h_rPr = heading_style._element.get_or_add_rPr() + h_rFonts = h_rPr.get_or_add_rFonts() + + # Set fonts + h_rFonts.set(qn("w:ascii"), self.valves.FONT_LATIN) + h_rFonts.set(qn("w:hAnsi"), self.valves.FONT_LATIN) + h_rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) + + # Set language to zh-CN + h_lang = h_rPr.find(qn("w:lang")) + if h_lang is None: + h_lang = OxmlElement("w:lang") + h_rPr.append(h_lang) + h_lang.set(qn("w:val"), "en-US") + h_lang.set(qn("w:eastAsia"), "zh-CN") + + def _remove_emojis(self, text: str) -> str: + """Remove emojis from text based on unicode ranges.""" + if not isinstance(text, str): + return "" + + def 
_is_emoji_codepoint(codepoint: int) -> bool: + # Common emoji ranges + flag regional indicators. + return ( + 0x1F000 <= codepoint <= 0x1FAFF + or 0x1F1E6 <= codepoint <= 0x1F1FF + or 0x2600 <= codepoint <= 0x26FF + or 0x2700 <= codepoint <= 0x27BF + or 0x2300 <= codepoint <= 0x23FF + or 0x2B00 <= codepoint <= 0x2BFF + ) + + def _is_emoji_modifier(codepoint: int) -> bool: + # VS15/VS16, ZWJ, keycap, skin tones, and tag characters used in some emoji sequences. + return ( + codepoint in (0x200D, 0xFE0E, 0xFE0F, 0x20E3) + or 0x1F3FB <= codepoint <= 0x1F3FF + or 0xE0020 <= codepoint <= 0xE007F + ) + + return "".join( + ch + for ch in text + if not (_is_emoji_codepoint(ord(ch)) or _is_emoji_modifier(ord(ch))) + ) + def add_heading(self, doc: Document, text: str, level: int): """Add heading""" # Word heading levels start from 0, Markdown from 1 @@ -2285,6 +2450,12 @@ class Action: if strike: run.font.strike = True + # Explicitly set East Asian font to prevent MS Gothic fallback + # Word may not inherit w:eastAsia from style, causing Japanese font fallback for CJK + rPr = run._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) + def _add_inline_code(self, paragraph, s: str): if s == "": return @@ -2684,7 +2855,11 @@ class Action: ): u = self._normalize_url(url) if not u: - paragraph.add_run(display_text or text) + run = paragraph.add_run(display_text or text) + # Set East Asian font to prevent MS Gothic fallback + rPr = run._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) return part = getattr(paragraph, "part", None) @@ -2693,6 +2868,10 @@ class Action: run = paragraph.add_run(display_text or text) run.font.color.rgb = RGBColor(0, 0, 255) run.font.underline = True + # Set East Asian font to prevent MS Gothic fallback + rPr = run._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) 
return r_id = part.relate_to(u, RT.HYPERLINK, is_external=True) @@ -2706,6 +2885,11 @@ class Action: rStyle.set(qn("w:val"), "Hyperlink") rPr.append(rStyle) + # Set East Asian font to prevent MS Gothic fallback + rFonts = OxmlElement("w:rFonts") + rFonts.set(qn("w:eastAsia"), self.valves.FONT_ASIAN) + rPr.append(rFonts) + color = OxmlElement("w:color") color.set(qn("w:val"), "0000FF") rPr.append(color) diff --git a/plugins/actions/export_to_docx/export_to_word_cn.py b/plugins/actions/export_to_docx/export_to_word_cn.py index ec89efa..3209186 100644 --- a/plugins/actions/export_to_docx/export_to_word_cn.py +++ b/plugins/actions/export_to_docx/export_to_word_cn.py @@ -1,9 +1,9 @@ """ -title: 导出为 Word (增强版) +title: 导出为Word增强版 author: Fu-Jie author_url: https://github.com/Fu-Jie/awesome-openwebui funding_url: https://github.com/open-webui -version: 0.4.3 +version: 0.4.4 openwebui_id: 8a6306c0-d005-4e46-aaae-8db3532c9ed5 icon_url: data:image/svg+xml;base64,PHN2ZwogIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIKICB3aWR0aD0iMjQiCiAgaGVpZ2h0PSIyNCIKICB2aWV3Qm94PSIwIDAgMjQgMjQiCiAgZmlsbD0ibm9uZSIKICBzdHJva2U9ImN1cnJlbnRDb2xvciIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNNiAyMmEyIDIgMCAwIDEtMi0yVjRhMiAyIDAgMCAxIDItMmg4YTIuNCAyLjQgMCAwIDEgMS43MDQuNzA2bDMuNTg4IDMuNTg4QTIuNCAyLjQgMCAwIDEgMjAgOHYxMmEyIDIgMCAwIDEtMiAyeiIgLz4KICA8cGF0aCBkPSJNMTQgMnY1YTEgMSAwIDAgMCAxIDFoNSIgLz4KICA8cGF0aCBkPSJNMTAgOUg4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxM0g4IiAvPgogIDxwYXRoIGQ9Ik0xNiAxN0g4IiAvPgo8L3N2Zz4K requirements: python-docx, Pygments, latex2mathml, mathml2omml @@ -101,9 +101,8 @@ _TRANSPARENT_1PX_PNG = base64.b64decode( _ASVG_NS = "http://schemas.microsoft.com/office/drawing/2016/SVG/main" nsmap.setdefault("asvg", _ASVG_NS) -_REASONING_DETAILS_RE = re.compile( - r"]*\btype\s*=\s*(?:\"reasoning\"|'reasoning'|reasoning)[^>]*>.*?", - re.IGNORECASE | re.DOTALL, +_ALL_DETAILS_RE = re.compile( + r"]*>.*?", re.IGNORECASE | re.DOTALL ) 
_THINK_RE = re.compile(r"]*>.*?", re.IGNORECASE | re.DOTALL) _ANALYSIS_RE = re.compile( @@ -178,6 +177,12 @@ class Action: description="Font for code blocks and inline code (e.g., 'Consolas', 'Courier New', 'Monaco')", ) + # Title alignment + 标题对齐方式: str = Field( + default="center", + description="标题对齐方式: 'left' (左对齐), 'center' (居中), 或 'right' (右对齐)", + ) + # Table styling 表头背景色: str = Field( default="F2F2F2", @@ -242,60 +247,60 @@ class Action: ) class UserValves(BaseModel): - 文档标题来源: str = Field( - default="chat_title", + 文档标题来源: Optional[str] = Field( + default=None, description="Title Source: 'chat_title' (Chat Title), 'ai_generated' (AI Generated), 'markdown_title' (Markdown Title)", ) - 界面语言: str = Field( - default="zh", + 界面语言: Optional[str] = Field( + default=None, description="UI language for export messages. Options: 'en' (English), 'zh' (Chinese)", ) - 英文字体: str = Field( - default="Calibri", + 英文字体: Optional[str] = Field( + default=None, description="Font for Latin characters (e.g., 'Times New Roman', '', 'Arial')", ) - 中文字体: str = Field( - default="SimSun", + 中文字体: Optional[str] = Field( + default=None, description="Font for Asian characters (e.g., 'SimSun', 'Microsoft YaHei', 'PingFang SC')", ) - 代码字体: str = Field( - default="Consolas", + 代码字体: Optional[str] = Field( + default=None, description="Font for code blocks and inline code (e.g., 'Consolas', 'Courier New', 'Monaco')", ) - 表头背景色: str = Field( - default="F2F2F2", + 表头背景色: Optional[str] = Field( + default=None, description="Table header background color (hex, without #)", ) - 表格隔行背景色: str = Field( - default="FBFBFB", + 表格隔行背景色: Optional[str] = Field( + default=None, description="Table zebra stripe background color for alternate rows (hex, without #)", ) - Mermaid_PNG缩放比例: float = Field( - default=3.0, + Mermaid_PNG缩放比例: Optional[float] = Field( + default=None, description="PNG render resolution multiplier (higher = clearer, larger file)", ) - Mermaid显示比例: float = Field( - default=1.0, + 
Mermaid显示比例: Optional[float] = Field( + default=None, description="Diagram width relative to available page width (<=1 recommended)", ) - Mermaid布局优化: bool = Field( - default=False, + Mermaid布局优化: Optional[bool] = Field( + default=None, description="Optimize Mermaid layout: convert LR to TD for graph/flowchart", ) - Mermaid背景色: str = Field( - default="", + Mermaid背景色: Optional[str] = Field( + default=None, description="Mermaid background color. Empty = transparent (recommended for Word dark mode). Used only for optional PNG fill.", ) - 启用Mermaid图注: bool = Field( - default=True, + 启用Mermaid图注: Optional[bool] = Field( + default=None, description="Add figure captions under Mermaid images/charts", ) - 启用数学公式: bool = Field( - default=True, + 启用数学公式: Optional[bool] = Field( + default=None, description="Enable LaTeX math block conversion (\\\\[...\\\\] and $$...$$) into Word equations", ) - 启用行内公式: bool = Field( - default=True, + 启用行内公式: Optional[bool] = Field( + default=None, description="Enable inline $...$ math conversion into Word equations (conservative parsing to reduce false positives)", ) @@ -449,11 +454,21 @@ class Action: user_id = __user__.get("id", "unknown_user") # Apply UserValves if present - if __user__ and "valves" in __user__: - # Update self.valves with user-specific values - for key, value in __user__["valves"].model_dump().items(): - if hasattr(self.valves, key): - setattr(self.valves, key, value) + if __user__: + # Robustly parse UserValves whether it's a dict or Pydantic model + raw_valves = __user__.get("valves", {}) + if isinstance(raw_valves, self.UserValves): + user_valves = raw_valves + elif isinstance(raw_valves, dict): + user_valves = self.UserValves(**raw_valves) + else: + user_valves = None + + if user_valves: + for key, value in user_valves.model_dump(exclude_unset=True).items(): + # Only override if the value is not None (and explicitly set) + if hasattr(self.valves, key) and value is not None: + setattr(self.valves, key, value) # Get 
user language from Valves configuration self._user_lang = self._get_lang_key(self.valves.界面语言) @@ -490,6 +505,37 @@ class Action: try: message_content = last_assistant_message["content"] if isinstance(message_content, str): + if __event_emitter__ and self.valves.SHOW_DEBUG_LOG: + debug_data = {} + for name, regex in [ + ("Details Block (详情块)", _ALL_DETAILS_RE), + ("Think Block (思考块)", _THINK_RE), + ("Analysis Block (分析块)", _ANALYSIS_RE), + ]: + matches = regex.findall(message_content) + if matches: + debug_data[name] = [ + (m[:200] + "...") if len(m) > 200 else m + for m in matches + ] + if debug_data: + await self._emit_debug_log( + __event_emitter__, + "上下文内容清理分析 (Context Stripping Analysis)", + debug_data, + ) + + # Log font configuration + await self._emit_debug_log( + __event_emitter__, + "字体配置 (Font Configuration)", + { + "英文字体 (Latin Font)": self.valves.英文字体, + "中文字体 (Asian Font)": self.valves.中文字体, + "代码字体 (Code Font)": self.valves.代码字体, + }, + ) + message_content = self._strip_reasoning_blocks(message_content) if not message_content or not message_content.strip(): @@ -1101,34 +1147,11 @@ class Action: return title.strip() if isinstance(title, str) else "" def clean_filename(self, name: str) -> str: - """Clean illegal characters from filename and strip emoji.""" + """清理文件名中的非法字符并移除 Emoji""" if not isinstance(name, str): return "" - def _is_emoji_codepoint(codepoint: int) -> bool: - # Common emoji ranges + flag regional indicators. - return ( - 0x1F000 <= codepoint <= 0x1FAFF - or 0x1F1E6 <= codepoint <= 0x1F1FF - or 0x2600 <= codepoint <= 0x26FF - or 0x2700 <= codepoint <= 0x27BF - or 0x2300 <= codepoint <= 0x23FF - or 0x2B00 <= codepoint <= 0x2BFF - ) - - def _is_emoji_modifier(codepoint: int) -> bool: - # VS15/VS16, ZWJ, keycap, skin tones, and tag characters used in some emoji sequences. 
- return ( - codepoint in (0x200D, 0xFE0E, 0xFE0F, 0x20E3) - or 0x1F3FB <= codepoint <= 0x1F3FF - or 0xE0020 <= codepoint <= 0xE007F - ) - - without_emoji = "".join( - ch - for ch in name - if not (_is_emoji_codepoint(ord(ch)) or _is_emoji_modifier(ord(ch))) - ) + without_emoji = self._remove_emojis(name) cleaned = re.sub(r'[\\/*?:"<>|]', "", without_emoji) cleaned = re.sub(r"\s+", " ", cleaned).strip().strip(".") return cleaned[:50].strip() @@ -1496,7 +1519,10 @@ class Action: # If there is no h1 in content, prepend chat title as h1 when provided if top_heading and not has_h1: - self.add_heading(doc, top_heading, 1) + # Remove emojis from title for a professional look + clean_title = self._remove_emojis(top_heading) + # Use Title style (level 0) for the main document title + self.add_heading(doc, clean_title, 0) lines = markdown_text.split("\n") i = 0 @@ -1756,7 +1782,7 @@ class Action: cur = text for _ in range(10): prev = cur - cur = _REASONING_DETAILS_RE.sub("", cur) + cur = _ALL_DETAILS_RE.sub("", cur) cur = _THINK_RE.sub("", cur) cur = _ANALYSIS_RE.sub("", cur) if cur == prev: @@ -2240,14 +2266,154 @@ class Action: font = style.font font.name = self.valves.英文字体 font.size = Pt(11) - # Set Asian font - style._element.rPr.rFonts.set(qn("w:eastAsia"), self.valves.中文字体) + + # Ensure rPr element exists + rPr = style._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + + # Set Latin and Asian fonts explicitly + rFonts.set(qn("w:ascii"), self.valves.英文字体) + rFonts.set(qn("w:hAnsi"), self.valves.英文字体) + rFonts.set(qn("w:eastAsia"), self.valves.中文字体) + + # Set language to zh-CN to prevent MS Gothic fallback (Japanese font) + lang = rPr.find(qn("w:lang")) + if lang is None: + lang = OxmlElement("w:lang") + rPr.append(lang) + lang.set(qn("w:val"), "en-US") + lang.set(qn("w:eastAsia"), "zh-CN") + + logger.info( + f"[Font Config] Latin: {self.valves.英文字体}, Asian: {self.valves.中文字体}" + ) # Set paragraph format paragraph_format = style.paragraph_format 
paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE paragraph_format.space_after = Pt(6) + # 配置 Title 样式 (用于文档标题) + # 标准格式: 22pt (二号), 加粗, 居中, 段后 24pt + if "Title" in doc.styles: + title_style = doc.styles["Title"] + title_font = title_style.font + title_font.name = self.valves.英文字体 + title_font.size = Pt(22) # 二号字体 + title_font.bold = True + title_font.color.rgb = RGBColor(0, 0, 0) + + # 段落格式: 根据配置设置对齐方式和间距 + title_pf = title_style.paragraph_format + alignment_map = { + "left": WD_ALIGN_PARAGRAPH.LEFT, + "center": WD_ALIGN_PARAGRAPH.CENTER, + "right": WD_ALIGN_PARAGRAPH.RIGHT, + } + title_pf.alignment = alignment_map.get( + self.valves.标题对齐方式.lower(), WD_ALIGN_PARAGRAPH.CENTER + ) + title_pf.space_before = Pt(0) + title_pf.space_after = Pt(24) + + t_rPr = title_style._element.get_or_add_rPr() + t_rFonts = t_rPr.get_or_add_rFonts() + t_rFonts.set(qn("w:ascii"), self.valves.英文字体) + t_rFonts.set(qn("w:hAnsi"), self.valves.英文字体) + t_rFonts.set(qn("w:eastAsia"), self.valves.中文字体) + + # Set language for Title + t_lang = t_rPr.find(qn("w:lang")) + if t_lang is None: + t_lang = OxmlElement("w:lang") + t_rPr.append(t_lang) + t_lang.set(qn("w:val"), "en-US") + t_lang.set(qn("w:eastAsia"), "zh-CN") + + # 标准标题字号 (基于中文文档规范): + # Heading 1: 16pt (三号), 加粗, 段前 24pt, 段后 12pt + # Heading 2: 15pt (小三), 加粗, 段前 18pt, 段后 6pt + # Heading 3: 14pt (四号), 加粗, 段前 12pt, 段后 6pt + # Heading 4-9: 12pt (小四), 加粗, 逐级减小间距 + heading_formats = { + 1: {"size": 16, "space_before": 24, "space_after": 12}, + 2: {"size": 15, "space_before": 18, "space_after": 6}, + 3: {"size": 14, "space_before": 12, "space_after": 6}, + 4: {"size": 12, "space_before": 12, "space_after": 6}, + 5: {"size": 12, "space_before": 6, "space_after": 6}, + 6: {"size": 11, "space_before": 6, "space_after": 3}, + 7: {"size": 11, "space_before": 6, "space_after": 3}, + 8: {"size": 10.5, "space_before": 6, "space_after": 3}, + 9: {"size": 10.5, "space_before": 6, "space_after": 3}, + } + + # Apply font settings to 
Heading 1-9 + for i in range(1, 10): + style_id = f"Heading {i}" + if style_id in doc.styles: + heading_style = doc.styles[style_id] + heading_font = heading_style.font + heading_font.name = self.valves.英文字体 + heading_font.color.rgb = RGBColor(0, 0, 0) + + # 应用标准格式 + fmt = heading_formats.get( + i, {"size": 11, "space_before": 6, "space_after": 3} + ) + heading_font.size = Pt(fmt["size"]) + heading_font.bold = True + + heading_pf = heading_style.paragraph_format + heading_pf.space_before = Pt(fmt["space_before"]) + heading_pf.space_after = Pt(fmt["space_after"]) + + # Ensure rPr exists + h_rPr = heading_style._element.get_or_add_rPr() + h_rFonts = h_rPr.get_or_add_rFonts() + + # Set fonts + h_rFonts.set(qn("w:ascii"), self.valves.英文字体) + h_rFonts.set(qn("w:hAnsi"), self.valves.英文字体) + h_rFonts.set(qn("w:eastAsia"), self.valves.中文字体) + + # Set language for Heading + h_lang = h_rPr.find(qn("w:lang")) + if h_lang is None: + h_lang = OxmlElement("w:lang") + h_rPr.append(h_lang) + h_lang.set(qn("w:val"), "en-US") + h_lang.set(qn("w:eastAsia"), "zh-CN") + + def _remove_emojis(self, text: str) -> str: + """从文本中移除 Emoji (基于 Unicode 范围)""" + if not isinstance(text, str): + return "" + + def _is_emoji_codepoint(codepoint: int) -> bool: + # Common emoji ranges + flag regional indicators. + return ( + 0x1F000 <= codepoint <= 0x1FAFF + or 0x1F1E6 <= codepoint <= 0x1F1FF + or 0x2600 <= codepoint <= 0x26FF + or 0x2700 <= codepoint <= 0x27BF + or 0x2300 <= codepoint <= 0x23FF + or 0x2B00 <= codepoint <= 0x2BFF + ) + + def _is_emoji_modifier(codepoint: int) -> bool: + # VS15/VS16, ZWJ, keycap, skin tones, and tag characters used in some emoji sequences. 
+ return ( + codepoint in (0x200D, 0xFE0E, 0xFE0F, 0x20E3) + or 0x1F3FB <= codepoint <= 0x1F3FF + or 0xE0020 <= codepoint <= 0xE007F + ) + + return "".join( + ch + for ch in text + if not (_is_emoji_codepoint(ord(ch)) or _is_emoji_modifier(ord(ch))) + ) + def add_heading(self, doc: Document, text: str, level: int): """Add heading""" # Word heading levels start from 0, Markdown from 1 @@ -2283,6 +2449,12 @@ class Action: if strike: run.font.strike = True + # Explicitly set East Asian font to prevent MS Gothic fallback + # Word may not inherit w:eastAsia from style, causing Japanese font fallback for CJK + rPr = run._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + rFonts.set(qn("w:eastAsia"), self.valves.中文字体) + def _add_inline_code(self, paragraph, s: str): if s == "": return @@ -2678,7 +2850,11 @@ class Action: ): u = self._normalize_url(url) if not u: - paragraph.add_run(display_text or text) + run = paragraph.add_run(display_text or text) + # Set East Asian font to prevent MS Gothic fallback + rPr = run._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + rFonts.set(qn("w:eastAsia"), self.valves.中文字体) return part = getattr(paragraph, "part", None) @@ -2687,6 +2863,10 @@ class Action: run = paragraph.add_run(display_text or text) run.font.color.rgb = RGBColor(0, 0, 255) run.font.underline = True + # Set East Asian font to prevent MS Gothic fallback + rPr = run._element.get_or_add_rPr() + rFonts = rPr.get_or_add_rFonts() + rFonts.set(qn("w:eastAsia"), self.valves.中文字体) return r_id = part.relate_to(u, RT.HYPERLINK, is_external=True) @@ -2700,6 +2880,11 @@ class Action: rStyle.set(qn("w:val"), "Hyperlink") rPr.append(rStyle) + # Set East Asian font to prevent MS Gothic fallback + rFonts = OxmlElement("w:rFonts") + rFonts.set(qn("w:eastAsia"), self.valves.中文字体) + rPr.append(rFonts) + color = OxmlElement("w:color") color.set(qn("w:val"), "0000FF") rPr.append(color)