From eaa6319991adf72bca55a5a67951b9d10c76909f Mon Sep 17 00:00:00 2001 From: fujie Date: Sat, 20 Dec 2025 12:34:49 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E7=B3=BB=E7=BB=9F=E3=80=81=E5=A4=9A=E7=A7=8D=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E3=80=81=E5=BC=80=E5=8F=91=E6=8C=87=E5=8D=97?= =?UTF-8?q?=E5=8F=8A=E5=A4=9A=E8=AF=AD=E8=A8=80=E6=96=87=E6=A1=A3=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 141 + CHANGELOG.md | 27 + CONTRIBUTING.md | 51 + LICENSE | 21 + README.md | 72 + README_CN.md | 99 + docs/en/gemini_manifold_plugin_philosophy.md | 45 + docs/en/implementation_plan.md | 50 + docs/en/plugin_development_guide.md | 234 ++ ...ction_plugin_export_to_excel_example_cn.md | 117 + ...action_plugin_smart_mind_map_example_cn.md | 291 ++ ...in_async_context_compression_example_cn.md | 235 ++ ...in_gemini_manifold_companion_example_cn.md | 163 + .../filter_plugin_inject_env_example_cn.md | 134 + .../gemini_manifold_plugin_examples.md | 1838 +++++++++ .../pipe_plugin_gemini_manifold_example_cn.md | 185 + .../plugin/development/_category_.json | 7 + docs/features/plugin/development/events.mdx | 424 +++ .../plugin/development/reserved-args.mdx | 340 ++ docs/features/plugin/development/valves.mdx | 77 + docs/features/plugin/functions/action.mdx | 316 ++ docs/features/plugin/functions/filter.mdx | 423 +++ docs/features/plugin/functions/index.mdx | 133 + docs/features/plugin/functions/pipe.mdx | 400 ++ docs/features/plugin/index.mdx | 91 + docs/features/plugin/migration/index.mdx | 255 ++ docs/features/plugin/tools/development.mdx | 1651 ++++++++ docs/features/plugin/tools/index.mdx | 144 + .../plugin/tools/openapi-servers/faq.mdx | 176 + .../plugin/tools/openapi-servers/index.mdx | 70 + .../plugin/tools/openapi-servers/mcp.mdx | 199 + .../tools/openapi-servers/open-webui.mdx | 211 + .../future_plugin_development_roadmap_cn.md | 2562 +++++++++++++ 
docs/zh/plugin_development_guide.md | 234 ++ docs/zh/从问一个AI到运营一支AI团队.md | 2236 +++++++++++ plugins/README.md | 124 + plugins/README_CN.md | 124 + plugins/actions/README.md | 227 ++ plugins/actions/README_CN.md | 226 ++ plugins/actions/export_to_excel/README.md | 15 + plugins/actions/export_to_excel/README_CN.md | 15 + .../export_to_excel/export_to_excel.py | 804 ++++ .../export_to_excel/export_to_excel_cn.py | 806 ++++ plugins/actions/knowledge-card/README.md | 15 + plugins/actions/knowledge-card/README_CN.md | 15 + .../actions/knowledge-card/knowledge_card.py | 554 +++ .../knowledge-card/knowledge_card_en.py | 554 +++ plugins/actions/smart-mind-map/README.md | 210 + plugins/actions/smart-mind-map/README_CN.md | 210 + .../actions/smart-mind-map/smart_mind_map.py | 611 +++ plugins/actions/smart-mind-map/思维导图.py | 611 +++ plugins/actions/summary/README.md | 15 + plugins/actions/summary/README_CN.md | 15 + plugins/actions/summary/summary.py | 527 +++ plugins/actions/summary/精读.py | 521 +++ plugins/filters/README.md | 45 + plugins/filters/README_CN.md | 67 + .../async_context_compression.md | 77 + .../async_context_compression.py | 780 ++++ .../async_context_compression_cn.md | 77 + .../async-context-compression/工作流程指南.md | 662 ++++ .../异步上下文压缩.py | 1100 ++++++ .../异步上下文压缩优化.md | 45 + .../context_enhancement_filter.py | 572 +++ .../gemini_manifold_companion.py | 1102 ++++++ plugins/filters/multi_model_context_merger.py | 212 ++ plugins/pipelines/moe_prompt_refiner.py | 208 + .../pipelines/moe_prompt_refiner/valves.json | 1 + plugins/pipelines/requirements.txt | 0 plugins/pipes/README.md | 60 + plugins/pipes/README_CN.md | 60 + plugins/pipes/gemini_mainfold/README.md | 54 + plugins/pipes/gemini_mainfold/README_CN.md | 54 + .../pipes/gemini_mainfold/gemini_manifold.py | 3382 +++++++++++++++++ 74 files changed, 28409 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE create mode 100644 
README.md create mode 100644 README_CN.md create mode 100644 docs/en/gemini_manifold_plugin_philosophy.md create mode 100644 docs/en/implementation_plan.md create mode 100644 docs/en/plugin_development_guide.md create mode 100644 docs/examples/action_plugin_export_to_excel_example_cn.md create mode 100644 docs/examples/action_plugin_smart_mind_map_example_cn.md create mode 100644 docs/examples/filter_plugin_async_context_compression_example_cn.md create mode 100644 docs/examples/filter_plugin_gemini_manifold_companion_example_cn.md create mode 100644 docs/examples/filter_plugin_inject_env_example_cn.md create mode 100644 docs/examples/gemini_manifold_plugin_examples.md create mode 100644 docs/examples/pipe_plugin_gemini_manifold_example_cn.md create mode 100644 docs/features/plugin/development/_category_.json create mode 100644 docs/features/plugin/development/events.mdx create mode 100644 docs/features/plugin/development/reserved-args.mdx create mode 100644 docs/features/plugin/development/valves.mdx create mode 100644 docs/features/plugin/functions/action.mdx create mode 100644 docs/features/plugin/functions/filter.mdx create mode 100644 docs/features/plugin/functions/index.mdx create mode 100644 docs/features/plugin/functions/pipe.mdx create mode 100644 docs/features/plugin/index.mdx create mode 100644 docs/features/plugin/migration/index.mdx create mode 100644 docs/features/plugin/tools/development.mdx create mode 100644 docs/features/plugin/tools/index.mdx create mode 100644 docs/features/plugin/tools/openapi-servers/faq.mdx create mode 100644 docs/features/plugin/tools/openapi-servers/index.mdx create mode 100644 docs/features/plugin/tools/openapi-servers/mcp.mdx create mode 100644 docs/features/plugin/tools/openapi-servers/open-webui.mdx create mode 100644 docs/zh/future_plugin_development_roadmap_cn.md create mode 100644 docs/zh/plugin_development_guide.md create mode 100644 docs/zh/从问一个AI到运营一支AI团队.md create mode 100644 plugins/README.md create mode 100644 
plugins/README_CN.md create mode 100644 plugins/actions/README.md create mode 100644 plugins/actions/README_CN.md create mode 100644 plugins/actions/export_to_excel/README.md create mode 100644 plugins/actions/export_to_excel/README_CN.md create mode 100644 plugins/actions/export_to_excel/export_to_excel.py create mode 100644 plugins/actions/export_to_excel/export_to_excel_cn.py create mode 100644 plugins/actions/knowledge-card/README.md create mode 100644 plugins/actions/knowledge-card/README_CN.md create mode 100644 plugins/actions/knowledge-card/knowledge_card.py create mode 100644 plugins/actions/knowledge-card/knowledge_card_en.py create mode 100644 plugins/actions/smart-mind-map/README.md create mode 100644 plugins/actions/smart-mind-map/README_CN.md create mode 100644 plugins/actions/smart-mind-map/smart_mind_map.py create mode 100644 plugins/actions/smart-mind-map/思维导图.py create mode 100644 plugins/actions/summary/README.md create mode 100644 plugins/actions/summary/README_CN.md create mode 100644 plugins/actions/summary/summary.py create mode 100644 plugins/actions/summary/精读.py create mode 100644 plugins/filters/README.md create mode 100644 plugins/filters/README_CN.md create mode 100644 plugins/filters/async-context-compression/async_context_compression.md create mode 100644 plugins/filters/async-context-compression/async_context_compression.py create mode 100644 plugins/filters/async-context-compression/async_context_compression_cn.md create mode 100644 plugins/filters/async-context-compression/工作流程指南.md create mode 100644 plugins/filters/async-context-compression/异步上下文压缩.py create mode 100644 plugins/filters/async-context-compression/异步上下文压缩优化.md create mode 100644 plugins/filters/context_enhancement_filter/context_enhancement_filter.py create mode 100644 plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py create mode 100644 plugins/filters/multi_model_context_merger.py create mode 100644 plugins/pipelines/moe_prompt_refiner.py 
create mode 100644 plugins/pipelines/moe_prompt_refiner/valves.json create mode 100644 plugins/pipelines/requirements.txt create mode 100644 plugins/pipes/README.md create mode 100644 plugins/pipes/README_CN.md create mode 100644 plugins/pipes/gemini_mainfold/README.md create mode 100644 plugins/pipes/gemini_mainfold/README_CN.md create mode 100644 plugins/pipes/gemini_mainfold/gemini_manifold.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3117b43 --- /dev/null +++ b/.gitignore @@ -0,0 +1,141 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# 
OS +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +*.log +logs/ + +# Temporary files +*.tmp +*.temp + +# OpenWebUI specific +# Add any specific ignores for OpenWebUI plugins if needed diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..01e8a65 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,27 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [Unreleased] + +### 🚀 New Features +- **Smart Mind Map**: Updated to v0.7.3. Improved description and metadata. +- **Knowledge Card**: Updated to v0.2.1. Improved description and metadata. +- **Documentation**: Added comprehensive `plugin_development_guide_cn.md` consolidating all previous guides. + +### 📦 Project Structure +- **Renamed**: Project renamed from `awesome-openwebui` to **OpenWebUI Extras**. +- **Reorganized**: + - Moved `run.py` to `scripts/`. + - Moved large documentation files to `docs/`. + - Removed `requirements.txt` to emphasize "resource collection" nature. +- **Added**: `CONTRIBUTING.md` guide. + +### 📝 Documentation +- **README**: Updated English and Chinese READMEs with new project name and structure. +- **Plan**: Updated `implementation_plan.md` to reflect the new direction. + +--- + +## [0.1.0] - 2025-12-19 +- Initial release of the reorganized project structure. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..0ea56ca --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,51 @@ +# 贡献指南 (Contributing Guide) + +感谢你对 **OpenWebUI Extras** 感兴趣!我们非常欢迎社区贡献更多的插件、提示词和创意。 + +## 🤝 如何贡献 + +### 1. 分享提示词 (Prompts) + +如果你有一个好用的提示词: +1. 在 `prompts/` 目录下找到合适的分类(如 `coding/`, `writing/`)。如果没有合适的,可以新建一个文件夹。 +2. 创建一个新的 `.md` 或 `.json` 文件。 +3. 提交 Pull Request (PR)。 + +### 2. 开发插件 (Plugins) + +如果你开发了一个新的 OpenWebUI 插件 (Function/Tool): +1. 确保你的插件代码包含完整的元数据(Frontmatter): + ```python + """ + title: 插件名称 + author: 你的名字 + version: 0.1.0 + description: 简短描述插件的功能 + """ + ``` +2. 
将插件文件放入 `plugins/` 目录下的合适位置: + - `plugins/actions/`: 用于添加按钮或修改消息的 Action 插件。 + - `plugins/filters/`: 用于拦截请求或响应的 Filter 插件。 + - `plugins/pipes/`: 用于自定义模型或 API 的 Pipe 插件。 + - `plugins/tools/`: 用于 LLM 调用的 Tool 插件。 +3. 建议在 `docs/` 下添加一个简单的使用说明。 + +### 3. 改进文档 + +如果你发现文档有错误或可以改进的地方,直接提交 PR 即可。 + +## 🛠️ 开发规范 + +- **代码风格**:Python 代码请遵循 PEP 8 规范。 +- **注释**:关键逻辑请添加注释,方便他人理解。 +- **测试**:提交前请在本地 OpenWebUI 环境中测试通过。 + +## 📝 提交 PR + +1. Fork 本仓库。 +2. 创建一个新的分支 (`git checkout -b feature/AmazingFeature`)。 +3. 提交你的修改 (`git commit -m 'Add some AmazingFeature'`)。 +4. 推送到分支 (`git push origin feature/AmazingFeature`)。 +5. 开启一个 Pull Request。 + +再次感谢你的贡献!🚀 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c5d3ac2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Awesome OpenWebUI Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..7d9e482 --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +# OpenWebUI Extras + +English | [中文](./README_CN.md) + +A collection of enhancements, plugins, and prompts for [OpenWebUI](https://github.com/open-webui/open-webui), developed and curated for personal use to extend functionality and improve experience. + +[Contributing](./CONTRIBUTING.md) | [Changelog](./CHANGELOG.md) + +## 📦 Project Contents + +### 🧩 Plugins + +Located in the `plugins/` directory, containing Python-based enhancements: + +#### Actions +- **Smart Mind Map** (`smart-mind-map`): Generates interactive mind maps from text. +- **Knowledge Card** (`knowledge-card`): Creates beautiful flashcards for learning. +- **Export to Excel** (`export_to_excel`): Exports chat history to Excel files. +- **Summary** (`summary`): Text summarization tool. +- **AI Agent Solver** (`ai-agent-solver`): Intelligent agent solver. + +#### Filters +- **Async Context Compression** (`async-context-compression`): Optimizes token usage via context compression. +- **Context Enhancement** (`context_enhancement_filter`): Enhances chat context. +- **Gemini Manifold Companion** (`gemini_manifold_companion`): Companion filter for Gemini Manifold. +- **Multi-Model Context Merger** (`multi_model_context_merger`): Merges context from multiple models. + +#### Pipes +- **Gemini Manifold** (`gemini_mainfold`): Pipeline for Gemini model integration. + +#### Pipelines +- **MoE Prompt Refiner** (`moe_prompt_refiner`): Refines prompts for Mixture of Experts (MoE) summary requests to generate high-quality comprehensive reports. + +### 🎯 Prompts + +Located in the `prompts/` directory, containing fine-tuned System Prompts: + +- **Coding**: Programming assistance prompts. +- **Marketing**: Marketing and copywriting prompts. 
+ +## 📖 Documentation + +Located in the `docs/en/` directory: + +- **[Plugin Development Guide](./docs/en/plugin_development_guide.md)** - The authoritative guide covering everything from getting started to advanced patterns and best practices. ⭐ + +For code examples, please check the `docs/examples/` directory. + +## 🚀 Quick Start + +This project is a collection of resources and does not require a Python environment. Simply download the files you need and import them into your OpenWebUI instance. + +### Using Prompts + +1. Browse the `/prompts` directory and select a prompt file (`.md`). +2. Copy the file content. +3. In the OpenWebUI chat interface, click the "Prompt" button above the input box. +4. Paste the content and save. + +### Using Plugins + +1. Browse the `/plugins` directory and download the plugin file (`.py`) you need. +2. Go to OpenWebUI **Admin Panel** -> **Settings** -> **Plugins**. +3. Click the upload button and select the `.py` file you just downloaded. +4. Once uploaded, refresh the page to enable the plugin in your chat settings or toolbar. + +### Contributing + +If you have great prompts or plugins to share: +1. Fork this repository. +2. Add your files to the appropriate `prompts/` or `plugins/` directory. +3. Submit a Pull Request. 
diff --git a/README_CN.md b/README_CN.md new file mode 100644 index 0000000..b67832b --- /dev/null +++ b/README_CN.md @@ -0,0 +1,99 @@ +# OpenWebUI Extras + +[English](./README.md) | 中文 + +OpenWebUI 增强功能集合。包含个人开发与收集的### 🧩 插件 (Plugins) + +位于 `plugins/` 目录,包含各类 Python 编写的功能增强插件: + +#### Actions (交互增强) +- **Smart Mind Map** (`smart-mind-map`): 智能分析文本并生成交互式思维导图。 +- **Knowledge Card** (`knowledge-card`): 快速生成精美的学习记忆卡片。 +- **Export to Excel** (`export_to_excel`): 将对话内容导出为 Excel 文件。 +- **Summary** (`summary`): 文本摘要生成工具。 +- **AI Agent Solver** (`ai-agent-solver`): 智能代理解决器。 + +#### Filters (消息处理) +- **Async Context Compression** (`async-context-compression`): 异步上下文压缩,优化 Token 使用。 +- **Context Enhancement** (`context_enhancement_filter`): 上下文增强过滤器。 +- **Gemini Manifold Companion** (`gemini_manifold_companion`): Gemini Manifold 配套增强。 +- **Multi-Model Context Merger** (`multi_model_context_merger`): 多模型上下文合并。 + +#### Pipes (模型管道) +- **Gemini Manifold** (`gemini_mainfold`): 集成 Gemini 模型的管道。 + +#### Pipelines (工作流管道) +- **MoE Prompt Refiner** (`moe_prompt_refiner`): 优化多模型 (MoE) 汇总请求的提示词,生成高质量的综合报告。 + +### 🎯 提示词 (Prompts) + +位于 `prompts/` 目录,包含精心调优的 System Prompts: + +- **Coding**: 编程辅助类提示词。 +- **Marketing**: 营销文案类提示词。(`/prompts/marketing`): 内容创作、品牌策划、市场分析相关的提示词 + +每个提示词都独立保存为 Markdown 文件,可直接在 OpenWebUI 中使用。 + +### 🔧 插件 (Plugins) + +{{ ... 
}} + +[贡献指南](./CONTRIBUTING.md) | [更新日志](./CHANGELOG.md) + +## 📦 项目内容 + +### 🎯 提示词 (Prompts) + +位于 `/prompts` 目录,包含针对不同领域的优质提示词模板: + +- **编程类** (`/prompts/coding`): 代码生成、调试、优化相关的提示词 +- **营销类** (`/prompts/marketing`): 内容创作、品牌策划、市场分析相关的提示词 + +每个提示词都独立保存为 Markdown 文件,可直接在 OpenWebUI 中使用。 + +### 🔧 插件 (Plugins) + +位于 `/plugins` 目录,提供三种类型的插件扩展: + +- **过滤器 (Filters)** - 在用户输入发送给 LLM 前进行处理和优化 + - 异步上下文压缩:智能压缩长上下文,优化 token 使用效率 + +- **动作 (Actions)** - 自定义功能,从聊天中触发 + - 思维导图生成:快速生成和导出思维导图 + +- **管道 (Pipes)** - 对 LLM 响应进行处理和增强 + - 各类响应处理和格式化插件 + +## 📖 开发文档 + +位于 `docs/zh/` 目录: + +- **[插件开发权威指南](./docs/zh/plugin_development_guide.md)** - 整合了入门教程、核心 SDK 详解及最佳实践的系统化指南。 ⭐ +- **[从问一个AI到运营一支AI团队](./docs/zh/从问一个AI到运营一支AI团队.md)** - 深度运营经验分享。 + +更多示例请查看 `docs/examples/` 目录。 + +## 🚀 快速开始 + +本项目是一个资源集合,无需安装 Python 环境。你只需要下载对应的文件并导入到你的 OpenWebUI 实例中即可。 + +### 使用提示词 (Prompts) + +1. 在 `/prompts` 目录中浏览并选择你感兴趣的提示词文件 (`.md`)。 +2. 复制文件内容。 +3. 在 OpenWebUI 聊天界面中,点击输入框上方的 "Prompt" 按钮。 +4. 粘贴内容并保存。 + +### 使用插件 (Plugins) + +1. 在 `/plugins` 目录中浏览并下载你需要的插件文件 (`.py`)。 +2. 打开 OpenWebUI 的 **管理员面板 (Admin Panel)** -> **设置 (Settings)** -> **插件 (Plugins)**。 +3. 点击上传按钮,选择刚才下载的 `.py` 文件。 +4. 上传成功后,刷新页面,你就可以在聊天设置或工具栏中启用该插件了。 + +### 贡献代码 + +如果你有优质的提示词或插件想要分享: +1. Fork 本仓库。 +2. 将你的文件添加到对应的 `prompts/` 或 `plugins/` 目录。 +3. 提交 Pull Request。 diff --git a/docs/en/gemini_manifold_plugin_philosophy.md b/docs/en/gemini_manifold_plugin_philosophy.md new file mode 100644 index 0000000..f0c113d --- /dev/null +++ b/docs/en/gemini_manifold_plugin_philosophy.md @@ -0,0 +1,45 @@ +# Gemini Manifold 插件开发哲学 + +## 概览 + +- 源码位于 `plugins/pipes/gemini_mainfold/gemini_manifold.py`,作为 Open WebUI 的 Pipe 插件,主要负责把前端的请求转化成 Google Gemini/Vertex AI 的调用,并将结果通过 `AsyncGenerator` 回流给前端。 +- 插件采用了 `Valves + UserValves` 的配置模式、异步事件与状态汇报、细粒度日志、文件缓存与上传中间件,以及统一响应处理器,充分体现了 Open WebUI 通用插件的开发模式。 + +## 核心模块 + +1. 
**`Pipe` 类(入口)** + - `pipes` 方法注册可选模型,缓存模型列表并仅在配置变更时刷新。 + - `pipe` 方法为每个请求建立 Google GenAI 客户端、`EventEmitter` 与 `FilesAPIManager`,构建 `GeminiContentBuilder`,并统一把返回值交给 `_unified_response_processor`。 + +2. **`GeminiContentBuilder`(请求构建)** + - 解析 Open WebUI 消息、引用历史、文件上传、YouTube/Markdown 媒体等内容,并通过 `UploadStatusManager` 与 `FilesAPIManager` 协作,确保上传进度可视。 + +3. **`FilesAPIManager`(文件缓存+上传)** + - 采用 xxHash 内容地址、热/暖/冷路径、自定义锁、 TTL 缓存等手段防止重复上传,同时会在发生错误时用 `FilesAPIError` 抛出并告知前端。 + +4. **`EventEmitter` + `UploadStatusManager`(状态反馈)** + - 抽象 Toast/Status/Completion 的交互,按需异步发送,赋予前端实时反馈能力,避免阻塞主流程。 + +5. **统一响应处理与后置处理** + - `_unified_response_processor` 兼容流式/一次性响应,调用 `_process_part`、`_disable_special_tags` 保护前端,再在 `_do_post_processing` 发出 usage、grounding 等数据。 + +## 与 Open WebUI 插件哲学契合的实践 + +- **配置层叠与安全**:`Valves` 提供 admin 默认,`UserValves` 允许用户按需覆盖。`USER_MUST_PROVIDE_AUTH_CONFIG` + `AUTH_WHITELIST` 确保敏感场景必须使用各自凭证。 +- **异步状态与进度可视**:所有上传/调用都在 `EventEmitter` 中报告 toast/status,`UploadStatusManager` 用 queue 追踪并呈现进度,流式响应直接产出 `choices` chunk 与 `[DONE]`,前端无需额外猜测。 +- **功能可拓展性**:基于 `Functions.get_function_by_id` 检查 filter、依据 `features`/`toggle` 开启 Search、Code Execution、URL Context、Maps grounding,体现 Open WebUI 组件可组合的插件模型。 +- **文件与资源复用**:`FilesAPIManager` 通过 deterministic name、缓存、stateless GET,提高效率;生成图片也回写到 Open WebUI 的 `Files` 模型,为前端提供可持久化的 URL。 +- **透明日志与错误可控**:自定义 loguru handler(截断 `payload`)、统一的异常类、对 SDK 错误的 toast+error emission、对特殊 tag 的 ZWS 处理,确保插件运行时的状态始终可追踪并兼容前端。 +- **统一流程**:全链路从 request -> builder -> client -> response processor -> post-processing,严格对齐 Open WebUI pipe+filter 结构,便于扩展和维护。 + +## 下一步建议 + +- 如果需要,可将上述内容整理到 Plugin README/开发指南中。也可以基于该文档再绘制流程图或生成寄语性文档,供团队使用。 + +## 进一步参考 + +详细的代码示例与使用场景请参见 `docs/examples/gemini_manifold_plugin_examples.md`,包括: + +- 配置层叠、异步事件、文件缓存等基础模式 +- 响应处理、标签防护、异常管理等中级技巧 +- 后处理流程、日志控制等高级实践 diff --git a/docs/en/implementation_plan.md b/docs/en/implementation_plan.md new file mode 100644 index 0000000..82d1cc8 --- /dev/null +++ b/docs/en/implementation_plan.md @@ -0,0 
+1,50 @@ +# 开源项目重组实施计划 + +## 1. 目标 +将 `openwebui-extras` 打造为一个 **OpenWebUI 增强功能集合库**,专注于分享个人开发和收集的优质插件、提示词,而非作为一个独立的 Python 应用程序发布。 + +## 2. 当前状态分析 +- **定位明确**:项目核心价值在于内容(Plugins, Prompts, Docs),而非运行环境。 +- **结构已优化**: + - `plugins/`:核心插件资源。 + - `prompts/`:提示词资源。 + - `docs/`:详细的使用和开发文档。 + - `scripts/`:辅助工具脚本(如本地测试用的 `run.py`)。 +- **已移除不必要文件**:移除了 `requirements.txt`,避免用户误以为需要配置 Python 环境。 + +## 3. 重组方案 + +### 3.1 目录结构 +保持当前的清晰结构,强调“拿来即用”: + +``` +openwebui-extras/ +├── docs/ # 文档与教程 +├── plugins/ # 插件库 (核心资源) +│ ├── actions/ +│ ├── filters/ +│ ├── pipelines/ +│ └── pipes/ +├── prompts/ # 提示词库 (核心资源) +├── scripts/ # 维护者工具 (非用户必须) +├── LICENSE # MIT 许可证 +├── README.md # 项目入口与资源索引 +└── index.html # 项目展示页 +``` + +### 3.2 核心调整 +1. **移除依赖管理**:删除了 `requirements.txt`。用户不需要 `pip install` 任何东西,只需下载对应的 `.py` 或 `.md` 文件导入 OpenWebUI 即可。 +2. **文档侧重**:README 和文档将侧重于“如何下载”和“如何导入”,而不是“如何安装项目”。 + +### 3.3 后续建议 +1. **资源索引**:建议在 `README.md` 中维护一个高质量的插件/提示词索引表,方便用户快速查找。 +2. **贡献指南**:制定简单的 `CONTRIBUTING.md`,告诉其他人如何提交他们的插件或提示词(例如:只需提交文件到对应目录)。 +3. **版本控制**:虽然不需要 Python 环境,但建议在插件文件的头部注释中保留版本号和兼容性说明(如 `Compatible with OpenWebUI v0.3.x`)。 + +## 4. 发布流程 +1. **提交更改**:`git add . && git commit -m "Update project structure for resource sharing"` +2. **推送到 GitHub**。 +3. **宣传**:在 OpenWebUI 社区分享此仓库链接。 + +--- +*生成时间:2025-12-19* diff --git a/docs/en/plugin_development_guide.md b/docs/en/plugin_development_guide.md new file mode 100644 index 0000000..d2ea013 --- /dev/null +++ b/docs/en/plugin_development_guide.md @@ -0,0 +1,234 @@ +# OpenWebUI Plugin Development Guide + +> This guide consolidates official documentation, SDK details, and best practices to provide a systematic tutorial for developers, from beginner to expert. + +## 📚 Table of Contents + +1. [Quick Start](#1-quick-start) +2. [Core Concepts & SDK Details](#2-core-concepts--sdk-details) +3. [Deep Dive into Plugin Types](#3-deep-dive-into-plugin-types) + * [Action](#31-action) + * [Filter](#32-filter) + * [Pipe](#33-pipe) +4. 
[Advanced Development Patterns](#4-advanced-development-patterns) +5. [Best Practices & Design Principles](#5-best-practices--design-principles) +6. [Troubleshooting](#6-troubleshooting) + +--- + +## 1. Quick Start + +### 1.1 What are OpenWebUI Plugins? + +OpenWebUI Plugins (officially called "Functions") are the primary way to extend the platform's capabilities. Running in a backend Python environment, they allow you to: +* 🔌 **Integrate New Models**: Connect to Claude, Gemini, or custom RAGs via Pipes. +* 🎨 **Enhance Interaction**: Add buttons (e.g., "Export", "Generate Chart") next to messages via Actions. +* 🔧 **Intervene in Processes**: Modify data before requests or after responses (e.g., inject context, filter sensitive words) via Filters. + +### 1.2 Your First Plugin (Hello World) + +Save the following code as `hello.py` and upload it to the **Functions** panel in OpenWebUI: + +```python +""" +title: Hello World Action +author: Demo +version: 1.0.0 +""" + +from pydantic import BaseModel, Field +from typing import Optional + +class Action: + class Valves(BaseModel): + greeting: str = Field(default="Hello", description="Greeting message") + + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __event_emitter__=None, + __user__=None + ) -> Optional[dict]: + user_name = __user__.get("name", "Friend") if __user__ else "Friend" + + if __event_emitter__: + await __event_emitter__({ + "type": "notification", + "data": {"type": "success", "content": f"{self.valves.greeting}, {user_name}!"} + }) + return body +``` + +--- + +## 2. Core Concepts & SDK Details + +### 2.1 ⚠️ Important: Sync vs Async + +OpenWebUI plugins run within an `asyncio` event loop. +* **Principle**: All I/O operations (database, file, network) must be non-blocking. +* **Pitfall**: Calling synchronous methods directly (e.g., `time.sleep`, `requests.get`) will freeze the entire server. 
+* **Solution**: Wrap synchronous calls using `await asyncio.to_thread(sync_func, ...)`. + +### 2.2 Core Parameters + +All plugin methods (`inlet`, `outlet`, `pipe`, `action`) support injecting the following special parameters: + +| Parameter | Type | Description | +| :--- | :--- | :--- | +| `body` | `dict` | **Core Data**. Contains request info like `messages`, `model`, `stream`. | +| `__user__` | `dict` | **Current User**. Contains `id`, `name`, `role`, `valves` (user config), etc. | +| `__metadata__` | `dict` | **Metadata**. Contains `chat_id`, `message_id`. The `variables` field holds preset variables like `{{USER_NAME}}`, `{{CURRENT_TIME}}`. | +| `__request__` | `Request` | **FastAPI Request Object**. Access `app.state` for cross-plugin communication. | +| `__event_emitter__` | `func` | **One-way Notification**. Used to send Toast notifications or status bar updates. | +| `__event_call__` | `func` | **Two-way Interaction**. Used to execute JS code, show confirmation dialogs, or input boxes on the frontend. | + +### 2.3 Configuration System (Valves) + +* **`Valves`**: Global admin configuration. +* **`UserValves`**: User-level configuration (higher priority, overrides global). + +```python +class Filter: + class Valves(BaseModel): + API_KEY: str = Field(default="", description="Global API Key") + + class UserValves(BaseModel): + API_KEY: str = Field(default="", description="User Private API Key") + + def inlet(self, body, __user__): + # Prioritize user's Key + user_valves = __user__.get("valves", self.UserValves()) + api_key = user_valves.API_KEY or self.valves.API_KEY +``` + +--- + +## 3. Deep Dive into Plugin Types + +### 3.1 Action + +**Role**: Adds buttons below messages that trigger upon user click. + +**Advanced Usage: Execute JavaScript on Frontend (File Download Example)** + +```python +import base64 + +async def action(self, body, __event_call__): + # 1. 
Generate content on backend + content = "Hello OpenWebUI".encode() + b64 = base64.b64encode(content).decode() + + # 2. Send JS to frontend for execution + js = f""" + const blob = new Blob([atob('{b64}')], {{type: 'text/plain'}}); + const a = document.createElement('a'); + a.href = URL.createObjectURL(blob); + a.download = 'hello.txt'; + a.click(); + """ + await __event_call__({"type": "execute", "data": {"code": js}}) +``` + +### 3.2 Filter + +**Role**: Middleware that intercepts and modifies requests/responses. + +* **`inlet`**: Before request. Used for injecting context, modifying model parameters. +* **`outlet`**: After response. Used for formatting output, logging. +* **`stream`**: During streaming. Used for real-time sensitive word filtering. + +**Example: Injecting Environment Variables** + +```python +async def inlet(self, body, __metadata__): + vars = __metadata__.get("variables", {}) + context = f"Current Time: {vars.get('{{CURRENT_DATETIME}}')}" + + # Inject into System Prompt or first message + if body.get("messages"): + body["messages"][0]["content"] += f"\n\n{context}" + return body +``` + +### 3.3 Pipe + +**Role**: Custom Model/Agent. + +**Example: Simple OpenAI Wrapper** + +```python +import requests + +class Pipe: + def pipes(self): + return [{"id": "my-gpt", "name": "My GPT Wrapper"}] + + def pipe(self, body): + # Modify body here, e.g., force add prompt + headers = {"Authorization": f"Bearer {self.valves.API_KEY}"} + r = requests.post("https://api.openai.com/v1/chat/completions", json=body, headers=headers, stream=True) + return r.iter_lines() +``` + +--- + +## 4. Advanced Development Patterns + +### 4.1 Pipe & Filter Collaboration +Use `__request__.app.state` to share data between plugins. +* **Pipe**: `__request__.app.state.search_results = [...]` +* **Filter (Outlet)**: Read `search_results` and format them as citation links appended to the response. 
+ +### 4.2 Async Background Tasks +Execute time-consuming operations (e.g., summarization, database storage) in the background without blocking the user response. + +```python +import asyncio + +async def outlet(self, body, __metadata__): + asyncio.create_task(self.background_job(__metadata__["chat_id"])) + return body + +async def background_job(self, chat_id): + # Execute time-consuming operation... + pass +``` + +--- + +## 5. Best Practices & Design Principles + +### 5.1 Naming & Positioning +* **Short & Punchy**: e.g., "FlashCard", "DeepRead". Avoid generic terms like "Text Analysis Assistant". +* **Complementary**: Don't reinvent the wheel; clarify what specific problem your plugin solves. + +### 5.2 User Experience (UX) +* **Timely Feedback**: Send a `notification` ("Generating...") before time-consuming operations. +* **Visual Appeal**: When Action outputs HTML, use modern CSS (rounded corners, shadows, gradients). +* **Smart Guidance**: If text is too short, prompt the user: "Suggest entering more content for better results". + +### 5.3 Error Handling +Never let a plugin fail silently. Catch exceptions and inform the user via `__event_emitter__`. + +```python +try: + # Business logic +except Exception as e: + await __event_emitter__({ + "type": "notification", + "data": {"type": "error", "content": f"Processing failed: {str(e)}"} + }) +``` + +--- + +## 6. Troubleshooting + +* **HTML not showing?** Ensure it's wrapped in a ` ```html ... ``` ` code block. +* **Database error?** Check if you called synchronous DB methods directly in an `async` function; use `asyncio.to_thread`. +* **Parameters not working?** Check if `Valves` are defined correctly and if they are being overridden by `UserValves`. 
diff --git a/docs/examples/action_plugin_export_to_excel_example_cn.md b/docs/examples/action_plugin_export_to_excel_example_cn.md new file mode 100644 index 0000000..95f7fe9 --- /dev/null +++ b/docs/examples/action_plugin_export_to_excel_example_cn.md @@ -0,0 +1,117 @@ +# `Export to Excel` 插件深度解析:文件生成与下载实战 + +## 引言 + +`Export to Excel` 是一个非常实用的 `Action` 插件,它能智能地从 AI 的回答中提取 Markdown 表格,将其转换为格式精美的 Excel 文件,并直接在用户的浏览器中触发下载。 + +这个插件是一个绝佳的实战案例,它完整地展示了如何实现一个“数据处理 -> 文件生成 -> 前端交互”的闭环。通过解析它,开发者可以学习到如何在 Open WebUI 插件中利用强大的 Python 数据科学生态(如 `pandas`),以及如何实现将后端生成的文件无缝传递给用户。 + +## 核心工作流 + +该插件的工作流程清晰而高效,可以概括为以下六个步骤: + +1. **解析 (Parse)**: 使用正则表达式从最后一条聊天消息中精准地提取一个或多个 Markdown 表格。 +2. **分析 (Analyze)**: 智能地查找表格上下文中的 Markdown 标题(`#`, `##` 等),并以此为依据生成有意义的 Excel 工作簿及工作表(Sheet)的名称。 +3. **生成 (Generate)**: 将解析出的表格数据转换为 `pandas.DataFrame` 对象,这是进行后续处理的基础。 +4. **格式化与保存 (Format & Save)**: 利用 `pandas` 和 `XlsxWriter` 引擎,在服务器的临时目录中创建一个带有自定义样式(如颜色、对齐、自动列宽)的、符合专业规范的 `.xlsx` 文件。 +5. **传输与下载 (Transfer & Download)**: 将生成的 Excel 文件内容读取为字节流,进行 Base64 编码,然后通过 `__event_call__` 将编码后的字符串和一段 JavaScript 代码发送到前端。JS 代码在浏览器中解码数据、创建 Blob 对象并触发下载。 +6. **清理 (Cleanup)**: 下载触发后,立即删除服务器上的临时 Excel 文件,确保不占用服务器资源。 + +--- + +## 关键开发模式与技术剖析 + +### 1. 纯 Python 数据处理生态的威力 + +与一些需要深度集成 Open WebUI 后端模型的插件不同,`Export to Excel` 的核心功能完全由通用的 Python 库驱动,这展示了 Open WebUI 插件生态的开放性。 + +- **`re` (正则表达式)**: 用于从纯文本消息中稳健地解析出结构化的表格数据。 +- **`pandas`**: Python 数据分析的事实标准。插件用它来将原始的列表数据转换为强大的 DataFrame,为写入 Excel 提供了极大的便利。 +- **`xlsxwriter`**: 一个与 `pandas` 无缝集成的库,用于创建具有丰富格式的 Excel 文件,远比 `pandas` 默认的引擎功能更强大。 + +**启示**: 开发者可以将庞大而成熟的 Python 第三方库生态无缝地引入到 Open WebUI 插件中,以实现各种复杂的功能。 + +### 2. 智能文本上下文分析 + +一个优秀的插件不仅应完成任务,还应尽可能“智能”地理解用户意图。该插件的 `generate_names_from_content` 方法就是一个很好的例子。 + +- **目标**: 避免生成如 `output.xlsx` 或 `Sheet1` 这样无意义的文件/工作表名。 +- **实现**: + 1. 首先,遍历消息内容,找出所有的 Markdown 标题(`#` 到 `######`)及其所在的行号。 + 2. 对于每一个提取出的表格,在所有位于其上方的标题中,选择**行号最大**(即距离最近)的一个作为该表格的名称。 + 3. 如果只有一个表格,则直接使用其名称作为工作簿的名称。 + 4. 如果有多个表格,则使用整篇消息中的**第一个标题**作为工作簿的名称。 + 5. 
如果找不到任何标题,则优雅地回退到默认命名方案(如 `用户_20231026.xlsx`)。 + +**启示**: 通过对上下文(而不只是目标数据本身)的简单分析,可以极大地提升插件的用户体验。 + +### 3. 高质量文件生成 (`pandas` + `xlsxwriter`) + +简单地调用 `df.to_excel()` 只能生成一个“能用”的文件。而此插件通过 `apply_chinese_standard_formatting` 方法展示了如何生成一个“专业”的文件。 + +- **引擎选择**: `pd.ExcelWriter(file_path, engine="xlsxwriter")` 是关键,它允许我们访问底层的 `workbook` 和 `worksheet` 对象。 +- **核心格式化技术**: + - **自定义单元格样式**: 通过 `workbook.add_format()` 创建多种样式(如表头、文本、数字、日期),并分别定义字体、颜色、边框、对齐方式等。 + - **智能内容对齐**: 遵循标准的制表规范,实现了“文本左对齐、数值右对齐、标题/日期/序号居中对齐”。 + - **中文字符感知列宽**: `calculate_text_width` 方法在计算内容宽度时,将中文字符(及标点)的宽度视为英文字符的两倍,确保了自动调整列宽 (`worksheet.set_column`) 对中文内容同样有效,避免了文字溢出。 + - **动态行高**: `calculate_text_height` 方法会根据单元格内容的换行符和折行情况计算所需行数,并以此为依据设置行高 (`worksheet.set_row`),确保了包含长文本的单元格也能完整显示。 + +**启示**: 魔鬼在细节中。对生成文件的精细格式化是区分“玩具”和“工具”的重要标准。 + +### 4. 后端文件生成与下载的标准模式 + +如何在 `Action` 插件中安全、高效地让用户下载后端生成的文件?`export_to_excel` 展示了目前**最佳的、也是标准的实现模式**。 + +**流程详解**: + +1. **在服务器临时位置创建文件**: + ```python + filename = f"{workbook_name}.xlsx" + excel_file_path = os.path.join("app", "backend", "data", "temp", filename) + # ... 使用 pandas 保存文件到 excel_file_path ... + ``` +2. **将文件读入内存并编码**: + ```python + with open(excel_file_path, "rb") as file: + file_content = file.read() + base64_blob = base64.b64encode(file_content).decode("utf-8") + ``` +3. **通过 `__event_call__` 发送数据和下载指令**: + - 将 Base64 字符串和文件名嵌入一段预设的 JavaScript 代码中。 + - 这段 JS 的作用是在浏览器端解码 Base64、创建文件 Blob、生成一个隐藏的下载链接 (`<a>` 标签),然后模拟用户点击该链接。 + + ```python + js_code = f""" + const base64Data = "{base64_blob}"; + // ... JS 解码并创建下载链接的代码 ... + a.download = "{filename}"; + a.click(); + """ + await __event_call__({"type": "execute", "data": {"code": js_code}}) + ``` +4. **立即清理临时文件**: + ```python + if os.path.exists(excel_file_path): + os.remove(excel_file_path) + ``` + +**模式优势**: +- **安全**: 不会暴露服务器的任何文件路径或创建公共的下载 URL。 +- **无状态**: 服务器上不保留任何用户生成的文件,请求结束后立即清理,节约了存储空间。 +- **体验好**: 对用户来说,点击按钮后直接弹出浏览器下载框,体验非常流畅。 + +### 5. 
优雅的错误处理 + +插件的 `action` 方法被一个完整的 `try...except` 块包裹。 +- 当 `extract_tables_from_message` 找不到表格时,它会主动抛出 `HTTPException`。 +- 在 `except` 块中,插件会通过 `__event_emitter__` 向前端发送一个内容为“没有找到可以导出的表格!”的错误通知 (`notification`),并更新状态栏 (`status`),清晰地告知用户发生了什么。 + +**启示**: 任何可能失败的操作都应被捕获,并向用户提供清晰、友好的错误反馈。 + +## 总结 + +`Export to Excel` 插件是一个将数据处理与前端交互完美结合的典范。通过学习它,我们可以掌握: +- 如何利用 `pandas` 和 `xlsxwriter` 等库在后端生成专业品质的二进制文件。 +- 如何通过 `__event_call__` 这一强大的机制,实现从后端到前端的文件传输和下载触发。 +- 服务器临时文件的创建、使用和清理这一完整的、安全的生命周期管理模式。 +- 如何通过解析上下文来提升插件的“智能化”和用户体验。 diff --git a/docs/examples/action_plugin_smart_mind_map_example_cn.md b/docs/examples/action_plugin_smart_mind_map_example_cn.md new file mode 100644 index 0000000..eb99c7a --- /dev/null +++ b/docs/examples/action_plugin_smart_mind_map_example_cn.md @@ -0,0 +1,291 @@ +# Open WebUI Action 插件开发范例:智绘心图 + +## 引言 + +“智绘心图” (`smart-mind-map`) 是一个功能强大的 Open WebUI Action 插件。它通过分析用户提供的文本,利用大语言模型(LLM)提取关键信息,并最终生成一个可交互的、可视化的思维导图。本文档将深入解析其源码 (`思维导图.py`),提炼其中蕴含的插件开发知识与最佳实践,为开发者提供一个高质量的参考范例。 + +## 核心开发知识点 + +- **插件元数据定义**: 如何通过文件头注释定义插件的标题、图标、版本和描述。 +- **可配置参数 (`Valves`)**: 如何为插件提供灵活的配置选项。 +- **异步 `action` 方法**: 插件主入口的实现方式及其核心参数的使用。 +- **实时前端交互 (`EventEmitter`)**: 如何向用户发送实时状态更新和通知。 +- **与 LLM 交互**: 如何构建动态 Prompt、调用内置 LLM 服务并处理返回结果。 +- **富文本 (HTML/JS) 输出**: 如何生成包含复杂前端逻辑的 HTML 内容,并将其嵌入聊天响应中。 +- **健壮性设计**: 如何实现输入验证、全面的错误处理和日志记录。 +- **访问 Open WebUI 核心模型**: 如何与 Open WebUI 的数据模型(如 `Users`)交互。 + +--- + +### 1. 
插件元数据定义 + +Open WebUI 通过文件顶部的特定格式注释来识别和展示插件信息。 + +**代码示例 (`思维导图.py`):** +```python +""" +title: 智绘心图 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSྡ1LjUiIHgyPSI2IiB5Mj0iNiIvPgogIDxjaXJjbGUgY3g9IjUiIGN5PSI1Iigcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSྡ5LjUgeDI9IjE1IiB5Mj0iNiIvPgogIDxjaXJjbGUgY3g9IjE5IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9ྡ1LjUgeTE9ྡ3MuNSB4Mj0iNiIgeTI9IjE4Ii8+CiAgPGNpcmNsZSBjeD0iNSIgY3k9IjE5IiByPSྡ1LjUiLz4KICA8bGluZSB4MT0ྡzIuNSB5MT0ྡzIuNSB4Mj0iNSIgeTI9IjE4Ii8+CiAgPGNpcmNsZSBjeD0ྡ5IiBjeT0ྡ5IiByPSྡ1LjUiLz4KPC9zdmc+Cg== +version: 0.7.2 +description: 智能分析文本内容,生成交互式思维导图,帮助用户结构化和可视化知识。 +""" +``` +**知识点**: +- `title`: 插件在 UI 中显示的名称。 +- `icon_url`: 插件的图标,支持 base64 编码的 SVG,以实现无依赖的矢量图标。 +- `version`: 插件的版本号。 +- `description`: 插件的功能简介。 + +--- + +### 2. 
可配置参数 (`Valves`) + +通过在 `Action` 类内部定义一个 `Valves` Pydantic 模型,可以为插件创建可在 Web UI 中配置的参数。 + +**代码示例 (`思维导图.py`):** +```python +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, description="是否在聊天界面显示操作状态更新。" + ) + LLM_MODEL_ID: str = Field( + default="gemini-2.5-flash", + description="用于文本分析的内置LLM模型ID。", + ) + MIN_TEXT_LENGTH: int = Field( + default=100, description="进行思维导图分析所需的最小文本长度(字符数)。" + ) + + def __init__(self): + self.valves = self.Valves() +``` +**知识点**: +- `Valves` 类继承自 `pydantic.BaseModel`。 +- 每个字段都是一个配置项,`default` 是默认值,`description` 会在 UI 中作为提示信息显示。 +- 在 `__init__` 中实例化 `self.valves`,之后可以通过 `self.valves.PARAMETER_NAME` 来访问配置值。 + +--- + +### 3. 异步 `action` 方法 + +`action` 方法是插件的执行入口,它是一个异步函数,接收 Open WebUI 传入的上下文信息。 + +**代码示例 (`思维导图.py`):** +```python + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + # ... 插件逻辑 ... + return body +``` +**知识点**: +- `body`: 包含当前聊天上下文的字典,最重要的是 `body.get("messages")`,它包含了完整的消息历史。 +- `__user__`: 包含当前用户信息的字典,如 `id`, `name`, `language` 等。插件中演示了如何兼容其为 `dict` 或 `list` 的情况。 +- `__event_emitter__`: 一个可调用的异步函数,用于向前端发送事件,是实现实时反馈的关键。 +- `__request__`: FastAPI 的 `Request` 对象,用于访问底层请求信息,例如在调用 `generate_chat_completion` 时需要传递。 +- **返回值**: `action` 方法需要返回修改后的 `body` 字典,其中包含了插件生成的响应。 + +--- + +### 4. 
实时前端交互 (`EventEmitter`) + +使用 `__event_emitter__` 可以极大地提升用户体验,让用户了解插件的执行进度。 + +**代码示例 (`思维导图.py`):** +```python +# 发送通知 (Toast) +await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", # 'info', 'success', 'warning', 'error' + "content": "智绘心图已启动,正在为您生成思维导图...", + }, + } +) + +# 发送状态更新 (Status Bar) +await __event_emitter__( + { + "type": "status", + "data": { + "description": "智绘心图: 深入分析文本结构...", + "done": False, # False 表示进行中 + "hidden": False, + }, + } +) + +# 任务完成 +await __event_emitter__( + { + "type": "status", + "data": { + "description": "智绘心图: 绘制完成!", + "done": True, # True 表示已完成 + "hidden": False, # True 可以让成功状态自动隐藏 + }, + } +) +``` +**知识点**: +- **通知 (`notification`)**: 在屏幕角落弹出短暂的提示信息,适合用于触发、成功或失败的即时反馈。 +- **状态 (`status`)**: 在聊天输入框上方显示一个持久的状态条,适合展示多步骤任务的当前进度。`done: True` 会标记任务完成。 + +--- + +### 5. 与 LLM 交互 + +插件的核心功能通常依赖于 LLM。`智绘心图` 演示了如何构建一个结构化的 Prompt 并调用 LLM。 + +**代码示例 (`思维导图.py`):** +```python +# 1. 构建动态 Prompt +SYSTEM_PROMPT_MINDMAP_ASSISTANT = "..." # 系统指令 +USER_PROMPT_GENERATE_MINDMAP = "..." # 用户指令模板 + +formatted_user_prompt = USER_PROMPT_GENERATE_MINDMAP.format( + user_name=user_name, + # ... 注入其他上下文信息 + long_text_content=long_text_content, +) + +# 2. 准备 LLM 请求体 +llm_payload = { + "model": self.valves.LLM_MODEL_ID, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT}, + {"role": "user", "content": formatted_user_prompt}, + ], + "temperature": 0.5, + "stream": False, +} + +# 3. 获取用户对象并调用 LLM +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +user_obj = Users.get_user_by_id(user_id) +llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj +) + +# 4. 
处理响应 +assistant_response_content = llm_response["choices"][0]["message"]["content"] +markdown_syntax = self._extract_markdown_syntax(assistant_response_content) +``` +**知识点**: +- **Prompt 工程**: 将系统指令和用户指令分离。在用户指令中动态注入上下文信息(如用户名、时间、语言),可以使 LLM 的输出更具个性化和准确性。 +- **调用工具**: 使用 `open_webui.utils.chat.generate_chat_completion` 是与 Open WebUI 内置 LLM 服务交互的标准方式。 +- **用户上下文**: 调用 `generate_chat_completion` 需要传递 `user_obj`,这可能用于权限控制、计费或模型特定的用户标识。通过 `open_webui.models.users.Users.get_user_by_id` 获取该对象。 +- **响应解析**: LLM 的响应需要被解析。该插件使用正则表达式从返回的文本中提取核心的 Markdown 内容,并提供了回退机制。 + +--- + +### 6. 富文本 (HTML/JS) 输出 + +Action 插件的一大亮点是能够生成 HTML,从而在聊天界面中渲染丰富的交互式内容。 + +**代码示例 (`思维导图.py`):** +```python +# 1. 定义 HTML 模板 +HTML_TEMPLATE_MINDMAP = """ + + + + + + + + + +
+ + + + +""" + +# 2. 注入动态内容 +final_html_content = + HTML_TEMPLATE_MINDMAP.replace("{unique_id}", unique_id) + .replace("{markdown_syntax}", markdown_syntax) + # ... 替换其他占位符 + +# 3. 嵌入到聊天响应中 +html_embed_tag = f"```html\n{final_html_content}\n```" +body["messages"][-1]["content"] = f"{long_text_content}\n\n{html_embed_tag}" +``` +**知识点**: +- **HTML 模板**: 将静态 HTML/CSS/JS 代码定义为模板字符串,使用占位符(如 `{unique_id}`)来注入动态数据。 +- **嵌入 JS**: 可以在 HTML 中直接嵌入 JavaScript 代码,用于处理前端交互逻辑,如渲染图表、绑定按钮事件等。`智绘心图` 的 JS 代码负责调用 Markmap.js 库来渲染思维导图,并实现了“复制 SVG”和“复制 Markdown”的按钮功能。 +- **唯一 ID**: 使用 `unique_id` 是一个好习惯,可以防止在同一页面上多次使用该插件时发生 DOM 元素 ID 冲突。 +- **响应格式**: 最终的 HTML 内容需要被包裹在 ````html\n...\n```` 代码块中,Open WebUI 的前端会自动识别并渲染它。 +- **内容追加**: 插件将生成的 HTML 追加到原始用户输入之后,而不是替换它,保留了上下文。 + +--- + +### 7. 健壮性设计 + +一个生产级的插件必须具备良好的健壮性。 + +**代码示例 (`思维导图.py`):** +```python +# 输入验证 +if len(long_text_content) < self.valves.MIN_TEXT_LENGTH: + # ... 返回警告信息 ... + return {"messages": [...]} + +# 完整的异常捕获 +try: + # ... 核心逻辑 ... +except Exception as e: + error_message = f"智绘心图处理失败: {str(e)}" + logger.error(f"智绘心图错误: {error_message}", exc_info=True) + + # 向前端发送错误通知 + if __event_emitter__: + await __event_emitter__(...) 
+ + # 在聊天中显示错误信息 + body["messages"][-1]["content"] = f"❌ **错误:** {user_facing_error}" + +# 日志记录 +import logging +logger = logging.getLogger(__name__) +logger.info("Action started") +logger.error("Error occurred", exc_info=True) +``` +**知识点**: +- **输入验证**: 在执行核心逻辑前,对输入(如文本长度)进行检查,可以避免不必要的资源消耗和潜在错误。 +- **`try...except` 块**: 将主要逻辑包裹在 `try` 块中,并捕获 `Exception`,确保任何意外失败都能被优雅地处理。 +- **用户友好的错误反馈**: 在 `except` 块中,不仅要记录详细的错误日志(`logger.error`),还要通过 `EventEmitter` 和聊天消息向用户提供清晰、可操作的错误提示。 +- **日志**: 使用 `logging` 模块记录关键步骤和错误信息,是调试和监控插件运行状态的重要手段。`exc_info=True` 会记录完整的堆栈跟踪。 + +--- + +### 总结 + +`智绘心图` 插件是一个优秀的 Open WebUI Action 开发学习案例。它全面展示了如何利用 Action 插件的各项功能,构建一个交互性强、用户体验好、功能完整且健壮的 AI 应用。 + +**最佳实践总结**: +- **明确元数据**: 为你的插件提供清晰的 `title`, `icon`, `description`。 +- **提供配置**: 使用 `Valves` 让插件更灵活。 +- **善用反馈**: 积极使用 `EventEmitter` 提供实时状态和通知。 +- **结构化 Prompt**: 精心设计的 Prompt 是高质量输出的保证。 +- **拥抱富文本**: 利用 HTML 和 JS 创造丰富的交互体验。 +- **防御性编程**: 始终考虑输入验证和错误处理。 +- **详细日志**: 记录日志是排查问题的关键。 + +通过学习和借鉴`智绘心图`的设计模式,开发者可以更高效地构建出属于自己的高质量 Open WebUI 插件。 diff --git a/docs/examples/filter_plugin_async_context_compression_example_cn.md b/docs/examples/filter_plugin_async_context_compression_example_cn.md new file mode 100644 index 0000000..e64d6d9 --- /dev/null +++ b/docs/examples/filter_plugin_async_context_compression_example_cn.md @@ -0,0 +1,235 @@ +# Open WebUI Filter 插件开发范例:异步上下文压缩 + +## 引言 + +“异步上下文压缩” (`async-context-compression`) 是一个功能先进的 Open WebUI `Filter` 插件。它旨在通过在后台异步地对长对话历史进行智能摘要,来显著减少发送给大语言模型(LLM)的 Token 数量,从而在节约成本的同时保持对话的连贯性。 + +本文档将深入剖析其源码,提炼其作为高级 `Filter` 插件所展示的设计模式与开发技巧,特别是关于**异步处理**、**数据库集成**和**复杂消息流控制**等方面。 + +## 核心开发知识点 + +- **Filter 插件结构 (`inlet` / `outlet`)**: 掌握过滤器在请求生命周期中的两个核心切入点。 +- **异步后台任务**: 如何使用 `asyncio.create_task` 执行耗时操作而不阻塞用户响应。 +- **数据库持久化**: 如何使用 SQLAlchemy 与数据库(PostgreSQL/SQLite)集成,实现数据的持久化存储。 +- **高级 `Valves` 配置**: 如何使用 Pydantic 的 `@model_validator` 实现复杂的跨字段配置验证。 +- **复杂消息体处理**: 如何安全地操作和修改包含多模态内容的消息结构。 +- **从插件内部调用 LLM**: 在插件中调用 LLM 服务以实现“插件调用插件”的元功能。 +- **环境变量依赖与初始化**: 
如何处理对外部环境变量的依赖,并在插件初始化时进行安全配置。 + +--- + +### 1. Filter 插件结构 (`inlet` / `outlet`) + +`Filter` 插件通过 `inlet` 和 `outlet` 两个方法,在请求发送给 LLM **之前**和 LLM 响应返回 **之后**对消息进行处理。 + +- `inlet(self, body: dict, ...)`: 在请求发送前执行。此插件用它来检查是否存在历史摘要,如果存在,则用摘要替换部分历史消息,从而“压缩”上下文。 +- `outlet(self, body: dict, ...)`: 在收到 LLM 响应后执行。此插件用它来判断对话是否达到了需要生成摘要的长度阈值,如果是,则触发一个后台任务来生成新的摘要,以供**下一次**对话使用。 + +这种“读旧,写新”的异步策略是该插件的核心设计。 + +**代码示例 (`async_context_compression.py`):** +```python +class Filter: + def inlet(self, body: dict, ...) -> dict: + """ + 在发送到 LLM 之前执行。 + 应用已有的摘要来压缩本次请求的上下文。 + """ + # 1. 从数据库加载已保存的摘要 + saved_summary = self._load_summary(chat_id, body) + + # 2. 如果摘要存在且消息足够长 + if saved_summary and len(messages) > total_kept_count: + # 3. 替换中间的消息为摘要 + body["messages"] = compressed_messages + + return body + + async def outlet(self, body: dict, ...) -> dict: + """ + 在 LLM 响应完成后执行。 + 检查是否需要为下一次请求生成新的摘要。 + """ + # 1. 检查消息总数是否达到阈值 + if len(messages) >= self.valves.compression_threshold: + # 2. 创建一个异步后台任务来生成摘要,不阻塞当前响应 + asyncio.create_task( + self._generate_summary_async(...) + ) + + return body +``` +**知识点**: +- `inlet` 和 `outlet` 分别作用于请求流的不同阶段,实现了功能的解耦。 +- `inlet` 负责**消费**摘要,`outlet` 负责**生产**摘要,两者通过数据库解耦。 + +--- + +### 2. 异步后台任务 + +对于耗时操作(如调用 LLM 生成摘要),为了不让用户等待,必须采用异步后台处理。这是高级插件必备的技巧。 + +**代码示例 (`async_context_compression.py`):** +```python +# 在 outlet 方法中 +async def outlet(self, ...): + if len(messages) >= self.valves.compression_threshold: + # 核心:创建一个后台任务,并立即返回,不等待其完成 + asyncio.create_task( + self._generate_summary_async(messages, chat_id, body, __user__) + ) + return body + +# 后台任务的具体实现 +async def _generate_summary_async(self, ...): + """ + 在后台异步生成摘要。 + """ + try: + # 1. 提取需要被摘要的消息 + messages_to_summarize = ... + + # 2. 将消息格式化为纯文本 + conversation_text = self._format_messages_for_summary(messages_to_summarize) + + # 3. 调用 LLM 生成摘要 + summary = await self._call_summary_llm(conversation_text, body, user_data) + + # 4. 
将新摘要存入数据库 + self._save_summary(chat_id, summary, body) + except Exception as e: + # 错误处理 + ... +``` +**知识点**: +- `asyncio.create_task()`: 这是实现“即发即忘”(fire-and-forget)模式的关键。它将一个协程(`_generate_summary_async`)提交到事件循环中运行,而当前函数(`outlet`)可以继续执行并立即返回,从而确保了前端的快速响应。 +- **健壮性**: 后台任务必须有自己独立的 `try...except` 块,以防止其内部的失败影响到主程序的稳定性。 + +--- + +### 3. 数据库持久化 (SQLAlchemy) + +为了在不同对话回合乃至服务重启后都能保留摘要,插件集成了数据库。 + +**代码示例 (`async_context_compression.py`):** +```python +# 1. 依赖环境变量 +database_url = os.getenv("DATABASE_URL") + +# 2. 定义数据模型 +from sqlalchemy.ext.declarative import declarative_base +Base = declarative_base() + +class ChatSummary(Base): + __tablename__ = "chat_summary" + id = Column(Integer, primary_key=True) + chat_id = Column(String(255), unique=True, index=True) + summary = Column(Text) + # ... 其他字段 + +# 3. 初始化数据库连接 +def _init_database(self): + database_url = os.getenv("DATABASE_URL") + if not database_url: + # ... 错误处理 ... + return + + # 根据 URL 前缀选择驱动 (PostgreSQL/SQLite) + if database_url.startswith("sqlite"): ... + elif database_url.startswith("postgres"): ... + + self._db_engine = create_engine(database_url, ...) + self._SessionLocal = sessionmaker(bind=self._db_engine) + Base.metadata.create_all(bind=self._db_engine) # 自动建表 + +# 4. 封装 CRUD 操作 +def _save_summary(self, chat_id: str, summary: str, body: dict): + session = self._SessionLocal() + try: + # ... 查询、更新或创建记录 ... + session.commit() + finally: + session.close() +``` +**知识点**: +- **配置驱动**: 插件依赖 `DATABASE_URL` 环境变量,并在 `_init_database` 中进行解析,实现了对不同数据库(PostgreSQL, SQLite)的兼容。 +- **ORM 模型**: 使用 SQLAlchemy 的声明式基类定义 `ChatSummary` 表结构,使数据库操作对象化,更易于维护。 +- **自动建表**: `Base.metadata.create_all()` 会在插件首次运行时自动检查并创建不存在的表,简化了部署。 +- **会话管理**: 使用 `sessionmaker` 创建会话,并通过 `try...finally` 确保会话在使用后被正确关闭,这是管理数据库连接的标准实践。 + +--- + +### 4. 
高级 `Valves` 配置 + +除了简单的默认值,`Valves` 还可以通过 Pydantic 的验证器实现更复杂的逻辑。 + +**代码示例 (`async_context_compression.py`):** +```python +from pydantic import model_validator + +class Valves(BaseModel): + compression_threshold: int = Field(...) + keep_first: int = Field(...) + keep_last: int = Field(...) + # ... 其他配置 + + @model_validator(mode="after") + def check_thresholds(self) -> "Valves": + kept_count = self.keep_first + self.keep_last + if self.compression_threshold <= kept_count: + raise ValueError( + f"compression_threshold ({self.compression_threshold}) 必须大于 " + f"keep_first 和 keep_last 的总和。" + ) + return self +``` +**知识点**: +- `@model_validator(mode="after")`: 这个装饰器允许你在所有字段都已赋值**之后**,执行一个自定义的验证函数。 +- **跨字段验证**: 该插件用它来确保 `compression_threshold` 必须大于 `keep_first` 和 `keep_last` 之和,保证了插件逻辑的正确性,避免了无效配置。 + +--- + +### 5. 复杂消息体处理 + +Open WebUI 的消息体 `content` 可能是简单的字符串,也可能是用于多模态的列表。插件必须能稳健地处理这两种情况。 + +**代码示例 (`async_context_compression.py`):** +```python +def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict: + content = message.get("content", "") + summary_block = f"【历史对话摘要】\n{summary}\n..." 
+ + if isinstance(content, list): # 多模态内容 + new_content = [] + summary_inserted = False + for part in content: + if part.get("type") == "text" and not summary_inserted: + # 将摘要追加到第一个文本部分的前面 + new_content.append({"type": "text", "text": summary_block + part.get("text", "")}) + summary_inserted = True + else: + new_content.append(part) + message["content"] = new_content + elif isinstance(content, str): # 纯文本 + message["content"] = summary_block + content + + return message +``` +**知识点**: +- **类型检查**: 通过 `isinstance(content, list)` 判断消息是否为多模态类型。 +- **安全注入**: 在处理多模态列表时,代码会遍历所有 `part`,找到第一个文本部分进行注入,同时保持其他部分(如图片)不变。这确保了插件的兼容性和稳定性。 + +--- + +### 总结 + +`异步上下文压缩` 插件是学习如何构建生产级 Open WebUI `Filter` 的绝佳案例。它不仅展示了 `Filter` 的基本用法,更深入地探讨了在 Web 服务中至关重要的**异步处理**和**持久化存储**。 + +**高级实践总结**: +- **分离读写**: 利用 `inlet` 和 `outlet` 的生命周期,结合数据库,实现异步的“读写分离”模式。 +- **非阻塞设计**: 通过 `asyncio.create_task` 将耗时操作移出主请求/响应循环,保证用户体验的流畅性。 +- **外部依赖管理**: 优雅地处理对环境变量和数据库的依赖,并在初始化时提供清晰的日志和错误提示。 +- **健壮配置**: 利用模型验证器 (`@model_validator`) 防止用户设置出不符合逻辑的参数。 +- **兼容性处理**: 在操作消息体时,充分考虑多模态等复杂数据结构,确保插件的广泛适用性。 + +通过研究此插件,开发者可以掌握构建需要与外部服务(如数据库)交互、执行复杂后台任务的高级 `Filter` 的核心技能。 diff --git a/docs/examples/filter_plugin_gemini_manifold_companion_example_cn.md b/docs/examples/filter_plugin_gemini_manifold_companion_example_cn.md new file mode 100644 index 0000000..7198d85 --- /dev/null +++ b/docs/examples/filter_plugin_gemini_manifold_companion_example_cn.md @@ -0,0 +1,163 @@ +# `Gemini Manifold Companion` 深度解析:高级 `Filter` 与 `Pipe` 协同开发 + +## 引言 + +`Gemini Manifold Companion` 是一个 `Filter` 插件,但它的设计目标并非独立运作,而是作为 `Gemini Manifold` 这个 `Pipe` 插件的“伴侣”或“增强包”。它通过在请求到达 `Pipe` 之前和响应返回给用户之后进行一系列精巧的操作,解锁了许多 Open WebUI 原生界面不支持的、`Pipe` 专属的强大功能(如 Google Search, Code Execution 等)。 + +本文档将深度解析这个“伴侣插件”的设计模式,重点阐述其如何通过**拦截与翻译**、**跨阶段通信**和**异步 I/O** 等高级技巧,实现与 `Pipe` 插件的完美协同。 + +## 核心工作流:拦截与翻译 (Hijack and Translate) + +`Companion` 插件的核心价值体现在其 `inlet` 方法中。它像一个智能的“请求路由器”,在不修改 Open WebUI 前端代码的情况下,将前端的通用功能开关“翻译”成 `Pipe` 插件能理解的专属指令。 + +**目标**: 
拦截前端通用的功能请求(如“网络搜索”),阻止 Open WebUI 的默认行为,并将其转换为 `Pipe` 插件的专属功能标记。 + +#### 实现步骤 (`inlet` 方法): + +1. **识别目标 `Pipe`**: 过滤器首先会检查当前请求是否发往它需要辅助的 `gemini_manifold`。如果不是,则直接返回,不做任何操作。这是伴侣插件模式的基础。 + + ```python + # _get_model_name 会判断当前模型是否由 gemini_manifold 提供 + canonical_model_name, is_manifold = self._get_model_name(body) + if not is_manifold: + return body # 不是目标,直接放行 + ``` + +2. **拦截功能开关**: 插件检查前端请求的 `body["features"]` 中,`web_search` 是否为 `True`。 + +3. **执行“拦截与翻译”**: + - **拦截 (Hijack)**: 如果 `web_search` 为 `True`,插件会立即将其改回 `False`。这一步至关重要,它阻止了 Open WebUI 触发其内置的、通用的 RAG 或网页搜索流程。 + ```python + features["web_search"] = False + ``` + - **翻译 (Translate)**: 紧接着,插件会在一个更深层的、用于插件间通信的 `metadata` 字典中,添加一个**自定义的**、`Pipe` 插件能识别的标志。 + ```python + # metadata["features"] 是一个专为插件间通信设计的字典 + metadata_features["google_search_tool"] = True + ``` + +4. **传递其他指令**: 除了功能开关,`Companion` 还会做一些其他的预处理,例如: + - **绕过 RAG**: 如果用户开启了 `BYPASS_BACKEND_RAG`,它会清空 `body["files"]` 数组,并设置 `metadata_features["upload_documents"] = True`,告知 `Pipe` 插件“文件由你来处理”。 + - **强制流式**: `Pipe` 插件通常返回 `AsyncGenerator`,需要前端以流式模式处理。`Companion` 会强制设置 `body["stream"] = True`,同时将用户原始的流式/非流式选择保存在 `metadata` 中,供 `Pipe` 后续判断。 + +**设计模式的价值**: 这种模式实现了极高的解耦。前端只需使用标准的功能开关,而 `Pipe` 插件可以定义任意复杂的、私有的功能集。`Companion` 过滤器则充当了两者之间的智能适配器,使得在不改动核心代码的情况下,扩展后端功能成为可能。 + +--- + +## 高级技巧 1: `Pipe` -> `Filter` 的跨阶段通信 + +**问题**: `Pipe` 在处理过程中生成了重要数据(如包含搜索结果的 `grounding_metadata`),但 `Filter` 的 `outlet` 方法在 `Pipe` 执行**之后**才运行。如何将数据从 `Pipe` 安全地传递给 `Filter`? + +**解决方案**: `request.app.state`,一个在单次 HTTP 请求生命周期内持续存在的共享状态对象。 + +#### 实现流程: + +1. 
**`Pipe` 插件中 (数据写入)**: + - 在 `gemini_manifold.py` 的 `_do_post_processing` 阶段(响应流结束后),`Pipe` 会从 Google API 的响应中提取 `grounding_metadata`。 + - 然后,它使用 `setattr` 将这些数据动态地附加到 `request.app.state` 对象上,使用一个包含 `chat_id` 和 `message_id` 的唯一键。 + + ```python + # 在 gemini_manifold.py 中 (示意代码) + def _do_post_processing(self, ..., __request__: Request): + app_state = __request__.app.state + grounding_key = f"grounding_{chat_id}_{message_id}" + + # 将数据存入请求状态 + setattr(app_state, grounding_key, grounding_metadata) + ``` + +2. **`Companion Filter` 中 (数据读取)**: + - 在 `outlet` 方法中,`Filter` 可以访问同一个 `__request__` 对象。 + - 它使用 `getattr` 和相同的唯一键,从 `request.app.state` 中安全地取出 `Pipe` 之前存入的数据。 + + ```python + # 在 gemini_manifold_companion.py 的 outlet 方法中 + async def outlet(self, ..., __request__: Request, ...): + app_state = __request__.app.state + grounding_key = f"grounding_{chat_id}_{message_id}" + + # 从请求状态中读取数据 + stored_metadata = getattr(app_state, grounding_key, None) + + if stored_metadata: + # 成功获取 Pipe 传来的数据,进行后续处理 + # (如注入引用标记、解析 URL 等) + ... + + # 清理状态,避免内存泄漏 + delattr(app_state, grounding_key) + ``` + +**设计模式的价值**: `request.app.state` 充当了在同一次请求处理链中、不同插件(特别是 `Pipe` 和 `Filter`)之间传递复杂数据的“秘密信道”,是实现高级协同功能的关键。 + +--- + +## 高级技巧 2: 在 `outlet` 中执行异步 I/O + +**问题**: `grounding_metadata` 中的搜索结果 URL 是 Google 的重定向链接,需要通过网络请求解析成最终的真实网址才能展示给用户。如果在 `outlet` 中同步执行这些请求,会阻塞整个响应流程。 + +**解决方案**: 利用 `outlet` 是 `async` 函数的特性,执行并发的异步网络请求。 + +#### 实现流程 (`_resolve_and_emit_sources` 方法): + +1. **收集任务**: 从 `grounding_metadata` 中提取所有需要解析的 URL。 +2. **创建会话**: 使用 `aiohttp.ClientSession` 创建一个异步 HTTP 客户端会话。 +3. **并发执行**: + - 为每个 URL 创建一个 `_resolve_url` 协程任务。 + - 使用 `asyncio.gather` 并发地执行所有 URL 解析任务。 +4. **处理结果**: 等待所有解析完成后,将最终的真实 URL 和其他元数据组合成 `sources` 列表。 +5. **发送事件**: 通过 `__event_emitter__` 将包含最终 `sources` 的 `chat:completion` 事件发送给前端。 + +**代码示例 (逻辑简化):** +```python +async def _resolve_and_emit_sources(self, ...): + # ... 提取所有待解析的 URL ... + urls_to_resolve = [...] 
+ + async with aiohttp.ClientSession() as session: + # 为每个 URL 创建一个异步解析任务 + tasks = [self._resolve_url(session, url) for url in urls_to_resolve] + # 并发执行所有任务 + results = await asyncio.gather(*tasks) + + # ... 处理解析结果 ... + resolved_sources = [...] + + # 通过事件发射器将最终结果发送给前端 + await event_emitter({"type": "chat:completion", "data": {"sources": resolved_sources}}) +``` +**设计模式的价值**: 即使在请求处理的最后阶段 (`outlet`),也能够执行高效、非阻塞的 I/O 操作,极大地丰富了插件的能力,而不会牺牲用户体验。 + +--- + +## 高级技巧 3: 动态日志级别 + +**问题**: 如何在不重启服务的情况下,动态调整一个插件的日志详细程度,以便于在线上环境中进行调试? + +**解决方案**: 在 `inlet` 中检查配置变化,并动态地添加/移除 `loguru` 的日志处理器 (Handler)。 + +#### 实现流程: + +1. **`__init__`**: 插件初始化时,根据 `Valves` 中的 `LOG_LEVEL` 配置,添加一个带特定过滤器(只输出本插件日志)和格式化器的 `loguru` handler。 +2. **`inlet`**: 在每次请求进入时,都比较当前阀门中的 `LOG_LEVEL` 与插件实例中保存的 `self.log_level` 是否一致。 +3. **动态更新**: + - 如果不一致,说明管理员修改了配置。 + - 插件会调用 `log.remove()` 移除旧的 handler。 + - 然后调用 `log.add()`,使用新的日志级别添加一个新的 handler。 + - 最后更新 `self.log_level`。 + +**设计模式的价值**: 这使得插件的日志管理变得极其灵活。管理员只需在 Web UI 中修改插件的 `LOG_LEVEL` 配置,即可立即(在下一次请求时)看到更详细或更简洁的日志输出,极大地提升了生产环境中的问题排查效率。 + +--- + +## 总结 + +`Gemini Manifold Companion` 是一个教科书级的“伴侣插件”范例,它揭示了 `Filter` 插件的巨大潜力。通过学习它,我们可以掌握: + +- **协同设计模式**: 如何让 `Filter` 与 `Pipe` 协同工作,以实现标准 UI 之外的复杂功能。 +- **指令翻译**: 使用 `metadata` 作为 `Filter` 向 `Pipe` 传递“秘密指令”的通信渠道。 +- **跨阶段状态共享**: 使用 `request.app.state` 作为 `Pipe` 向 `Filter` 回传数据的“临时内存”。 +- **全异步流程**: 即使在请求的末端 (`outlet`),也能利用 `asyncio` 和 `aiohttp` 执行高效的异步 I/O 操作。 +- **动态运维能力**: 实现如动态日志级别这样的功能,让插件更易于在生产环境中管理和调试。 + +这些高级技巧共同构成了一个强大、解耦且可扩展的插件生态系统,是所有 Open WebUI 插件开发者进阶的必经之路。 diff --git a/docs/examples/filter_plugin_inject_env_example_cn.md b/docs/examples/filter_plugin_inject_env_example_cn.md new file mode 100644 index 0000000..208aef3 --- /dev/null +++ b/docs/examples/filter_plugin_inject_env_example_cn.md @@ -0,0 +1,134 @@ +# `Inject Env` 插件深度解析:动态修改请求与上下文注入 + +## 引言 + +`Inject Env` 是一个 `Filter` 插件的绝佳范例,它清晰地展示了过滤器的核心价值:在请求到达 LLM **之前** (`inlet` 阶段) 对其进行拦截和动态修改。 + +该插件的核心功能包括: +1. 将用户的环境变量(如姓名、当前时间)自动注入到对话的起始位置。 +2. 
根据当前使用的模型和用户信息,智能地开启、关闭或重定向“网络搜索”功能。 +3. 为特定模型补充必要的 API 参数(如 `chat_id`)。 + +通过解析这个插件,开发者可以掌握如何构建一个能够感知上下文(用户、模型、环境变量)并据此动态调整请求内容的智能过滤器。 + +--- + +## 核心工作流 (`inlet` 方法) + +该插件的所有逻辑都集中在 `inlet` 方法中,其工作流程可以分解为: + +1. **注入上下文**: 从 `__metadata__` 参数中获取用户环境变量,并将其作为一个格式化的 Markdown 块,智能地插入到第一条用户消息的开头。 +2. **控制功能**: 分析当前请求的模型名称 (`body['model']`) 和用户信息 (`__user__`),应用一系列规则来决定如何处理“网络搜索”功能。 +3. **补充参数**: 根据模型信息 (`__model__`),为特定的模型(如 `cfchatqwen`)在请求体 `body` 中补充其所需的 `chat_id` 等参数。 + +--- + +## 关键开发模式与技术剖析 + +### 1. 利用 `__metadata__` 和 `__model__` 获取丰富上下文 + +`Filter` 插件的 `inlet` 方法可以接收 `__metadata__` 和 `__model__` 这两个非常有用的参数,它们是插件感知上下文、实现智能化逻辑的关键。 + +- **`__metadata__["variables"]` (环境变量)**: + - **功能**: 这是一个由 Open WebUI 自动填充的、包含当前请求上下文信息的字典。 + - **内容**: 它预置了一系列模板变量,如: + - `{{USER_NAME}}`: 当前用户名 + - `{{CURRENT_DATETIME}}`: 当前日期时间 + - `{{CURRENT_WEEKDAY}}`: 当前星期 + - `{{CURRENT_TIMEZONE}}`: 当前时区 + - `{{USER_LANGUAGE}}`: 用户的语言设置 + - **价值**: 这是在插件中获取用户和环境信息的**标准方式**,无需手动计算。`Inject Env` 插件正是利用这个字典来构建注入到消息中的 Markdown 文本。 + +- **`__model__` (模型信息)**: + - **功能**: 这是一个包含了当前交易所用模型详细信息的字典。 + - **内容**: 开发者可以从中获取模型的 `id`、`info.base_model_id`(对于自定义模型,指向其基础模型)等。 + - **价值**: 允许插件根据不同的模型或模型家族(例如,检查 `base_model_id` 是否以 `qwen` 开头)来执行不同的逻辑分支。 + +**代码示例:** +```python +def inlet( + self, + body: dict, + __metadata__: Optional[dict] = None, + __model__: Optional[dict] = None, +) -> dict: + # 从 __metadata__ 获取环境变量 + variables = __metadata__.get("variables", {}) + if variables: + variable_markdown = f"- **用户姓名**:{variables.get('{{USER_NAME}}', '')}\n" + # ... 注入到消息中 ... + + # 从 __model__ 获取模型基础 ID + if "openai" in __model__: + base_model_id = __model__["openai"]["id"] + else: + base_model_id = __model__["info"]["base_model_id"] + + if base_model_id.startswith("cfchatqwen"): + # ... 执行针对 qwen 模型的特定逻辑 ... +``` + +### 2. 
健壮的消息内容注入 + +向用户的消息中动态添加内容时,必须考虑多种情况以确保插件的健壮性。`insert_user_env_info` 函数为此提供了完美的示范。 + +- **幂等性注入 (Idempotent Injection)**: + - **问题**: 如果每次都简单地在消息前添加内容,当用户连续对话时,环境变量块会被重复注入,造成内容冗余。 + - **解决方案**: 在注入前,先用正则表达式 `re.search()` 检查消息中是否**已存在**环境变量块。 + - 如果**存在**,则使用 `re.sub()` 将其**替换**为最新的内容。 + - 如果**不存在**,才在消息开头**添加**新内容。 + - **价值**: 保证了无论 `inlet` 被调用多少次,环境变量信息在消息中只会出现一次,并且始终保持最新。 + +- **兼容多模态消息**: + - **问题**: 用户的消息 `content` 可能是纯文本字符串,也可能是一个包含文本和图片的列表(`[{'type':'text', ...}, {'type':'image_url', ...}]`)。简单地进行字符串拼接会破坏多模态结构。 + - **解决方案**: + 1. 使用 `isinstance(content, list)` 检查内容是否为列表。 + 2. 如果是列表,则遍历它,找到 `type` 为 `text` 的那部分。 + 3. 对文本部分执行上述的“幂等性注入”逻辑。 + 4. 如果列表中**没有**文本部分(例如,用户只发了一张图片),则**主动插入**一个新的文本部分 `{'type': 'text', 'text': ...}` 到列表的开头。 + +**启示**: 对消息体的任何修改都必须考虑其数据结构(`str` 或 `list`),并进行相应的处理,以确保插件的广泛兼容性。 + +### 3. 基于模型的动态路由与功能切换 + +`change_web_search` 函数是“拦截与翻译”模式的又一个精彩应用,并且引入了更高级的“模型重定向”技巧。 + +- **模式一:参数翻译 (适用于通义千问)** + - **场景**: `qwen-max` 模型可能不认识 Open WebUI 的标准 `web_search` 开关,而是需要一个名为 `enable_search` 的参数。 + - **实现**: + 1. 拦截:`features["web_search"] = False` + 2. 翻译:`body.setdefault("enable_search", True)` + - **效果**: 对用户透明地将会话切换到了模型的原生搜索模式。 + +- **模式二:模型重定向 (适用于 Deepseek/Gemini 等)** + - **场景**: 某个模型系列(如 `deepseek`)本身不支持搜索,但其提供商部署了一个带搜索功能的版本,其模型名称可能是 `deepseek-chat-search`。 + - **实现**: + 1. 检查当前模型是否为 `cfdeepseek-deepseek` 且**不**以 `-search` 结尾。 + 2. 如果是,则**直接修改请求体中的模型名称**: `body["model"] = body["model"] + "-search"`。 + 3. 最后,禁用标准的 `web_search` 开关:`features["web_search"] = False`。 + - **效果**: 这种方式巧妙地将用户的请求“重定向”到了一个功能更强的模型版本,而用户在前端选择的仍然是普通模型。这为插件开发者提供了极大的灵活性,可以创建功能增强的“虚拟模型”。 + +### 4. 
用户特定逻辑 + +插件还可以根据用户信息执行特定逻辑,这对于 A/B 测试、灰度发布或为特定用户提供定制功能非常有用。 + +**代码示例:** +```python +# 从 __user__ 参数中获取用户邮箱 +user_email = __user__.get("email") + +# 为特定用户禁用网络搜索 +if user_email == "yi204o@qq.com": + features["web_search"] = False +``` + +## 总结 + +`Inject Env` 插件虽然代码量不大,但它像一把精准的手术刀,展示了 `Filter` 插件在请求预处理阶段的强大能力。通过学习它,我们可以掌握: + +- **利用上下文**: 如何充分利用 `__metadata__` 和 `__model__` 参数,让插件变得“智能”和“情境感知”。 +- **稳健地修改内容**: 如何在不破坏多模态结构和保证幂等性的前提下,向用户消息中注入信息。 +- **高级功能控制**: 如何通过“参数翻译”和“模型重定向”等高级技巧,实现对模型功能(如网络搜索)的精细化控制。 +- **构建模板**: 这个插件是任何需要在请求发送前注入动态信息(如 Prompt Engineering、上下文增强、参数调整)的过滤器的绝佳起点。 + +``` \ No newline at end of file diff --git a/docs/examples/gemini_manifold_plugin_examples.md b/docs/examples/gemini_manifold_plugin_examples.md new file mode 100644 index 0000000..81efae1 --- /dev/null +++ b/docs/examples/gemini_manifold_plugin_examples.md @@ -0,0 +1,1838 @@ +# Gemini Manifold 插件通用例子 + +## 1. 配置层叠(Valves + UserValves) + +```python +from pydantic import BaseModel + +class Valves(BaseModel): + GEMINI_API_KEY: str | None = None + USE_VERTEX_AI: bool = False + THINKING_BUDGET: int = 8192 + +class UserValves(BaseModel): + GEMINI_API_KEY: str | None = None + THINKING_BUDGET: int | None = None + + +def merge_valves(default: Valves, user: UserValves | None) -> Valves: + merged = default.model_dump() + if user: + for field in user.model_fields: + value = getattr(user, field) + if value not in (None, ""): + merged[field] = value + return Valves(**merged) + +admin_settings = Valves(GEMINI_API_KEY="admin-key", THINKING_BUDGET=8192) +user_settings = UserValves(GEMINI_API_KEY="user-key", THINKING_BUDGET=4096) +effective = merge_valves(admin_settings, user_settings) +print(effective) +``` + +**场景说明:** 与 `gemini_manifold.py` 中 `Valves`/`UserValves` 合并逻辑一致,适用于需要在 admin 默认与用户覆盖之间做透明优先级控制的插件。 + +## 2. 
异步事件与进度反馈(EventEmitter + 上传队列) + +```python +import asyncio +from typing import Callable, Awaitable + +class EventEmitter: + """ + 抽象事件发射器,将所有前端交互统一到异步通道中。 + """ + def __init__(self, emit: Callable[[dict], Awaitable[None]] | None = None, + hide_successful_status: bool = False): + self._emit = emit + self.hide_successful_status = hide_successful_status + + async def emit_status(self, message: str, done: bool = False, hidden: bool = False) -> None: + """ + 发出状态消息。如果 done=True 且 hide_successful_status=True,则在前端隐藏。 + """ + if not self._emit: + return + + if done and self.hide_successful_status: + hidden = True + + event = { + "type": "status", + "data": { + "description": message, + "done": done, + "hidden": hidden + } + } + await self._emit(event) + + async def emit_toast(self, msg: str, toast_type: str = "info") -> None: + """ + 发出 toast 通知(弹窗)。 + """ + if not self._emit: + return + + event = { + "type": "notification", + "data": { + "type": toast_type, + "content": msg + } + } + await self._emit(event) + + async def emit_completion(self, content: str | None = None, done: bool = False, + error: str | None = None, usage: dict | None = None) -> None: + """ + 发出完成事件,可含内容、错误、使用量等信息。 + """ + if not self._emit: + return + + event = {"type": "chat:completion", "data": {"done": done}} + if content is not None: + event["data"]["content"] = content + if error is not None: + event["data"]["error"] = {"detail": error} + if usage is not None: + event["data"]["usage"] = usage + + await self._emit(event) + + +class UploadStatusManager: + """ + 管理并发文件上传的状态,自动追踪注册与完成计数。 + """ + def __init__(self, emitter: EventEmitter, start_time: float): + self.emitter = emitter + self.start_time = start_time + self.queue = asyncio.Queue() + self.total_uploads_expected = 0 + self.uploads_completed = 0 + self.finalize_received = False + self.is_active = False + + async def run(self) -> None: + """ + 后台任务,监听队列并发出状态更新。 + """ + import time + + while not (self.finalize_received and + 
self.total_uploads_expected == self.uploads_completed): + try: + msg = await asyncio.wait_for(self.queue.get(), timeout=1.0) + except asyncio.TimeoutError: + continue + + msg_type = msg[0] + + if msg_type == "REGISTER_UPLOAD": + self.is_active = True + self.total_uploads_expected += 1 + await self._emit_progress_update(time.monotonic()) + elif msg_type == "COMPLETE_UPLOAD": + self.uploads_completed += 1 + await self._emit_progress_update(time.monotonic()) + elif msg_type == "FINALIZE": + self.finalize_received = True + + self.queue.task_done() + + async def _emit_progress_update(self, current_time: float) -> None: + """发出进度更新到前端。""" + if not self.is_active: + return + + elapsed = current_time - self.start_time + time_str = f"(+{elapsed:.2f}s)" + + is_done = (self.total_uploads_expected > 0 and + self.uploads_completed == self.total_uploads_expected) + + if is_done: + msg = f"上传完成。{self.uploads_completed} 个文件已处理。{time_str}" + else: + msg = f"上传中 {self.uploads_completed + 1}/{self.total_uploads_expected}… {time_str}" + + await self.emitter.emit_status(msg, done=is_done) + + +async def multi_file_upload_workflow( + file_list: list[tuple[str, bytes]], + emitter: EventEmitter +) -> list[str]: + """ + 示范多文件并发上传的完整工作流。 + """ + import time + + start_time = time.monotonic() + status_mgr = UploadStatusManager(emitter, start_time) + + # 启动后台状态管理器 + manager_task = asyncio.create_task(status_mgr.run()) + + # 为每个文件创建上传任务 + async def upload_file(name: str, data: bytes) -> str: + await status_mgr.queue.put(("REGISTER_UPLOAD",)) + try: + await asyncio.sleep(0.5) # 模拟网络延迟 + result = f"uploaded-{name}" + await emitter.emit_toast(f"✓ 文件 {name} 上传成功", "success") + return result + except Exception as e: + await emitter.emit_toast(f"✗ 文件 {name} 上传失败: {e}", "error") + raise + finally: + await status_mgr.queue.put(("COMPLETE_UPLOAD",)) + + # 并发执行所有上传 + tasks = [upload_file(name, data) for name, data in file_list] + results = await asyncio.gather(*tasks, return_exceptions=True) + + # 
通知状态管理器完成 + await status_mgr.queue.put(("FINALIZE",)) + await manager_task + + # 汇总结果 + success = [r for r in results if not isinstance(r, Exception)] + return success + + +# 完整使用示例 +async def demo(): + async def fake_emit(payload): + print(f"→ {payload['type']}: {payload['data']}") + + emitter = EventEmitter(fake_emit, hide_successful_status=False) + + files = [ + ("doc1.pdf", b"content1"), + ("image.jpg", b"content2"), + ("data.csv", b"content3"), + ] + + results = await multi_file_upload_workflow(files, emitter) + print(f"\n✓ 上传成功: {len(results)} 个文件") + +asyncio.run(demo()) +``` + +**完整流程状态显示说明:** + +整个异步工作流的状态显示遵循以下链路: + +```text +初始化 + ↓ +发出"准备请求"状态 → [emit_status] → 前端显示状态条 + ↓ +启动后台 UploadStatusManager 任务 + ↓ +并发执行多个上传任务 + ├─→ 任务1: REGISTER_UPLOAD → [更新计数] → emit_status("上传中 1/3…") + ├─→ 任务2: REGISTER_UPLOAD → [更新计数] → emit_status("上传中 2/3…") + └─→ 任务3: REGISTER_UPLOAD → [更新计数] → emit_status("上传中 3/3…") + ↓ +每个任务完成时 + ├─→ emit_toast("✓ 文件上传成功", "success") → 前端弹窗确认 + └─→ COMPLETE_UPLOAD → [更新计数] → emit_status("上传中 1/3…") 或 "上传完成" + ↓ +所有任务完成 → FINALIZE → 关闭后台管理器 + ↓ +发出最终状态 → emit_status("全部完成", done=True) → 前端状态条完成 +``` + +**关键数据流动:** + +1. **EventEmitter** 负责将事件发送到前端 + - `emit_status()`: 状态条消息 + - `emit_toast()`: 弹窗通知 + - `emit_completion()`: 完成事件(含 usage 等) + +2. **UploadStatusManager** 后台任务持续监听队列 + - 接收 `("REGISTER_UPLOAD",)` → 计数加 1 → 计算进度 → 更新状态显示 + - 接收 `("COMPLETE_UPLOAD",)` → 计数加 1 → 重新计算进度 → 更新状态显示 + - 接收 `("FINALIZE",)` → 退出循环 → 任务完成 + +3. 
**实时计数逻辑** + +```text +已完成数 / 总数 = 进度百分比 +显示: "上传中 {已完成+1}/{总数}… (+X.XXs)" +当完成数 == 总数: 显示 "上传完成。3 个文件已处理。(+2.50s)" +``` + +**场景说明:** 完整模拟 `gemini_manifold.py` 中 `EventEmitter` + `UploadStatusManager` 的实战设计。支持多并发任务状态跟踪、自动计数、toast 通知与后台进度汇报。适用于: + +- 多文件并发上传且需要实时进度反馈的场景 +- API 轮询或长流程中持续向前端汇报进展 +- 需要自隐藏成功状态但保留错误警告的交互流程 +- 复杂的异步任务编排与协调 +- 需要细粒度时间戳与计数统计的长流程 + +## 3. 文件缓存 + 幂等上传(xxHash + deterministic 名称) + +```python +import xxhash + +def content_hash(data: bytes) -> str: + return xxhash.xxh64(data).hexdigest() + +cache: dict[str, str] = {} + +def deterministic_name(hash_val: str) -> str: + return f"files/owui-v1-{hash_val}" + +async def maybe_upload(data: bytes): + h = content_hash(data) + if h in cache: + print("cache hit", cache[h]) + return cache[h] + name = deterministic_name(h) + cache[h] = name + print("uploading", name) + return name + ``` + + **场景说明:** 简化版 `FilesAPIManager` 热/暖/冷路径,适合需要避免重复上传、并希望后端能通过 deterministic 名称恢复文件状态的场景。 + + +## 4. 
统一响应处理(流式 + 非流式适配) + +```python +from typing import AsyncGenerator + +class UnifiedResponseProcessor: + async def process_stream( + self, response_stream: AsyncGenerator, is_stream: bool = True + ) -> AsyncGenerator: + """ + 处理流式或一次性响应,统一返回 AsyncGenerator。 + """ + try: + async for chunk in response_stream: + # 处理单个 chunk + processed = await self._process_chunk(chunk) + if processed: + yield {"choices": [{"delta": processed}]} + except Exception as e: + yield {"choices": [{"delta": {"content": f"Error: {e}"}}]} + finally: + yield "data: [DONE]" + + async def _process_chunk(self, chunk): + # 简化处理逻辑 + return {"content": str(chunk)} + +# 使用示例 +async def main(): + processor = UnifiedResponseProcessor() + async def fake_stream(): + for i in range(3): + yield f"chunk-{i}" + async for item in processor.process_stream(fake_stream()): + print(item) +``` + +**场景说明:** 对应 `gemini_manifold.py` 中 `_unified_response_processor` 的核心思想——无论前端是否启用流式,插件内部都用统一的 AsyncGenerator 处理,避免代码分支。适用于需要兼容流式与非流式响应的任何插件。 + +## 5. 
特殊标签禁用(防止前端解析干扰) + +```python +import re + +ZWS = "\u200b" # 零宽空格 +SPECIAL_TAGS = ["think", "details", "thinking", "reason"] + +def disable_special_tags(text: str) -> tuple[str, int]: + """ + 在特殊标签前插入零宽空格,防止前端 HTML 解析器处理它们。 + """ + if not text: + return "", 0 + + TAG_REGEX = re.compile( + r"<(/?(" + "|".join(re.escape(tag) for tag in SPECIAL_TAGS) + r"))" + ) + modified, count = TAG_REGEX.subn(rf"<{ZWS}\1", text) + return modified, count + +def enable_special_tags(text: str) -> str: + """ + 移除零宽空格,恢复原始标签,用于模型理解上下文。 + """ + if not text: + return "" + REVERSE_REGEX = re.compile( + r"<" + ZWS + r"(/?(" + "|".join(re.escape(tag) for tag in SPECIAL_TAGS) + r"))" + ) + return REVERSE_REGEX.sub(r"<\1", text) + +# 使用示例 +original = "<think>这是思考内容</think>" +disabled, count = disable_special_tags(original) +print(f"禁用前: {original}") +print(f"禁用后: {disabled}") +print(f"修改数: {count}") +``` + +**场景说明:** 当模型可能生成会被前端 HTML 解析器误触发的标签(如 `<think>` 推理块)时,通过注入零宽空格破坏标签结构,再在需要时恢复。这是 `gemini_manifold.py` 中保护前端的一种防御手段,对任何可能含有模型生成 HTML 的插件都有借鉴价值。 + +## 6. 
统一异常处理与用户反馈 + +```python +class PluginException(Exception): + """插件统一异常基类。""" + pass + +class APIError(PluginException): + """API 调用异常。""" + pass + +class FileUploadError(PluginException): + """文件上传异常。""" + pass + +class EventEmitterForErrors: + def __init__(self): + self.event_queue = [] + + async def emit_error(self, error_msg: str, is_toast: bool = True): + """ + 发出错误事件,同时记录日志。 + """ + event = {"type": "error", "data": {"detail": error_msg}} + if is_toast: + event["data"]["toast_type"] = "error" + self.event_queue.append(event) + print(f"[ERROR] {error_msg}") + +async def call_api_with_fallback(api_key: str, emitter: EventEmitterForErrors): + """ + 调用 API 并完整处理异常。 + """ + try: + # 模拟 API 调用 + if not api_key: + raise ValueError("API key 未提供") + # 成功处理 + return {"status": "ok"} + except ValueError as e: + await emitter.emit_error(f"参数错误: {e}") + raise APIError(f"API 调用失败: {e}") from e + except Exception as e: + await emitter.emit_error(f"意外错误: {e}", is_toast=True) + raise PluginException(f"插件异常: {e}") from e + +# 使用示例 +import asyncio +emitter = EventEmitterForErrors() +try: + result = asyncio.run(call_api_with_fallback("", emitter)) +except PluginException as e: + print(f"捕获到插件异常: {e}") +``` + +**场景说明:** 对应 `gemini_manifold.py` 中 `GenaiApiError`、`FilesAPIError` 等定制异常。通过分层异常类和统一的 emit_error 机制,确保所有错误都能被前端看到,同时便于调试和日志分析。 + +## 7. 
后处理与数据回写(Usage + Grounding) + +```python +from datetime import datetime + +class PostProcessor: + def __init__(self, request_state): + self.state = request_state + + async def emit_usage(self, prompt_tokens: int, completion_tokens: int): + """ + 发出 Token 使用情况。 + """ + total = prompt_tokens + completion_tokens + elapsed = datetime.now().timestamp() + usage_data = { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": total, + "completion_time": elapsed, + } + print(f"Usage: {usage_data}") + return usage_data + + async def emit_grounding(self, chat_id: str, message_id: str, grounding_metadata): + """ + 将 grounding 数据存入应用状态,供 Filter 或后续步骤使用。 + """ + key = f"grounding_{chat_id}_{message_id}" + self.state[key] = grounding_metadata + print(f"存储 grounding 数据到 {key}") + + async def emit_status(self, message: str, done: bool = False): + """ + 发出最终状态。 + """ + status_event = { + "type": "status", + "data": {"description": message, "done": done} + } + print(f"Status: {status_event}") + +# 使用示例 +async def main(): + state = {} # 模拟 request.app.state + processor = PostProcessor(state) + + await processor.emit_usage(prompt_tokens=50, completion_tokens=100) + await processor.emit_grounding( + chat_id="chat_123", + message_id="msg_456", + grounding_metadata={"sources": ["source1", "source2"]} + ) + await processor.emit_status("Response finished", done=True) + print("\n最终状态:", state) + +asyncio.run(main()) +``` + +**场景说明:** 模拟 `gemini_manifold.py` 中 `_do_post_processing` 的职责——在主响应流完成后,将 usage、grounding、状态等元数据通过独立通道发出。这种分离确保前端能获得完整信息,同时不阻塞流式响应。 + +## 8. 
日志与数据截断(Loguru + 自动截断) + +```python +import json +from functools import wraps + +class PluginLogger: + def __init__(self, max_payload_length: int = 256): + self.max_length = max_payload_length + + def truncate_payload(self, data: any) -> str: + """ + 将复杂数据序列化并截断。 + """ + try: + serialized = json.dumps(data, default=str) + if len(serialized) > self.max_length: + return serialized[:self.max_length] + "[...]" + return serialized + except Exception as e: + return f"" + + def log_with_payload(self, level: str, message: str, payload: any = None): + """ + 记录带有 payload 的日志,自动截断。 + """ + log_line = f"[{level}] {message}" + if payload is not None: + truncated = self.truncate_payload(payload) + log_line += f" - {truncated}" + print(log_line) + +# 使用示例 +logger = PluginLogger(max_payload_length=100) +logger.log_with_payload("DEBUG", "API Response", + payload={"data": "x" * 200, "status": "ok"}) +logger.log_with_payload("INFO", "File uploaded", + payload={"file_id": "abc123", "size": 1024}) +``` + +**场景说明:** 对应 `gemini_manifold.py` 中自定义 loguru handler 与 `_truncate_long_strings` 的逻辑。当插件需要调试复杂 API 响应或大型 payload 时,通过自动截断避免日志爆炸,同时保留关键信息。 + +## 9. 
联网搜索功能与源引用显示 + +```python +from typing import TypedDict + +class SearchSource(TypedDict): + title: str + url: str + snippet: str + +class GroundingMetadata(TypedDict): + search_queries: list[str] # 使用的搜索查询 + sources: list[SearchSource] # 检索到的源 + +class SearchableResponseBuilder: + """ + 当启用联网搜索功能时,构建含有搜索信息的响应。 + 对应 gemini_manifold.py 中依据 features["google_search_tool"] 的逻辑。 + """ + def __init__(self, enable_search: bool = False, emitter = None): + self.enable_search = enable_search + self.emitter = emitter + self.grounding_metadata: GroundingMetadata | None = None + + async def build_response_with_search(self, + query: str, + use_search: bool = True) -> tuple[str, GroundingMetadata | None]: + """ + 构建响应,如果启用搜索则收集源信息。 + """ + if not (self.enable_search and use_search): + # 未启用搜索,直接返回响应 + return "这是直接回答,无搜索", None + + # 模拟搜索过程 + search_results = await self._perform_search(query) + + # 构建 grounding 元数据 + self.grounding_metadata = { + "search_queries": [query], + "sources": search_results + } + + # 构建含源引用的响应 + response_with_sources = await self._format_response_with_citations( + query, search_results + ) + + return response_with_sources, self.grounding_metadata + + async def _perform_search(self, query: str) -> list[SearchSource]: + """ + 模拟调用 Google Search API(实际中由 gemini_manifold.py 的 tool 层处理)。 + """ + # 模拟搜索结果 + results = [ + { + "title": "Open WebUI 官方文档", + "url": "https://docs.openwebui.com", + "snippet": "Open WebUI 是一个开源的大语言模型管理平台..." + }, + { + "title": "Open WebUI GitHub 仓库", + "url": "https://github.com/open-webui/open-webui", + "snippet": "开源代码库,包含所有源码和插件..." + }, + { + "title": "Open WebUI 社区论坛", + "url": "https://community.openwebui.com", + "snippet": "用户交流和问题解答社区..." 
+ } + ] + + if self.emitter: + await self.emitter.emit_toast( + f"✓ 已搜索: '{query}' 找到 {len(results)} 个结果", + "success" + ) + + return results + + async def _format_response_with_citations(self, + query: str, + sources: list[SearchSource]) -> str: + """ + 将搜索结果格式化为含有源引用的响应。 + """ + response = f"关于 '{query}' 的搜索结果:\n\n" + + for idx, source in enumerate(sources, 1): + response += f"[{idx}] **{source['title']}**\n" + response += f" URL: {source['url']}\n" + response += f" 摘要: {source['snippet']}\n\n" + + response += "---\n\n根据上述源的信息,可以得出以下结论:\n" + response += "Open WebUI 是一个功能丰富的平台,提供了完整的文档、源码和社区支持。" + + return response + + def extract_sources_for_frontend(self) -> list[dict]: + """ + 提取 grounding 元数据中的源,用于前端显示为 'sources' 字段。 + 对应 gemini_manifold.py 中 emit_completion(sources=...) 的数据。 + """ + if not self.grounding_metadata: + return [] + + sources_for_ui = [] + for source in self.grounding_metadata["sources"]: + sources_for_ui.append({ + "title": source["title"], + "url": source["url"], + "description": source["snippet"] + }) + + return sources_for_ui + + +async def demo_search_workflow(): + """ + 演示启用联网搜索时的完整工作流。 + """ + class FakeEmitter: + async def emit_toast(self, msg, ttype): + print(f"[{ttype.upper()}] {msg}") + + async def emit_status(self, msg, done=False): + print(f"[STATUS] {msg} (done={done})") + + emitter = FakeEmitter() + + # 创建搜索构建器,启用联网搜索 + builder = SearchableResponseBuilder(enable_search=True, emitter=emitter) + + # 步骤 1: 用户提问 + user_query = "Open WebUI 的插件开发最佳实践" + await emitter.emit_status(f"处理查询: {user_query}") + + # 步骤 2: 构建响应并收集源 + response_text, grounding = await builder.build_response_with_search(user_query) + + # 步骤 3: 提取源引用供前端使用 + sources_for_ui = builder.extract_sources_for_frontend() + + # 步骤 4: 构建完整的 completion 事件 + completion_event = { + "type": "chat:completion", + "data": { + "content": response_text, + "sources": sources_for_ui, # 前端将在消息下方显示这些源 + "done": True + } + } + + print("\n=== 最终响应 ===") + print(f"内容:\n{response_text}") + 
print(f"\n源信息 (供前端显示):") + for source in sources_for_ui: + print(f" - {source['title']}: {source['url']}") + + print(f"\n完整事件数据:") + import json + print(json.dumps(completion_event, ensure_ascii=False, indent=2)) + +asyncio.run(demo_search_workflow()) +``` + +**实现细节说明:** + +联网搜索功能的完整链路(对应 `gemini_manifold.py`): + +```text +1. 前端请求时,features 包含 "google_search_tool": true + ↓ +2. Pipe.pipe() 检测到 features["google_search_tool"] + ↓ +3. 在 _build_gen_content_config() 中: + gen_content_conf.tools.append( + types.Tool(google_search=types.GoogleSearch()) + ) + ↓ +4. 将 config 传给 Google Gemini API + ↓ +5. API 自动执行搜索并返回搜索结果 + ↓ +6. 获取 response.candidates[0].grounding_metadata + ├─ 包含搜索查询 + ├─ 包含检索到的源(标题、URL、摘要) + └─ 包含段落级的源匹配信息 + ↓ +7. 在 _do_post_processing() 中: + 将 grounding_metadata 存入 request.app.state + 供后续 Filter 使用 + ↓ +8. 在响应流中通过 emit_completion(sources=...) + 将源引用发送到前端 + ↓ +9. 前端在消息下方显示: + [1] 源标题 (链接) + [2] 源标题 (链接) + ... +``` + +**关键实现要点:** + +| 步骤 | 职责 | 代码位置 | +|------|------|---------| +| **检测开关** | 检查 `features["google_search_tool"]` | `_build_gen_content_config()` | +| **配置工具** | 将 `google_search` 添加到 tools 列表 | `gen_content_conf.tools.append()` | +| **执行搜索** | Google API 自动执行,返回 grounding_metadata | API 响应处理 | +| **提取源** | 从 grounding_metadata 提取源信息 | `_do_post_processing()` | +| **存储状态** | 将 grounding 存入 `request.app.state` | `_add_grounding_data_to_state()` | +| **发送前端** | 通过 `emit_completion(sources=...)` 发送 | `_unified_response_processor()` | +| **显示引用** | 前端渲染源链接和摘要 | 前端 UI 逻辑 | + +**场景说明:** 展示如何在启用联网搜索时收集、处理和展示搜索源。适用于: + +- 需要集成搜索功能的插件 +- 需要展示信息来源的智能应用 +- 需要追踪 API 调用的溯源场景 +- 需要构建可引用的 LLM 应用 + +## 总结与最佳实践 + +| 哲学 | 核心机制 | 使用场景 | +|------|---------|----------| +| **配置层叠** | Valves + UserValves 合并 | Admin 全局设置 + 用户按需覆盖 | +| **异步反馈** | EventEmitter + Queue | 长流程中持续向前端汇报状态 | +| **资源复用** | xxHash + 缓存 + Stateless GET | 避免重复上传,快速恢复 | +| **统一处理** | AsyncGenerator + 适配器 | 流式和非流式响应一致处理 | +| **安全防护** | 特殊标签注入 ZWS | 防止模型生成的 HTML 破坏前端 | +| **异常管理** | 分层异常 + emit_error 
| 所有错误对前端可见 | +| **后处理** | Usage/Grounding 在 response 后 | 非阻塞式补充元数据 | +| **日志控制** | 自动截断 + 多级别 | 避免日志爆炸,便于调试 | +| **搜索集成** | grounding_metadata 提取 + 源展示 | 联网搜索时收集并显示信息来源 | + +## 补充:响应格式与引用解析 + +### 一、源(Source)的数据结构 + +当启用联网搜索时,Google Gemini API 返回的 `grounding_metadata` 包含搜索源信息,对应以下结构: + +```python +# Google API 返回的 grounding_metadata 格式 +{ + "search_queries": ["用户的搜索查询"], + "web_search_results": [ + { + "uri": "https://example.com/page1", + "title": "网页标题", + "snippet": "网页摘要文本...", + "display_uri": "example.com", + }, + # ... 更多搜索结果 + ], + "grounding_supports": [ + { + "segment": { + "start_index": 0, + "end_index": 145, + "text": "模型回答中被引用的这段文本" + }, + "supporting_segments": [ + { + "segment": { + "text": "网页中的相关内容" + }, + "uri": "https://example.com/page1" + } + ], + "confidence_scores": [0.95] + } + ] +} +``` + +### 二、引用标记的格式 + +**API 返回的响应中引用标记格式:** + +Google Gemini API 在响应文本中自动插入引用标记: + +```text +根据搜索结果[1],Open WebUI 是一个开源平台[2]。用户可以通过插件[1][2] +扩展功能。 + +[1] https://docs.openwebui.com - Open WebUI 官方文档 +[2] https://github.com/open-webui/open-webui - GitHub 仓库 +``` + +**引用标记特征:** + +- 格式:`[N]` 其中 N 是数字索引(1-based) +- 位置:内联在文本中,跟在被引用的短语后 +- 对应关系:`[1]` → `web_search_results[0]`,`[2]` → `web_search_results[1]` 等 + +### 三、前端显示的 sources 格式 + +`emit_completion(sources=...)` 发送给前端的数据格式: + +```python +sources = [ + { + "title": "Open WebUI 官方文档", + "uri": "https://docs.openwebui.com", + "snippet": "Open WebUI 是一个开源的大语言模型管理平台...", + "display_uri": "docs.openwebui.com", + }, + { + "title": "Open WebUI GitHub 仓库", + "uri": "https://github.com/open-webui/open-webui", + "snippet": "开源代码库,包含所有源码和插件...", + "display_uri": "github.com", + } +] +``` + +**前端如何渲染:** + +1. **识别内联引用标记** → 将 `[1]` 链接到 `sources[0]` + +2. **在消息下方显示源面板**,通常格式为: + + ```text + [1] Open WebUI 官方文档 (docs.openwebui.com) + [2] Open WebUI GitHub 仓库 (github.com) + ``` + +3. **点击引用标记** → 高亮对应的源,显示摘要 + +4. **点击源链接** → 在新标签页打开 URL + +### 四、完整数据流转 + +```text +1. 
用户启用搜索功能 (features["google_search_tool"] = true) + ↓ +2. Pipe 配置 API:gen_content_conf.tools.append( + types.Tool(google_search=types.GoogleSearch()) + ) + ↓ +3. Google Gemini API 执行搜索,返回: + - 文本响应(含内联 [N] 标记) + - grounding_metadata(含搜索结果和支撑段落) + ↓ +4. gemini_manifold.py _process_part() 处理: + - 提取文本响应 + - 通过 _disable_special_tags() 处理特殊标签 + - 返回结构化 chunk: {"content": "文本[1][2]..."} + ↓ +5. _do_post_processing() 后处理: + - 提取 candidate.grounding_metadata + - 存入 request.app.state[f"grounding_{chat_id}_{message_id}"] + - 提取 web_search_results → sources 列表 + ↓ +6. emit_completion(content="...", sources=[...]) + - 发送完整内容给前端 + - 同时发送 sources 列表 + ↓ +7. 前端渲染: + - 消息体显示文本和 [1][2] 引用标记 + - 底部显示 sources 面板 + - 用户可交互查看源信息 +``` + +### 五、可能需要移除的引用标记 + +在某些情况下(如用户编辑消息),需要调用 `_remove_citation_markers()` 移除不再有效的引用标记: + +```python +# 源数据结构(来自 grounding_metadata) +source = { + "uri": "https://example.com", + "title": "Page Title", + "metadata": [ + { + "supports": [ + { + "segment": { + "start_index": 10, + "end_index": 50, + "text": "这是被引用的文本片段" + }, + "grounding_chunk_indices": [0, 1] # 对应 [1], [2] + } + ] + } + ] +} + +# 方法会找到 "这是被引用的文本片段[1][2]" 并删除 [1][2] +cleaned_text = _remove_citation_markers(response_text, [source]) +``` + +### 六、关键要点 + +**✓ 引用的识别规则:** + +- 文本内联的 `[数字]` 是引用标记 +- 必须对应 sources 列表中的同序号元素 +- 通常由 API 自动生成和嵌入 + +**✗ 常见问题:** + +- 删除源但保留标记 → 前端会显示孤立的 `[N]` +- 修改文本后标记位置错误 → 需要重新生成 +- 混合多个搜索结果 → 确保索引连续且不重叠 + +### 七、Chat/Completions 接口的响应格式 + +当直接通过 Open WebUI 的 `chat/completions` API 调用 pipe 时,响应应采用以下格式返回引用信息。 + +**流式响应(streaming=true):** + +Pipe 返回 `AsyncGenerator[dict]`,每个 dict 按以下顺序发送: + +```python +# 流式块(多次) +{ + "choices": [ + { + "delta": { + "content": "根据搜索结果[1],Open WebUI..." + } + } + ] +} + +# 完成标记 +"data: [DONE]" + +# 后续元数据通过 event_emitter 事件发送 +# 1. emit_status - 状态更新消息 +# 2. emit_toast - 弹窗通知(如错误或成功提示) +# 3. emit_usage - Token 使用量数据 +# 4. 
emit_completion(sources=[...]) - 发送最终的源信息列表 +``` + +**关键特性:** + +- 文本内容通过 `{"choices": [{"delta": {"content": "..."}}]}` 流式返回 +- 引用标记 `[1][2]` 直接包含在内容文本中 +- 源信息通过 `emit_completion(sources=[...])` 以事件形式发送到前端 +- 完成后发送 `"data: [DONE]"` 标记 + +**非流式响应(streaming=false):** + +整个响应通过适配器转换为单次 AsyncGenerator: + +```python +async def single_item_stream(response): + yield response + +# 输出结果类似流式,但内容全部在一个块中 +{ + "choices": [ + { + "delta": { + "content": "完整的回答文本[1][2]..." + } + } + ] +} + +"data: [DONE]" +``` + +### 八、sources 数据的发送方式 + +#### 方式 1:通过 EventEmitter 事件发送(推荐) + +```python +await event_emitter.emit_completion( + content=None, # 内容已通过 delta 发送 + sources=[ + { + "title": "Open WebUI 官方文档", + "uri": "https://docs.openwebui.com", + "snippet": "Open WebUI 是一个开源的大语言模型管理平台...", + "display_uri": "docs.openwebui.com", + }, + { + "title": "Open WebUI GitHub 仓库", + "uri": "https://github.com/open-webui/open-webui", + "snippet": "开源代码库,包含所有源码和插件...", + "display_uri": "github.com", + } + ], + done=True +) +``` + +这会产生事件: + +```python +{ + "type": "chat:completion", + "data": { + "done": True, + "sources": [ + {"title": "...", "uri": "...", ...}, + {"title": "...", "uri": "...", ...} + ] + } +} +``` + +#### 方式 2:通过应用状态存储(Companion Filter 读取) + +gemini_manifold 的 `_add_grounding_data_to_state()` 将 grounding_metadata 存入: + +```python +request.app.state[f"grounding_{chat_id}_{message_id}"] = grounding_metadata_obj +``` + +Companion Filter 或其他处理组件可以读取这个状态并从中提取源信息。 + +#### 方式 3:直接在响应文本中(最简单) + +如果只需要在文本中显示源链接,可以让 API 返回: + +```text +根据搜索结果[1],Open WebUI 是一个开源平台[2]。 + +[1] https://docs.openwebui.com - Open WebUI 官方文档 +[2] https://github.com/open-webui/open-webui - GitHub 仓库 +``` + +前端将识别 `[N]` 标记并自动提取为引用。 + +### 九、完整的 pipe 返回规范 + +**Pipe 方法签名:** + +```python +async def pipe( + self, + body: dict, # 请求体:模型、消息、流式标志等 + __user__: dict, # 用户信息 + __request__: Request, # FastAPI Request + __event_emitter__: Callable[[Event], Awaitable[None]] | None, # 事件发射器 + __metadata__: dict, # 元数据:特性、任务类型等 
+) -> AsyncGenerator[dict, None] | str: + ... + return self._unified_response_processor(...) +``` + +**返回的 AsyncGenerator 应产生的消息序列:** + +```text +1. {"choices": [{"delta": {"content": "流式文本块..."}}]} ← 多次 +2. {"choices": [{"delta": {"content": "[1][2]..."}}]} ← 最后的内容块 +3. "data: [DONE]" ← 完成标记 +4. (事件发送阶段) emit_status, emit_toast, emit_usage, emit_completion(sources=[...]) +``` + +**事件发送(通过 EventEmitter):** + +这些不是 AsyncGenerator 的产出,而是通过 `__event_emitter__` 回调发送: + +```python +# 在处理过程中发送状态 +await event_emitter.emit_status("处理中...", done=False) + +# 发送错误或成功提示 +event_emitter.emit_toast("✓ 完成", "success") + +# 发送 Token 使用量 +await event_emitter.emit_usage({ + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + "completion_time": 2.34 +}) + +# 发送最终的源信息和其他元数据 +await event_emitter.emit_completion( + sources=[...], + usage={...}, + done=True +) +``` + +### 十、实现 Pipe 时的源处理清单 + +当你实现一个支持搜索的 pipe 时,确保: + +**✓ 流式响应部分:** + +- [ ] 文本包含内联的 `[1]`, `[2]` 等引用标记 +- [ ] 每个块通过 `yield {"choices": [{"delta": {"content": "..."}}]}` 返回 +- [ ] 最后一块完成后发送 `yield "data: [DONE]"` + +**✓ 元数据部分:** + +- [ ] 调用 `emit_status()` 显示处理进度 +- [ ] 调用 `emit_toast()` 通知成功或错误 +- [ ] 调用 `emit_usage()` 发送 Token 使用量 +- [ ] 调用 `emit_completion(sources=[...])` 发送源列表 + +**✓ 源数据结构:** + +- [ ] 每个源包含 `title`, `uri`, `snippet`, `display_uri` +- [ ] 源的顺序与文本中 `[N]` 的顺序一一对应 +- [ ] 使用 `emit_completion(sources=[...], done=True)` 标记完成 + +**✗ 常见错误:** + +- [ ] ❌ 只返回文本,不发送源信息 +- [ ] ❌ 源数据格式不完整或字段错误 +- [ ] ❌ 源顺序与引用标记不匹配 +- [ ] ❌ 混合了内容和元数据返回方式 + +## 补充:Open WebUI 核心模块详解 + +开发 Open WebUI Pipe 时,需要调用的五个核心模块及其功能说明: + +```python +from open_webui.models.chats import Chats +from open_webui.models.files import FileForm, Files +from open_webui.storage.provider import Storage +from open_webui.models.functions import Functions +from open_webui.utils.misc import pop_system_message +``` + +### 模块 1:`Chats` - 聊天历史管理 + +**功能:** 访问和管理用户的聊天会话历史记录。 + +**核心方法:** + +```python +Chats.get_chat_by_id_and_user_id(id: str, 
user_id: str) -> Chat | None +``` + +**使用示例:** + +```python +# 获取特定用户的特定聊天记录 +chat = Chats.get_chat_by_id_and_user_id( + id=chat_id, + user_id=user_data["id"] +) + +if chat: + # 访问聊天内容和消息历史 + chat_content = chat.chat # 获取 ChatObjectDataTD + messages_db = chat_content.get("messages", [])[:-1] # 获取消息列表,排除最后的空响应 + + # 从消息中提取源信息(用于引用过滤) + for i, message_db in enumerate(messages_db): + sources = message_db.get("sources") # 提取引用源 + files = message_db.get("files", []) # 提取文件列表 +else: + log.warning(f"Chat {chat_id} not found") +``` + +**关键数据结构:** + +```python +# Chat 对象包含: +{ + "id": str, + "user_id": str, + "chat": { + "messages": [ + { + "role": "user|assistant", + "content": str, + "files": [{"type": "file|image", "id": str, "url": str}], + "sources": [{"uri": str, "title": str, ...}] + }, + ... + {} # 最后一条消息为空(待填充的助手响应) + ], + "title": str + } +} +``` + +**使用场景:** + +- 需要访问历史消息以过滤引用标记 +- 需要获取用户上传的文件附件列表 +- 需要验证当前请求与数据库消息数量是否匹配 +- 需要在处理过程中追踪消息上下文 + +**注意事项:** + +- ⚠️ **必须在线程中调用**:这是同步阻塞操作,需要用 `asyncio.to_thread()` 包装 +- ⚠️ **返回值可为 None**:聊天不存在时返回 None,需要检查 +- ⚠️ **消息数量验证**:请求体消息数必须等于数据库消息数,否则可能表示数据不同步 + +--- + +### 模块 2:`Files` - 文件数据库操作 + +**功能:** 查询和管理 Open WebUI 文件数据库中的文件元数据。 + +**核心方法:** + +```python +# 查询文件 +Files.get_file_by_id(file_id: str) -> FileModel | None + +# 创建新文件记录 +Files.insert_new_file(user_id: str, file_form: FileForm) -> FileModel | None + +# 获取文件 MIME 类型等 +FileForm( + id: str, + filename: str, + path: str, + meta: dict # 包含 content_type, size, data 等 +) +``` + +**使用示例:** + +```python +# 查询已上传的文件 +file_model = await asyncio.to_thread(Files.get_file_by_id, file_id) + +if file_model: + # 访问文件元数据 + file_path = file_model.path # 磁盘路径或 gs:// 云存储路径 + mime_type = file_model.meta.get("content_type") # e.g., "image/png" + file_size = file_model.meta.get("size") + + # 读取文件内容 + with open(file_path, "rb") as f: + file_bytes = f.read() + +# 创建新文件记录(如生成图像后) +file_item = await asyncio.to_thread( + Files.insert_new_file, + user_id, + FileForm( + id=str(uuid.uuid4()), 
+ filename="generated-image.png", + path="/path/to/file", + meta={ + "name": "generated-image.png", + "content_type": "image/png", + "size": len(image_bytes), + "data": { + "model": model_name, + "chat_id": chat_id, + "message_id": message_id, + } + } + ) +) +``` + +**关键数据结构:** + +```python +class FileModel: + id: str + user_id: str + filename: str + path: str # 本地路径或 gs:// URI + meta: dict # 文件元数据 + created_at: datetime + updated_at: datetime + +meta = { + "name": str, # 显示名称 + "content_type": str, # MIME 类型 + "size": int, # 字节数 + "data": { # 自定义元数据 + "model": str, + "chat_id": str, + "message_id": str, + } +} +``` + +**使用场景:** + +- 获取用户上传文件的实际路径和 MIME 类型 +- 读取文件内容以上传到 Google Gemini API +- 记录生成的图像和其他输出文件 +- 追踪文件与生成任务的关联关系 + +**注意事项:** + +- ⚠️ **必须在线程中调用**:使用 `asyncio.to_thread()` 包装 +- ⚠️ **返回值可为 None**:文件不存在时返回 None +- ⚠️ **路径处理**:可能是本地路径或云存储 URI(gs://),读取时需要相应处理 +- ⚠️ **元数据字段**:`meta["data"]` 是自定义字段,用于存储业务逻辑相关的上下文 + +--- + +### 模块 3:`Storage` - 文件存储管理 + +**功能:** 上传和管理文件到 Open WebUI 的存储后端(本地磁盘或云存储如 Google Cloud Storage)。 + +**核心方法:** + +```python +Storage.upload_file( + file: BinaryIO, # 文件对象 + filename: str, # 文件名 + tags: dict = {} # 标签 +) -> tuple[bytes, str] # 返回 (文件内容, 存储路径) +``` + +**使用示例:** + +```python +import io +import uuid + +# 准备图像数据 +image_data = generate_image() # 生成的字节数据 +image_id = str(uuid.uuid4()) +imagename = f"{image_id}_generated-image.png" +image_file = io.BytesIO(image_data) + +# 上传到存储后端 +try: + contents, storage_path = await asyncio.to_thread( + Storage.upload_file, + image_file, + imagename, + tags={"model": model_name} # 可选标签 + ) + + log.info(f"File uploaded to: {storage_path}") + # storage_path 可能是: + # - 本地: "/data/uploads/uuid_filename.png" + # - 云存储: "gs://bucket/uploads/uuid_filename.png" + +except Exception as e: + log.exception("Upload failed") +``` + +**关键特性:** + +```text +存储层次: +├─ 本地存储:/data/uploads/ 下的文件 +└─ 云存储:gs://bucket/ 下的 GCS 文件 + +自动处理: +├─ 创建目录 +├─ 重命名以避免冲突 +├─ 返回可访问的路径 +└─ 支持标签分类 +``` + +**使用场景:** + +- 上传模型生成的图像 +- 
存储处理后的文件 +- 在数据库记录前持久化文件 + +**注意事项:** + +- ⚠️ **必须在线程中调用**:使用 `asyncio.to_thread()` 包装 +- ⚠️ **返回的路径**:取决于配置(本地/云),需要配合 `Files.insert_new_file` 记录 +- ⚠️ **文件大小**:确保内存中有足够空间存储文件 +- ✓ **与 Files 配合**:通常先 `Storage.upload_file()`,再 `Files.insert_new_file()` + +--- + +### 模块 4:`Functions` - 过滤器/插件管理 + +**功能:** 查询已安装的过滤器(Filter)的状态和配置,用于检测依赖的 Companion Filter。 + +**核心方法:** + +```python +Functions.get_function_by_id(filter_id: str) -> Function | None + +# Function 对象属性: +# - id: str +# - name: str +# - is_active: bool # 过滤器在 Functions 仪表板中是否启用 +# - is_global: bool # 是否对所有模型全局启用 +# - models: list[str] # 该过滤器启用的模型列表 +``` + +**使用示例:** + +```python +# 检查 Companion Filter 是否安装并启用 +def is_feature_available(filter_id: str, metadata: dict) -> tuple[bool, bool]: + """ + 检查功能是否可用。 + 返回: (is_available, is_toggled_on) + """ + # 1. 检查过滤器是否已安装 + f = Functions.get_function_by_id(filter_id) + if not f: + log.warning(f"Filter '{filter_id}' not installed") + return (False, False) + + # 2. 检查过滤器在 Functions 仪表板中是否启用 + if not f.is_active: + log.warning(f"Filter '{filter_id}' is disabled in Functions dashboard") + return (False, False) + + # 3. 检查过滤器是否为当前模型启用 + model_id = metadata.get("model", {}).get("id") + model_filters = metadata.get("model", {}).get("info", {}).get("meta", {}).get("filterIds", []) + + is_enabled = filter_id in model_filters or f.is_global + if not is_enabled: + log.debug(f"Filter '{filter_id}' not enabled for model '{model_id}'") + return (False, False) + + # 4. 检查用户是否在当前请求中启用了该功能 + user_toggled = filter_id in metadata.get("filter_ids", []) + + return (True, user_toggled) + +# 使用 +is_available, is_enabled = is_feature_available( + "gemini_manifold_companion_v1.7.0", + metadata +) + +if is_available and is_enabled: + log.info("Companion filter available and enabled") +elif is_available: + log.debug("Companion filter available but user disabled it") +else: + log.warning("Companion filter not available") +``` + +**关键检查流程:** + +```text +功能可用性检查链: + +1. 
安装检查 + Functions.get_function_by_id() → None? 返回不可用 + +2. 启用检查 + f.is_active == False? 返回不可用 + +3. 模型启用检查 + filter_id in model_filters or f.is_global? + 否则返回不可用 + +4. 用户切换检查 + filter_id in metadata["filter_ids"]? + 返回用户是否启用 +``` + +**使用场景:** + +- 检测 Companion Filter 是否已安装(用于引用过滤功能) +- 检查 URL Context Tool 或其他高级功能的依赖 +- 在日志中区分"功能不可用"和"用户未启用" +- 决定是否执行相关的处理逻辑 + +**注意事项:** + +- ✓ **同步操作**:不需要 `asyncio.to_thread()` +- ⚠️ **返回值可为 None**:未安装的过滤器返回 None +- ✓ **多层检查**:需要逐层检查安装、启用、配置、用户选择 +- 💡 **日志级别**:根据检查阶段使用不同日志级别(warning/debug) + +--- + +### 模块 5:`pop_system_message` - 消息提取工具 + +**功能:** 从消息列表中提取和分离系统消息。 + +**功能签名:** + +```python +pop_system_message( + messages: list[Message] +) -> tuple[Message | None, list[Message]] +``` + +**使用示例:** + +```python +# 原始消息列表 +messages = [ + { + "role": "system", + "content": "You are a helpful assistant..." + }, + { + "role": "user", + "content": "What is Python?" + }, + { + "role": "assistant", + "content": "Python is a programming language..." + } +] + +# 分离系统消息 +system_message, remaining_messages = pop_system_message(messages) + +# 结果: +# system_message = {"role": "system", "content": "You are a helpful assistant..."} +# remaining_messages = [ +# {"role": "user", "content": "What is Python?"}, +# {"role": "assistant", "content": "Python is a programming language..."} +# ] + +# 提取系统提示文本 +system_prompt = (system_message or {}).get("content") + +# 检查是否存在系统消息 +if system_prompt: + log.debug(f"System prompt found: {system_prompt[:100]}...") +else: + log.debug("No system prompt provided") +``` + +**工作流程:** + +```text +输入消息列表 + ↓ +遍历找第一个 role=="system" 的消息 + ↓ +提取该消息 + ↓ +返回 (提取的消息, 剩余消息列表) +``` + +**关键特性:** + +- 返回元组:`(system_message, remaining_messages)` +- `system_message` 为 None 如果不存在系统消息 +- `remaining_messages` 不包含系统消息 +- 只提取第一个系统消息(如果有多个,后续的被视为普通消息) + +**使用场景:** + +- 从 Open WebUI 的请求中提取系统消息 +- 将系统消息转换为 `GenerateContentConfig.system_instruction` +- 将其余消息作为对话上下文 + +**注意事项:** + +- ✓ **返回类型安全**:总是返回 2 元组 +- ⚠️ **系统消息可为 None**:需要 
`(system_message or {})` 防止错误 +- ✓ **消息顺序保留**:`remaining_messages` 中的消息顺序保持原样 +- 💡 **使用场景**:几乎所有 Pipe 都需要这个操作来提取系统提示 + +--- + +### 通用使用技巧总结 + +#### 技巧 1:异步上下文中调用同步 API + +这些模块的大部分方法都是同步阻塞的,但 Pipe 运行在异步上下文中: + +```python +# ❌ 错误:会阻塞事件循环 +chat = Chats.get_chat_by_id_and_user_id(chat_id, user_id) + +# ✓ 正确:在线程池中运行 +chat = await asyncio.to_thread( + Chats.get_chat_by_id_and_user_id, + chat_id, + user_id +) +``` + +#### 技巧 2:链式 None 检查 + +由于这些 API 经常返回 None,使用赋值表达式(海象运算符 `:=`)简化代码: + +```python +# ❌ 冗长 +file_model = await asyncio.to_thread(Files.get_file_by_id, file_id) +if file_model is None: + return None +file_path = file_model.path +mime_type = file_model.meta.get("content_type") + +# ✓ 简洁 +if not (file_model := await asyncio.to_thread(Files.get_file_by_id, file_id)): + return None +file_path = file_model.path +mime_type = file_model.meta.get("content_type") +``` + +#### 技巧 3:错误恢复优先级 + +不同模块的错误处理优先级: + +```python +# 1. 功能检查失败 → 返回默认值,继续 +if not (f := Functions.get_function_by_id(filter_id)): + log.warning("Feature not available") + return (False, False) + +# 2. 数据库查询失败 → 记录警告,但不中断流程 +try: + chat = await asyncio.to_thread(Chats.get_chat_by_id_and_user_id, ...) +except Exception as e: + log.exception("Failed to fetch chat history") + chat = None + +# 3. 存储操作失败 → 使用 toast 通知用户,并记录错误 +try: + path = await asyncio.to_thread(Storage.upload_file, ...) +except Exception as e: + event_emitter.emit_toast("File upload failed", "error") + log.exception("Storage error") + raise +``` + +#### 技巧 4:并发操作优化 + +多个 API 调用时使用并发: + +```python +# ❌ 串行:慢 +chat = await asyncio.to_thread(Chats.get_chat_by_id_and_user_id, ...) +file = await asyncio.to_thread(Files.get_file_by_id, ...) +filter_info = Functions.get_function_by_id(...) + +# ✓ 并发:快 +chat, file = await asyncio.gather( + asyncio.to_thread(Chats.get_chat_by_id_and_user_id, ...), + asyncio.to_thread(Files.get_file_by_id, ...), +) +filter_info = Functions.get_function_by_id(...) 
# 这个本来就是同步的 +``` + +#### 技巧 5:日志级别选择 + +根据严重程度选择日志级别: + +```python +# 配置问题(管理员处理)→ warning +if not f.is_active: + log.warning(f"Filter '{filter_id}' disabled in dashboard") + +# 正常功能流程(调试用)→ debug +if filter_id not in model_filters: + log.debug(f"Filter not in model list: {filter_id}") + +# 数据不一致(可能的 bug)→ error +if len(messages_db) != len(messages_body): + log.error("Message count mismatch") + +# 检查点(流程追踪)→ info +if is_toggled_on: + log.info(f"Feature '{filter_id}' enabled by user") +``` + +#### 技巧 6:元数据字段扩展 + +`Files.meta` 中的 `data` 字段是自定义字段,可存储任意上下文: + +```python +file_item = await asyncio.to_thread( + Files.insert_new_file, + user_id, + FileForm( + id=id, + filename="output.json", + path=path, + meta={ + "name": "output.json", + "content_type": "application/json", + "size": len(contents), + "data": { # 自定义字段,存储业务逻辑上下文 + "model": model_name, + "chat_id": chat_id, + "message_id": message_id, + "timestamp": time.time(), + "processing_time": elapsed_ms, + "version": "v1.0", + } + } + ) +) + +# 后续查询时可以恢复这些信息 +if file_model.meta.get("data", {}).get("processing_time"): + log.debug(f"File processed in {file_model.meta['data']['processing_time']}ms") +``` + +#### 技巧 7:条件式功能启用 + +根据多个条件决定是否启用某项功能: + +```python +# 检查引用过滤是否可用 +companion_available, companion_enabled = is_feature_available( + "gemini_manifold_companion", + __metadata__ +) + +# 结合其他条件 +can_filter_citations = ( + companion_available and # 过滤器已安装 + companion_enabled and # 用户启用了该功能 + self.messages_db is not None and # 聊天历史可用 + len(messages_db) == len(messages) # 消息数量一致 +) + +if can_filter_citations: + # 执行引用过滤逻辑 + ... 
+else: + # 跳过该功能 + log.debug("Citation filtering unavailable") +``` + +--- + +### 实战代码完整示例 + +```python +import asyncio +from open_webui.models.chats import Chats +from open_webui.models.files import FileForm, Files +from open_webui.storage.provider import Storage +from open_webui.models.functions import Functions +from open_webui.utils.misc import pop_system_message + +class MyPipe: + async def pipe( + self, + body: dict, + __user__: dict, + __request__, + __event_emitter__, + __metadata__: dict, + ): + # 1. 提取系统消息 + system_message, messages = pop_system_message(body.get("messages", [])) + system_prompt = (system_message or {}).get("content") + + # 2. 并发获取聊天和过滤器信息 + chat_data, filter_status = await asyncio.gather( + asyncio.to_thread( + Chats.get_chat_by_id_and_user_id, + __metadata__.get("chat_id", ""), + __user__["id"] + ), + asyncio.to_thread(self._check_filter_available, "companion_filter_id", __metadata__), + return_exceptions=True + ) + + # 3. 处理结果 + chat = chat_data if not isinstance(chat_data, Exception) else None + is_available, is_enabled = filter_status if not isinstance(filter_status, Exception) else (False, False) + + # 4. 条件式处理文件 + if chat and is_available: + for message in chat.chat.get("messages", []): + if files := message.get("files", []): + for file_ref in files: + file_model = await asyncio.to_thread( + Files.get_file_by_id, + file_ref.get("id") + ) + if file_model: + # 处理文件... + pass + + # 5. 
返回结果 + async for chunk in self._generate_response(messages, system_prompt): + yield chunk + + @staticmethod + def _check_filter_available(filter_id: str, metadata: dict) -> tuple[bool, bool]: + f = Functions.get_function_by_id(filter_id) + if not f or not f.is_active: + return (False, False) + + is_enabled = filter_id in metadata.get("filter_ids", []) or f.is_global + return (True, is_enabled) +``` + +> 这些示例可直接集成进团队的插件开发指南或代码模板库,新插件可参考对应场景快速实现相关功能。 diff --git a/docs/examples/pipe_plugin_gemini_manifold_example_cn.md b/docs/examples/pipe_plugin_gemini_manifold_example_cn.md new file mode 100644 index 0000000..77400c9 --- /dev/null +++ b/docs/examples/pipe_plugin_gemini_manifold_example_cn.md @@ -0,0 +1,185 @@ +# `Gemini Manifold` 插件深度解析:高级 `Pipe` 插件开发指南 + +## 引言 + +`Gemini Manifold` (`gemini_manifold.py`) 不仅仅是一个连接到 Google AI 服务的 `Pipe` 插件,它更是一个集成了高级架构设计、复杂功能和最佳实践的“瑞士军刀”。它作为 Open WebUI 与 Google Gemini 及 Vertex AI 之间的桥梁,全面展示了如何构建一个生产级的、功能丰富的、高性能且用户体验良好的 `Pipe` 插件。 + +本文档是对该插件的**深度解析**,旨在帮助开发者通过剖析一个顶级的范例,掌握 Open WebUI 高级插件的开发思想与核心技术。 + +## Part 1: 复杂配置管理艺术 (`Valves` 系统) + +在复杂的应用场景中,配置管理需要同时兼顾安全性、灵活性和多用户隔离。`Gemini Manifold` 通过一个精巧的双层 `Valves` 系统完美地解决了这个问题。 + +**目标**: 解决多用户、多环境下的配置灵活性与安全性问题。 + +#### 1.1 双层结构:`Valves` 与 `UserValves` + +- **`Pipe.Valves` (管理员层)**: 定义了插件的全局默认配置,由管理员在 Open WebUI 的设置界面中配置。这些是插件运行的基础。 + + ```python + class Pipe: + class Valves(BaseModel): + GEMINI_API_KEY: str | None = Field(default=None) + USE_VERTEX_AI: bool = Field(default=False) + USER_MUST_PROVIDE_AUTH_CONFIG: bool = Field(default=False) + AUTH_WHITELIST: str | None = Field(default=None) + # ... 40+ 其他全局配置 + ``` + +- **`Pipe.UserValves` (用户层)**: 允许每个用户在每次请求时,通过请求体(`body`)传入自己的配置,用于临时覆盖管理员的默认设置。 + + ```python + class Pipe: + class UserValves(BaseModel): + GEMINI_API_KEY: str | None = Field(default=None) + USE_VERTEX_AI: bool | None | Literal[""] = Field(default=None) + # ... 
其他用户可覆盖的配置 + ``` + +#### 1.2 核心合并逻辑 `_get_merged_valves` + +该函数在每次请求时被调用,负责将 `UserValves` 合并到 `Valves` 中,生成最终生效的配置。 + +#### 1.3 关键模式:强制认证与白名单 + +这是该配置系统中最精妙的部分,专为需要进行成本分摊和安全管控的团队环境设计。 + +- **场景**: 公司希望员工使用自己的 API Key,而不是共用一个高额度的 Key。 +- **实现**: + 1. 管理员在 `Valves` 中设置 `USER_MUST_PROVIDE_AUTH_CONFIG: True`。 + 2. 同时,可以将少数特权用户(如测试人员)的邮箱加入 `AUTH_WHITELIST`。 + 3. 在合并配置时,插件会检查当前用户是否在白名单内。 + - **非白名单用户**: **强制**使用其在 `UserValves` 中提供的 `GEMINI_API_KEY`,并**禁用**管理员配置的 `USE_VERTEX_AI`。如果用户没提供 Key,请求会失败。 + - **白名单用户**: 不受此限制,可以正常使用管理员配置的默认值。 + +这种设计通过代码强制执行了组织的策略,比单纯的文档约定要可靠得多。 + +## Part 2: 高性能文件上传与缓存 (`FilesAPIManager`) + +`FilesAPIManager` 是该插件的性能核心,它通过一套复杂但高效的机制,解决了文件上传中的重复、并发和性能三大难题。 + +**目标**: 避免重复上传,减少API调用,并在高并发下保持稳定。 + +#### 2.1 核心概念:内容寻址 (Content-Addressable Storage) + +- **原理**: 文件的唯一标识符**不是文件名**,而是其**文件内容的哈希值**。插件使用 `xxhash`(一种速度极快的非加密哈希算法)来计算文件哈希。 +- **优势**: 无论一个文件被上传多少次,只要内容不变,其哈希值就永远相同。这意味着插件只需为每个独一无二的文件内容执行一次上传操作。 + +#### 2.2 实现:三级缓存路径 (Hot/Warm/Cold Path) + +`FilesAPIManager` 的 `get_or_upload_file` 方法实现了精妙的三级缓存策略: + +1. **Hot Path (内存缓存)**: + - **实现**: 使用 `aiocache` 将“文件哈希 -> `types.File` 对象”的映射关系缓存在内存中。`types.File` 对象包含了 Google API 返回的文件 URI 和过期时间。 + - **流程**: 收到文件后,先查内存缓存。如果命中,直接返回 `types.File` 对象,无任何网络 I/O,速度最快。 + +2. **Warm Path (无状态恢复)**: + - **场景**: 内存缓存未命中(例如服务重启,内存被清空)。 + - **实现**: 插件根据文件哈希构造一个**确定性的文件名**(`deterministic_name = f"files/owui-v1-{content_hash}"`),然后直接调用 `client.aio.files.get()` 尝试从 Google API 获取该文件。 + - **优势**: 如果文件之前被上传过,这次 `get` 调用就会成功,并返回文件的状态信息。这样**仅用一次轻量的 `GET` 请求就恢复了文件状态,避免了昂贵的重新上传**。 + +3. **Cold Path (文件上传)**: + - **场景**: Hot 和 Warm 路径全部失败,说明这确实是一个新文件(或者在 Google 服务器上已过期)。 + - **实现**: 执行完整的文件上传流程,并将成功后的 `types.File` 对象存入内存缓存(Hot Path),以备后续使用。 + +#### 2.3 关键模式:并发上传安全 + +- **问题**: 如果 10 个用户同时上传同一个大文件,会发生什么? +- **解决方案**: 使用 `asyncio.Lock` 结合 "双重检查锁定" (Double-Checked Locking) 模式。 + 1. 为每一个**文件哈希**维护一个独立的 `asyncio.Lock`。 + 2. 当一个任务进入 `get_or_upload_file` 时,它会先尝试获取该文件哈希对应的锁。 + 3. **第一个任务**会成功获取锁,并继续执行 Warm/Cold Path 逻辑。 + 4. 
**后续 9个任务**会被阻塞在 `async with lock:` 处,异步等待。 + 5. 第一个任务完成后,它会将结果写入缓存并释放锁。 + 6. 后续 9 个任务依次获取到锁,但它们在获取锁之后会**再次检查缓存**。此时,它们会发现缓存中已有数据,于是直接从缓存返回,不再执行任何网络操作。 + +这个模式优雅地解决了并发上传的资源浪费和竞态问题。 + +## Part 3: 异步并发与流程编排 + +为了在处理复杂请求(例如,包含多个文件的消息)时保持前端的流畅响应,插件大量使用了 `asyncio` 的高级特性。 + +**目标**: 最大化 I/O 效率,缩短用户的等待时间。 + +#### 3.1 `asyncio.gather`:并发处理所有消息 + +`GeminiContentBuilder.build_contents` 方法是并发处理的典范。它没有按顺序循环处理每条消息,而是: +1. 为对话历史中的**每一条消息**创建一个 `_process_message_turn` 协程任务。 +2. 将所有任务放入一个列表。 +3. 使用 `await asyncio.gather(*tasks)` **同时启动并等待所有任务完成**。 + +这意味着,如果一条消息包含 5 个待上传的文件,另一条包含 3 个,这 8 个文件的上传和处理是**并行进行**的,总耗时取决于最慢的那个文件,而不是所有文件耗时的总和。 + +#### 3.2 `asyncio.Queue`:解耦的进度汇报 + +`UploadStatusManager` 展示了如何通过生产者-消费者模型实现优雅的进度汇报。 + +- **生产者 (上传任务)**: + - 当一个 `_process_message_turn` 任务确定需要上传文件时,它会向一个共享的 `asyncio.Queue` 中 `put` 一个 `('REGISTER_UPLOAD',)` 元组。 + - 上传完成后,它会 `put` 一个 `('COMPLETE_UPLOAD',)` 元组。 + +- **消费者 (`UploadStatusManager`)**: + - 它在一个独立的后台任务 (`asyncio.create_task`) 中运行,循环地从队列中 `get` 消息。 + - 每当收到 `REGISTER_UPLOAD`,它就将预期总数加一。 + - 每当收到 `COMPLETE_UPLOAD`,它就将完成数加一。 + - 每次计数变化后,它会重新计算进度(例如,“正在上传 3/8…”),并通过 `EventEmitter` 发送给前端。 + +这种设计将“执行业务逻辑”(上传)和“汇报进度”两个职责完全解耦。上传任务只管“生产”状态事件,进度管理器只管“消费”事件并更新 UI,代码非常清晰。 + +## Part 4: 响应处理与前端兼容性 + +**目标**: 提供流畅、信息丰富且绝对不会“搞乱”前端页面的用户体验。 + +#### 4.1 统一响应处理器 `_unified_response_processor` + +- **问题**: Google API 同时支持流式(streaming)和非流式(non-streaming)两种响应模式,如果为两种模式都写一套处理逻辑,代码会很冗余。 +- **解决方案**: `pipe` 方法的核心返回部分,无论是哪种模式,最终都会调用 `_unified_response_processor`。 + - 对于**流式**响应,直接将 API 返回的异步生成器传入。 + - 对于**非流式**响应,它会先将单个响应对象包装成一个只含一项的简单异步生成器。 +- **效果**: `_unified_response_processor` 内部只需用一套 `async for` 循环逻辑即可处理所有情况,极大地简化了代码。 + +#### 4.2 后置元数据处理 `_do_post_processing` + +- **问题**: 像 Token 使用量 (`usage`)、搜索引用来源 (`sources`) 等信息,只有在整个响应完全生成后才能获得。如果和内容混在一起发送,会影响流式输出的体验。 +- **解决方案**: `_unified_response_processor` 在主内容流(`choices`)完全结束后,会进入后置处理阶段。它会调用 `_do_post_processing` 来提取这些元数据,并通过 `EventEmitter` 的 `emit_completion` 或 `emit_usage` 方法,作为**独立的、附加的事件**发送给前端。 + +#### 4.3 
前端兼容性技巧 `_disable_special_tags` + +- **问题**: LLM 很可能在思考过程中生成 `...` 或 `
...
` 这样的 XML/HTML 风格标签。如果这些文本原样发送到前端,浏览器会尝试将其解析为 HTML 元素,导致页面布局错乱或内容丢失。 +- **解决方案**: 一个极其巧妙的技巧——在这些特殊标签的开头注入一个**零宽度空格(Zero-Width Space, ZWS, `\u200b`)**。 + - 例如,将 `` 替换为 `<​think>` (后者尖括号后多一个 ZWS)。 + - 这个改动对人类用户完全不可见,但对于浏览器的 HTML 解析器来说,`<​think>` 不再是一个合法的标签名,因此它会被当作纯文本处理,从而保证了前端渲染的绝对安全。 + - 当需要将这段历史作为上下文发回给模型时,再通过 `_enable_special_tags` 将这些 ZWS 移除,恢复原始文本。 + +## Part 5: 与 Open WebUI 和 Google API 的深度集成 + +`Gemini Manifold` 充分利用了 Open WebUI 的框架特性和 Google API 的高级功能。 + +#### 5.1 `pipes` 方法与模型缓存 + +- `pipes()` 方法负责向 Open WebUI 注册所有可用的 Gemini 模型。 +- 它使用了 `@cached` 装饰器,这意味着对 Google API 的 `list_models` 调用结果会被缓存。只要插件配置(如 API Key, 白名单等)不变,后续的 `pipes` 调用会直接从缓存返回,避免了不必要的网络请求。 + +#### 5.2 多源内容处理 (`_genai_parts_from_text`) + +`GeminiContentBuilder` 的核心能力之一是从一段文本中智能地解析出多种类型的内容。 +- 它使用正则表达式一次性地从用户输入中匹配出 Markdown 图片链接 (`![]()`) 和 YouTube 视频链接。 +- 对于匹配到的每一种 URI,它都会分派给统一的 `_genai_part_from_uri` 方法处理。 +- `_genai_part_from_uri` 内部进一步区分 URI 类型(是本地文件、data URI 还是 YouTube 链接),并调用相应的处理器(例如,从数据库读取文件、解码 base64、或解析 YouTube URL 参数)。 + +#### 5.3 与 Open WebUI 数据库交互 + +为了处理用户上传的文件,插件需要访问 Open WebUI 的内部数据库。 +- 它通过 `from open_webui.models.files import Files` 导入 `Files` 模型。 +- 在 `_get_file_data` 方法中,它调用 `Files.get_file_by_id(file_id)` 来获取文件的元数据(如存储路径、MIME 类型)。 +- **关键点**: 由于数据库 API 是同步阻塞的,插件明智地使用了 `await asyncio.to_thread(Files.get_file_by_id, file_id)`,将同步调用放入一个独立的线程中执行,从而避免了对主异步事件循环的阻塞。 + +## 总结 + +`Gemini Manifold` 是一个教科书级别的 Open WebUI `Pipe` 插件。它展示了超越简单 API 调用的高级插件应该具备的特质: +- **架构思维**: 通过职责分离的类和清晰的流程编排来管理复杂性。 +- **性能意识**: 在所有 I/O 密集型操作中,都将性能优化(缓存、并发)放在首位。 +- **用户为本**: 通过丰富的、非阻塞的实时反馈,极大地提升了用户体验。 +- **健壮与安全**: 通过精巧的技巧和周密的错误处理,确保插件在各种异常情况下都能稳定运行。 + +对于任何希望超越基础,构建企业级、高性能 Open WebUI 插件的开发者而言,`Gemini Manifold` 的每一行代码都值得细细品味。 \ No newline at end of file diff --git a/docs/features/plugin/development/_category_.json b/docs/features/plugin/development/_category_.json new file mode 100644 index 0000000..404bd0f --- /dev/null +++ b/docs/features/plugin/development/_category_.json @@ -0,0 +1,7 @@ +{ + "label": 
"Development", + "position": 800, + "link": { + "type": "generated-index" + } +} diff --git a/docs/features/plugin/development/events.mdx b/docs/features/plugin/development/events.mdx new file mode 100644 index 0000000..09f66ab --- /dev/null +++ b/docs/features/plugin/development/events.mdx @@ -0,0 +1,424 @@ +--- +sidebar_position: 3 +title: "Events" +--- + +# 🔔 Events: Using `__event_emitter__` and `__event_call__` in Open WebUI + +Open WebUI's plugin architecture is not just about processing input and producing output—**it's about real-time, interactive communication with the UI and users**. To make your Tools, Functions, and Pipes more dynamic, Open WebUI provides a built-in event system via the `__event_emitter__` and `__event_call__` helpers. + +This guide explains **what events are**, **how you can trigger them** from your code, and **the full catalog of event types** you can use (including much more than just `"input"`). + +--- + +## 🌊 What Are Events? + +**Events** are real-time notifications or interactive requests sent from your backend code (Tool, or Function) to the web UI. They allow you to update the chat, display notifications, request confirmation, run UI flows, and more. + +- Events are sent using the `__event_emitter__` helper for one-way updates, or `__event_call__` when you need user input or a response (e.g., confirmation, input, etc.). + +**Metaphor:** +Think of Events like push notifications and modal dialogs that your plugin can trigger, making the chat experience richer and more interactive. + +--- + +## 🧰 Basic Usage + +### Sending an Event + +You can trigger an event anywhere inside your Tool, or Function by calling: + +```python +await __event_emitter__( + { + "type": "status", # See the event types list below + "data": { + "description": "Processing started!", + "done": False, + "hidden": False, + }, + } +) +``` + +You **do not** need to manually add fields like `chat_id` or `message_id`—these are handled automatically by Open WebUI. 
+ +### Interactive Events + +When you need to pause execution until the user responds (e.g., confirm/cancel dialogs, code execution, or input), use `__event_call__`: + +```python +result = await __event_call__( + { + "type": "input", # Or "confirmation", "execute" + "data": { + "title": "Please enter your password", + "message": "Password is required for this action", + "placeholder": "Your password here", + }, + } +) + +# result will contain the user's input value +``` + +--- + +## 📜 Event Payload Structure + +When you emit or call an event, the basic structure is: + +```json +{ + "type": "event_type", // See full list below + "data": { ... } // Event-specific payload +} +``` + +Most of the time, you only set `"type"` and `"data"`. Open WebUI fills in the routing automatically. + +--- + +## 🗂 Full List of Event Types + +Below is a comprehensive table of **all supported `type` values** for events, along with their intended effect and data structure. (This is based on up-to-date analysis of Open WebUI event handling logic.) + +| type | When to use | Data payload structure (examples) | +| -------------------------------------------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| `status` | Show a status update/history for a message | `{description: ..., done: bool, hidden: bool}` | +| `chat:completion` | Provide a chat completion result | (Custom, see Open WebUI internals) | +| `chat:message:delta`,
`message` | Append content to the current message | `{content: "text to append"}` | +| `chat:message`,
`replace` | Replace current message content completely | `{content: "replacement text"}` | +| `chat:message:files`,
`files` | Set or overwrite message files (for uploads, output) | `{files: [...]}` | +| `chat:title` | Set (or update) the chat conversation title | Topic string OR `{title: ...}` | +| `chat:tags` | Update the set of tags for a chat | Tag array or object | +| `source`,
`citation` | Add a source/citation, or code execution result | For code: See [below.](/features/plugin/development/events#source-or-citation-and-code-execution) | +| `notification` | Show a notification ("toast") in the UI | `{type: "info" or "success" or "error" or "warning", content: "..."}` | +| `confirmation`
(needs `__event_call__`) | Ask for confirmation (OK/Cancel dialog) | `{title: "...", message: "..."}` | +| `input`
(needs `__event_call__`) | Request simple user input ("input box" dialog) | `{title: "...", message: "...", placeholder: "...", value: ...}` | +| `execute`
(needs `__event_call__`) | Request user-side code execution and return result | `{code: "...javascript code..."}` | + +**Other/Advanced types:** + +- You can define your own types and handle them at the UI layer (or use upcoming event-extension mechanisms). + +### ❗ Details on Specific Event Types + +### `status` + +Show a status/progress update in the UI: + +```python +await __event_emitter__( + { + "type": "status", + "data": { + "description": "Step 1/3: Fetching data...", + "done": False, + "hidden": False, + }, + } +) +``` + +--- + +### `chat:message:delta` or `message` + +**Streaming output** (append text): + +```python +await __event_emitter__( + { + "type": "chat:message:delta", # or simply "message" + "data": { + "content": "Partial text, " + }, + } +) + +# Later, as you generate more: +await __event_emitter__( + { + "type": "chat:message:delta", + "data": { + "content": "next chunk of response." + }, + } +) +``` + +--- + +### `chat:message` or `replace` + +**Set (or replace) the entire message content:** + +```python +await __event_emitter__( + { + "type": "chat:message", # or "replace" + "data": { + "content": "Final, complete response." 
+ }, + } +) +``` + +--- + +### `files` or `chat:message:files` + +**Attach or update files:** + +```python +await __event_emitter__( + { + "type": "files", # or "chat:message:files" + "data": { + "files": [ + # Open WebUI File Objects + ] + }, + } +) +``` + +--- + +### `chat:title` + +**Update the chat's title:** + +```python +await __event_emitter__( + { + "type": "chat:title", + "data": { + "title": "Market Analysis Bot Session" + }, + } +) +``` + +--- + +### `chat:tags` + +**Update the chat's tags:** + +```python +await __event_emitter__( + { + "type": "chat:tags", + "data": { + "tags": ["finance", "AI", "daily-report"] + }, + } +) +``` + +--- + +### `source` or `citation` (and code execution) + +**Add a reference/citation:** + +```python +await __event_emitter__( + { + "type": "source", # or "citation" + "data": { + # Open WebUI Source (Citation) Object + } + } +) +``` + +**For code execution (track execution state):** + +```python +await __event_emitter__( + { + "type": "source", + "data": { + # Open WebUI Code Source (Citation) Object + } + } +) +``` + +--- + +### `notification` + +**Show a toast notification:** + +```python +await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", # "success", "warning", "error" + "content": "The operation completed successfully!" + } + } +) +``` + +--- + +### `confirmation` (**requires** `__event_call__`) + +**Show a confirm dialog and get user response:** + +```python +result = await __event_call__( + { + "type": "confirmation", + "data": { + "title": "Are you sure?", + "message": "Do you really want to proceed?" 
+ } + } +) + +if result: # or check result contents + await __event_emitter__({ + "type": "notification", + "data": {"type": "success", "content": "User confirmed operation."} + }) +else: + await __event_emitter__({ + "type": "notification", + "data": {"type": "warning", "content": "User cancelled."} + }) +``` + +--- + +### `input` (**requires** `__event_call__`) + +**Prompt user for text input:** + +```python +result = await __event_call__( + { + "type": "input", + "data": { + "title": "Enter your name", + "message": "We need your name to proceed.", + "placeholder": "Your full name" + } + } +) + +user_input = result +await __event_emitter__( + { + "type": "notification", + "data": {"type": "info", "content": f"You entered: {user_input}"} + } +) +``` + +--- + +### `execute` (**requires** `__event_call__`) + +**Run code dynamically on the user's side:** + +```python +result = await __event_call__( + { + "type": "execute", + "data": { + "code": "print(40 + 2);", + } + } +) + +await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": f"Code executed, result: {result}" + } + } +) +``` + +--- + +## 🏗️ When & Where to Use Events + +- **From any Tool, or Function** in Open WebUI. +- To **stream responses**, show progress, request user data, update the UI, or display supplementary info/files. +- `await __event_emitter__` is for one-way messages (fire and forget). +- `await __event_call__` is for when you need a response from the user (input, execute, confirmation). + +--- + +## 💡 Tips & Advanced Notes + +- **Multiple types per message:** You can emit several events of different types for one message—for example, show `status` updates, then stream with `chat:message:delta`, then complete with a `chat:message`. +- **Custom event types:** While the above list is the standard, you may use your own types and detect/handle them in custom UI code. 
+- **Extensibility:** The event system is designed to evolve—always check the [Open WebUI documentation](https://github.com/open-webui/open-webui) for the most current list and advanced usage. + +--- + +## 🧐 FAQ + +### Q: How do I trigger a notification for the user? +Use `notification` type: +```python +await __event_emitter__({ + "type": "notification", + "data": {"type": "success", "content": "Task complete"} +}) +``` + +### Q: How do I prompt the user for input and get their answer? +Use: +```python +response = await __event_call__({ + "type": "input", + "data": { + "title": "What's your name?", + "message": "Please enter your preferred name:", + "placeholder": "Name" + } +}) + +# response will be: {"value": "user's answer"} +``` + +### Q: What event types are available for `__event_call__`? +- `"input"`: Input box dialog +- `"confirmation"`: Yes/No, OK/Cancel dialog +- `"execute"`: Run provided code on client and return result + +### Q: Can I update files attached to a message? +Yes—use the `"files"` or `"chat:message:files"` event type with a `{files: [...]}` payload. + +### Q: Can I update the conversation title or tags? +Absolutely: use `"chat:title"` or `"chat:tags"` accordingly. + +### Q: Can I stream responses (partial tokens) to the user? +Yes—emit `"chat:message:delta"` events in a loop, then finish with `"chat:message"`. + +--- + +## 📝 Conclusion + +**Events** give you real-time, interactive superpowers inside Open WebUI. They let your code update content, trigger notifications, request user input, stream results, handle code, and much more—seamlessly plugging your backend intelligence into the chat UI. + +- Use `__event_emitter__` for one-way status/content updates. +- Use `__event_call__` for interactions that require user follow-up (input, confirmation, execution). + +Refer to this document for common event types and structures, and explore Open WebUI source code or docs for breaking updates or custom events! 
+ +--- + +**Happy event-driven coding in Open WebUI! 🚀** \ No newline at end of file diff --git a/docs/features/plugin/development/reserved-args.mdx b/docs/features/plugin/development/reserved-args.mdx new file mode 100644 index 0000000..1f389e7 --- /dev/null +++ b/docs/features/plugin/development/reserved-args.mdx @@ -0,0 +1,340 @@ +--- +sidebar_position: 999 +title: "Reserved Arguments" +--- + +:::warning + +This tutorial is a community contribution and is not supported by the Open WebUI team. It serves only as a demonstration on how to customize Open WebUI for your specific use case. Want to contribute? Check out the contributing tutorial. + +::: + +# 🪄 Special Arguments + +When developing your own `Tools`, `Functions` (`Filters`, `Pipes` or `Actions`), `Pipelines` etc, you can use special arguments to explore the full spectrum of what Open-WebUI has to offer. + +This page aims to detail the type and structure of each special argument as well as provide an example. + +### `body` + +A `dict` usually destined to go almost directly to the model. Although it is not strictly a special argument, it is included here for easier reference and because it itself contains some special arguments. + +
+Example + +```json + +{ + "stream": true, + "model": "my-cool-model", + # lowercase string with - separated words: this is the ID of the model + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this picture?" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAdYAAAGcCAYAAABk2YF[REDACTED]" + # Images are passed as base64 encoded data + } + } + ] + }, + { + "role": "assistant", + "content": "The image appears to be [REDACTED]" + }, + ], + "features": { + "image_generation": false, + "code_interpreter": false, + "web_search": false + }, + "stream_options": { + "include_usage": true + }, + "metadata": "[The exact same dict as __metadata__]", + "files": "[The exact same list as __files__]" +} + +``` + +
+ +### `__user__` + +A `dict` with user information. + +Note that if the `UserValves` class is defined, its instance has to be accessed via `__user__["valves"]`. Otherwise, the `valves` key is missing entirely from `__user__`. + +
+Example + +```json +{ + "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "email": "cheesy_dude@openwebui.com", + "name": "Patrick", + "role": "user", + # role can be either `user` or `admin` + "valves": "[the UserValves instance]" +} +``` + +
+ +### `__metadata__` + +A `dict` with wide ranging information about the chat, model, files, etc. + +
+Example + +```json +{ + "user_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "chat_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "message_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "session_id": "xxxxxxxxxxxxxxxxxxxx", + "tool_ids": null, + # tool_ids is a list of str. + "tool_servers": [], + "files": "[Same as in body['files']]", + # If no files are given, the files key exists in __metadata__ and its value is [] + "features": { + "image_generation": false, + "code_interpreter": false, + "web_search": false + }, + "variables": { + "{{USER_NAME}}": "cheesy_username", + "{{USER_LOCATION}}": "Unknown", + "{{CURRENT_DATETIME}}": "2025-02-02 XX:XX:XX", + "{{CURRENT_DATE}}": "2025-02-02", + "{{CURRENT_TIME}}": "XX:XX:XX", + "{{CURRENT_WEEKDAY}}": "Monday", + "{{CURRENT_TIMEZONE}}": "Europe/Berlin", + "{{USER_LANGUAGE}}": "en-US" + }, + "model": "[The exact same dict as __model__]", + "direct": false, + "function_calling": "native", + "type": "user_response", + "interface": "open-webui" +} + +``` + +
+ +### `__model__` + +A `dict` with information about the model. + +
+Example + +```json +{ + "id": "my-cool-model", + "name": "My Cool Model", + "object": "model", + "created": 1746000000, + "owned_by": "openai", + # either openai or ollama + "info": { + "id": "my-cool-model", + "user_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "base_model_id": "gpt-4o", + # this is the name of model that the model endpoint serves + "name": "My Cool Model", + "params": { + "system": "You are my best assistant. You answer [REDACTED]", + "function_calling": "native" + # custom options appear here, for example "Top K" + }, + "meta": { + "profile_image_url": "/static/favicon.png", + "description": "Description of my-cool-model", + "capabilities": { + "vision": true, + "usage": true, + "citations": true + }, + "position": 17, + "tags": [ + { + "name": "for_friends" + }, + { + "name": "vision_enabled" + } + ], + "suggestion_prompts": null + }, + "access_control": { + "read": { + "group_ids": [], + "user_ids": [] + }, + "write": { + "group_ids": [], + "user_ids": [] + } + }, + "is_active": true, + "updated_at": 1740000000, + "created_at": 1740000000 + }, + "preset": true, + "actions": [], + "tags": [ + { + "name": "for_friends" + }, + { + "name": "vision_enabled" + } + ] +} + +``` + +
+ +### `__messages__` + +A `list` of the previous messages. + +See the `body["messages"]` value above. + +### `__chat_id__` + +The `str` of the `chat_id`. + +See the `__metadata__["chat_id"]` value above. + +### `__session_id__` + +The `str` of the `session_id`. + +See the `__metadata__["session_id"]` value above. + +### `__message_id__` + +The `str` of the `message_id`. + +See the `__metadata__["message_id"]` value above. + +### `__event_emitter__` + +A `Callable` used to display event information to the user. + +### `__event_call__` + +A `Callable` used for `Actions`. + +### `__files__` + +A `list` of files sent via the chat. Note that images are not considered files and are sent directly to the model as part of the `body["messages"]` list. + +The actual binary of the file is not part of the arguments for performance reasons, but the file remains nonetheless accessible by its path if needed. For example using `docker` the python syntax for the path could be: + +```python +from pathlib import Path + +the_file = Path(f"/app/backend/data/uploads/{__files__[0]['file']['id']}_{__files__[0]['file']['filename']}") +assert the_file.exists() +``` + +Note that the same files list can also be accessed via `__metadata__["files"]` (and its value is `[]` if no files are sent) or via `body["files"]` (but the `files` key is missing entirely from `body` if no files are sent). + +
+Example + +```json + +[ + { + "type": "file", + "file": { + "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "filename": "Napoleon - Wikipedia.pdf", + "user_id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "hash": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "data": { + "content": "Napoleon - Wikipedia\n\n\nNapoleon I\n\nThe Emperor Napoleon in His Study at the\nTuileries, 1812\n\nEmperor of the French\n\n1st reign 18 May 1804 – 6 April 1814\n\nSuccessor Louis XVIII[a]\n\n2nd reign 20 March 1815 – 22 June 1815\n\nSuccessor Louis XVIII[a]\n\nFirst Consul of the French Republic\n\nIn office\n13 December 1799 – 18 May 1804\n\nBorn Napoleone Buonaparte\n15 August 1769\nAjaccio, Corsica, Kingdom of\nFrance\n\nDied 5 May 1821 (aged 51)\nLongwood, Saint Helena\n\nBurial 15 December 1840\nLes Invalides, Paris\n\nNapoleon\nNapoleon Bonaparte[b] (born Napoleone\nBuonaparte;[1][c] 15 August 1769 – 5 May 1821), later\nknown [REDACTED]", + # The content value is the output of the document parser, the above example is with Tika as a document parser + }, + "meta": { + "name": "Napoleon - Wikipedia.pdf", + "content_type": "application/pdf", + "size": 10486578, + # in bytes, here about 10 MB + "data": {}, + "collection_name": "file-96xxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + # always begins with 'file' + }, + "created_at": 1740000000, + "updated_at": 1740000000 + }, + "id": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "url": "/api/v1/files/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "name": "Napoleon - Wikipedia.pdf", + "collection_name": "file-96xxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "status": "uploaded", + "size": 10486578, + "error": "", + "itemId": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + # itemId is not the same as file["id"] + } +] + +``` + +
+ +### `__request__` + +An instance of `fastapi.Request`. You can read more in the [migration page](/docs/features/plugin/migration/index.mdx) or in [fastapi's documentation](https://fastapi.tiangolo.com/reference/request/). + +### `__task__` + +A `str` for the type of task. Its value is just a shorthand for `__metadata__["task"]` if present, otherwise `None`. + +
+Possible values + +```json + +[ + "title_generation", + "tags_generation", + "emoji_generation", + "query_generation", + "image_prompt_generation", + "autocomplete_generation", + "function_calling", + "moa_response_generation" +] +``` + +
+ +### `__task_body__` + +A `dict` containing the `body` needed to accomplish a given `__task__`. Its value is just a shorthand for `__metadata__["task_body"]` if present, otherwise `None`. + +Its structure is the same as `body` above, with modifications like using the appropriate model and system message etc. + +### `__tools__` + +A `list` of `ToolUserModel` instances. + +For details on the attributes of `ToolUserModel` instances, the code can be found in [tools.py](https://github.com/open-webui/open-webui/blob/main/backend/open_webui/models/tools.py). + diff --git a/docs/features/plugin/development/valves.mdx b/docs/features/plugin/development/valves.mdx new file mode 100644 index 0000000..b91ff07 --- /dev/null +++ b/docs/features/plugin/development/valves.mdx @@ -0,0 +1,77 @@ +--- +sidebar_position: 3 +title: "Valves" +--- + +## Valves + +Valves and UserValves are used to allow users to provide dynamic details such as an API key or a configuration option. These will create a fillable field or a bool switch in the GUI menu for the given function. They are always optional, but HIGHLY encouraged. + +Hence, Valves and UserValves classes can be defined in either a `Pipe`, `Pipeline`, `Filter` or `Tools` class. + +Valves are configurable by admins alone via the Tools or Functions menus. On the other hand UserValves are configurable by any user directly from a chat session. + +
+Commented example + +```python + +from pydantic import BaseModel, Field +from typing import Literal + +# Define Valves and UserValves +class Filter: + # Notice the current indentation: Valves and UserValves must be declared as + # attributes of a Tools, Filter or Pipe class. Here we take the + # example of a Filter. + class Valves(BaseModel): + # Valves and UserValves inherit from pydantic's BaseModel. This + # enables complex use cases like model validators etc. + test_valve: int = Field( # Notice the type hint: it is used to + # choose the kind of UI element to show the user (buttons, + # texts, etc). + default=4, + description="A valve controlling a numerical value" + # required=False, # you can enforce fields using True + ) + # To give the user the choice between multiple strings, you can use Literal from typing: + choice_option: Literal["choiceA", "choiceB"] = Field( + default="choiceA", + description="An example of a multi choice valve", + ) + priority: int = Field( + default=0, + description="Priority level for the filter operations. Lower values are passed through first" + ) + # The priority field is optional but if present will be used to + # order the Filters. + pass + # Note that this 'pass' helps for parsing and is recommended. + + # UserValves are defined the same way. + class UserValves(BaseModel): + test_user_valve: bool = Field( + default=False, description="A user valve controlling a True/False (on/off) switch" + ) + pass + + def __init__(self): + self.valves = self.Valves() + # Because they are set by the admin, they are accessible directly + # upon code execution. + pass + + # The inlet method is only used for Filter but the __user__ handling is the same + def inlet(self, body: dict, __user__: dict): + # Because UserValves are defined per user they are only available + # on use. + # Note that although __user__ is a dict, __user__["valves"] is a + # UserValves object.
Hence you can access values like this: + test_user_valve = __user__["valves"].test_user_valve + # Or: + test_user_valve = dict(__user__["valves"])["test_user_valve"] + # But this will return the default value instead of the actual value: + # test_user_valve = __user__["valves"]["test_user_valve"] # Do not do that! +``` + +
diff --git a/docs/features/plugin/functions/action.mdx b/docs/features/plugin/functions/action.mdx new file mode 100644 index 0000000..8057760 --- /dev/null +++ b/docs/features/plugin/functions/action.mdx @@ -0,0 +1,316 @@ +--- +sidebar_position: 3 +title: "Action Function" +--- + +Action functions allow you to write custom buttons that appear in the message toolbar for end users to interact with. This feature enables more interactive messaging, allowing users to grant permission before a task is performed, generate visualizations of structured data, download an audio snippet of chats, and many other use cases. + +Actions are admin-managed functions that extend the chat interface with custom interactive capabilities. When a message is generated by a model that has actions configured, these actions appear as clickable buttons beneath the message. + +A scaffold of Action code can be found [in the community section](https://openwebui.com/f/hub/custom_action/). For more Action Function examples built by the community, visit [https://openwebui.com/functions](https://openwebui.com/functions). + +An example of a graph visualization Action can be seen in the video below. + +
+ + Graph Visualization Action + +
+ +## Action Function Architecture + +Actions are Python-based functions that integrate directly into the chat message toolbar. They execute server-side and can interact with users through real-time events, modify message content, and access the full Open WebUI context. + +### Function Structure + +Actions follow a specific class structure with an `action` method as the main entry point: + +```python +class Action: + def __init__(self): + self.valves = self.Valves() + + class Valves(BaseModel): + # Configuration parameters + parameter_name: str = "default_value" + + async def action(self, body: dict, __user__=None, __event_emitter__=None, __event_call__=None): + # Action implementation + return {"content": "Modified message content"} +``` + +### Action Method Parameters + +The `action` method receives several parameters that provide access to the execution context: + +- **`body`** - Dictionary containing the message data and context +- **`__user__`** - Current user object with permissions and settings +- **`__event_emitter__`** - Function to send real-time updates to the frontend +- **`__event_call__`** - Function for bidirectional communication (confirmations, inputs) +- **`__model__`** - Model information that triggered the action +- **`__request__`** - FastAPI request object for accessing headers, etc. 
+- **`__id__`** - Action ID (useful for multi-action functions) + +## Event System Integration + +Actions can utilize Open WebUI's real-time event system for interactive experiences: + +### Event Emitter (`__event_emitter__`) + +**For more information about Events and Event emitters, see [Events and Event Emitters](https://docs.openwebui.com/features/plugin/events/).** + +Send real-time updates to the frontend during action execution: + +```python +async def action(self, body: dict, __event_emitter__=None): + # Send status updates + await __event_emitter__({ + "type": "status", + "data": {"description": "Processing request..."} + }) + + # Send notifications + await __event_emitter__({ + "type": "notification", + "data": {"type": "info", "content": "Action completed successfully"} + }) +``` + +### Event Call (`__event_call__`) +Request user input or confirmation during execution: + +```python +async def action(self, body: dict, __event_call__=None): + # Request user confirmation + response = await __event_call__({ + "type": "confirmation", + "data": { + "title": "Confirm Action", + "message": "Are you sure you want to proceed?" + } + }) + + # Request user input + user_input = await __event_call__({ + "type": "input", + "data": { + "title": "Enter Value", + "message": "Please provide additional information:", + "placeholder": "Type your input here..." + } + }) +``` + +## Action Types and Configurations + +### Single Actions +Standard actions with one `action` method: + +```python +async def action(self, body: dict, **kwargs): + # Single action implementation + return {"content": "Action result"} +``` + +### Multi-Actions +Functions can define multiple sub-actions through an `actions` array: + +```python +actions = [ + { + "id": "summarize", + "name": "Summarize", + "icon_url": "data:image/svg+xml;base64,..." + }, + { + "id": "translate", + "name": "Translate", + "icon_url": "data:image/svg+xml;base64,..." 
+ } +] + +async def action(self, body: dict, __id__=None, **kwargs): + if __id__ == "summarize": + # Summarization logic + return {"content": "Summary: ..."} + elif __id__ == "translate": + # Translation logic + return {"content": "Translation: ..."} +``` + +### Global vs Model-Specific Actions +- **Global Actions** - Turn on the toggle in the Action's settings, to globally enable it for all users and all models. +- **Model-Specific Actions** - Configure enabled actions for specific models in the model settings. + +## Advanced Capabilities + +### Background Task Execution +For long-running operations, actions can integrate with the task system: + +```python +async def action(self, body: dict, __event_emitter__=None): + # Start long-running process + await __event_emitter__({ + "type": "status", + "data": {"description": "Starting background processing..."} + }) + + # Perform time-consuming operation + result = await some_long_running_function() + + return {"content": f"Processing completed: {result}"} +``` + +### File and Media Handling +Actions can work with uploaded files and generate new media: + +```python +async def action(self, body: dict): + message = body + + # Access uploaded files + if message.get("files"): + for file in message["files"]: + # Process file based on type + if file["type"] == "image": + # Image processing logic + pass + + # Return new files + return { + "content": "Analysis complete", + "files": [ + { + "type": "image", + "url": "generated_chart.png", + "name": "Analysis Chart" + } + ] + } +``` + +### User Context and Permissions +Actions can access user information and respect permissions: + +```python +async def action(self, body: dict, __user__=None): + if __user__["role"] != "admin": + return {"content": "This action requires admin privileges"} + + user_name = __user__["name"] + return {"content": f"Hello {user_name}, admin action completed"} +``` + +## Example - Specifying Action Frontmatter + +Each Action function can include a 
docstring at the top to define metadata for the button. This helps customize the display and behavior of your Action in Open WebUI. + +Examples of supported frontmatter fields: +- `title`: Display name of the Action. +- `author`: Name of the creator. +- `version`: Version number of the Action. +- `required_open_webui_version`: Minimum compatible version of Open WebUI. +- `icon_url` (optional): URL or Base64 string for a custom icon. + +**Base64-Encoded Example:** + +
+Example + +```python +""" +title: Enhanced Message Processor +author: @admin +version: 1.2.0 +required_open_webui_version: 0.5.0 +icon_url: data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjQiIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZD0iTTEyIDJMMTMuMDkgOC4yNkwyMCA5TDEzLjA5IDE1Ljc0TDEyIDIyTDEwLjkxIDE1Ljc0TDQgOUwxMC45MSA4LjI2TDEyIDJaIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiLz4KPHN2Zz4K +requirements: requests,beautifulsoup4 +""" + +from pydantic import BaseModel + +class Action: + def __init__(self): + self.valves = self.Valves() + + class Valves(BaseModel): + api_key: str = "" + processing_mode: str = "standard" + + async def action( + self, + body: dict, + __user__=None, + __event_emitter__=None, + __event_call__=None, + ): + # Send initial status + await __event_emitter__({ + "type": "status", + "data": {"description": "Processing message..."} + }) + + # Get user confirmation + response = await __event_call__({ + "type": "confirmation", + "data": { + "title": "Process Message", + "message": "Do you want to enhance this message?" + } + }) + + if not response: + return {"content": "Action cancelled by user"} + + # Process the message + original_content = body.get("content", "") + enhanced_content = f"Enhanced: {original_content}" + + return {"content": enhanced_content} +``` + +
+ +## Best Practices + +### Error Handling +Always implement proper error handling in your actions: + +```python +async def action(self, body: dict, __event_emitter__=None): + try: + # Action logic here + result = perform_operation() + return {"content": f"Success: {result}"} + except Exception as e: + await __event_emitter__({ + "type": "notification", + "data": {"type": "error", "content": f"Action failed: {str(e)}"} + }) + return {"content": "Action encountered an error"} +``` + +### Performance Considerations +- Use async/await for I/O operations +- Implement timeouts for external API calls +- Provide progress updates for long-running operations +- Consider using background tasks for heavy processing + +### User Experience +- Always provide clear feedback through event emitters +- Use confirmation dialogs for destructive actions +- Include helpful error messages + +## Integration with Open WebUI Features + +Actions integrate seamlessly with other Open WebUI features: +- **Models** - Actions can be model-specific or global +- **Tools** - Actions can invoke external tools and APIs +- **Files** - Actions can process uploaded files and generate new ones +- **Memory** - Actions can access conversation history and context +- **Permissions** - Actions respect user roles and access controls + +For more examples and community-contributed actions, visit [https://openwebui.com/functions](https://openwebui.com/functions) where you can discover, download, and explore custom functions built by the Open WebUI community. diff --git a/docs/features/plugin/functions/filter.mdx b/docs/features/plugin/functions/filter.mdx new file mode 100644 index 0000000..95e68e5 --- /dev/null +++ b/docs/features/plugin/functions/filter.mdx @@ -0,0 +1,423 @@ +--- +sidebar_position: 2 +title: "Filter Function" +--- + +# 🪄 Filter Function: Modify Inputs and Outputs + +Welcome to the comprehensive guide on Filter Functions in Open WebUI! 
Filters are a flexible and powerful **plugin system** for modifying data *before it's sent to the Large Language Model (LLM)* (input) or *after it’s returned from the LLM* (output). Whether you’re transforming inputs for better context or cleaning up outputs for improved readability, **Filter Functions** let you do it all. + +This guide will break down **what Filters are**, how they work, their structure, and everything you need to know to build powerful and user-friendly filters of your own. Let’s dig in, and don’t worry—I’ll use metaphors, examples, and tips to make everything crystal clear! 🌟 + +--- + +## 🌊 What Are Filters in Open WebUI? + +Imagine Open WebUI as a **stream of water** flowing through pipes: + +- **User inputs** and **LLM outputs** are the water. +- **Filters** are the **water treatment stages** that clean, modify, and adapt the water before it reaches the final destination. + +Filters sit in the middle of the flow—like checkpoints—where you decide what needs to be adjusted. + +Here’s a quick summary of what Filters do: + +1. **Modify User Inputs (Inlet Function)**: Tweak the input data before it reaches the AI model. This is where you enhance clarity, add context, sanitize text, or reformat messages to match specific requirements. +2. **Intercept Model Outputs (Stream Function)**: Capture and adjust the AI’s responses **as they’re generated** by the model. This is useful for real-time modifications, like filtering out sensitive information or formatting the output for better readability. +3. **Modify Model Outputs (Outlet Function)**: Adjust the AI's response **after it’s processed**, before showing it to the user. This can help refine, log, or adapt the data for a cleaner user experience. + +> **Key Concept:** Filters are not standalone models but tools that enhance or transform the data traveling *to* and *from* models. 
+ +Filters are like **translators or editors** in the AI workflow: you can intercept and change the conversation without interrupting the flow. + +--- + +## 🗺️ Structure of a Filter Function: The Skeleton + +Let's start with the simplest representation of a Filter Function. Don't worry if some parts feel technical at first—we’ll break it all down step by step! + +### 🦴 Basic Skeleton of a Filter + +```python +from pydantic import BaseModel +from typing import Optional + +class Filter: + # Valves: Configuration options for the filter + class Valves(BaseModel): + pass + + def __init__(self): + # Initialize valves (optional configuration for the Filter) + self.valves = self.Valves() + + def inlet(self, body: dict) -> dict: + # This is where you manipulate user inputs. + print(f"inlet called: {body}") + return body + + def stream(self, event: dict) -> dict: + # This is where you modify streamed chunks of model output. + print(f"stream event: {event}") + return event + + def outlet(self, body: dict) -> None: + # This is where you manipulate model outputs. + print(f"outlet called: {body}") +``` + +--- + +### 🆕 🧲 Toggle Filter Example: Adding Interactivity and Icons (New in Open WebUI 0.6.10) + +Filters can do more than simply modify text—they can expose UI toggles and display custom icons. For instance, you might want a filter that can be turned on/off with a user interface button, and displays a special icon in Open WebUI’s message input UI. + +Here’s how you could create such a toggle filter: + +```python +from pydantic import BaseModel, Field +from typing import Optional + +class Filter: + class Valves(BaseModel): + pass + + def __init__(self): + self.valves = self.Valves() + self.toggle = True # IMPORTANT: This creates a switch UI in Open WebUI + # TIP: Use SVG Data URI! 
+ self.icon = """data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGZpbGw9Im5vbmUiIHZpZXdCb3g9IjAgMCAyNCAyNCIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZT0iY3VycmVudENvbG9yIiBjbGFzcz0ic2l6ZS02Ij4KICA8cGF0aCBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiIGQ9Ik0xMiAxOHYtNS4yNW0wIDBhNi4wMSA2LjAxIDAgMCAwIDEuNS0uMTg5bS0xLjUuMTg5YTYuMDEgNi4wMSAwIDAgMS0xLjUtLjE4OW0zLjc1IDcuNDc4YTEyLjA2IDEyLjA2IDAgMCAxLTQuNSAwbTMuNzUgMi4zODNhMTQuNDA2IDE0LjQwNiAwIDAgMS0zIDBNMTQuMjUgMTh2LS4xOTJjMC0uOTgzLjY1OC0xLjgyMyAxLjUwOC0yLjMxNmE3LjUgNy41IDAgMSAwLTcuNTE3IDBjLjg1LjQ5MyAxLjUwOSAxLjMzMyAxLjUwOSAyLjMxNlYxOCIgLz4KPC9zdmc+Cg==""" + pass + + async def inlet( + self, body: dict, __event_emitter__, __user__: Optional[dict] = None + ) -> dict: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "Toggled!", + "done": True, + "hidden": False, + }, + } + ) + return body +``` + +#### 🖼️ What’s happening? +- **toggle = True** creates a switch UI in Open WebUI—users can manually enable or disable the filter in real time. +- **icon** (with a Data URI) will show up as a little image next to the filter’s name. You can use any SVG as long as it’s Data URI encoded! +- **The `inlet` function** uses the `__event_emitter__` special argument to broadcast feedback/status to the UI, such as a little toast/notification that reads "Toggled!" + +![Toggle Filter](/images/features/plugin/functions/toggle-filter.png) + +You can use these mechanisms to make your filters dynamic, interactive, and visually unique within Open WebUI’s plugin ecosystem. + +--- + +### 🎯 Key Components Explained + +#### 1️⃣ **`Valves` Class (Optional Settings)** + +Think of **Valves** as the knobs and sliders for your filter. If you want to give users configurable options to adjust your Filter’s behavior, you define those here. 
+ +```python +class Valves(BaseModel): + OPTION_NAME: str = "Default Value" +``` + +For example: +If you're creating a filter that converts responses into uppercase, you might allow users to configure whether every output gets totally capitalized via a valve like `TRANSFORM_UPPERCASE: bool = True/False`. + +##### Configuring Valves with Dropdown Menus (Enums) + +You can enhance the user experience for your filter's settings by providing dropdown menus instead of free-form text inputs for certain `Valves`. This is achieved using `json_schema_extra` with the `enum` keyword in your Pydantic `Field` definitions. + +The `enum` keyword allows you to specify a list of predefined values that the UI should present as options in a dropdown. + +**Example:** Creating a dropdown for color themes in a filter. + +```python +from pydantic import BaseModel, Field +from typing import Optional + +# Define your available options (e.g., color themes) +COLOR_THEMES = { + "Plain (No Color)": [], + "Monochromatic Blue": ["blue", "RoyalBlue", "SteelBlue", "LightSteelBlue"], + "Warm & Energetic": ["orange", "red", "magenta", "DarkOrange"], + "Cool & Calm": ["cyan", "blue", "green", "Teal", "CadetBlue"], + "Forest & Earth": ["green", "DarkGreen", "LimeGreen", "OliveGreen"], + "Mystical Purple": ["purple", "DarkOrchid", "MediumPurple", "Lavender"], + "Grayscale": ["gray", "DarkGray", "LightGray"], + "Rainbow Fun": [ + "red", + "orange", + "yellow", + "green", + "blue", + "indigo", + "violet", + ], + "Ocean Breeze": ["blue", "cyan", "LightCyan", "DarkTurquoise"], + "Sunset Glow": ["DarkRed", "DarkOrange", "Orange", "gold"], + "Custom Sequence (See Code)": [], +} + +class Filter: + class Valves(BaseModel): + selected_theme: str = Field( + "Monochromatic Blue", + description="Choose a predefined color theme for LLM responses. 
'Plain (No Color)' disables coloring.", + json_schema_extra={"enum": list(COLOR_THEMES.keys())}, # KEY: This creates the dropdown + ) + custom_colors_csv: str = Field( + "", + description="CSV of colors for 'Custom Sequence' theme (e.g., 'red,blue,green'). Uses xcolor names.", + ) + strip_existing_latex: bool = Field( + True, + description="If true, attempts to remove existing LaTeX color commands. Recommended to avoid nested rendering issues.", + ) + colorize_type: str = Field( + "sequential_word", + description="How to apply colors: 'sequential_word' (word by word), 'sequential_line' (line by line), 'per_letter' (letter by letter), 'full_message' (entire message).", + json_schema_extra={ + "enum": [ + "sequential_word", + "sequential_line", + "per_letter", + "full_message", + ] + }, # Another example of an enum dropdown + ) + color_cycle_reset_per_message: bool = Field( + True, + description="If true, the color sequence restarts for each new LLM response message. If false, it continues across messages.", + ) + debug_logging: bool = Field( + False, + description="Enable verbose logging to the console for debugging filter operations.", + ) + + def __init__(self): + self.valves = self.Valves() + # ... rest of your __init__ logic ... +``` + +**What's happening?** + +* **`json_schema_extra`**: This argument in `Field` allows you to inject arbitrary JSON Schema properties that Pydantic doesn't explicitly support but can be used by downstream tools (like Open WebUI's UI renderer). +* **`"enum": list(COLOR_THEMES.keys())`**: This tells Open WebUI that the `selected_theme` field should present a selection of values, specifically the keys from our `COLOR_THEMES` dictionary. The UI will then render a dropdown menu with "Plain (No Color)", "Monochromatic Blue", "Warm & Energetic", etc., as selectable options. +* The `colorize_type` field also demonstrates another `enum` dropdown for different coloring methods. 
+ +Using `enum` for your `Valves` options makes your filters more user-friendly and prevents invalid inputs, leading to a smoother configuration experience. + +--- + +#### 2️⃣ **`inlet` Function (Input Pre-Processing)** + +The `inlet` function is like **prepping food before cooking**. Imagine you’re a chef: before the ingredients go into the recipe (the LLM in this case), you might wash vegetables, chop onions, or season the meat. Without this step, your final dish could lack flavor, have unwashed produce, or simply be inconsistent. + +In the world of Open WebUI, the `inlet` function does this important prep work on the **user input** before it’s sent to the model. It ensures the input is as clean, contextual, and helpful as possible for the AI to handle. + +📥 **Input**: +- **`body`**: The raw input from Open WebUI to the model. It is in the format of a chat-completion request (usually a dictionary that includes fields like the conversation's messages, model settings, and other metadata). Think of this as your recipe ingredients. + +🚀 **Your Task**: +Modify and return the `body`. The modified version of the `body` is what the LLM works with, so this is your chance to bring clarity, structure, and context to the input. + +##### 🍳 Why Would You Use the `inlet`? +1. **Adding Context**: Automatically append crucial information to the user’s input, especially if their text is vague or incomplete. For example, you might add "You are a friendly assistant" or "Help this user troubleshoot a software bug." + +2. **Formatting Data**: If the input requires a specific format, like JSON or Markdown, you can transform it before sending it to the model. + +3. **Sanitizing Input**: Remove unwanted characters, strip potentially harmful or confusing symbols (like excessive whitespace or emojis), or replace sensitive information. + +4. 
**Streamlining User Input**: If your model’s output improves with additional guidance, you can use the `inlet` to inject clarifying instructions automatically! + +##### 💡 Example Use Cases: Build on Food Prep + +###### 🥗 Example 1: Adding System Context +Let’s say the LLM is a chef preparing a dish for Italian cuisine, but the user hasn’t mentioned "This is for Italian cooking." You can ensure the message is clear by appending this context before sending the data to the model. + +```python +def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict: + # Add system message for Italian context in the conversation + context_message = { + "role": "system", + "content": "You are helping the user prepare an Italian meal." + } + # Insert the context at the beginning of the chat history + body.setdefault("messages", []).insert(0, context_message) + return body +``` + +📖 **What Happens?** +- Any user input like "What are some good dinner ideas?" now carries the Italian theme because we’ve set the system context! Cheesecake might not show up as an answer, but pasta sure will. + +###### 🔪 Example 2: Cleaning Input (Remove Odd Characters) +Suppose the input from the user looks messy or includes unwanted symbols like `!!!`, making the conversation inefficient or harder for the model to parse. You can clean it up while preserving the core content. + +```python +def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict: + # Clean the last user input (from the end of the 'messages' list) + last_message = body["messages"][-1]["content"] + body["messages"][-1]["content"] = last_message.replace("!!!", "").strip() + return body +``` + +📖 **What Happens?** +- Before: `"How can I debug this issue!!!"` ➡️ Sent to the model as `"How can I debug this issue"` + +:::note + +Note: The user feels the same, but the model processes a cleaner and easier-to-understand query. 
+ +::: + +##### 📊 How `inlet` Helps Optimize Input for the LLM: +- Improves **accuracy** by clarifying ambiguous queries. +- Makes the AI **more efficient** by removing unnecessary noise like emojis, HTML tags, or extra punctuation. +- Ensures **consistency** by formatting user input to match the model’s expected patterns or schemas (like, say, JSON for a specific use case). + +💭 **Think of `inlet` as the sous-chef in your kitchen**—ensuring everything that goes into the model (your AI "recipe") has been prepped, cleaned, and seasoned to perfection. The better the input, the better the output! + +--- + +#### 🆕 3️⃣ **`stream` Hook (New in Open WebUI 0.5.17)** + +##### 🔄 What is the `stream` Hook? +The **`stream` function** is a new feature introduced in Open WebUI **0.5.17** that allows you to **intercept and modify streamed model responses** in real time. + +Unlike `outlet`, which processes an entire completed response, `stream` operates on **individual chunks** as they are received from the model. + +##### 🛠️ When to Use the Stream Hook? +- Modify **streaming responses** before they are displayed to users. +- Implement **real-time censorship or cleanup**. +- **Monitor streamed data** for logging/debugging. + +##### 📜 Example: Logging Streaming Chunks + +Here’s how you can inspect and modify streamed LLM responses: +```python +def stream(self, event: dict) -> dict: + print(event) # Print each incoming chunk for inspection + return event +``` + +> **Example Streamed Events:** +```jsonl +{"id": "chatcmpl-B4l99MMaP3QLGU5uV7BaBM0eDS0jb","choices": [{"delta": {"content": "Hi"}}]} +{"id": "chatcmpl-B4l99MMaP3QLGU5uV7BaBM0eDS0jb","choices": [{"delta": {"content": "!"}}]} +{"id": "chatcmpl-B4l99MMaP3QLGU5uV7BaBM0eDS0jb","choices": [{"delta": {"content": " 😊"}}]} +``` +📖 **What Happens?** +- Each line represents a **small fragment** of the model's streamed response. +- The **`delta.content` field** contains the progressively generated text. 
+ +##### 🔄 Example: Filtering Out Emojis from Streamed Data +```python +def stream(self, event: dict) -> dict: + for choice in event.get("choices", []): + delta = choice.get("delta", {}) + if "content" in delta: + delta["content"] = delta["content"].replace("😊", "") # Strip emojis + return event +``` +📖 **Before:** `"Hi 😊"` +📖 **After:** `"Hi"` + +--- + +#### 4️⃣ **`outlet` Function (Output Post-Processing)** + +The `outlet` function is like a **proofreader**: tidy up the AI's response (or make final changes) *after it’s processed by the LLM.* + +📤 **Input**: +- **`body`**: This contains **all current messages** in the chat (user history + LLM replies). + +🚀 **Your Task**: Modify this `body`. You can clean, append, or log changes, but be mindful of how each adjustment impacts the user experience. + +💡 **Best Practices**: +- Prefer logging over direct edits in the outlet (e.g., for debugging or analytics). +- If heavy modifications are needed (like formatting outputs), consider using the **pipe function** instead. + +💡 **Example Use Case**: Strip out sensitive API responses you don't want the user to see: +```python +def outlet(self, body: dict, __user__: Optional[dict] = None) -> dict: + for message in body["messages"]: + message["content"] = message["content"].replace("<API_KEY>", "[REDACTED]") + return body +``` + +--- + +## 🌟 Filters in Action: Building Practical Examples + +Let’s build some real-world examples to see how you’d use Filters! + +### 📚 Example #1: Add Context to Every User Input + +Want the LLM to always know it's assisting a customer in troubleshooting software bugs? You can add instructions like **"You're a software troubleshooting assistant"** to every user query. + +```python +class Filter: + def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict: + context_message = { + "role": "system", + "content": "You're a software troubleshooting assistant."
+ } + body.setdefault("messages", []).insert(0, context_message) + return body +``` + +--- + +### 📚 Example #2: Highlight Outputs for Easy Reading + +Returning output in Markdown or another formatted style? Use the `outlet` function! + +```python +class Filter: + def outlet(self, body: dict, __user__: Optional[dict] = None) -> dict: + # Add "highlight" markdown for every response + for message in body["messages"]: + if message["role"] == "assistant": # Target model response + message["content"] = f"**{message['content']}**" # Highlight with Markdown + return body +``` + +--- + +## 🚧 Potential Confusion: Clear FAQ 🛑 + +### **Q: How Are Filters Different From Pipe Functions?** + +Filters modify data **going to** and **coming from models** but do not significantly interact with logic outside of these phases. Pipes, on the other hand: +- Can integrate **external APIs** or significantly transform how the backend handles operations. +- Expose custom logic as entirely new "models." + +### **Q: Can I Do Heavy Post-Processing Inside `outlet`?** + +You can, but **it’s not best practice**: +- **Filters** are designed to make lightweight changes or apply logging. +- If heavy modifications are required, consider a **Pipe Function** instead. + +--- + +## 🎉 Recap: Why Build Filter Functions? + +By now, you’ve learned: +1. **Inlet** manipulates **user inputs** (pre-processing). +2. **Stream** intercepts and modifies **streamed model outputs** (real-time). +3. **Outlet** tweaks **AI outputs** (post-processing). +4. Filters are best for lightweight, real-time alterations to the data flow. +5. With **Valves**, you empower users to configure Filters dynamically for tailored behavior. + +--- + +🚀 **Your Turn**: Start experimenting! What small tweak or context addition could elevate your Open WebUI experience? Filters are fun to build, flexible to use, and can take your models to the next level! + +Happy coding!
✨ diff --git a/docs/features/plugin/functions/index.mdx b/docs/features/plugin/functions/index.mdx new file mode 100644 index 0000000..558bcfe --- /dev/null +++ b/docs/features/plugin/functions/index.mdx @@ -0,0 +1,133 @@ +--- +sidebar_position: 1 +title: "Functions" +--- + +## 🚀 What Are Functions? + +Functions are like **plugins** for Open WebUI. They help you **extend its capabilities**—whether it’s adding support for new AI model providers like Anthropic or Vertex AI, tweaking how messages are processed, or introducing custom buttons to the interface for better usability. + +Unlike external tools that may require complex integrations, **Functions are built-in and run within the Open WebUI environment.** That means they are fast, modular, and don’t rely on external dependencies. + +Think of Functions as **modular building blocks** that let you enhance how the WebUI works, tailored exactly to what you need. They’re lightweight, highly customizable, and written in **pure Python**, so you have the freedom to create anything—from new AI-powered workflows to integrations with anything you use, like Google Search or Home Assistant. + +--- + +## 🏗️ Types of Functions + +There are **three types of Functions** in Open WebUI, each with a specific purpose. Let’s break them down and explain exactly what they do: + +--- + +### 1. [**Pipe Function** – Create Custom "Agents/Models"](./pipe.mdx) + +A **Pipe Function** is how you create **custom agents/models** or integrations, which then appear in the interface as if they were standalone models. + +**What does it do?** +- Pipes let you define complex workflows. For instance, you could create a Pipe that sends data to **Model A** and **Model B**, processes their outputs, and combines the results into one finalized answer. +- Pipes don’t even have to use AI! They can be setups for **search APIs**, **weather data**, or even systems like **Home Assistant**. 
Basically, anything you’d like to interact with can become part of Open WebUI. + +**Use case example:** +Imagine you want to query Google Search directly from Open WebUI. You can create a Pipe Function that: +1. Takes your message as the search query. +2. Sends the query to Google Search’s API. +3. Processes the response and returns it to you inside the WebUI like a normal "model" response. + +When enabled, **Pipe Functions show up as their own selectable model**. Use Pipes whenever you need custom functionality that works like a model in the interface. + +For a detailed guide, see [**Pipe Functions**](./pipe.mdx). + +--- + +### 2. [**Filter Function** – Modify Inputs and Outputs](./filter.mdx) + +A **Filter Function** is like a tool for tweaking data before it gets sent to the AI **or** after it comes back. + +**What does it do?** +Filters act as "hooks" in the workflow and have two main parts: +- **Inlet**: Adjust the input that is sent to the model. For example, adding additional instructions, keywords, or formatting tweaks. +- **Outlet**: Modify the output that you receive from the model. For instance, cleaning up the response, adjusting tone, or formatting data into a specific style. + +**Use case example:** +Suppose you’re working on a project that needs precise formatting. You can use a Filter to ensure: +1. Your input is always transformed into the required format. +2. The output from the model is cleaned up before being displayed. + +Filters are **linked to specific models** or can be enabled for all models **globally**, depending on your needs. + +Check out the full guide for more examples and instructions: [**Filter Functions**](./filter.mdx). + +--- + +### 3. [**Action Function** – Add Custom Buttons](./action.mdx) + +An **Action Function** is used to add **custom buttons** to the chat interface. + +**What does it do?** +Actions allow you to define **interactive shortcuts** that trigger specific functionality directly from the chat. 
These buttons appear underneath individual chat messages, giving you convenient, one-click access to the actions you define. + +**Use case example:** +Let’s say you often need to summarize long messages or generate specific outputs like translations. You can create an Action Function to: +1. Add a “Summarize” button under every incoming message. +2. When clicked, it triggers your custom function to process that message and return the summary. + +Buttons provide a **clean and user-friendly way** to interact with extended functionality you define. + +Learn how to set them up in the [**Action Functions Guide**](./action.mdx). + +--- + +## 🛠️ How to Use Functions + +Here's how to put Functions to work in Open WebUI: + +### 1. **Install Functions** +You can install Functions via the Open WebUI interface or by importing them manually. You can find community-created functions on the [Open WebUI Community Site](https://openwebui.com/functions). + +⚠️ **Be cautious.** Only install Functions from trusted sources. Running unknown code poses security risks. + +--- + +### 2. **Enable Functions** +Functions must be explicitly enabled after installation: +- When you enable a **Pipe Function**, it becomes available as its own **model** in the interface. +- For **Filter** and **Action Functions**, enabling them isn’t enough—you also need to assign them to specific models or enable them globally for all models. + +--- + +### 3. **Assign Filters or Actions to Models** +- Navigate to `Workspace => Models` and assign your Filter or Action to the relevant model there. +- Alternatively, enable Functions for **all models globally** by going to `Workspace => Functions`, selecting the "..." menu, and toggling the **Global** switch. + +--- + +### Quick Summary +- **Pipes** appear as standalone models you can interact with. +- **Filters** modify inputs/outputs for smoother AI interactions. +- **Actions** add clickable buttons to individual chat messages. 
+ +Once you’ve followed the setup process, Functions will seamlessly enhance your workflows. + +--- + +## ✅ Why Use Functions? + +Functions are designed for anyone who wants to **unlock new possibilities** with Open WebUI: + +- **Extend**: Add new models or integrate with non-AI tools like APIs, databases, or smart devices. +- **Optimize**: Tweak inputs and outputs to fit your use case perfectly. +- **Simplify**: Add buttons or shortcuts to make the interface intuitive and efficient. + +Whether you’re customizing workflows for specific projects, integrating external data, or just making Open WebUI easier to use, Functions are the key to taking control of your instance. + +--- + +### 📝 Final Notes: +1. Always install Functions from **trusted sources only**. +2. Make sure you understand the difference between Pipe, Filter, and Action Functions to use them effectively. +3. Explore the official guides: + - [Pipe Functions Guide](./pipe.mdx) + - [Filter Functions Guide](./filter.mdx) + - [Action Functions Guide](./action.mdx) + +By leveraging Functions, you’ll bring entirely new capabilities to your Open WebUI setup. Start experimenting today! 🚀 \ No newline at end of file diff --git a/docs/features/plugin/functions/pipe.mdx b/docs/features/plugin/functions/pipe.mdx new file mode 100644 index 0000000..1919745 --- /dev/null +++ b/docs/features/plugin/functions/pipe.mdx @@ -0,0 +1,400 @@ +--- +sidebar_position: 1 +title: "Pipe Function" +--- + +# 🚰 Pipe Function: Create Custom "Agents/Models" +Welcome to this guide on creating **Pipes** in Open WebUI! Think of Pipes as a way of **adding** a new model to Open WebUI. In this document, we'll break down what a Pipe is, how it works, and how you can create your own to add custom logic and processing to your Open WebUI models. We'll use clear metaphors and go through every detail to ensure you have a comprehensive understanding. 
+ +## Introduction to Pipes + +Imagine Open WebUI as a **plumbing system** where data flows through pipes and valves. In this analogy: + +- **Pipes** are like **plugins** that let you introduce new pathways for data to flow, allowing you to inject custom logic and processing. +- **Valves** are the **configurable parts** of your pipe that control how data flows through it. + +By creating a Pipe, you're essentially crafting a custom model with the specific behavior you want, all within the Open WebUI framework. + +--- + +## Understanding the Pipe Structure + +Let's start with a basic, barebones version of a Pipe to understand its structure: + +```python +from pydantic import BaseModel, Field + +class Pipe: + class Valves(BaseModel): + MODEL_ID: str = Field(default="") + + def __init__(self): + self.valves = self.Valves() + + def pipe(self, body: dict): + # Logic goes here + print(self.valves, body) # This will print the configuration options and the input body + return "Hello, World!" +``` + +### The Pipe Class + +- **Definition**: The `Pipe` class is where you define your custom logic. +- **Purpose**: Acts as the blueprint for your plugin, determining how it behaves within Open WebUI. + +### Valves: Configuring Your Pipe + +- **Definition**: `Valves` is a nested class within `Pipe`, inheriting from `BaseModel`. +- **Purpose**: It contains the configuration options (parameters) that persist across the use of your Pipe. +- **Example**: In the above code, `MODEL_ID` is a configuration option with a default empty string. + +**Metaphor**: Think of Valves as the knobs on a real-world pipe system that control the flow of water. In your Pipe, Valves allow users to adjust settings that influence how the data flows and is processed. + +### The `__init__` Method + +- **Definition**: The constructor method for the `Pipe` class. +- **Purpose**: Initializes the Pipe's state and sets up any necessary components. 
+- **Best Practice**: Keep it simple; primarily initialize `self.valves` here. + +```python +def __init__(self): + self.valves = self.Valves() +``` + +### The `pipe` Function + +- **Definition**: The core function where your custom logic resides. +- **Parameters**: + - `body`: A dictionary containing the input data. +- **Purpose**: Processes the input data using your custom logic and returns the result. + +```python +def pipe(self, body: dict): + # Logic goes here + print(self.valves, body) # This will print the configuration options and the input body + return "Hello, World!" +``` + +**Note**: Always place `Valves` at the top of your `Pipe` class, followed by `__init__`, and then the `pipe` function. This structure ensures clarity and consistency. + +--- + +## Creating Multiple Models with Pipes + +What if you want your Pipe to create **multiple models** within Open WebUI? You can achieve this by defining a `pipes` function or variable inside your `Pipe` class. This setup, informally called a **manifold**, allows your Pipe to represent multiple models. + +Here's how you can do it: + +```python +from pydantic import BaseModel, Field + +class Pipe: + class Valves(BaseModel): + MODEL_ID: str = Field(default="") + + def __init__(self): + self.valves = self.Valves() + + def pipes(self): + return [ + {"id": "model_id_1", "name": "model_1"}, + {"id": "model_id_2", "name": "model_2"}, + {"id": "model_id_3", "name": "model_3"}, + ] + + def pipe(self, body: dict): + # Logic goes here + print(self.valves, body) # Prints the configuration options and the input body + model = body.get("model", "") + return f"{model}: Hello, World!" +``` + +### Explanation + +- **`pipes` Function**: + - Returns a list of dictionaries. + - Each dictionary represents a model with unique `id` and `name` keys. + - These models will show up individually in the Open WebUI model selector. + +- **Updated `pipe` Function**: + - Processes input based on the selected model. 
+ - In this example, it includes the model name in the returned string. + +--- + +## Example: OpenAI Proxy Pipe + +Let's dive into a practical example where we'll create a Pipe that proxies requests to the OpenAI API. This Pipe will fetch available models from OpenAI and allow users to interact with them through Open WebUI. + +```python +from pydantic import BaseModel, Field +import requests + +class Pipe: + class Valves(BaseModel): + NAME_PREFIX: str = Field( + default="OPENAI/", + description="Prefix to be added before model names.", + ) + OPENAI_API_BASE_URL: str = Field( + default="https://api.openai.com/v1", + description="Base URL for accessing OpenAI API endpoints.", + ) + OPENAI_API_KEY: str = Field( + default="", + description="API key for authenticating requests to the OpenAI API.", + ) + + def __init__(self): + self.valves = self.Valves() + + def pipes(self): + if self.valves.OPENAI_API_KEY: + try: + headers = { + "Authorization": f"Bearer {self.valves.OPENAI_API_KEY}", + "Content-Type": "application/json", + } + + r = requests.get( + f"{self.valves.OPENAI_API_BASE_URL}/models", headers=headers + ) + models = r.json() + return [ + { + "id": model["id"], + "name": f'{self.valves.NAME_PREFIX}{model.get("name", model["id"])}', + } + for model in models["data"] + if "gpt" in model["id"] + ] + + except Exception as e: + return [ + { + "id": "error", + "name": "Error fetching models. 
Please check your API Key.", + }, + ] + else: + return [ + { + "id": "error", + "name": "API Key not provided.", + }, + ] + + def pipe(self, body: dict, __user__: dict): + print(f"pipe:{__name__}") + headers = { + "Authorization": f"Bearer {self.valves.OPENAI_API_KEY}", + "Content-Type": "application/json", + } + + # Extract model id from the model name + model_id = body["model"][body["model"].find(".") + 1 :] + + # Update the model id in the body + payload = {**body, "model": model_id} + try: + r = requests.post( + url=f"{self.valves.OPENAI_API_BASE_URL}/chat/completions", + json=payload, + headers=headers, + stream=True, + ) + + r.raise_for_status() + + if body.get("stream", False): + return r.iter_lines() + else: + return r.json() + except Exception as e: + return f"Error: {e}" +``` + +### Detailed Breakdown + +#### Valves Configuration + +- **`NAME_PREFIX`**: + - Adds a prefix to the model names displayed in Open WebUI. + - Default: `"OPENAI/"`. +- **`OPENAI_API_BASE_URL`**: + - Specifies the base URL for the OpenAI API. + - Default: `"https://api.openai.com/v1"`. +- **`OPENAI_API_KEY`**: + - Your OpenAI API key for authentication. + - Default: `""` (empty string; must be provided). + +#### The `pipes` Function + +- **Purpose**: Fetches available OpenAI models and makes them accessible in Open WebUI. + +- **Process**: + 1. **Check for API Key**: Ensures that an API key is provided. + 2. **Fetch Models**: Makes a GET request to the OpenAI API to retrieve available models. + 3. **Filter Models**: Returns models that have `"gpt"` in their `id`. + 4. **Error Handling**: If there's an issue, returns an error message. + +- **Return Format**: A list of dictionaries with `id` and `name` for each model. + +#### The `pipe` Function + +- **Purpose**: Handles the request to the selected OpenAI model and returns the response. + +- **Parameters**: + - `body`: Contains the request data. 
+ - `__user__`: Contains user information (not used in this example but can be useful for authentication or logging). + +- **Process**: + 1. **Prepare Headers**: Sets up the headers with the API key and content type. + 2. **Extract Model ID**: Extracts the actual model ID from the selected model name. + 3. **Prepare Payload**: Updates the body with the correct model ID. + 4. **Make API Request**: Sends a POST request to the OpenAI API's chat completions endpoint. + 5. **Handle Streaming**: If `stream` is `True`, returns an iterable of lines. + 6. **Error Handling**: Catches exceptions and returns an error message. + +### Extending the Proxy Pipe + +You can modify this proxy Pipe to support additional service providers like Anthropic, Perplexity, and more by adjusting the API endpoints, headers, and logic within the `pipes` and `pipe` functions. + +--- + +## Using Internal Open WebUI Functions + +Sometimes, you may want to leverage the internal functions of Open WebUI within your Pipe. You can import these functions directly from the `open_webui` package. Keep in mind that while unlikely, internal functions may change for optimization purposes, so always refer to the latest documentation. + +Here's how you can use internal Open WebUI functions: + +```python +from pydantic import BaseModel, Field +from fastapi import Request + +from open_webui.models.users import Users +from open_webui.utils.chat import generate_chat_completion + +class Pipe: + def __init__(self): + pass + + async def pipe( + self, + body: dict, + __user__: dict, + __request__: Request, + ) -> str: + # Use the unified endpoint with the updated signature + user = Users.get_user_by_id(__user__["id"]) + body["model"] = "llama3.2:latest" + return await generate_chat_completion(__request__, body, user) +``` + +### Explanation + +- **Imports**: + - `Users` from `open_webui.models.users`: To fetch user information. 
+ - `generate_chat_completion` from `open_webui.utils.chat`: To generate chat completions using internal logic. + +- **Asynchronous `pipe` Function**: + - **Parameters**: + - `body`: Input data for the model. + - `__user__`: Dictionary containing user information. + - `__request__`: The request object from FastAPI (required by `generate_chat_completion`). + - **Process**: + 1. **Fetch User Object**: Retrieves the user object using their ID. + 2. **Set Model**: Specifies the model to be used. + 3. **Generate Completion**: Calls `generate_chat_completion` to process the input and produce an output. + +### Important Notes + +- **Function Signatures**: Refer to the latest Open WebUI codebase or documentation for the most accurate function signatures and parameters. +- **Best Practices**: Always handle exceptions and errors gracefully to ensure a smooth user experience. + +--- + +## Frequently Asked Questions + +### Q1: Why should I use Pipes in Open WebUI? + +**A**: Pipes allow you to add new "models" with custom logic and processing to Open WebUI. It's a flexible plugin system that lets you integrate external APIs, customize model behaviors, and create innovative features without altering the core codebase. + +--- + +### Q2: What are Valves, and why are they important? + +**A**: Valves are the configurable parameters of your Pipe. They function like settings or controls that determine how your Pipe operates. By adjusting Valves, you can change the behavior of your Pipe without modifying the underlying code. + +--- + +### Q3: Can I create a Pipe without Valves? + +**A**: Yes, you can create a simple Pipe without defining a Valves class if your Pipe doesn't require any persistent configuration options. However, including Valves is a good practice for flexibility and future scalability. + +--- + +### Q4: How do I ensure my Pipe is secure when using API keys? + +**A**: Never hard-code sensitive information like API keys into your Pipe. 
Instead, use Valves to input and store API keys securely. Ensure that your code handles these keys appropriately and avoids logging or exposing them. + +--- + +### Q5: What is the difference between the `pipe` and `pipes` functions? + +**A**: + +- **`pipe` Function**: The primary function where you process the input data and generate an output. It handles the logic for a single model. + +- **`pipes` Function**: Allows your Pipe to represent multiple models by returning a list of model definitions. Each model will appear individually in Open WebUI. + +--- + +### Q6: How can I handle errors in my Pipe? + +**A**: Use try-except blocks within your `pipe` and `pipes` functions to catch exceptions. Return meaningful error messages or handle the errors gracefully to ensure the user is informed about what went wrong. + +--- + +### Q7: Can I use external libraries in my Pipe? + +**A**: Yes, you can import and use external libraries as needed. Ensure that any dependencies are properly installed and managed within your environment. + +--- + +### Q8: How do I test my Pipe? + +**A**: Test your Pipe by running Open WebUI in a development environment and selecting your custom model from the interface. Validate that your Pipe behaves as expected with various inputs and configurations. + +--- + +### Q9: Are there any best practices for organizing my Pipe's code? + +**A**: Yes, follow these guidelines: + +- Keep `Valves` at the top of your `Pipe` class. +- Initialize variables in the `__init__` method, primarily `self.valves`. +- Place the `pipe` function after the `__init__` method. +- Use clear and descriptive variable names. +- Comment your code for clarity. + +--- + +### Q10: Where can I find the latest Open WebUI documentation? + +**A**: Visit the official Open WebUI repository or documentation site for the most up-to-date information, including function signatures, examples, and migration guides if any changes occur. 
+ +--- + +## Conclusion + +By now, you should have a thorough understanding of how to create and use Pipes in Open WebUI. Pipes offer a powerful way to extend and customize the capabilities of Open WebUI to suit your specific needs. Whether you're integrating external APIs, adding new models, or injecting complex logic, Pipes provide the flexibility to make it happen. + +Remember to: + +- **Use clear and consistent structure** in your Pipe classes. +- **Leverage Valves** for configurable options. +- **Handle errors gracefully** to improve the user experience. +- **Consult the latest documentation** for any updates or changes. + +Happy coding, and enjoy extending your Open WebUI with Pipes! diff --git a/docs/features/plugin/index.mdx b/docs/features/plugin/index.mdx new file mode 100644 index 0000000..f8e6dd4 --- /dev/null +++ b/docs/features/plugin/index.mdx @@ -0,0 +1,91 @@ +--- +sidebar_position: 300 +title: "Tools & Functions (Plugins)" +--- + +# 🛠️ Tools & Functions + +Imagine you've just stumbled upon Open WebUI, or maybe you're already using it, but you're a bit lost with all the talk about "Tools", "Functions", and "Pipelines". Everything sounds like some mysterious tech jargon, right? No worries! Let's break it down piece by piece, super clearly, step by step. By the end of this, you'll have a solid understanding of what these terms mean, how they work, and why it's not as complicated as it seems. + +## TL;DR + +- **Tools** extend the abilities of LLMs, allowing them to collect real-world, real-time data like weather, stock prices, etc. +- **Functions** extend the capabilities of the Open WebUI itself, enabling you to add new AI model support (like Anthropic or Vertex AI) or improve usability (like creating custom buttons or filters). +- **Pipelines** are more for advanced users who want to transform Open WebUI features into API-compatible workflows—mainly for offloading heavy processing. 
+ +Getting started with Tools and Functions is easy because everything’s already built into the core system! You just **click a button** and **import these features directly from the community**, so there’s no coding or deep technical work required. + +## What are "Tools" and "Functions"? + +Let's start by thinking of **Open WebUI** as a "base" software that can do many tasks related to using Large Language Models (LLMs). But sometimes, you need extra features or abilities that don't come *out of the box*—this is where **tools** and **functions** come into play. + +### Tools + +**Tools** are an exciting feature because they allow LLMs to do more than just process text. They provide **external abilities** that LLMs wouldn't otherwise have on their own. + +#### Example of a Tool: + +Imagine you're chatting with an LLM and you want it to give you the latest weather update or stock prices in real time. Normally, the LLM can't do that because it's just working on pre-trained knowledge. This is where **tools** come in! + +- **Tools are like plugins** that the LLM can use to gather **real-world, real-time data**. So, with a "weather tool" enabled, the model can go out on the internet, gather live weather data, and display it in your conversation. + +Tools are essentially **abilities** you’re giving your AI to help it interact with the outside world. By adding these, the LLM can "grab" useful information or perform specialized tasks based on the context of the conversation. + +#### Examples of Tools (extending LLM’s abilities): + +1. **Real-time weather predictions** 🛰️. +2. **Stock price retrievers** 📈. +3. **Flight tracking information** ✈️. + +### Functions + +While **tools** are used by the AI during a conversation, **functions** help extend or customize the capabilities of Open WebUI itself. Imagine tools are like adding new ingredients to a dish, and functions are the process you use to control the kitchen! 
🚪 + +#### Let's break that down: + +- **Functions** give you the ability to tweak or add **features** inside **Open WebUI** itself. +- You’re not giving new abilities to the LLM, but instead, you’re extending the **interface, behavior, or logic** of the platform itself! + +For instance, maybe you want to: + +1. Add a new AI model like **Anthropic** to the WebUI. +2. Create a custom button in your toolbar that performs a frequently used command. +3. Implement a better **filter** function that catches inappropriate or **spammy messages** from the incoming text. + +Without functions, these would all be out of reach. But with this framework in Open WebUI, you can easily extend these features! + +### Where to Find and Manage Functions + +Functions are not located in the same place as Tools. + +- **Tools** are about model access and live in your **Workspace tabs** (where you add models, prompts, and knowledge collections). They can be added by users if granted permissions. +- **Functions** are about **platform customization** and are found in the **Admin Panel**. + They are configured and managed only by admins who want to extend the platform interface or behavior for all users. + +### Summary of Differences: + +- **Tools** are things that allow LLMs to **do more things** outside their default abilities (such as retrieving live info or performing custom tasks based on external data). +- **Functions** help the WebUI itself **do more things**, like adding new AI models or creating smarter ways to filter data. + +Both are designed to be **pluggable**, meaning you can easily import them into your system with just one click from the community! 🎉 You won’t have to spend hours coding or tinkering with them. + +## What are Pipelines? + +And then, we have **Pipelines**… Here’s where things start to sound pretty technical—but don’t despair. + +**Pipelines** are part of an Open WebUI initiative focused on making every piece of the WebUI **inter-operable with OpenAI’s API system**. 
Essentially, they extend what both **Tools** and **Functions** can already do, but now with even more flexibility. They allow you to turn features into OpenAI API-compatible formats. 🧠 + +### But here’s the thing… + +You likely **won't need** pipelines unless you're dealing with super-advanced setups. + +- **Who are pipelines for?** Typically, **experts** or people running more complicated use cases. +- **When do you need them?** If you're trying to offload processing from your primary Open WebUI instance to a different machine (so you don’t overload your primary system). + +In most cases, as a beginner or even an intermediate user, you won’t have to worry about pipelines. Just focus on enjoying the benefits that **tools** and **functions** bring to your Open WebUI experience! + +## Want to Try? 🚀 + +Jump into Open WebUI, head over to the community section, and try importing a tool like **weather updates** or maybe adding a new feature to the toolbar with a function. Exploring these tools will show you how powerful and flexible Open WebUI can be! + +🌟 There's always more to learn, so stay curious and keep experimenting! diff --git a/docs/features/plugin/migration/index.mdx b/docs/features/plugin/migration/index.mdx new file mode 100644 index 0000000..8602a54 --- /dev/null +++ b/docs/features/plugin/migration/index.mdx @@ -0,0 +1,255 @@ +--- +sidebar_position: 9999 +title: "Migrating Tools & Functions: 0.4 to 0.5" +--- + +# 🚚 Migration Guide: Open WebUI 0.4 to 0.5 + +Welcome to the Open WebUI 0.5 migration guide! If you're working on existing projects or building new ones, this guide will walk you through the key changes from **version 0.4 to 0.5** and provide an easy-to-follow roadmap for upgrading your Functions. Let's make this transition as smooth as possible! 😊 + +--- + +## 🧐 What Has Changed and Why? + +With Open WebUI 0.5, we’ve overhauled the architecture to make the project **simpler, more unified, and scalable**. 
Here's the big picture: + +- **Old Architecture:** 🎯 Previously, Open WebUI was built on a **sub-app architecture** where each app (e.g., `ollama`, `openai`) was a separate FastAPI application. This caused fragmentation and extra complexity when managing apps. +- **New Architecture:** 🚀 With version 0.5, we have transitioned to a **single FastAPI app** with multiple **routers**. This means better organization, centralized flow, and reduced redundancy. + +### Key Changes: +Here’s an overview of what changed: +1. **Apps have been moved to Routers.** + - Previous: `open_webui.apps` + - Now: `open_webui.routers` + +2. **Main app structure simplified.** + - The old `open_webui.apps.webui` has been transformed into `open_webui.main`, making it the central entry point for the project. + +3. **Unified API Endpoint** + - Open WebUI 0.5 introduces a **unified function**, `chat_completion`, in `open_webui.main`, replacing separate functions for models like `ollama` and `openai`. This offers a consistent and streamlined API experience. However, the **direct successor** of these individual functions is `generate_chat_completion` from `open_webui.utils.chat`. If you prefer a lightweight POST request without handling additional parsing (e.g., files, tools, or misc), this utility function is likely what you want. + +#### Example: +```python + +# Full API flow with parsing (new function): +from open_webui.main import chat_completion + +# Lightweight, direct POST request (direct successor): +from open_webui.utils.chat import generate_chat_completion +``` + +Choose the approach that best fits your use case! + +4. **Updated Function Signatures.** + - Function signatures now adhere to a new format, requiring a `request` object. + - The `request` object can be obtained using the `__request__` parameter in the function signature. 
Below is an example: + +```python +class Pipe: + def __init__(self): + pass + + async def pipe( + self, + body: dict, + __user__: dict, + __request__: Request, # New parameter + ) -> str: + # Write your function here +``` + +📌 **Why did we make these changes?** +- To simplify the codebase, making it easier to extend and maintain. +- To unify APIs for a more streamlined developer experience. +- To enhance performance by consolidating redundant elements. + +--- + +## ✅ Step-by-Step Migration Guide + +Follow this guide to smoothly update your project. + +--- + +### 🔄 1. Shifting from `apps` to `routers` + +All apps have been renamed and relocated under `open_webui.routers`. This affects imports in your codebase. + +Quick changes for import paths: + +| **Old Path** | **New Path** | +|-----------------------------------|-----------------------------------| +| `open_webui.apps.ollama` | `open_webui.routers.ollama` | +| `open_webui.apps.openai` | `open_webui.routers.openai` | +| `open_webui.apps.audio` | `open_webui.routers.audio` | +| `open_webui.apps.retrieval` | `open_webui.routers.retrieval` | +| `open_webui.apps.webui` | `open_webui.main` | + +### 📜 An Important Example + +To clarify the special case of the main app (`webui`), here’s a simple rule of thumb: + +- **Was in `webui`?** It’s now in the project’s root or `open_webui.main`. +- For example: + - **Before (0.4):** + ```python + from open_webui.apps.webui.models import SomeModel + ``` + - **After (0.5):** + ```python + from open_webui.models import SomeModel + ``` + +In general, **just replace `open_webui.apps` with `open_webui.routers`—except for `webui`, which is now `open_webui.main`!** + +--- + +### 👩‍💻 2. 
Updating Import Statements + +Let’s look at what this update looks like in your code: + +#### Before: +```python +from open_webui.apps.ollama import main as ollama +from open_webui.apps.openai import main as openai +``` + +#### After: +```python + +# Separate router imports +from open_webui.routers.ollama import generate_chat_completion +from open_webui.routers.openai import generate_chat_completion + +# Or use the unified endpoint +from open_webui.main import chat_completion +``` + +:::tip + +Prioritize the unified endpoint (`chat_completion`) for simplicity and future compatibility. + +::: + +### 📝 **Additional Note: Choosing Between `main.chat_completion` and `utils.chat.generate_chat_completion`** + +Depending on your use case, you can choose between: + +1. **`open_webui.main.chat_completion`:** + - Simulates making a POST request to `/api/chat/completions`. + - Processes files, tools, and other miscellaneous tasks. + - Best when you want the complete API flow handled automatically. + +2. **`open_webui.utils.chat.generate_chat_completion`:** + - Directly makes a POST request without handling extra parsing or tasks. + - This is the **direct successor** to the previous `main.generate_chat_completions`, `ollama.generate_chat_completion` and `openai.generate_chat_completion` functions in Open WebUI 0.4. + - Best for simplified and more lightweight scenarios. + +#### Example: +```python + +# Use this for the full API flow with parsing: +from open_webui.main import chat_completion + +# Use this for a stripped-down, direct POST request: +from open_webui.utils.chat import generate_chat_completion +``` + +--- + +### 📋 3. Adapting to Updated Function Signatures + +We’ve updated the **function signatures** to better fit the new architecture. If you're looking for a direct replacement, start with the lightweight utility function `generate_chat_completion` from `open_webui.utils.chat`. 
For the full API flow, use the new unified `chat_completion` function in `open_webui.main`. + +#### Function Signature Changes: + +| **Old** | **Direct Successor (New)** | **Unified Option (New)** | +|-----------------------------------------|-----------------------------------------|-----------------------------------------| +| `openai.generate_chat_completion(form_data: dict, user: UserModel)` | `generate_chat_completion(request: Request, form_data: dict, user: UserModel)` | `chat_completion(request: Request, form_data: dict, user: UserModel)` | + +- **Direct Successor (`generate_chat_completion`)**: A lightweight, 1:1 replacement for previous `ollama`/`openai` methods. +- **Unified Option (`chat_completion`)**: Use this for the complete API flow, including file parsing and additional functionality. + +#### Example: + +If you're using the direct successor `generate_chat_completion`, the refactoring example below shows how your function should look now: + +### 🛠️ How to Refactor Your Custom Function +Let’s rewrite a sample function to match the new structure: + +#### Before (0.4): +```python +from pydantic import BaseModel +from open_webui.apps.ollama import main as ollama + +class User(BaseModel): + id: str + email: str + name: str + role: str + +class Pipe: + def __init__(self): + pass + + async def pipe(self, body: dict, __user__: dict) -> str: + # Calls Ollama endpoint + user = User(**__user__) + body["model"] = "llama3.2:latest" + return await ollama.generate_chat_completion(body, user) +``` + +#### After (0.5): +```python +from pydantic import BaseModel +from fastapi import Request + +from open_webui.utils.chat import generate_chat_completion + +class User(BaseModel): + id: str + email: str + name: str + role: str + +class Pipe: + def __init__(self): + pass + + async def pipe( + self, + body: dict, + __user__: dict, + __request__: Request, + ) -> str: + # Uses the direct successor with the updated signature + user = User(**__user__) + body["model"] = "llama3.2:latest" + return await
generate_chat_completion(__request__, body, user) +``` + +### Important Notes: +- You must pass a `Request` object (`__request__`) in the new function signature. +- Other optional parameters (like `__user__` and `__event_emitter__`) ensure flexibility for more complex use cases. + +--- + +### 🌟 4. Recap: Key Concepts in Simple Terms + +Here’s a quick cheat sheet to remember: +- **Apps to Routers:** Update all imports from `open_webui.apps` ➡️ `open_webui.routers`. +- **Unified Endpoint:** Use `open_webui.main.chat_completion` for simplicity if both `ollama` and `openai` are involved. +- **Adapt Function Signatures:** Ensure your functions pass the required `request` object. + +--- + +## 🎉 Hooray! You're Ready! + +That's it! You've successfully migrated from **Open WebUI 0.4 to 0.5**. By refactoring your imports, using the unified endpoint, and updating function signatures, you'll be fully equipped to leverage the latest features and improvements in version 0.5. + +--- + +💬 **Questions or Feedback?** +If you run into any issues or have suggestions, feel free to open a [GitHub issue](https://github.com/open-webui/open-webui) or ask in the community forums! + +Happy coding! ✨ \ No newline at end of file diff --git a/docs/features/plugin/tools/development.mdx b/docs/features/plugin/tools/development.mdx new file mode 100644 index 0000000..5bfe2a2 --- /dev/null +++ b/docs/features/plugin/tools/development.mdx @@ -0,0 +1,1651 @@ +--- +sidebar_position: 2 +title: "Development" +--- + +## Writing A Custom Toolkit + +Toolkits are defined in a single Python file, with a top level docstring with metadata and a `Tools` class. 
+ +### Example Top-Level Docstring + +```python +""" +title: String Inverse +author: Your Name +author_url: https://website.com +git_url: https://github.com/username/string-reverse.git +description: This tool calculates the inverse of a string +required_open_webui_version: 0.4.0 +requirements: langchain-openai, langgraph, ollama, langchain_ollama +version: 0.4.0 +licence: MIT +""" +``` + +### Tools Class + +Tools have to be defined as methods within a class called `Tools`, with optional nested classes called `Valves` and `UserValves`, for example: + +```python +class Tools: + def __init__(self): + """Initialize the Tool.""" + self.valves = self.Valves() + + class Valves(BaseModel): + api_key: str = Field("", description="Your API key here") + + def reverse_string(self, string: str) -> str: + """ + Reverses the input string. + :param string: The string to reverse + """ + # example usage of valves + if self.valves.api_key != "42": + return "Wrong API key" + return string[::-1] +``` + +### Type Hints +Each tool must have type hints for arguments. The types may also be nested, such as `queries_and_docs: list[tuple[str, int]]`. Those type hints are used to generate the JSON schema that is sent to the model. Tools without type hints will work with a lot less consistency. + +### Valves and UserValves - (optional, but HIGHLY encouraged) + +Valves and UserValves are used for specifying customizable settings of the Tool. You can read more on the dedicated [Valves & UserValves page](/features/plugin/development/valves). + +### Optional Arguments +Below is a list of optional arguments your tools can depend on: +- `__event_emitter__`: Emit events (see following section) +- `__event_call__`: Same as event emitter but can be used for user interactions +- `__user__`: A dictionary with user information. It also contains the `UserValves` object in `__user__["valves"]`.
+- `__metadata__`: Dictionary with chat metadata +- `__messages__`: List of previous messages +- `__files__`: Attached files +- `__model__`: A dictionary with model information +- `__oauth_token__`: A dictionary containing the user's valid, automatically refreshed OAuth token payload. This is the **new, recommended, and secure** way to access user tokens for making authenticated API calls. The dictionary typically contains `access_token`, `id_token`, and other provider-specific data. + +For more information about `__oauth_token__` and how to configure this token to be sent to tools, check out the OAuth section in the [environment variable docs page](https://docs.openwebui.com/getting-started/env-configuration/) and the [SSO documentation](https://docs.openwebui.com/features/auth/). + +Just add them as arguments to any method of your `Tools` class, just like `__oauth_token__` in the example below. + +#### Using the OAuth Token in a Tool + +When building tools that need to interact with external APIs on the user's behalf, you can now directly access their OAuth token. This removes the need for fragile cookie scraping and ensures the token is always valid. + +**Example:** A tool that calls an external API using the user's access token. + +```python +import httpx +from typing import Optional + +class Tools: + # ... other class setup ... + + async def get_user_profile_from_external_api(self, __oauth_token__: Optional[dict] = None) -> str: + """ + Fetches user profile data from a secure external API using their OAuth access token. + + :param __oauth_token__: Injected by Open WebUI, contains the user's token data. + """ + if not __oauth_token__ or "access_token" not in __oauth_token__: + return "Error: User is not authenticated via OAuth or token is unavailable."
+ + access_token = __oauth_token__["access_token"] + + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + try: + async with httpx.AsyncClient() as client: + response = await client.get("https://api.my-service.com/v1/profile", headers=headers) + response.raise_for_status() # Raise an exception for bad status codes + return f"API Response: {response.json()}" + except httpx.HTTPStatusError as e: + return f"Error: Failed to fetch data from API. Status: {e.response.status_code}" + except Exception as e: + return f"An unexpected error occurred: {e}" +``` + +### Event Emitters + +Event Emitters are used to add additional information to the chat interface. Similarly to Filter Outlets, Event Emitters are capable of appending content to the chat. Unlike Filter Outlets, they are not capable of stripping information. Additionally, emitters can be activated at any stage during the Tool. + +**⚠️ CRITICAL: Function Calling Mode Compatibility** + +Event Emitter behavior is **significantly different** depending on your function calling mode. 
The function calling mode is controlled by the `function_calling` parameter: + +- **Default Mode**: Uses traditional function calling approach with wider model compatibility +- **Native Mode**: Leverages model's built-in tool-calling capabilities for reduced latency + +Before using event emitters, you must understand these critical limitations: + +- **Default Mode** (`function_calling = "default"`): Full event emitter support with all event types working as expected +- **Native Mode** (`function_calling = "native"`): **Limited event emitter support** - many event types don't work properly due to native function calling bypassing Open WebUI's custom tool processing pipeline + +**When to Use Each Mode:** +- **Use Default Mode** when you need full event emitter functionality, complex tool interactions, or real-time UI updates +- **Use Native Mode** when you need reduced latency and basic tool calling without complex UI interactions + +#### Function Calling Mode Configuration + +You can configure the function calling mode in two places: + +1. **Model Settings**: Go to Model page → Advanced Params → Function Calling (set to "Default" or "Native") +2. **Per-request basis**: Set `params.function_calling = "native"` or `"default"` in your request + +If the model seems to be unable to call the tool, make sure it is enabled (either via the Model page or via the `+` sign next to the chat input field). 
+ +#### Complete Event Type Compatibility Matrix + +Here's the comprehensive breakdown of how each event type behaves across function calling modes: + +| Event Type | Default Mode Functionality | Native Mode Functionality | Status | +|------------|---------------------------|--------------------------|--------| +| `status` | ✅ Full support - Updates status history during tool execution | ✅ **Identical** - Tracks function execution status | **COMPATIBLE** | +| `message` | ✅ Full support - Appends incremental content during streaming | ❌ **BROKEN** - Gets overwritten by native completion snapshots | **INCOMPATIBLE** | +| `chat:completion` | ✅ Full support - Handles streaming responses and completion data | ⚠️ **LIMITED** - Carries function results but may overwrite tool updates | **PARTIALLY COMPATIBLE** | +| `chat:message:delta` | ✅ Full support - Streams delta content during execution | ❌ **BROKEN** - Content gets replaced by native function snapshots | **INCOMPATIBLE** | +| `chat:message` | ✅ Full support - Replaces entire message content cleanly | ❌ **BROKEN** - Gets overwritten by subsequent native completions | **INCOMPATIBLE** | +| `replace` | ✅ Full support - Replaces content with precise control | ❌ **BROKEN** - Replaced content gets overwritten immediately | **INCOMPATIBLE** | +| `chat:message:files` / `files` | ✅ Full support - Handles file attachments in messages | ✅ **Identical** - Processes files from function outputs | **COMPATIBLE** | +| `chat:message:error` | ✅ Full support - Displays error notifications | ✅ **Identical** - Shows function call errors | **COMPATIBLE** | +| `chat:message:follow_ups` | ✅ Full support - Shows follow-up suggestions | ✅ **Identical** - Displays function-generated follow-ups | **COMPATIBLE** | +| `chat:title` | ✅ Full support - Updates chat title dynamically | ✅ **Identical** - Updates title based on function interactions | **COMPATIBLE** | +| `chat:tags` | ✅ Full support - Modifies chat tags | ✅ **Identical** - Manages 
tags from function outputs | **COMPATIBLE** | +| `chat:tasks:cancel` | ✅ Full support - Cancels ongoing tasks | ✅ **Identical** - Cancels native function executions | **COMPATIBLE** | +| `citation` / `source` | ✅ Full support - Handles citations with full metadata | ✅ **Identical** - Processes function-generated citations | **COMPATIBLE** | +| `notification` | ✅ Full support - Shows toast notifications | ✅ **Identical** - Displays function execution notifications | **COMPATIBLE** | +| `confirmation` | ✅ Full support - Requests user confirmations | ✅ **Identical** - Confirms function executions | **COMPATIBLE** | +| `execute` | ✅ Full support - Executes code dynamically | ✅ **Identical** - Runs function-generated code | **COMPATIBLE** | +| `input` | ✅ Full support - Requests user input with full UI | ✅ **Identical** - Collects input for functions | **COMPATIBLE** | + +#### Why Native Mode Breaks Certain Event Types + +In **Native Mode**, the server constructs content blocks from streaming model output and repeatedly emits `"chat:completion"` events with full serialized content snapshots. The client treats these snapshots as authoritative and completely replaces message content, effectively overwriting any prior tool-emitted updates like `message`, `chat:message`, or `replace` events. 
+ +**Technical Details:** +- `middleware.py` adds tools directly to form data for native model handling +- Streaming handler emits repeated content snapshots via `chat:completion` events +- Client's `chatCompletionEventHandler` treats snapshots as complete replacements: `message.content = content` +- This causes tool-emitted content updates to flicker and disappear + +#### Best Practices and Recommendations + +**For Tools Requiring Real-time UI Updates:** +```python +class Tools: + def __init__(self): + # Add a note about function calling mode requirements + self.description = "This tool requires Default function calling mode for full functionality" + + async def interactive_tool(self, prompt: str, __event_emitter__=None) -> str: + """ + ⚠️ This tool requires function_calling = "default" for proper event emission + """ + if not __event_emitter__: + return "Event emitter not available - ensure Default function calling mode is enabled" + + # Safe to use message events in Default mode + await __event_emitter__({ + "type": "message", + "data": {"content": "Processing step 1..."} + }) + # ... rest of tool logic +``` + +**For Tools That Must Work in Both Modes:** +```python +async def universal_tool(self, prompt: str, __event_emitter__=None, __metadata__=None) -> str: + """ + Tool designed to work in both Default and Native function calling modes + """ + # Check if we're in native mode (this is a rough heuristic) + is_native_mode = __metadata__ and __metadata__.get("params", {}).get("function_calling") == "native" + + if __event_emitter__: + if is_native_mode: + # Use only compatible event types in native mode + await __event_emitter__({ + "type": "status", + "data": {"description": "Processing in native mode...", "done": False} + }) + else: + # Full event functionality in default mode + await __event_emitter__({ + "type": "message", + "data": {"content": "Processing with full event support..."} + }) + + # ... 
tool logic here + + if __event_emitter__: + await __event_emitter__({ + "type": "status", + "data": {"description": "Completed successfully", "done": True} + }) + + return "Tool execution completed" +``` + +#### Troubleshooting Event Emitter Issues + +**Symptoms of Native Mode Conflicts:** +- Tool-emitted messages appear briefly then disappear +- Content flickers during tool execution +- `message` or `replace` events seem to be ignored +- Status updates work but content updates don't persist + +**Solutions:** +1. **Switch to Default Mode**: Change `function_calling` from `"native"` to `"default"` in model settings +2. **Use Compatible Event Types**: Stick to `status`, `citation`, `notification`, and other compatible event types in native mode +3. **Implement Mode Detection**: Add logic to detect function calling mode and adjust event usage accordingly +4. **Consider Hybrid Approaches**: Use compatible events for core functionality and degrade gracefully + +**Debugging Your Event Emitters:** +```python +async def debug_events_tool(self, __event_emitter__=None, __metadata__=None) -> str: + """Debug tool to test event emitter functionality""" + + if not __event_emitter__: + return "No event emitter available" + + # Test various event types + test_events = [ + {"type": "status", "data": {"description": "Testing status events", "done": False}}, + {"type": "message", "data": {"content": "Testing message events (may not work in native mode)"}}, + {"type": "notification", "data": {"content": "Testing notification events"}}, + ] + + mode_info = "Unknown" + if __metadata__: + mode_info = __metadata__.get("params", {}).get("function_calling", "default") + + await __event_emitter__({ + "type": "status", + "data": {"description": f"Function calling mode: {mode_info}", "done": False} + }) + + for i, event in enumerate(test_events): + await asyncio.sleep(1) # Space out events + await __event_emitter__(event) + await __event_emitter__({ + "type": "status", + "data": 
{"description": f"Sent event {i+1}/{len(test_events)}", "done": False} + }) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Event testing complete", "done": True} + }) + + return f"Event testing completed in {mode_info} mode. Check for missing or flickering content." +``` + +There are several specific event types with different behaviors: + +#### Status Events ✅ FULLY COMPATIBLE + +**Status events work identically in both Default and Native function calling modes.** This is the most reliable event type for providing real-time feedback during tool execution. + +Status events add live status updates to a message while it's performing steps. These can be emitted at any stage during tool execution. Status messages appear right above the message content and are essential for tools that delay the LLM response or process large amounts of information. + +**Basic Status Event Structure:** +```python +await __event_emitter__({ + "type": "status", + "data": { + "description": "Message that shows up in the chat", + "done": False, # False = still processing, True = completed + "hidden": False # False = visible, True = auto-hide when done + } +}) +``` + +**Status Event Parameters:** +- `description`: The status message text shown to users +- `done`: Boolean indicating if this status represents completion +- `hidden`: Boolean to auto-hide the status once `done: True` is set + +
+Basic Status Example + +```python +async def data_processing_tool( + self, data_file: str, __user__: dict, __event_emitter__=None + ) -> str: + """ + Processes a large data file with status updates + ✅ Works in both Default and Native function calling modes + """ + + if not __event_emitter__: + return "Processing completed (no status updates available)" + + # Step 1: Loading + await __event_emitter__({ + "type": "status", + "data": {"description": "Loading data file...", "done": False} + }) + + # Simulate loading time + await asyncio.sleep(2) + + # Step 2: Processing + await __event_emitter__({ + "type": "status", + "data": {"description": "Analyzing 10,000 records...", "done": False} + }) + + # Simulate processing time + await asyncio.sleep(3) + + # Step 3: Completion + await __event_emitter__({ + "type": "status", + "data": {"description": "Analysis complete!", "done": True, "hidden": False} + }) + + return "Data analysis completed successfully. Found 23 anomalies." +``` +
+ +
+Advanced Status with Error Handling + +```python +async def api_integration_tool( + self, endpoint: str, __event_emitter__=None + ) -> str: + """ + Integrates with external API with comprehensive status tracking + ✅ Compatible with both function calling modes + """ + + if not __event_emitter__: + return "API integration completed (no status available)" + + try: + await __event_emitter__({ + "type": "status", + "data": {"description": "Connecting to API...", "done": False} + }) + + # Simulate API connection + await asyncio.sleep(1.5) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Authenticating...", "done": False} + }) + + # Simulate authentication + await asyncio.sleep(1) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Fetching data...", "done": False} + }) + + # Simulate data fetching + await asyncio.sleep(2) + + # Success status + await __event_emitter__({ + "type": "status", + "data": {"description": "API integration successful", "done": True} + }) + + return "Successfully retrieved 150 records from the API" + + except Exception as e: + # Error status - always visible for debugging + await __event_emitter__({ + "type": "status", + "data": {"description": f"Error: {str(e)}", "done": True, "hidden": False} + }) + + return f"API integration failed: {str(e)}" +``` +
+ +
+Multi-Step Progress Status + +```python +async def batch_processor_tool( + self, items: list, __event_emitter__=None + ) -> str: + """ + Processes items in batches with detailed progress tracking + ✅ Works perfectly in both function calling modes + """ + + if not __event_emitter__ or not items: + return "Batch processing completed" + + total_items = len(items) + batch_size = 10 + completed = 0 + + for i in range(0, total_items, batch_size): + batch = items[i:i + batch_size] + batch_num = (i // batch_size) + 1 + total_batches = (total_items + batch_size - 1) // batch_size + + # Update status for current batch + await __event_emitter__({ + "type": "status", + "data": { + "description": f"Processing batch {batch_num}/{total_batches} ({len(batch)} items)...", + "done": False + } + }) + + # Simulate batch processing + await asyncio.sleep(1) + + completed += len(batch) + + # Progress update + progress_pct = int((completed / total_items) * 100) + await __event_emitter__({ + "type": "status", + "data": { + "description": f"Progress: {completed}/{total_items} items ({progress_pct}%)", + "done": False + } + }) + + # Final completion status + await __event_emitter__({ + "type": "status", + "data": { + "description": f"Batch processing complete! Processed {total_items} items", + "done": True + } + }) + + return f"Successfully processed {total_items} items in {total_batches} batches" +``` +
+ +#### Message Events ⚠️ DEFAULT MODE ONLY + +:::warning + +**🚨 CRITICAL WARNING: Message events are INCOMPATIBLE with Native function calling mode!** + +::: + +Message events (`message`, `chat:message`, `chat:message:delta`, `replace`) allow you to append or modify message content at any stage during tool execution. This enables embedding images, rendering web pages, streaming content updates, and creating rich interactive experiences. + +**However, these event types have major compatibility issues:** +- ✅ **Default Mode**: Full functionality - content persists and displays properly +- ❌ **Native Mode**: BROKEN - content gets overwritten by completion snapshots and disappears + +**Why Message Events Break in Native Mode:** +Native function calling emits repeated `chat:completion` events with full content snapshots that completely replace message content, causing any tool-emitted message updates to flicker and disappear. + +**Safe Message Event Structure (Default Mode Only):** +```python +await __event_emitter__({ + "type": "message", # Also: "chat:message", "chat:message:delta", "replace" + "data": {"content": "This content will be appended/replaced in the chat"}, + # Note: message types do NOT require a "done" condition +}) +``` + +**Message Event Types:** +- `message` / `chat:message:delta`: Appends content to existing message +- `chat:message` / `replace`: Replaces entire message content +- Both types will be overwritten in Native mode + +
+Safe Message Streaming (Default Mode) + +```python +async def streaming_content_tool( + self, query: str, __event_emitter__=None, __metadata__=None + ) -> str: + """ + Streams content updates during processing + ⚠️ REQUIRES function_calling = "default" - Will not work in Native mode! + """ + + # Check function calling mode (rough detection) + mode = "unknown" + if __metadata__: + mode = __metadata__.get("params", {}).get("function_calling", "default") + + if mode == "native": + return "❌ This tool requires Default function calling mode. Message streaming is not supported in Native mode due to content overwriting issues." + + if not __event_emitter__: + return "Event emitter not available" + + # Stream progressive content updates + content_chunks = [ + "🔍 **Phase 1: Research**\nGathering information about your query...\n\n", + "📊 **Phase 2: Analysis**\nAnalyzing gathered data patterns...\n\n", + "✨ **Phase 3: Synthesis**\nGenerating insights and recommendations...\n\n", + "📝 **Phase 4: Final Report**\nCompiling comprehensive results...\n\n" + ] + + accumulated_content = "" + + for i, chunk in enumerate(content_chunks): + accumulated_content += chunk + + # Append this chunk to the message + await __event_emitter__({ + "type": "message", + "data": {"content": chunk} + }) + + # Show progress status + await __event_emitter__({ + "type": "status", + "data": { + "description": f"Processing phase {i+1}/{len(content_chunks)}...", + "done": False + } + }) + + # Simulate processing time + await asyncio.sleep(2) + + # Final completion + await __event_emitter__({ + "type": "status", + "data": {"description": "Content streaming complete!", "done": True} + }) + + return "Content streaming completed successfully. All phases processed." +``` +
+ +
+Dynamic Content Replacement (Default Mode) + +```python +async def live_dashboard_tool( + self, __event_emitter__=None, __metadata__=None + ) -> str: + """ + Creates a live-updating dashboard using content replacement + ⚠️ ONLY WORKS in Default function calling mode + """ + + # Verify we're not in Native mode + mode = __metadata__.get("params", {}).get("function_calling", "default") if __metadata__ else "default" + + if mode == "native": + return """ +❌ **Native Mode Incompatibility** + +This dashboard tool cannot function in Native mode because: +- Content replacement events get overwritten by completion snapshots +- Live updates will flicker and disappear +- Real-time data will not persist in the interface + +**Solution:** Switch to Default function calling mode in Model Settings → Advanced Params → Function Calling = "Default" +""" + + if not __event_emitter__: + return "Dashboard created (static mode - no live updates)" + + # Create initial dashboard + initial_dashboard = """ + +# 📊 Live System Dashboard + +## System Status: 🟡 Initializing... + +### Current Metrics: +- **CPU Usage**: Loading... +- **Memory**: Loading... +- **Active Users**: Loading... +- **Response Time**: Loading... 
+ +--- +*Last Updated: Initializing...* +""" + + await __event_emitter__({ + "type": "replace", + "data": {"content": initial_dashboard} + }) + + # Simulate live data updates + updates = [ + { + "status": "🟢 Online", + "cpu": "23%", + "memory": "64%", + "users": "1,247", + "response": "145ms" + }, + { + "status": "🟢 Optimal", + "cpu": "18%", + "memory": "61%", + "users": "1,352", + "response": "132ms" + }, + { + "status": "🟡 Busy", + "cpu": "67%", + "memory": "78%", + "users": "1,891", + "response": "234ms" + } + ] + + for i, data in enumerate(updates): + await asyncio.sleep(3) # Simulate data collection delay + + updated_dashboard = f""" + +# 📊 Live System Dashboard + +## System Status: {data['status']} + +### Current Metrics: +- **CPU Usage**: {data['cpu']} +- **Memory**: {data['memory']} +- **Active Users**: {data['users']} +- **Response Time**: {data['response']} + +--- +*Last Updated: {datetime.now().strftime('%H:%M:%S')}* +*Update {i+1}/{len(updates)}* +""" + + # Replace entire dashboard content + await __event_emitter__({ + "type": "replace", + "data": {"content": updated_dashboard} + }) + + # Status update + await __event_emitter__({ + "type": "status", + "data": {"description": f"Dashboard updated ({i+1}/{len(updates)})", "done": False} + }) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Live dashboard monitoring complete", "done": True} + }) + + return "Dashboard monitoring session completed." +``` +
+ +
+Mode-Safe Message Tool + +```python +async def adaptive_content_tool( + self, content_type: str, __event_emitter__=None, __metadata__=None + ) -> str: + """ + Adapts behavior based on function calling mode + ✅ Provides best possible experience in both modes + """ + + # Detect function calling mode + mode = "default" # Default assumption + if __metadata__: + mode = __metadata__.get("params", {}).get("function_calling", "default") + + if not __event_emitter__: + return f"Generated {content_type} content (no real-time updates available)" + + # Mode-specific behavior + if mode == "native": + # Use only compatible events in Native mode + await __event_emitter__({ + "type": "status", + "data": {"description": f"Generating {content_type} content in Native mode...", "done": False} + }) + + await asyncio.sleep(2) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Content generation complete", "done": True} + }) + + # Return content normally - no message events + return f""" + +# {content_type.title()} Content + +**Mode**: Native Function Calling (Limited Event Support) + +Generated content here... This content is returned as the tool result rather than being streamed via message events. 
+ +*Note: Live content updates are not available in Native mode due to event compatibility limitations.* +""" + + else: # Default mode + # Full message event functionality available + await __event_emitter__({ + "type": "status", + "data": {"description": "Generating content with full streaming support...", "done": False} + }) + + # Stream content progressively + progressive_content = [ + f"# {content_type.title()} Content\n\n**Mode**: Default Function Calling ✅\n\n", + "## Section 1: Introduction\nStreaming content in real-time...\n\n", + "## Section 2: Details\nAdding detailed information...\n\n", + "## Section 3: Conclusion\nFinalizing content delivery...\n\n", + "*✅ Content streaming completed successfully!*" + ] + + for i, chunk in enumerate(progressive_content): + await __event_emitter__({ + "type": "message", + "data": {"content": chunk} + }) + + await __event_emitter__({ + "type": "status", + "data": {"description": f"Streaming section {i+1}/{len(progressive_content)}...", "done": False} + }) + + await asyncio.sleep(1.5) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Content streaming complete!", "done": True} + }) + + return "Content has been streamed above with full Default mode capabilities." +``` +
+ +#### Citations ✅ FULLY COMPATIBLE + +**Citation events work identically in both Default and Native function calling modes.** This event type provides source references and citations in the chat interface, allowing users to click and view source materials. + +Citations are essential for tools that retrieve information from external sources, databases, or documents. They provide transparency and allow users to verify information sources. + +**Citation Event Structure:** +```python +await __event_emitter__({ + "type": "citation", + "data": { + "document": [content], # Array of content strings + "metadata": [ # Array of metadata objects + { + "date_accessed": datetime.now().isoformat(), + "source": title, + "author": "Author Name", # Optional + "publication_date": "2024-01-01", # Optional + "url": "https://source-url.com" # Optional + } + ], + "source": {"name": title, "url": url} # Primary source info + } +}) +``` + +**Important Citation Setup:** +When implementing custom citations, you **must** disable automatic citations in your `Tools` class: + +```python +def __init__(self): + self.citation = False # REQUIRED - prevents automatic citations from overriding custom ones +``` + +:::warning + +**⚠️ Critical Citation Warning:** +If you set `self.citation = True` (or don't set it to `False`), automatic citations will replace any custom citations you send. Always disable automatic citations when using custom citation events. + +::: + +
+Basic Citation Example + +```python +class Tools: + def __init__(self): + self.citation = False # Disable automatic citations + + async def research_tool( + self, topic: str, __event_emitter__=None + ) -> str: + """ + Researches a topic and provides proper citations + ✅ Works identically in both Default and Native modes + """ + + if not __event_emitter__: + return "Research completed (citations not available)" + + # Simulate research findings + sources = [ + { + "title": "Advanced AI Systems", + "url": "https://example.com/ai-systems", + "content": "Artificial intelligence systems have evolved significantly...", + "author": "Dr. Jane Smith", + "date": "2024-03-15" + }, + { + "title": "Machine Learning Fundamentals", + "url": "https://example.com/ml-fundamentals", + "content": "The core principles of machine learning include...", + "author": "Prof. John Doe", + "date": "2024-02-20" + } + ] + + # Emit citations for each source + for source in sources: + await __event_emitter__({ + "type": "citation", + "data": { + "document": [source["content"]], + "metadata": [ + { + "date_accessed": datetime.now().isoformat(), + "source": source["title"], + "author": source["author"], + "publication_date": source["date"], + "url": source["url"] + } + ], + "source": { + "name": source["title"], + "url": source["url"] + } + } + }) + + return f"Research on '{topic}' completed. Found {len(sources)} relevant sources with detailed citations." +``` +
+ +
+Advanced Multi-Source Citations + +```python +async def comprehensive_analysis_tool( + self, query: str, __event_emitter__=None + ) -> str: + """ + Performs comprehensive analysis with multiple source types + ✅ Full compatibility across all function calling modes + """ + + if not __event_emitter__: + return "Analysis completed" + + # Multiple source types with rich metadata + research_sources = { + "academic": [ + { + "title": "Neural Network Architecture in Modern AI", + "authors": ["Dr. Sarah Chen", "Prof. Michael Rodriguez"], + "journal": "Journal of AI Research", + "volume": "Vol. 45, Issue 2", + "pages": "123-145", + "doi": "10.1000/182", + "date": "2024-01-15", + "content": "This comprehensive study examines the evolution of neural network architectures..." + } + ], + "web_sources": [ + { + "title": "Industry AI Implementation Trends", + "url": "https://tech-insights.com/ai-trends-2024", + "site_name": "TechInsights", + "published": "2024-03-01", + "content": "Recent industry surveys show that 78% of companies are implementing AI solutions..." + } + ], + "reports": [ + { + "title": "Global AI Market Report 2024", + "organization": "International Tech Research Institute", + "report_number": "ITRI-2024-AI-001", + "date": "2024-02-28", + "content": "The global artificial intelligence market is projected to reach $1.8 trillion by 2030..." 
+ } + ] + } + + citation_count = 0 + + # Process academic sources + for source in research_sources["academic"]: + citation_count += 1 + await __event_emitter__({ + "type": "citation", + "data": { + "document": [source["content"]], + "metadata": [ + { + "date_accessed": datetime.now().isoformat(), + "source": source["title"], + "authors": source["authors"], + "journal": source["journal"], + "volume": source["volume"], + "pages": source["pages"], + "doi": source["doi"], + "publication_date": source["date"], + "type": "academic_journal" + } + ], + "source": { + "name": f"{source['title']} - {source['journal']}", + "url": f"https://doi.org/{source['doi']}" + } + } + }) + + # Process web sources + for source in research_sources["web_sources"]: + citation_count += 1 + await __event_emitter__({ + "type": "citation", + "data": { + "document": [source["content"]], + "metadata": [ + { + "date_accessed": datetime.now().isoformat(), + "source": source["title"], + "site_name": source["site_name"], + "publication_date": source["published"], + "url": source["url"], + "type": "web_article" + } + ], + "source": { + "name": source["title"], + "url": source["url"] + } + } + }) + + # Process reports + for source in research_sources["reports"]: + citation_count += 1 + await __event_emitter__({ + "type": "citation", + "data": { + "document": [source["content"]], + "metadata": [ + { + "date_accessed": datetime.now().isoformat(), + "source": source["title"], + "organization": source["organization"], + "report_number": source["report_number"], + "publication_date": source["date"], + "type": "research_report" + } + ], + "source": { + "name": f"{source['title']} - {source['organization']}", + "url": f"https://reports.example.com/{source['report_number']}" + } + } + }) + + return f""" + +# Analysis Complete + +Comprehensive analysis of '{query}' has been completed using {citation_count} authoritative sources: + +- **{len(research_sources['academic'])}** Academic journal articles +- 
**{len(research_sources['web_sources'])}** Industry web sources +- **{len(research_sources['reports'])}** Research reports + +All sources have been properly cited and are available for review by clicking the citation links above. +""" +``` +
+ +
+Database Citation Tool + +```python +async def database_query_tool( + self, sql_query: str, __event_emitter__=None + ) -> str: + """ + Queries database and provides data citations + ✅ Works perfectly in both function calling modes + """ + + if not __event_emitter__: + return "Database query executed" + + # Simulate database results with citation metadata + query_results = [ + { + "record_id": "USR_001247", + "data": "John Smith, Software Engineer, joined 2023-01-15", + "table": "employees", + "last_updated": "2024-03-10T14:30:00Z", + "updated_by": "admin_user" + }, + { + "record_id": "USR_001248", + "data": "Jane Wilson, Product Manager, joined 2023-02-20", + "table": "employees", + "last_updated": "2024-03-08T09:15:00Z", + "updated_by": "hr_system" + } + ] + + # Create citations for each database record + for i, record in enumerate(query_results): + await __event_emitter__({ + "type": "citation", + "data": { + "document": [f"Database Record: {record['data']}"], + "metadata": [ + { + "date_accessed": datetime.now().isoformat(), + "source": f"Database Table: {record['table']}", + "record_id": record['record_id'], + "last_updated": record['last_updated'], + "updated_by": record['updated_by'], + "query": sql_query, + "type": "database_record" + } + ], + "source": { + "name": f"Record {record['record_id']} - {record['table']}", + "url": f"database://internal/tables/{record['table']}/{record['record_id']}" + } + } + }) + + return f""" + +# Database Query Results + +Executed query: `{sql_query}` + +Retrieved **{len(query_results)}** records with complete citation metadata. Each record includes: +- Record ID and source table +- Last modification timestamp +- Update attribution +- Full audit trail + +All data sources have been properly cited for transparency and verification. +""" +``` +
+ +#### Additional Compatible Event Types ✅ + +The following event types work identically in both Default and Native function calling modes: + +**Notification Events** +```python +await __event_emitter__({ + "type": "notification", + "data": {"content": "Toast notification message"} +}) +``` + +**File Events** +```python +await __event_emitter__({ + "type": "files", # or "chat:message:files" + "data": {"files": [{"name": "report.pdf", "url": "/files/report.pdf"}]} +}) +``` + +**Follow-up Events** +```python +await __event_emitter__({ + "type": "chat:message:follow_ups", + "data": {"follow_ups": ["What about X?", "Tell me more about Y"]} +}) +``` + +**Title Update Events** +```python +await __event_emitter__({ + "type": "chat:title", + "data": {"title": "New Chat Title"} +}) +``` + +**Tag Events** +```python +await __event_emitter__({ + "type": "chat:tags", + "data": {"tags": ["research", "analysis", "completed"]} +}) +``` + +**Error Events** +```python +await __event_emitter__({ + "type": "chat:message:error", + "data": {"content": "Error message to display"} +}) +``` + +**Confirmation Events** +```python +await __event_emitter__({ + "type": "confirmation", + "data": {"message": "Are you sure you want to continue?"} +}) +``` + +**Input Request Events** +```python +await __event_emitter__({ + "type": "input", + "data": {"prompt": "Please enter additional information:"} +}) +``` + +**Code Execution Events** +```python +await __event_emitter__({ + "type": "execute", + "data": {"code": "print('Hello from tool-generated code!')"} +}) +``` + +#### Comprehensive Function Calling Mode Guide + +Choosing the right function calling mode is crucial for your tool's functionality. This guide helps you make an informed decision based on your specific requirements. 
+ +**Mode Comparison Overview:** + +| Aspect | Default Mode | Native Mode | +|--------|-------------|-------------| +| **Latency** | Higher - processes through Open WebUI pipeline | Lower - direct model handling | +| **Event Support** | ✅ Full - all event types work perfectly | ⚠️ Limited - many event types broken | +| **Complexity** | Handles complex tool interactions well | Best for simple tool calls | +| **Compatibility** | Works with all models | Requires models with native tool calling | +| **Streaming** | Perfect for real-time updates | Poor - content gets overwritten | +| **Citations** | ✅ Full support | ✅ Full support | +| **Status Updates** | ✅ Full support | ✅ Full support | +| **Message Events** | ✅ Full support | ❌ Broken - content disappears | + +**Decision Framework:** + +1. **Do you need real-time content streaming, live updates, or dynamic message modification?** + - **Yes** → Use **Default Mode** (Native mode will break these features) + - **No** → Either mode works + +2. **Is your tool primarily for simple data retrieval or computation?** + - **Yes** → **Native Mode** is fine (lower latency) + - **No** → Consider **Default Mode** for complex interactions + +3. **Do you need maximum performance and minimal latency?** + - **Yes** → **Native Mode** (if compatible with your features) + - **No** → **Default Mode** provides more features + +4. **Are you building interactive experiences, dashboards, or multi-step workflows?** + - **Yes** → **Default Mode** required + - **No** → Either mode works + +**Recommended Usage Patterns:** + +
+🏆 Best Practices for Mode Selection + +**Choose Default Mode For:** +- Tools with progressive content updates +- Interactive dashboards or live data displays +- Multi-step workflows with visual feedback +- Complex tool chains with intermediate results +- Educational tools that show step-by-step processes +- Any tool that needs `message`, `replace`, or `chat:message` events + +**Choose Native Mode For:** +- Simple API calls or database queries +- Basic calculations or data transformations +- Tools that only need status updates and citations +- Performance-critical applications where latency matters +- Simple retrieval tools without complex UI requirements + +**Universal Compatibility Pattern:** +```python +async def mode_adaptive_tool( + self, query: str, __event_emitter__=None, __metadata__=None + ) -> str: + """ + Tool that adapts its behavior based on function calling mode + ✅ Provides optimal experience in both modes + """ + + # Detect current mode + mode = "default" + if __metadata__: + mode = __metadata__.get("params", {}).get("function_calling", "default") + + is_native_mode = (mode == "native") + + if not __event_emitter__: + return "Tool executed successfully (no event support)" + + # Always safe: status updates work in both modes + await __event_emitter__({ + "type": "status", + "data": {"description": f"Running in {mode} mode...", "done": False} + }) + + # Mode-specific logic + if is_native_mode: + # Native mode: use compatible events only + await __event_emitter__({ + "type": "status", + "data": {"description": "Processing with native efficiency...", "done": False} + }) + + # Simulate processing + await asyncio.sleep(1) + + # Return results directly - no message streaming + result = f"Query '{query}' processed successfully in Native mode." 
+ + else: + # Default mode: full event capabilities + await __event_emitter__({ + "type": "message", + "data": {"content": f"🔍 **Processing Query**: {query}\n\n"} + }) + + await __event_emitter__({ + "type": "status", + "data": {"description": "Analyzing with full streaming...", "done": False} + }) + + await asyncio.sleep(1) + + await __event_emitter__({ + "type": "message", + "data": {"content": "📊 **Results**: Analysis complete with detailed findings.\n\n"} + }) + + result = "Query processed with full Default mode capabilities." + + # Final status (works in both modes) + await __event_emitter__({ + "type": "status", + "data": {"description": "Processing complete!", "done": True} + }) + + return result +``` +
+ +
+🔧 Debugging Event Emitter Issues + +**Common Issues and Solutions:** + +**Issue: Content appears then disappears** +- **Cause**: Using message events in Native mode +- **Solution**: Switch to Default mode or use status events instead + +**Issue: Tool seems unresponsive** +- **Cause**: Function calling not enabled for model +- **Solution**: Enable tools in Model settings or via `+` button + +**Issue: Events not firing at all** +- **Cause**: `__event_emitter__` parameter missing or None +- **Solution**: Ensure parameter is included in tool method signature + +**Issue: Citations being overwritten** +- **Cause**: `self.citation = True` (or not set to False) +- **Solution**: Set `self.citation = False` in `__init__` method + +**Diagnostic Tool:** +```python +async def event_diagnostics_tool( + self, __event_emitter__=None, __metadata__=None, __user__=None + ) -> str: + """ + Comprehensive diagnostic tool for event emitter debugging + """ + + report = ["# 🔍 Event Emitter Diagnostic Report\n"] + + # Check event emitter availability + if __event_emitter__: + report.append("✅ Event emitter is available\n") + else: + report.append("❌ Event emitter is NOT available\n") + return "".join(report) + + # Check metadata availability + if __metadata__: + mode = __metadata__.get("params", {}).get("function_calling", "default") + report.append(f"✅ Function calling mode: **{mode}**\n") + else: + report.append("⚠️ Metadata not available (mode unknown)\n") + mode = "unknown" + + # Check user context + if __user__: + report.append("✅ User context available\n") + else: + report.append("⚠️ User context not available\n") + + # Test compatible events (work in both modes) + report.append("\n## Testing Compatible Events:\n") + + try: + await __event_emitter__({ + "type": "status", + "data": {"description": "Testing status events...", "done": False} + }) + report.append("✅ Status events: WORKING\n") + except Exception as e: + report.append(f"❌ Status events: FAILED - {str(e)}\n") + + try: + 
await __event_emitter__({ + "type": "notification", + "data": {"content": "Test notification"} + }) + report.append("✅ Notification events: WORKING\n") + except Exception as e: + report.append(f"❌ Notification events: FAILED - {str(e)}\n") + + # Test problematic events (broken in Native mode) + report.append("\n## Testing Mode-Dependent Events:\n") + + try: + await __event_emitter__({ + "type": "message", + "data": {"content": "**Test message event** - This should appear in Default mode only\n"} + }) + report.append("✅ Message events: SENT (may disappear in Native mode)\n") + except Exception as e: + report.append(f"❌ Message events: FAILED - {str(e)}\n") + + # Final status + await __event_emitter__({ + "type": "status", + "data": {"description": "Diagnostic complete", "done": True} + }) + + # Mode-specific recommendations + report.append("\n## Recommendations:\n") + + if mode == "native": + report.append(""" +⚠️ **Native Mode Detected**: Limited event support +- ✅ Use: status, citation, notification, files events +- ❌ Avoid: message, replace, chat:message events +- 💡 Switch to Default mode for full functionality +""") + elif mode == "default": + report.append(""" +✅ **Default Mode Detected**: Full event support available +- All event types should work perfectly +- Optimal for interactive and streaming tools +""") + else: + report.append(""" +❓ **Unknown Mode**: Check your model configuration +- Ensure function calling is enabled +- Verify model supports tool calling +""") + + return "".join(report) +``` +
+ +
+📚 Event Emitter Quick Reference + +**Always Compatible (Both Modes):** +```python + +# Status updates - perfect for progress tracking +await __event_emitter__({ + "type": "status", + "data": {"description": "Processing...", "done": False} +}) + +# Citations - essential for source attribution +await __event_emitter__({ + "type": "citation", + "data": { + "document": ["Content"], + "source": {"name": "Source", "url": "https://example.com"} + } +}) + +# Notifications - user alerts +await __event_emitter__({ + "type": "notification", + "data": {"content": "Task completed!"} +}) +``` + +**Default Mode Only (Broken in Native):** +```python + +# ⚠️ These will flicker/disappear in Native mode + +# Progressive content streaming +await __event_emitter__({ + "type": "message", + "data": {"content": "Streaming content..."} +}) + +# Content replacement +await __event_emitter__({ + "type": "replace", + "data": {"content": "New complete content"} +}) + +# Delta updates +await __event_emitter__({ + "type": "chat:message:delta", + "data": {"content": "Additional content"} +}) +``` + +**Mode Detection Pattern:** +```python +def get_function_calling_mode(__metadata__): + """Utility to detect current function calling mode""" + if not __metadata__: + return "unknown" + return __metadata__.get("params", {}).get("function_calling", "default") + +# Usage in tools: +mode = get_function_calling_mode(__metadata__) +is_native = (mode == "native") +can_stream_messages = not is_native +``` + +**Essential Imports:** +```python +import asyncio +from datetime import datetime +from typing import Optional, Callable, Awaitable +``` +
+ +### Rich UI Element Embedding + +Both External and Built-In Tools now support rich UI element embedding, allowing tools to return HTML content and interactive iframes that display directly within chat conversations. This feature enables tools to provide sophisticated visual interfaces, interactive widgets, charts, dashboards, and other rich web content. + +When a tool returns an `HTMLResponse` with the appropriate headers, the content will be embedded as an interactive iframe in the chat interface rather than displayed as plain text. + +#### Basic Usage + +To embed HTML content, your tool should return an `HTMLResponse` with the `Content-Disposition: inline` header: + +```python +from fastapi.responses import HTMLResponse + +def create_visualization_tool(self, data: str) -> HTMLResponse: + """ + Creates an interactive data visualization that embeds in the chat. + + :param data: The data to visualize + """ + html_content = """ + + + + Data Visualization + + + +
+ + + + """ + + headers = {"Content-Disposition": "inline"} + return HTMLResponse(content=html_content, headers=headers) +``` + +#### Advanced Features + +The embedded iframes support auto-resizing and include configurable security settings. The system automatically handles: + +- **Auto-resizing**: Embedded content automatically adjusts height based on its content +- **Cross-origin communication**: Safe message passing between the iframe and parent window +- **Security sandbox**: Configurable security restrictions for embedded content + +#### Security Considerations + +When embedding external content, several security options can be configured through the UI settings: + +- `iframeSandboxAllowForms`: Allow form submissions within embedded content +- `iframeSandboxAllowSameOrigin`: Allow same-origin requests (use with caution) +- `iframeSandboxAllowPopups`: Allow popup windows from embedded content + +#### Use Cases + +Rich UI embedding is perfect for: + +- **Interactive dashboards**: Real-time data visualization and controls +- **Form interfaces**: Complex input forms with validation and dynamic behavior +- **Charts and graphs**: Interactive plotting with libraries like Plotly, D3.js, or Chart.js +- **Media players**: Video, audio, or interactive media content +- **Custom widgets**: Specialized UI components for specific tool functionality +- **External integrations**: Embedding content from external services or APIs + +#### External Tool Example + +For external tools served via HTTP endpoints: + +```python +@app.post("/tools/dashboard") +async def create_dashboard(): + html = """ +
+

System Dashboard

+ + + +
+ """ + + return HTMLResponse( + content=html, + headers={"Content-Disposition": "inline"} + ) +``` + +The embedded content automatically inherits responsive design and integrates seamlessly with the chat interface, providing a native-feeling experience for users interacting with your tools. + +#### CORS and Direct Tools + +Direct external tools are tools that run directly from the browser. In this case, the tool is called by JavaScript in the user's browser. +Because we depend on the Content-Disposition header, when using CORS on a remote tool server, Open WebUI cannot read that header by default: browsers expose only the CORS-safelisted response headers to the fetch result unless the server lists additional ones in Access-Control-Expose-Headers. +To fix this, you must set Access-Control-Expose-Headers to Content-Disposition. Check the example below of a tool using Node.js: + + +```javascript +const app = express(); +const cors = require('cors'); + +app.use(cors()) + +app.get('/tools/dashboard', (req,res) => { + let html = ` +
+

System Dashboard

+ + + +
+ ` + res.set({ + 'Content-Disposition': 'inline' + ,'Access-Control-Expose-Headers':'Content-Disposition' + }) + res.send(html) +}) +``` + +More info about the header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Access-Control-Expose-Headers + + +## External packages + +In the Tools definition metadata you can specify custom packages. When you click `Save` the line will be parsed and `pip install` will be run on all requirements at once. + +Keep in mind that as pip is used in the same process as Open WebUI, the UI will be completely unresponsive during the installation. + +No measures are taken to handle package conflicts with Open WebUI's requirements. That means that specifying requirements can break Open WebUI if you're not careful. You might be able to work around this by specifying `open-webui` itself as a requirement. + +
+Example + +```python +""" +title: myToolName +author: myName +funding_url: [any link here will be shown behind a `Heart` button for users to show their support to you] +version: 1.0.0 + +# the version is displayed in the UI to help users keep track of updates. +license: GPLv3 +description: [recommended] +requirements: package1>=2.7.0,package2,package3 +""" +``` + +
diff --git a/docs/features/plugin/tools/index.mdx b/docs/features/plugin/tools/index.mdx new file mode 100644 index 0000000..a7db57e --- /dev/null +++ b/docs/features/plugin/tools/index.mdx @@ -0,0 +1,144 @@ +--- +sidebar_position: 2 +title: "Tools" +--- + +# ⚙️ What are Tools? + +Tools are small Python scripts that add superpowers to your LLM. When enabled, they allow your chatbot to do amazing things — like search the web, scrape data, generate images, talk back using AI voices, and more. + +Think of Tools as useful plugins that your AI can use when chatting with you. + +--- + +## 🚀 What Can Tools Help Me Do? + +Here are just a few examples of what Tools let your AI assistant do: + +- 🌍 Web Search: Get real-time answers by searching the internet. +- 🖼️ Image Generation: Create images from your prompts. +- 🔊 Voice Output: Generate AI voices using ElevenLabs. + +Explore ready-to-use tools in the 🧰 [Tools Showcase](https://openwebui.com/tools) + +--- + +## 📦 How to Install Tools + +The easiest way to install Tools in Open WebUI is to import them from the community library: + +1. Go to [Community Tool Library](https://openwebui.com/tools) +2. Choose a Tool, then click the Get button. +3. Enter your Open WebUI instance’s IP address or URL. +4. Click “Import to WebUI” — done! + +:::warning + +Safety Tip: Never import a Tool you don’t recognize or trust. These are Python scripts and might run unsafe code. + +::: + +--- + +## 🔧 How to Use Tools in Open WebUI + +Once you've installed Tools (see the installation steps above), here’s how to enable and use them: + +You have two ways to enable a Tool for your model: + +### ➕ Option 1: Enable from the Chat Window + +While chatting, click the ➕ icon in the input area. You’ll see a list of available Tools — you can enable any of them on the fly for that session. + +:::tip + +Tip: Enabling a Tool gives the model permission to use it — but it may not use it unless it's useful for the task. + +::: + +### ✏️ Option 2: Enable by Default (Recommended for Frequent Use) +1. 
Go to: Workspace ➡️ Models +2. Choose the model you’re using (like GPT-4 or LLaMa2) and click the ✏️ edit icon. +3. Scroll down to the “Tools” section. +4. ✅ Check the Tools you want your model to have access to by default. +5. Click Save. + +This ensures the model always has these Tools ready to use whenever you chat with it. + +You can also let your LLM auto-select the right Tools using the AutoTool Filter: + +🔗 [AutoTool Filter](https://openwebui.com/f/hub/autotool_filter/) + +🎯 Note: Even when using AutoTool, you still need to enable your Tools using Option 2. + +✅ And that’s it — your LLM is now Tool-powered! You're ready to supercharge your chats with web search, image generation, voice output, and more. + +--- + +## 🧠 Choosing How Tools Are Used: Default vs Native + +Once Tools are enabled for your model, Open WebUI gives you two different ways to let your LLM use them in conversations. + +You can decide how the model should call Tools by choosing between: + +- 🟡 Default Mode (Prompt-based) +- 🟢 Native Mode (Built-in function calling) + +Let’s break it down: + +### 🟡 Default Mode (Prompt-based Tool Triggering) + +This is the default setting in Open WebUI. + +Here, your LLM doesn’t need to natively support function calling. Instead, we guide the model using a smart tool-selection prompt template to select and use a Tool. + +✅ Works with almost any model +✅ Great way to unlock Tools with basic or local models +❗ Not as reliable or flexible as Native Mode when chaining tools + +### 🟢 Native Mode (Function Calling Built-In) + +If your model does support “native” function calling (like GPT-4o or GPT-3.5-turbo-1106), you can use this powerful mode to let the LLM decide — in real time — when and how to call multiple Tools during a single chat message. 
+ +✅ Fast, accurate, and can chain multiple Tools in one response +✅ The most natural and advanced experience +❗ Requires a model that actually supports native function calling + +### ✳️ How to Switch Between Modes + +Want to enable native function calling in your chats? Here's how: + +![Chat Controls](/images/features/plugin/tools/chat-controls.png) + +1. Open the chat window with your model. +2. Click ⚙️ Chat Controls > Advanced Params. +3. Look for the Function Calling setting and switch it from Default → Native + +That’s it! Your chat is now using true native Tool support (as long as the model supports it). + +➡️ We recommend using GPT-4o or another OpenAI model for the best native function-calling experience. +🔎 Some local models may claim support, but often struggle with accurate or complex Tool usage. + +💡 Summary: + +| Mode | Who it’s for | Pros | Cons | +|----------|----------------------------------|-----------------------------------------|--------------------------------------| +| Default | Any model | Broad compatibility, safer, flexible | May be less accurate or slower | +| Native | GPT-4o, etc. | Fast, smart, excellent tool chaining | Needs proper function call support | + +Choose the one that works best for your setup — and remember, you can always switch on the fly via Chat Controls. + +👏 And that's it — your LLM now knows how and when to use Tools, intelligently. + +--- + +## 🧠 Summary + +Tools are add-ons that help your AI model do much more than just chat. From answering real-time questions to generating images or speaking out loud — Tools bring your AI to life. + +- Visit: [https://openwebui.com/tools](https://openwebui.com/tools) to discover new Tools. +- Install them manually or with one-click. +- Enable them per model from Workspace ➡️ Models. 
+- Use them in chat by clicking ➕ + +Now go make your AI waaaaay smarter 🤖✨ diff --git a/docs/features/plugin/tools/openapi-servers/faq.mdx b/docs/features/plugin/tools/openapi-servers/faq.mdx new file mode 100644 index 0000000..85e77a9 --- /dev/null +++ b/docs/features/plugin/tools/openapi-servers/faq.mdx @@ -0,0 +1,176 @@ +--- +sidebar_position: 10 +title: "FAQ" +--- + +#### 🌐 Q: Why isn't my local OpenAPI tool server accessible from the WebUI interface? + +**A:** If your tool server is running locally (e.g., http://localhost:8000), browser-based clients may be restricted from accessing it due to CORS (Cross-Origin Resource Sharing) policies. + +Make sure to explicitly enable CORS headers in your OpenAPI server. For example, if you're using FastAPI, you can add: + +```python +from fastapi.middleware.cors import CORSMiddleware + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], # or specify your client origin + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +``` + +Also, if Open WebUI is served over HTTPS (e.g., https://yourdomain.com), your local server must meet one of the following conditions: + +- Be accessed from the same domain using HTTPS (e.g., https://localhost:8000). +- OR run on localhost (127.0.0.1) to allow browsers to relax security for local development. +- Otherwise, browsers may block insecure requests from HTTPS pages to HTTP APIs due to mixed-content rules. + +To work securely in production over HTTPS, your OpenAPI servers must also be served over HTTPS. + +--- + +#### 🚀 Q: Do I need to use FastAPI for my server implementation? + +**A:** No! While our reference implementations are written using FastAPI for clarity and ease of use, you can use any framework or language that produces a valid OpenAPI (Swagger) specification. 
Some common choices include: + +- FastAPI (Python) +- Flask + Flask-RESTX (Python) +- Express + Swagger UI (JavaScript/Node) +- Spring Boot (Java) +- Go with Swag or Echo + +The key is to ensure your server exposes a valid OpenAPI schema, and that it communicates over HTTP(S). +It is important to set a custom operationId for all endpoints. + +--- + +#### 🚀 Q: Why choose OpenAPI over MCP? + +**A:** OpenAPI wins over MCP in most real-world scenarios due to its simplicity, tooling ecosystem, stability, and developer-friendliness. Here's why: + +- ✅ **Reuse Your Existing Code**: If you’ve built REST APIs before, you're mostly done—you don’t need to rewrite your logic. Just define a compliant OpenAPI spec and expose your current code as a tool server. + + With MCP, you had to reimplement your tool logic inside a custom protocol layer, duplicating work and increasing the surface area to maintain. + +- 💼 **Less to Maintain & Debug**: OpenAPI fits naturally into modern dev workflows. You can test endpoints with Postman, inspect logs with built-in APIs, troubleshoot easily with mature ecosystem tools—and often without modifying your core app at all. + + MCP introduced new layers of transport, schema parsing, and runtime quirks, all of which had to be debugged manually. + +- 🌍 **Standards-Based**: OpenAPI is widely adopted across the tech industry. Its well-defined structure means tools, agents, and servers can interoperate immediately, without needing special bridges or translations. + +- 🧰 **Better Tooling**: There’s an entire universe of tools that support OpenAPI—automatic client/server generation, documentation, validation, mocking, testing, and even security audit tools. + +- 🔐 **First-Class Security Support**: OpenAPI includes native support for things like OAuth2, JWTs, API Keys, and HTTPS—making it easier to build secure endpoints with common libraries and standards. 
+ +- 🧠 **More Devs Already Know It**: Using OpenAPI means you're speaking a language already familiar to backend teams, frontend developers, DevOps, and product engineers. There’s no learning curve or costly onboarding required. + +- 🔄 **Future-Proof & Extensible**: OpenAPI evolves with API standards and remains forward-compatible. MCP, by contrast, was bespoke and experimental—often requiring changes as the surrounding ecosystem changed. + +🧵 Bottom line: OpenAPI lets you do more with less effort, less code duplication, and fewer surprises. It’s a production-ready, developer-friendly route to powering LLM tools—without rebuilding everything from scratch. + +--- + +#### 🔐 Q: How do I secure my OpenAPI tool server? + +**A:** OpenAPI supports industry-standard security mechanisms like: + +- OAuth 2.0 +- API Key headers +- JWT (JSON Web Token) +- Basic Auth + +Use HTTPS in production to encrypt data in transit, and restrict endpoints with proper auth/authz methods as appropriate. You can incorporate these directly in your OpenAPI schema using the securitySchemes field. + +--- + +#### ❓ Q: What kind of tools can I build using OpenAPI tool servers? + +**A:** If it can be exposed via a REST API, you can build it. Common tool types include: + +- Filesystem operations (read/write files, list directories) +- Git and document repository access +- Database querying or schema exploration +- Web scrapers or summarizers +- External SaaS integrations (e.g., Salesforce, Jira, Slack) +- LLM-attached memory stores / RAG components +- Secure internal microservices exposed to your agent + +--- + +#### 🔌 Q: Can I run more than one tool server at the same time? + +**A:** Absolutely. Each tool server runs independently and exposes its own OpenAPI schema. Your agent configuration can point to multiple tool servers, allowing you to mix and match based on need. + +There's no limit—just ensure each server runs on its own port or address and is reachable by the agent host. 
+ +--- + +#### 🧪 Q: How do I test a tool server before linking it to an LLM agent? + +**A:** You can test your OpenAPI tool servers using: + +- Swagger UI or ReDoc (built into FastAPI by default) +- Postman or Insomnia +- curl or httpie from the command line +- Python’s requests module +- OpenAPI validators and mockers + +Once validated, you can register the tool server with an LLM agent or through Open WebUI. + +--- + +#### 🛠️ Q: Can I extend or customize the reference servers? + +**A:** Yes! All servers in the servers/ directory are built to be simple templates. Fork and modify them to: + +- Add new endpoints and business logic +- Integrate authentication +- Change response formats +- Connect to new services or internal APIs +- Deploy via Docker, Kubernetes, or any cloud host + +--- + +#### 🌍 Q: Can I run OpenAPI tool servers on cloud platforms like AWS or GCP? + +**A:** Yes. These servers are plain HTTP services. You can deploy them as: + +- AWS Lambda with API Gateway (serverless) +- EC2 or GCP Compute Engine instances +- Kubernetes services in GKE/EKS/AKS +- Cloud Run or App Engine +- Render, Railway, Heroku, etc. + +Just make sure they’re securely configured and publicly reachable (or VPN'd) if needed by the agent or user. + +--- + +#### 🧪 Q: What if I have an existing MCP server? + +**A:** Great news! You can use our MCP-to-OpenAPI bridge, [mcpo](https://github.com/open-webui/mcpo). Exposing your existing MCP-based tools as OpenAPI-compatible APIs is now easier than ever. No rewrites, no headaches — just plug and go! 🚀 + +If you've already built tools using the MCP protocol, `mcpo` helps you instantly unlock compatibility with Open WebUI and any OpenAPI-based agent — ensuring your hard work remains fully accessible and future-ready. 
+ +[Check out the optional Bridge to MCP section in the docs for setup instructions.](https://github.com/open-webui/openapi-servers?tab=readme-ov-file#-bridge-to-mcp-optional) + +**Quick Start:** +```bash +uvx mcpo --port 8000 -- uvx mcp-server-time --local-timezone=America/New_York +``` + +✨ That’s it — your MCP server is now OpenAPI-ready! + +--- + +#### 🗂️ Q: Can one OpenAPI server implement multiple tools? + +**A:** Yes. A single OpenAPI server can offer multiple related capabilities grouped under different endpoints. For example, a document server may provide search, upload, OCR, and summarization—all within one schema. + +You can also modularize completely by creating one OpenAPI server per tool if you prefer isolation and flexibility. + +--- + +🙋 Have more questions? Visit the GitHub discussions for help and feedback from the community: +👉 [Community Discussions](https://github.com/open-webui/openapi-servers/discussions) diff --git a/docs/features/plugin/tools/openapi-servers/index.mdx b/docs/features/plugin/tools/openapi-servers/index.mdx new file mode 100644 index 0000000..fbe3aee --- /dev/null +++ b/docs/features/plugin/tools/openapi-servers/index.mdx @@ -0,0 +1,70 @@ +--- +sidebar_position: 400 +title: "OpenAPI Tool Servers" +--- + +import { TopBanners } from "@site/src/components/TopBanners"; + +<TopBanners /> + +# 🌟 OpenAPI Tool Servers + +This repository provides reference OpenAPI Tool Server implementations making it easy and secure for developers to integrate external tooling and data sources into LLM agents and workflows. Designed for maximum ease of use and minimal learning curve, these implementations utilize the widely adopted and battle-tested [OpenAPI specification](https://www.openapis.org/) as the standard protocol. + +By leveraging OpenAPI, we eliminate the need for a proprietary or unfamiliar communication protocol, ensuring you can quickly and confidently build or integrate servers. 
This means less time spent figuring out custom interfaces and more time building powerful tools that enhance your AI applications. + +## ☝️ Why OpenAPI? + +- **Established Standard**: OpenAPI is a widely used, production-proven API standard backed by thousands of tools, companies, and communities. + +- **No Reinventing the Wheel**: No additional documentation or proprietary spec confusion. If you build REST APIs or use OpenAPI today, you're already set. + +- **Easy Integration & Hosting**: Deploy your tool servers externally or locally without vendor lock-in or complex configurations. + +- **Strong Security Focus**: Built around HTTP/REST APIs, OpenAPI inherently supports widely used, secure communication methods including HTTPS and well-proven authentication standards (OAuth, JWT, API Keys). + +- **Future-Friendly & Stable**: Unlike less mature or experimental protocols, OpenAPI promises reliability, stability, and long-term community support. + +## 🚀 Quickstart + +Get started quickly with our reference FastAPI-based implementations provided in the `servers/` directory. 
(You can adapt these examples into your preferred stack as needed, such as using [FastAPI](https://fastapi.tiangolo.com/), [FastOpenAPI](https://github.com/mr-fatalyst/fastopenapi) or any other OpenAPI-compatible library): + +```bash +git clone https://github.com/open-webui/openapi-servers +cd openapi-servers +``` + +### With Bash + +```bash + +# Example: Installing dependencies for a specific server 'filesystem' +cd servers/filesystem +pip install -r requirements.txt +uvicorn main:app --host 0.0.0.0 --reload +``` + +The filesystem server should be reachable from: [http://localhost:8000](http://localhost:8000) + +The interactive API documentation will be available at: [http://localhost:8000/docs](http://localhost:8000/docs) + +### With Docker + +If you have docker compose installed, bring the servers up with: + +```bash +docker compose up +``` + +The services will be reachable from: + + * [Filesystem localhost:8081](http://localhost:8081) + * [memory server localhost:8082](http://localhost:8082) + * [time-server localhost:8083](http://localhost:8083) + +Now, simply point your OpenAPI-compatible clients or AI agents to your local or publicly deployed URL—no configuration headaches, no complicated transports. + +## 🌱 Open WebUI Community + +- For general discussions, technical exchange, and announcements, visit our [Community Discussions](https://github.com/open-webui/openapi-servers/discussions) page. +- Have ideas or feedback? Please open an issue! diff --git a/docs/features/plugin/tools/openapi-servers/mcp.mdx b/docs/features/plugin/tools/openapi-servers/mcp.mdx new file mode 100644 index 0000000..20dbea4 --- /dev/null +++ b/docs/features/plugin/tools/openapi-servers/mcp.mdx @@ -0,0 +1,199 @@ +--- +sidebar_position: 3 +title: "MCP Support" +--- + +This documentation explains how to easily set up and deploy the [**MCP (Model Context Protocol)-to-OpenAPI proxy server** (mcpo)](https://github.com/open-webui/mcpo) provided by Open WebUI. 
Learn how you can effortlessly expose MCP-based tool servers using standard, familiar OpenAPI endpoints suitable for end-users and developers. + +### 📌 What is the MCP Proxy Server? + +The MCP-to-OpenAPI proxy server lets you use tool servers implemented with MCP (Model Context Protocol) directly via standard REST/OpenAPI APIs—no need to manage unfamiliar or complicated custom protocols. If you're an end-user or application developer, this means you can interact easily with powerful MCP-based tooling directly through familiar REST-like endpoints. + +### 💡 Why Use mcpo? + +While MCP tool servers are powerful and flexible, they commonly communicate via standard input/output (stdio)—often running on your local machine where they can easily access your filesystem, environment, and other native system capabilities. + +That’s a strength—but also a limitation. + +If you want to deploy your main interface (like Open WebUI) on the cloud, you quickly run into a problem: your cloud instance can’t speak directly to an MCP server running locally on your machine via stdio. 
+ +[That’s where mcpo comes in with a game-changing solution.](https://github.com/open-webui/mcpo) + +MCP servers typically rely on raw stdio communication, which is: + +- 🔓 Inherently insecure across environments +- ❌ Incompatible with most modern tools, UIs, or platforms +- 🧩 Lacking critical features like authentication, documentation, and error handling + +The mcpo proxy eliminates those issues—automatically: + +- ✅ Instantly compatible with existing OpenAPI tools, SDKs, and clients +- 🛡 Wraps your tools with secure, scalable, and standards-based HTTP endpoints +- 🧠 Auto-generates interactive OpenAPI documentation for every tool, entirely config-free +- 🔌 Uses plain HTTP—no socket setup, daemon juggling, or platform-specific glue code + +So even though adding mcpo might at first seem like "just one more layer"—in reality, it simplifies everything while giving you: + +- Better integration ✅ +- Better security ✅ +- Better scalability ✅ +- Happier developers & users ✅ + +✨ With mcpo, your local-only AI tools become cloud-ready, UI-friendly, and instantly interoperable—without changing a single line of tool server code. + +### ✅ Quickstart: Running the Proxy Locally + +Here's how simple it is to launch the MCP-to-OpenAPI proxy server using the lightweight, easy-to-use tool **mcpo** ([GitHub Repository](https://github.com/open-webui/mcpo)): + +1. **Prerequisites** + - **Python 3.8+** with `pip` installed. + - MCP-compatible application (for example: `mcp-server-time`) + - (Optional but recommended) `uv` installed for faster startup and zero-config convenience. + +2. **Install mcpo** + +Using **uv** (recommended): + +```bash +uvx mcpo --port 8000 -- your_mcp_server_command +``` + +Or using `pip`: + +```bash +pip install mcpo +mcpo --port 8000 -- your_mcp_server_command +``` + +3. 🚀 **Run the Proxy Server** + +To start your MCP-to-OpenAPI proxy server, you need an MCP-compatible tool server. 
If you don't have one yet, the MCP community provides various ready-to-use MCP server implementations. + +✨ **Where to find MCP Servers?** + +You can discover officially supported MCP servers in the following repository: + +- [modelcontextprotocol/servers on GitHub](https://github.com/modelcontextprotocol/servers) + +For instance, the popular **Time MCP Server** is documented [here](https://github.com/modelcontextprotocol/servers/blob/main/src/time/README.md), and the command used to launch it appears in the MCP configuration shown in its README. Specifically, the README states: + +> Add to your Claude settings: +> +> ```json +> "mcpServers": { +> "time": { +> "command": "uvx", +> "args": ["mcp-server-time", "--local-timezone=America/New_York"] +> } +> } +> ``` + +🔑 **Translating this MCP setup to a quick local proxy command**: + +You can easily run the recommended MCP server (`mcp-server-time`) directly through the **MCP-to-OpenAPI proxy** (`mcpo`) like this: + +```bash +uvx mcpo --port 8000 -- uvx mcp-server-time --local-timezone=America/New_York +``` + +That's it! You're now running the MCP-to-OpenAPI Proxy locally and exposing the powerful **MCP Time Server** through standard OpenAPI endpoints accessible at: + +- 📖 **Interactive OpenAPI Documentation:** [`http://localhost:8000/docs`](http://localhost:8000/docs) + +Feel free to replace `uvx mcp-server-time --local-timezone=America/New_York` with your preferred MCP Server command from other available MCP implementations found in the official repository. + +🤝 **To integrate with Open WebUI after launching the server, check our [docs](https://docs.openwebui.com/openapi-servers/open-webui/).** + +### 🚀 Accessing the Generated APIs + +As soon as it starts, the MCP Proxy (`mcpo`) automatically: + +- Discovers MCP tools dynamically and generates REST endpoints. 
+- Creates interactive, human-readable OpenAPI documentation accessible at: + - `http://localhost:8000/docs` + +Simply call the auto-generated API endpoints directly via HTTP clients, AI agents, or other OpenAPI tools of your preference. + +### 📖 Example Workflow for End-Users + +Assuming you started the above server command (`uvx mcp-server-time`): + +- Visit your local API documentation at `http://localhost:8000/docs`. +- Select a generated endpoint (e.g., `/get_current_time`) and use the provided interactive form. +- Click "**Execute**" and instantly receive your response. + +No setup complexity—just instant REST APIs. + +## 🚀 Deploying in Production (Example) + +Deploying your MCP-to-OpenAPI proxy (powered by mcpo) is straightforward. Here's how to easily Dockerize and deploy it to cloud or VPS solutions: + +### 🐳 Dockerize your Proxy Server using mcpo + +1. **Dockerfile Example** + +Create the following `Dockerfile` inside your deployment directory: + +```dockerfile +FROM python:3.11-slim +WORKDIR /app +RUN pip install mcpo uv + +# Replace with your MCP server command; example: uvx mcp-server-time +CMD ["uvx", "mcpo", "--host", "0.0.0.0", "--port", "8000", "--", "uvx", "mcp-server-time", "--local-timezone=America/New_York"] +``` + +2. **Build & Run the Container Locally** + +```bash +docker build -t mcp-proxy-server . +docker run -d -p 8000:8000 mcp-proxy-server +``` + +3. **Deploying Your Container** + +Push to DockerHub or another registry: + +```bash +docker tag mcp-proxy-server yourdockerusername/mcp-proxy-server:latest +docker push yourdockerusername/mcp-proxy-server:latest +``` + +Deploy using Docker Compose, Kubernetes YAML manifests, or your favorite cloud container services (AWS ECS, Azure Container Instances, Render.com, or Heroku). + +✔️ Your production MCP servers are now effortlessly available via REST APIs! 
+ +## 🧑‍💻 Technical Details and Background + +### 🍃 How It Works (Technical Summary) + +- **Dynamic Schema Discovery & Endpoints:** At server startup, the proxy connects to the MCP server to query available tools. It automatically builds FastAPI endpoints based on the MCP tool schemas, creating concise and clear REST endpoints. + +- **OpenAPI Auto-documentation:** Endpoints generated are seamlessly documented and available via FastAPI's built-in Swagger UI (`/docs`). No extra doc writing required. + +- **Asynchronous & Performant**: Built on robust asynchronous libraries, ensuring speed and reliability for concurrent users. + +### 📚 Under the Hood: + +- FastAPI (Automatic routing & docs generation) +- MCP Client (Standard MCP integration & schema discovery) +- Standard JSON over HTTP (Easy integration) + +## ⚡️ Why is the MCP-to-OpenAPI Proxy Superior? + +Here's why leveraging MCP servers through OpenAPI via the proxy approach is significantly better and why Open WebUI enthusiastically supports it: + +- **User-friendly & Familiar Interface**: No custom clients; just HTTP REST endpoints you already know. +- **Instant Integration**: Immediately compatible with thousands of existing REST/OpenAPI tools, SDKs, and services. +- **Powerful & Automatic Docs**: Built-in Swagger UI documentation is automatically generated, always accurate, and maintained. +- **No New Protocol Overhead**: Eliminates the necessity to directly handle MCP-specific protocol complexities and socket communication issues. +- **Battle-Tested Security & Stability**: Inherits well-established HTTPS transport, standard auth methods (JWT, API keys), solid async libraries, and FastAPI’s proven robustness. +- **Future-Proof**: The MCP proxy uses existing, stable, standard REST/OpenAPI formats that benefit from long-term community support and ongoing evolution. + +🌟 **Bottom line:** MCP-to-OpenAPI makes your powerful MCP-based AI tools broadly accessible through intuitive, reliable, and scalable REST endpoints. 
Open WebUI proudly supports and recommends this best-in-class approach. + +## 📢 Community & Support + +- For questions, suggestions, or feature requests, please use our [GitHub Issue tracker](https://github.com/open-webui/openapi-servers/issues) or join our [Community Discussions](https://github.com/open-webui/openapi-servers/discussions). + +Happy integrations! 🌟🚀 \ No newline at end of file diff --git a/docs/features/plugin/tools/openapi-servers/open-webui.mdx b/docs/features/plugin/tools/openapi-servers/open-webui.mdx new file mode 100644 index 0000000..cc9861c --- /dev/null +++ b/docs/features/plugin/tools/openapi-servers/open-webui.mdx @@ -0,0 +1,211 @@ +--- +sidebar_position: 1 +title: "Open WebUI Integration" +--- + +## Overview + +Open WebUI v0.6+ supports seamless integration with external tools via the OpenAPI servers — meaning you can easily extend your LLM workflows using custom or community-powered tool servers 🧰. + +In this guide, you'll learn how to launch an OpenAPI-compatible tool server and connect it to Open WebUI through the intuitive user interface. Let’s get started! 🚀 + +--- + +## Step 1: Launch an OpenAPI Tool Server + +To begin, you'll need to start one of the reference tool servers available in the [openapi-servers repo](https://github.com/open-webui/openapi-servers). For quick testing, we’ll use the time tool server as an example. + +🛠️ Example: Starting the `time` server locally + +```bash +git clone https://github.com/open-webui/openapi-servers +cd openapi-servers + +# Navigate to the time server +cd servers/time + +# Install required dependencies +pip install -r requirements.txt + +# Start the server +uvicorn main:app --host 0.0.0.0 --reload +``` + +Once running, this will host a local OpenAPI server at http://localhost:8000, which you can point Open WebUI to. 
+ +![Time Server](/images/openapi-servers/open-webui/time-server.png) + +--- + +## Step 2: Connect Tool Server in Open WebUI + +Next, connect your running tool server to Open WebUI: + +1. Open WebUI in your browser. +2. Open ⚙️ **Settings**. +3. Click on ➕ **Tools** to add a new tool server. +4. Enter the URL where your OpenAPI tool server is running (e.g., http://localhost:8000). +5. Click "Save". + +![Settings Page](/images/openapi-servers/open-webui/settings.png) + +### 🧑‍💻 User Tool Servers vs. 🛠️ Global Tool Servers + +There are two ways to register tool servers in Open WebUI: + +#### 1. User Tool Servers (added via regular Settings) + +- Only accessible to the user who registered the tool server. +- The connection is made directly from the browser (client-side) by the user. +- Perfect for personal workflows or when testing custom/local tools. + +#### 2. Global Tool Servers (added via Admin Settings) + +Admins can manage shared tool servers available to all or selected users across the entire deployment: + +- Go to 🛠️ **Admin Settings > Tools**. +- Add the tool server URL just as you would in user settings. +- These tools are treated similarly to Open WebUI’s built-in tools. + +#### Main Difference: Where Are Requests Made From? + +The primary distinction between **User Tool Servers** and **Global Tool Servers** is where the API connection and requests are actually made: + +- **User Tool Servers** + - Requests to the tool server are performed **directly from your browser** (the client). + - This means you can safely connect to localhost URLs (like `http://localhost:8000`)—even exposing private or development-only endpoints such as your local filesystem or dev tools—without risking exposure to the wider internet or other users. + - Your connection is isolated; only your browser can access that tool server. + +- **Global Tool Servers** + - Requests are sent **from the Open WebUI backend/server** (not your browser). 
+ - The backend must be able to reach the tool server URL you specify—so `localhost` means the backend server's localhost, *not* your computer's. + - Use this for sharing tools with other users across the deployment, but be mindful: since the backend makes the requests, you cannot access your personal local resources (like your own filesystem) through this method. + - Think security! Only expose remote/global endpoints that are safe and meant to be accessed by multiple users. + +**Summary Table:** + +| Tool Server Type | Request Origin | Use Localhost? | Use Case Example | +| ------------------ | -------------------- | ------------------ | ---------------------------------------- | +| User Tool Server | User's Browser (Client-side) | Yes (private to you) | Personal tools, local dev/testing | +| Global Tool Server | Open WebUI Backend (Server-side) | No (unless running on the backend itself) | Team/shared tools, enterprise integrations | + +:::tip + +User Tool Servers are best for personal or experimental tools, especially those running on your own machine, while Global Tool Servers are ideal for production or shared environments where everyone needs access to the same tools. + +::: + +### 👉 Optional: Using a Config File with mcpo + +If you're running multiple tools through mcpo using a config file, take note: + +🧩 Each tool is mounted under its own unique path! + +For example, if you’re using memory and time tools simultaneously through mcpo, they’ll each be available at a distinct route: + +- http://localhost:8000/time +- http://localhost:8000/memory + +This means: + +- When connecting a tool in Open WebUI, you must enter the full route to that specific tool — do NOT enter just the root URL (http://localhost:8000). +- Add each tool individually in Open WebUI Settings using their respective subpath URLs. 
+ +![MCPO Config Tools Setting](/images/openapi-servers/open-webui/mcpo-config-tools.png) + +✅ Good: + +http://localhost:8000/time +http://localhost:8000/memory + +🚫 Not valid: + +http://localhost:8000 + +This ensures Open WebUI recognizes and communicates with each tool server correctly. + +--- + +## Step 3: Confirm Your Tool Server Is Connected ✅ + +Once your tool server is successfully connected, Open WebUI will display a 👇 tool server indicator directly in the message input area: + +📍 You'll now see this icon below the input box: + +![Tool Server Indicator](/images/openapi-servers/open-webui/message-input.png) + +Clicking this icon opens a popup where you can: + +- View connected tool server information +- See which tools are available and which server they're provided by +- Debug or disconnect any tool if needed + +🔍 Here’s what the tool information modal looks like: + +![Tool Info Modal Expanded](/images/openapi-servers/open-webui/info-modal.png) + +### 🛠️ Global Tool Servers Look Different — And Are Hidden by Default! + +If you've connected a Global Tool Server (i.e., one that’s admin-configured), it will not appear automatically in the input area like user tool servers do. + +Instead: + +- Global tools are hidden by default and must be explicitly activated per user. +- To enable them, you'll need to click on the ➕ button in the message input area (bottom left of the chat box), and manually toggle on the specific global tool(s) you want to use. + +Here’s what that looks like: + +![Global Tool Server Message Input](/images/openapi-servers/open-webui/global-message-input.png) + +⚠️ Important Notes for Global Tool Servers: + +- They will not show up in the tool indicator popup until enabled from the ➕ menu. +- Each global tool must be individually toggled on to become active inside your current chat. +- Once toggled on, they function the same way as user tools. +- Admins can control access to global tools via role-based permissions. 
+ +This is ideal for team setups or shared environments, where commonly-used tools (e.g., document search, memory, or web lookup) should be centrally accessible by multiple users. + +--- + +## (Optional) Step 4: Use "Native" Function Calling (ReACT-style) Tool Use 🧠 + +:::info + +For this to work effectively, **your selected model must support native tool calling**. Some local models claim support but often produce poor results. We strongly recommend using GPT-4o or another OpenAI model that supports function calling natively for the best experience. + +::: + +Want to enable ReACT-style (Reasoning + Acting) native function calls directly inside your conversations? You can switch Open WebUI to use native function calling. + +✳️ How to enable native function calling: + +1. Open the chat window. +2. Go to ⚙️ **Chat Controls > Advanced Params**. +3. Change the **Function Calling** parameter from `Default` to `Native`. + +![Native Tool Call](/images/openapi-servers/open-webui/native.png) + +--- + +## Need More Tools? Explore & Expand! 🧱 + +The [openapi-servers repo](https://github.com/open-webui/openapi-servers) includes a variety of useful reference servers: + +- 📂 Filesystem access +- 🧠 Memory & knowledge graphs +- 🗃️ Git repo browsing +- 🌎 Web search (WIP) +- 🛢️ Database querying (WIP) + +You can run any of these in the same way and connect them to Open WebUI by repeating the steps above. + +--- + +## Troubleshooting & Tips 🧩 + +- ❌ Not connecting? Make sure the URL is correct and accessible from the browser used to run Open WebUI. +- 🔒 If you're using remote servers, check firewalls and HTTPS configs! +- 📝 To make servers persist, consider deploying them in Docker or with system services. + +Need help? Visit the 👉 [Discussions page](https://github.com/open-webui/openapi-servers/discussions) or [open an issue](https://github.com/open-webui/openapi-servers/issues). 
diff --git a/docs/zh/future_plugin_development_roadmap_cn.md b/docs/zh/future_plugin_development_roadmap_cn.md new file mode 100644 index 0000000..70baab2 --- /dev/null +++ b/docs/zh/future_plugin_development_roadmap_cn.md @@ -0,0 +1,2562 @@ +# OpenWebUI 未来插件开发路线图 + +> 探索 AI 插件的无限可能,从学习到投资,从生活到工作,打造全方位的智能助手生态 + +## 📋 目录 + +1. [概述](#概述) +2. [插件开发方向总览](#插件开发方向总览) +3. [学习辅助方向](#1-学习辅助方向) +4. [数据开发方向](#2-数据开发方向) +5. [生活服务方向](#3-生活服务方向) +6. [A股投资方向](#4-a股投资方向) +7. [工作效率方向](#5-工作效率方向) +8. [创意设计方向](#6-创意设计方向) +9. [健康管理方向](#7-健康管理方向) +10. [社交通讯方向](#8-社交通讯方向) +11. [自媒体创作方向](#9-自媒体创作方向) +12. [OpenWebUI 垂直领域自媒体标准流程](#10-openwebui-垂直领域自媒体标准流程-) +13. [技术实现指南](#技术实现指南) +14. [开发优先级建议](#开发优先级建议) + +--- + +## 概述 + +随着 AI 技术的快速发展,OpenWebUI 插件系统为开发者提供了一个强大的平台来扩展 AI 能力。本文档旨在提供一个全面的插件开发路线图,涵盖多个领域,帮助开发者理解未来的发展方向和实现路径。 + +### 为什么需要插件? + +- **垂直领域专业化**:通用 AI 模型在特定领域需要专业化增强 +- **工作流程自动化**:将重复性任务封装为一键操作 +- **数据整合**:连接外部数据源,提供实时信息 +- **个性化体验**:根据用户需求定制 AI 交互方式 + +### 插件类型快速回顾 + +| 类型 | 用途 | 适用场景 | +|------|------|---------| +| **Filter** | 预处理/后处理 | 上下文注入、格式转换 | +| **Action** | 用户触发操作 | 导出文件、生成可视化 | +| **Pipe** | 自定义模型 | API 集成、多模型组合 | + +--- + +## 插件开发方向总览 + +``` + OpenWebUI 插件生态系统 + │ + ┌──────────┬──────────┬──────┴───────┬──────────┬──────────┐ + │ │ │ │ │ │ + 学习辅助 数据开发 生活服务 投资理财 工作效率 自媒体创作 + │ │ │ │ │ │ + ├─单词卡片 ├─SQL助手 ├─食谱推荐 ├─股票分析 ├─会议纪要 ├─标题党 + ├─错题本 ├─数据可视化├─旅行规划 ├─基金评估 ├─邮件助手 ├─选题雷达 + ├─知识图谱 ├─报表生成 ├─天气提醒 ├─财报解读 ├─日程管理 ├─文案魔方 + └─学习计划 └─ETL流程 └─购物比价 └─交易信号 └─文档摘要 └─数据罗盘 +``` + +--- + +## 1. 
学习辅助方向 + +### 1.1 智能单词卡片 📚 + +**插件名称**:闪词卡 (Flash Vocab) + +**插件类型**:Action + +**功能描述**: +- 从文本中自动提取生词和专业术语 +- 生成精美的单词记忆卡片(正面词汇,背面释义和例句) +- 支持导出为 Anki 格式 +- 根据艾宾浩斯遗忘曲线安排复习 + +**技术实现**: +```python +""" +title: 闪词卡 (Flash Vocab) +version: 1.0.0 +description: 智能提取并生成精美单词记忆卡片 +""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any, List +import json + +class Action: + class Valves(BaseModel): + target_language: str = Field( + default="en", + description="目标语言代码 (en/ja/ko/fr等)" + ) + difficulty_level: str = Field( + default="intermediate", + description="难度级别 (beginner/intermediate/advanced)" + ) + max_words: int = Field( + default=10, + description="每次提取的最大单词数" + ) + include_pronunciation: bool = Field( + default=True, + description="是否包含发音指南" + ) + + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Any] = None, + ) -> Optional[dict]: + """ + 核心逻辑: + 1. 提取用户消息中的文本 + 2. 调用 LLM 识别生词和术语 + 3. 生成结构化的单词数据 + 4. 渲染为精美的 HTML 卡片 + 5. 支持导出为 Anki 格式 + """ + pass +``` + +**输出示例**: +```html + +
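+<div class="flashcard">
+  <div class="card-front">
+    <div class="word">ephemeral</div>
+    <span class="pronunciation">/ɪˈfem(ə)rəl/</span>
+  </div>
+  <div class="card-back">
+    <div class="definition">adj. 短暂的,瞬息的</div>
+    <div class="example">"The ephemeral beauty of cherry blossoms."</div>
+    <div class="synonyms">同义词: fleeting, transient</div>
+  </div>
+</div>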
+``` + +**应用场景**: +- 英语学习者阅读外文资料时快速积累词汇 +- 专业人士学习领域术语 +- 考试备考(托福/雅思/GRE) + +--- + +### 1.2 智能错题本 📝 + +**插件名称**:错题收集器 (Mistake Collector) + +**插件类型**:Action + Filter(组合使用) + +**功能描述**: +- 自动识别对话中的问答环节 +- 标记用户的错误理解或回答 +- 分析错误原因并生成改进建议 +- 定期生成错题复习报告 +- 支持按学科/难度分类管理 + +**技术架构**: +``` +用户回答 → Filter (inlet) → 判断对错 → 错误记录 + ↓ + Action → 生成错题卡片 + ↓ + Filter (outlet) → 推荐相似练习 +``` + +**核心功能代码**: +```python +""" +title: 错题收集器 +version: 1.0.0 +""" + +from pydantic import BaseModel, Field +from typing import List + +class Action: + class Valves(BaseModel): + subjects: List[str] = Field( + default=["数学", "物理", "编程"], + description="跟踪的学科列表" + ) + auto_review_interval: int = Field( + default=7, + description="自动提醒复习的天数间隔" + ) + + async def action(self, body, __user__, __event_emitter__, __request__): + # 1. 分析历史对话,识别错误 + # 2. 调用 LLM 分析错误原因 + # 3. 生成结构化错题记录 + # 4. 存储到用户个人数据库 + # 5. 渲染错题卡片 + pass +``` + +--- + +### 1.3 知识图谱生成器 🕸️ + +**插件名称**:知识织网 (Knowledge Web) + +**插件类型**:Action + +**功能描述**: +- 从长文本中自动提取概念和关系 +- 生成交互式知识图谱可视化 +- 支持节点展开和详情查看 +- 可导出为多种格式(JSON、GraphML、PNG) + +**技术栈**: +- 前端:D3.js / ECharts 实现图谱渲染 +- 后端:LLM 进行概念提取和关系识别 +- 存储:JSON 格式保存图谱数据 + +--- + +## 2. 
数据开发方向 + +### 2.1 SQL 智能助手 🗄️ + +**插件名称**:SQL 精灵 (SQL Genie) + +**插件类型**:Pipe + Action + +**功能描述**: +- 自然语言转 SQL 查询 +- 自动检测和优化慢查询 +- 支持多种数据库方言(MySQL、PostgreSQL、SQLite) +- 生成 ER 图和数据字典 +- 执行查询并可视化结果 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + database_type: str = Field( + default="mysql", + description="数据库类型 (mysql/postgresql/sqlite)" + ) + enable_query_execution: bool = Field( + default=False, + description="是否允许执行查询(需要数据库连接)" + ) + connection_string: str = Field( + default="", + description="数据库连接字符串(⚠️ 敏感信息,建议通过环境变量配置)" + ) + max_result_rows: int = Field( + default=100, + description="查询结果最大行数" + ) + enable_optimization: bool = Field( + default=True, + description="是否自动优化 SQL" + ) +``` + +**使用示例**: +``` +用户: 帮我查询最近30天销售额最高的10个产品 + +AI: 基于您的描述,我生成了以下 SQL 查询: + +SELECT + p.product_name, + SUM(o.quantity * o.unit_price) as total_sales +FROM orders o +JOIN products p ON o.product_id = p.id +WHERE o.order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY) +GROUP BY p.id, p.product_name +ORDER BY total_sales DESC +LIMIT 10; + +📊 优化建议: +- 建议在 orders.order_date 列上创建索引 +- 考虑使用物化视图加速频繁查询 +``` + +--- + +### 2.2 数据可视化工厂 📊 + +**插件名称**:图表大师 (Chart Master) + +**插件类型**:Action + +**功能描述**: +- 自动分析数据结构推荐合适的图表类型 +- 支持多种图表:折线图、柱状图、饼图、散点图、热力图等 +- 一键导出为 PNG/SVG/PDF +- 支持自定义主题和配色方案 +- 生成可嵌入的交互式 HTML + +**核心实现**: +```python +""" +title: 图表大师 +version: 1.0.0 +""" + +class Action: + CHART_TYPES = { + "trend": ["line", "area"], + "comparison": ["bar", "column", "radar"], + "distribution": ["pie", "donut", "histogram"], + "relationship": ["scatter", "bubble", "heatmap"], + "composition": ["stacked_bar", "treemap"] + } + + async def action(self, body, __user__, __event_emitter__, __request__): + # 1. 解析消息中的数据(支持表格、JSON、CSV) + data = self.extract_data(body["messages"][-1]["content"]) + + # 2. 分析数据特征 + data_type = self.analyze_data_type(data) + + # 3. 推荐图表类型 + recommended_charts = self.CHART_TYPES.get(data_type, ["bar"]) + + # 4. 
生成 ECharts 配置 + chart_config = self.generate_chart_config(data, recommended_charts[0]) + + # 5. 渲染为交互式 HTML + html = self.render_chart_html(chart_config) + + return html +``` + +--- + +### 2.3 自动报表生成器 📋 + +**插件名称**:报表精灵 (Report Wizard) + +**插件类型**:Action + +**功能描述**: +- 根据数据自动生成专业报表 +- 支持多种模板:日报、周报、月报、季度分析报告 +- 自动计算同比/环比增长 +- 生成关键发现和行动建议 +- 导出为 Word/PDF/HTML 格式 + +--- + +## 3. 生活服务方向 + +### 3.1 智能食谱推荐 🍳 + +**插件名称**:今天吃啥 (What's Cooking) + +**插件类型**:Pipe + Action + +**功能描述**: +- 根据冰箱现有食材推荐食谱 +- 考虑营养均衡和饮食偏好 +- 生成详细的烹饪步骤和时间估算 +- 支持根据人数自动调整配料量 +- 生成购物清单 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field +from typing import List + +class Valves(BaseModel): + dietary_restrictions: List[str] = Field( + default=[], + description="饮食限制 (素食/无麸质/低脂/低糖等)" + ) + cuisine_preferences: List[str] = Field( + default=["中餐", "西餐"], + description="偏好的菜系" + ) + cooking_skill_level: str = Field( + default="intermediate", + description="烹饪技能水平 (beginner/intermediate/expert)" + ) + max_cooking_time: int = Field( + default=60, + description="最长烹饪时间(分钟)" + ) + servings: int = Field( + default=2, + description="默认用餐人数" + ) +``` + +**输出示例**: +```markdown +## 🍜 推荐食谱:番茄牛腩面 + +### 📊 基本信息 +- ⏱️ 烹饪时间:45分钟 +- 👥 份量:2人份 +- 🔥 难度:中等 +- 💪 热量:约650卡/份 + +### 🥘 所需食材 +| 食材 | 用量 | 状态 | +|------|------|------| +| 牛腩 | 300g | ✅ 已有 | +| 番茄 | 2个 | ✅ 已有 | +| 面条 | 200g | ❌ 需购买 | +| 葱姜蒜 | 适量 | ✅ 已有 | + +### 👨‍🍳 烹饪步骤 +1. **准备工作 (10分钟)** + - 牛腩切块,冷水下锅焯水去血沫 + - 番茄切块,葱切段,姜蒜切片 + +2. 
**炖煮 (30分钟)** + - 热锅凉油,爆香葱姜蒜 + - 加入牛腩翻炒上色 + - 加入番茄和适量水,小火慢炖 +``` + +--- + +### 3.2 智能旅行规划 ✈️ + +**插件名称**:旅程设计师 (Trip Designer) + +**插件类型**:Pipe + Action + +**功能描述**: +- 根据预算、时间、偏好生成行程 +- 自动规划路线和交通方式 +- 推荐当地特色美食和景点 +- 生成每日详细日程表 +- 估算整体花费 +- 导出为可打印的行程单 + +**核心功能**: +```python +class TripDesigner: + async def generate_itinerary(self, params): + """ + params: { + "destination": "日本东京", + "duration": 7, # 天数 + "budget": 15000, # 人均预算(人民币) + "travel_style": "文艺", # 文艺/冒险/休闲/美食 + "must_visit": ["浅草寺", "涩谷"], + "avoid": ["购物中心"], + "accommodation_level": "中档" + } + """ + # 1. 查询目的地信息 + # 2. 规划每日行程 + # 3. 计算预算分配 + # 4. 生成交通建议 + # 5. 渲染行程卡片 + pass +``` + +--- + +### 3.3 智能购物比价 🛒 + +**插件名称**:比价精灵 (Price Hunter) + +**插件类型**:Pipe + +**功能描述**: +- 跨平台商品价格对比 +- 历史价格走势分析 +- 优惠券和促销信息聚合 +- 最佳购买时机建议 +- 性价比评分 + +--- + +## 4. A股投资方向 + +### 4.1 财报解读助手 📈 + +**插件名称**:财报解读师 (Financial Report Analyst) + +**插件类型**:Action + Filter + +**功能描述**: +- 上传年报/季报 PDF 自动解析 +- 提取关键财务指标(营收、利润、ROE、负债率等) +- 同行业对比分析 +- 识别财务风险信号 +- 生成投资价值评估报告 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field +from typing import List + +class Valves(BaseModel): + focus_metrics: List[str] = Field( + default=["营业收入", "净利润", "ROE", "资产负债率", "现金流"], + description="重点关注的财务指标" + ) + compare_peers: bool = Field( + default=True, + description="是否与同行业公司对比" + ) + risk_alert_threshold: float = Field( + default=0.7, + description="风险预警阈值 (0-1)" + ) + historical_periods: int = Field( + default=5, + description="历史对比期数(年)" + ) +``` + +**输出示例**: +```markdown +## 📊 财报解读报告:贵州茅台 (600519) + +### 📈 核心指标 (2024Q3) + +| 指标 | 数值 | 同比 | 行业平均 | 评级 | +|------|------|------|---------|------| +| 营业收入 | 1,032亿 | +15.2% | +8.3% | ⭐⭐⭐⭐⭐ | +| 净利润 | 524亿 | +12.8% | +5.1% | ⭐⭐⭐⭐⭐ | +| ROE | 32.5% | +1.2% | 15.3% | ⭐⭐⭐⭐⭐ | +| 资产负债率 | 21.3% | -2.1% | 45.2% | ⭐⭐⭐⭐⭐ | + +### 🎯 关键发现 + +1. **盈利能力突出** + - ROE 连续5年保持30%以上,远超行业平均 + - 毛利率稳定在91%左右,护城河深厚 + +2. 
**增长趋势** + - 营收增速连续3季度加速 + - 直销占比提升至35%,渠道优化效果显现 + +### ⚠️ 风险提示 +- 应收账款增速高于营收增速,需关注 +- 存货周转天数小幅上升 + +### 💡 投资建议 +综合评分:**8.5/10** +建议:当前估值处于历史中位数偏下,可考虑分批建仓 +``` + +--- + +### 4.2 股票技术分析 📉 + +**插件名称**:K线解读 (Chart Decoder) + +**插件类型**:Pipe + Action + +**功能描述**: +- 实时获取 A 股行情数据 +- 识别经典 K 线形态(头肩顶、双底、三角整理等) +- 计算技术指标(MA、MACD、RSI、BOLL 等) +- 识别支撑位和压力位 +- 生成技术分析报告 + +**核心代码框架**: +```python +""" +title: K线解读 +version: 1.0.0 +""" + +import asyncio +from typing import Optional, Dict, Any, List +from pydantic import BaseModel, Field + +class Pipe: + class Valves(BaseModel): + data_source: str = Field( + default="tushare", + description="数据源 (tushare/akshare/eastmoney)" + ) + api_token: str = Field( + default="", + description="数据源 API Token(⚠️ 敏感信息,建议通过环境变量配置)" + ) + default_period: str = Field( + default="daily", + description="默认K线周期 (daily/weekly/monthly)" + ) + technical_indicators: List[str] = Field( + default=["MA", "MACD", "RSI", "BOLL"], + description="默认显示的技术指标" + ) + + def pipes(self): + return [{"id": "chart_decoder", "name": "K线解读"}] + + async def pipe(self, body, __user__, __event_emitter__): + # 1. 解析用户查询(股票代码、时间范围) + query_params = self.parse_query(body["messages"][-1]["content"]) + + # 2. 获取历史行情数据 + stock_data = await self.fetch_stock_data(query_params) + + # 3. 计算技术指标 + indicators = self.calculate_indicators(stock_data) + + # 4. 识别 K 线形态 + patterns = self.identify_patterns(stock_data) + + # 5. 生成分析报告 + report = self.generate_report(stock_data, indicators, patterns) + + return report +``` + +--- + +### 4.3 投资组合分析 💼 + +**插件名称**:组合诊断师 (Portfolio Doctor) + +**插件类型**:Action + +**功能描述**: +- 导入持仓数据分析组合健康度 +- 计算组合风险指标(夏普比率、最大回撤、Beta等) +- 行业和风格分布分析 +- 相关性热力图 +- 优化建议和再平衡方案 + +--- + +### 4.4 财经新闻解读 📰 + +**插件名称**:财经速递 (Financial Express) + +**插件类型**:Filter + Pipe + +**功能描述**: +- 实时监控财经新闻和公告 +- 自动评估新闻对个股的影响 +- 识别利好/利空信号 +- 关联历史类似事件的市场反应 +- 生成简明扼要的解读 + +--- + +## 5. 
工作效率方向 + +### 5.1 智能会议纪要 🎙️ + +**插件名称**:会议精灵 (Meeting Genie) + +**插件类型**:Action + +**功能描述**: +- 支持音频/视频文件上传转录 +- 自动识别发言人 +- 提取关键讨论点和决策 +- 生成结构化会议纪要 +- 自动分配待办事项 +- 导出为多种格式 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + transcription_service: str = Field( + default="whisper", + description="转录服务 (whisper/azure/google)" + ) + language: str = Field( + default="zh-CN", + description="会议语言" + ) + identify_speakers: bool = Field( + default=True, + description="是否识别不同发言人" + ) + extract_action_items: bool = Field( + default=True, + description="是否提取待办事项" + ) + summary_style: str = Field( + default="detailed", + description="纪要风格 (brief/detailed/executive)" + ) +``` + +**输出示例**: +```markdown +## 📋 会议纪要 + +**会议主题**:Q4 产品规划会议 +**日期**:2024-11-20 +**参与者**:张总、李经理、王工、陈工 +**时长**:65分钟 + +--- + +### 🎯 关键决策 + +1. **决定**:Q4 重点推进 AI 功能模块 + - 决策人:张总 + - 截止日期:2024-12-31 + +2. **决定**:增加 2 名前端开发人员 + - 决策人:李经理 + - 预算:30万 + +### 📝 讨论要点 + +1. **AI 功能模块** (讨论时长: 25分钟) + - 王工提出技术方案 A,预计开发周期 6 周 + - 陈工建议采用现有开源方案加速开发 + - 最终决定:采用混合方案 + +### ✅ 待办事项 + +| 事项 | 负责人 | 截止日期 | 优先级 | +|------|--------|---------|--------| +| 完成技术方案文档 | 王工 | 11-25 | 高 | +| 招聘需求提交 | 李经理 | 11-22 | 高 | +| 竞品分析报告 | 陈工 | 11-28 | 中 | + +### 📅 下次会议 +- 时间:2024-11-27 14:00 +- 议题:技术方案评审 +``` + +--- + +### 5.2 智能邮件助手 ✉️ + +**插件名称**:邮件专家 (Email Pro) + +**插件类型**:Action + +**功能描述**: +- 根据上下文生成专业邮件 +- 支持多种场景模板(商务、求职、催款、道歉等) +- 自动调整语气和正式程度 +- 多语言邮件翻译 +- 邮件摘要和要点提取 + +--- + +### 5.3 日程智能管理 📅 + +**插件名称**:时间管家 (Time Butler) + +**插件类型**:Pipe + Action + +**功能描述**: +- 自然语言创建日程 +- 智能冲突检测和建议 +- 优先级排序和时间块规划 +- 提醒和跟进管理 +- 与主流日历应用同步 + +--- + +## 6. 
创意设计方向 + +### 6.1 文案创意生成器 ✍️ + +**插件名称**:文案魔法师 (Copywriting Wizard) + +**插件类型**:Action + +**功能描述**: +- 支持多种文案类型(广告、社交媒体、产品描述等) +- 基于品牌调性定制风格 +- A/B 测试文案变体生成 +- SEO 优化建议 +- 情感分析和可读性评分 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + brand_voice: str = Field( + default="professional", + description="品牌调性 (professional/friendly/playful/luxury)" + ) + target_audience: str = Field( + default="general", + description="目标受众 (general/youth/business/senior)" + ) + platform: str = Field( + default="general", + description="发布平台 (wechat/weibo/xiaohongshu/douyin/linkedin)" + ) + include_emoji: bool = Field( + default=True, + description="是否包含表情符号" + ) + max_length: int = Field( + default=500, + description="文案最大长度" + ) +``` + +--- + +### 6.2 UI/UX 设计助手 🎨 + +**插件名称**:设计灵感 (Design Muse) + +**插件类型**:Action + +**功能描述**: +- 根据需求生成 UI 设计建议 +- 配色方案推荐 +- 组件布局建议 +- 可用性分析 +- 生成设计规范文档 + +--- + +## 7. 健康管理方向 + +### 7.1 健康数据分析 💪 + +**插件名称**:健康管家 (Health Manager) + +**插件类型**:Action + +**功能描述**: +- 整合可穿戴设备数据 +- 睡眠质量分析 +- 运动建议生成 +- 营养摄入跟踪 +- 健康趋势报告 + +**注意事项**: +⚠️ 健康类插件需要添加免责声明,明确说明不能替代专业医疗建议。 + +--- + +### 7.2 心理健康助手 🧠 + +**插件名称**:心灵陪伴 (Mind Companion) + +**插件类型**:Pipe + +**功能描述**: +- 情绪识别和跟踪 +- 正念冥想引导 +- 压力管理建议 +- 积极心理学练习 +- 心理健康资源推荐 + +--- + +## 8. 社交通讯方向 + +### 8.1 社交内容创作 📱 + +**插件名称**:社交达人 (Social Star) + +**插件类型**:Action + +**功能描述**: +- 生成适合各平台的内容 +- 热点话题追踪和结合 +- 发布时间建议 +- 互动话题设计 +- 数据分析和优化建议 + +--- + +### 8.2 多语言翻译增强 🌍 + +**插件名称**:译境 (TransBridge) + +**插件类型**:Filter + +**功能描述**: +- 实时对话翻译 +- 保留语气和文化特色 +- 专业术语库支持 +- 翻译质量评分 +- 语言学习模式 + +--- + +## 9. 
自媒体创作方向 🎬 + +> 专为自媒体博主设计的 AI 插件套件,覆盖内容创作、运营分析、粉丝互动等全流程 + +### 9.1 爆款标题生成器 🔥 + +**插件名称**:标题党 (Title Master) + +**插件类型**:Action + +**功能描述**: +- 根据内容自动生成多个吸睛标题 +- 支持多平台风格(微信公众号、抖音、B站、小红书、知乎) +- 标题吸引力评分和优化建议 +- A/B 测试标题变体生成 +- 违禁词检测和规避 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field +from typing import List + +class Valves(BaseModel): + platform: str = Field( + default="wechat", + description="目标平台 (wechat/douyin/bilibili/xiaohongshu/zhihu)" + ) + style: str = Field( + default="curiosity", + description="标题风格 (curiosity/emotional/practical/controversial/storytelling)" + ) + title_count: int = Field( + default=5, + description="生成标题数量" + ) + max_length: int = Field( + default=30, + description="标题最大字数" + ) + include_emoji: bool = Field( + default=True, + description="是否包含表情符号" + ) +``` + +**输出示例**: +```markdown +## 🔥 标题生成结果 + +### 原始主题:如何用 AI 提升工作效率 + +| 序号 | 标题 | 平台适配 | 吸引力评分 | +|------|------|---------|-----------| +| 1 | 🚀 用了这个 AI 工具,我每天多出 3 小时摸鱼时间 | 微信 | ⭐⭐⭐⭐⭐ | +| 2 | 90后程序员靠 AI 副业月入 5 万,方法竟然这么简单 | 抖音 | ⭐⭐⭐⭐ | +| 3 | 【干货】AI 效率神器大揭秘,看完直接起飞 | B站 | ⭐⭐⭐⭐ | +| 4 | 姐妹们!这个 AI 工具绝了,打工人必备 💪 | 小红书 | ⭐⭐⭐⭐⭐ | +| 5 | 如何科学地利用 AI 工具提升 10 倍工作效率? 
| 知乎 | ⭐⭐⭐⭐ | + +### 💡 优化建议 +- 标题 1 使用数字+利益点,点击率预估较高 +- 建议 A/B 测试标题 1 和标题 4 +``` + +--- + +### 9.2 内容选题助手 💡 + +**插件名称**:选题雷达 (Topic Radar) + +**插件类型**:Pipe + Action + +**功能描述**: +- 实时追踪全网热点话题 +- 分析竞品账号的爆款内容 +- 结合账号定位推荐选题 +- 预测话题热度趋势 +- 生成内容日历规划 + +**核心功能**: +```python +""" +title: 选题雷达 +version: 1.0.0 +""" + +from pydantic import BaseModel, Field +from typing import List + +class Pipe: + class Valves(BaseModel): + niche: str = Field( + default="科技", + description="账号垂直领域 (科技/生活/美食/旅行/教育等)" + ) + platforms: List[str] = Field( + default=["weibo", "douyin", "bilibili"], + description="监控的平台列表" + ) + competitor_accounts: List[str] = Field( + default=[], + description="竞品账号列表" + ) + update_frequency: str = Field( + default="daily", + description="更新频率 (hourly/daily/weekly)" + ) + + async def pipe(self, body, __user__, __event_emitter__): + # 1. 爬取热搜榜单 + # 2. 分析竞品最新内容 + # 3. 结合账号定位筛选 + # 4. 评估选题潜力 + # 5. 生成选题建议 + pass +``` + +**输出示例**: +```markdown +## 📊 今日选题推荐 (2024-11-29) + +### 🔥 热点追踪 +| 热度 | 话题 | 平台 | 相关度 | 建议切入角度 | +|------|------|------|--------|-------------| +| 🔥🔥🔥 | #GPT-5发布 | 全平台 | 高 | 深度评测 + 使用教程 | +| 🔥🔥 | #双十二攻略 | 小红书 | 中 | AI 购物助手推荐 | +| 🔥 | #年终总结 | 微信 | 高 | AI 辅助做年终总结 | + +### 📈 竞品爆款分析 +- @科技大V 发布《AI 写作工具横评》获赞 5.2w +- @效率达人 发布《用 ChatGPT 做 PPT》获赞 3.8w + +### 💡 本周选题建议 +1. **【高优先级】** GPT-5 首发体验评测 +2. **【中优先级】** AI 工具年度盘点 +3. 
**【储备选题】** 2025 年 AI 趋势预测 +``` + +--- + +### 9.3 脚本文案生成器 📝 + +**插件名称**:文案魔方 (Script Cube) + +**插件类型**:Action + +**功能描述**: +- 生成短视频/直播脚本 +- 支持多种内容类型(教程、种草、故事、观点输出) +- 自动匹配平台算法偏好 +- 生成分镜脚本和口播稿 +- 包含钩子、高潮、结尾的完整结构 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + content_type: str = Field( + default="tutorial", + description="内容类型 (tutorial/review/story/opinion/vlog)" + ) + duration: int = Field( + default=60, + description="目标时长(秒)" + ) + platform: str = Field( + default="douyin", + description="发布平台" + ) + tone: str = Field( + default="casual", + description="语气风格 (casual/professional/humorous/emotional)" + ) + include_hooks: bool = Field( + default=True, + description="是否生成开头钩子" + ) +``` + +**输出示例**: +```markdown +## 📹 短视频脚本 + +**主题**:3 个 AI 工具让你效率翻倍 +**时长**:60秒 +**平台**:抖音 + +--- + +### 🎬 分镜脚本 + +| 时间 | 画面 | 口播/字幕 | 备注 | +|------|------|----------|------| +| 0-3s | 主播惊讶表情 | "我靠!这也太强了吧" | 钩子,吸引停留 | +| 3-8s | 问题场景 | "是不是经常加班到深夜?" | 引起共鸣 | +| 8-20s | 工具 1 演示 | "第一个工具..." | 干货输出 | +| 20-35s | 工具 2 演示 | "第二个更绝..." | 递进 | +| 35-50s | 工具 3 演示 | "最后这个直接封神" | 高潮 | +| 50-55s | 效果对比 | "用完之后效率直接翻倍" | 价值总结 | +| 55-60s | 引导互动 | "还想看什么工具?评论区告诉我" | CTA | + +### 📢 完整口播稿 + +> 我靠!这也太强了吧! +> +> 是不是经常加班到深夜,工作怎么都做不完? +> +> 今天分享 3 个 AI 神器,用完效率直接翻倍! +> +> 第一个是 XXX,它可以... +> (详细内容略) +``` + +--- + +### 9.4 评论互动助手 💬 + +**插件名称**:评论达人 (Comment Pro) + +**插件类型**:Action + +**功能描述**: +- 批量生成高质量回复 +- 识别粉丝情感和意图 +- 生成互动话题引导评论 +- 识别潜在负面评论并建议处理 +- 生成粉丝画像分析 + +**核心功能**: +```python +""" +title: 评论达人 +version: 1.0.0 +""" + +class Action: + async def action(self, body, __user__, __event_emitter__, __request__): + # 1. 解析评论列表 + comments = self.parse_comments(body["messages"][-1]["content"]) + + # 2. 情感分析和分类 + classified = self.classify_comments(comments) + + # 3. 生成回复建议 + replies = [] + for comment in classified: + reply = await self.generate_reply( + comment, + tone=self.valves.reply_tone, + style=self.valves.reply_style + ) + replies.append(reply) + + # 4. 
渲染结果 + return self.render_replies(replies) +``` + +**输出示例**: +```markdown +## 💬 评论回复建议 + +### 原评论分析 +| 评论 | 情感 | 类型 | 优先级 | +|------|------|------|--------| +| "太棒了,学到了!" | 😊 正面 | 认可 | 低 | +| "能出个详细教程吗?" | 🤔 中性 | 需求 | 高 | +| "这个工具收费吗?" | 🤔 中性 | 咨询 | 高 | +| "感觉一般般" | 😐 负面 | 质疑 | 中 | + +### 建议回复 +1. **"太棒了,学到了!"** + > 谢谢支持!后续还有更多干货,记得关注不迷路哦~ 💪 + +2. **"能出个详细教程吗?"** + > 好问题!详细教程已经在做了,预计下周发布,先关注等更新吧! + +3. **"这个工具收费吗?"** + > 基础功能免费,高级功能付费~我视频里用的都是免费的,放心用! + +4. **"感觉一般般"** + > 感谢反馈!可以说说哪里不满意吗?我后续改进~ +``` + +--- + +### 9.5 数据分析仪表盘 📊 + +**插件名称**:数据罗盘 (Data Compass) + +**插件类型**:Action + +**功能描述**: +- 多平台数据整合分析 +- 粉丝增长趋势可视化 +- 内容表现分析(播放量、点赞、评论、转发) +- 最佳发布时间分析 +- 竞品对比分析 +- 生成周报/月报 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field +from typing import List + +class Valves(BaseModel): + platforms: List[str] = Field( + default=["douyin", "bilibili", "xiaohongshu"], + description="分析的平台列表" + ) + analysis_period: str = Field( + default="7d", + description="分析周期 (7d/30d/90d)" + ) + compare_previous: bool = Field( + default=True, + description="是否与上一周期对比" + ) + generate_insights: bool = Field( + default=True, + description="是否生成智能洞察" + ) +``` + +**输出示例**: +```markdown +## 📊 自媒体数据周报 (11.22 - 11.28) + +### 📈 核心指标概览 + +| 指标 | 本周 | 上周 | 环比 | 趋势 | +|------|------|------|------|------| +| 总粉丝 | 12.5w | 11.8w | +5.9% | 📈 | +| 新增粉丝 | 7,234 | 5,102 | +41.8% | 📈 | +| 总播放量 | 89.2w | 72.1w | +23.7% | 📈 | +| 平均点赞 | 2,341 | 1,892 | +23.7% | 📈 | +| 互动率 | 8.7% | 7.2% | +20.8% | 📈 | + +### 🏆 本周爆款内容 TOP3 + +| 排名 | 标题 | 播放量 | 点赞 | 转发 | +|------|------|--------|------|------| +| 1 | 3个AI工具让你效率翻倍 | 23.5w | 1.2w | 892 | +| 2 | ChatGPT 最新玩法 | 18.2w | 8.9k | 567 | +| 3 | AI 绘画入门教程 | 12.1w | 6.2k | 423 | + +### ⏰ 最佳发布时间分析 + +| 平台 | 最佳时间 | 次佳时间 | +|------|---------|---------| +| 抖音 | 12:00-13:00 | 19:00-21:00 | +| B站 | 18:00-20:00 | 21:00-23:00 | +| 小红书 | 20:00-22:00 | 12:00-13:00 | + +### 💡 智能洞察 + +1. **增长亮点**:AI 工具类内容表现突出,建议继续深耕 +2. **优化建议**:视频时长 45-60s 表现最佳,建议控制时长 +3. 
**内容方向**:教程类内容转发率高,可增加此类内容占比 +4. **发布策略**:周三、周五发布效果最好,建议调整发布计划 +``` + +--- + +### 9.6 AI 封面生成器 🖼️ + +**插件名称**:封面工坊 (Cover Studio) + +**插件类型**:Action + +**功能描述**: +- 根据标题和内容生成封面提示词 +- 支持多种封面风格(简约、炫酷、可爱、专业) +- 自动适配各平台封面尺寸 +- 生成封面文案排版建议 +- 与 DALL-E / Midjourney 集成 + +**输出示例**: +```markdown +## 🖼️ 封面生成建议 + +### 视频主题:5 个提升效率的 AI 工具 + +### 🎨 封面风格 A:科技感 +**Midjourney 提示词**: +> futuristic tech interface, glowing blue AI icons, dark background with neon lights, +> professional tech youtube thumbnail style, 16:9 aspect ratio, high contrast, +> cinematic lighting --ar 16:9 --v 5 + +**文案排版**: +- 主标题:"5个AI神器" (大号加粗,渐变色) +- 副标题:"效率翻倍" (右下角,白色描边) +- 表情符号:🚀💡 (左上角点缀) + +### 🎨 封面风格 B:人物出镜 +**建议构图**: +- 博主惊讶/兴奋表情在左侧 +- 右侧放 AI 工具 logo 或截图 +- 大字标题叠加在画面上方 + +### 📐 尺寸适配 +| 平台 | 尺寸 | 备注 | +|------|------|------| +| 抖音 | 1080x1440 | 3:4 竖版 | +| B站 | 1280x720 | 16:9 横版 | +| 小红书 | 1080x1440 | 3:4 竖版 | +| YouTube | 1280x720 | 16:9 横版 | +``` + +--- + +## 10. OpenWebUI 垂直领域自媒体标准流程 🎯 + +> 专为 OpenWebUI 领域自媒体博主设计的标准化内容生产流程,打造专业的 AI 工具类自媒体矩阵 + +### 10.1 OpenWebUI 内容生产标准流程 + +作为 OpenWebUI 垂直领域的自媒体博主,建议遵循以下标准化流程: + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ OpenWebUI 自媒体内容生产流程 │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ 1️⃣ 功能发现 2️⃣ 深度体验 3️⃣ 内容策划 4️⃣ 素材制作 5️⃣ 发布运营 │ +│ ↓ ↓ ↓ ↓ ↓ │ +│ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ │ +│ │版本更新│ │插件测试│ │脚本撰写│ │录屏截图│ │多平台│ │ +│ │官方动态│ │场景复现│ │大纲设计│ │视频剪辑│ │数据分析│ │ +│ │社区讨论│ │问题记录│ │亮点提炼│ │封面设计│ │互动回复│ │ +│ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### 10.2 OpenWebUI 内容助手插件套件 🛠️ + +#### 插件 1:版本追踪器 (Version Tracker) + +**插件类型**:Pipe + +**功能描述**: +- 自动追踪 OpenWebUI GitHub 仓库更新 +- 解析 Release Notes 和 Changelog +- 识别重大功能更新和 Breaking Changes +- 生成中文版本更新摘要 +- 推送更新通知 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field +from typing import List + +class Valves(BaseModel): + github_repo: 
str = Field( + default="open-webui/open-webui", + description="GitHub 仓库地址" + ) + check_interval: str = Field( + default="daily", + description="检查频率 (hourly/daily/weekly)" + ) + notify_types: List[str] = Field( + default=["release", "pre-release", "commit"], + description="通知类型" + ) + auto_translate: bool = Field( + default=True, + description="自动翻译为中文" + ) + highlight_keywords: List[str] = Field( + default=["plugin", "function", "filter", "pipe", "action", "breaking"], + description="重点关注的关键词" + ) +``` + +**输出示例**: +```markdown +## 🆕 OpenWebUI 版本更新速报 + +### v0.4.5 (2024-11-28) + +#### 🔥 重大更新 +- **新增 Function Calling 支持** - 插件现在可以调用外部函数 +- **Filter 插件增强** - 支持 stream 方法实时处理流式响应 + +#### 🛠️ 改进 +- 优化了插件加载性能 +- 修复了多模态消息处理问题 + +#### ⚠️ Breaking Changes +- `inlet` 方法签名变更,需要更新现有插件 + +#### 📝 内容建议 +基于本次更新,推荐制作以下内容: +1. 【教程】Function Calling 完整使用指南 +2. 【实战】用 Filter 插件实现实时翻译 +3. 【迁移指南】插件升级到 v0.4.5 +``` + +--- + +#### 插件 2:插件文档生成器 (Plugin Doc Generator) + +**插件类型**:Action + +**功能描述**: +- 自动解析插件代码结构 +- 生成标准化的插件文档 +- 提取 Valves 配置说明 +- 生成使用示例和最佳实践 +- 支持中英双语输出 + +**核心功能**: +```python +""" +title: 插件文档生成器 +version: 1.0.0 +""" + +from pydantic import BaseModel, Field + +class Action: + class Valves(BaseModel): + output_format: str = Field( + default="markdown", + description="输出格式 (markdown/html/pdf)" + ) + include_code: bool = Field( + default=True, + description="是否包含代码示例" + ) + language: str = Field( + default="zh-CN", + description="文档语言" + ) + template_style: str = Field( + default="detailed", + description="模板风格 (minimal/standard/detailed)" + ) + + async def action(self, body, __user__, __event_emitter__, __request__): + # 1. 解析插件代码 + plugin_code = body["messages"][-1]["content"] + + # 2. 提取元数据 + metadata = self.extract_metadata(plugin_code) + + # 3. 解析 Valves 配置 + valves = self.parse_valves(plugin_code) + + # 4. 
生成文档 + doc = self.generate_documentation(metadata, valves) + + return doc +``` + +**输出示例**: +```markdown +# 📖 插件文档:智能摘要生成器 + +## 基本信息 +| 属性 | 值 | +|------|-----| +| 名称 | Smart Summary | +| 版本 | 1.0.0 | +| 类型 | Action | +| 作者 | @your_name | + +## 功能说明 +该插件可以自动分析对话内容,生成结构化摘要... + +## 配置参数 + +| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| max_length | int | 500 | 摘要最大长度 | +| style | str | "bullet" | 摘要风格 | + +## 使用示例 +1. 在聊天界面选择该插件 +2. 发送需要总结的长文本 +3. 点击插件按钮生成摘要 + +## 常见问题 +Q: 支持多语言吗? +A: 是的,自动检测输入语言... +``` + +--- + +#### 插件 3:教程脚本生成器 (Tutorial Script Generator) + +**插件类型**:Action + +**功能描述**: +- 根据功能点自动生成教程脚本 +- 包含分步骤操作指南 +- 生成配套的录屏提示 +- 支持多种教程类型(入门/进阶/实战) +- 自动生成时间轴和章节标记 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + tutorial_type: str = Field( + default="beginner", + description="教程类型 (beginner/intermediate/advanced/practical)" + ) + target_duration: int = Field( + default=10, + description="目标时长(分钟)" + ) + platform: str = Field( + default="bilibili", + description="发布平台 (bilibili/youtube/douyin)" + ) + include_timestamps: bool = Field( + default=True, + description="是否生成时间轴" + ) + voice_style: str = Field( + default="casual", + description="配音风格 (casual/professional/energetic)" + ) +``` + +**输出示例**: +```markdown +## 📹 教程脚本:OpenWebUI 插件开发入门 + +### 视频信息 +- **标题**:5分钟学会开发你的第一个 OpenWebUI 插件 +- **时长**:约 8 分钟 +- **难度**:入门级 +- **适合人群**:OpenWebUI 用户、Python 初学者 + +--- + +### 📋 时间轴 + +| 时间 | 章节 | 内容 | +|------|------|------| +| 0:00-0:30 | 开场 | 介绍本期内容 | +| 0:30-2:00 | 概念介绍 | 什么是 OpenWebUI 插件 | +| 2:00-5:00 | 实战演示 | 创建第一个 Action 插件 | +| 5:00-7:00 | 部署测试 | 上传并测试插件 | +| 7:00-8:00 | 总结 | 回顾要点 + 下期预告 | + +--- + +### 🎬 分镜脚本 + +#### 场景 1:开场 (0:00-0:30) +**画面**:博主出镜 + OpenWebUI logo +**口播**: +> 大家好,今天教大家开发你的第一个 OpenWebUI 插件! +> 不需要任何编程基础,跟着我做,5分钟就能学会! 
+ +**录屏提示**:无 + +--- + +#### 场景 2:概念介绍 (0:30-2:00) +**画面**:PPT/动画演示 +**口播**: +> OpenWebUI 的插件系统非常强大,分为三种类型: +> - Filter:处理输入输出 +> - Action:添加自定义按钮 +> - Pipe:创建自定义模型 +> +> 今天我们先从最简单的 Action 插件开始... + +**录屏提示**:展示插件类型对比图 + +--- + +#### 场景 3:实战演示 (2:00-5:00) +**画面**:屏幕录制 +**口播**: +> 现在打开你的代码编辑器,新建一个文件... +> 首先我们需要定义插件的元数据... + +**录屏提示**: +1. 打开 VS Code +2. 新建 `my_first_plugin.py` +3. 输入代码模板 +4. 逐行讲解 + +--- + +### 📝 B站简介模板 + +~~~ +【保姆级教程】5分钟开发你的第一个 OpenWebUI 插件! + +⏰ 时间轴: +00:00 开场介绍 +00:30 插件类型讲解 +02:00 实战开发 +05:00 部署测试 +07:00 总结回顾 + +📦 资源下载: +- 代码模板:github.com/xxx +- 插件合集:xxx + +🔗 相关视频: +- OpenWebUI 安装教程 +- 插件进阶开发 + +#OpenWebUI #AI工具 #插件开发 +~~~ +``` + +--- + +#### 插件 4:功能演示录制助手 (Demo Recorder Helper) + +**插件类型**:Action + +**功能描述**: +- 生成功能演示的标准化流程 +- 提供录屏检查清单 +- 自动生成演示数据和测试用例 +- 生成字幕文本 +- 支持多场景演示脚本 + +**输出示例**: +```markdown +## 🎬 功能演示录制清单 + +### 演示功能:Filter 插件 - 上下文压缩 + +#### ✅ 录制前检查 +- [ ] OpenWebUI 版本:v0.4.5+ +- [ ] 插件已安装并启用 +- [ ] 测试数据已准备 +- [ ] 录屏软件已打开 (建议 OBS) +- [ ] 分辨率设置:1920x1080 +- [ ] 字体大小:已放大便于观看 + +#### 📝 演示步骤 + +**Step 1:展示问题场景** (30s) +- 打开一个长对话(10轮以上) +- 展示 token 消耗提示 +- 说明:"对话太长会消耗大量 token" + +**Step 2:启用插件** (20s) +- 打开聊天设置 +- 找到 Filter 插件 +- 启用"上下文压缩" +- 说明:"现在我们启用压缩插件" + +**Step 3:演示效果** (40s) +- 继续对话 +- 展示压缩后的 token 数 +- 对比压缩前后 +- 说明:"token 消耗减少了 60%" + +#### 🎤 配套字幕 + +~~~srt +1 +00:00:00,000 --> 00:00:03,000 +大家好,今天演示上下文压缩插件 + +2 +00:00:03,000 --> 00:00:08,000 +可以看到这个对话已经很长了 + +3 +00:00:08,000 --> 00:00:12,000 +每次请求都会消耗大量 token +~~~ +``` + +--- + +#### 插件 5:素材库管理器 (Asset Manager) + +**插件类型**:Action + +**功能描述**: +- 管理 OpenWebUI 相关的截图、录屏素材 +- 自动分类和标签 +- 生成素材使用记录 +- 支持快速检索 +- 生成素材引用代码 + +**Valves 配置**: +```python +from pydantic import BaseModel, Field +from typing import List + +class Valves(BaseModel): + storage_path: str = Field( + default="./assets", + description="素材存储路径" + ) + auto_categorize: bool = Field( + default=True, + description="自动分类素材" + ) + categories: List[str] = Field( + default=["screenshots", "recordings", "icons", "diagrams"], + 
description="素材分类" + ) + generate_thumbnails: bool = Field( + default=True, + description="自动生成缩略图" + ) +``` + +--- + +### 10.3 OpenWebUI 内容选题矩阵 📊 + +针对 OpenWebUI 垂直领域,推荐以下内容选题分类: + +#### 内容类型矩阵 + +| 内容类型 | 频率 | 难度 | 目标受众 | 示例选题 | +|---------|------|------|---------|---------| +| **入门教程** | 周更 | ⭐ | 新手用户 | 安装部署、基础配置、界面介绍 | +| **插件教程** | 周更 | ⭐⭐ | 进阶用户 | Filter/Action/Pipe 开发 | +| **实战案例** | 双周更 | ⭐⭐⭐ | 开发者 | 具体插件开发全流程 | +| **版本解读** | 跟随更新 | ⭐⭐ | 全部用户 | 新功能介绍、升级指南 | +| **问题解决** | 按需 | ⭐⭐ | 遇到问题的用户 | 常见错误排查、优化技巧 | +| **对比评测** | 月更 | ⭐⭐⭐ | 决策者 | 与其他工具对比 | + +#### 选题日历模板 + +```markdown +## 📅 12月内容计划 + +### 第1周 +- 周一:【入门】OpenWebUI v0.4.5 新功能速览 +- 周三:【教程】Filter 插件开发入门 +- 周五:【实战】开发一个 Markdown 增强插件 + +### 第2周 +- 周一:【问答】OpenWebUI 常见问题 Top 10 +- 周三:【进阶】Pipe 插件与外部 API 集成 +- 周五:【案例】用插件实现自动摘要功能 + +### 第3周 +... +``` + +--- + +### 10.4 OpenWebUI 博主工具箱 🧰 + +作为 OpenWebUI 垂直领域博主,建议配备以下工具链: + +| 工具类型 | 推荐工具 | 用途 | +|---------|---------|------| +| **代码编辑** | VS Code + Python 插件 | 插件开发、代码演示 | +| **录屏软件** | OBS Studio | 教程录制 | +| **截图工具** | Snipaste / CleanShot | 界面截图 | +| **图表绘制** | Excalidraw / Draw.io | 流程图、架构图 | +| **视频剪辑** | 剪映 / DaVinci Resolve | 视频后期 | +| **封面设计** | Canva / Figma | 缩略图制作 | +| **文档协作** | Notion / 语雀 | 脚本撰写、素材管理 | + +--- + +### 10.5 内容变现路径 💰 + +OpenWebUI 垂直领域的变现建议: + +``` + OpenWebUI 自媒体变现路径 + │ + ┌─────────────────────┼─────────────────────┐ + │ │ │ + 📚 知识付费 🛠️ 技术服务 🤝 商业合作 + │ │ │ + ├─付费专栏 ├─插件定制开发 ├─品牌合作 + ├─视频课程 ├─部署咨询服务 ├─产品推广 + ├─1v1 答疑 ├─技术顾问 ├─社区运营 + └─会员社群 └─企业培训 └─开源贡献 +``` + +**建议变现节奏**: +1. **0-1000 粉丝**:专注内容质量,建立专业形象 +2. **1000-5000 粉丝**:开通付费专栏,建立社群 +3. **5000+ 粉丝**:承接定制开发,开设系统课程 + +--- + +### 10.6 OpenWebUI 内容创作系统提示词库 📝 + +> 专为 OpenWebUI 垂直领域自媒体设计的系统提示词,可直接在 OpenWebUI 中使用 + +#### 提示词 1:版本更新解读专家 + +```markdown +# 角色定位 +你是一位专注于 OpenWebUI 项目的技术内容创作者,擅长将技术更新转化为易懂的内容。 + +# 核心能力 +- 深入理解 OpenWebUI 架构和功能 +- 精通插件开发(Filter/Action/Pipe) +- 熟悉 AI 应用和 LLM 集成 +- 擅长技术内容创作和科普 + +# 工作流程 +当收到 OpenWebUI 版本更新信息时,请按以下步骤处理: + +1. 
**更新解析** + - 识别重大功能更新 + - 标记 Breaking Changes + - 提取关键技术点 + +2. **内容策划** + - 评估内容类型(入门/进阶/实战) + - 确定目标受众 + - 设计内容大纲 + +3. **生成输出** + 提供以下内容: + - 📰 更新速报(200字内) + - 🎯 核心亮点(3-5条) + - 📹 视频选题建议(含标题) + - 📝 文章大纲 + - 💡 实战案例建议 + +# 输出格式 +~~~markdown +## 📰 OpenWebUI v[版本号] 更新速报 + +[简短描述,突出最重要的更新] + +## 🔥 核心亮点 +1. [亮点1] - [为什么重要] +2. [亮点2] - [为什么重要] +3. [亮点3] - [为什么重要] + +## 📹 推荐选题 +### 视频1:[标题] +- 类型:教程/评测/实战 +- 难度:⭐⭐⭐ +- 预计时长:X分钟 +- 核心内容:[简述] + +## 📝 文章大纲 +[提供详细的文章结构] + +## 💡 实战案例 +[基于新功能的实际应用场景] +~~~ + +# 注意事项 +- 保持中文输出,术语使用中英对照 +- 避免过度技术化,照顾初学者 +- 强调实用价值和应用场景 +- 提供可操作的学习路径 +``` + +--- + +#### 提示词 2:插件教程创作助手 + +```markdown +# 角色定位 +你是 OpenWebUI 插件开发教程的专业创作者,能够将复杂的插件开发过程转化为易学的教程内容。 + +# 专业领域 +- OpenWebUI 插件系统(Filter/Action/Pipe) +- Python 异步编程 +- Pydantic 配置管理 +- LLM API 调用 +- 前端交互设计 + +# 任务说明 +当收到插件开发需求时,生成完整的教程内容,包括: + +## 输出结构 + +### 1. 教程基本信息 +~~~yaml +标题: [吸引人的标题] +副标题: [说明具体功能] +难度: 入门/进阶/高级 +预计时间: X分钟 +前置知识: [列出需要的基础] +~~~ + +### 2. 开场白(吸引注意) +- 用一个实际问题或场景开场 +- 说明这个插件能解决什么问题 +- 展示最终效果 + +### 3. 核心内容 + +#### 3.1 概念讲解 +- 插件类型选择理由 +- 工作原理图解 +- 关键概念说明 + +#### 3.2 代码实现(分步骤) +~~~python +# Step 1: 基础结构 +[代码 + 详细注释] + +# Step 2: 配置参数 +[代码 + 详细注释] + +# Step 3: 核心逻辑 +[代码 + 详细注释] +~~~ + +#### 3.3 部署测试 +- 上传步骤 +- 配置方法 +- 测试用例 +- 常见问题 + +### 4. 进阶扩展 +- 功能增强建议 +- 性能优化技巧 +- 最佳实践 + +### 5. 完整代码 +- 提供完整的可运行代码 +- 添加详细注释 +- 标注关键部分 + +## 教学原则 +1. **渐进式**:从简单到复杂 +2. **可视化**:多用图表和示例 +3. **实战导向**:每个概念都有实际应用 +4. 
**互动性**:鼓励读者尝试和修改 + +## 输出要求 +- 使用 Markdown 格式 +- 代码块要有语法高亮 +- 重点内容用表格或列表 +- 添加适当的 emoji 增强可读性 +``` + +--- + +#### 提示词 3:视频脚本生成器 + +```markdown +# 角色定位 +你是专业的技术类短视频脚本创作者,专注于 OpenWebUI 相关内容。 + +# 创作标准 +- 平台:抖音/B站/YouTube +- 时长:1-15分钟 +- 风格:通俗易懂、节奏紧凑 +- 目标:知识传播 + 粉丝增长 + +# 脚本模板 + +## 基本信息 +```yaml +视频标题: [标题] +副标题: [副标题] +目标时长: X分钟 +适合平台: [平台] +内容类型: 教程/评测/实战/新闻 +``` + +## 脚本结构 + +### 【开场】(0-10秒) - 黄金钩子 +**画面**: [描述] +**口播**: +> [用问题/数据/痛点开场,3秒抓住注意力] + +**字幕**: [强调关键词] + +--- + +### 【问题引入】(10-30秒) +**画面**: [描述] +**口播**: +> [说明为什么要学这个,观众能获得什么] + +--- + +### 【核心内容】(30秒-X分钟) + +#### 要点1 (时间) +**画面**: [录屏/PPT/动画] +**口播**: +> [讲解内容,通俗易懂] + +**演示**: [具体操作步骤] + +#### 要点2 (时间) +**画面**: [描述] +**口播**: +> [内容] + +--- + +### 【总结】(最后30秒) +**画面**: [总结页面] +**口播**: +> [回顾重点,强调价值] + +**CTA**: +> [引导关注/评论/转发] + +## 配套元素 + +### 封面文案 +- 主标题: [大号字] +- 副标题: [小号字] +- 元素: [emoji/图标] + +### 视频简介 +``` +[3行描述 + 时间轴 + 相关链接] +``` + +### 评论区引导 +[预设 3-5 个互动问题] + +## 创作要点 +1. **节奏控制**:信息密度适中,避免拖沓 +2. **视觉辅助**:关键信息用字幕/标注强化 +3. **情感连接**:用第二人称"你"拉近距离 +4. **价值先行**:前30秒必须展示价值 +``` + +--- + +#### 提示词 4:技术文章撰写助手 + +```markdown +# 角色定位 +你是 OpenWebUI 技术内容撰写专家,擅长将技术知识转化为高质量文章。 + +# 文章类型 +1. **入门教程** - 面向新手,详细步骤 +2. **实战案例** - 解决实际问题 +3. **技术解析** - 深入原理 +4. **最佳实践** - 总结经验 +5. **踩坑指南** - 问题排查 + +# 写作框架 + +## 标题设计 +- 主标题:[吸引人 + 包含关键词] +- 副标题:[说明价值 + 降低门槛] + +## 文章结构 + +### 1. 引言 (10%) +```markdown +## 为什么需要这个功能? + +[场景描述] +[痛点分析] +[解决方案预览] + +**本文你将学到:** +- [要点1] +- [要点2] +- [要点3] +``` + +### 2. 背景知识 (15%) +```markdown +## 基础概念 + +[必要的概念解释] +[示意图] +[与读者已知知识的关联] +``` + +### 3. 实现步骤 (50%) +```markdown +## 实现步骤 + +### Step 1: [步骤名称] +[详细说明] +```代码 +[代码示例] +``` +💡 **提示**: [注意事项] + +### Step 2: [步骤名称] +... +``` + +### 4. 测试验证 (10%) +```markdown +## 测试与验证 + +[测试用例] +[预期结果] +[实际演示] +``` + +### 5. 进阶内容 (10%) +```markdown +## 进阶优化 + +### 性能优化 +[优化建议] + +### 功能扩展 +[扩展方向] + +### 常见问题 +Q: [问题] +A: [解答] +``` + +### 6. 总结 (5%) +```markdown +## 总结 + +本文介绍了 [核心内容],主要知识点: +1. [要点1] +2. [要点2] +3. 
[要点3] + +**相关资源:** +- [代码仓库] +- [参考文档] +- [讨论社区] +``` + +## 写作技巧 +1. **金字塔原理**:结论先行 +2. **代码注释**:每段代码都要解释 +3. **可视化**:多用图表、表格、代码块 +4. **互动性**:设置思考题、练习题 +5. **SEO 优化**:标题、关键词、内链 + +## 质量检查 +- [ ] 标题吸引人且准确 +- [ ] 代码可运行 +- [ ] 图片清晰 +- [ ] 排版规范 +- [ ] 链接有效 +- [ ] 无错别字 +``` + +--- + +#### 提示词 5:社区互动管理助手 + +```markdown +# 角色定位 +你是 OpenWebUI 社区的互动管理专家,擅长与粉丝沟通和内容运营。 + +# 核心职责 +1. 回复评论和私信 +2. 收集用户反馈 +3. 发现内容选题 +4. 维护社区氛围 + +# 互动策略 + +## 评论回复原则 +1. **及时**:24小时内回复 +2. **专业**:准确回答技术问题 +3. **友好**:保持亲和力 +4. **引导**:转化为内容素材 + +## 回复模板 + +### 类型1:技术咨询 +``` +感谢提问![针对性解答] + +💡 这个问题很有代表性,我会考虑出一期详细教程。 + +如果解决了你的问题,麻烦点个赞让更多人看到~ +``` + +### 类型2:功能建议 +``` +很赞的想法![具体分析] + +这个功能确实有需求,我会在后续内容中涉及。 + +关注我的账号,第一时间收到更新通知! +``` + +### 类型3:问题反馈 +``` +感谢反馈![问题确认] + +我会尝试复现并找出解决方案,预计X天内发布解决教程。 + +可以加入我的学习群(简介有链接),第一时间获取答案~ +``` + +### 类型4:表扬认可 +``` +感谢支持![真诚回应] + +你的认可是我创作的动力,后续会继续输出优质内容。 + +有想看的主题欢迎留言点播! +``` + +## 选题收集 +从评论中识别高频问题和需求,转化为内容选题: + +**选题记录模板**: +```markdown +## 待开发选题 + +### [日期] 来自评论 +- 用户痛点:[描述] +- 需求频次:⭐⭐⭐ +- 内容类型:教程/问答 +- 优先级:高/中/低 +- 预计篇幅:[时长/字数] +``` + +## 数据分析 +定期分析互动数据: +- 评论质量和类型分布 +- 高频问题TOP10 +- 粉丝画像变化 +- 内容效果对比 + +## 注意事项 +- 避免争论,保持专业 +- 不回复广告和恶意评论 +- 保护用户隐私 +- 及时更新FAQ文档 +``` + +--- + +#### 提示词 6:内容规划战略家 + +```markdown +# 角色定位 +你是 OpenWebUI 内容矩阵的战略规划者,负责长期内容规划和账号成长策略。 + +# 工作内容 + +## 1. 月度内容规划 + +### 规划维度 +| 维度 | 说明 | +|------|------| +| 主题方向 | OpenWebUI 核心功能/插件开发/实战案例 | +| 内容比例 | 40%教程 + 30%实战 + 20%资讯 + 10%互动 | +| 发布频率 | 周更3次,固定时间 | +| 平台策略 | B站长视频 + 抖音短视频 + 公众号图文 | + +### 月度模板 +```markdown +## X月内容计划 + +### 主题:[月度主题] + +#### 第1周 +- 周一:[内容] - 平台:[平台] - 类型:[类型] +- 周三:[内容] - 平台:[平台] - 类型:[类型] +- 周五:[内容] - 平台:[平台] - 类型:[类型] + +#### 第2周 +... + +### 关键目标 +- 粉丝增长:+[数字] +- 互动率提升:+[百分比] +- 专栏产品化:[阶段] + +### 备选选题池 +1. [备选1] - 触发条件:[说明] +2. [备选2] - 触发条件:[说明] +``` + +## 2. 内容矩阵设计 + +### 平台定位 +``` +B站(长视频) 抖音(短视频) 公众号(图文) + ↓ ↓ ↓ +系统教程 快速技巧 深度文章 +15-30分钟 1-3分钟 2000字+ +完整流程 单一功能 原理解析 + ↓ ↓ ↓ + 互相导流,形成内容生态 +``` + +### 内容复用策略 +一个核心内容,多平台改编: +1. **B站**:完整教程(20分钟) +2. 
**抖音**:精华片段3条(各1分钟) +3. **公众号**:图文教程 + 代码 +4. **小红书**:图文卡片版 +5. **知乎**:深度技术解析 + +## 3. 增长策略 + +### 冷启动期(0-1000粉) +- 聚焦细分领域 +- 保证发布频率 +- 主动参与社区 +- 寻找种子用户 + +### 成长期(1000-5000粉) +- 建立个人品牌 +- 开设付费内容 +- 建立用户社群 +- 承接商业合作 + +### 成熟期(5000+粉) +- 系统化课程 +- 技术咨询服务 +- 孵化产品项目 +- 培养团队 + +## 4. 数据驱动优化 + +### 关键指标 +- 播放完成率 +- 点赞/收藏比 +- 评论互动率 +- 粉丝增长率 +- 转化率(付费) + +### 优化循环 +``` +数据采集 → 分析洞察 → 策略调整 → 内容优化 → 数据采集 +``` + +## 输出格式 +每月提供: +1. 📅 月度内容日历 +2. 📊 上月数据分析报告 +3. 💡 优化建议 +4. 🎯 下月增长目标 +``` + +--- + +## 技术实现指南 + +### 通用开发模式 + +#### 1. 数据获取层 +```python +import httpx + +class DataFetcher: + """外部数据获取的统一接口""" + + async def fetch_stock_data(self, symbol: str, period: str): + """获取股票数据""" + async with httpx.AsyncClient() as client: + response = await client.get(f"{API_URL}/stock/{symbol}") + return response.json() + + async def fetch_weather(self, location: str): + """获取天气数据""" + pass + + async def fetch_news(self, keywords: list): + """获取新闻数据""" + pass +``` + +#### 2. LLM 调用封装 +```python +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +async def call_llm(request, user_id: str, system_prompt: str, user_prompt: str, model_id: str = None): + """统一的 LLM 调用封装""" + user_obj = Users.get_user_by_id(user_id) + + response = await generate_chat_completion( + request, + { + "model": model_id or "gpt-4", + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ], + "stream": False + }, + user_obj + ) + + return response["choices"][0]["message"]["content"] +``` + +#### 3. 可视化输出模板 +```python +import json + +def generate_chart_html(chart_type: str, data: dict, options: dict = None) -> str: + """生成 ECharts 图表 HTML""" + + html = f""" + + + + + + + + +
+ + +""" + + return f"```html\n{html}\n```" +``` + +### API 集成最佳实践 + +#### 1. 认证管理 +```python +class APIManager: + def __init__(self, valves): + self.valves = valves + self._token_cache = {} + + async def get_auth_header(self, service: str) -> dict: + """获取认证头,支持缓存和刷新""" + if service not in self._token_cache or self._is_token_expired(service): + await self._refresh_token(service) + + return {"Authorization": f"Bearer {self._token_cache[service]}"} +``` + +#### 2. 请求重试和错误处理 +```python +import asyncio +import httpx +from tenacity import retry, stop_after_attempt, wait_exponential + +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10)) +async def fetch_with_retry(url: str, headers: dict = None): + """带重试的请求""" + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.get(url, headers=headers) + response.raise_for_status() + return response.json() +``` + +### 性能优化技巧 + +#### 1. 并发处理 +```python +import asyncio +from typing import List + +async def process_multiple_stocks(symbols: List[str]): + """并发获取多只股票数据""" + tasks = [fetch_stock_data(symbol) for symbol in symbols] + results = await asyncio.gather(*tasks, return_exceptions=True) + return results +``` + +#### 2. 
缓存策略 +```python +from datetime import datetime, timedelta + +class CacheManager: + def __init__(self, ttl_seconds: int = 300): + self._cache = {} + self._ttl = ttl_seconds + + def get(self, key: str): + if key in self._cache: + value, timestamp = self._cache[key] + if datetime.now() - timestamp < timedelta(seconds=self._ttl): + return value + return None + + def set(self, key: str, value): + self._cache[key] = (value, datetime.now()) +``` + +--- + +## 开发优先级建议 + +基于实用性、技术可行性和市场需求,推荐以下开发优先级: + +### 🔴 高优先级(短期,1-2个月) + +| 插件 | 方向 | 原因 | +|------|------|------| +| 财报解读师 | A股投资 | 市场需求大,技术成熟 | +| 会议精灵 | 工作效率 | 刚需场景,用户基数大 | +| 图表大师 | 数据开发 | 通用性强,复用价值高 | +| **标题党** | **自媒体创作** | **自媒体刚需,技术门槛低** | +| **文案魔方** | **自媒体创作** | **内容创作核心工具** | + +### 🟡 中优先级(中期,2-4个月) + +| 插件 | 方向 | 原因 | +|------|------|------| +| 闪词卡 | 学习辅助 | 教育市场广阔 | +| K线解读 | A股投资 | 与财报解读师形成组合 | +| 今天吃啥 | 生活服务 | 高频使用场景 | +| 邮件专家 | 工作效率 | 通用办公场景 | +| **选题雷达** | **自媒体创作** | **提升内容策划效率** | +| **数据罗盘** | **自媒体创作** | **运营必备分析工具** | + +### 🟢 低优先级(长期,4-6个月) + +| 插件 | 方向 | 原因 | +|------|------|------| +| 旅程设计师 | 生活服务 | 需要大量外部数据整合 | +| 健康管家 | 健康管理 | 需要设备数据接入 | +| 知识织网 | 学习辅助 | 技术复杂度较高 | +| 评论达人 | 自媒体创作 | 需要平台 API 支持 | +| 封面工坊 | 自媒体创作 | 需要图像生成能力 | + +--- + +## 总结 + +本文档提供了 10 个主要方向、27+ 个具体插件建议的详细开发路线图。每个插件都包含了: + +- ✅ 清晰的功能定义 +- ✅ 技术实现框架 +- ✅ Valves 配置示例 +- ✅ 输出格式参考 +- ✅ 应用场景说明 + +### 🎯 OpenWebUI 自媒体博主专属 + +针对 OpenWebUI 垂直领域自媒体博主,本文档特别提供了: + +- ✅ 标准化内容生产流程(5步法) +- ✅ 专属插件套件(版本追踪、文档生成、教程脚本、录制助手、素材管理) +- ✅ 内容选题矩阵和日历模板 +- ✅ 博主工具箱推荐 +- ✅ 内容变现路径规划 + +### 下一步行动 + +1. **选择方向**:根据团队能力和市场需求选择 1-2 个方向 +2. **MVP 开发**:先完成核心功能,快速验证 +3. **用户反馈**:收集使用反馈,持续迭代 +4. **生态建设**:鼓励社区贡献,丰富插件库 + +--- + +*最后更新:2024-12-02* +*本文档持续更新中,欢迎贡献更多创意和建议* diff --git a/docs/zh/plugin_development_guide.md b/docs/zh/plugin_development_guide.md new file mode 100644 index 0000000..74570fd --- /dev/null +++ b/docs/zh/plugin_development_guide.md @@ -0,0 +1,234 @@ +# OpenWebUI 插件开发权威指南 + +> 本指南整合了官方文档、SDK 详解及最佳实践,旨在为开发者提供一份从入门到精通的系统化教程。 + +## 📚 目录 + +1. 
[插件开发快速入门](#1-插件开发快速入门) +2. [核心概念与 SDK 详解](#2-核心概念与-sdk-详解) +3. [插件类型深度解析](#3-插件类型深度解析) + * [Action (动作)](#31-action-动作) + * [Filter (过滤器)](#32-filter-过滤器) + * [Pipe (管道)](#33-pipe-管道) +4. [高级开发模式](#4-高级开发模式) +5. [最佳实践与设计原则](#5-最佳实践与设计原则) +6. [故障排查](#6-故障排查) + +--- + +## 1. 插件开发快速入门 + +### 1.1 什么是 OpenWebUI 插件? + +OpenWebUI 插件(官方称为 "Functions")是扩展平台功能的主要方式。它们运行在后端 Python 环境中,允许你: +* 🔌 **集成新模型**:通过 Pipe 接入 Claude、Gemini 或自定义 RAG。 +* 🎨 **增强交互**:通过 Action 在消息旁添加按钮(如"导出"、"生成图表")。 +* 🔧 **干预流程**:通过 Filter 在请求前后修改数据(如注入上下文、敏感词过滤)。 + +### 1.2 你的第一个插件 (Hello World) + +保存以下代码为 `hello.py` 并上传到 OpenWebUI 的 **Functions** 面板: + +```python +""" +title: Hello World Action +author: Demo +version: 1.0.0 +""" + +from pydantic import BaseModel, Field +from typing import Optional + +class Action: + class Valves(BaseModel): + greeting: str = Field(default="你好", description="问候语") + + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __event_emitter__=None, + __user__=None + ) -> Optional[dict]: + user_name = __user__.get("name", "朋友") if __user__ else "朋友" + + if __event_emitter__: + await __event_emitter__({ + "type": "notification", + "data": {"type": "success", "content": f"{self.valves.greeting}, {user_name}!"} + }) + return body +``` + +--- + +## 2. 
核心概念与 SDK 详解 + +### 2.1 ⚠️ 重要:同步与异步 + +OpenWebUI 插件运行在 `asyncio` 事件循环中。 +* **原则**:所有 I/O 操作(数据库、文件、网络)必须非阻塞。 +* **陷阱**:直接调用同步方法(如 `time.sleep`, `requests.get`)会卡死整个服务器。 +* **解决**:使用 `await asyncio.to_thread(sync_func, ...)` 包装同步调用。 + +### 2.2 核心参数详解 + +所有插件方法(`inlet`, `outlet`, `pipe`, `action`)都支持注入以下特殊参数: + +| 参数名 | 类型 | 说明 | +| :--- | :--- | :--- | +| `body` | `dict` | **核心数据**。包含 `messages`, `model`, `stream` 等请求信息。 | +| `__user__` | `dict` | **当前用户**。包含 `id`, `name`, `role`, `valves` (用户配置) 等。 | +| `__metadata__` | `dict` | **元数据**。包含 `chat_id`, `message_id`。其中 `variables` 字段包含 `{{USER_NAME}}`, `{{CURRENT_TIME}}` 等预置变量。 | +| `__request__` | `Request` | **FastAPI 请求对象**。可访问 `app.state` 进行跨插件通信。 | +| `__event_emitter__` | `func` | **单向通知**。用于发送 Toast 通知或状态条更新。 | +| `__event_call__` | `func` | **双向交互**。用于在前端执行 JS 代码、弹出确认框或输入框。 | + +### 2.3 配置系统 (Valves) + +* **`Valves`**: 管理员全局配置。 +* **`UserValves`**: 用户级配置(优先级更高,可覆盖全局)。 + +```python +class Filter: + class Valves(BaseModel): + API_KEY: str = Field(default="", description="全局 API Key") + + class UserValves(BaseModel): + API_KEY: str = Field(default="", description="用户私有 API Key") + + def inlet(self, body, __user__): + # 优先使用用户的 Key + user_valves = __user__.get("valves", self.UserValves()) + api_key = user_valves.API_KEY or self.valves.API_KEY +``` + +--- + +## 3. 插件类型深度解析 + +### 3.1 Action (动作) + +**定位**:在消息下方添加按钮,用户点击触发。 + +**高级用法:前端执行 JavaScript (文件下载示例)** + +```python +import base64 + +async def action(self, body, __event_call__): + # 1. 后端生成内容 + content = "Hello OpenWebUI".encode() + b64 = base64.b64encode(content).decode() + + # 2. 
发送 JS 到前端执行 + js = f""" + const blob = new Blob([atob('{b64}')], {{type: 'text/plain'}}); + const a = document.createElement('a'); + a.href = URL.createObjectURL(blob); + a.download = 'hello.txt'; + a.click(); + """ + await __event_call__({"type": "execute", "data": {"code": js}}) +``` + +### 3.2 Filter (过滤器) + +**定位**:中间件,拦截并修改请求/响应。 + +* **`inlet`**: 请求前。用于注入上下文、修改模型参数。 +* **`outlet`**: 响应后。用于格式化输出、保存日志。 +* **`stream`**: 流式处理中。用于实时敏感词过滤。 + +**示例:注入环境变量** + +```python +async def inlet(self, body, __metadata__): + vars = __metadata__.get("variables", {}) + context = f"当前时间: {vars.get('{{CURRENT_DATETIME}}')}" + + # 注入到 System Prompt 或第一条消息 + if body.get("messages"): + body["messages"][0]["content"] += f"\n\n{context}" + return body +``` + +### 3.3 Pipe (管道) + +**定位**:自定义模型/代理。 + +**示例:简单的 OpenAI 代理** + +```python +import requests + +class Pipe: + def pipes(self): + return [{"id": "my-gpt", "name": "My GPT Wrapper"}] + + def pipe(self, body): + # 可以在这里修改 body,例如强制添加 prompt + headers = {"Authorization": f"Bearer {self.valves.API_KEY}"} + r = requests.post("https://api.openai.com/v1/chat/completions", json=body, headers=headers, stream=True) + return r.iter_lines() +``` + +--- + +## 4. 高级开发模式 + +### 4.1 Pipe 与 Filter 协同 +利用 `__request__.app.state` 在不同插件间共享数据。 +* **Pipe**: `__request__.app.state.search_results = [...]` +* **Filter (Outlet)**: 读取 `search_results` 并将其格式化为引用链接附加到回复末尾。 + +### 4.2 异步后台任务 +不阻塞用户响应,在后台执行耗时操作(如生成总结、存库)。 + +```python +import asyncio + +async def outlet(self, body, __metadata__): + asyncio.create_task(self.background_job(__metadata__["chat_id"])) + return body + +async def background_job(self, chat_id): + # 执行耗时操作... + pass +``` + +--- + +## 5. 
最佳实践与设计原则 + +### 5.1 命名与定位 +* **简短有力**:如 "闪记卡", "精读"。避免 "文本分析助手" 这种泛词。 +* **功能互补**:不要重复造轮子,明确你的插件解决了什么特定问题。 + +### 5.2 用户体验 (UX) +* **反馈及时**:耗时操作前先发送 `notification` ("正在生成...")。 +* **视觉美观**:Action 输出 HTML 时,使用现代化的 CSS(圆角、阴影、渐变)。 +* **智能引导**:检测到文本过短时,提示用户"建议输入更多内容以获得更好结果"。 + +### 5.3 错误处理 +永远不要让插件静默失败。捕获异常并通过 `__event_emitter__` 告知用户。 + +```python +try: + # 业务逻辑 +except Exception as e: + await __event_emitter__({ + "type": "notification", + "data": {"type": "error", "content": f"处理失败: {str(e)}"} + }) +``` + +--- + +## 6. 故障排查 + +* **HTML 不显示?** 确保包裹在 ` ```html ... ``` ` 代码块中。 +* **数据库报错?** 检查是否在 `async` 函数中直接调用了同步的 DB 方法,请使用 `asyncio.to_thread`。 +* **参数未生效?** 检查 `Valves` 定义是否正确,以及是否被 `UserValves` 覆盖。 diff --git a/docs/zh/从问一个AI到运营一支AI团队.md b/docs/zh/从问一个AI到运营一支AI团队.md new file mode 100644 index 0000000..98931f6 --- /dev/null +++ b/docs/zh/从问一个AI到运营一支AI团队.md @@ -0,0 +1,2236 @@ +# 从"问一个AI"到"运营一支AI团队" + +## 解读OpenWebUI的协同野心与平台价值 + +从与一个AI对话,到指挥一支多模型协作的AI团队——这不仅仅是工具的升级,更是工作方式的革命。 + +OpenWebUI通过**协同、扩展、定制、生态**四大维度,将AI从辅助工具升级为智囊团和工作平台。 + +--- + +## 第一部分:构建AI团队的基础——多模型协同对话系统 + +### 思想的交响乐:体验多模型并行的力量 + +#### 告别选择困难:让多个 AI 同时为您服务 + +```mermaid +graph TB + subgraph "OpenWebUI 四大核心功能" + A["🔶 多模型独立并行
同一问题同时发送至多个模型
各模型维护独立上下文
同步生成独立回答"] + + B["🔷 @提及特定模型
随时指定任一模型单独回答
被@模型的回答进入共享上下文
后续并行模型可参考此内容"] + + C["🔹 智能合并总结
分析多个回答的核心观点
提炼共识、差异、独特洞察
生成综合分析报告"] + + D["🔸 内容选中与深度追问
选中任意AI回复的内容
浮动窗格展示精准对话
支持选择性的上下文注入"] + end + + subgraph "功能特性" + E["独立性
完全隔离的思考空间"] + F["协同性
通过上下文共享实现协作"] + G["智能性
自动化的内容分析与整合"] + H["精准性
微观层面的内容优化"] + end + + A --> E + B --> F + C --> G + D --> H + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#F5A623,stroke:#C27D0E,color:#fff + style D fill:#E85D75,stroke:#A23E52,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#7ED321,stroke:#5BA30A,color:#fff + style G fill:#7ED321,stroke:#5BA30A,color:#fff + style H fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +#### 独立思考,同步输出:并行工作流揭秘 + +```mermaid +graph TD + subgraph "多模型独立并行工作流" + A["👤 用户提出统一问题"] + + B["📤 问题同时分发至所选模型"] + + C["模型A
独立处理"] + D["模型B
独立处理"] + E["模型C
独立处理"] + + F["完全隔离的上下文A"] + G["完全隔离的上下文B"] + H["完全隔离的上下文C"] + + I["模型A 独立回答"] + J["模型B 独立回答"] + K["模型C 独立回答"] + + L["📥 同步展示于统一界面"] + end + + A --> B + B --> C + B --> D + B --> E + + C --> F --> I + D --> G --> J + E --> H --> K + + I --> L + J --> L + K --> L + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#4A90E2,stroke:#2E5C8A,color:#fff + style C fill:#7ED321,stroke:#5BA30A,color:#fff + style D fill:#7ED321,stroke:#5BA30A,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#50E3C2,stroke:#2EA896,color:#fff + style G fill:#50E3C2,stroke:#2EA896,color:#fff + style H fill:#50E3C2,stroke:#2EA896,color:#fff + style I fill:#F5A623,stroke:#C27D0E,color:#fff + style J fill:#F5A623,stroke:#C27D0E,color:#fff + style K fill:#F5A623,stroke:#C27D0E,color:#fff + style L fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 精准指挥,深度协作:像管理团队一样与 AI 对话 + +#### “@”一下,指定专家:随时调用特定模型 + +```mermaid +graph TD + subgraph "@提及特定模型的工作流" + A["当前状态
多个模型回答已展示"] + + B["👤 用户行为
@指定某一模型"] + + C["新问题/指令发送至被@模型"] + + D["被@模型处理
基于独立上下文"] + + E["上下文注入
被@模型的新回答
进入共享对话历史"] + + F["共享上下文更新"] + + G["后续操作选择"] + G1["继续并行模式
发起新一轮多模型并行"] + G2["继续@功能
@其他模型针对新问题回答"] + G3["合并总结
分析所有回答"] + end + + A --> B --> C --> D --> E --> F + F --> G + G --> G1 + G --> G2 + G --> G3 + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#E85D75,stroke:#A23E52,color:#fff + style C fill:#4A90E2,stroke:#2E5C8A,color:#fff + style D fill:#7ED321,stroke:#5BA30A,color:#fff + style E fill:#50E3C2,stroke:#2EA896,color:#fff + style F fill:#50E3C2,stroke:#2EA896,color:#fff + style G fill:#F5A623,stroke:#C27D0E,color:#fff + style G1 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style G2 fill:#E85D75,stroke:#A23E52,color:#fff + style G3 fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +#### 知识的传递:通过上下文注入实现 AI 间协作 + +```mermaid +graph TD + subgraph "阶段一:多模型独立探索" + A["发送统一问题"] --> B["模型A、B、C各自回答"] --> C["各维护独立上下文"] + end + + subgraph "阶段二:指定模型深度挖掘" + D["@模型A
提出深化问题"] --> E["模型A基于自身上下文
进行深度思考"] --> F["模型A新回答
进入共享对话历史"] + end + + subgraph "阶段三:新一轮并行处理" + G["发起新的多模型并行提问"] --> H["所有模型可参考
模型A的深度回答"] --> I["所有模型基于更新的
共享上下文生成新回答"] + end + + subgraph "阶段四:可选的继续@" + J["@模型B
针对新话题回答"] --> K["模型B回答进入共享上下文"] + end + + subgraph "知识演进" + L["共享上下文不断丰富"] --> M["多模型知识逐步对齐"] --> N["AI团队整体认知提升"] + end + + C --> D + F --> G + I --> J + + C --> L + F --> L + K --> L + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#7ED321,stroke:#5BA30A,color:#fff + style C fill:#50E3C2,stroke:#2EA896,color:#fff + style D fill:#E85D75,stroke:#A23E52,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#50E3C2,stroke:#2EA896,color:#fff + style G fill:#4A90E2,stroke:#2E5C8A,color:#fff + style H fill:#50E3C2,stroke:#2EA896,color:#fff + style I fill:#7ED321,stroke:#5BA30A,color:#fff + style J fill:#E85D75,stroke:#A23E52,color:#fff + style K fill:#50E3C2,stroke:#2EA896,color:#fff + style L fill:#F5A623,stroke:#C27D0E,color:#fff + style M fill:#F5A623,stroke:#C27D0E,color:#fff + style N fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +--- + +### 去粗取精,洞见未来:一键生成多维智能分析 + +#### 化繁为简:智能合并总结的工作流程 + +```mermaid +graph TD + subgraph "智能合并总结工作流" + + subgraph "输入层" + A["模型A 的回答"] + B["模型B 的回答"] + C["模型C 的回答"] + end + + subgraph "分析层" + D["内容解析
提取核心观点、论据、立场"] + E["共识识别
所有模型一致性内容"] + F["差异分析
模型间的不同视角"] + G["洞察提取
各模型的创新思想"] + end + + subgraph "合成层" + H["结构化组织信息"] + I["生成综合分析"] + J["融合最优观点"] + end + + subgraph "输出层" + K["合并总结报告
包含共识、差异、洞察、建议"] + end + end + + A --> D + B --> D + C --> D + + D --> E + D --> F + D --> G + + E --> H + F --> H + G --> H + + H --> I --> J --> K + + style A fill:#7ED321,stroke:#5BA30A,color:#fff + style B fill:#7ED321,stroke:#5BA30A,color:#fff + style C fill:#7ED321,stroke:#5BA30A,color:#fff + style D fill:#4A90E2,stroke:#2E5C8A,color:#fff + style E fill:#F5A623,stroke:#C27D0E,color:#fff + style F fill:#F5A623,stroke:#C27D0E,color:#fff + style G fill:#F5A623,stroke:#C27D0E,color:#fff + style H fill:#50E3C2,stroke:#2EA896,color:#fff + style I fill:#50E3C2,stroke:#2EA896,color:#fff + style J fill:#50E3C2,stroke:#2EA896,color:#fff + style K fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 微观雕琢,极致优化:对 AI 的每一句话进行精准追问 + +#### 选中即追问:浮动窗格带来的“对话中的对话” + +```mermaid +graph TD + subgraph "内容选中与浮动窗格工作流" + + subgraph "触发阶段" + A["多个模型的回答已展示"] + B["👤 用户选中某段内容
该内容来自模型A的回答"] + end + + subgraph "浮动窗格出现" + C["浮动窗格弹出
展示选中的内容"] + D["窗格包含两部分上下文"] + D1["完整模型对话上下文
模型A的所有历史消息"] + D2["选中的具体内容片段"] + end + + subgraph "用户操作" + E["用户在窗格中输入问题
自定义提问内容"] + F["提问示例
- 解释这个概念的含义
- 优化这段表达
- 举例说明
- 详细展开
等等"] + end + + subgraph "模型处理" + G["问题发送至选中的模型A"] + H["模型A基于完整上下文
+选中的具体内容
进行精准回答"] + end + + subgraph "结果展示" + I["回答方式选择"] + I1["仅在浮动窗格中展示
不进入主对话历史"] + I2["选择性注入主上下文
成为对话历史的一部分
其他模型可见"] + end + end + + A --> B --> C --> D + D --> D1 + D --> D2 + C --> E --> F + E --> G --> H --> I + I --> I1 + I --> I2 + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#E85D75,stroke:#A23E52,color:#fff + style C fill:#50E3C2,stroke:#2EA896,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style D1 fill:#7ED321,stroke:#5BA30A,color:#fff + style D2 fill:#7ED321,stroke:#5BA30A,color:#fff + style E fill:#E85D75,stroke:#A23E52,color:#fff + style F fill:#4A90E2,stroke:#2E5C8A,color:#fff + style G fill:#7ED321,stroke:#5BA30A,color:#fff + style H fill:#7ED321,stroke:#5BA30A,color:#fff + style I fill:#50E3C2,stroke:#2EA896,color:#fff + style I1 fill:#B8E986,stroke:#7BA30A,color:#000 + style I2 fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 临时讨论或永久记录:灵活的上下文注入策略 + +```mermaid +graph TD + subgraph "浮动窗格中的结果处理" + + subgraph "模型A 在浮动窗格中生成回答" + A["基于完整上下文"] + B["+选中的内容"] + C["+用户的提问"] + D["生成精准回答"] + end + + subgraph "用户的决策" + E["查看浮动窗格中的回答"] + F{是否满意?} + end + + subgraph "路径一:不进入主历史" + G["浮动窗格中查看"] + H["保留为临时对话"] + I["主对话历史保持不变"] + J["其他模型无法看到"] + end + + subgraph "路径二:选择性注入" + K["点击'注入上下文'"] + L["回答进入共享对话历史"] + M["成为所有模型的新上下文"] + N["后续并行提问时
所有模型都能参考"] + end + + subgraph "后续操作" + O["继续在浮动窗格中提问
or"] + P["返回主界面
进行新的并行提问
or"] + Q["继续@其他模型"] + end + end + + A --> D + B --> D + C --> D + + D --> E --> F + F -->|暂不注入| G --> H --> I + I --> J + + F -->|要注入| K --> L --> M --> N + + F --> O + F --> P + F --> Q + + style A fill:#7ED321,stroke:#5BA30A,color:#fff + style B fill:#F5A623,stroke:#C27D0E,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#4A90E2,stroke:#2E5C8A,color:#fff + style E fill:#50E3C2,stroke:#2EA896,color:#fff + style F fill:#E85D75,stroke:#A23E52,color:#fff + style G fill:#7ED321,stroke:#5BA30A,color:#fff + style H fill:#7ED321,stroke:#5BA30A,color:#fff + style I fill:#7ED321,stroke:#5BA30A,color:#fff + style J fill:#B8E986,stroke:#7BA30A,color:#000 + style K fill:#F5A623,stroke:#C27D0E,color:#fff + style L fill:#50E3C2,stroke:#2EA896,color:#fff + style M fill:#50E3C2,stroke:#2EA896,color:#fff + style N fill:#50E3C2,stroke:#2EA896,color:#fff + style O fill:#4A90E2,stroke:#2E5C8A,color:#fff + style P fill:#4A90E2,stroke:#2E5C8A,color:#fff + style Q fill:#E85D75,stroke:#A23E52,color:#fff +``` + +#### 从概念解释到内容批判:深度追问的无限可能 + +```mermaid +graph TB + subgraph "内容选中功能的典型应用" + + A["选中内容"] + + A1["概念解释
选中:复杂概念
问题:这是什么意思
结果:详细解释说明"] + + A2["表达优化
选中:某句话
问题:如何更清晰地表达
结果:多个表达方案"] + + A3["细节展开
选中:简洁的观点
问题:详细展开这个观点
结果:深入分析"] + + A4["举例补充
选中:抽象概念
问题:举具体例子
结果:生动的实例"] + + A5["逻辑校验
选中:论证过程
问题:这个逻辑是否严谨
结果:逻辑分析和改进"] + + A6["内容批评
选中:观点
问题:这个观点有什么问题
结果:批判性分析"] + end + + subgraph "关键优势" + B["精准定位
只针对选中的内容"] + C["完整上下文
理解该内容的生成背景"] + D["模型一致性
确保深度追问来自同一模型"] + E["灵活性
支持任意自定义提问"] + end + + A --> A1 + A --> A2 + A --> A3 + A --> A4 + A --> A5 + A --> A6 + + A1 --> B + A2 --> C + A3 --> D + A4 --> E + + style A fill:#E85D75,stroke:#A23E52,color:#fff + style A1 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style A2 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style A3 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style A4 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style A5 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style A6 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#7ED321,stroke:#5BA30A,color:#fff + style C fill:#7ED321,stroke:#5BA30A,color:#fff + style D fill:#7ED321,stroke:#5BA30A,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +### 从创意到方案:掌握 OpenWebUI 高效工作流 + +#### 标准化力量:一个完整的工作流框架 + +```mermaid +graph TD + + A ~~~ J + + %% Column 1 + subgraph "阶段一:启动" + A["定义问题
选择参与模型"] --> B["多模型并行
获得多元视角"] + end + + subgraph "阶段二:评估" + C["查看所有回答"] --> D["使用合并总结
获得全景分析"] --> E["识别核心共识
与关键差异"] + end + + subgraph "阶段三:内容微调" + F["选中某段重要内容"] --> G["浮动窗格打开"] --> H["针对该内容提出追问"] --> I["获得精准的微观回答"] + end + + %% Column 2 + subgraph "阶段四:聚焦" + J["确定优先方向"] --> K["@指定模型
进行宏观深度挖掘"] + end + + subgraph "阶段五:迭代" + L["被@模型回答
进入共享上下文"] --> M["发起新一轮
多模型并行"] --> N["基于更新的共享上下文
生成新回答"] + end + + subgraph "阶段六:决策" + O["可选:再次合并总结"] --> P["做出决策或
确定方向"] + end + + subgraph "阶段七:产出" + Q["根据需求继续迭代"] --> R["导出方案、
保存记录"] + end + + %% Connections + B --> C + E --> F + K --> L + N --> O + P --> Q + E --> J + I -->|注入后| J + I -->|不注入| E + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#7ED321,stroke:#5BA30A,color:#fff + style C fill:#F5A623,stroke:#C27D0E,color:#fff + style D fill:#50E3C2,stroke:#2EA896,color:#fff + style E fill:#F5A623,stroke:#C27D0E,color:#fff + style F fill:#E85D75,stroke:#A23E52,color:#fff + style G fill:#50E3C2,stroke:#2EA896,color:#fff + style H fill:#E85D75,stroke:#A23E52,color:#fff + style I fill:#4A90E2,stroke:#2E5C8A,color:#fff + style J fill:#E85D75,stroke:#A23E52,color:#fff + style K fill:#E85D75,stroke:#A23E52,color:#fff + style L fill:#7ED321,stroke:#5BA30A,color:#fff + style M fill:#4A90E2,stroke:#2E5C8A,color:#fff + style N fill:#7ED321,stroke:#5BA30A,color:#fff + style O fill:#50E3C2,stroke:#2EA896,color:#fff + style P fill:#E85D75,stroke:#A23E52,color:#fff + style Q fill:#F5A623,stroke:#C27D0E,color:#fff + style R fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 灵活应变:根据任务复杂度动态调整 + +```mermaid +graph TD + subgraph "简单任务路径" + direction LR + A["简单问题"] --> B["多模型并行"] --> C["查看回答"] --> D["快速决策"] + end + + subgraph "需要微调优化的路径" + direction LR + E["初步回答需要优化"] --> F["选中特定内容"] --> G["浮动窗格精准优化
如:表达改进
细节补充
概念解释"] --> H["选择性注入或保留"] --> I["继续主流程"] + end + + subgraph "复杂任务路径" + J["复杂问题"] --> K["多模型并行"] --> L["合并总结评估"] --> M{"需要深入某方向"} + M --> N["@指定模型深化"] + M --> O["选中内容精准追问"] + N --> P["多轮迭代"] + O --> P + P -->|继续| M + P -->|完成| Q["综合决策"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style E fill:#4A90E2,stroke:#2E5C8A,color:#fff + style J fill:#4A90E2,stroke:#2E5C8A,color:#fff + style D fill:#B8E986,stroke:#7BA30A,color:#000 + style Q fill:#B8E986,stroke:#7BA30A,color:#000 + style F fill:#E85D75,stroke:#A23E52,color:#fff + style G fill:#50E3C2,stroke:#2EA896,color:#fff + style H fill:#F5A623,stroke:#C27D0E,color:#fff + style M fill:#E85D75,stroke:#A23E52,color:#fff + style N fill:#E85D75,stroke:#A23E52,color:#fff + style O fill:#E85D75,stroke:#A23E52,color:#fff +``` + +--- + +#### 功能协同,效果倍增:四大核心如何无缝配合 + +```mermaid +graph TB + subgraph "四大功能的协同体系" + + A["🔶 多模型独立并行
发散探索
获得多元视角"] + + B["🔹 智能合并总结
分析聚焦
理解关键信息"] + + C["🔷 @提及机制
宏观深化
针对性优化"] + + D["🔸 内容选中追问
微观精调
精准优化"] + end + + subgraph "协同流程" + E["启动并行探索"] + F["汇总分析结果"] + G["微观调整"] + H["宏观深化"] + I["新回答进入共享上下文"] + J["发起新一轮并行"] + K["可选:继续循环"] + end + + A --> E + B --> F + C --> H + D --> G + + E --> F --> G + G -->|选择性注入| H + H --> I + + F -->|直接深化| H + + I --> J --> K + K -->|循环| E + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#7ED321,stroke:#5BA30A,color:#fff + style G fill:#7ED321,stroke:#5BA30A,color:#fff + style H fill:#7ED321,stroke:#5BA30A,color:#fff + style I fill:#50E3C2,stroke:#2EA896,color:#fff + style J fill:#4A90E2,stroke:#2E5C8A,color:#fff + style K fill:#4A90E2,stroke:#2E5C8A,color:#fff +``` + +--- + +### 总结:OpenWebUI——您的私人 AI 智囊团 + +OpenWebUI 通过**多模型独立并行**、**@提及机制**、**智能合并总结**和**内容选中追问**四大功能的有机结合,构建了一个多维度、多层次的AI对话平台。 + +- **多模型并行**为用户提供了多元化的视角和创意 +- **@提及机制**通过动态的上下文注入,实现了AI团队的宏观深度协作 +- **智能合并总结**让用户快速掌握关键信息并做出决策 +- **内容选中追问**通过浮动窗格实现了精准的微观层面优化 + +这四大功能的循环使用,既保持了广度的多元探索,又实现了深度的精准优化,能够帮助用户在宏观战略和微观细节之间实现完美平衡,最终获得融合多方优势、精致高效的精品方案。 + +## 第二部分:超越聊天的智能工作台——组织、知识与自动化 + +### 一、文件夹即项目:将对话空间转化为专业工作室 + +#### 从混乱到秩序:文件夹的三重身份 + +```mermaid +graph LR + subgraph "文件夹的三重身份" + direction TB + A["📁 分类容器
organize
━━━━━
按项目类型
条理化管理"] + + B["⚙️ 项目配置器
automate
━━━━━
系统提示词
知识库绑定"] + + C["🎯 上下文作用域
contextualize
━━━━━
一致的对话
风格与规范"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff,width:200px + style B fill:#50E3C2,stroke:#2EA896,color:#fff,width:200px + style C fill:#E85D75,stroke:#A23E52,color:#fff,width:200px +``` + +#### 工作流:如何在文件夹中建立专业工作环境 + +```mermaid +graph TB + subgraph step1 ["第一步:创建项目文件夹"] + A["新建文件夹
例如:'产品需求分析'"] + end + + subgraph step2 ["第二步:定义系统提示词"] + B1["设置角色身份"] + B2["定义输出格式"] + B3["明确交互风格"] + end + + subgraph step3 ["第三步:绑定知识库"] + C1["关联知识库A
竞品分析"] + C2["关联知识库B
用户研究"] + C3["关联知识库C
市场数据"] + end + + subgraph step4 ["第四步:开始工作 ✨"] + D["在该文件夹内创建对话"] + end + + subgraph step5 ["第五步:自动应用"] + E1["✅ 系统提示词激活"] + E2["✅ 知识库自动可用"] + E3["✅ 风格遵循设定"] + end + + subgraph step6 ["第六步:灵活管理"] + F1["拖拽移动对话
到其他文件夹"] + F2["自动继承
新文件夹配置"] + F3["随时调整
文件夹设置"] + end + + step1 --> step2 + step2 --> step3 + step3 --> step4 + step4 --> step5 + step5 --> step6 + + B1 -.-> B2 -.-> B3 + C1 -.-> C2 -.-> C3 + E1 -.-> E2 -.-> E3 + F1 -.-> F2 -.-> F3 + + style step1 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style step2 fill:#50E3C2,stroke:#2EA896,color:#fff + style step3 fill:#E85D75,stroke:#A23E52,color:#fff + style step4 fill:#F5A623,stroke:#C27D0E,color:#fff + style step5 fill:#B8E986,stroke:#7BA30A,color:#000 + style step6 fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +#### 真实应用示例:社交媒体内容创作工作室 + +```mermaid +graph LR + subgraph folder ["📁 Social Media Content 文件夹"] + A["系统提示词配置"] + B["知识库关联"] + end + + subgraph config ["配置内容"] + A1["你是社交媒体
内容策略专家
━━━
• 风格:幽默有趣
• 框架:Hook→Value→CTA
• 受众:Z世代"] + B1["Brand Guidelines
━━━
竞品内容分析
━━━
用户评论反馈
━━━
月度热点日历"] + end + + subgraph chats ["对话示例"] + C1["TikTok
脚本创意"] + C2["Instagram
文案优化"] + C3["小红书
笔记框架"] + end + + subgraph benefit ["自动应用的好处"] + D1["✅ 一致品牌
声音"] + D2["✅ 自动参考
品牌指南"] + D3["✅ 遵循内容
框架"] + D4["✅ 查阅竞品
动向"] + end + + folder --> config + config --> chats + chats --> benefit + + A -.-> A1 + B -.-> B1 + + C1 --> D1 + C1 --> D2 + C2 --> D3 + C3 --> D4 + + style folder fill:#4A90E2,stroke:#2E5C8A,color:#fff + style A fill:#50E3C2,stroke:#2EA896,color:#fff + style B fill:#E85D75,stroke:#A23E52,color:#fff + style A1 fill:#7ED321,stroke:#5BA30A,color:#fff + style B1 fill:#7ED321,stroke:#5BA30A,color:#fff + style C1 fill:#50E3C2,stroke:#2EA896,color:#fff + style C2 fill:#50E3C2,stroke:#2EA896,color:#fff + style C3 fill:#50E3C2,stroke:#2EA896,color:#fff + style D1 fill:#B8E986,stroke:#7BA30A,color:#000 + style D2 fill:#B8E986,stroke:#7BA30A,color:#000 + style D3 fill:#B8E986,stroke:#7BA30A,color:#000 + style D4 fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 文件夹管理的超级能力 + +```mermaid +graph LR + subgraph drag ["拖拽操作"] + A["初始对话
在默认位置"] + B["发现是
营销内容"] + C["拖拽到
Social Media
文件夹"] + end + + subgraph auto ["自动应用"] + D["✨ 系统提示词
自动激活"] + E["✨ 知识库
自动可用"] + F["✨ 风格规范
立即生效"] + end + + subgraph nested ["嵌套与层级"] + G["2024年项目
├─ 产品线A
│ ├─ 需求分析
│ ├─ 设计方案
│ └─ 开发文档
└─ 产品线B"] + end + + drag --> auto + drag --> nested + + A --> B --> C + + style drag fill:#4A90E2,stroke:#2E5C8A,color:#fff + style auto fill:#B8E986,stroke:#7BA30A,color:#000 + style nested fill:#7ED321,stroke:#5BA30A,color:#fff + style A fill:#50E3C2,stroke:#2EA896,color:#fff + style B fill:#E85D75,stroke:#A23E52,color:#fff + style C fill:#F5A623,stroke:#C27D0E,color:#fff + style D fill:#B8E986,stroke:#7BA30A,color:#000 + style E fill:#B8E986,stroke:#7BA30A,color:#000 + style F fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 二、一切皆知识库:构建你的专业智库系统 + +#### 知识库的多源生态 + +```mermaid +graph LR + A["📝 OpenWebUI
笔记
━━━
在聊天中记录
重要洞察与总结"] + + B["📚 OpenWebUI
知识库
━━━
上传各类文件
PDF/Word/MD
代码/图片"] + + C["🌐 URL 链接
━━━
直接引用网页
博客/新闻
文档/API"] + + D["💬 对话记录
━━━
将聊天转化为
知识源
专家讨论"] + + E["📄 上传文件
━━━
批量导入
内部文档
论文/数据表"] + + F["🧠 统一知识库
━━━
多源融合
智能检索
上下文注入"] + + A --> F + B --> F + C --> F + D --> F + E --> F + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 知识库的使用工作流 + +```mermaid +graph TB + subgraph input ["阶段1:知识输入"] + A1["撰写笔记"] + A2["上传文件"] + A3["粘贴URL"] + A4["导入对话"] + end + + subgraph org ["阶段2:关联与组织"] + B["在文件夹中
关联知识库"] + C1["知识库A
竞品分析"] + C2["知识库B
用户研究"] + C3["知识库C
市场报告"] + end + + subgraph use ["阶段3:对话中应用"] + D["用户提问"] + E["AI 自动检索
相关知识"] + F["知识注入上下文
精准回答"] + G["引用来源
明确追溯"] + end + + subgraph evolve ["阶段4:知识演进"] + H["对话产生
新洞察"] + I["保存为笔记"] + J["添加到知识库"] + K["螺旋式
上升"] + end + + A1 --> B + A2 --> B + A3 --> B + A4 --> B + + B --> C1 + B --> C2 + B --> C3 + + C1 --> D + C2 --> D + C3 --> D + + D --> E --> F --> G + + G --> H --> I --> J --> K + + style input fill:#4A90E2,stroke:#2E5C8A,color:#fff + style org fill:#50E3C2,stroke:#2EA896,color:#fff + style use fill:#F5A623,stroke:#C27D0E,color:#fff + style evolve fill:#E85D75,stroke:#A23E52,color:#fff + style A1 fill:#7ED321,stroke:#5BA30A,color:#fff + style A2 fill:#7ED321,stroke:#5BA30A,color:#fff + style A3 fill:#7ED321,stroke:#5BA30A,color:#fff + style A4 fill:#7ED321,stroke:#5BA30A,color:#fff + style B fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#B8E986,stroke:#7BA30A,color:#000 + style K fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 应用示例对比 + +```mermaid +graph LR + subgraph legal ["法律团队的案例库"] + A["📁 Legal KB"] + A1["案例库
判例+判决"] + A2["法律文献
法律条款"] + A3["内部经验
案件记录"] + B["律师提问:
合同风险?"] + C["🔍 自动检索
相关案例"] + D["✅ 有据可查
的分析"] + end + + subgraph research ["研究人员的论文库"] + E["📁 Research KB"] + E1["已发表论文
50篇核心论文"] + E2["数据集
实验数据"] + E3["研究笔记
理解总结"] + F["研究员提问:
有论证支持吗?"] + G["📚 文献综述
自动完成"] + H["✅ 快速定位
研究空白"] + end + + A --> A1 + A --> A2 + A --> A3 + A1 --> C + A2 --> C + A3 --> C + B --> C + C --> D + + E --> E1 + E --> E2 + E --> E3 + E1 --> G + E2 --> G + E3 --> G + F --> G + G --> H + + style legal fill:#4A90E2,stroke:#2E5C8A,color:#fff + style research fill:#50E3C2,stroke:#2EA896,color:#fff + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style E fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#B8E986,stroke:#7BA30A,color:#000 + style G fill:#B8E986,stroke:#7BA30A,color:#000 + style D fill:#B8E986,stroke:#7BA30A,color:#000 + style H fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 三、用户提示词:将即时需求转化为交互式表单 + +#### 什么是用户提示词? + +```mermaid +graph LR + subgraph old ["❌ 传统方式"] + A["每次手动输入
完整的问题"] + B["容易遗漏参数
效率低下"] + end + + subgraph new ["✅ 用户提示词方式"] + C["创建一次模板
包含变量占位符
以 / 开头触发"] + D["输入 / 后
自动弹出表单"] + E["选择填空
自动生成完整问题"] + end + + A --> B + C --> D --> E + + style old fill:#E85D75,stroke:#A23E52,color:#fff + style new fill:#B8E986,stroke:#7BA30A,color:#000 + style A fill:#E85D75,stroke:#A23E52,color:#fff + style B fill:#E85D75,stroke:#A23E52,color:#fff + style C fill:#4A90E2,stroke:#2E5C8A,color:#fff + style D fill:#50E3C2,stroke:#2EA896,color:#fff + style E fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 用户提示词的工作流 + +```mermaid +graph LR + subgraph create ["创建阶段"] + A["打开编辑器"] + B["设计模板
含变量"] + C["定义变量类型
text/select/number"] + D["配置表单"] + end + + subgraph trigger ["触发阶段"] + E["输入 /"] + F["选择提示词"] + G["表单弹出"] + end + + subgraph fill ["填表生成"] + H["用户填空
或选择"] + I["自动生成
完整提问"] + J["发送给 AI"] + end + + subgraph result ["获得结果"] + K["格式一致
的回答"] + L["可复用
的输出"] + end + + A --> B --> C --> D + E --> F --> G --> H --> I --> J --> K --> L + + style create fill:#4A90E2,stroke:#2E5C8A,color:#fff + style trigger fill:#F5A623,stroke:#C27D0E,color:#fff + style fill fill:#50E3C2,stroke:#2EA896,color:#fff + style result fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 三个实用模板示例 + +```mermaid +graph TB + subgraph t1 ["📋 Template 1: Content Outline"] + A1["输入:/content_outline"] + A2["表单字段:
• 主题 (textarea)
• 类型 (select)
• 受众 (select)
• 长度 (select)
• 要点数 (number)
• 包含案例 (checkbox)
• 语言风格 (select)"] + A3["输出:
论点 + 大纲 + 展开
+ 案例 + 建议"] + end + + subgraph t2 ["🔍 Template 2: Code Review"] + B1["输入:/code_review"] + B2["表单字段:
• 编程语言 (select)
• 审查焦点 (select)
• 项目类型 (select)
• 严格程度 (select)
• 代码内容 (textarea)"] + B3["输出:
质量评分 + 风险
+ 改进 + 优先级"] + end + + subgraph t3 ["🧠 Template 3: Brainstorm"] + C1["输入:/brainstorm"] + C2["表单字段:
• 主题 (textarea)
• 目标 (select)
• 参与者 (multi-select)
• 限制条件 (textarea)
• 创意数量 (number)
• 分类维度 (select)"] + C3["输出:
多维创意 + 可行性
+ 潜力评估 + 行动"] + end + + A1 --> A2 --> A3 + B1 --> B2 --> B3 + C1 --> C2 --> C3 + + style t1 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style t2 fill:#50E3C2,stroke:#2EA896,color:#fff + style t3 fill:#E85D75,stroke:#A23E52,color:#fff + style A3 fill:#B8E986,stroke:#7BA30A,color:#000 + style B3 fill:#B8E986,stroke:#7BA30A,color:#000 + style C3 fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +#### 用户提示词的五大优势 + +```mermaid +graph TB + subgraph benefits ["用户提示词的核心优势"] + A["🎯 精准性
不遗漏参数
提问完整清晰"] + B["⚡ 高效性
一次设置
多次复用"] + C["📋 一致性
统一格式
便于对标"] + D["🧠 智能化
表单引导思考
降低失误"] + E["🤝 协作性
团队共享模板
结果一致"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +### 四、自定义模型配置:打造专属的 AI 助手 + +OpenWebUI 提供了强大的模型自定义功能,允许用户精细化配置每个模型的行为、权限和能力,满足不同场景下的专业需求。 + +##### 自定义模型的核心配置项 + +```mermaid +graph TB + subgraph core ["核心配置维度"] + A["👥 用户权限
━━━
控制模型可见性
设置使用权限"] + + B["🏷️ 模型标签
━━━
分类管理
快速筛选"] + + C["📝 系统提示词
━━━
定义角色与风格
预设行为规范"] + + D["⚙️ 接口参数
━━━
通用参数配置
自定义请求参数"] + end + + subgraph enhance ["增强功能"] + E["💡 提示词建议
━━━
智能补全
场景化推荐"] + + F["📚 知识库绑定
━━━
专业领域知识
自动检索注入"] + + G["🛠️ 可用工具
━━━
函数调用
API 集成"] + end + + subgraph plugin ["插件系统"] + H["🔍 过滤器
━━━
输入预处理
内容过滤"] + + I["⚡ 操作
━━━
自定义功能
外部调用"] + end + + subgraph ability ["能力配置"] + J["🎯 模型能力
━━━
对话/生成
分析/总结"] + + K["🌐 默认功能
━━━
联网搜索
图像生成"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#B8E986,stroke:#7BA30A,color:#000 + style G fill:#4A90E2,stroke:#2E5C8A,color:#fff + style H fill:#50E3C2,stroke:#2EA896,color:#fff + style I fill:#E85D75,stroke:#A23E52,color:#fff + style J fill:#F5A623,stroke:#C27D0E,color:#fff + style K fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +##### 模型配置工作流 + +```mermaid +graph LR + subgraph setup ["配置阶段"] + A["选择/添加模型"] --> B["设置基础信息"] + B --> C["配置权限与标签"] + C --> D["定义系统提示词"] + end + + subgraph enhance ["增强阶段"] + E["配置接口参数"] --> F["关联知识库"] + F --> G["添加可用工具"] + G --> H["启用过滤器/操作"] + end + + subgraph ability ["能力阶段"] + I["设置提示词建议"] --> J["配置默认功能"] + J --> K["定义模型能力"] + end + + subgraph deploy ["部署使用"] + L["保存配置"] --> M["分配给用户/团队"] + M --> N["开始使用"] + end + + setup --> enhance + enhance --> ability + ability --> deploy + + style setup fill:#4A90E2,stroke:#2E5C8A,color:#fff + style enhance fill:#50E3C2,stroke:#2EA896,color:#fff + style ability fill:#F5A623,stroke:#C27D0E,color:#fff + style deploy fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +#### 关键配置项详解 + +##### 1. 用户权限与模型标签 + +```mermaid +graph TB + subgraph permission ["👥 用户权限管理"] + A["公开模型
所有用户可见"] + B["团队模型
特定团队可用"] + C["私有模型
仅限管理员"] + end + + subgraph tag ["🏷️ 标签分类"] + D["按用途分类
客服/写作/编程"] + E["按能力分类
文本/多模态/代码"] + F["按场景分类
内部/外部/测试"] + end + + subgraph benefit ["优势"] + G["✅ 精准权限控制
✅ 快速查找定位
✅ 有序组织管理"] + end + + A --> G + B --> G + C --> G + D --> G + E --> G + F --> G + + style permission fill:#4A90E2,stroke:#2E5C8A,color:#fff + style tag fill:#50E3C2,stroke:#2EA896,color:#fff + style benefit fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +##### 2. 系统提示词与接口参数 + +```mermaid +graph LR + subgraph prompt ["📝 系统提示词"] + A["角色定义
━━━
你是...专家
专注于...领域"] + + B["行为规范
━━━
回答风格
输出格式"] + + C["约束条件
━━━
不要...
必须..."] + end + + subgraph params ["⚙️ 接口参数"] + D["通用参数
━━━
temperature
top_p
max_tokens"] + + E["自定义参数
━━━
特殊 headers
请求体结构
认证方式"] + end + + subgraph result ["效果"] + F["一致的模型行为"] + G["精准的输出控制"] + end + + A --> F + B --> F + C --> F + D --> G + E --> G + + style prompt fill:#E85D75,stroke:#A23E52,color:#fff + style params fill:#F5A623,stroke:#C27D0E,color:#fff + style result fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +##### 3. 增强功能:知识库、工具与插件 + +```mermaid +graph TB + subgraph kb ["📚 知识库集成"] + A["绑定专业知识库"] + B["自动检索相关内容"] + C["增强回答准确性"] + end + + subgraph tool ["🛠️ 工具集成"] + D["函数调用
Function Calling"] + E["API 接口
外部服务"] + F["实时数据
动态查询"] + end + + subgraph plugin ["🔌 插件系统"] + G["过滤器 Filter
━━━
输入预处理
内容过滤
上下文压缩"] + + H["操作 Action
━━━
保存到文件
调用 API
自定义功能"] + end + + subgraph flow ["工作流程"] + I["用户输入"] + J["过滤器处理"] + K["知识库检索"] + L["工具调用"] + M["模型生成"] + N["操作执行"] + O["返回结果"] + end + + I --> J --> K --> L --> M --> N --> O + + style kb fill:#4A90E2,stroke:#2E5C8A,color:#fff + style tool fill:#50E3C2,stroke:#2EA896,color:#fff + style plugin fill:#E85D75,stroke:#A23E52,color:#fff + style flow fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +--- + +#### 实战应用场景 + +##### 场景示例:客服专用模型配置 + +```mermaid +graph TB + subgraph config ["配置内容"] + A["🏷️ 标签
客服/支持/FAQ"] + + B["👥 权限
客服团队可见"] + + C["📝 系统提示词
你是专业客服
友好、耐心、专业
总是提供解决方案"] + + D["📚 知识库
产品手册
常见问题
解决方案库"] + + E["🛠️ 工具
工单系统
用户数据查询
库存查询"] + + F["🔍 过滤器
敏感信息过滤
语气优化"] + + G["⚡ 操作
创建工单
发送邮件"] + + H["🌐 默认功能
启用联网查询"] + end + + subgraph effect ["使用效果"] + I["✅ 专业响应
✅ 知识准确
✅ 自动化操作
✅ 统一服务标准"] + end + + A --> effect + B --> effect + C --> effect + D --> effect + E --> effect + F --> effect + G --> effect + H --> effect + + style config fill:#4A90E2,stroke:#2E5C8A,color:#fff + style effect fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +##### 场景示例:代码助手模型配置 + +```mermaid +graph LR + subgraph codemodel ["代码助手配置"] + A["系统提示词
━━━
专业程序员
详细注释
最佳实践"] + + B["知识库
━━━
项目文档
API 文档
编码规范"] + + C["工具
━━━
代码执行
linter
测试运行器"] + + D["能力
━━━
代码生成
重构
bug 修复"] + end + + subgraph workflow ["工作流"] + E["需求描述"] + F["知识库查询"] + G["代码生成"] + H["自动测试"] + I["返回结果"] + end + + E --> F --> G --> H --> I + + style codemodel fill:#50E3C2,stroke:#2EA896,color:#fff + style workflow fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +--- + +#### 配置最佳实践 + +```mermaid +graph TB + subgraph practice ["配置建议"] + A["🎯 明确定位
━━━
清晰的角色定义
专注特定场景"] + + B["📝 精炼提示词
━━━
简洁明确
避免冲突指令"] + + C["📚 合理关联
━━━
知识库按需绑定
避免信息过载"] + + D["🛠️ 渐进增强
━━━
先基础后高级
逐步添加功能"] + + E["🔍 持续优化
━━━
根据反馈调整
迭代改进配置"] + + F["👥 权限合理
━━━
最小权限原则
按需分配"] + end + + subgraph tips ["关键要点"] + G["✓ 一个模型一个用途
✓ 提示词避免过于复杂
✓ 工具按需启用
✓ 定期审查配置
✓ 测试后再推广"] + end + + practice --> tips + + style practice fill:#4A90E2,stroke:#2E5C8A,color:#fff + style tips fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 总结:自定义模型的价值 + +通过 OpenWebUI 的自定义模型功能,您可以: + +- **🎯 精准控制**:细粒度的权限管理和行为定制 +- **📚 知识增强**:无缝集成专业知识库,提升准确性 +- **🛠️ 功能扩展**:通过工具和插件实现复杂业务流程 +- **⚡ 提升效率**:一次配置,多次复用,标准化输出 +- **👥 团队协作**:统一的模型配置,保证服务一致性 + +自定义模型功能将 OpenWebUI 从简单的对话工具升级为可深度定制的 AI 工作平台,满足从个人使用到企业级部署的各类需求。 + +### 五、四大特性与四大核心功能的完整协同 + +```mermaid +graph TB + subgraph components ["四大核心特性"] + A["📁 文件夹
项目工作室"] + B["📚 知识库
专业智库"] + C["📋 用户提示词
交互式模板"] + D["⚙️ 自定义模型"] + end + + subgraph conversation ["四大对话功能"] + E["🔶 多模型并行"] + F["🔷 @提及深化"] + G["🔹 合并总结"] + H["🔸 内容选中追问"] + end + + subgraph workflow ["完整工作流"] + I["产品经理
创建文件夹"] + J["配置系统提示词"] + K["关联知识库"] + L["自定义模型配置"] + M["创建 /feature_analysis"] + N["工作时输入 /"] + O["填表自动生成"] + P["并行发送多模型
对比 → 深化 → 优化"] + Q["高质量方案"] + end + + A --> I + B --> K + C --> M + D --> L + + E --> P + F --> P + G --> P + H --> P + + I --> J --> K --> L --> M --> N --> O --> P --> Q + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#F5A623,stroke:#C27D0E,color:#fff + style D fill:#E85D75,stroke:#A23E52,color:#fff + style E fill:#4A90E2,stroke:#2E5C8A,color:#fff + style F fill:#50E3C2,stroke:#2EA896,color:#fff + style G fill:#E85D75,stroke:#A23E52,color:#fff + style H fill:#7ED321,stroke:#5BA30A,color:#fff + style Q fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +## 总结:OpenWebUI 的独特价值主张 + +```mermaid +graph LR + subgraph org ["📁 组织管理"] + A["文件夹即项目
自动应用配置"] + end + + subgraph know ["📚 知识体系"] + B["多源知识库
智能检索注入"] + end + + subgraph eff ["⚡ 工作效率"] + C["交互式提示词
复杂需求简化"] + end + + subgraph model ["⚙️ 模型定制"] + D["精细化配置
权限与能力管理"] + end + + subgraph quality ["🎯 对话品质"] + E["四大核心功能
完整协同流程"] + end + + subgraph value ["💎 最终价值"] + F["从混乱到秩序
从碎片到系统
从重复到高效
从单一到多元
━━━
构建真正的
AI 智囊团"] + end + + A --> value + B --> value + C --> value + D --> value + E --> value + + style org fill:#4A90E2,stroke:#2E5C8A,color:#fff + style know fill:#50E3C2,stroke:#2EA896,color:#fff + style eff fill:#F5A623,stroke:#C27D0E,color:#fff + style model fill:#E85D75,stroke:#A23E52,color:#fff + style quality fill:#7ED321,stroke:#5BA30A,color:#fff + style value fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 对比表:OpenWebUI vs 其他工具 + +| 维度 | OpenWebUI | 其他工具 | +| -------------- | -------------------------- | ---------------- | +| **项目组织** | 📁 文件夹即项目 + 自动配置 | 文件夹只用于分类 | +| **知识来源** | 📚 笔记 + 文件 + URL + 对话 | 主要是文件上传 | +| **知识应用** | 自动检索 + 智能注入 | 需要手动引用 | +| **提示词管理** | 📋 文件夹级 + 交互式表单 | 通常无模板系统 | +| **多模型协同** | 🔶🔷🔹🔸 四大核心功能 | 基础的多模型切换 | +| **模型定制** | ⚙️ 精细化配置 + 权限管理 | 基础参数调整 | +| **开源友好度** | ⭐⭐⭐⭐⭐ 高度可定制 | 部分不开源 | + + + +## 第三部分:扩展功能——Functions、Tools、OpenAPI Server 和 MCP Server + +OpenWebUI 的真正强大之处在于其丰富的扩展能力。通过 Functions、Tools、OpenAPI Server 和 MCP Server,您可以将 OpenWebUI 从一个对话界面扩展成为一个功能完备的 AI 应用平台。 + +### 一、Functions(函数):模块化的 Python 插件系统 + +#### 什么是 Functions? + +Functions 是用纯 Python 编写的模块化插件,运行在 OpenWebUI 环境内部,允许您: + +- 集成新的 AI 模型提供商(如 Anthropic、Google Vertex AI) +- 自定义对话处理流程 +- 添加自定义按钮、工作流步骤或 UI 行为 +- 实现复杂的业务逻辑 + +```mermaid +graph TB + subgraph types ["Functions 的三种类型"] + B["🔍 Filter Functions
━━━
预处理输入内容
后处理输出内容
强制执行样式和规范"] + + C["⚡ Action Functions
━━━
响应模型/用户事件
执行特定操作
触发外部流程"] + + A["🔗 Pipe Functions
━━━
创建自定义代理/模型
在 UI 中显示为可选模型
可链接实现高级工作流"] + end + + subgraph features ["核心特性"] + D["✅ 纯 Python 实现"] + E["✅ 模块化设计"] + F["✅ 环境隔离"] + G["✅ 可链式调用"] + end + + B --> D + C --> E + A --> F + B --> G + + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style D fill:#B8E986,stroke:#7BA30A,color:#000 + style E fill:#B8E986,stroke:#7BA30A,color:#000 + style F fill:#B8E986,stroke:#7BA30A,color:#000 + style G fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +#### Filter Functions:智能内容处理 + +```mermaid +graph TB + subgraph input_filter ["输入过滤器"] + A["用户原始输入"] + B["格式化处理"] + C["敏感信息过滤"] + D["上下文增强"] + E["发送给模型"] + end + + subgraph output_filter ["输出过滤器"] + F["模型原始输出"] + G["语气调整"] + H["内容清理"] + I["格式优化"] + J["返回给用户"] + end + + A --> B --> C --> D --> E + F --> G --> H --> I --> J + + subgraph benefits ["应用价值"] + K["✅ 统一输入格式"] + L["✅ 保护隐私安全"] + M["✅ 优化输出质量"] + N["✅ 强制执行规范"] + end + + E --> K + J --> M + + style input_filter fill:#4A90E2,stroke:#2E5C8A,color:#fff + style output_filter fill:#50E3C2,stroke:#2EA896,color:#fff + style benefits fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +#### Action Functions:事件驱动的自动化 + +```mermaid +graph LR + subgraph trigger ["触发器"] + A["对话完成"] + B["用户点击"] + C["特定关键词"] + D["定时任务"] + end + + subgraph action ["Action 执行"] + E["保存到数据库"] + F["发送通知"] + G["调用外部 API"] + H["生成报告"] + I["触发工作流"] + end + + A --> E + B --> F + C --> G + D --> H + E --> I + + subgraph scenarios ["典型场景"] + J["对话归档
自动保存重要对话"] + K["任务创建
从对话生成待办事项"] + L["数据同步
更新外部系统"] + end + + style trigger fill:#4A90E2,stroke:#2E5C8A,color:#fff + style action fill:#50E3C2,stroke:#2EA896,color:#fff + style scenarios fill:#E85D75,stroke:#A23E52,color:#fff +``` + +--- + +#### Pipe Functions:构建自定义 AI 代理 + +```mermaid +graph LR + subgraph pipe ["Pipe Function 工作流"] + A["用户输入"] --> B["Pipe Function 接收"] + B --> C["自定义处理逻辑
━━━
API 调用
数据转换
多模型编排"] + C --> D["返回结果"] + D --> E["UI 显示"] + end + + subgraph examples ["应用示例"] + F["Google Search 代理
实时搜索集成"] + G["Home Assistant 代理
智能家居控制"] + H["多模型路由
智能选择最佳模型"] + I["自定义 API 集成
企业内部系统"] + end + + style pipe fill:#4A90E2,stroke:#2E5C8A,color:#fff + style examples fill:#50E3C2,stroke:#2EA896,color:#fff + style F fill:#7ED321,stroke:#5BA30A,color:#fff + style G fill:#7ED321,stroke:#5BA30A,color:#fff + style H fill:#7ED321,stroke:#5BA30A,color:#fff + style I fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +### 二、Tools(工具):为 AI 赋予超能力 + +#### 什么是 Tools? + +Tools 是 Python 脚本,为您的 AI 助手添加执行实际任务的能力: + +- 实时网络搜索(天气、新闻) +- 图像生成与处理 +- 语音合成(如 ElevenLabs 集成) +- 文档分析(PDF、Excel 等) +- 代码解释和执行 + +```mermaid +graph TB + subgraph tool_types ["工具类型"] + A["🌐 网络工具
━━━
搜索引擎
API 查询
数据抓取"] + + B["🎨 媒体工具
━━━
图像生成
语音合成
视频处理"] + + C["📄 文档工具
━━━
PDF 解析
表格分析
内容提取"] + + D["💻 代码工具
━━━
代码执行
调试分析
测试运行"] + end + + subgraph modes ["调用模式"] + E["Default Mode
━━━
通过提示词工程
LLM 决定何时调用"] + + F["Native Mode
━━━
函数调用原生支持
直接工具执行"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +#### Tools 工作流程 + +```mermaid +graph LR + subgraph install ["安装阶段"] + A["从社区库选择"] + B["手动上传脚本"] + C["配置参数"] + end + + subgraph enable ["启用阶段"] + D["会话级启用"] + E["模型默认工具"] + F["全局工具配置"] + end + + subgraph execute ["执行阶段"] + G["用户提问"] + H["LLM 分析需求"] + I["选择合适工具"] + J["工具执行"] + K["结果整合"] + L["生成回答"] + end + + A --> D + B --> E + C --> F + + D --> G + E --> G + F --> G + + G --> H --> I --> J --> K --> L + + style install fill:#4A90E2,stroke:#2E5C8A,color:#fff + style enable fill:#50E3C2,stroke:#2EA896,color:#fff + style execute fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +--- + +#### 实战示例:网络搜索工具 + +```mermaid +graph TB + subgraph scenario ["使用场景"] + A["用户提问:
今天特斯拉股价是多少?"] + end + + subgraph process ["处理流程"] + B["LLM 分析
需要实时数据"] + C["调用搜索工具"] + D["获取最新股价"] + E["整合到回答中"] + end + + subgraph result ["结果展示"] + F["截至今日收盘,
特斯拉股价为 $XXX.XX,
较昨日上涨 X.X%
━━━
🔗 数据来源:Yahoo Finance"] + end + + A --> B --> C --> D --> E --> F + + style scenario fill:#4A90E2,stroke:#2E5C8A,color:#fff + style process fill:#50E3C2,stroke:#2EA896,color:#fff + style result fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 三、OpenAPI Server:标准化的服务集成 + +#### 什么是 OpenAPI Server 集成? + +OpenWebUI(v0.6+)支持通过符合 OpenAPI 标准的服务器扩展功能。这使得您可以连接任何暴露 OpenAPI (Swagger) 接口的服务——无论是自己的 Python 脚本、云 API,还是第三方服务。 + +```mermaid +graph LR + subgraph architecture ["架构设计"] + A["OpenWebUI 前端"] + B["OpenWebUI 后端"] + C["OpenAPI Server
━━━
自定义服务
FastAPI/Flask
任何 HTTP 服务"] + end + + subgraph benefits ["核心优势"] + D["🔌 标准化接口
遵循 OpenAPI 规范"] + E["🔐 安全可控
认证与授权"] + F["📚 自动文档
Swagger UI"] + G["🔄 易于集成
RESTful API"] + end + + A --> B + B --> C + + C --> D + C --> E + C --> F + C --> G + + style architecture fill:#4A90E2,stroke:#2E5C8A,color:#fff + style benefits fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +#### 两种服务器模式 + +```mermaid +graph TB + subgraph user_server ["👤 用户工具服务器"] + A["User Tool Server"] + A1["请求来自浏览器"] + A2["localhost = 用户计算机"] + A3["个人隐私工具"] + A4["本地资源访问"] + end + + subgraph global_server ["🌐 全局工具服务器"] + B["Global Tool Server"] + B1["请求来自后端"] + B2["localhost = OpenWebUI 服务器"] + B3["团队共享工具"] + B4["中心化管理"] + end + + A --> A1 --> A2 + A2 --> A3 + A2 --> A4 + + B --> B1 --> B2 + B2 --> B3 + B2 --> B4 + + subgraph use_cases ["使用场景"] + C["个人工具:
本地文件访问
私有 API 调用"] + D["团队工具:
企业服务集成
共享数据源"] + end + + A4 --> C + B4 --> D + + style user_server fill:#4A90E2,stroke:#2E5C8A,color:#fff + style global_server fill:#50E3C2,stroke:#2EA896,color:#fff + style use_cases fill:#E85D75,stroke:#A23E52,color:#fff +``` + +#### 开发自定义工具服务器 + +```mermaid +graph TB + subgraph develop ["开发指南"] + A["选择框架
━━━
FastAPI(推荐)
Flask
其他 HTTP 框架"] + + B["实现端点
━━━
定义 API 路由
处理请求
返回 JSON"] + + C["生成 OpenAPI
━━━
自动生成文档
暴露 /openapi.json
Swagger UI"] + + D["添加安全
━━━
认证机制
CORS 配置
访问控制"] + end + + subgraph example ["示例场景"] + E["文件系统工具
读写本地文件"] + F["数据库查询
执行 SQL 查询"] + G["外部 API
调用第三方服务"] + H["自定义业务
企业内部逻辑"] + end + + A --> B --> C --> D + + D --> E + D --> F + D --> G + D --> H + + style develop fill:#4A90E2,stroke:#2E5C8A,color:#fff + style example fill:#50E3C2,stroke:#2EA896,color:#fff +``` + +--- + +### 四、MCP Server:下一代工具协议 + +#### 什么是 MCP(Model Context Protocol)? + +MCP 是一个为 AI 代理设计的开放标准协议,使得 AI 能够以安全、统一、上下文驱动的方式发现和交互外部工具(如代码操作、文件访问、数据库查询、自定义 API)。 + +```mermaid +graph TB + subgraph mcp_concept ["MCP 核心概念"] + A["🎯 标准化协议
━━━
统一的工具发现
结构化的操作模式
安全的执行机制"] + + B["🧠 上下文感知
━━━
保持状态信息
理解使用场景
智能决策支持"] + + C["🔐 安全设计
━━━
权限控制
沙箱隔离
审计日志"] + end + + subgraph why_mcp ["为什么需要 MCP?"] + D["❌ 传统问题
━━━
每个工具独立集成
缺乏统一标准
重复开发工作"] + + E["✅ MCP 解决
━━━
一次集成多个工具
标准化接口
自动工具发现"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff + style E fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +#### OpenWebUI 中的 MCP 集成架构 + +```mermaid +graph LR + subgraph architecture ["三层架构"] + A["OpenWebUI
前端界面"] + + B["mcpo
代理服务器
━━━
MCP → OpenAPI
协议转换"] + + C["MCP Server
工具服务器
━━━
实际功能实现
stdio/HTTP"] + end + + subgraph flow ["工作流程"] + D["1. 用户请求"] + E["2. OpenWebUI
调用 REST API"] + F["3. mcpo
转换为 MCP 协议"] + G["4. MCP Server
执行任务"] + H["5. 结果返回"] + end + + A --> B + B --> C + + D --> E --> F --> G --> H + + style architecture fill:#4A90E2,stroke:#2E5C8A,color:#fff + style flow fill:#50E3C2,stroke:#2EA896,color:#fff +``` + +**为什么使用代理模式?** + +```mermaid +graph TB + subgraph reasons ["代理服务器的价值"] + A["🔒 安全性
━━━
沙箱后端行为
认证与授权
减小攻击面"] + + B["🔄 互操作性
━━━
统一为 OpenAPI
无需自定义连接器
标准 REST API"] + + C["📈 可扩展性
━━━
独立演进
模块化设计
易于维护"] + + D["📚 自动文档
━━━
Swagger UI
交互式测试
API 探索"] + end + + style A fill:#4A90E2,stroke:#2E5C8A,color:#fff + style B fill:#50E3C2,stroke:#2EA896,color:#fff + style C fill:#E85D75,stroke:#A23E52,color:#fff + style D fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +#### MCP 应用场景 + +```mermaid +graph TB + subgraph scenarios ["典型应用"] + A["📖 知识库检索
━━━
RAG 文档搜索
向量数据库查询
智能知识管理"] + + B["📁 文件操作
━━━
读写本地文件
目录管理
文件搜索"] + + C["🗄️ 数据访问
━━━
数据库查询
API 调用
数据处理"] + + D["🎯 领域工具
━━━
特定行业工具
企业内部系统
自定义功能"] + end + + subgraph enterprise ["企业级部署"] + E["多服务器集成
━━━
统一代理管理
多个 MCP 服务器
工具编排"] + + F["安全合规
━━━
访问控制
审计日志
数据隔离"] + end + + A --> E + B --> E + C --> E + D --> E + + E --> F + + style scenarios fill:#4A90E2,stroke:#2E5C8A,color:#fff + style enterprise fill:#50E3C2,stroke:#2EA896,color:#fff +``` + +--- + +### 五、扩展功能对比与选择指南 + +#### 四种扩展方式对比 + +```mermaid +graph TB + subgraph comparison ["功能对比"] + A["📊 对比维度"] + end + + subgraph functions ["Functions"] + B["运行位置
OpenWebUI 内部"] + C["开发语言
纯 Python"] + D["适用场景
轻量级集成
UI 定制
流程控制"] + E["优势
简单快速
深度集成"] + end + + subgraph tools ["Tools"] + F["运行位置
OpenWebUI 内部"] + G["开发语言
Python 脚本"] + H["适用场景
AI 能力扩展
实时查询
媒体处理"] + I["优势
易于管理
丰富社区"] + end + + subgraph openapi ["OpenAPI Server"] + J["运行位置
独立服务器"] + K["开发语言
任意语言"] + L["适用场景
复杂服务
企业集成
已有系统"] + M["优势
标准化
可扩展"] + end + + subgraph mcp ["MCP Server"] + N["运行位置
独立服务器"] + O["开发语言
任意语言"] + P["适用场景
下一代集成
多工具编排
智能代理"] + Q["优势
未来标准
上下文感知"] + end + + style comparison fill:#F5A623,stroke:#C27D0E,color:#fff + style functions fill:#4A90E2,stroke:#2E5C8A,color:#fff + style tools fill:#50E3C2,stroke:#2EA896,color:#fff + style openapi fill:#E85D75,stroke:#A23E52,color:#fff + style mcp fill:#7ED321,stroke:#5BA30A,color:#fff +``` + +--- + +#### 选择决策树 + +```mermaid +graph TB + start["我需要扩展 OpenWebUI"] + + q1{"需求类型?"} + q2{"现有系统?"} + q3{"团队技术栈?"} + q4{"未来规划?"} + + a1["Functions
━━━
简单快速
UI 集成"] + a2["Tools
━━━
AI 能力
社区资源"] + a3["OpenAPI Server
━━━
标准集成
现有系统"] + a4["MCP Server
━━━
未来标准
智能编排"] + + start --> q1 + + q1 -->|UI 定制
流程控制| a1 + q1 -->|AI 能力扩展| a2 + q1 -->|服务集成| q2 + + q2 -->|有现成 API| a3 + q2 -->|需要新开发| q3 + + q3 -->|Python 为主| a2 + q3 -->|多语言| q4 + + q4 -->|传统架构| a3 + q4 -->|现代化
AI 原生| a4 + + style start fill:#F5A623,stroke:#C27D0E,color:#fff + style q1 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style q2 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style q3 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style q4 fill:#4A90E2,stroke:#2E5C8A,color:#fff + style a1 fill:#B8E986,stroke:#7BA30A,color:#000 + style a2 fill:#B8E986,stroke:#7BA30A,color:#000 + style a3 fill:#B8E986,stroke:#7BA30A,color:#000 + style a4 fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 六、扩展功能最佳实践 + +#### 开发建议 + +```mermaid +graph LR + subgraph principles ["核心原则"] + A["🎯 单一职责
━━━
一个扩展
一个功能"] + + B["📝 清晰文档
━━━
使用说明
参数描述
示例代码"] + + C["🔒 安全第一
━━━
输入验证
错误处理
权限控制"] + + D["🧪 充分测试
━━━
单元测试
集成测试
边界测试"] + end + + subgraph deployment ["部署策略"] + E["开发环境
本地测试
快速迭代"] + + F["测试环境
团队验证
性能测试"] + + G["生产环境
稳定发布
监控告警"] + end + + A --> E + B --> E + C --> F + D --> F + + E --> F --> G + + style principles fill:#4A90E2,stroke:#2E5C8A,color:#fff + style deployment fill:#50E3C2,stroke:#2EA896,color:#fff +``` + +--- + +#### 性能优化 + +```mermaid +graph TB + subgraph optimize ["优化要点"] + A["⚡ 响应速度
━━━
异步处理
缓存策略
连接池"] + + B["📊 资源管理
━━━
内存控制
并发限制
超时设置"] + + C["🔄 错误恢复
━━━
重试机制
降级方案
友好提示"] + + D["📈 可观测性
━━━
日志记录
性能指标
错误追踪"] + end + + subgraph monitoring ["监控指标"] + E["响应时间"] + F["成功率"] + G["错误率"] + H["资源使用"] + end + + A --> E + B --> F + C --> G + D --> H + + style optimize fill:#4A90E2,stroke:#2E5C8A,color:#fff + style monitoring fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +--- + +### 七、社区资源与学习路径 + +```mermaid +graph LR + subgraph resources ["官方资源"] + A["📚 官方文档
docs.openwebui.com"] + B["💻 GitHub 仓库
源码与示例"] + C["💬 社区讨论
问题与解答"] + end + + subgraph libraries ["社区库"] + D["Functions 库
github.com/open-webui/functions"] + E["Tools 库
社区贡献工具"] + F["OpenAPI 服务器示例
参考实现"] + end + + subgraph learning ["学习路径"] + G["1. 基础
了解概念
阅读文档"] + H["2. 实践
运行示例
简单修改"] + I["3. 开发
创建扩展
解决问题"] + J["4. 分享
贡献社区
帮助他人"] + end + + A --> G + B --> H + C --> I + D --> H + E --> H + F --> H + + G --> H --> I --> J + + style resources fill:#4A90E2,stroke:#2E5C8A,color:#fff + style libraries fill:#50E3C2,stroke:#2EA896,color:#fff + style learning fill:#F5A623,stroke:#C27D0E,color:#fff +``` + +--- + +### 总结:构建完整的 AI 应用生态 + +通过 Functions、Tools、OpenAPI Server 和 MCP Server 四大扩展机制,OpenWebUI 提供了从简单到复杂、从内部到外部的完整扩展能力: + +```mermaid +graph TB + subgraph ecosystem ["OpenWebUI 扩展生态"] + A["核心对话平台"] + + B["Functions
内部扩展
━━━
流程控制
UI 定制"] + + C["Tools
能力增强
━━━
实时查询
媒体处理"] + + D["OpenAPI Server
服务集成
━━━
企业系统
标准接口"] + + E["MCP Server
智能编排
━━━
下一代标准
上下文感知"] + end + + subgraph value ["核心价值"] + F["🎯 灵活扩展
满足各种需求"] + G["🔌 标准化
易于集成"] + H["🚀 快速开发
丰富生态"] + I["🔐 安全可控
企业级"] + end + + A --> B + A --> C + A --> D + A --> E + + B --> F + C --> G + D --> H + E --> I + + style A fill:#F5A623,stroke:#C27D0E,color:#fff + style B fill:#4A90E2,stroke:#2E5C8A,color:#fff + style C fill:#50E3C2,stroke:#2EA896,color:#fff + style D fill:#E85D75,stroke:#A23E52,color:#fff + style E fill:#7ED321,stroke:#5BA30A,color:#fff + style F fill:#B8E986,stroke:#7BA30A,color:#000 + style G fill:#B8E986,stroke:#7BA30A,color:#000 + style H fill:#B8E986,stroke:#7BA30A,color:#000 + style I fill:#B8E986,stroke:#7BA30A,color:#000 +``` + +**关键要点:** + +- **🔧 Functions**:适合轻量级、深度集成的内部扩展 +- **🛠️ Tools**:为 AI 提供执行实际任务的能力 +- **🌐 OpenAPI Server**:连接现有系统和服务的标准方式 +- **🚀 MCP Server**:面向未来的智能工具协议 + +无论您是个人开发者还是企业团队,OpenWebUI 的扩展能力都能帮助您构建符合需求的定制化 AI 应用平台。从简单的对话界面,到复杂的智能工作流,OpenWebUI 提供了完整的工具链和生态支持。 + +--- + +## 全文总结 + +OpenWebUI 不仅仅是一个 AI 对话界面,而是一个完整的 AI 应用开发平台: + +**第一部分**:通过**多模型并行**、**@提及机制**、**智能合并总结**和**内容选中追问**四大核心功能,构建了强大的多模型协同对话体系。 + +**第二部分**:通过**文件夹管理**、**知识库系统**、**用户提示词**和**自定义模型配置**,实现了从混乱到秩序、从碎片到系统的智能工作台转变,提供了精细化的模型管理能力,满足从个人到企业的各类需求。 + +**第三部分**:通过**Functions**、**Tools**、**OpenAPI Server**和**MCP Server**四大扩展机制,构建了完整的应用生态,实现了从简单对话到复杂业务流程的全面支持。 + +OpenWebUI 将 AI 对话、知识管理、工作流程和应用开发完美融合,为用户提供了一个真正的 AI 智囊团和工作平台。 diff --git a/plugins/README.md b/plugins/README.md new file mode 100644 index 0000000..c75134c --- /dev/null +++ b/plugins/README.md @@ -0,0 +1,124 @@ +# Plugins + +English | [中文](./README_CN.md) + +This directory contains three types of plugins for OpenWebUI: + +- **Filters**: Process user input before sending to LLM +- **Actions**: Trigger custom functionalities from chat +- **Pipes**: Enhance LLM responses before displaying to user + +## 📦 Plugin Types Overview + +### 🔧 Filters (`/filters`) + +Filters modify user input before it reaches the LLM. 
They are useful for: + +- Input validation and normalization +- Adding system prompts or context +- Compressing long conversations +- Preprocessing and formatting + +[View Filters →](./filters/README.md) + +### 🎬 Actions (`/actions`) + +Actions are custom functionalities triggered from chat. They are useful for: + +- Generating outputs (mind maps, charts, etc.) +- Interacting with external APIs +- Data transformations +- File operations and exports +- Complex workflows + +[View Actions →](./actions/README.md) + +### 📤 Pipes (`/pipes`) + +Pipes process LLM responses after generation. They are useful for: + +- Response formatting +- Content enhancement +- Translation and transformation +- Response filtering +- Integration with external services + +[View Pipes →](./pipes/README.md) + +## 🚀 Quick Start + +### Installing Plugins + +1. **Download** the desired plugin file (`.py`) +2. **Open** OpenWebUI Admin Settings → Plugins +3. **Select** the plugin type (Filters, Actions, or Pipes) +4. **Upload** the file +5. **Refresh** the page +6. **Configure** in chat settings + +### Using Plugins + +- **Filters**: Automatically applied to all inputs when enabled +- **Actions**: Selected manually from the actions menu during chat +- **Pipes**: Automatically applied to all responses when enabled + +## 📚 Plugin Documentation + +Each plugin directory contains: + +- Plugin code (`.py` files) +- English documentation (`README.md`) +- Chinese documentation (`README_CN.md`) +- Configuration and usage guides + +## 🛠️ Plugin Development + +To create a new plugin: + +1. Choose the plugin type (Filter, Action, or Pipe) +2. Navigate to the corresponding directory +3. Create a new folder for your plugin +4. Write the plugin code with clear documentation +5. Create `README.md` and `README_CN.md` +6. 
Update the main README in that directory + +### Plugin Structure Template + +```python +plugins/ +├── filters/ +│ ├── my_filter/ +│ │ ├── my_filter.py # Plugin code +│ │ ├── my_filter_cn.py # Optional: Chinese version +│ │ ├── README.md # Documentation +│ │ └── README_CN.md # Chinese documentation +│ └── README.md +├── actions/ +│ ├── my_action/ +│ │ ├── my_action.py +│ │ ├── README.md +│ │ └── README_CN.md +│ └── README.md +└── pipes/ + ├── my_pipe/ + │ ├── my_pipe.py + │ ├── README.md + │ └── README_CN.md + └── README.md +``` + +## 📋 Documentation Checklist + +Each plugin should include: + +- [ ] Clear feature description +- [ ] Configuration parameters with defaults +- [ ] Installation and setup instructions +- [ ] Usage examples +- [ ] Troubleshooting guide +- [ ] Performance considerations +- [ ] Version and author information + +--- + +> **Note**: For detailed information about each plugin type, see the respective README files in each plugin type directory. diff --git a/plugins/README_CN.md b/plugins/README_CN.md new file mode 100644 index 0000000..55813d2 --- /dev/null +++ b/plugins/README_CN.md @@ -0,0 +1,124 @@ +# Plugins(插件) + +[English](./README.md) | 中文 + +此目录包含 OpenWebUI 的三种类型的插件: + +- **Filters(过滤器)**: 在将用户输入发送给 LLM 前进行处理 +- **Actions(动作)**: 从聊天中触发自定义功能 +- **Pipes(管道)**: 在显示给用户前增强 LLM 响应 + +## 📦 插件类型概览 + +### 🔧 Filters(过滤器)(`/filters`) + +过滤器在用户输入到达 LLM 前修改它。用途包括: + +- 输入验证和规范化 +- 添加系统提示或上下文 +- 压缩长对话 +- 预处理和格式化 + +[查看过滤器 →](./filters/README_CN.md) + +### 🎬 Actions(动作)(`/actions`) + +动作是从聊天中触发的自定义功能。用途包括: + +- 生成输出(思维导图、图表等) +- 与外部 API 交互 +- 数据转换 +- 文件操作和导出 +- 复杂工作流程 + +[查看动作 →](./actions/README_CN.md) + +### 📤 Pipes(管道)(`/pipes`) + +管道在 LLM 生成响应后处理它。用途包括: + +- 响应格式化 +- 内容增强 +- 翻译和转换 +- 响应过滤 +- 与外部服务集成 + +[查看管道 →](./pipes/README_CN.md) + +## 🚀 快速开始 + +### 安装插件 + +1. **下载**所需的插件文件(`.py`) +2. **打开** OpenWebUI 管理员设置 → 插件(Plugins) +3. **选择**插件类型(Filters、Actions 或 Pipes) +4. **上传**文件 +5. **刷新**页面 +6. 
**配置**聊天设置中的参数 + +### 使用插件 + +- **Filters(过滤器)**: 启用后自动应用于所有输入 +- **Actions(动作)**: 在聊天时从动作菜单手动选择 +- **Pipes(管道)**: 启用后自动应用于所有响应 + +## 📚 插件文档 + +每个插件目录包含: + +- 插件代码(`.py` 文件) +- 英文文档(`README.md`) +- 中文文档(`README_CN.md`) +- 配置和使用指南 + +## 🛠️ 插件开发 + +要创建新插件: + +1. 选择插件类型(Filter、Action 或 Pipe) +2. 导航到对应的目录 +3. 为插件创建新文件夹 +4. 编写清晰记录的插件代码 +5. 创建 `README.md` 和 `README_CN.md` +6. 更新该目录中的主 README + +### 插件结构模板 + +```python +plugins/ +├── filters/ +│ ├── my_filter/ +│ │ ├── my_filter.py # 插件代码 +│ │ ├── my_filter_cn.py # 可选:中文版本 +│ │ ├── README.md # 文档 +│ │ └── README_CN.md # 中文文档 +│ └── README.md +├── actions/ +│ ├── my_action/ +│ │ ├── my_action.py +│ │ ├── README.md +│ │ └── README_CN.md +│ └── README.md +└── pipes/ + ├── my_pipe/ + │ ├── my_pipe.py + │ ├── README.md + │ └── README_CN.md + └── README.md +``` + +## 📋 文档检查清单 + +每个插件应包含: + +- [ ] 清晰的功能描述 +- [ ] 配置参数及默认值 +- [ ] 安装和设置说明 +- [ ] 使用示例 +- [ ] 故障排除指南 +- [ ] 性能考虑 +- [ ] 版本和作者信息 + +--- + +> **注意**:有关每种插件类型的详细信息,请参阅每个插件类型目录中的相应 README 文件。 diff --git a/plugins/actions/README.md b/plugins/actions/README.md new file mode 100644 index 0000000..fd80fc0 --- /dev/null +++ b/plugins/actions/README.md @@ -0,0 +1,227 @@ +# Actions (Action Plugins) + +English | [中文](./README_CN.md) + +Action plugins allow you to define custom functionalities that can be triggered from chat. This directory contains various action plugins that can be used to extend OpenWebUI functionality. + +## 📋 Action Plugins List + +| Plugin Name | Description | Version | Documentation | +| :--- | :--- | :--- | :--- | +| **Smart Mind Map** | Intelligently analyzes text content and generates interactive mind maps | 0.7.2 | [English](./smart-mind-map/README.md) / [中文](./smart-mind-map/README_CN.md) | +| **Flash Card (闪记卡)** | Quickly generates beautiful learning memory cards, perfect for studying and quick memorization | 0.2.0 | [English](./knowledge-card/README.md) / [中文](./knowledge-card/README_CN.md) | + +## 🎯 What are Action Plugins? 
+ +Action plugins are typically used for: + +- Generating specific output formats (such as mind maps, charts, tables, etc.) +- Interacting with external APIs or services +- Performing data transformations and processing +- Saving or exporting content to files +- Creating interactive visualizations +- Automating complex workflows + +## 🚀 Quick Start + +### Installing an Action Plugin + +1. Download the plugin file (`.py`) to your local machine +2. Open OpenWebUI Admin Settings and find the "Plugins" section +3. Select the "Actions" type +4. Upload the downloaded file +5. Refresh the page and enable the plugin in chat settings +6. Use the plugin by selecting it from the available actions in chat + +## 📖 Development Guide + +### Adding a New Action Plugin + +When adding a new action plugin, please follow these steps: + +1. **Create Plugin Directory**: Create a new folder under `plugins/actions/` (e.g., `my_action/`) +2. **Write Plugin Code**: Create a `.py` file with clear documentation of functionality +3. **Write Documentation**: + - Create `README.md` (English version) + - Create `README_CN.md` (Chinese version) + - Include: feature description, configuration, usage examples, and troubleshooting +4. **Update This List**: Add your plugin to the table above + +### Open WebUI Plugin Development Common Features + +When developing Action plugins, you can use the following standard features provided by Open WebUI: + +#### 1. **Plugin Metadata Definition** + +```python +""" +title: Plugin Name +icon_url: data:image/svg+xml;base64,... # Plugin icon (Base64 encoded SVG) +version: 1.0.0 +description: Plugin functionality description +""" +``` + +#### 2. 
**Valves Configuration System** + +Use Pydantic to define configurable parameters that users can adjust dynamically in the UI: + +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + show_status: bool = Field( + default=True, + description="Whether to show status updates" + ) + api_key: str = Field( + default="", + description="API key" + ) +``` + +#### 3. **Standard Action Class Structure** + +```python +class Action: + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + # Plugin logic + return body +``` + +#### 4. **Getting User Information** + +```python +# Supports both dictionary and list formats +user_language = __user__.get("language", "en-US") +user_name = __user__.get("name", "User") +user_id = __user__.get("id", "unknown_user") +``` + +#### 5. **Event Emitter (event_emitter)** + +**Sending notification messages:** + +```python +await __event_emitter__({ + "type": "notification", + "data": { + "type": "info", # info/warning/error/success + "content": "Message content" + } +}) +``` + +**Sending status updates:** + +```python +await __event_emitter__({ + "type": "status", + "data": { + "description": "Status description", + "done": False, # True when completed + "hidden": False # True to hide + } +}) +``` + +#### 6. 
**Calling Built-in LLM** + +```python +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +# Get user object +user_obj = Users.get_user_by_id(user_id) + +# Build LLM request +llm_payload = { + "model": "model-id", + "messages": [ + {"role": "system", "content": "System prompt"}, + {"role": "user", "content": "User input"} + ], + "temperature": 0.7, + "stream": False +} + +# Call LLM +llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj +) +``` + +#### 7. **Handling Message Body** + +```python +# Read messages +messages = body.get("messages") +user_message = messages[-1]["content"] + +# Modify messages +body["messages"][-1]["content"] = f"{user_message}\n\nAdditional content" + +# Return modified body +return body +``` + +#### 8. **Embedding HTML Content** + +```python +html_content = "
Interactive content
" +html_embed_tag = f"```html\n{html_content}\n```" +body["messages"][-1]["content"] = f"{text}\n\n{html_embed_tag}" +``` + +#### 9. **Async Processing** + +All plugin methods must be asynchronous: + +```python +async def action(...): + await __event_emitter__(...) + result = await some_async_function() + return result +``` + +#### 10. **Error Handling and Logging** + +```python +import logging + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +try: + # Plugin logic + pass +except Exception as e: + logger.error(f"Error: {str(e)}", exc_info=True) + await __event_emitter__({ + "type": "notification", + "data": {"type": "error", "content": f"Operation failed: {str(e)}"} + }) +``` + +### Development Best Practices + +1. **Use Valves Configuration**: Allow users to customize plugin behavior +2. **Provide Real-time Feedback**: Use event emitter to inform users of progress +3. **Graceful Error Handling**: Catch exceptions and provide friendly messages +4. **Support Multiple Languages**: Get language preference from `__user__` +5. **Logging**: Record key operations and errors for debugging +6. **Validate Input**: Check required parameters and data formats +7. **Return Complete Body**: Ensure message flow is properly passed + +--- + +> **Contributor Note**: To ensure project quality, please provide clear and complete documentation for each new plugin, including features, configuration, usage examples, and troubleshooting guides. Refer to the common features above when developing your plugins. 
diff --git a/plugins/actions/README_CN.md b/plugins/actions/README_CN.md new file mode 100644 index 0000000..49ef29d --- /dev/null +++ b/plugins/actions/README_CN.md @@ -0,0 +1,226 @@ +# Actions(动作插件) + +[English](./README.md) | 中文 + +动作插件(Actions)允许您定义可以从聊天中触发的自定义功能。此目录包含可用于扩展 OpenWebUI 功能的各种动作插件。 + +## 📋 动作插件列表 + +| 插件名称 | 描述 | 版本 | 文档 | +| :--- | :--- | :--- | :--- | +| **智绘心图** | 智能分析文本内容,生成交互式思维导图 | 0.7.2 | [中文](./smart-mind-map/README_CN.md) / [English](./smart-mind-map/README.md) | + +## 🎯 什么是动作插件? + +动作插件通常用于: + +- 生成特定格式的输出(如思维导图、图表、表格等) +- 与外部 API 或服务交互 +- 执行数据转换和处理 +- 保存或导出内容到文件 +- 创建交互式可视化 +- 自动化复杂工作流程 + +## 🚀 快速开始 + +### 安装动作插件 + +1. 将插件文件(`.py`)下载到本地 +2. 在 OpenWebUI 管理员设置中,找到"Plugins"部分 +3. 选择"Actions"类型 +4. 上传下载的文件 +5. 刷新页面并在聊天设置中启用插件 +6. 在聊天中从可用动作中选择使用该插件 + +## 📖 开发指南 + +### 添加新动作插件 + +添加新动作插件时,请遵循以下步骤: + +1. **创建插件目录**:在 `plugins/actions/` 下创建新文件夹(例如 `my_action/`) +2. **编写插件代码**:创建 `.py` 文件,清晰记录功能说明 +3. **编写文档**: + - 创建 `README.md`(英文版) + - 创建 `README_CN.md`(中文版) + - 包含:功能说明、配置方法、使用示例和故障排除 +4. **更新此列表**:在上述表格中添加您的插件 + +### Open WebUI 插件开发通用功能 + +开发 Action 插件时,可以使用以下 Open WebUI 提供的标准功能: + +#### 1. **插件元数据定义** + +```python +""" +title: 插件名称 +icon_url: data:image/svg+xml;base64,... # 插件图标(Base64编码的SVG) +version: 1.0.0 +description: 插件功能描述 +""" +``` + +#### 2. **Valves 配置系统** + +使用 Pydantic 定义可配置参数,用户可在 UI 界面动态调整: + +```python +from pydantic import BaseModel, Field + +class Valves(BaseModel): + show_status: bool = Field( + default=True, + description="是否显示状态更新" + ) + api_key: str = Field( + default="", + description="API密钥" + ) +``` + +#### 3. **标准 Action 类结构** + +```python +class Action: + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + # 插件逻辑 + return body +``` + +#### 4. 
**获取用户信息** + +```python +# __user__ 通常为字典;若收到列表/元组,请先取第一个元素 +user_language = __user__.get("language", "en-US") +user_name = __user__.get("name", "User") +user_id = __user__.get("id", "unknown_user") +``` + +#### 5. **事件发射器 (event_emitter)** + +**发送通知消息:** + +```python +await __event_emitter__({ + "type": "notification", + "data": { + "type": "info", # info/warning/error/success + "content": "消息内容" + } +}) +``` + +**发送状态更新:** + +```python +await __event_emitter__({ + "type": "status", + "data": { + "description": "状态描述", + "done": False, # True表示完成 + "hidden": False # True表示隐藏 + } +}) +``` + +#### 6. **调用内置 LLM** + +```python +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +# 获取用户对象 +user_obj = Users.get_user_by_id(user_id) + +# 构建 LLM 请求 +llm_payload = { + "model": "model-id", + "messages": [ + {"role": "system", "content": "系统提示词"}, + {"role": "user", "content": "用户输入"} + ], + "temperature": 0.7, + "stream": False +} + +# 调用 LLM +llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj +) +``` + +#### 7. **处理消息体 (body)** + +```python +# 读取消息 +messages = body.get("messages") +user_message = messages[-1]["content"] + +# 修改消息 +body["messages"][-1]["content"] = f"{user_message}\n\n新增内容" + +# 返回修改后的body +return body +``` + +#### 8. **嵌入 HTML 内容** + +```python +html_content = "<div>交互式内容</div>" +html_embed_tag = f"```html\n{html_content}\n```" +body["messages"][-1]["content"] = f"{text}\n\n{html_embed_tag}" +``` + +#### 9. **异步处理** + +所有插件方法必须是异步的: + +```python +async def action(...): + await __event_emitter__(...) + result = await some_async_function() + return result +``` + +#### 10. **错误处理和日志** + +```python +import logging + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +try: + # 插件逻辑 + pass +except Exception as e: + logger.error(f"错误: {str(e)}", exc_info=True) + await __event_emitter__({ + "type": "notification", + "data": {"type": "error", "content": f"操作失败: {str(e)}"} + }) +``` + +### 开发最佳实践 + +1. **使用 Valves 配置**:让用户可以自定义插件行为 +2. **提供实时反馈**:使用事件发射器告知用户进度 +3. **优雅的错误处理**:捕获异常并给出友好提示 +4. **支持多语言**:从 `__user__` 获取语言偏好 +5. **日志记录**:记录关键操作和错误,便于调试 +6. **验证输入**:检查必需参数和数据格式 +7. **返回完整的 body**:确保消息流正确传递 + +--- + +> **贡献者注意**:为了确保项目质量,请为每个新增插件提供清晰完整的文档,包括功能说明、配置方法、使用示例和故障排除指南。参考上述通用功能开发您的插件。 diff --git a/plugins/actions/export_to_excel/README.md b/plugins/actions/export_to_excel/README.md new file mode 100644 index 0000000..cf707eb --- /dev/null +++ b/plugins/actions/export_to_excel/README.md @@ -0,0 +1,15 @@ +# Export to Excel + +This plugin exports the Markdown tables found in the last assistant message to an Excel (.xlsx) file directly from the chat interface. + +## Features + +- **One-Click Export**: Adds an "Export to Excel" button to the chat. +- **Automatic Header Extraction**: Uses each table's first row as column headers and names the workbook and sheets after the nearest Markdown headings. +- **Multi-Table Support**: Exports every table in the message, one worksheet per table. + +## Usage + +1. Install the plugin. +2. In any chat, click the "Export to Excel" button. +3. The file will be automatically downloaded to your device. 
diff --git a/plugins/actions/export_to_excel/README_CN.md b/plugins/actions/export_to_excel/README_CN.md new file mode 100644 index 0000000..cbda3df --- /dev/null +++ b/plugins/actions/export_to_excel/README_CN.md @@ -0,0 +1,15 @@ +# 导出为 Excel + +此插件允许你直接从聊天界面将对话历史导出为 Excel (.xlsx) 文件。 + +## 功能特点 + +- **一键导出**:在聊天界面添加“导出为 Excel”按钮。 +- **自动表头提取**:智能识别聊天内容中的表格标题。 +- **多表支持**:支持处理单次对话中的多个表格。 + +## 使用方法 + +1. 安装插件。 +2. 在任意对话中,点击“导出为 Excel”按钮。 +3. 文件将自动下载到你的设备。 diff --git a/plugins/actions/export_to_excel/export_to_excel.py b/plugins/actions/export_to_excel/export_to_excel.py new file mode 100644 index 0000000..e0fed2d --- /dev/null +++ b/plugins/actions/export_to_excel/export_to_excel.py @@ -0,0 +1,804 @@ +""" +title: 导出到Excel +author: Fu-Jie +description: 从最后一条AI回答消息中提取Markdown表格到Excel文件,并在浏览器中触发下载。支持多表并自动根据标题命名 +icon_url: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz48IS0tIFVwbG9hZGVkIHRvOiBTVkcgUmVwbywgd3d3LnN2Z3JlcG8uY29tLCBHZW5lcmF0b3I6IFNWRyBSZXBvIE1peGVyIFRvb2xzIC0tPgo8c3ZnIHdpZHRoPSI4MDBweCIgaGVpZ2h0PSI4MDBweCIgdmlld0JveD0iMCAtMS4yNyAxMTAuMDM3IDExMC4wMzciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHBhdGggZD0iTTU3LjU1IDBoNy40MjV2MTBjMTIuNTEzIDAgMjUuMDI1LjAyNSAzNy41MzctLjAzOCAyLjExMy4wODcgNC40MzgtLjA2MiA2LjI3NSAxLjIgMS4yODcgMS44NSAxLjEzOCA0LjIgMS4yMjUgNi4zMjUtLjA2MiAyMS43LS4wMzcgNDMuMzg4LS4wMjQgNjUuMDc1LS4wNjIgMy42MzguMzM3IDcuMzUtLjQyNSAxMC45MzgtLjUgMi42LTMuNjI1IDIuNjYyLTUuNzEzIDIuNzUtMTIuOTUuMDM3LTI1LjkxMi0uMDI1LTM4Ljg3NSAwdjExLjI1aC03Ljc2M2MtMTkuMDUtMy40NjMtMzguMTM4LTYuNjYyLTU3LjIxMi0xMFYxMC4wMTNDMTkuMTg4IDYuNjc1IDM4LjM3NSAzLjM4OCA1Ny41NSAweiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik02NC45NzUgMTMuNzVoNDEuMjVWOTIuNWgtNDEuMjVWODVoMTB2LTguNzVoLTEwdi01aDEwVjYyLjVoLTEwdi01aDEwdi04Ljc1aC0xMHYtNWgxMFYzNWgtMTB2LTVoMTB2LTguNzVoLTEwdi03LjV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAyMS4yNWgxNy41VjMwaC0xNy41di04Ljc1eiIgZmlsbD0iIzIwNzI0NSIvPjxwYXRoIGQ9Ik0zNy4wMjUgMzIuOTYyYzIuODI1LS4yIDUuNjYzLS4zNzUgOC41LS41MTJhMjYwNy4zNDQgMjYwNy4zNDQgMCAwID
EtMTAuMDg3IDIwLjQ4N2MzLjQzOCA3IDYuOTQ5IDEzLjk1IDEwLjM5OSAyMC45NSBhNzE2LjI4IDcxNi4yOCAwIDAgMS05LjAyNC0uNTc1Yy0yLjEyNS01LjIxMy00LjcxMy0xMC4yNS02LjIzOC0xNS43Yy0xLjY5OSA1LjA3NS00LjEyNSA5Ljg2Mi02LjA3NCAxNC44MzgtMi43MzgtLjAzOC01LjQ3Ni0uMTUtOC4yMTMtLjI2M0MxOS41IDY1LjkgMjIuNiA1OS41NjIgMjUuOTEyIDUzLjMxMmMtMi44MTItNi40MzgtNS45LTEyLjc1LTguOC0xOS4xNSAyLjc1LS4xNjMgNS41LS4zMjUgOC4yNS0uNDc1IDEuODYyIDQuODg4IDMuODk5IDkuNzEyIDUuNDM4IDE0LjcyNSAxLjY0OS01LjMxMiA0LjExMi0xMC4zMTIgNi4yMjUtMTUuNDV6IiBmaWxsPSIjZmZmZmZmIi8+PHBhdGggZD0iTTc5Ljk3NSAzNWgxNy41djguNzVoLTE3LjVWMzV6TTc5Ljk3NSA0OC43NWgxNy41djguNzVoLTE3LjV2LTguNzV6TTc5Ljk3NSA2Mi41aDE3LjV2OC43NWgtMTcuNVY2Mi41ek03OS45NzUgNzYuMjVoMTcuNVY4NWgtMTcuNXYtOC43NXoiIGZpbGw9IiMyMDcyNDUiLz48L3N2Zz4= +version: 0.1.0 +""" + +import os +import pandas as pd +import re +import base64 +from fastapi import FastAPI, HTTPException +from typing import Optional, Callable, Awaitable, Any, List, Dict +import datetime + +app = FastAPI() + + +class Action: + + def __init__(self): + pass + + async def _send_notification(self, emitter: Callable, type: str, content: str): + await emitter( + {"type": "notification", "data": {"type": type, "content": content}} + ) + + async def action( + self, + body: dict, + __user__=None, + __event_emitter__=None, + __event_call__: Optional[Callable[[Any], Awaitable[None]]] = None, + ): + print(f"action:{__name__}") + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "zh-CN") if __user__ else "zh-CN" + ) + user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "zh-CN") + user_name = __user__.get("name", "用户") + user_id = __user__.get("id", "unknown_user") + + if __event_emitter__: + last_assistant_message = body["messages"][-1] + + await __event_emitter__( + { + "type": "status", + "data": {"description": 
"正在保存到文件...", "done": False}, + } + ) + + try: + message_content = last_assistant_message["content"] + tables = self.extract_tables_from_message(message_content) + + if not tables: + raise HTTPException(status_code=400, detail="未找到任何表格。") + + # 获取动态文件名和sheet名称 + workbook_name, sheet_names = self.generate_names_from_content( + message_content, tables + ) + + # 使用优化后的文件名生成逻辑 + current_datetime = datetime.datetime.now() + formatted_date = current_datetime.strftime("%Y%m%d") + + # 如果没找到标题则使用 user_yyyymmdd 格式 + if not workbook_name: + workbook_name = f"{user_name}_{formatted_date}" + + filename = f"{workbook_name}.xlsx" + excel_file_path = os.path.join( + "app", "backend", "data", "temp", filename + ) + + os.makedirs(os.path.dirname(excel_file_path), exist_ok=True) + + # 保存表格到Excel(使用符合中国规范的格式化功能) + self.save_tables_to_excel_enhanced(tables, excel_file_path, sheet_names) + + # 触发文件下载 + if __event_call__: + with open(excel_file_path, "rb") as file: + file_content = file.read() + base64_blob = base64.b64encode(file_content).decode("utf-8") + + await __event_call__( + { + "type": "execute", + "data": { + "code": f""" + try {{ + const base64Data = "{base64_blob}"; + const binaryData = atob(base64Data); + const arrayBuffer = new Uint8Array(binaryData.length); + for (let i = 0; i < binaryData.length; i++) {{ + arrayBuffer[i] = binaryData.charCodeAt(i); + }} + const blob = new Blob([arrayBuffer], {{ type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" }}); + const filename = "{filename}"; + + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.style.display = "none"; + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + URL.revokeObjectURL(url); + document.body.removeChild(a); + }} catch (error) {{ + console.error('触发下载时出错:', error); + }} + """ + }, + } + ) + await __event_emitter__( + { + "type": "status", + "data": {"description": "输出已保存", "done": True}, + } + ) + + # 清理临时文件 + if 
os.path.exists(excel_file_path): + os.remove(excel_file_path) + + return {"message": "下载事件已触发"} + + except HTTPException as e: + print(f"Error processing tables: {str(e.detail)}") + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"保存文件时出错: {e.detail}", + "done": True, + }, + } + ) + await self._send_notification( + __event_emitter__, "error", "没有找到可以导出的表格!" + ) + raise e + except Exception as e: + print(f"Error processing tables: {str(e)}") + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"保存文件时出错: {str(e)}", + "done": True, + }, + } + ) + await self._send_notification( + __event_emitter__, "error", "没有找到可以导出的表格!" + ) + + def extract_tables_from_message(self, message: str) -> List[Dict]: + """ + 从消息文本中提取Markdown表格及位置信息 + 返回结构: [{ + "data": 表格数据, + "start_line": 起始行号, + "end_line": 结束行号 + }] + """ + table_row_pattern = r"^\s*\|.*\|.*\s*$" + rows = message.split("\n") + tables = [] + current_table = [] + start_line = None + current_line = 0 + + for row in rows: + current_line += 1 + if re.search(table_row_pattern, row): + if start_line is None: + start_line = current_line # 记录表格起始行 + + # 处理表格行 + cells = [cell.strip() for cell in row.strip().strip("|").split("|")] + + # 跳过分隔行 + is_separator_row = all(re.fullmatch(r"[:\-]+", cell) for cell in cells) + if not is_separator_row: + current_table.append(cells) + elif current_table: + # 表格结束 + tables.append( + { + "data": current_table, + "start_line": start_line, + "end_line": current_line - 1, + } + ) + current_table = [] + start_line = None + + # 处理最后一个表格 + if current_table: + tables.append( + { + "data": current_table, + "start_line": start_line, + "end_line": current_line, + } + ) + + return tables + + def generate_names_from_content(self, content: str, tables: List[Dict]) -> tuple: + """ + 根据内容生成工作簿名称和sheet名称 + - 忽略非空段落,只使用 markdown 标题 (h1-h6)。 + - 单表格: 使用最近的标题作为工作簿和工作表名。 + - 多表格: 使用文档第一个标题作为工作簿名,各表格最近的标题作为工作表名。 + - 默认命名: + - 工作簿: 在主流程中处理 
(user_yyyymmdd.xlsx)。 + - 工作表: 表1, 表2, ... + """ + lines = content.split("\n") + workbook_name = "" + sheet_names = [] + all_headers = [] + + # 1. 查找文档中所有 h1-h6 标题及其位置 + for i, line in enumerate(lines): + if re.match(r"^#{1,6}\s+", line): + all_headers.append( + {"text": re.sub(r"^#{1,6}\s+", "", line).strip(), "line_num": i} + ) + + # 2. 为每个表格生成 sheet 名称 + for i, table in enumerate(tables): + table_start_line = table["start_line"] - 1 # 转换为 0-based 索引 + closest_header_text = None + + # 查找当前表格上方最近的标题 + candidate_headers = [ + h for h in all_headers if h["line_num"] < table_start_line + ] + if candidate_headers: + # 找到候选标题中行号最大的,即为最接近的 + closest_header = max(candidate_headers, key=lambda x: x["line_num"]) + closest_header_text = closest_header["text"] + + if closest_header_text: + # 清理并添加找到的标题 + sheet_names.append(self.clean_sheet_name(closest_header_text)) + else: + # 如果找不到标题,使用默认名称 "表{i+1}" + sheet_names.append(f"表{i+1}") + + # 3. 根据表格数量确定工作簿名称 + if len(tables) == 1: + # 单个表格: 使用其工作表名作为工作簿名 (前提是该名称不是默认的 "表1") + if sheet_names[0] != "表1": + workbook_name = sheet_names[0] + elif len(tables) > 1: + # 多个表格: 使用文档中的第一个标题作为工作簿名 + if all_headers: + # 找到所有标题中行号最小的,即为第一个标题 + first_header = min(all_headers, key=lambda x: x["line_num"]) + workbook_name = first_header["text"] + + # 4. 
清理工作簿名称 (如果为空,主流程会使用默认名称) + workbook_name = self.clean_filename(workbook_name) if workbook_name else "" + + return workbook_name, sheet_names + + def clean_filename(self, name: str) -> str: + """清理文件名中的非法字符""" + return re.sub(r'[\\/*?:"<>|]', "", name).strip() + + def clean_sheet_name(self, name: str) -> str: + """清理sheet名称(限制31字符,去除非法字符)""" + name = re.sub(r"[\\/*?[\]:]", "", name).strip() + return name[:31] if len(name) > 31 else name + + # ======================== 符合中国规范的格式化功能 ======================== + + def calculate_text_width(self, text: str) -> float: + """ + 计算文本显示宽度,考虑中英文字符差异 + 中文字符按2个单位计算,英文字符按1个单位计算 + """ + if not text: + return 0 + + width = 0 + for char in str(text): + # 判断是否为中文字符(包括中文标点) + if "\u4e00" <= char <= "\u9fff" or "\u3000" <= char <= "\u303f": + width += 2 # 中文字符占2个单位宽度 + else: + width += 1 # 英文字符占1个单位宽度 + + return width + + def calculate_text_height(self, text: str, max_width: int = 50) -> int: + """ + 计算文本显示所需的行数 + 根据换行符和文本长度计算 + """ + if not text: + return 1 + + text = str(text) + # 计算换行符导致的行数 + explicit_lines = text.count("\n") + 1 + + # 计算因文本长度超出而需要的额外行数 + text_width = self.calculate_text_width(text.replace("\n", "")) + wrapped_lines = max( + 1, int(text_width / max_width) + (1 if text_width % max_width > 0 else 0) + ) + + return max(explicit_lines, wrapped_lines) + + def get_column_letter(self, col_index: int) -> str: + """ + 将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...) 
+ """ + result = "" + while col_index >= 0: + result = chr(65 + col_index % 26) + result + col_index = col_index // 26 - 1 + return result + + def determine_content_type(self, header: str, values: list) -> str: + """ + 根据表头和内容智能判断数据类型,符合中国官方表格规范 + 返回: 'number', 'date', 'sequence', 'text' + """ + header_lower = str(header).lower().strip() + + # 检查表头关键词 + number_keywords = [ + "数量", + "金额", + "价格", + "费用", + "成本", + "收入", + "支出", + "总计", + "小计", + "百分比", + "%", + "比例", + "率", + "数值", + "分数", + "成绩", + "得分", + ] + date_keywords = ["日期", "时间", "年份", "月份", "时刻", "date", "time"] + sequence_keywords = [ + "序号", + "编号", + "号码", + "排序", + "次序", + "顺序", + "id", + "编码", + ] + + # 检查表头 + for keyword in number_keywords: + if keyword in header_lower: + return "number" + + for keyword in date_keywords: + if keyword in header_lower: + return "date" + + for keyword in sequence_keywords: + if keyword in header_lower: + return "sequence" + + # 检查数据内容 + if not values: + return "text" + + sample_values = [ + str(v).strip() for v in values[:10] if str(v).strip() + ] # 取前10个非空值作为样本 + if not sample_values: + return "text" + + numeric_count = 0 + date_count = 0 + sequence_count = 0 + + for value in sample_values: + # 检查是否为数字 + try: + float( + value.replace(",", "") + .replace(",", "") + .replace("%", "") + .replace("%", "") + ) + numeric_count += 1 + continue + except ValueError: + pass + + # 检查是否为日期格式 + date_patterns = [ + r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}日?", + r"\d{1,2}[-/]\d{1,2}[-/]\d{4}", + r"\d{4}\d{2}\d{2}", + ] + for pattern in date_patterns: + if re.match(pattern, value): + date_count += 1 + break + + # 检查是否为序号格式 + if ( + re.match(r"^\d+$", value) and len(value) <= 4 + ): # 纯数字且不超过4位,可能是序号 + sequence_count += 1 + + total_count = len(sample_values) + + # 根据比例判断类型 + if numeric_count / total_count >= 0.7: + return "number" + elif date_count / total_count >= 0.7: + return "date" + elif sequence_count / total_count >= 0.8 and sequence_count > 2: + return "sequence" + else: + return 
"text" + + def get_column_letter(self, col_index: int) -> str: + """ + 将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...) + """ + result = "" + while col_index >= 0: + result = chr(65 + col_index % 26) + result + col_index = col_index // 26 - 1 + return result + + def save_tables_to_excel_enhanced( + self, tables: List[Dict], file_path: str, sheet_names: List[str] + ): + """ + 符合中国官方表格规范的Excel保存功能 + """ + try: + with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer: + workbook = writer.book + + # 定义表头样式 - 居中对齐(符合中国规范) + header_format = workbook.add_format( + { + "bold": True, + "font_size": 12, + "font_color": "white", + "bg_color": "#00abbd", + "border": 1, + "align": "center", # 表头居中 + "valign": "vcenter", + "text_wrap": True, + } + ) + + # 文本单元格样式 - 左对齐 + text_format = workbook.add_format( + { + "border": 1, + "align": "left", # 文本左对齐 + "valign": "vcenter", + "text_wrap": True, + } + ) + + # 数值单元格样式 - 右对齐 + number_format = workbook.add_format( + {"border": 1, "align": "right", "valign": "vcenter"} # 数值右对齐 + ) + + # 整数格式 - 右对齐 + integer_format = workbook.add_format( + { + "num_format": "0", + "border": 1, + "align": "right", # 整数右对齐 + "valign": "vcenter", + } + ) + + # 小数格式 - 右对齐 + decimal_format = workbook.add_format( + { + "num_format": "0.00", + "border": 1, + "align": "right", # 小数右对齐 + "valign": "vcenter", + } + ) + + # 日期格式 - 居中对齐 + date_format = workbook.add_format( + { + "border": 1, + "align": "center", # 日期居中对齐 + "valign": "vcenter", + "text_wrap": True, + } + ) + + # 序号格式 - 居中对齐 + sequence_format = workbook.add_format( + { + "border": 1, + "align": "center", # 序号居中对齐 + "valign": "vcenter", + } + ) + + for i, table in enumerate(tables): + try: + table_data = table["data"] + if not table_data or len(table_data) < 1: + print(f"Skipping empty table at index {i}") + continue + + print(f"Processing table {i+1} with {len(table_data)} rows") + + # 获取sheet名称 + sheet_name = ( + sheet_names[i] if i < len(sheet_names) else f"表{i+1}" + ) + + # 创建DataFrame + 
headers = [ + str(cell).strip() + for cell in table_data[0] + if str(cell).strip() + ] + if not headers: + print(f"Warning: No valid headers found for table {i+1}") + headers = [f"列{j+1}" for j in range(len(table_data[0]))] + + data_rows = [] + if len(table_data) > 1: + max_cols = len(headers) + for row in table_data[1:]: + processed_row = [] + for j in range(max_cols): + if j < len(row): + processed_row.append(str(row[j])) + else: + processed_row.append("") + data_rows.append(processed_row) + df = pd.DataFrame(data_rows, columns=headers) + else: + df = pd.DataFrame(columns=headers) + + print(f"DataFrame created with columns: {list(df.columns)}") + + # 修复pandas FutureWarning - 使用try-except替代errors='ignore' + for col in df.columns: + try: + df[col] = pd.to_numeric(df[col]) + except (ValueError, TypeError): + pass + + # 先写入数据(不包含表头) + df.to_excel( + writer, + sheet_name=sheet_name, + index=False, + header=False, + startrow=1, + ) + worksheet = writer.sheets[sheet_name] + + # 应用符合中国规范的格式化 + self.apply_chinese_standard_formatting( + worksheet, + df, + headers, + workbook, + header_format, + text_format, + number_format, + integer_format, + decimal_format, + date_format, + sequence_format, + ) + + except Exception as e: + print(f"Error processing table {i+1}: {str(e)}") + continue + + except Exception as e: + print(f"Error saving Excel file: {str(e)}") + raise + + def apply_chinese_standard_formatting( + self, + worksheet, + df, + headers, + workbook, + header_format, + text_format, + number_format, + integer_format, + decimal_format, + date_format, + sequence_format, + ): + """ + 应用符合中国官方表格规范的格式化 + - 表头: 居中对齐 + - 数值: 右对齐 + - 文本: 左对齐 + - 日期: 居中对齐 + - 序号: 居中对齐 + """ + try: + # 1. 
写入表头(居中对齐) + print(f"Writing headers with Chinese standard alignment: {headers}") + for col_idx, header in enumerate(headers): + if header and str(header).strip(): + worksheet.write(0, col_idx, str(header).strip(), header_format) + else: + default_header = f"列{col_idx+1}" + worksheet.write(0, col_idx, default_header, header_format) + + # 2. 分析每列的数据类型并应用相应格式 + column_types = {} + for col_idx, column in enumerate(headers): + if col_idx < len(df.columns): + column_values = df.iloc[:, col_idx].tolist() + column_types[col_idx] = self.determine_content_type( + column, column_values + ) + print( + f"Column '{column}' determined as type: {column_types[col_idx]}" + ) + else: + column_types[col_idx] = "text" + + # 3. 写入并格式化数据(根据类型使用不同对齐方式) + for row_idx, row in df.iterrows(): + for col_idx, value in enumerate(row): + content_type = column_types.get(col_idx, "text") + + # 根据内容类型选择格式 + if content_type == "number": + # 数值类型 - 右对齐 + if pd.api.types.is_numeric_dtype(df.iloc[:, col_idx]): + if pd.api.types.is_integer_dtype(df.iloc[:, col_idx]): + current_format = integer_format + else: + try: + numeric_value = float(value) + if numeric_value.is_integer(): + current_format = integer_format + value = int(numeric_value) + else: + current_format = decimal_format + except (ValueError, TypeError): + current_format = decimal_format + else: + current_format = number_format + + elif content_type == "date": + # 日期类型 - 居中对齐 + current_format = date_format + + elif content_type == "sequence": + # 序号类型 - 居中对齐 + current_format = sequence_format + + else: + # 文本类型 - 左对齐 + current_format = text_format + + worksheet.write(row_idx + 1, col_idx, value, current_format) + + # 4. 
自动调整列宽 + for col_idx, column in enumerate(headers): + col_letter = self.get_column_letter(col_idx) + + # 计算表头宽度 + header_width = self.calculate_text_width(str(column)) + + # 计算数据列的最大宽度 + max_data_width = 0 + if not df.empty and col_idx < len(df.columns): + for value in df.iloc[:, col_idx]: + value_width = self.calculate_text_width(str(value)) + max_data_width = max(max_data_width, value_width) + + # 基础宽度:取表头和数据的最大宽度 + base_width = max(header_width, max_data_width) + + # 根据内容类型调整宽度 + content_type = column_types.get(col_idx, "text") + if content_type == "sequence": + # 序号列通常比较窄 + optimal_width = max(8, min(15, base_width + 2)) + elif content_type == "number": + # 数值列需要额外空间显示数字 + optimal_width = max(12, min(25, base_width + 3)) + elif content_type == "date": + # 日期列需要固定宽度 + optimal_width = max(15, min(20, base_width + 2)) + else: + # 文本列根据内容调整 + if base_width <= 10: + optimal_width = base_width + 3 + elif base_width <= 20: + optimal_width = base_width + 4 + else: + optimal_width = base_width + 5 + optimal_width = max(10, min(60, optimal_width)) + + worksheet.set_column(f"{col_letter}:{col_letter}", optimal_width) + + # 5. 
自动调整行高 + # 设置表头行高为35点 + worksheet.set_row(0, 35) + + # 设置数据行行高 + for row_idx, row in df.iterrows(): + max_row_height = 20 # 中国表格规范建议的最小行高 + + for col_idx, value in enumerate(row): + if col_idx < len(headers): + col_width = min( + 60, + max( + 10, self.calculate_text_width(str(headers[col_idx])) + 5 + ), + ) + else: + col_width = 15 + + cell_lines = self.calculate_text_height(str(value), col_width) + cell_height = cell_lines * 20 # 每行20点高度,符合中国规范 + + max_row_height = max(max_row_height, cell_height) + + final_height = min(120, max_row_height) + worksheet.set_row(row_idx + 1, final_height) + + print(f"Successfully applied Chinese standard formatting") + + except Exception as e: + print(f"Warning: Failed to apply Chinese standard formatting: {str(e)}") + # 降级到基础格式化 + self.apply_basic_formatting_fallback(worksheet, df) + + def apply_basic_formatting_fallback(self, worksheet, df): + """ + 基础格式化降级方案 + """ + try: + # 基础列宽调整 + for i, column in enumerate(df.columns): + column_width = ( + max( + len(str(column)), + (df[column].astype(str).map(len).max() if not df.empty else 0), + ) + + 2 + ) + + col_letter = self.get_column_letter(i) + worksheet.set_column( + f"{col_letter}:{col_letter}", min(60, max(10, column_width)) + ) + + print("Applied basic formatting fallback") + + except Exception as e: + print(f"Warning: Even basic formatting failed: {str(e)}") diff --git a/plugins/actions/export_to_excel/export_to_excel_cn.py b/plugins/actions/export_to_excel/export_to_excel_cn.py new file mode 100644 index 0000000..07f3fc7 --- /dev/null +++ b/plugins/actions/export_to_excel/export_to_excel_cn.py @@ -0,0 +1,806 @@ +""" +title: 导出为 Excel +author: Antigravity +author_url: https://github.com/open-webui +funding_url: https://github.com/open-webui +version: 0.3.3 +icon_url: 
data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0xNCAyaDZhMiAyIDAgMCAxIDIgMnYxNmEyIDIgMCAwIDEtMiAyaC02YTIgMiAwIDAgMS0yLTJ2LTVhMiAyIDAgMCAxLTItMnYtNSIvPjxwb2x5bGluZSBwb2ludHM9IjE0IDIgMTQgOCAyMCA4Ii8+PHBhdGggZD0iTTE2IDEzdjgiLz48cGF0aCBkPSJNOCAxM3Y4Ii8+PHBhdGggZD0iTTEyIDEzdjgiLz48cGF0aCBkPSJNMTYgMTdoLTgiLz48cGF0aCBkPSJNMTYgMjFoLTgiLz48cGF0aCBkPSJNMTYgMTNoLTgiLz48L3N2Zz4= +description: 将当前对话历史导出为 Excel (.xlsx) 文件,支持自动提取表头。 +""" + +import os +import pandas as pd +import re +import base64 +from fastapi import FastAPI, HTTPException +from typing import Optional, Callable, Awaitable, Any, List, Dict +import datetime + +app = FastAPI() + + +class Action: + + def __init__(self): + pass + + async def _send_notification(self, emitter: Callable, type: str, content: str): + await emitter( + {"type": "notification", "data": {"type": type, "content": content}} + ) + + async def action( + self, + body: dict, + __user__=None, + __event_emitter__=None, + __event_call__: Optional[Callable[[Any], Awaitable[None]]] = None, + ): + print(f"action:{__name__}") + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "zh-CN") if __user__ else "zh-CN" + ) + user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "zh-CN") + user_name = __user__.get("name", "用户") + user_id = __user__.get("id", "unknown_user") + + if __event_emitter__: + last_assistant_message = body["messages"][-1] + + await __event_emitter__( + { + "type": "status", + "data": {"description": "正在保存到文件...", "done": False}, + } + ) + + try: + message_content = 
last_assistant_message["content"] + tables = self.extract_tables_from_message(message_content) + + if not tables: + raise HTTPException(status_code=400, detail="未找到任何表格。") + + # 获取动态文件名和sheet名称 + workbook_name, sheet_names = self.generate_names_from_content( + message_content, tables + ) + + # 使用优化后的文件名生成逻辑 + current_datetime = datetime.datetime.now() + formatted_date = current_datetime.strftime("%Y%m%d") + + # 如果没找到标题则使用 user_yyyymmdd 格式 + if not workbook_name: + workbook_name = f"{user_name}_{formatted_date}" + + filename = f"{workbook_name}.xlsx" + excel_file_path = os.path.join( + "app", "backend", "data", "temp", filename + ) + + os.makedirs(os.path.dirname(excel_file_path), exist_ok=True) + + # 保存表格到Excel(使用符合中国规范的格式化功能) + self.save_tables_to_excel_enhanced(tables, excel_file_path, sheet_names) + + # 触发文件下载 + if __event_call__: + with open(excel_file_path, "rb") as file: + file_content = file.read() + base64_blob = base64.b64encode(file_content).decode("utf-8") + + await __event_call__( + { + "type": "execute", + "data": { + "code": f""" + try {{ + const base64Data = "{base64_blob}"; + const binaryData = atob(base64Data); + const arrayBuffer = new Uint8Array(binaryData.length); + for (let i = 0; i < binaryData.length; i++) {{ + arrayBuffer[i] = binaryData.charCodeAt(i); + }} + const blob = new Blob([arrayBuffer], {{ type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" }}); + const filename = "{filename}"; + + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.style.display = "none"; + a.href = url; + a.download = filename; + document.body.appendChild(a); + a.click(); + URL.revokeObjectURL(url); + document.body.removeChild(a); + }} catch (error) {{ + console.error('触发下载时出错:', error); + }} + """ + }, + } + ) + await __event_emitter__( + { + "type": "status", + "data": {"description": "输出已保存", "done": True}, + } + ) + + # 清理临时文件 + if os.path.exists(excel_file_path): + os.remove(excel_file_path) + + return 
{"message": "下载事件已触发"} + + except HTTPException as e: + print(f"Error processing tables: {str(e.detail)}") + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"保存文件时出错: {e.detail}", + "done": True, + }, + } + ) + await self._send_notification( + __event_emitter__, "error", "没有找到可以导出的表格!" + ) + raise e + except Exception as e: + print(f"Error processing tables: {str(e)}") + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"保存文件时出错: {str(e)}", + "done": True, + }, + } + ) + await self._send_notification( + __event_emitter__, "error", "没有找到可以导出的表格!" + ) + + def extract_tables_from_message(self, message: str) -> List[Dict]: + """ + 从消息文本中提取Markdown表格及位置信息 + 返回结构: [{ + "data": 表格数据, + "start_line": 起始行号, + "end_line": 结束行号 + }] + """ + table_row_pattern = r"^\s*\|.*\|.*\s*$" + rows = message.split("\n") + tables = [] + current_table = [] + start_line = None + current_line = 0 + + for row in rows: + current_line += 1 + if re.search(table_row_pattern, row): + if start_line is None: + start_line = current_line # 记录表格起始行 + + # 处理表格行 + cells = [cell.strip() for cell in row.strip().strip("|").split("|")] + + # 跳过分隔行 + is_separator_row = all(re.fullmatch(r"[:\-]+", cell) for cell in cells) + if not is_separator_row: + current_table.append(cells) + elif current_table: + # 表格结束 + tables.append( + { + "data": current_table, + "start_line": start_line, + "end_line": current_line - 1, + } + ) + current_table = [] + start_line = None + + # 处理最后一个表格 + if current_table: + tables.append( + { + "data": current_table, + "start_line": start_line, + "end_line": current_line, + } + ) + + return tables + + def generate_names_from_content(self, content: str, tables: List[Dict]) -> tuple: + """ + 根据内容生成工作簿名称和sheet名称 + - 忽略非空段落,只使用 markdown 标题 (h1-h6)。 + - 单表格: 使用最近的标题作为工作簿和工作表名。 + - 多表格: 使用文档第一个标题作为工作簿名,各表格最近的标题作为工作表名。 + - 默认命名: + - 工作簿: 在主流程中处理 (user_yyyymmdd.xlsx)。 + - 工作表: 表1, 表2, ... 
+ """ + lines = content.split("\n") + workbook_name = "" + sheet_names = [] + all_headers = [] + + # 1. 查找文档中所有 h1-h6 标题及其位置 + for i, line in enumerate(lines): + if re.match(r"^#{1,6}\s+", line): + all_headers.append( + {"text": re.sub(r"^#{1,6}\s+", "", line).strip(), "line_num": i} + ) + + # 2. 为每个表格生成 sheet 名称 + for i, table in enumerate(tables): + table_start_line = table["start_line"] - 1 # 转换为 0-based 索引 + closest_header_text = None + + # 查找当前表格上方最近的标题 + candidate_headers = [ + h for h in all_headers if h["line_num"] < table_start_line + ] + if candidate_headers: + # 找到候选标题中行号最大的,即为最接近的 + closest_header = max(candidate_headers, key=lambda x: x["line_num"]) + closest_header_text = closest_header["text"] + + if closest_header_text: + # 清理并添加找到的标题 + sheet_names.append(self.clean_sheet_name(closest_header_text)) + else: + # 如果找不到标题,使用默认名称 "表{i+1}" + sheet_names.append(f"表{i+1}") + + # 3. 根据表格数量确定工作簿名称 + if len(tables) == 1: + # 单个表格: 使用其工作表名作为工作簿名 (前提是该名称不是默认的 "表1") + if sheet_names[0] != "表1": + workbook_name = sheet_names[0] + elif len(tables) > 1: + # 多个表格: 使用文档中的第一个标题作为工作簿名 + if all_headers: + # 找到所有标题中行号最小的,即为第一个标题 + first_header = min(all_headers, key=lambda x: x["line_num"]) + workbook_name = first_header["text"] + + # 4. 
清理工作簿名称 (如果为空,主流程会使用默认名称) + workbook_name = self.clean_filename(workbook_name) if workbook_name else "" + + return workbook_name, sheet_names + + def clean_filename(self, name: str) -> str: + """清理文件名中的非法字符""" + return re.sub(r'[\\/*?:"<>|]', "", name).strip() + + def clean_sheet_name(self, name: str) -> str: + """清理sheet名称(限制31字符,去除非法字符)""" + name = re.sub(r"[\\/*?[\]:]", "", name).strip() + return name[:31] if len(name) > 31 else name + + # ======================== 符合中国规范的格式化功能 ======================== + + def calculate_text_width(self, text: str) -> float: + """ + 计算文本显示宽度,考虑中英文字符差异 + 中文字符按2个单位计算,英文字符按1个单位计算 + """ + if not text: + return 0 + + width = 0 + for char in str(text): + # 判断是否为中文字符(包括中文标点) + if "\u4e00" <= char <= "\u9fff" or "\u3000" <= char <= "\u303f": + width += 2 # 中文字符占2个单位宽度 + else: + width += 1 # 英文字符占1个单位宽度 + + return width + + def calculate_text_height(self, text: str, max_width: int = 50) -> int: + """ + 计算文本显示所需的行数 + 根据换行符和文本长度计算 + """ + if not text: + return 1 + + text = str(text) + # 计算换行符导致的行数 + explicit_lines = text.count("\n") + 1 + + # 计算因文本长度超出而需要的额外行数 + text_width = self.calculate_text_width(text.replace("\n", "")) + wrapped_lines = max( + 1, int(text_width / max_width) + (1 if text_width % max_width > 0 else 0) + ) + + return max(explicit_lines, wrapped_lines) + + def get_column_letter(self, col_index: int) -> str: + """ + 将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...) 
+ """ + result = "" + while col_index >= 0: + result = chr(65 + col_index % 26) + result + col_index = col_index // 26 - 1 + return result + + def determine_content_type(self, header: str, values: list) -> str: + """ + 根据表头和内容智能判断数据类型,符合中国官方表格规范 + 返回: 'number', 'date', 'sequence', 'text' + """ + header_lower = str(header).lower().strip() + + # 检查表头关键词 + number_keywords = [ + "数量", + "金额", + "价格", + "费用", + "成本", + "收入", + "支出", + "总计", + "小计", + "百分比", + "%", + "比例", + "率", + "数值", + "分数", + "成绩", + "得分", + ] + date_keywords = ["日期", "时间", "年份", "月份", "时刻", "date", "time"] + sequence_keywords = [ + "序号", + "编号", + "号码", + "排序", + "次序", + "顺序", + "id", + "编码", + ] + + # 检查表头 + for keyword in number_keywords: + if keyword in header_lower: + return "number" + + for keyword in date_keywords: + if keyword in header_lower: + return "date" + + for keyword in sequence_keywords: + if keyword in header_lower: + return "sequence" + + # 检查数据内容 + if not values: + return "text" + + sample_values = [ + str(v).strip() for v in values[:10] if str(v).strip() + ] # 取前10个非空值作为样本 + if not sample_values: + return "text" + + numeric_count = 0 + date_count = 0 + sequence_count = 0 + + for value in sample_values: + # 检查是否为数字 + try: + float( + value.replace(",", "") + .replace(",", "") + .replace("%", "") + .replace("%", "") + ) + numeric_count += 1 + continue + except ValueError: + pass + + # 检查是否为日期格式 + date_patterns = [ + r"\d{4}[-/年]\d{1,2}[-/月]\d{1,2}日?", + r"\d{1,2}[-/]\d{1,2}[-/]\d{4}", + r"\d{4}\d{2}\d{2}", + ] + for pattern in date_patterns: + if re.match(pattern, value): + date_count += 1 + break + + # 检查是否为序号格式 + if ( + re.match(r"^\d+$", value) and len(value) <= 4 + ): # 纯数字且不超过4位,可能是序号 + sequence_count += 1 + + total_count = len(sample_values) + + # 根据比例判断类型 + if numeric_count / total_count >= 0.7: + return "number" + elif date_count / total_count >= 0.7: + return "date" + elif sequence_count / total_count >= 0.8 and sequence_count > 2: + return "sequence" + else: + return 
"text" + + def get_column_letter(self, col_index: int) -> str: + """ + 将列索引转换为Excel列字母 (A, B, C, ..., AA, AB, ...) + """ + result = "" + while col_index >= 0: + result = chr(65 + col_index % 26) + result + col_index = col_index // 26 - 1 + return result + + def save_tables_to_excel_enhanced( + self, tables: List[Dict], file_path: str, sheet_names: List[str] + ): + """ + 符合中国官方表格规范的Excel保存功能 + """ + try: + with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer: + workbook = writer.book + + # 定义表头样式 - 居中对齐(符合中国规范) + header_format = workbook.add_format( + { + "bold": True, + "font_size": 12, + "font_color": "white", + "bg_color": "#00abbd", + "border": 1, + "align": "center", # 表头居中 + "valign": "vcenter", + "text_wrap": True, + } + ) + + # 文本单元格样式 - 左对齐 + text_format = workbook.add_format( + { + "border": 1, + "align": "left", # 文本左对齐 + "valign": "vcenter", + "text_wrap": True, + } + ) + + # 数值单元格样式 - 右对齐 + number_format = workbook.add_format( + {"border": 1, "align": "right", "valign": "vcenter"} # 数值右对齐 + ) + + # 整数格式 - 右对齐 + integer_format = workbook.add_format( + { + "num_format": "0", + "border": 1, + "align": "right", # 整数右对齐 + "valign": "vcenter", + } + ) + + # 小数格式 - 右对齐 + decimal_format = workbook.add_format( + { + "num_format": "0.00", + "border": 1, + "align": "right", # 小数右对齐 + "valign": "vcenter", + } + ) + + # 日期格式 - 居中对齐 + date_format = workbook.add_format( + { + "border": 1, + "align": "center", # 日期居中对齐 + "valign": "vcenter", + "text_wrap": True, + } + ) + + # 序号格式 - 居中对齐 + sequence_format = workbook.add_format( + { + "border": 1, + "align": "center", # 序号居中对齐 + "valign": "vcenter", + } + ) + + for i, table in enumerate(tables): + try: + table_data = table["data"] + if not table_data or len(table_data) < 1: + print(f"Skipping empty table at index {i}") + continue + + print(f"Processing table {i+1} with {len(table_data)} rows") + + # 获取sheet名称 + sheet_name = ( + sheet_names[i] if i < len(sheet_names) else f"表{i+1}" + ) + + # 创建DataFrame + 
headers = [ + str(cell).strip() + for cell in table_data[0] + if str(cell).strip() + ] + if not headers: + print(f"Warning: No valid headers found for table {i+1}") + headers = [f"列{j+1}" for j in range(len(table_data[0]))] + + data_rows = [] + if len(table_data) > 1: + max_cols = len(headers) + for row in table_data[1:]: + processed_row = [] + for j in range(max_cols): + if j < len(row): + processed_row.append(str(row[j])) + else: + processed_row.append("") + data_rows.append(processed_row) + df = pd.DataFrame(data_rows, columns=headers) + else: + df = pd.DataFrame(columns=headers) + + print(f"DataFrame created with columns: {list(df.columns)}") + + # 修复pandas FutureWarning - 使用try-except替代errors='ignore' + for col in df.columns: + try: + df[col] = pd.to_numeric(df[col]) + except (ValueError, TypeError): + pass + + # 先写入数据(不包含表头) + df.to_excel( + writer, + sheet_name=sheet_name, + index=False, + header=False, + startrow=1, + ) + worksheet = writer.sheets[sheet_name] + + # 应用符合中国规范的格式化 + self.apply_chinese_standard_formatting( + worksheet, + df, + headers, + workbook, + header_format, + text_format, + number_format, + integer_format, + decimal_format, + date_format, + sequence_format, + ) + + except Exception as e: + print(f"Error processing table {i+1}: {str(e)}") + continue + + except Exception as e: + print(f"Error saving Excel file: {str(e)}") + raise + + def apply_chinese_standard_formatting( + self, + worksheet, + df, + headers, + workbook, + header_format, + text_format, + number_format, + integer_format, + decimal_format, + date_format, + sequence_format, + ): + """ + 应用符合中国官方表格规范的格式化 + - 表头: 居中对齐 + - 数值: 右对齐 + - 文本: 左对齐 + - 日期: 居中对齐 + - 序号: 居中对齐 + """ + try: + # 1. 
写入表头(居中对齐) + print(f"Writing headers with Chinese standard alignment: {headers}") + for col_idx, header in enumerate(headers): + if header and str(header).strip(): + worksheet.write(0, col_idx, str(header).strip(), header_format) + else: + default_header = f"列{col_idx+1}" + worksheet.write(0, col_idx, default_header, header_format) + + # 2. 分析每列的数据类型并应用相应格式 + column_types = {} + for col_idx, column in enumerate(headers): + if col_idx < len(df.columns): + column_values = df.iloc[:, col_idx].tolist() + column_types[col_idx] = self.determine_content_type( + column, column_values + ) + print( + f"Column '{column}' determined as type: {column_types[col_idx]}" + ) + else: + column_types[col_idx] = "text" + + # 3. 写入并格式化数据(根据类型使用不同对齐方式) + for row_idx, row in df.iterrows(): + for col_idx, value in enumerate(row): + content_type = column_types.get(col_idx, "text") + + # 根据内容类型选择格式 + if content_type == "number": + # 数值类型 - 右对齐 + if pd.api.types.is_numeric_dtype(df.iloc[:, col_idx]): + if pd.api.types.is_integer_dtype(df.iloc[:, col_idx]): + current_format = integer_format + else: + try: + numeric_value = float(value) + if numeric_value.is_integer(): + current_format = integer_format + value = int(numeric_value) + else: + current_format = decimal_format + except (ValueError, TypeError): + current_format = decimal_format + else: + current_format = number_format + + elif content_type == "date": + # 日期类型 - 居中对齐 + current_format = date_format + + elif content_type == "sequence": + # 序号类型 - 居中对齐 + current_format = sequence_format + + else: + # 文本类型 - 左对齐 + current_format = text_format + + worksheet.write(row_idx + 1, col_idx, value, current_format) + + # 4. 
自动调整列宽 + for col_idx, column in enumerate(headers): + col_letter = self.get_column_letter(col_idx) + + # 计算表头宽度 + header_width = self.calculate_text_width(str(column)) + + # 计算数据列的最大宽度 + max_data_width = 0 + if not df.empty and col_idx < len(df.columns): + for value in df.iloc[:, col_idx]: + value_width = self.calculate_text_width(str(value)) + max_data_width = max(max_data_width, value_width) + + # 基础宽度:取表头和数据的最大宽度 + base_width = max(header_width, max_data_width) + + # 根据内容类型调整宽度 + content_type = column_types.get(col_idx, "text") + if content_type == "sequence": + # 序号列通常比较窄 + optimal_width = max(8, min(15, base_width + 2)) + elif content_type == "number": + # 数值列需要额外空间显示数字 + optimal_width = max(12, min(25, base_width + 3)) + elif content_type == "date": + # 日期列需要固定宽度 + optimal_width = max(15, min(20, base_width + 2)) + else: + # 文本列根据内容调整 + if base_width <= 10: + optimal_width = base_width + 3 + elif base_width <= 20: + optimal_width = base_width + 4 + else: + optimal_width = base_width + 5 + optimal_width = max(10, min(60, optimal_width)) + + worksheet.set_column(f"{col_letter}:{col_letter}", optimal_width) + + # 5. 
自动调整行高 + # 设置表头行高为35点 + worksheet.set_row(0, 35) + + # 设置数据行行高 + for row_idx, row in df.iterrows(): + max_row_height = 20 # 中国表格规范建议的最小行高 + + for col_idx, value in enumerate(row): + if col_idx < len(headers): + col_width = min( + 60, + max( + 10, self.calculate_text_width(str(headers[col_idx])) + 5 + ), + ) + else: + col_width = 15 + + cell_lines = self.calculate_text_height(str(value), col_width) + cell_height = cell_lines * 20 # 每行20点高度,符合中国规范 + + max_row_height = max(max_row_height, cell_height) + + final_height = min(120, max_row_height) + worksheet.set_row(row_idx + 1, final_height) + + print(f"Successfully applied Chinese standard formatting") + + except Exception as e: + print(f"Warning: Failed to apply Chinese standard formatting: {str(e)}") + # 降级到基础格式化 + self.apply_basic_formatting_fallback(worksheet, df) + + def apply_basic_formatting_fallback(self, worksheet, df): + """ + 基础格式化降级方案 + """ + try: + # 基础列宽调整 + for i, column in enumerate(df.columns): + column_width = ( + max( + len(str(column)), + (df[column].astype(str).map(len).max() if not df.empty else 0), + ) + + 2 + ) + + col_letter = self.get_column_letter(i) + worksheet.set_column( + f"{col_letter}:{col_letter}", min(60, max(10, column_width)) + ) + + print("Applied basic formatting fallback") + + except Exception as e: + print(f"Warning: Even basic formatting failed: {str(e)}") diff --git a/plugins/actions/knowledge-card/README.md b/plugins/actions/knowledge-card/README.md new file mode 100644 index 0000000..7357cac --- /dev/null +++ b/plugins/actions/knowledge-card/README.md @@ -0,0 +1,15 @@ +# Flash Card + +Quickly generates beautiful flashcards from text, extracting key points and categories for efficient learning. + +## Features + +- **Instant Generation**: Turn any text into a structured flashcard. +- **Key Point Extraction**: Automatically identifies core concepts. +- **Visual Design**: Generates a visually appealing HTML card. + +## Usage + +1. Install the plugin. +2. Send text to the chat. 
+3. The plugin will analyze the text and generate a flashcard. diff --git a/plugins/actions/knowledge-card/README_CN.md b/plugins/actions/knowledge-card/README_CN.md new file mode 100644 index 0000000..7289e91 --- /dev/null +++ b/plugins/actions/knowledge-card/README_CN.md @@ -0,0 +1,15 @@ +# 闪记卡 (Flash Card) + +快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类,助力高效学习。 + +## 功能特点 + +- **即时生成**:将任何文本转化为结构化的记忆卡片。 +- **要点提取**:自动识别核心概念。 +- **视觉设计**:生成视觉精美的 HTML 卡片。 + +## 使用方法 + +1. 安装插件。 +2. 发送文本到聊天框。 +3. 插件将分析文本并生成一张闪记卡。 diff --git a/plugins/actions/knowledge-card/knowledge_card.py b/plugins/actions/knowledge-card/knowledge_card.py new file mode 100644 index 0000000..4e852d0 --- /dev/null +++ b/plugins/actions/knowledge-card/knowledge_card.py @@ -0,0 +1,554 @@ +""" +title: 闪记卡 (Flash Card) +author: Antigravity +author_url: https://github.com/open-webui +funding_url: https://github.com/open-webui +version: 0.2.1 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg== +description: 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类。 +""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any, List +import json +import logging +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Action: + class Valves(BaseModel): + model_id: str = Field( + default="", + description="用于生成卡片内容的模型 
ID。如果为空,则使用当前模型。", + ) + min_text_length: int = Field( + default=50, description="生成闪记卡所需的最小文本长度(字符数)。" + ) + max_text_length: int = Field( + default=2000, + description="建议的最大文本长度。超过此长度建议使用深度分析工具。", + ) + language: str = Field( + default="zh", description="卡片内容的目标语言 (例如 'zh', 'en')。" + ) + show_status: bool = Field( + default=True, description="是否在聊天界面显示状态更新。" + ) + + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Any] = None, + ) -> Optional[dict]: + print(f"action:{__name__} triggered") + + if not __event_emitter__: + return body + + # Get the last user message + messages = body.get("messages", []) + if not messages: + return body + + # Usually the action is triggered on the last message + target_message = messages[-1]["content"] + + # Check text length + text_length = len(target_message) + if text_length < self.valves.min_text_length: + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "warning", + "content": f"文本过短({text_length}字符),建议至少{self.valves.min_text_length}字符。", + }, + } + ) + return body + + if text_length > self.valves.max_text_length: + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": f"文本较长({text_length}字符),建议使用'墨海拾贝'进行深度分析。", + }, + } + ) + + # Notify user that we are generating the card + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": "⚡ 正在生成闪记卡...", + }, + } + ) + + try: + # 1. 
Extract information using LLM + user_id = __user__.get("id") if __user__ else "default" + user_obj = Users.get_user_by_id(user_id) + + model = self.valves.model_id if self.valves.model_id else body.get("model") + + system_prompt = f""" +你是一个闪记卡生成专家,专注于创建适合学习和记忆的知识卡片。你的任务是将文本提炼成简洁、易记的学习卡片。 + +请提取以下字段,并以 JSON 格式返回: +1. "title": 创建一个简短、精准的标题(6-12 字),突出核心概念 +2. "summary": 用一句话总结核心要义(20-40 字),要通俗易懂、便于记忆 +3. "key_points": 列出 3-5 个关键记忆点(每个 10-20 字) + - 每个要点应该是独立的知识点 + - 使用简洁、口语化的表达 + - 避免冗长的句子 +4. "tags": 列出 2-4 个分类标签(每个 2-5 字) +5. "category": 选择一个主分类(如:概念、技能、事实、方法等) + +目标语言: {self.valves.language} + +重要原则: +- **极简主义**: 每个要点都要精炼到极致 +- **记忆优先**: 内容要便于记忆和回忆 +- **核心聚焦**: 只提取最核心的知识点 +- **口语化**: 使用通俗易懂的语言 +- 只返回 JSON 对象,不要包含 markdown 格式 + """ + + prompt = f"请将以下文本提炼成一张学习记忆卡片:\n\n{target_message}" + + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt}, + ], + "stream": False, + } + + response = await generate_chat_completion(__request__, payload, user_obj) + content = response["choices"][0]["message"]["content"] + + # Parse JSON + try: + # simple cleanup in case of markdown code blocks + if "```json" in content: + content = content.split("```json")[1].split("```")[0].strip() + elif "```" in content: + content = content.split("```")[1].split("```")[0].strip() + + card_data = json.loads(content) + except Exception as e: + logger.error(f"Failed to parse JSON: {e}, content: {content}") + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": "生成卡片数据失败,请重试。", + }, + } + ) + return body + + # 2. Generate HTML + html_card = self.generate_html_card(card_data) + + # 3. Append to message + # We append it to the user message so it shows up as part of the interaction + # Or we can append it to the assistant response if we were a Pipe, but this is an Action. + # Actions usually modify the input or trigger a side effect. 
+ # To show the card, we can append it to the message content. + + html_embed_tag = f"```html\n{html_card}\n```" + body["messages"][-1]["content"] += f"\n\n{html_embed_tag}" + + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "success", + "content": "⚡ 闪记卡生成成功!", + }, + } + ) + + return body + + except Exception as e: + logger.error(f"Error generating knowledge card: {e}") + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": f"生成知识卡片时出错: {str(e)}", + }, + } + ) + return body + + def generate_html_card(self, data): + # Enhanced CSS with premium styling + style = """ + + """ + + # Enhanced HTML structure + html = f""" + + + + + {style} + + +
+
+
+
+
{data.get('category', '通用知识')}
+

{data.get('title', '知识卡片')}

+
+
+
+ {data.get('summary', '')} +
+
核心要点
+
    + {''.join([f'
  • {point}
  • ' for point in data.get('key_points', [])])} +
+
+ +
+
+
+ +""" + return html diff --git a/plugins/actions/knowledge-card/knowledge_card_en.py b/plugins/actions/knowledge-card/knowledge_card_en.py new file mode 100644 index 0000000..fe9a82b --- /dev/null +++ b/plugins/actions/knowledge-card/knowledge_card_en.py @@ -0,0 +1,554 @@ +""" +title: Flash Card +author: Antigravity +author_url: https://github.com/open-webui +funding_url: https://github.com/open-webui +version: 0.2.1 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjRkZENzAwIi8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjRkZBNzAwIi8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTEzIDJMMyA3djEzbDEwIDV2LTZ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTEzIDJ2Nmw4LTN2MTNsLTggM3YtNnoiIGZpbGw9IiM2NjdlZWEiLz48cGF0aCBkPSJNMTMgMnY2bTAgNXYxMCIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIgc3Ryb2tlLW9wYWNpdHk9IjAuMyIvPjwvc3ZnPg== +description: Quickly generates beautiful flashcards from text, extracting key points and categories. 
+""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any, List +import json +import logging +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +# Setup logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Action: + class Valves(BaseModel): + model_id: str = Field( + default="", + description="用于生成卡片内容的模型 ID。如果为空,则使用当前模型。", + ) + min_text_length: int = Field( + default=50, description="生成闪记卡所需的最小文本长度(字符数)。" + ) + max_text_length: int = Field( + default=2000, + description="建议的最大文本长度。超过此长度建议使用深度分析工具。", + ) + language: str = Field( + default="zh", description="卡片内容的目标语言 (例如 'zh', 'en')。" + ) + show_status: bool = Field( + default=True, description="是否在聊天界面显示状态更新。" + ) + + def __init__(self): + self.valves = self.Valves() + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Any] = None, + ) -> Optional[dict]: + print(f"action:{__name__} triggered") + + if not __event_emitter__: + return body + + # Get the last user message + messages = body.get("messages", []) + if not messages: + return body + + # Usually the action is triggered on the last message + target_message = messages[-1]["content"] + + # Check text length + text_length = len(target_message) + if text_length < self.valves.min_text_length: + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "warning", + "content": f"文本过短({text_length}字符),建议至少{self.valves.min_text_length}字符。", + }, + } + ) + return body + + if text_length > self.valves.max_text_length: + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": f"文本较长({text_length}字符),建议使用'墨海拾贝'进行深度分析。", + }, + } + ) + + # Notify user that we are generating the card + if self.valves.show_status: + await __event_emitter__( 
+ { + "type": "notification", + "data": { + "type": "info", + "content": "⚡ 正在生成闪记卡...", + }, + } + ) + + try: + # 1. Extract information using LLM + user_id = __user__.get("id") if __user__ else "default" + user_obj = Users.get_user_by_id(user_id) + + model = self.valves.model_id if self.valves.model_id else body.get("model") + + system_prompt = f""" +你是一个闪记卡生成专家,专注于创建适合学习和记忆的知识卡片。你的任务是将文本提炼成简洁、易记的学习卡片。 + +请提取以下字段,并以 JSON 格式返回: +1. "title": 创建一个简短、精准的标题(6-12 字),突出核心概念 +2. "summary": 用一句话总结核心要义(20-40 字),要通俗易懂、便于记忆 +3. "key_points": 列出 3-5 个关键记忆点(每个 10-20 字) + - 每个要点应该是独立的知识点 + - 使用简洁、口语化的表达 + - 避免冗长的句子 +4. "tags": 列出 2-4 个分类标签(每个 2-5 字) +5. "category": 选择一个主分类(如:概念、技能、事实、方法等) + +目标语言: {self.valves.language} + +重要原则: +- **极简主义**: 每个要点都要精炼到极致 +- **记忆优先**: 内容要便于记忆和回忆 +- **核心聚焦**: 只提取最核心的知识点 +- **口语化**: 使用通俗易懂的语言 +- 只返回 JSON 对象,不要包含 markdown 格式 + """ + + prompt = f"请将以下文本提炼成一张学习记忆卡片:\n\n{target_message}" + + payload = { + "model": model, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt}, + ], + "stream": False, + } + + response = await generate_chat_completion(__request__, payload, user_obj) + content = response["choices"][0]["message"]["content"] + + # Parse JSON + try: + # simple cleanup in case of markdown code blocks + if "```json" in content: + content = content.split("```json")[1].split("```")[0].strip() + elif "```" in content: + content = content.split("```")[1].split("```")[0].strip() + + card_data = json.loads(content) + except Exception as e: + logger.error(f"Failed to parse JSON: {e}, content: {content}") + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": "生成卡片数据失败,请重试。", + }, + } + ) + return body + + # 2. Generate HTML + html_card = self.generate_html_card(card_data) + + # 3. 
Append to message + # We append it to the user message so it shows up as part of the interaction + # Or we can append it to the assistant response if we were a Pipe, but this is an Action. + # Actions usually modify the input or trigger a side effect. + # To show the card, we can append it to the message content. + + html_embed_tag = f"```html\n{html_card}\n```" + body["messages"][-1]["content"] += f"\n\n{html_embed_tag}" + + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "success", + "content": "⚡ 闪记卡生成成功!", + }, + } + ) + + return body + + except Exception as e: + logger.error(f"Error generating knowledge card: {e}") + if self.valves.show_status: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": f"生成知识卡片时出错: {str(e)}", + }, + } + ) + return body + + def generate_html_card(self, data): + # Enhanced CSS with premium styling + style = """ + + """ + + # Enhanced HTML structure + html = f""" + + + + + {style} + + +
+
+
+
+
{data.get('category', '通用知识')}
+

{data.get('title', '知识卡片')}

+
+
+
+ {data.get('summary', '')} +
+
核心要点
+
    + {''.join([f'
  • {point}
  • ' for point in data.get('key_points', [])])} +
+
+ +
+
+
+ +""" + return html diff --git a/plugins/actions/smart-mind-map/README.md b/plugins/actions/smart-mind-map/README.md new file mode 100644 index 0000000..8c3a409 --- /dev/null +++ b/plugins/actions/smart-mind-map/README.md @@ -0,0 +1,210 @@ +# Smart Mind Map - Mind Mapping Generation Plugin + +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 0.7.2 | **License:** MIT + +> **Important**: To ensure the maintainability and usability of all plugins, each plugin should be accompanied by clear and comprehensive documentation to ensure its functionality, configuration, and usage are well explained. + +Smart Mind Map is a powerful OpenWebUI action plugin that intelligently analyzes long-form text content and automatically generates interactive mind maps, helping users structure and visualize knowledge. + +--- + +## Core Features + +- ✅ **Intelligent Text Analysis**: Automatically identifies core themes, key concepts, and hierarchical structures +- ✅ **Interactive Visualization**: Generates beautiful interactive mind maps based on Markmap.js +- ✅ **Multi-language Support**: Automatically adjusts output based on user language +- ✅ **Real-time Rendering**: Renders mind maps directly in the chat interface without navigation +- ✅ **Export Capabilities**: Supports copying SVG code and Markdown source +- ✅ **Customizable Configuration**: Configurable LLM model, minimum text length, and other parameters + +--- + +## How It Works + +1. **Text Extraction**: Extracts text content from user messages (automatically filters HTML code blocks) +2. **Intelligent Analysis**: Analyzes text structure using the configured LLM model +3. **Markdown Generation**: Converts analysis results to Markmap-compatible Markdown format +4. **Visual Rendering**: Renders the mind map using Markmap.js in an HTML template +5. **Interactive Display**: Presents the mind map to users in an interactive format within the chat interface + +--- + +## Installation and Configuration + +### 1. 
Plugin Installation + +1. Download the `思维导图.py` file to your local computer +2. In OpenWebUI Admin Settings, find the "Plugins" section +3. Select "Actions" type +4. Upload the downloaded file +5. Refresh the page, and the plugin will be available + +### 2. Model Configuration + +The plugin requires access to an LLM model for text analysis. Please ensure: + +- Your OpenWebUI instance has at least one available LLM model configured +- Recommended to use fast, economical models (e.g., `gemini-2.5-flash`) for the best experience +- Configure the `LLM_MODEL_ID` parameter in the plugin settings + +### 3. Plugin Activation + +Select the "Smart Mind Map" action plugin in chat settings to enable it. + +--- + +## Configuration Parameters + +You can adjust the following parameters in the plugin's settings (Valves): + +| Parameter | Default | Description | +| :--- | :--- | :--- | +| `show_status` | `true` | Whether to display operation status updates in the chat interface (e.g., "Analyzing..."). | +| `LLM_MODEL_ID` | `gemini-2.5-flash` | LLM model ID for text analysis. Recommended to use fast and economical models. | +| `MIN_TEXT_LENGTH` | `100` | Minimum text length (in characters) required for mind map analysis. Text that's too short cannot generate valid mind maps. | + +--- + +## Usage + +### Basic Usage + +1. Enable the "Smart Mind Map" action in chat settings +2. Input or paste long-form text content (at least 100 characters) in the conversation +3. After sending the message, the plugin will automatically analyze and generate a mind map +4. The mind map will be rendered directly in the chat interface + +### Usage Example + +**Input Text:** +``` +Artificial Intelligence (AI) is a branch of computer science dedicated to creating systems capable of performing tasks that typically require human intelligence. +Main application areas include: +1. Machine Learning - Enables computers to learn from data +2. 
Natural Language Processing - Understanding and generating human language +3. Computer Vision - Recognizing and processing images +4. Robotics - Creating intelligent systems that can interact with the physical world +``` + +**Generated Result:** +The plugin will generate an interactive mind map centered on "Artificial Intelligence", including major application areas and their sub-concepts. + +### Export Features + +Generated mind maps support two export methods: + +1. **Copy SVG Code**: Click the "Copy SVG Code" button to copy the mind map in SVG format to the clipboard +2. **Copy Markdown**: Click the "Copy Markdown" button to copy the raw Markdown format to the clipboard + +--- + +## Technical Architecture + +### Frontend Rendering + +- **Markmap.js**: Open-source mind mapping rendering engine +- **D3.js**: Data visualization foundation library +- **Responsive Design**: Adapts to different screen sizes + +### Backend Processing + +- **LLM Integration**: Calls configured models via `generate_chat_completion` +- **Text Preprocessing**: Automatically filters HTML code blocks, extracts plain text content +- **Format Conversion**: Converts LLM output to Markmap-compatible Markdown format + +### Security + +- **XSS Protection**: Automatically escapes `</script>` tags to prevent script injection +- **Input Validation**: Checks text length to avoid invalid requests + +--- + +## Troubleshooting + +### Issue: Plugin Won't Start + +**Solution:** +- Check OpenWebUI logs for error messages +- Confirm the plugin is correctly uploaded and enabled +- Verify OpenWebUI version supports action plugins + +### Issue: Text Content Too Short + +**Symptom:** Prompt shows "Text content is too short for effective analysis" + +**Solution:** +- Ensure input text contains at least 100 characters (default configuration) +- Lower the `MIN_TEXT_LENGTH` parameter value in plugin settings +- Provide more detailed, structured text content + +### Issue: Mind Map Not Generated + +**Solution:** +- Check if 
`LLM_MODEL_ID` is configured correctly +- Confirm the configured model is available in OpenWebUI +- Review backend logs for LLM call failures +- Verify user has sufficient permissions to access the configured model + +### Issue: Mind Map Display Error + +**Symptom:** Shows "⚠️ Mind map rendering failed" + +**Solution:** +- Check browser console for error messages +- Confirm Markmap.js and D3.js libraries are loading correctly +- Verify generated Markdown format conforms to Markmap specifications +- Try refreshing the page to re-render + +### Issue: Export Function Not Working + +**Solution:** +- Confirm browser supports Clipboard API +- Check if browser is blocking clipboard access permissions +- Use modern browsers (Chrome, Firefox, Edge, etc.) + +--- + +## Best Practices + +1. **Text Preparation** + - Provide text content with clear structure and distinct hierarchies + - Use paragraphs, lists, and other formatting to help LLM understand text structure + - Avoid excessively lengthy or unstructured text + +2. **Model Selection** + - For daily use, recommend fast models like `gemini-2.5-flash` + - For complex text analysis, use more powerful models (e.g., GPT-4) + - Balance speed and analysis quality based on needs + +3. **Performance Optimization** + - Set `MIN_TEXT_LENGTH` appropriately to avoid processing text that's too short + - For particularly long texts, consider summarizing before generating mind maps + - Disable `show_status` in production environments to reduce interface updates + +--- + +## Changelog + +### v0.7.2 (Current Version) +- Optimized text extraction logic, automatically filters HTML code blocks +- Improved error handling and user feedback +- Enhanced export functionality compatibility +- Optimized UI styling and interactive experience + +--- + +## License + +This plugin is released under the MIT License. + +## Contributing + +Issue reports and improvement suggestions are welcome! 
Please visit the project repository: [awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui) + +--- + +## Related Resources + +- [Markmap Official Website](https://markmap.js.org/) +- [OpenWebUI Documentation](https://docs.openwebui.com/) +- [D3.js Official Website](https://d3js.org/) diff --git a/plugins/actions/smart-mind-map/README_CN.md b/plugins/actions/smart-mind-map/README_CN.md new file mode 100644 index 0000000..3372ee8 --- /dev/null +++ b/plugins/actions/smart-mind-map/README_CN.md @@ -0,0 +1,210 @@ +# 智绘心图 - 思维导图生成插件 + +**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 0.7.2 | **许可证:** MIT + +> **重要提示**:为了确保所有插件的可维护性和易用性,每个插件都应附带清晰、完整的文档,以确保其功能、配置和使用方法得到充分说明。 + +智绘心图是一个强大的 OpenWebUI 动作插件,能够智能分析长篇文本内容,自动生成交互式思维导图,帮助用户结构化和可视化知识。 + +--- + +## 核心特性 + +- ✅ **智能文本分析**: 自动识别文本的核心主题、关键概念和层次结构 +- ✅ **交互式可视化**: 基于 Markmap.js 生成美观的交互式思维导图 +- ✅ **多语言支持**: 根据用户语言自动调整输出 +- ✅ **实时渲染**: 在聊天界面中直接渲染思维导图,无需跳转 +- ✅ **导出功能**: 支持复制 SVG 代码和 Markdown 源码 +- ✅ **自定义配置**: 可配置 LLM 模型、最小文本长度等参数 + +--- + +## 工作原理 + +1. **文本提取**: 从用户消息中提取文本内容(自动过滤 HTML 代码块) +2. **智能分析**: 使用配置的 LLM 模型分析文本结构 +3. **Markdown 生成**: 将分析结果转换为 Markmap 兼容的 Markdown 格式 +4. **可视化渲染**: 在 HTML 模板中使用 Markmap.js 渲染思维导图 +5. **交互展示**: 在聊天界面中以可交互的形式展示给用户 + +--- + +## 安装与配置 + +### 1. 插件安装 + +1. 下载 `思维导图.py` 文件到本地 +2. 在 OpenWebUI 管理员设置中找到"插件"(Plugins)部分 +3. 选择"动作"(Actions)类型 +4. 上传下载的文件 +5. 刷新页面,插件即可使用 + +### 2. 模型配置 + +插件需要访问 LLM 模型来分析文本。请确保: + +- 您的 OpenWebUI 实例中配置了至少一个可用的 LLM 模型 +- 推荐使用快速、经济的模型(如 `gemini-2.5-flash`)来获得最佳体验 +- 在插件设置中配置 `LLM_MODEL_ID` 参数 + +### 3. 插件启用 + +在聊天设置中选择"智绘心图"动作插件即可启用。 + +--- + +## 配置参数 + +您可以在插件的设置(Valves)中调整以下参数: + +| 参数 | 默认值 | 描述 | +| :--- | :--- | :--- | +| `show_status` | `true` | 是否在聊天界面显示操作状态更新(如"正在分析...")。 | +| `LLM_MODEL_ID` | `gemini-2.5-flash` | 用于文本分析的 LLM 模型 ID。推荐使用快速且经济的模型。 | +| `MIN_TEXT_LENGTH` | `100` | 进行思维导图分析所需的最小文本长度(字符数)。文本过短将无法生成有效的导图。 | + +--- + +## 使用方法 + +### 基本使用 + +1. 在聊天设置中启用"智绘心图"动作 +2. 在对话中输入或粘贴长篇文本内容(至少 100 字符) +3. 发送消息后,插件会自动分析并生成思维导图 +4. 
思维导图将在聊天界面中直接渲染显示 + +### 使用示例 + +**输入文本:** +``` +人工智能(AI)是计算机科学的一个分支,致力于创建能够执行通常需要人类智能的任务的系统。 +主要应用领域包括: +1. 机器学习 - 使计算机能够从数据中学习 +2. 自然语言处理 - 理解和生成人类语言 +3. 计算机视觉 - 识别和处理图像 +4. 机器人技术 - 创建能够与物理世界交互的智能系统 +``` + +**生成结果:** +插件会生成一个以"人工智能"为中心主题的交互式思维导图,包含主要应用领域及其子概念。 + +### 导出功能 + +生成的思维导图支持两种导出方式: + +1. **复制 SVG 代码**: 点击"复制 SVG 代码"按钮,可将思维导图的 SVG 格式复制到剪贴板 +2. **复制 Markdown**: 点击"复制 Markdown"按钮,可将原始 Markdown 格式复制到剪贴板 + +--- + +## 技术架构 + +### 前端渲染 + +- **Markmap.js**: 开源的思维导图渲染引擎 +- **D3.js**: 数据可视化基础库 +- **响应式设计**: 适配不同屏幕尺寸 + +### 后端处理 + +- **LLM 集成**: 通过 `generate_chat_completion` 调用配置的模型 +- **文本预处理**: 自动过滤 HTML 代码块,提取纯文本内容 +- **格式转换**: 将 LLM 输出转换为 Markmap 兼容的 Markdown 格式 + +### 安全性 + +- **XSS 防护**: 自动转义 `</script>` 标签,防止脚本注入 +- **输入验证**: 检查文本长度,避免无效请求 + +--- + +## 故障排除 + +### 问题:插件无法启动 + +**解决方案:** +- 检查 OpenWebUI 日志,查看是否有错误信息 +- 确认插件已正确上传并启用 +- 验证 OpenWebUI 版本是否支持动作插件 + +### 问题:文本内容过短 + +**现象:** 提示"文本内容过短,无法进行有效分析" + +**解决方案:** +- 确保输入的文本至少包含 100 个字符(默认配置) +- 可以在插件设置中降低 `MIN_TEXT_LENGTH` 参数值 +- 提供更详细、结构化的文本内容 + +### 问题:思维导图未生成 + +**解决方案:** +- 检查 `LLM_MODEL_ID` 是否配置正确 +- 确认配置的模型在 OpenWebUI 中可用 +- 查看后端日志,检查是否有 LLM 调用失败的错误 +- 验证用户是否有足够的权限访问配置的模型 + +### 问题:思维导图显示错误 + +**现象:** 显示"⚠️ 思维导图渲染失败" + +**解决方案:** +- 检查浏览器控制台的错误信息 +- 确认 Markmap.js 和 D3.js 库是否正确加载 +- 验证生成的 Markdown 格式是否符合 Markmap 规范 +- 尝试刷新页面重新渲染 + +### 问题:导出功能不工作 + +**解决方案:** +- 确认浏览器支持剪贴板 API +- 检查浏览器是否阻止了剪贴板访问权限 +- 使用现代浏览器(Chrome、Firefox、Edge 等) + +--- + +## 最佳实践 + +1. **文本准备** + - 提供结构清晰、层次分明的文本内容 + - 使用段落、列表等格式帮助 LLM 理解文本结构 + - 避免过于冗长或无结构的文本 + +2. **模型选择** + - 对于日常使用,推荐 `gemini-2.5-flash` 等快速模型 + - 对于复杂文本分析,可以使用更强大的模型(如 GPT-4) + - 根据需求平衡速度和分析质量 + +3. 
**性能优化** + - 合理设置 `MIN_TEXT_LENGTH`,避免处理过短的文本 + - 对于特别长的文本,考虑先进行摘要再生成思维导图 + - 在生产环境中关闭 `show_status` 以减少界面更新 + +--- + +## 更新日志 + +### v0.7.2 (当前版本) +- 优化文本提取逻辑,自动过滤 HTML 代码块 +- 改进错误处理和用户反馈 +- 增强导出功能的兼容性 +- 优化 UI 样式和交互体验 + +--- + +## 许可证 + +本插件采用 MIT 许可证发布。 + +## 贡献 + +欢迎提交问题报告和改进建议!请访问项目仓库:[awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui) + +--- + +## 相关资源 + +- [Markmap 官方网站](https://markmap.js.org/) +- [OpenWebUI 文档](https://docs.openwebui.com/) +- [D3.js 官方网站](https://d3js.org/) diff --git a/plugins/actions/smart-mind-map/smart_mind_map.py b/plugins/actions/smart-mind-map/smart_mind_map.py new file mode 100644 index 0000000..3f92b04 --- /dev/null +++ b/plugins/actions/smart-mind-map/smart_mind_map.py @@ -0,0 +1,611 @@ +""" +title: Smart Mind Map +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSIxMC41IiB4Mj0iNiIgeTI9IjYiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSIxMC41IiB4Mj0iMTgiIHkyPSI2Ii8+CiAgPGNpcmNsZSBjeD0iMTkiIGN5PSI1IiByPSIxLjUiLz4KICA8bGluZSB4MT0iMTAuNSIgeTE9IjEzLjUiIHgyPSI2IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iMTkiIHI9IjEuNSIvPgogIDxsaW5lIHgxPSIxMy41IiB5MT0iMTMuNSIgeDI9IjE4IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSIxOSIgY3k9IjE5IiByPSIxLjUiLz4KPC9zdmc+ +version: 0.7.3 
+description: 智能分析长文本并生成交互式思维导图,支持 SVG/Markdown 导出。 +""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any +import logging +import time +import re +from fastapi import Request +from datetime import datetime +import pytz + +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT_MINDMAP_ASSISTANT = """ +You are a professional mind map generation assistant, capable of efficiently analyzing long-form text provided by users and structuring its core themes, key concepts, branches, and sub-branches into standard Markdown list syntax for rendering by Markmap.js. + +Please strictly follow these guidelines: +- **Language**: All output must be in the language specified by the user. +- **Format**: Your output must strictly be in Markdown list format, wrapped with ```markdown and ```. + - Use `#` to define the central theme (root node). + - Use `-` with two-space indentation to represent branches and sub-branches. +- **Content**: + - Identify the central theme of the text as the `#` heading. + - Identify main concepts as first-level list items. + - Identify supporting details or sub-concepts as nested list items. + - Node content should be concise and clear, avoiding verbosity. +- **Output Markdown syntax only**: Do not include any additional greetings, explanations, or guiding text. +- **If text is too short or cannot generate a valid mind map**: Output a simple Markdown list indicating inability to generate, for example: + ```markdown + # Unable to Generate Mind Map + - Reason: Insufficient or unclear text content + ``` +""" + +USER_PROMPT_GENERATE_MINDMAP = """ +Please analyze the following long-form text and structure its core themes, key concepts, branches, and sub-branches into standard Markdown list syntax for Markmap.js rendering. 
+ +--- +**User Context Information:** +User Name: {user_name} +Current Date & Time: {current_date_time_str} +Current Weekday: {current_weekday} +Current Timezone: {current_timezone_str} +User Language: {user_language} +--- + +**Long-form Text Content:** +{long_text_content} +""" + +HTML_TEMPLATE_MINDMAP = """ + + + + + + Smart Mind Map: Mind Map Visualization + + + + + + +
+
+

🧠 Smart Mind Map

+
+
+ User: {user_name} + Analysis Time: {current_date_time_str} + Weekday: {current_weekday_zh} +
+
+
+
+ + +
+
+ +
+ + + + + + +""" + + +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, + description="Whether to show action status updates in the chat interface.", + ) + LLM_MODEL_ID: str = Field( + default="gemini-2.5-flash", + description="Built-in LLM model ID for text analysis.", + ) + MIN_TEXT_LENGTH: int = Field( + default=100, + description="Minimum text length (character count) required for mind map analysis.", + ) + + def __init__(self): + self.valves = self.Valves() + self.weekday_map = { + "Monday": "Monday", + "Tuesday": "Tuesday", + "Wednesday": "Wednesday", + "Thursday": "Thursday", + "Friday": "Friday", + "Saturday": "Saturday", + "Sunday": "Sunday", + } + + def _extract_markdown_syntax(self, llm_output: str) -> str: + match = re.search(r"```markdown\s*(.*?)\s*```", llm_output, re.DOTALL) + if match: + extracted_content = match.group(1).strip() + else: + logger.warning( + "LLM output did not strictly follow the expected Markdown format, treating the entire output as summary." 
+ ) + extracted_content = llm_output.strip() + return extracted_content.replace("", "<\\/script>") + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + logger.info("Action: Smart Mind Map (v0.7.2) started") + + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "en-US") if __user__ else "en-US" + ) + user_name = __user__[0].get("name", "User") if __user__[0] else "User" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "en-US") + user_name = __user__.get("name", "User") + user_id = __user__.get("id", "unknown_user") + + try: + shanghai_tz = pytz.timezone("Asia/Shanghai") + current_datetime_shanghai = datetime.now(shanghai_tz) + current_date_time_str = current_datetime_shanghai.strftime( + "%Y-%m-%d %H:%M:%S" + ) + current_weekday_en = current_datetime_shanghai.strftime("%A") + current_weekday_zh = self.weekday_map.get(current_weekday_en, "Unknown") + current_year = current_datetime_shanghai.strftime("%Y") + current_timezone_str = "Asia/Shanghai" + except Exception as e: + logger.warning(f"Failed to get timezone info: {e}, using default values.") + now = datetime.now() + current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S") + current_weekday_zh = "Unknown" + current_year = now.strftime("%Y") + current_timezone_str = "Unknown" + + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": "Smart Mind Map is starting, generating mind map for you...", + }, + } + ) + + messages = body.get("messages") + if ( + not messages + or not isinstance(messages, list) + or not messages[-1].get("content") + ): + error_message = "Unable to retrieve valid user message content." 
+ if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": {"type": "error", "content": error_message}, + } + ) + return { + "messages": [{"role": "assistant", "content": f"❌ {error_message}"}] + } + + parts = re.split(r"```html.*?```", messages[-1]["content"], flags=re.DOTALL) + long_text_content = "" + if parts: + for part in reversed(parts): + if part.strip(): + long_text_content = part.strip() + break + + if not long_text_content: + long_text_content = messages[-1]["content"].strip() + + if len(long_text_content) < self.valves.MIN_TEXT_LENGTH: + short_text_message = f"Text content is too short ({len(long_text_content)} characters), unable to perform effective analysis. Please provide at least {self.valves.MIN_TEXT_LENGTH} characters of text." + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": {"type": "warning", "content": short_text_message}, + } + ) + return { + "messages": [ + {"role": "assistant", "content": f"⚠️ {short_text_message}"} + ] + } + + if self.valves.show_status and __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "Smart Mind Map: Analyzing text structure in depth...", + "done": False, + "hidden": False, + }, + } + ) + + try: + unique_id = f"id_{int(time.time() * 1000)}" + + formatted_user_prompt = USER_PROMPT_GENERATE_MINDMAP.format( + user_name=user_name, + current_date_time_str=current_date_time_str, + current_weekday=current_weekday_zh, + current_timezone_str=current_timezone_str, + user_language=user_language, + long_text_content=long_text_content, + ) + + llm_payload = { + "model": self.valves.LLM_MODEL_ID, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT}, + {"role": "user", "content": formatted_user_prompt}, + ], + "temperature": 0.5, + "stream": False, + } + user_obj = Users.get_user_by_id(user_id) + if not user_obj: + raise ValueError(f"Unable to get user object, user ID: {user_id}") + 
+ llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj + ) + + if ( + not llm_response + or "choices" not in llm_response + or not llm_response["choices"] + ): + raise ValueError("LLM response format is incorrect or empty.") + + assistant_response_content = llm_response["choices"][0]["message"][ + "content" + ] + markdown_syntax = self._extract_markdown_syntax(assistant_response_content) + + final_html_content = ( + HTML_TEMPLATE_MINDMAP.replace("{unique_id}", unique_id) + .replace("{user_language}", user_language) + .replace("{user_name}", user_name) + .replace("{current_date_time_str}", current_date_time_str) + .replace("{current_weekday_zh}", current_weekday_zh) + .replace("{current_year}", current_year) + .replace("{markdown_syntax}", markdown_syntax) + ) + + html_embed_tag = f"```html\n{final_html_content}\n```" + body["messages"][-1]["content"] = f"{long_text_content}\n\n{html_embed_tag}" + + if self.valves.show_status and __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "Smart Mind Map: Drawing completed!", + "done": True, + "hidden": False, + }, + } + ) + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "success", + "content": f"Mind map has been generated, {user_name}!", + }, + } + ) + logger.info("Action: Smart Mind Map (v0.7.2) completed successfully") + + except Exception as e: + error_message = f"Smart Mind Map processing failed: {str(e)}" + logger.error(f"Smart Mind Map error: {error_message}", exc_info=True) + user_facing_error = f"Sorry, Smart Mind Map encountered an error during processing: {str(e)}.\nPlease check the Open WebUI backend logs for more details." 
+ body["messages"][-1][ + "content" + ] = f"{long_text_content}\n\n❌ **Error:** {user_facing_error}" + + if __event_emitter__: + if self.valves.show_status: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "Smart Mind Map: Processing failed.", + "done": True, + "hidden": False, + }, + } + ) + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": f"Smart Mind Map generation failed, {user_name}!", + }, + } + ) + + return body diff --git a/plugins/actions/smart-mind-map/思维导图.py b/plugins/actions/smart-mind-map/思维导图.py new file mode 100644 index 0000000..2804ee3 --- /dev/null +++ b/plugins/actions/smart-mind-map/思维导图.py @@ -0,0 +1,611 @@ +""" +title: 智绘心图 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCI+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIxMiIgcj0iMyIgZmlsbD0iY3VycmVudENvbG9yIi8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iOSIgeDI9IjEyIiB5Mj0iNCIvPgogIDxjaXJjbGUgY3g9IjEyIiBjeT0iMyIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEyIiB5MT0iMTUiIHgyPSIxMiIgeTI9IjIwIi8+CiAgPGNpcmNsZSBjeD0iMTIiIGN5PSIyMSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjkiIHkxPSIxMiIgeDI9IjQiIHkyPSIxMiIvPgogIDxjaXJjbGUgY3g9IjMiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjE1IiB5MT0iMTIiIHgyPSIyMCIgeTI9IjEyIi8+CiAgPGNpcmNsZSBjeD0iMjEiIGN5PSIxMiIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEwLjUiIHkxPSIxMC41IiB4Mj0iNiIgeTI9IjYiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iNSIgcj0iMS41Ii8+CiAgPGxpbmUgeDE9IjEzLjUiIHkxPSIxMC41IiB4Mj0iMTgiIHkyPSI2Ii8+CiAgPGNpcmNsZSBjeD0iMTkiIGN5PSI1IiByPSIxLjUiLz4KICA8bGluZSB4MT0iMTAuNSIgeTE9IjEzLjUiIHgyPSI2IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSI1IiBjeT0iMTkiIHI9IjEuNSIvPgogIDxsaW5lIHgxPSIxMy41IiB5MT0iMTMuNSIgeDI9IjE4IiB5Mj0iMTgiLz4KICA8Y2lyY2xlIGN4PSIxOSIgY3k9IjE5IiByPSIxLjUiLz4KPC9zdmc+ +version: 0.7.2 +description: 智能分析文本内容,生成交互式思维导图,帮助用户结构化和可视化知识。 
+""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any +import logging +import time +import re +from fastapi import Request +from datetime import datetime +import pytz + +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT_MINDMAP_ASSISTANT = """ +你是一个专业的思维导图生成助手,能够高效地分析用户提供的长篇文本,并将其核心主题、关键概念、分支和子分支结构化为标准的Markdown列表语法,以便Markmap.js进行渲染。 + +请严格遵循以下指导原则: +- **语言**: 所有输出必须使用用户指定的语言。 +- **格式**: 你的输出必须严格为Markdown列表格式,并用```markdown 和 ``` 包裹。 + - 使用 `#` 定义中心主题(根节点)。 + - 使用 `-` 和两个空格的缩进表示分支和子分支。 +- **内容**: + - 识别文本的中心主题作为 `#` 标题。 + - 识别主要概念作为一级列表项。 + - 识别支持性细节或子概念作为嵌套的列表项。 + - 节点内容应简洁明了,避免冗长。 +- **只输出Markdown语法**: 不要包含任何额外的寒暄、解释或引导性文字。 +- **如果文本过短或无法生成有效导图**: 请输出一个简单的Markdown列表,表示无法生成,例如: + ```markdown + # 无法生成思维导图 + - 原因: 文本内容不足或不明确 + ``` +""" + +USER_PROMPT_GENERATE_MINDMAP = """ +请分析以下长篇文本,并将其核心主题、关键概念、分支和子分支结构化为标准的Markdown列表语法,以供Markmap.js渲染。 + +--- +**用户上下文信息:** +用户姓名: {user_name} +当前日期时间: {current_date_time_str} +当前星期: {current_weekday} +当前时区: {current_timezone_str} +用户语言: {user_language} +--- + +**长篇文本内容:** +Use code with caution. +Python +{long_text_content} +""" + +HTML_TEMPLATE_MINDMAP = """ + + + + + + 智绘心图: 思维导图 + + + + + + +
+
+

🧠 智绘心图

+
+
+ 用户: {user_name} + 分析时间: {current_date_time_str} + 星期: {current_weekday_zh} +
+
+
+
+ + +
+
+ +
+ + + + + + +""" + + +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, description="是否在聊天界面显示操作状态更新。" + ) + LLM_MODEL_ID: str = Field( + default="gemini-2.5-flash", + description="用于文本分析的内置LLM模型ID。", + ) + MIN_TEXT_LENGTH: int = Field( + default=100, description="进行思维导图分析所需的最小文本长度(字符数)。" + ) + + def __init__(self): + self.valves = self.Valves() + self.weekday_map = { + "Monday": "星期一", + "Tuesday": "星期二", + "Wednesday": "星期三", + "Thursday": "星期四", + "Friday": "星期五", + "Saturday": "星期六", + "Sunday": "星期日", + } + + def _extract_markdown_syntax(self, llm_output: str) -> str: + match = re.search(r"```markdown\s*(.*?)\s*```", llm_output, re.DOTALL) + if match: + extracted_content = match.group(1).strip() + else: + logger.warning( + "LLM输出未严格遵循预期Markdown格式,将整个输出作为摘要处理。" + ) + extracted_content = llm_output.strip() + return extracted_content.replace("", "<\\/script>") + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + logger.info("Action: 智绘心图 (v12 - Final Feedback Fix) started") + + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "zh-CN") if __user__ else "zh-CN" + ) + user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "zh-CN") + user_name = __user__.get("name", "用户") + user_id = __user__.get("id", "unknown_user") + + try: + shanghai_tz = pytz.timezone("Asia/Shanghai") + current_datetime_shanghai = datetime.now(shanghai_tz) + current_date_time_str = current_datetime_shanghai.strftime( + "%Y-%m-%d %H:%M:%S" + ) + current_weekday_en = current_datetime_shanghai.strftime("%A") + current_weekday_zh = self.weekday_map.get(current_weekday_en, "未知星期") + current_year = 
current_datetime_shanghai.strftime("%Y") + current_timezone_str = "Asia/Shanghai" + except Exception as e: + logger.warning(f"获取时区信息失败: {e},使用默认值。") + now = datetime.now() + current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S") + current_weekday_zh = "未知星期" + current_year = now.strftime("%Y") + current_timezone_str = "未知时区" + + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": "智绘心图已启动,正在为您生成思维导图...", + }, + } + ) + + messages = body.get("messages") + if ( + not messages + or not isinstance(messages, list) + or not messages[-1].get("content") + ): + error_message = "无法获取有效的用户消息内容。" + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": {"type": "error", "content": error_message}, + } + ) + return { + "messages": [{"role": "assistant", "content": f"❌ {error_message}"}] + } + + parts = re.split(r"```html.*?```", messages[-1]["content"], flags=re.DOTALL) + long_text_content = "" + if parts: + for part in reversed(parts): + if part.strip(): + long_text_content = part.strip() + break + + if not long_text_content: + long_text_content = messages[-1]["content"].strip() + + if len(long_text_content) < self.valves.MIN_TEXT_LENGTH: + short_text_message = f"文本内容过短({len(long_text_content)}字符),无法进行有效分析。请提供至少{self.valves.MIN_TEXT_LENGTH}字符的文本。" + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": {"type": "warning", "content": short_text_message}, + } + ) + return { + "messages": [ + {"role": "assistant", "content": f"⚠️ {short_text_message}"} + ] + } + + if self.valves.show_status and __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "智绘心图: 深入分析文本结构...", + "done": False, + "hidden": False, + }, + } + ) + + try: + unique_id = f"id_{int(time.time() * 1000)}" + + formatted_user_prompt = USER_PROMPT_GENERATE_MINDMAP.format( + user_name=user_name, + 
current_date_time_str=current_date_time_str, + current_weekday=current_weekday_zh, + current_timezone_str=current_timezone_str, + user_language=user_language, + long_text_content=long_text_content, + ) + + llm_payload = { + "model": self.valves.LLM_MODEL_ID, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT_MINDMAP_ASSISTANT}, + {"role": "user", "content": formatted_user_prompt}, + ], + "temperature": 0.5, + "stream": False, + } + user_obj = Users.get_user_by_id(user_id) + if not user_obj: + raise ValueError(f"无法获取用户对象,用户ID: {user_id}") + + llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj + ) + + if ( + not llm_response + or "choices" not in llm_response + or not llm_response["choices"] + ): + raise ValueError("LLM响应格式不正确或为空。") + + assistant_response_content = llm_response["choices"][0]["message"][ + "content" + ] + markdown_syntax = self._extract_markdown_syntax(assistant_response_content) + + final_html_content = ( + HTML_TEMPLATE_MINDMAP.replace("{unique_id}", unique_id) + .replace("{user_language}", user_language) + .replace("{user_name}", user_name) + .replace("{current_date_time_str}", current_date_time_str) + .replace("{current_weekday_zh}", current_weekday_zh) + .replace("{current_year}", current_year) + .replace("{markdown_syntax}", markdown_syntax) + ) + + html_embed_tag = f"```html\n{final_html_content}\n```" + body["messages"][-1]["content"] = f"{long_text_content}\n\n{html_embed_tag}" + + if self.valves.show_status and __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "智绘心图: 绘制完成!", + "done": True, + "hidden": False, + }, + } + ) + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "success", + "content": f"思维导图已生成,{user_name}!", + }, + } + ) + logger.info("Action: 智绘心图 (v12) completed successfully") + + except Exception as e: + error_message = f"智绘心图处理失败: {str(e)}" + logger.error(f"智绘心图错误: {error_message}", exc_info=True) + 
user_facing_error = f"抱歉,智绘心图在处理时遇到错误: {str(e)}。\n请检查Open WebUI后端日志获取更多详情。" + body["messages"][-1][ + "content" + ] = f"{long_text_content}\n\n❌ **错误:** {user_facing_error}" + + if __event_emitter__: + if self.valves.show_status: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "智绘心图: 处理失败。", + "done": True, + "hidden": False, + }, + } + ) + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": f"智绘心图生成失败, {user_name}!", + }, + } + ) + + return body diff --git a/plugins/actions/summary/README.md b/plugins/actions/summary/README.md new file mode 100644 index 0000000..3093ff2 --- /dev/null +++ b/plugins/actions/summary/README.md @@ -0,0 +1,15 @@ +# Deep Reading & Summary + +A powerful tool for analyzing long texts, generating detailed summaries, key points, and actionable insights. + +## Features + +- **Deep Analysis**: Goes beyond simple summarization to understand the core message. +- **Key Point Extraction**: Identifies and lists the most important information. +- **Actionable Advice**: Provides practical suggestions based on the text content. + +## Usage + +1. Install the plugin. +2. Send a long text or article to the chat. +3. Click the "Deep Reading" button (or trigger via command). diff --git a/plugins/actions/summary/README_CN.md b/plugins/actions/summary/README_CN.md new file mode 100644 index 0000000..175eb2f --- /dev/null +++ b/plugins/actions/summary/README_CN.md @@ -0,0 +1,15 @@ +# 深度阅读与摘要 (Deep Reading & Summary) + +一个强大的长文本分析工具,用于生成详细摘要、关键信息点和可执行的行动建议。 + +## 功能特点 + +- **深度分析**:超越简单的总结,深入理解核心信息。 +- **关键点提取**:识别并列出最重要的信息点。 +- **行动建议**:基于文本内容提供切实可行的建议。 + +## 使用方法 + +1. 安装插件。 +2. 发送长文本或文章到聊天框。 +3. 
点击“精读”按钮(或通过命令触发)。 diff --git a/plugins/actions/summary/summary.py b/plugins/actions/summary/summary.py new file mode 100644 index 0000000..2f1c1c3 --- /dev/null +++ b/plugins/actions/summary/summary.py @@ -0,0 +1,527 @@ +""" +title: Deep Reading & Summary +author: Antigravity +author_url: https://github.com/open-webui +funding_url: https://github.com/open-webui +version: 0.1.0 +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0IiBmaWxsPSJub25lIiBzdHJva2U9ImN1cnJlbnRDb2xvciIgc3Ryb2tlLXdpZHRoPSIyIiBzdHJva2UtbGluZWNhcD0icm91bmQiIHN0cm9rZS1saW5lam9pbj0icm91bmQiPjxwYXRoIGQ9Ik0yIDNIMGEyIDIgMCAwIDAgMiAyIi8+PHBhdGggZD0iTTIyIDNIMjBhMiAyIDAgMCAwLTIgMiIvPjxwYXRoIGQ9Ik0yIDdoMjB2MTRhMiAyIDAgMCAxLTIgMmgtMTZhMiAyIDAgMCAxLTItMnYtMTQiLz48cGF0aCBkPSJNMTEgMTJ2NiIvPjxwYXRoIGQ9Ik0xNiAxMnY2Ii8+PHBhdGggZD0iTTYgMTJ2NiIvPjwvc3ZnPg== +description: Provides deep reading analysis and summarization for long texts. +requirements: jinja2, markdown +""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any +import logging +import re +from fastapi import Request +from datetime import datetime +import pytz +import markdown +from jinja2 import Template + +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# ================================================================= +# 内部 LLM 提示词设计 +# ================================================================= + +SYSTEM_PROMPT_READING_ASSISTANT = """ +你是一个专业的深度文本分析专家,擅长精读长篇文本并提炼精华。你的任务是进行全面、深入的分析。 + +请提供以下内容: +1. **详细摘要**:用 2-3 段话全面总结文本的核心内容,确保准确性和完整性。不要过于简略,要让读者充分理解文本主旨。 +2. **关键信息点**:列出 5-8 个最重要的事实、观点或论据。每个信息点应该: + - 具体且有深度 + - 包含必要的细节和背景 + - 使用 Markdown 列表格式 +3. 
**行动建议**:从文本中识别并提炼出具体的、可执行的行动项。每个建议应该: + - 明确且可操作 + - 包含执行的优先级或时间建议 + - 如果没有明确的行动项,可以提供学习建议或思考方向 + +请严格遵循以下指导原则: +- **语言**:所有输出必须使用用户指定的语言。 +- **格式**:请严格按照以下 Markdown 格式输出,确保每个部分都有明确的标题: + ## 摘要 + [这里是详细的摘要内容,2-3段话,可以使用 Markdown 进行**加粗**或*斜体*强调重点] + + ## 关键信息点 + - [关键点1:包含具体细节和背景] + - [关键点2:包含具体细节和背景] + - [关键点3:包含具体细节和背景] + - [至少5个,最多8个关键点] + + ## 行动建议 + - [行动项1:具体、可执行,包含优先级] + - [行动项2:具体、可执行,包含优先级] + - [如果没有明确行动项,提供学习建议或思考方向] +- **深度优先**:分析要深入、全面,不要浮于表面。 +- **行动导向**:重点关注可执行的建议和下一步行动。 +- **只输出分析结果**:不要包含任何额外的寒暄、解释或引导性文字。 +""" + +USER_PROMPT_GENERATE_SUMMARY = """ +请对以下长篇文本进行深度分析,提供: +1. 详细的摘要(2-3段话,全面概括文本内容) +2. 关键信息点列表(5-8个,包含具体细节) +3. 可执行的行动建议(具体、明确,包含优先级) + +--- +**用户上下文信息:** +用户姓名: {user_name} +当前日期时间: {current_date_time_str} +当前星期: {current_weekday} +当前时区: {current_timezone_str} +用户语言: {user_language} +--- + +**长篇文本内容:** +``` +{long_text_content} +``` + +请进行深入、全面的分析,重点关注可执行的行动建议。 +""" + +# ================================================================= +# 前端 HTML 模板 (Jinja2 语法) +# ================================================================= + +HTML_TEMPLATE = """ + + + + + + 精读:深度分析报告 + + + +
+
+

📖 精读:深度分析报告

+
+
+ 用户: {{ user_name }} + 分析时间: {{ current_date_time_str }} + 星期: {{ current_weekday }} +
+
+
+

📝详细摘要

+
{{ summary_html | safe }}
+
+
+

💡关键信息点

+
{{ keypoints_html | safe }}
+
+
+

🎯行动建议

+
{{ actions_html | safe }}
+
+
+ +
+ +""" + + +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, description="是否在聊天界面显示操作状态更新。" + ) + LLM_MODEL_ID: str = Field( + default="gemini-2.5-flash", + description="用于文本分析的内置LLM模型ID。", + ) + MIN_TEXT_LENGTH: int = Field( + default=200, + description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。", + ) + RECOMMENDED_MIN_LENGTH: int = Field( + default=500, description="建议的最小文本长度,以获得最佳分析效果。" + ) + + def __init__(self): + self.valves = self.Valves() + + def _process_llm_output(self, llm_output: str) -> Dict[str, str]: + """ + 解析LLM的Markdown输出,将其转换为HTML片段。 + """ + summary_match = re.search( + r"##\s*摘要\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL + ) + keypoints_match = re.search( + r"##\s*关键信息点\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL + ) + actions_match = re.search( + r"##\s*行动建议\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL + ) + + summary_md = summary_match.group(1).strip() if summary_match else "" + keypoints_md = keypoints_match.group(1).strip() if keypoints_match else "" + actions_md = actions_match.group(1).strip() if actions_match else "" + + if not any([summary_md, keypoints_md, actions_md]): + summary_md = llm_output.strip() + logger.warning("LLM输出未遵循预期的Markdown格式。将整个输出视为摘要。") + + # 使用 'nl2br' 扩展将换行符 \n 转换为
+ md_extensions = ["nl2br"] + summary_html = ( + markdown.markdown(summary_md, extensions=md_extensions) + if summary_md + else '

未能提取摘要信息。

' + ) + keypoints_html = ( + markdown.markdown(keypoints_md, extensions=md_extensions) + if keypoints_md + else '

未能提取关键信息点。

' + ) + actions_html = ( + markdown.markdown(actions_md, extensions=md_extensions) + if actions_md + else '

暂无明确的行动建议。

' + ) + + return { + "summary_html": summary_html, + "keypoints_html": keypoints_html, + "actions_html": actions_html, + } + + def _build_html(self, context: dict) -> str: + """ + 使用 Jinja2 模板和上下文数据构建最终的HTML内容。 + """ + template = Template(HTML_TEMPLATE) + return template.render(context) + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + logger.info("Action: 精读启动 (v2.0.0 - Deep Reading)") + + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "zh-CN") if __user__ else "zh-CN" + ) + user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "zh-CN") + user_name = __user__.get("name", "用户") + user_id = __user__.get("id", "unknown_user") + + now = datetime.now() + current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S") + current_weekday = now.strftime("%A") + current_year = now.strftime("%Y") + current_timezone_str = "未知时区" + + original_content = "" + try: + messages = body.get("messages", []) + if not messages or not messages[-1].get("content"): + raise ValueError("无法获取有效的用户消息内容。") + + original_content = messages[-1]["content"] + + if len(original_content) < self.valves.MIN_TEXT_LENGTH: + short_text_message = f"文本内容过短({len(original_content)}字符),建议至少{self.valves.MIN_TEXT_LENGTH}字符以获得有效的深度分析。\n\n💡 提示:对于短文本,建议使用'⚡ 闪记卡'进行快速提炼。" + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": {"type": "warning", "content": short_text_message}, + } + ) + return { + "messages": [ + {"role": "assistant", "content": f"⚠️ {short_text_message}"} + ] + } + + # Recommend for longer texts + if len(original_content) < self.valves.RECOMMENDED_MIN_LENGTH: + if __event_emitter__: + await 
__event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": f"文本长度为{len(original_content)}字符。建议{self.valves.RECOMMENDED_MIN_LENGTH}字符以上可获得更好的分析效果。", + }, + } + ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": "📖 精读已启动,正在进行深度分析...", + }, + } + ) + if self.valves.show_status: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "📖 精读: 深入分析文本,提炼精华...", + "done": False, + }, + } + ) + + formatted_user_prompt = USER_PROMPT_GENERATE_SUMMARY.format( + user_name=user_name, + current_date_time_str=current_date_time_str, + current_weekday=current_weekday, + current_timezone_str=current_timezone_str, + user_language=user_language, + long_text_content=original_content, + ) + + llm_payload = { + "model": self.valves.LLM_MODEL_ID, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT_READING_ASSISTANT}, + {"role": "user", "content": formatted_user_prompt}, + ], + "stream": False, + } + + user_obj = Users.get_user_by_id(user_id) + if not user_obj: + raise ValueError(f"无法获取用户对象, 用户ID: {user_id}") + + llm_response = await generate_chat_completion( + __request__, llm_payload, user_obj + ) + assistant_response_content = llm_response["choices"][0]["message"][ + "content" + ] + + processed_content = self._process_llm_output(assistant_response_content) + + context = { + "user_language": user_language, + "user_name": user_name, + "current_date_time_str": current_date_time_str, + "current_weekday": current_weekday, + "current_year": current_year, + **processed_content, + } + + final_html_content = self._build_html(context) + html_embed_tag = f"```html\n{final_html_content}\n```" + body["messages"][-1]["content"] = f"{original_content}\n\n{html_embed_tag}" + + if self.valves.show_status and __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": {"description": "📖 精读: 分析完成!", "done": True}, + } + ) + await 
__event_emitter__( + { + "type": "notification", + "data": { + "type": "success", + "content": f"📖 精读完成,{user_name}!深度分析报告已生成。", + }, + } + ) + + except Exception as e: + error_message = f"精读处理失败: {str(e)}" + logger.error(f"精读错误: {error_message}", exc_info=True) + user_facing_error = f"抱歉, 精读在处理时遇到错误: {str(e)}。\n请检查Open WebUI后端日志获取更多详情。" + body["messages"][-1][ + "content" + ] = f"{original_content}\n\n❌ **错误:** {user_facing_error}" + + if __event_emitter__: + if self.valves.show_status: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "精读: 处理失败。", + "done": True, + }, + } + ) + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": f"精读处理失败, {user_name}!", + }, + } + ) + + return body diff --git a/plugins/actions/summary/精读.py b/plugins/actions/summary/精读.py new file mode 100644 index 0000000..e9e80ed --- /dev/null +++ b/plugins/actions/summary/精读.py @@ -0,0 +1,521 @@ +""" +title: 精读 (Deep Reading) +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIyNCIgaGVpZ2h0PSIyNCIgdmlld0JveD0iMCAwIDI0IDI0Ij48ZGVmcz48bGluZWFyR3JhZGllbnQgaWQ9ImciIHgxPSIwIiB5MT0iMCIgeDI9IjEiIHkyPSIxIj48c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjNDI4NWY0Ii8+PHN0b3Agb2Zmc2V0PSIxMDAlIiBzdG9wLWNvbG9yPSIjMWU4OGU1Ii8+PC9saW5lYXJHcmFkaWVudD48L2RlZnM+PHBhdGggZD0iTTYgMmg4bDYgNnYxMmEyIDIgMCAwIDEtMiAySDZhMiAyIDAgMCAxLTItMlY0YTIgMiAwIDAgMSAyLTJ6IiBmaWxsPSJ1cmwoI2cpIi8+PHBhdGggZD0iTTE0IDJsNiA2aC02eiIgZmlsbD0iIzFlODhlNSIgb3BhY2l0eT0iMC42Ii8+PGxpbmUgeDE9IjgiIHkxPSIxMyIgeDI9IjE2IiB5Mj0iMTMiIHN0cm9rZT0iI2ZmZiIgc3Ryb2tlLXdpZHRoPSIxLjUiLz48bGluZSB4MT0iOCIgeTE9IjE3IiB4Mj0iMTQiIHkyPSIxNyIgc3Ryb2tlPSIjZmZmIiBzdHJva2Utd2lkdGg9IjEuNSIvPjxjaXJjbGUgY3g9IjE2IiBjeT0iMTgiIHI9IjMiIGZpbGw9IiNmZmQ3MDAiLz48cGF0aCBkPSJNMTYgMTZsMS41IDEuNSIgc3Ryb2tlPSIjNDI4NWY0IiBzdHJva2Utd2lkdGg9IjIiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIvPjwvc3ZnPg== +version: 2.0.0 +description: 深度分析长篇文本,提炼详细摘要、关键信息点和可执行的行动建议,适合工作和学习场景。 
+requirements: jinja2, markdown +""" + +from pydantic import BaseModel, Field +from typing import Optional, Dict, Any +import logging +import re +from fastapi import Request +from datetime import datetime +import pytz +import markdown +from jinja2 import Template + +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# ================================================================= +# 内部 LLM 提示词设计 +# ================================================================= + +SYSTEM_PROMPT_READING_ASSISTANT = """ +你是一个专业的深度文本分析专家,擅长精读长篇文本并提炼精华。你的任务是进行全面、深入的分析。 + +请提供以下内容: +1. **详细摘要**:用 2-3 段话全面总结文本的核心内容,确保准确性和完整性。不要过于简略,要让读者充分理解文本主旨。 +2. **关键信息点**:列出 5-8 个最重要的事实、观点或论据。每个信息点应该: + - 具体且有深度 + - 包含必要的细节和背景 + - 使用 Markdown 列表格式 +3. **行动建议**:从文本中识别并提炼出具体的、可执行的行动项。每个建议应该: + - 明确且可操作 + - 包含执行的优先级或时间建议 + - 如果没有明确的行动项,可以提供学习建议或思考方向 + +请严格遵循以下指导原则: +- **语言**:所有输出必须使用用户指定的语言。 +- **格式**:请严格按照以下 Markdown 格式输出,确保每个部分都有明确的标题: + ## 摘要 + [这里是详细的摘要内容,2-3段话,可以使用 Markdown 进行**加粗**或*斜体*强调重点] + + ## 关键信息点 + - [关键点1:包含具体细节和背景] + - [关键点2:包含具体细节和背景] + - [关键点3:包含具体细节和背景] + - [至少5个,最多8个关键点] + + ## 行动建议 + - [行动项1:具体、可执行,包含优先级] + - [行动项2:具体、可执行,包含优先级] + - [如果没有明确行动项,提供学习建议或思考方向] +- **深度优先**:分析要深入、全面,不要浮于表面。 +- **行动导向**:重点关注可执行的建议和下一步行动。 +- **只输出分析结果**:不要包含任何额外的寒暄、解释或引导性文字。 +""" + +USER_PROMPT_GENERATE_SUMMARY = """ +请对以下长篇文本进行深度分析,提供: +1. 详细的摘要(2-3段话,全面概括文本内容) +2. 关键信息点列表(5-8个,包含具体细节) +3. 
可执行的行动建议(具体、明确,包含优先级) + +--- +**用户上下文信息:** +用户姓名: {user_name} +当前日期时间: {current_date_time_str} +当前星期: {current_weekday} +当前时区: {current_timezone_str} +用户语言: {user_language} +--- + +**长篇文本内容:** +``` +{long_text_content} +``` + +请进行深入、全面的分析,重点关注可执行的行动建议。 +""" + +# ================================================================= +# 前端 HTML 模板 (Jinja2 语法) +# ================================================================= + +HTML_TEMPLATE = """ + + + + + + 精读:深度分析报告 + + + +
+
+

📖 精读:深度分析报告

+
+
+ 用户: {{ user_name }} + 分析时间: {{ current_date_time_str }} + 星期: {{ current_weekday }} +
+
+
+

📝详细摘要

+
{{ summary_html | safe }}
+
+
+

💡关键信息点

+
{{ keypoints_html | safe }}
+
+
+

🎯行动建议

+
{{ actions_html | safe }}
+
+
+ +
+ +""" + + +class Action: + class Valves(BaseModel): + show_status: bool = Field( + default=True, description="是否在聊天界面显示操作状态更新。" + ) + LLM_MODEL_ID: str = Field( + default="gemini-2.5-flash", + description="用于文本分析的内置LLM模型ID。", + ) + MIN_TEXT_LENGTH: int = Field( + default=200, description="进行深度分析所需的最小文本长度(字符数)。建议200字符以上。" + ) + RECOMMENDED_MIN_LENGTH: int = Field( + default=500, description="建议的最小文本长度,以获得最佳分析效果。" + ) + + def __init__(self): + self.valves = self.Valves() + + def _process_llm_output(self, llm_output: str) -> Dict[str, str]: + """ + 解析LLM的Markdown输出,将其转换为HTML片段。 + """ + summary_match = re.search( + r"##\s*摘要\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL + ) + keypoints_match = re.search( + r"##\s*关键信息点\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL + ) + actions_match = re.search( + r"##\s*行动建议\s*\n(.*?)(?=\n##|$)", llm_output, re.DOTALL + ) + + summary_md = summary_match.group(1).strip() if summary_match else "" + keypoints_md = keypoints_match.group(1).strip() if keypoints_match else "" + actions_md = actions_match.group(1).strip() if actions_match else "" + + if not any([summary_md, keypoints_md, actions_md]): + summary_md = llm_output.strip() + logger.warning("LLM输出未遵循预期的Markdown格式。将整个输出视为摘要。") + + # 使用 'nl2br' 扩展将换行符 \n 转换为
+ md_extensions = ["nl2br"] + summary_html = ( + markdown.markdown(summary_md, extensions=md_extensions) + if summary_md + else '

未能提取摘要信息。

' + ) + keypoints_html = ( + markdown.markdown(keypoints_md, extensions=md_extensions) + if keypoints_md + else '

未能提取关键信息点。

' + ) + actions_html = ( + markdown.markdown(actions_md, extensions=md_extensions) + if actions_md + else '

暂无明确的行动建议。

' + ) + + return { + "summary_html": summary_html, + "keypoints_html": keypoints_html, + "actions_html": actions_html, + } + + def _build_html(self, context: dict) -> str: + """ + 使用 Jinja2 模板和上下文数据构建最终的HTML内容。 + """ + template = Template(HTML_TEMPLATE) + return template.render(context) + + async def action( + self, + body: dict, + __user__: Optional[Dict[str, Any]] = None, + __event_emitter__: Optional[Any] = None, + __request__: Optional[Request] = None, + ) -> Optional[dict]: + logger.info("Action: 精读启动 (v2.0.0 - Deep Reading)") + + if isinstance(__user__, (list, tuple)): + user_language = ( + __user__[0].get("language", "zh-CN") if __user__ else "zh-CN" + ) + user_name = __user__[0].get("name", "用户") if __user__[0] else "用户" + user_id = ( + __user__[0]["id"] + if __user__ and "id" in __user__[0] + else "unknown_user" + ) + elif isinstance(__user__, dict): + user_language = __user__.get("language", "zh-CN") + user_name = __user__.get("name", "用户") + user_id = __user__.get("id", "unknown_user") + + now = datetime.now() + current_date_time_str = now.strftime("%Y-%m-%d %H:%M:%S") + current_weekday = now.strftime("%A") + current_year = now.strftime("%Y") + current_timezone_str = "未知时区" + + original_content = "" + try: + messages = body.get("messages", []) + if not messages or not messages[-1].get("content"): + raise ValueError("无法获取有效的用户消息内容。") + + original_content = messages[-1]["content"] + + if len(original_content) < self.valves.MIN_TEXT_LENGTH: + short_text_message = f"文本内容过短({len(original_content)}字符),建议至少{self.valves.MIN_TEXT_LENGTH}字符以获得有效的深度分析。\n\n💡 提示:对于短文本,建议使用'⚡ 闪记卡'进行快速提炼。" + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": {"type": "warning", "content": short_text_message}, + } + ) + return { + "messages": [ + {"role": "assistant", "content": f"⚠️ {short_text_message}"} + ] + } + + # Recommend for longer texts + if len(original_content) < self.valves.RECOMMENDED_MIN_LENGTH: + if __event_emitter__: + await 
__event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": f"文本长度为{len(original_content)}字符。建议{self.valves.RECOMMENDED_MIN_LENGTH}字符以上可获得更好的分析效果。", + }, + } + ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "info", + "content": "📖 精读已启动,正在进行深度分析...", + }, + } + ) + if self.valves.show_status: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "📖 精读: 深入分析文本,提炼精华...", + "done": False, + }, + } + ) + + formatted_user_prompt = USER_PROMPT_GENERATE_SUMMARY.format( + user_name=user_name, + current_date_time_str=current_date_time_str, + current_weekday=current_weekday, + current_timezone_str=current_timezone_str, + user_language=user_language, + long_text_content=original_content, + ) + + llm_payload = { + "model": self.valves.LLM_MODEL_ID, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT_READING_ASSISTANT}, + {"role": "user", "content": formatted_user_prompt}, + ], + "stream": False, + } + + user_obj = Users.get_user_by_id(user_id) + if not user_obj: + raise ValueError(f"无法获取用户对象, 用户ID: {user_id}") + + llm_response = await generate_chat_completion(__request__, llm_payload, user_obj) + assistant_response_content = llm_response["choices"][0]["message"][ + "content" + ] + + processed_content = self._process_llm_output(assistant_response_content) + + context = { + "user_language": user_language, + "user_name": user_name, + "current_date_time_str": current_date_time_str, + "current_weekday": current_weekday, + "current_year": current_year, + **processed_content, + } + + final_html_content = self._build_html(context) + html_embed_tag = f"```html\n{final_html_content}\n```" + body["messages"][-1]["content"] = f"{original_content}\n\n{html_embed_tag}" + + if self.valves.show_status and __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": {"description": "📖 精读: 分析完成!", "done": True}, + } + ) + await __event_emitter__( + { 
+ "type": "notification", + "data": { + "type": "success", + "content": f"📖 精读完成,{user_name}!深度分析报告已生成。", + }, + } + ) + + except Exception as e: + error_message = f"精读处理失败: {str(e)}" + logger.error(f"精读错误: {error_message}", exc_info=True) + user_facing_error = f"抱歉, 精读在处理时遇到错误: {str(e)}。\n请检查Open WebUI后端日志获取更多详情。" + body["messages"][-1][ + "content" + ] = f"{original_content}\n\n❌ **错误:** {user_facing_error}" + + if __event_emitter__: + if self.valves.show_status: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "精读: 处理失败。", + "done": True, + }, + } + ) + await __event_emitter__( + { + "type": "notification", + "data": { + "type": "error", + "content": f"精读处理失败, {user_name}!", + }, + } + ) + + return body diff --git a/plugins/filters/README.md b/plugins/filters/README.md new file mode 100644 index 0000000..79f14e8 --- /dev/null +++ b/plugins/filters/README.md @@ -0,0 +1,45 @@ +# Filters + +English | [中文](./README_CN.md) + +Filters process and modify user input before it is sent to the LLM. This directory contains various filters that can be used to extend OpenWebUI functionality. + +## 📋 Filter List + +| Filter Name | Description | Documentation | +| :--- | :--- | :--- | +| **Async Context Compression** | Reduces token consumption in long conversations through intelligent summarization and message compression while maintaining conversational coherence. | [English](./async-context-compression/async_context_compression.md) / [中文](./async-context-compression/async_context_compression_cn.md) | + +## 🚀 Quick Start + +### Installing a Filter + +1. Navigate to the desired filter directory +2. Download the corresponding `.py` file to your local machine +3. Open OpenWebUI Admin Settings and find the "Filters" section +4. Upload the Python file +5. Configure the filter parameters according to its documentation +6. 
Refresh the page and enable the filter in your chat settings + +## 📖 Development Guide + +When adding a new filter, please follow these steps: + +1. **Create Filter Directory**: Create a new folder in the current directory (e.g., `my_filter/`) +2. **Write Filter Code**: Create a `.py` file with clear documentation of functionality and configuration in comments +3. **Write Documentation**: + - Create `filter_name.md` (English version) + - Create `filter_name_cn.md` (Chinese version) + - Documentation should include: feature description, configuration parameters, usage examples, and troubleshooting +4. **Update This List**: Add your new filter to the table above + +## ⚙️ Configuration Best Practices + +- **Priority Management**: Set appropriate filter priority to ensure correct execution order +- **Parameter Tuning**: Adjust filter parameters based on your specific needs +- **Debug Logging**: Enable debug mode during development, disable in production +- **Performance Testing**: Test filter performance under high load + +--- + +> **Contributor Note**: To ensure project maintainability and user experience, please provide clear and complete documentation for each new filter, including feature description, parameter configuration, usage examples, and troubleshooting guide. diff --git a/plugins/filters/README_CN.md b/plugins/filters/README_CN.md new file mode 100644 index 0000000..6c9b740 --- /dev/null +++ b/plugins/filters/README_CN.md @@ -0,0 +1,67 @@ +# 自动上下文合并过滤器 (Auto Context Merger Filter) + +## 概述 + +`auto_context_merger` 是一个 Open WebUI 过滤器插件,旨在通过自动收集和注入上一回合多模型回答的上下文,来增强后续对话的连贯性和深度。当用户在一次多模型回答之后提出新的后续问题时,此过滤器会自动激活。 + +它会从对话历史中识别出上一回合所有 AI 模型的回答,将它们按照清晰的格式直接拼接起来,然后作为一个系统消息注入到当前请求中。这样,当前模型在处理用户的新问题时,就能直接参考到之前所有 AI 的观点,从而提供更全面、更连贯的回答。 + +## 工作原理 + +1. **触发时机**: 当用户在一次“多模型回答”之后,发送新的后续问题时,此过滤器会自动激活。 +2. **获取历史数据**: 过滤器会使用当前对话的 `chat_id`,从数据库中加载完整的对话历史记录。 +3. **分析上一回合**: 通过分析对话树结构,它能准确找到用户上一个问题,以及当时所有 AI 模型给出的并行回答。 +4. 
**直接格式化**: 如果检测到上一回合确实有多个 AI 回答,它会收集所有这些 AI 的回答内容。 +5. **智能注入**: 将这些格式化后的回答作为一个系统消息,注入到当前请求的 `messages` 列表的开头,紧邻用户的新问题之前。 +6. **传递给目标模型**: 修改后的消息体(包含格式化后的上下文)将传递给用户最初选择的目标模型。目标模型在生成响应时,将能够利用这个更丰富的上下文。 +7. **状态更新**: 在整个处理过程中,过滤器会通过 `__event_emitter__` 提供实时状态更新,让用户了解处理进度。 + +## 配置 (Valves) + +您可以在 Open WebUI 的管理界面中配置此过滤器的 `Valves`。 + +* **`CONTEXT_PREFIX`** (字符串, 必填): + * **描述**: 注入的系统消息的前缀文本。它会出现在合并后的上下文之前,用于向模型解释这段内容的来源和目的。 + * **示例**: `**背景知识**:为了更好地回答您的新问题,请参考上一轮对话中多个AI模型给出的回答:\n\n` + +## 如何使用 + +1. **部署过滤器**: 将 `auto_context_merger.py` 文件放置在 Open WebUI 实例的 `plugins/filters/` 目录下。 +2. **启用过滤器**: 登录 Open WebUI 管理界面,导航到 **Workspace -> Functions**。找到 `auto_context_merger` 过滤器并启用它。 +3. **配置参数**: 点击 `auto_context_merger` 过滤器旁边的编辑按钮,根据您的需求配置 `CONTEXT_PREFIX`。 +4. **开始对话**: + * 首先,向一个模型提问,并确保有多个模型(例如通过 `gemini_manifold` 或其他多模型工具)给出回答。 + * 然后,针对这个多模型回答,提出您的后续问题。 + * 此过滤器将自动激活,将上一回合所有 AI 的回答合并并注入到当前请求中。 + +## 示例 + +假设您配置了 `CONTEXT_PREFIX` 为默认值。 + +1. **用户提问**: “解释一下量子力学” +2. **多个 AI 回答** (例如,模型 A 和模型 B 都给出了回答) +3. **用户再次提问**: “那么,量子纠缠和量子隧穿有什么区别?” + +此时,`auto_context_merger` 过滤器将自动激活: +1. 它会获取模型 A 和模型 B 对“解释一下量子力学”的回答。 +2. 将它们格式化为: + ``` + **背景知识**:为了更好地回答您的新问题,请参考上一轮对话中多个AI模型给出的回答: + + **来自模型 '模型A名称' 的回答是:** + [模型A对量子力学的解释] + + --- + + **来自模型 '模型B名称' 的回答是:** + [模型B对量子力学的解释] + ``` +3. 
然后,将这段内容作为一个系统消息,注入到当前请求中,紧邻“那么,量子纠缠和量子隧穿有什么区别?”这个用户问题之前。 + +最终,模型将收到一个包含所有相关上下文的请求,从而能够更准确、更全面地回答您的后续问题。 + +## 注意事项 + +* 此过滤器旨在增强多模型对话的连贯性,通过提供更丰富的上下文来帮助模型理解后续问题。 +* 确保您的 Open WebUI 实例中已配置并启用了 `gemini_manifold` 或其他能够产生多模型回答的工具,以便此过滤器能够检测到多模型历史。 +* 此过滤器不会增加额外的模型调用,因此不会显著增加延迟或成本。它只是对现有历史数据进行格式化和注入。 diff --git a/plugins/filters/async-context-compression/async_context_compression.md b/plugins/filters/async-context-compression/async_context_compression.md new file mode 100644 index 0000000..0b94d63 --- /dev/null +++ b/plugins/filters/async-context-compression/async_context_compression.md @@ -0,0 +1,77 @@ +# Async Context Compression Filter + +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.0.0 | **License:** MIT + +> **Important Note**: To ensure the maintainability and usability of all filters, each filter should be accompanied by clear and complete documentation to fully explain its functionality, configuration, and usage. + +This filter significantly reduces token consumption in long conversations by using intelligent summarization and message compression, while maintaining conversational coherence. + +--- + +## Core Features + +- ✅ **Automatic Compression**: Triggers context compression automatically based on a message count threshold. +- ✅ **Asynchronous Summarization**: Generates summaries in the background without blocking the current chat response. +- ✅ **Persistent Storage**: Supports both PostgreSQL and SQLite databases to ensure summaries are not lost after a service restart. +- ✅ **Flexible Retention Policy**: Freely configure the number of initial and final messages to keep, ensuring critical information and context continuity. +- ✅ **Smart Injection**: Intelligently injects the generated historical summary into the new context. + +--- + +## Installation & Configuration + +### 1. Environment Variable + +This plugin requires a database connection. You **must** configure the `DATABASE_URL` in your Open WebUI environment variables. 
+ +- **PostgreSQL Example**: + ``` + DATABASE_URL=postgresql://user:password@host:5432/openwebui + ``` +- **SQLite Example**: + ``` + DATABASE_URL=sqlite:///path/to/your/data/webui.db + ``` + +### 2. Filter Order + +It is recommended to set the priority of this filter relatively high (a smaller number) to ensure it runs before other filters that might modify message content. A typical order might be: + +1. **Pre-Filters (priority < 10)** + - e.g., A filter that injects a system-level prompt. +2. **This Compression Filter (priority = 10)** +3. **Post-Filters (priority > 10)** + - e.g., A filter that formats the final output. + +--- + +## Configuration Parameters + +You can adjust the following parameters in the filter's settings: + +| Parameter | Default | Description | +| :--- | :--- | :--- | +| `priority` | `10` | The execution order of the filter. Lower numbers run first. | +| `compression_threshold` | `15` | When the total message count reaches this value, a background summary generation will be triggered. | +| `keep_first` | `1` | Always keep the first N messages. The first message often contains important system prompts. | +| `keep_last` | `6` | Always keep the last N messages to ensure contextual coherence. | +| `summary_model` | `None` | The model used for generating summaries. **Strongly recommended** to set a fast, economical, and compatible model (e.g., `gemini-2.5-flash`). If left empty, it will try to use the current chat's model, which may fail if it's an incompatible model type (like a Pipe model). | +| `max_summary_tokens` | `4000` | The maximum number of tokens allowed for the generated summary. | +| `summary_temperature` | `0.3` | Controls the randomness of the summary. Lower values are more deterministic. | +| `debug_mode` | `true` | Whether to print detailed debug information to the log. Recommended to set to `false` in production. 
| + +--- + +## Troubleshooting + +- **Problem: Database connection failed.** + - **Solution**: Please ensure the `DATABASE_URL` environment variable is set correctly and that the database service is running. + +- **Problem: Summary not generated.** + - **Solution**: Check if the `compression_threshold` has been met and verify that `summary_model` is configured correctly. Check the logs for detailed errors. + +- **Problem: Initial system prompt is lost.** + - **Solution**: Ensure `keep_first` is set to a value greater than 0 to preserve the initial messages containing important information. + +- **Problem: Compression effect is not significant.** + - **Solution**: Try increasing the `compression_threshold` or decreasing the `keep_first` / `keep_last` values. diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py new file mode 100644 index 0000000..0597b61 --- /dev/null +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -0,0 +1,780 @@ +""" +title: Async Context Compression +id: async_context_compression +author: Fu-Jie +author_url: https://github.com/Fu-Jie +funding_url: https://github.com/Fu-Jie/awesome-openwebui +description: Reduces token consumption in long conversations while maintaining coherence through intelligent summarization and message compression. +version: 1.0.1 +license: MIT + +═══════════════════════════════════════════════════════════════════════════════ +📌 Overview +═══════════════════════════════════════════════════════════════════════════════ + +This filter significantly reduces token consumption in long conversations by using intelligent summarization and message compression, while maintaining conversational coherence. 
+ +Core Features: + ✅ Automatic compression triggered by a message count threshold + ✅ Asynchronous summary generation (does not block user response) + ✅ Persistent storage with database support (PostgreSQL and SQLite) + ✅ Flexible retention policy (configurable to keep first and last N messages) + ✅ Smart summary injection to maintain context + +═══════════════════════════════════════════════════════════════════════════════ +🔄 Workflow +═══════════════════════════════════════════════════════════════════════════════ + +Phase 1: Inlet (Pre-request processing) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + 1. Receives all messages in the current conversation. + 2. Checks for a previously saved summary. + 3. If a summary exists and the message count exceeds the retention threshold: + ├─ Extracts the first N messages to be kept. + ├─ Injects the summary into the first message. + ├─ Extracts the last N messages to be kept. + └─ Combines them into a new message list: [Kept First Messages + Summary] + [Kept Last Messages]. + 4. Sends the compressed message list to the LLM. + +Phase 2: Outlet (Post-response processing) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + 1. Triggered after the LLM response is complete. + 2. Checks if the message count has reached the compression threshold. + 3. If the threshold is met, an asynchronous background task is started to generate a summary: + ├─ Extracts messages to be summarized (excluding the kept first and last messages). + ├─ Calls the LLM to generate a concise summary. + └─ Saves the summary to the database. + +═══════════════════════════════════════════════════════════════════════════════ +💾 Storage +═══════════════════════════════════════════════════════════════════════════════ + +This filter uses a database for persistent storage, configured via the `DATABASE_URL` environment variable. It supports both PostgreSQL and SQLite. + +Configuration: + - The `DATABASE_URL` environment variable must be set. 
+ - PostgreSQL Example: `postgresql://user:password@host:5432/openwebui` + - SQLite Example: `sqlite:///path/to/your/database.db` + +The filter automatically selects the appropriate database driver based on the `DATABASE_URL` prefix (`postgres` or `sqlite`). + + Table Structure (`chat_summary`): + - id: Primary Key (auto-increment) + - chat_id: Unique chat identifier (indexed) + - summary: The summary content (TEXT) + - compressed_message_count: The original number of messages + - created_at: Timestamp of creation + - updated_at: Timestamp of last update + +═══════════════════════════════════════════════════════════════════════════════ +📊 Compression Example +═══════════════════════════════════════════════════════════════════════════════ + +Scenario: A 20-message conversation (Default settings: keep first 1, keep last 6) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + Before Compression: + Message 1: [Initial prompt + First question] + Messages 2-14: [Historical conversation] + Messages 15-20: [Recent conversation] + Total: 20 full messages + + After Compression: + Message 1: [Initial prompt + Historical summary + First question] + Messages 15-20: [Last 6 full messages] + Total: 7 messages + + Effect: + ✓ Saves 13 messages (approx. 65%) + ✓ Retains full context + ✓ Protects important initial prompts + +═══════════════════════════════════════════════════════════════════════════════ +⚙️ Configuration +═══════════════════════════════════════════════════════════════════════════════ + +priority + Default: 10 + Description: The execution order of the filter. Lower numbers run first. + +compression_threshold + Default: 15 + Description: When the message count reaches this value, a background summary generation will be triggered after the conversation ends. + Recommendation: Adjust based on your model's context window and cost. + +keep_first + Default: 1 + Description: Always keep the first N messages of the conversation. Set to 0 to disable. 
The first message often contains important system prompts. + +keep_last + Default: 6 + Description: Always keep the last N full messages of the conversation to ensure context coherence. + +summary_model + Default: None + Description: The LLM used to generate the summary. + Recommendation: + - It is strongly recommended to configure a fast, economical, and compatible model, such as `deepseek-v3`, `gemini-2.5-flash`, or `gpt-4.1`. + - If left empty, the filter will attempt to use the model from the current conversation. + Note: + - If the current conversation uses a pipeline (Pipe) model or a model that does not support standard generation APIs, leaving this field empty may cause summary generation to fail. In this case, you must specify a valid model. + +max_summary_tokens + Default: 4000 + Description: The maximum number of tokens allowed for the generated summary. + +summary_temperature + Default: 0.3 + Description: Controls the randomness of the summary generation. Lower values produce more deterministic output. + +debug_mode + Default: true + Description: Prints detailed debug information to the log. Recommended to set to `false` in production. + +🔧 Deployment +═══════════════════════════════════════════════════════ + +Docker Compose Example: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + services: + openwebui: + environment: + DATABASE_URL: postgresql://user:password@postgres:5432/openwebui + depends_on: + - postgres + + postgres: + image: postgres:15-alpine + environment: + POSTGRES_USER: user + POSTGRES_PASSWORD: password + POSTGRES_DB: openwebui + +Suggested Filter Installation Order: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +It is recommended to set the priority of this filter relatively high (a smaller number) to ensure it runs before other filters that might modify message content. A typical order might be: + + 1. Filters that need access to the full, uncompressed history (priority < 10) + (e.g., a filter that injects a system-level prompt) + 2.
This compression filter (priority = 10) + 3. Filters that run after compression (priority > 10) + (e.g., a final output formatting filter) + +═══════════════════════════════════════════════════════════════════════════════ +📝 Database Query Examples +═══════════════════════════════════════════════════════════════════════════════ + +View all summaries: + SELECT + chat_id, + LEFT(summary, 100) as summary_preview, + compressed_message_count, + updated_at + FROM chat_summary + ORDER BY updated_at DESC; + +Query a specific conversation: + SELECT * + FROM chat_summary + WHERE chat_id = 'your_chat_id'; + +Delete old summaries: + DELETE FROM chat_summary + WHERE updated_at < NOW() - INTERVAL '30 days'; + +Statistics: + SELECT + COUNT(*) as total_summaries, + AVG(LENGTH(summary)) as avg_summary_length, + AVG(compressed_message_count) as avg_msg_count + FROM chat_summary; + +═══════════════════════════════════════════════════════════════════════════════ +⚠️ Important Notes +═══════════════════════════════════════════════════════════════════════════════ + +1. Database Permissions + ⚠ Ensure the user specified in `DATABASE_URL` has permissions to create tables. + ⚠ The `chat_summary` table will be created automatically on first run. + +2. Retention Policy + ⚠ The `keep_first` setting is crucial for preserving initial messages that contain system prompts. Configure it as needed. + +3. Performance + ⚠ Summary generation is asynchronous and will not block the user response. + ⚠ There will be a brief background processing time when the threshold is first met. + +4. Cost Optimization + ⚠ The summary model is called once each time the threshold is met. + ⚠ Set `compression_threshold` reasonably to avoid frequent calls. + ⚠ It's recommended to use a fast and economical model to generate summaries. + +5. Multimodal Support + ✓ This filter supports multimodal messages containing images. + ✓ The summary is generated only from the text content. 
+ ✓ Non-text parts (like images) are preserved in their original messages during compression. + +═══════════════════════════════════════════════════════════════════════════════ +🐛 Troubleshooting +═══════════════════════════════════════════════════════════════════════════════ + +Problem: Database connection failed +Solution: + 1. Verify that the `DATABASE_URL` environment variable is set correctly. + 2. Confirm that `DATABASE_URL` starts with either `sqlite` or `postgres`. + 3. Ensure the database service is running and network connectivity is normal. + 4. Validate the username, password, host, and port in the connection URL. + 5. Check the Open WebUI container logs for detailed error messages. + +Problem: Summary not generated +Solution: + 1. Check if the `compression_threshold` has been met. + 2. Verify that the `summary_model` is configured correctly. + 3. Check the debug logs for any error messages. + +Problem: Initial system prompt is lost +Solution: + - Ensure `keep_first` is set to a value greater than 0 to preserve the initial messages containing this information. + +Problem: Compression effect is not significant +Solution: + 1. Increase the `compression_threshold` appropriately. + 2. Decrease the number of `keep_last` or `keep_first`. + 3. Check if the conversation is actually long enough. 
+ + +""" + +from pydantic import BaseModel, Field, model_validator +from typing import Optional +import asyncio +import json +import hashlib +import os + +# Open WebUI built-in imports +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users +from fastapi.requests import Request +from open_webui.main import app as webui_app + +# Database imports +from sqlalchemy import create_engine, Column, String, Text, DateTime, Integer +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from datetime import datetime + +Base = declarative_base() + + +class ChatSummary(Base): + """Chat Summary Storage Table""" + + __tablename__ = "chat_summary" + + id = Column(Integer, primary_key=True, autoincrement=True) + chat_id = Column(String(255), unique=True, nullable=False, index=True) + summary = Column(Text, nullable=False) + compressed_message_count = Column(Integer, default=0) + created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + +class Filter: + def __init__(self): + self.valves = self.Valves() + self._db_engine = None + self._SessionLocal = None + self._init_database() + + def _init_database(self): + """Initializes the database connection and table.""" + try: + database_url = os.getenv("DATABASE_URL") + + if not database_url: + print("[Database] ❌ Error: DATABASE_URL environment variable is not set. 
Please set this variable.") + self._db_engine = None + self._SessionLocal = None + return + + db_type = None + engine_args = {} + + if database_url.startswith("sqlite"): + db_type = "SQLite" + engine_args = { + "connect_args": {"check_same_thread": False}, + "echo": False, + } + elif database_url.startswith("postgres"): + db_type = "PostgreSQL" + if database_url.startswith("postgres://"): + database_url = database_url.replace( + "postgres://", "postgresql://", 1 + ) + print("[Database] ℹ️ Automatically converted postgres:// to postgresql://") + engine_args = { + "pool_pre_ping": True, + "pool_recycle": 3600, + "echo": False, + } + else: + print( + f"[Database] ❌ Error: Unsupported database type. DATABASE_URL must start with 'sqlite' or 'postgres'. Current value: {database_url}" + ) + self._db_engine = None + self._SessionLocal = None + return + + # Create database engine + self._db_engine = create_engine(database_url, **engine_args) + + # Create session factory + self._SessionLocal = sessionmaker( + autocommit=False, autoflush=False, bind=self._db_engine + ) + + # Create table if it doesn't exist + Base.metadata.create_all(bind=self._db_engine) + + print(f"[Database] ✅ Successfully connected to {db_type} and initialized the chat_summary table.") + + except Exception as e: + print(f"[Database] ❌ Initialization failed: {str(e)}") + self._db_engine = None + self._SessionLocal = None + + class Valves(BaseModel): + priority: int = Field( + default=10, description="Priority level for the filter operations." + ) + compression_threshold: int = Field( + default=15, ge=0, description="The number of messages at which to trigger compression." + ) + keep_first: int = Field( + default=1, ge=0, description="Always keep the first N messages. Set to 0 to disable." + ) + keep_last: int = Field(default=6, ge=0, description="Always keep the last N messages.") + summary_model: str = Field( + default=None, + description="The model to use for generating the summary. 
If empty, uses the current conversation's model.", + ) + max_summary_tokens: int = Field( + default=4000, ge=1, description="The maximum number of tokens for the summary." + ) + summary_temperature: float = Field( + default=0.3, ge=0.0, le=2.0, description="The temperature for summary generation." + ) + debug_mode: bool = Field(default=True, description="Enable detailed logging for debugging.") + + @model_validator(mode="after") + def check_thresholds(self) -> "Valves": + kept_count = self.keep_first + self.keep_last + if self.compression_threshold <= kept_count: + raise ValueError( + f"compression_threshold ({self.compression_threshold}) must be greater than " + f"the sum of keep_first ({self.keep_first}) and keep_last ({self.keep_last}) ({kept_count})." + ) + return self + + def _save_summary(self, chat_id: str, summary: str, body: dict): + """Saves the summary to the database.""" + if not self._SessionLocal: + if self.valves.debug_mode: + print("[Storage] Database not initialized, skipping summary save.") + return + + try: + session = self._SessionLocal() + try: + # Find existing record + existing = ( + session.query(ChatSummary).filter_by(chat_id=chat_id).first() + ) + + if existing: + # Update existing record + existing.summary = summary + existing.compressed_message_count = len(body.get("messages", [])) + existing.updated_at = datetime.utcnow() + else: + # Create new record + new_summary = ChatSummary( + chat_id=chat_id, + summary=summary, + compressed_message_count=len(body.get("messages", [])), + ) + session.add(new_summary) + + session.commit() + + if self.valves.debug_mode: + action = "Updated" if existing else "Created" + print(f"[Storage] Summary has been {action.lower()} in the database (Chat ID: {chat_id})") + + finally: + session.close() + + except Exception as e: + print(f"[Storage] ❌ Database save failed: {str(e)}") + + def _load_summary(self, chat_id: str, body: dict) -> Optional[str]: + """Loads the summary from the database.""" + if not 
self._SessionLocal: + if self.valves.debug_mode: + print("[Storage] Database not initialized, cannot load summary.") + return None + + try: + session = self._SessionLocal() + try: + record = ( + session.query(ChatSummary).filter_by(chat_id=chat_id).first() + ) + + if record: + if self.valves.debug_mode: + print(f"[Storage] Loaded summary from database (Chat ID: {chat_id})") + print( + f"[Storage] Last updated: {record.updated_at}, Original message count: {record.compressed_message_count}" + ) + return record.summary + + finally: + session.close() + + except Exception as e: + print(f"[Storage] ❌ Database read failed: {str(e)}") + + return None + + def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict: + """Injects the summary into the first message by prepending it.""" + content = message.get("content", "") + summary_block = f"【Historical Conversation Summary】\n{summary}\n\n---\nBelow is the recent conversation:\n\n" + + # Handle different content types + if isinstance(content, list): # Multimodal content + # Find the first text part and insert the summary before it + new_content = [] + summary_inserted = False + + for part in content: + if ( + isinstance(part, dict) + and part.get("type") == "text" + and not summary_inserted + ): + # Prepend summary to the first text part + new_content.append( + {"type": "text", "text": summary_block + part.get("text", "")} + ) + summary_inserted = True + else: + new_content.append(part) + + # If no text part, insert at the beginning + if not summary_inserted: + new_content.insert(0, {"type": "text", "text": summary_block}) + + message["content"] = new_content + + elif isinstance(content, str): # Plain text + message["content"] = summary_block + content + + return message + + async def inlet( + self, body: dict, __user__: Optional[dict] = None, __metadata__: dict = None + ) -> dict: + """ + Executed before sending to the LLM. + Compression Strategy: + 1. Keep the first N messages. + 2. 
Inject the summary into the first message (if keep_first > 0). + 3. Keep the last N messages. + """ + messages = body.get("messages", []) + chat_id = __metadata__["chat_id"] + + if self.valves.debug_mode: + print(f"\n{'='*60}") + print(f"[Inlet] Chat ID: {chat_id}") + print(f"[Inlet] Received {len(messages)} messages") + + # [Optimization] Load summary in a background thread to avoid blocking the event loop. + if self.valves.debug_mode: + print("[Optimization] Loading summary in a background thread to avoid blocking the event loop.") + saved_summary = await asyncio.to_thread(self._load_summary, chat_id, body) + + total_kept_count = self.valves.keep_first + self.valves.keep_last + + if saved_summary and len(messages) > total_kept_count: + if self.valves.debug_mode: + print(f"[Inlet] Found saved summary, applying compression.") + + first_messages_to_keep = [] + + if self.valves.keep_first > 0: + # Copy the initial messages to keep + first_messages_to_keep = [ + m.copy() for m in messages[: self.valves.keep_first] + ] + # Inject the summary into the very first message + first_messages_to_keep[0] = self._inject_summary_to_first_message( + first_messages_to_keep[0], saved_summary + ) + else: + # If not keeping initial messages, create a new system message for the summary + summary_block = ( + f"【Historical Conversation Summary】\n{saved_summary}\n\n---\nBelow is the recent conversation:\n\n" + ) + first_messages_to_keep.append( + {"role": "system", "content": summary_block} + ) + + # Keep the last messages + last_messages_to_keep = ( + messages[-self.valves.keep_last :] if self.valves.keep_last > 0 else [] + ) + + # Combine: [Kept initial messages (with summary)] + [Kept recent messages] + body["messages"] = first_messages_to_keep + last_messages_to_keep + + if self.valves.debug_mode: + print(f"[Inlet] ✂️ Compression complete:") + print(f" - Original messages: {len(messages)}") + print(f" - Compressed to: {len(body['messages'])}") + print( + f" - Structure: [Keep first 
{self.valves.keep_first} (with summary)] + [Keep last {self.valves.keep_last}]" + ) + print(f" - Saved: {len(messages) - len(body['messages'])} messages") + else: + if self.valves.debug_mode: + if not saved_summary: + print(f"[Inlet] No summary found, using full conversation history.") + else: + print(f"[Inlet] Message count does not exceed retention threshold, no compression applied.") + + if self.valves.debug_mode: + print(f"{'='*60}\n") + + return body + + async def outlet( + self, body: dict, __user__: Optional[dict] = None, __metadata__: dict = None + ) -> dict: + """ + Executed after the LLM response is complete. + Triggers summary generation asynchronously. + """ + messages = body.get("messages", []) + chat_id = __metadata__["chat_id"] + + if self.valves.debug_mode: + print(f"\n{'='*60}") + print(f"[Outlet] Chat ID: {chat_id}") + print(f"[Outlet] Response complete, current message count: {len(messages)}") + + # Check if compression is needed + if len(messages) >= self.valves.compression_threshold: + if self.valves.debug_mode: + print( + f"[Outlet] ⚡ Compression threshold reached ({len(messages)} >= {self.valves.compression_threshold})" + ) + print(f"[Outlet] Preparing to generate summary in the background...") + + # Generate summary asynchronously in the background + asyncio.create_task( + self._generate_summary_async(messages, chat_id, body, __user__) + ) + else: + if self.valves.debug_mode: + print( + f"[Outlet] Compression threshold not reached ({len(messages)} < {self.valves.compression_threshold})" + ) + + if self.valves.debug_mode: + print(f"{'='*60}\n") + + return body + + async def _generate_summary_async( + self, messages: list, chat_id: str, body: dict, user_data: Optional[dict] + ): + """ + Generates a summary asynchronously in the background. 
+ """ + try: + if self.valves.debug_mode: + print(f"\n[🤖 Async Summary Task] Starting...") + + # Messages to summarize: exclude kept initial and final messages + if self.valves.keep_last > 0: + messages_to_summarize = messages[ + self.valves.keep_first : -self.valves.keep_last + ] + else: + messages_to_summarize = messages[self.valves.keep_first :] + + if len(messages_to_summarize) == 0: + if self.valves.debug_mode: + print(f"[🤖 Async Summary Task] No messages to summarize, skipping.") + return + + if self.valves.debug_mode: + print(f"[🤖 Async Summary Task] Preparing to summarize {len(messages_to_summarize)} messages.") + print( + f"[🤖 Async Summary Task] Protecting: First {self.valves.keep_first} + Last {self.valves.keep_last} messages." + ) + + # Build conversation history text + conversation_text = self._format_messages_for_summary(messages_to_summarize) + + # Call LLM to generate summary + summary = await self._call_summary_llm(conversation_text, body, user_data) + + # [Optimization] Save summary in a background thread to avoid blocking the event loop. + if self.valves.debug_mode: + print("[Optimization] Saving summary in a background thread to avoid blocking the event loop.") + await asyncio.to_thread(self._save_summary, chat_id, summary, body) + + if self.valves.debug_mode: + print(f"[🤖 Async Summary Task] ✅ Complete! Summary length: {len(summary)} characters.") + print(f"[🤖 Async Summary Task] Summary preview: {summary[:150]}...") + + except Exception as e: + print(f"[🤖 Async Summary Task] ❌ Error: {str(e)}") + import traceback + + traceback.print_exc() + # Save a simple placeholder even on failure + fallback_summary = ( + f"[Historical Conversation Summary] Contains content from approximately {len(messages_to_summarize)} messages." + ) + + # [Optimization] Save summary in a background thread to avoid blocking the event loop. 
+ if self.valves.debug_mode: + print("[Optimization] Saving summary in a background thread to avoid blocking the event loop.") + await asyncio.to_thread(self._save_summary, chat_id, fallback_summary, body) + + def _format_messages_for_summary(self, messages: list) -> str: + """Formats messages for summarization.""" + formatted = [] + for i, msg in enumerate(messages, 1): + role = msg.get("role", "unknown") + content = msg.get("content", "") + + # Handle multimodal content + if isinstance(content, list): + text_parts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text_parts.append(part.get("text", "")) + content = " ".join(text_parts) + + # Handle role name + role_name = {"user": "User", "assistant": "Assistant"}.get(role, role) + + # Limit length of each message to avoid excessive length + if len(content) > 500: + content = content[:500] + "..." + + formatted.append(f"[{i}] {role_name}: {content}") + + return "\n\n".join(formatted) + + async def _call_summary_llm( + self, conversation_text: str, body: dict, user_data: dict + ) -> str: + """ + Calls the LLM to generate a summary using Open WebUI's built-in method. + """ + if self.valves.debug_mode: + print(f"[🤖 LLM Call] Using Open WebUI's built-in method.") + + # Build summary prompt + summary_prompt = f""" +You are a professional conversation context compression assistant. Your task is to perform a high-fidelity compression of the [Conversation Content] below, producing a concise summary that can be used directly as context for subsequent conversation. Strictly adhere to the following requirements: + +MUST RETAIN: Topics/goals, user intent, key facts and data, important parameters and constraints, deadlines, decisions/conclusions, action items and their status, and technical details like code/commands (code must be preserved as is). +REMOVE: Greetings, politeness, repetitive statements, off-topic chatter, and procedural details (unless essential). 
For information that has been overturned or is outdated, please mark it as "Obsolete: " when retaining. +CONFLICT RESOLUTION: If there are contradictions or multiple revisions, retain the latest consistent conclusion and list unresolved or conflicting points under "Points to Clarify". +STRUCTURE AND TONE: Output in structured bullet points. Be logical, objective, and concise. Summarize from a third-person perspective. Use code blocks to preserve technical/code snippets verbatim. +OUTPUT LENGTH: Strictly limit the summary content to within {int(self.valves.max_summary_tokens * 3)} characters. Prioritize key information; if space is insufficient, trim details rather than core conclusions. +FORMATTING: Output only the summary text. Do not add any extra explanations, execution logs, or generation processes. You must use the following headings (if a section has no content, write "None"): +Core Theme: +Key Information: +... (List 3-6 key points) +Decisions/Conclusions: +Action Items (with owner/deadline if any): +Relevant Roles/Preferences: +Risks/Dependencies/Assumptions: +Points to Clarify: +Compression Ratio: Original ~X words → Summary ~Y words (estimate) +Conversation Content: +{conversation_text} + +Please directly output the compressed summary that meets the above requirements (summary text only). +""" + # Determine the model to use + model = self.valves.summary_model or body.get("model", "") + + if self.valves.debug_mode: + print(f"[🤖 LLM Call] Model: {model}") + + # Build payload + payload = { + "model": model, + "messages": [{"role": "user", "content": summary_prompt}], + "stream": False, + "max_tokens": self.valves.max_summary_tokens, + "temperature": self.valves.summary_temperature, + } + + try: + # Get user object + user_id = user_data.get("id") if user_data else None + if not user_id: + raise ValueError("Could not get user ID") + + # [Optimization] Get user object in a background thread to avoid blocking the event loop. 
+ if self.valves.debug_mode: + print("[Optimization] Getting user object in a background thread to avoid blocking the event loop.") + user = await asyncio.to_thread(Users.get_user_by_id, user_id) + + if not user: + raise ValueError(f"Could not find user: {user_id}") + + if self.valves.debug_mode: + print(f"[🤖 LLM Call] User: {user.email}") + print(f"[🤖 LLM Call] Sending request...") + + # Create Request object + request = Request(scope={"type": "http", "app": webui_app}) + + # Call generate_chat_completion + response = await generate_chat_completion(request, payload, user) + + if not response or "choices" not in response or not response["choices"]: + raise ValueError("LLM response is not in the correct format or is empty") + + summary = response["choices"][0]["message"]["content"].strip() + + if self.valves.debug_mode: + print(f"[🤖 LLM Call] ✅ Successfully received summary.") + + return summary + + except Exception as e: + error_message = f"An error occurred while calling the LLM ({model}) to generate a summary: {str(e)}" + if not self.valves.summary_model: + error_message += ( + "\n[Hint] You did not specify a summary_model, so the filter attempted to use the current conversation's model. " + "If this is a pipeline (Pipe) model or an incompatible model, please specify a compatible summary model (e.g., 'gemini-2.5-flash') in the configuration." 
+ ) + + if self.valves.debug_mode: + print(f"[🤖 LLM Call] ❌ {error_message}") + + raise Exception(error_message) diff --git a/plugins/filters/async-context-compression/async_context_compression_cn.md b/plugins/filters/async-context-compression/async_context_compression_cn.md new file mode 100644 index 0000000..bfcbf51 --- /dev/null +++ b/plugins/filters/async-context-compression/async_context_compression_cn.md @@ -0,0 +1,77 @@ +# 异步上下文压缩过滤器 + +**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.0.0 | **许可证:** MIT + +> **重要提示**:为了确保所有过滤器的可维护性和易用性,每个过滤器都应附带清晰、完整的文档,以确保其功能、配置和使用方法得到充分说明。 + +本过滤器通过智能摘要和消息压缩技术,在保持对话连贯性的同时,显著降低长对话的Token消耗。 + +--- + +## 核心特性 + +- ✅ **自动压缩**: 基于消息数量阈值自动触发上下文压缩。 +- ✅ **异步摘要**: 在后台生成摘要,不阻塞当前对话的响应。 +- ✅ **持久化存储**: 支持 PostgreSQL 和 SQLite 数据库,确保摘要在服务重启后不丢失。 +- ✅ **灵活保留策略**: 可自由配置保留对话头部和尾部的消息数量,确保关键信息和上下文的连贯性。 +- ✅ **智能注入**: 将生成的历史摘要智能地注入到新的上下文中。 + +--- + +## 安装与配置 + +### 1. 环境变量 + +本插件的运行依赖于数据库,您**必须**在 Open WebUI 的环境变量中配置 `DATABASE_URL`。 + +- **PostgreSQL 示例**: + ``` + DATABASE_URL=postgresql://user:password@host:5432/openwebui + ``` +- **SQLite 示例**: + ``` + DATABASE_URL=sqlite:///path/to/your/data/webui.db + ``` + +### 2. 过滤器顺序 + +建议将此过滤器的优先级设置得相对较高(数值较小),以确保它在其他可能修改消息内容的过滤器之前运行。一个典型的顺序可能是: + +1. **前置过滤器 (priority < 10)** + - 例如:注入系统级提示的过滤器。 +2. **本压缩过滤器 (priority = 10)** +3. 
**后置过滤器 (priority > 10)** + - 例如:对最终输出进行格式化的过滤器。 + +--- + +## 配置参数 + +您可以在过滤器的设置中调整以下参数: + +| 参数 | 默认值 | 描述 | +| :--- | :--- | :--- | +| `priority` | `10` | 过滤器执行顺序,数值越小越先执行。 | +| `compression_threshold` | `15` | 当总消息数达到此值时,将在后台触发摘要生成。 | +| `keep_first` | `1` | 始终保留对话开始的 N 条消息。第一条消息通常包含重要的系统提示。 | +| `keep_last` | `6` | 始终保留对话末尾的 N 条消息,以确保上下文连贯。 | +| `summary_model` | `None` | 用于生成摘要的模型。**强烈建议**配置一个快速、经济的兼容模型(如 `gemini-2.5-flash`)。如果留空,将尝试使用当前对话的模型,但这可能因模型不兼容(如 Pipe 模型)而失败。 | +| `max_summary_tokens` | `4000` | 生成摘要时允许的最大 Token 数。 | +| `summary_temperature` | `0.3` | 控制摘要生成的随机性,较低的值结果更稳定。 | +| `debug_mode` | `true` | 是否在日志中打印详细的调试信息。生产环境建议设为 `false`。 | + +--- + +## 故障排除 + +- **问题:数据库连接失败** + - **解决**:请确认 `DATABASE_URL` 环境变量已正确设置,并且数据库服务运行正常。 + +- **问题:摘要未生成** + - **解决**:检查 `compression_threshold` 是否已达到,并确认 `summary_model` 配置正确。查看日志以获取详细错误。 + +- **问题:初始的系统提示丢失** + - **解决**:确保 `keep_first` 的值大于 0,以保留包含重要信息的初始消息。 + +- **问题:压缩效果不明显** + - **解决**:尝试适当提高 `compression_threshold`,或减少 `keep_first` / `keep_last` 的值。 diff --git a/plugins/filters/async-context-compression/工作流程指南.md b/plugins/filters/async-context-compression/工作流程指南.md new file mode 100644 index 0000000..ee6b61a --- /dev/null +++ b/plugins/filters/async-context-compression/工作流程指南.md @@ -0,0 +1,662 @@ +# 异步上下文压缩过滤器 - 工作流程指南 + +## 📋 目录 +1. [概述](#概述) +2. [系统架构](#系统架构) +3. [工作流程详解](#工作流程详解) +4. [Token 计数机制](#token-计数机制) +5. [递归摘要机制](#递归摘要机制) +6. [配置指南](#配置指南) +7. 
[最佳实践](#最佳实践) + +--- + +## 概述 + +异步上下文压缩过滤器是一个高性能的消息压缩插件,通过以下方式降低长对话的 Token 消耗: + +- **智能摘要**:将历史消息压缩成高保真摘要 +- **递归更新**:新摘要合并旧摘要,保证历史连贯性 +- **异步处理**:后台生成摘要,不阻塞用户响应 +- **灵活配置**:支持全局和模型特定的阈值配置 + +### 核心指标 +- **压缩率**:可达 65% 以上(取决于对话长度) +- **响应时间**:inlet 阶段 <10ms(无计算开销) +- **摘要质量**:高保真递归摘要,保留关键信息 + +--- + +## 系统架构 + +``` +┌─────────────────────────────────────────────────────┐ +│ 用户请求流程 │ +└────────────────┬────────────────────────────────────┘ + │ + ┌────────────▼──────────────┐ + │ inlet(请求前处理) │ + │ ├─ 加载摘要记录 │ + │ ├─ 注入摘要到首条消息 │ + │ └─ 返回压缩消息列表 │ ◄─ 快速返回 (<10ms) + └────────────┬──────────────┘ + │ + ┌────────────▼──────────────┐ + │ LLM 处理消息 │ + │ ├─ 调用语言模型 │ + │ └─ 生成回复 │ + └────────────┬──────────────┘ + │ + ┌────────────▼──────────────┐ + │ outlet(响应后处理) │ + │ ├─ 启动后台异步任务 │ + │ └─ 立即返回(不阻塞) │ ◄─ 返回响应给用户 + └────────────┬──────────────┘ + │ + ┌────────────▼──────────────┐ + │ 后台处理(asyncio 任务) │ + │ ├─ 计算 Token 数 │ + │ ├─ 检查压缩阈值 │ + │ ├─ 生成递归摘要 │ + │ └─ 保存到数据库 │ + └────────────┬──────────────┘ + │ + ┌────────────▼──────────────┐ + │ 数据库持久化存储 │ + │ ├─ 摘要内容 │ + │ ├─ 压缩进度 │ + │ └─ 时间戳 │ + └────────────────────────────┘ +``` + +--- + +## 工作流程详解 + +### 1️⃣ inlet 阶段:消息注入与压缩视图构建 + +**目标**:快速应用已有摘要,构建压缩消息视图 + +**流程**: + +``` +输入:所有消息列表 + │ + ├─► 从数据库加载摘要记录 + │ │ + │ ├─► 找到 ✓ ─────┐ + │ └─► 未找到 ───┐ │ + │ │ │ + ├──────────────────┴─┼─► 存在摘要? + │ │ + │ ┌───▼───┐ + │ │ 是 │ 否 + │ └───┬───┴───┐ + │ │ │ + │ ┌───────────▼─┐ ┌─▼─────────┐ + │ │ 构建压缩视图 │ │ 使用原始 │ + │ │ [H] + [T] │ │ 消息列表 │ + │ └───────┬─────┘ └─┬────────┘ + │ │ │ + │ ┌───────────┴──────────┘ + │ │ + │ └─► 组合消息: + │ • 头部(keep_first) + │ • 摘要注入到首条 + │ • 尾部(keep_last) + │ + └─────► 返回压缩消息列表 + ⏱️ 耗时 <10ms +``` + +**关键参数**: +- `keep_first`:保留前 N 条消息(默认 1) +- `keep_last`:保留后 N 条消息(默认 6) +- 摘要注入位置:首条消息的内容前 + +**示例**: +```python +# 原始:20 条消息 +消息1: [系统提示] +消息2-14: [历史对话] +消息15-20: [最近对话] + +# inlet 后(存在摘要):7 条消息 +消息1: [系统提示 + 【历史摘要】...] 
◄─ 摘要已注入 +消息15-20: [最近对话] ◄─ 保留后6条 +``` + +--- + +### 2️⃣ outlet 阶段:后台异步处理 + +**目标**:计算 Token 数、检查阈值、生成摘要(不阻塞响应) + +**流程**: + +``` +LLM 响应完成 + │ + └─► outlet 处理 + │ + └─► 启动后台异步任务(asyncio.create_task) + │ + ├─► 立即返回给用户 ✓ + │ (不等待后台任务完成) + │ + └─► 后台执行 _check_and_generate_summary_async + │ + ├─► 在后台线程中计算 Token 数 + │ (await asyncio.to_thread) + │ + ├─► 获取模型阈值配置 + │ • 优先使用 model_thresholds 中的配置 + │ • 回退到全局 compression_threshold_tokens + │ + ├─► 检查是否触发压缩 + │ if current_tokens >= threshold: + │ + └─► 触发摘要生成流程 +``` + +**时序图**: +``` +时间线: +│ +├─ T0: LLM 响应完成 +│ +├─ T1: outlet 被调用 +│ └─► 启动后台任务 +│ └─► 立即返回 ✓ +│ +├─ T2: 用户收到响应 ✓✓✓ +│ +└─ T3-T10: 后台任务执行 + ├─ 计算 Token + ├─ 检查阈值 + ├─ 调用 LLM 生成摘要 + └─ 保存到数据库 +``` + +**关键特性**: +- ✅ 用户响应不受影响 +- ✅ Token 计算不阻塞请求 +- ✅ 摘要生成异步进行 + +--- + +### 3️⃣ Token 计数与阈值检查 + +**工作流程**: + +``` +后台线程执行 _check_and_generate_summary_async +│ +├─► Step 1: 计算当前 Token 总数 +│ │ +│ ├─ 遍历所有消息 +│ ├─ 处理多模态内容(提取文本部分) +│ ├─ 使用 o200k_base 编码计数 +│ └─ 返回 total_tokens +│ +├─► Step 2: 获取模型特定阈值 +│ │ +│ ├─ 模型 ID: gpt-4 +│ ├─ 查询 model_thresholds +│ │ +│ ├─ 存在配置? 
+│ │ ├─ 是 ✓ 使用该配置 +│ │ └─ 否 ✓ 使用全局参数 +│ │ +│ ├─ compression_threshold_tokens(默认 64000) +│ └─ max_context_tokens(默认 128000) +│ +└─► Step 3: 检查是否触发压缩 + │ + if current_tokens >= compression_threshold_tokens: + │ └─► 触发摘要生成 + │ + else: + └─► 无需压缩,任务结束 +``` + +**Token 计数细节**: + +```python +def _count_tokens(text): + if tiktoken_available: + # 使用 o200k_base(统一编码) + encoding = tiktoken.get_encoding("o200k_base") + return len(encoding.encode(text)) + else: + # 回退:字符估算 + return len(text) // 4 +``` + +**模型阈值优先级**: +``` +优先级 1: model_thresholds["gpt-4"] +优先级 2: model_thresholds["gemini-2.5-flash"] +优先级 3: 全局 compression_threshold_tokens +``` + +--- + +### 4️⃣ 递归摘要生成 + +**核心机制**:将旧摘要与新消息合并,生成更新的摘要 + +**工作流程**: + +``` +触发 _generate_summary_async +│ +├─► Step 1: 加载旧摘要 +│ │ +│ ├─ 从数据库查询 +│ ├─ 获取 previous_summary +│ └─ 获取 compressed_message_count(上次压缩进度) +│ +├─► Step 2: 确定待压缩消息范围 +│ │ +│ ├─ start_index = max(compressed_count, keep_first) +│ ├─ end_index = len(messages) - keep_last +│ │ +│ ├─ 提取 messages[start_index:end_index] +│ └─ 这是【新增对话】部分 +│ +├─► Step 3: 构建 LLM 提示词 +│ │ +│ ├─ 【已有摘要】= previous_summary +│ ├─ 【新增对话】= 格式化的新消息 +│ │ +│ └─ 提示词模板: +│ "将【已有摘要】和【新增对话】合并..." 
+│ +├─► Step 4: 调用 LLM 生成摘要 +│ │ +│ ├─ 模型选择:summary_model(若配置)或当前模型 +│ ├─ 参数: +│ │ • max_tokens = max_summary_tokens(默认 4000) +│ │ • temperature = summary_temperature(默认 0.3) +│ │ • stream = False +│ │ +│ └─ 返回 new_summary +│ +├─► Step 5: 保存摘要到数据库 +│ │ +│ ├─ 更新 chat_summary 表 +│ ├─ summary = new_summary +│ ├─ compressed_message_count = end_index +│ └─ updated_at = now() +│ +└─► Step 6: 记录日志 + └─ 摘要长度、压缩进度、耗时等 +``` + +**递归摘要示例**: + +``` +第一轮压缩: + 旧摘要: 无 + 新消息: 消息2-14(13条) + 生成: Summary_V1 + + 保存: compressed_message_count = 14 + +第二轮压缩: + 旧摘要: Summary_V1 + 新消息: 消息15-28(从索引14开始) + 生成: Summary_V2 = LLM(Summary_V1 + 新消息15-28) + + 保存: compressed_message_count = 28 + +结果: + ✓ 早期信息得以保留(通过 Summary_V1) + ✓ 新信息与旧摘要融合 + ✓ 历史连贯性维护 +``` + +--- + +## Token 计数机制 + +### 编码方案 + +``` +┌─────────────────────────────────┐ +│ _count_tokens(text) │ +├─────────────────────────────────┤ +│ 1. tiktoken 可用? │ +│ ├─ 是 ✓ │ +│ │ └─ use o200k_base │ +│ │ (最新模型适配) │ +│ │ │ +│ └─ 否 ✓ │ +│ └─ 字符估算 │ +│ (1 token ≈ 4 chars) │ +└─────────────────────────────────┘ +``` + +### 多模态内容处理 + +```python +# 消息结构 +message = { + "role": "user", + "content": [ + {"type": "text", "text": "描述图片..."}, + {"type": "image_url", "image_url": {...}}, + {"type": "text", "text": "更多描述..."} + ] +} + +# Token 计数 +提取所有 text 部分 → 合并 → 计数 +图片部分被忽略(不消耗文本 token) +``` + +### 计数流程 + +``` +_calculate_messages_tokens(messages, model) +│ +├─► 遍历每条消息 +│ │ +│ ├─ content 是列表? 
+│ │ ├─ 是 ✓ 提取所有文本部分 +│ │ └─ 否 ✓ 直接使用 +│ │ +│ └─ _count_tokens(content) +│ +└─► 累加所有 Token 数 +``` + +--- + +## 递归摘要机制 + +### 保证历史连贯性的核心原理 + +``` +传统压缩方式(有问题): +时间线: + 消息1-50 ─► 生成摘要1 ─► 保留 [摘要1 + 消息45-50] + │ + 消息51-100 ─► 生成摘要2 ─► 保留 [摘要2 + 消息95-100] + └─► ❌ 摘要1 丢失!早期信息无法追溯 + +递归摘要方式(本实现): +时间线: + 消息1-50 ──► 生成摘要1 ──► 保存 + │ + 摘要1 + 消息51-100 ──► 生成摘要2 ──► 保存 + └─► ✓ 摘要1 信息融入摘要2 + ✓ 历史信息连贯保存 +``` + +### 工作机制 + +``` +inlet 阶段: + 摘要库查询 + │ + ├─ previous_summary(已有摘要) + └─ compressed_message_count(压缩进度) + +outlet 阶段: + 如果 current_tokens >= threshold: + │ + ├─ 新消息范围: + │ [compressed_message_count : len(messages) - keep_last] + │ + └─ LLM 处理: + Input: previous_summary + 新消息 + Output: 更新的摘要(含早期信息 + 新信息) + + 保存进度: + └─ compressed_message_count = end_index + (下次压缩从这里开始) +``` + +--- + +## 配置指南 + +### 全局配置 + +```python +Valves( + # Token 阈值 + compression_threshold_tokens=64000, # 触发压缩 + max_context_tokens=128000, # 硬性上限 + + # 消息保留策略 + keep_first=1, # 保留首条(系统提示) + keep_last=6, # 保留末6条(最近对话) + + # 摘要模型 + summary_model="gemini-2.5-flash", # 快速经济 + + # 摘要参数 + max_summary_tokens=4000, + summary_temperature=0.3, +) +``` + +### 模型特定配置 + +```python +model_thresholds = { + "gpt-4": { + "compression_threshold_tokens": 8000, + "max_context_tokens": 32000 + }, + "gemini-2.5-flash": { + "compression_threshold_tokens": 10000, + "max_context_tokens": 40000 + }, + "llama-70b": { + "compression_threshold_tokens": 20000, + "max_context_tokens": 80000 + } +} +``` + +### 配置选择建议 + +``` +场景1:长对话成本优化 + compression_threshold_tokens: 32000 ◄─ 更早触发 + keep_last: 4 ◄─ 保留少一些 + +场景2:质量优先 + compression_threshold_tokens: 100000 ◄─ 晚触发 + keep_last: 10 ◄─ 保留多一些 + max_summary_tokens: 8000 ◄─ 更详细摘要 + +场景3:平衡方案(推荐) + compression_threshold_tokens: 64000 ◄─ 默认 + keep_last: 6 ◄─ 默认 + summary_model: "gemini-2.5-flash" ◄─ 快速经济 +``` + +--- + +## 最佳实践 + +### 1️⃣ 摘要模型选择 + +``` +推荐模型: + ✅ gemini-2.5-flash 快速、经济、质量好 + ✅ deepseek-v3 成本低、速度快 + ✅ gpt-4o-mini 通用、质量稳定 + +避免: + ❌ 流水线(Pipe)模型 可能不支持标准 API + ❌ 本地模型 容易超时、影响体验 +``` 
+ +### 2️⃣ 阈值调优 + +``` +Token 计数验证: + 1. 启用 debug_mode + 2. 观察实际 Token 数 + 3. 根据需要调整阈值 + + # 日志示例 + [🔍 后台计算] Token 数: 45320 + [🔍 后台计算] 未触发压缩阈值 (Token: 45320 < 64000) +``` + +### 3️⃣ 消息保留策略 + +``` +keep_first 配置: + 通常值: 1(保留系统提示) + 某些场景: 0(系统提示在摘要中) + +keep_last 配置: + 通常值: 6(保留最近对话) + 长对话: 8-10(更多最近对话) + 短对话: 3-4(节省 Token) +``` + +### 4️⃣ 监控与维护 + +``` +关键指标: + • 摘要生成耗时 + • Token 节省率 + • 摘要质量(通过对话体验) + +数据库维护: + # 定期清理过期摘要 + DELETE FROM chat_summary + WHERE updated_at < NOW() - INTERVAL '30 days' + + # 统计压缩效果 + SELECT + COUNT(*) as total_summaries, + AVG(compressed_message_count) as avg_compressed + FROM chat_summary +``` + +### 5️⃣ 故障排除 + +``` +问题:摘要未生成 + 检查项: + 1. Token 数是否达到阈值? + → debug_mode 查看日志 + 2. summary_model 是否配置正确? + → 确保模型存在且可用 + 3. 数据库连接是否正常? + → 检查 DATABASE_URL + +问题:inlet 响应变慢 + 检查项: + 1. keep_first/keep_last 是否过大? + 2. 摘要数据是否过大? + 3. 消息数是否过多? + +问题:摘要质量下降 + 调整方案: + 1. 增加 max_summary_tokens + 2. 降低 summary_temperature(更确定性) + 3. 更换摘要模型 +``` + +--- + +## 性能参考 + +### 时间开销 + +``` +inlet 阶段: + ├─ 数据库查询: 1-2ms + ├─ 摘要注入: 2-3ms + └─ 总计: <10ms ✓ (不影响用户体验) + +outlet 阶段: + ├─ 启动后台任务: <1ms + └─ 立即返回: ✓ (无等待) + +后台处理(不阻塞用户): + ├─ Token 计数: 10-50ms + ├─ LLM 调用: 1-5 秒 + ├─ 数据库保存: 1-2ms + └─ 总计: 1-6 秒 (后台进行) +``` + +### Token 节省示例 + +``` +场景:20 条消息对话 + +未压缩: + 总消息: 20 条 + 预估 Token: 8000 个 + +压缩后(keep_first=1, keep_last=6): + 头部消息: 1 条 (1600 Token) + 摘要: ~800 Token (嵌入在头部) + 尾部消息: 6 条 (3200 Token) + 总计: 7 条有效输入 (~5600 Token) + +节省:8000 - 5600 = 2400 Token (30% 节省) + +随对话变长,节省比例可达 65% 以上 +``` + +--- + +## 数据流图 + +``` +用户消息 + ↓ +[inlet] 摘要注入器 + ├─ 数据库 ← 查询摘要 + ├─ 摘要注入到首条消息 + └─ 返回压缩消息列表 + ↓ +LLM 处理 + ├─ 调用语言模型 + ├─ 生成响应 + └─ 返回给用户 ✓✓✓ + ↓ +[outlet] 后台处理(asyncio 任务) + ├─ 计算 Token 数 + ├─ 检查阈值 + ├─ [if 需要] 调用 LLM 生成摘要 + │ ├─ 加载旧摘要 + │ ├─ 提取新消息 + │ ├─ 构建提示词 + │ └─ 调用 LLM + ├─ 保存新摘要到数据库 + └─ 记录日志 + ↓ +数据库持久化 + └─ chat_summary 表更新 +``` + +--- + +## 总结 + +| 阶段 | 职责 | 耗时 | 特点 | +|------|------|------|------| +| **inlet** | 摘要注入 | <10ms | 快速、无计算 | +| **LLM** | 生成回复 | 变量 | 正常流程 
| +| **outlet** | 启动后台 | <1ms | 不阻塞响应 | +| **后台处理** | Token 计算、摘要生成、数据保存 | 1-6s | 异步执行 | + +**核心优势**: +- ✅ 用户响应不受影响 +- ✅ Token 消耗显著降低 +- ✅ 历史信息连贯保存 +- ✅ 灵活的配置选项 diff --git a/plugins/filters/async-context-compression/异步上下文压缩.py b/plugins/filters/async-context-compression/异步上下文压缩.py new file mode 100644 index 0000000..f388d01 --- /dev/null +++ b/plugins/filters/async-context-compression/异步上下文压缩.py @@ -0,0 +1,1100 @@ +""" +title: 异步上下文压缩 +id: async_context_compression +author: Fu-Jie +author_url: https://github.com/Fu-Jie +funding_url: https://github.com/Fu-Jie/awesome-openwebui +description: 通过智能摘要和消息压缩,降低长对话的 token 消耗,同时保持对话连贯性。 +version: 1.0.0 +license: MIT + +═══════════════════════════════════════════════════════════════════════════════ +📌 功能概述 +═══════════════════════════════════════════════════════════════════════════════ + +本过滤器通过智能摘要和消息压缩技术,显著降低长对话的 token 消耗,同时保持对话连贯性。 + +核心特性: + ✅ 自动触发压缩(基于 Token 数量阈值) + ✅ 异步生成摘要(不阻塞用户响应) + ✅ 数据库持久化存储(支持 PostgreSQL 和 SQLite) + ✅ 灵活的保留策略(可配置保留对话的头部和尾部) + ✅ 智能注入摘要,保持上下文连贯性 + +═══════════════════════════════════════════════════════════════════════════════ +🔄 工作流程 +═══════════════════════════════════════════════════════════════════════════════ + +阶段 1: inlet(请求前处理) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + 1. 接收当前对话的所有消息。 + 2. 检查是否存在已保存的摘要。 + 3. 如果有摘要且消息数超过保留阈值: + ├─ 提取要保留的头部消息(例如,第一条消息)。 + ├─ 将摘要注入到头部消息中。 + ├─ 提取要保留的尾部消息。 + └─ 组合成新的消息列表:[头部消息+摘要] + [尾部消息]。 + 4. 发送压缩后的消息到 LLM。 + +阶段 2: outlet(响应后处理) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + 1. LLM 响应完成后触发。 + 2. 检查 Token 数是否达到压缩阈值。 + 3. 
如果达到 Token 阈值,则在后台异步生成摘要: + ├─ 提取需要摘要的消息(排除保留的头部和尾部)。 + ├─ 调用 LLM 生成简洁摘要。 + └─ 将摘要保存到数据库。 + +═══════════════════════════════════════════════════════════════════════════════ +💾 存储方案 +═══════════════════════════════════════════════════════════════════════════════ + +本过滤器使用数据库进行持久化存储,通过 `DATABASE_URL` 环境变量进行配置,支持 PostgreSQL 和 SQLite。 + +配置方式: + - 必须设置 `DATABASE_URL` 环境变量。 + - PostgreSQL 示例: `postgresql://user:password@host:5432/openwebui` + - SQLite 示例: `sqlite:///path/to/your/database.db` + +过滤器会根据 `DATABASE_URL` 的前缀(`postgres` 或 `sqlite`)自动选择合适的数据库驱动。 + + 表结构: + - id: 主键(自增) + - chat_id: 对话唯一标识(唯一索引) + - summary: 摘要内容(TEXT) + - compressed_message_count: 原始消息数 + - created_at: 创建时间 + - updated_at: 更新时间 + +═══════════════════════════════════════════════════════════════════════════════ +📊 压缩效果示例 +═══════════════════════════════════════════════════════════════════════════════ + +场景:20 条消息的对话 (默认设置: 保留前 1 条, 后 6 条) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + 压缩前: + 消息 1: [初始设定 + 初始问题] + 消息 2-14: [历史对话内容] + 消息 15-20: [最近对话] + 总计: 20 条完整消息 + + 压缩后: + 消息 1: [初始设定 + 历史摘要 + 初始问题] + 消息 15-20: [最近 6 条完整消息] + 总计: 7 条消息 + + 效果: + ✓ 节省 13 条消息(约 65%) + ✓ 保留完整上下文信息 + ✓ 保护重要的初始设定 + +═══════════════════════════════════════════════════════════════════════════════ +⚙️ 配置参数说明 +═══════════════════════════════════════════════════════════════════════════════ + +priority (优先级) + 默认: 10 + 说明: 过滤器执行顺序,数值越小越先执行。 + +compression_threshold_tokens (压缩阈值 Token) + 默认: 64000 + 说明: 当上下文总 Token 数超过此值时,触发压缩。 + 建议: 根据模型上下文窗口和成本调整。 + +max_context_tokens (最大上下文 Token) + 默认: 128000 + 说明: 上下文的硬性上限。超过此值将强制移除最早的消息。 + +model_thresholds (模型特定阈值) + 默认: {} + 说明: 针对特定模型的阈值覆盖配置。 + 示例: {"gpt-4": {"compression_threshold_tokens": 8000, "max_context_tokens": 32000}} + +keep_first (保留初始消息数) + 默认: 1 + 说明: 始终保留对话开始的 N 条消息。设置为 0 则不保留。第一条消息通常包含重要的提示或环境变量。 + +keep_last (保留最近消息数) + 默认: 6 + 说明: 始终保留对话末尾的 N 条完整消息,以确保上下文的连贯性。 + +summary_model (摘要模型) + 默认: None + 说明: 用于生成摘要的 LLM 模型。 + 建议: + - 强烈建议配置一个快速且经济的兼容模型,如 
`deepseek-v3`、`gemini-2.5-flash`、`gpt-4.1`。 + - 如果留空,过滤器将尝试使用当前对话的模型。 + 注意: + - 如果当前对话使用的是流水线(Pipe)模型或不直接支持标准生成API的模型,留空此项可能会导致摘要生成失败。在这种情况下,必须指定一个有效的模型。 + +max_summary_tokens (摘要长度) + 默认: 4000 + 说明: 生成摘要时允许的最大 token 数。 + +summary_temperature (摘要温度) + 默认: 0.3 + 说明: 控制摘要生成的随机性,较低的值会产生更确定性的输出。 + +debug_mode (调试模式) + 默认: true + 说明: 在日志中打印详细的调试信息。生产环境建议设为 `false`。 + +🔧 部署配置 +═══════════════════════════════════════════════════════ + +Docker Compose 示例: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + services: + openwebui: + environment: + DATABASE_URL: postgresql://user:password@postgres:5432/openwebui + depends_on: + - postgres + + postgres: + image: postgres:15-alpine + environment: + POSTGRES_USER: user + POSTGRES_PASSWORD: password + POSTGRES_DB: openwebui + +过滤器安装顺序建议: +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +建议将此过滤器的优先级设置得相对较高(数值较小),以确保它在其他可能修改消息内容的过滤器之前运行。一个典型的顺序可能是: + + 1. 需要访问完整、未压缩历史记录的过滤器 (priority < 10) + (例如: 注入系统级提示的过滤器) + 2. 本压缩过滤器 (priority = 10) + 3. 在压缩后运行的过滤器 (priority > 10) + (例如: 最终输出格式化过滤器) + +═══════════════════════════════════════════════════════════════════════════════ +📝 数据库查询示例 +═══════════════════════════════════════════════════════════════════════════════ + +查看所有摘要: + SELECT + chat_id, + LEFT(summary, 100) as summary_preview, + compressed_message_count, + updated_at + FROM chat_summary + ORDER BY updated_at DESC; + +查询特定对话: + SELECT * + FROM chat_summary + WHERE chat_id = 'your_chat_id'; + +删除过期摘要: + DELETE FROM chat_summary + WHERE updated_at < NOW() - INTERVAL '30 days'; + +统计信息: + SELECT + COUNT(*) as total_summaries, + AVG(LENGTH(summary)) as avg_summary_length, + AVG(compressed_message_count) as avg_msg_count + FROM chat_summary; + +═══════════════════════════════════════════════════════════════════════════════ +⚠️ 注意事项 +═══════════════════════════════════════════════════════════════════════════════ + +1. 数据库权限 + ⚠ 确保 `DATABASE_URL` 指向的用户有创建表的权限。 + ⚠ 首次运行会自动创建 `chat_summary` 表。 + +2. 保留策略 + ⚠ `keep_first` 配置对于保留包含提示或环境变量的初始消息非常重要。请根据需要进行配置。 + +3. 
性能考虑 + ⚠ 摘要生成是异步的,不会阻塞用户响应。 + ⚠ 首次达到阈值时会有短暂的后台处理时间。 + +4. 成本优化 + ⚠ 每次达到阈值会调用一次摘要模型。 + ⚠ 合理设置 `compression_threshold_tokens` 避免频繁调用。 + ⚠ 建议使用快速且经济的模型(如 `gemini-flash`)生成摘要。 + +5. 多模态支持 + ✓ 本过滤器支持包含图片的多模态消息。 + ✓ 摘要仅针对文本内容生成。 + ✓ 在压缩过程中,非文本部分(如图片)会被保留在原始消息中。 + +═══════════════════════════════════════════════════════════════════════════════ +🐛 故障排除 +═══════════════════════════════════════════════════════════════════════════════ + +问题:数据库连接失败 +解决: + 1. 确认 `DATABASE_URL` 环境变量已正确设置。 + 2. 确认 `DATABASE_URL` 以 `sqlite` 或 `postgres` 开头。 + 3. 确认数据库服务正在运行,并且网络连接正常。 + 4. 验证连接 URL 中的用户名、密码、主机和端口是否正确。 + 5. 查看 Open WebUI 的容器日志以获取详细的错误信息。 + +问题:摘要未生成 +解决: + 1. 检查是否达到 `compression_threshold_tokens`。 + 2. 查看 `summary_model` 是否配置正确。 + 3. 检查调试日志中的错误信息。 + +问题:初始的提示或环境变量丢失 +解决: + - 确保 `keep_first` 设置为大于 0 的值,以保留包含这些信息的初始消息。 + +问题:压缩效果不明显 +解决: + 1. 适当提高 `compression_threshold_tokens`。 + 2. 减少 `keep_last` 或 `keep_first` 的数量。 + 3. 检查对话是否真的很长。 + + +""" + +from pydantic import BaseModel, Field, model_validator +from typing import Optional, Dict, Any, List, Union, Callable, Awaitable +import asyncio +import json +import hashlib +import os +import time + +# Open WebUI 内置导入 +from open_webui.utils.chat import generate_chat_completion +from open_webui.models.users import Users +from fastapi.requests import Request +from open_webui.main import app as webui_app + +# 尝试导入 tiktoken +try: + import tiktoken +except ImportError: + tiktoken = None + +# 数据库导入 +from sqlalchemy import create_engine, Column, String, Text, DateTime, Integer +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker +from datetime import datetime + +Base = declarative_base() + + +class ChatSummary(Base): + """对话摘要存储表""" + + __tablename__ = "chat_summary" + + id = Column(Integer, primary_key=True, autoincrement=True) + chat_id = Column(String(255), unique=True, nullable=False, index=True) + summary = Column(Text, nullable=False) + compressed_message_count = Column(Integer, default=0) + 
created_at = Column(DateTime, default=datetime.utcnow) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) + + +class Filter: + def __init__(self): + self.valves = self.Valves() + self._db_engine = None + self._SessionLocal = None + self.temp_state = {} # 用于在 inlet 和 outlet 之间传递临时数据 + self._init_database() + + def _init_database(self): + """初始化数据库连接和表""" + try: + database_url = os.getenv("DATABASE_URL") + + if not database_url: + print("[数据库] ❌ 错误: DATABASE_URL 环境变量未设置。请设置该变量。") + self._db_engine = None + self._SessionLocal = None + return + + db_type = None + engine_args = {} + + if database_url.startswith("sqlite"): + db_type = "SQLite" + engine_args = { + "connect_args": {"check_same_thread": False}, + "echo": False, + } + elif database_url.startswith("postgres"): + db_type = "PostgreSQL" + if database_url.startswith("postgres://"): + database_url = database_url.replace( + "postgres://", "postgresql://", 1 + ) + print("[数据库] ℹ️ 已自动将 postgres:// 转换为 postgresql://") + engine_args = { + "pool_pre_ping": True, + "pool_recycle": 3600, + "echo": False, + } + else: + print( + f"[数据库] ❌ 错误: 不支持的数据库类型。DATABASE_URL 必须以 'sqlite' 或 'postgres' 开头。当前值: {database_url}" + ) + self._db_engine = None + self._SessionLocal = None + return + + # 创建数据库引擎 + self._db_engine = create_engine(database_url, **engine_args) + + # 创建会话工厂 + self._SessionLocal = sessionmaker( + autocommit=False, autoflush=False, bind=self._db_engine + ) + + # 创建表(如果不存在) + Base.metadata.create_all(bind=self._db_engine) + + print(f"[数据库] ✅ 成功连接到 {db_type} 并初始化 chat_summary 表") + + except Exception as e: + print(f"[数据库] ❌ 初始化失败: {str(e)}") + self._db_engine = None + self._SessionLocal = None + + class Valves(BaseModel): + priority: int = Field( + default=10, description="Priority level for the filter operations." 
+ ) + # Token 相关参数 + compression_threshold_tokens: int = Field( + default=64000, ge=0, description="当上下文总 Token 数超过此值时,触发压缩 (全局默认值)" + ) + max_context_tokens: int = Field( + default=128000, ge=0, description="上下文的硬性上限。超过此值将强制移除最早的消息 (全局默认值)" + ) + model_thresholds: dict = Field( + default={ + # Groq + "groq-openai/gpt-oss-20b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600}, + "groq-openai/gpt-oss-120b": {"max_context_tokens": 8000, "compression_threshold_tokens": 5600}, + + # Qwen (ModelScope / CF) + "modelscope-Qwen/Qwen3-Coder-480B-A35B-Instruct": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200}, + "cfchatqwen-qwen3-max-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "modelscope-Qwen/Qwen3-235B-A22B-Thinking-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-max": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-vl-plus-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-coder-plus-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "cfchatqwen-qwen3-vl-plus": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-coder-plus": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "cfchatqwen-qwen3-omni-flash-thinking": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875}, + "cfchatqwen-qwen3-omni-flash": {"max_context_tokens": 65536, "compression_threshold_tokens": 45875}, + "cfchatqwen-qwen3-next-80b-a3b-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "modelscope-Qwen/Qwen3-VL-235B-A22B-Instruct": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-next-80b-a3b-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + 
"cfchatqwen-qwen3-next-80b-a3b": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-235b-a22b-thinking-search": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750}, + "cfchatqwen-qwen3-235b-a22b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750}, + "cfchatqwen-qwen3-235b-a22b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750}, + "cfchatqwen-qwen3-coder-flash-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-coder-flash": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-max-2025-10-30": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-max-2025-10-30-thinking": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-max-2025-10-30-thinking-search": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "modelscope-Qwen/Qwen3-235B-A22B-Instruct-2507": {"max_context_tokens": 262144, "compression_threshold_tokens": 183500}, + "cfchatqwen-qwen3-vl-30b-a3b": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750}, + "cfchatqwen-qwen3-vl-30b-a3b-thinking": {"max_context_tokens": 131072, "compression_threshold_tokens": 91750}, + + # Gemini + "gemini-2.5-pro-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.5-flash-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.5-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.5-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.5-flash-lite-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.5-pro": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.0-flash-search": 
{"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.0-flash": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.0-flash-exp": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-2.0-flash-lite": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "copilot-gemini-2.5-pro": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gemini-pro-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-3-pro-preview": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gemini-pro-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-flash-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-flash-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-flash-lite-latest-search": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-flash-lite-latest": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + "gemini-robotics-er-1.5-preview": {"max_context_tokens": 1048576, "compression_threshold_tokens": 734000}, + + # DeepSeek + "modelscope-deepseek-ai/DeepSeek-V3.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfdeepseek-deepseek-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "openrouter-deepseek/deepseek-r1-0528:free": {"max_context_tokens": 163840, "compression_threshold_tokens": 114688}, + "modelscope-deepseek-ai/DeepSeek-V3.2-Exp": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfdeepseek-deepseek-r1-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfdeepseek-deepseek-r1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + 
"openrouter-deepseek/deepseek-chat-v3.1:free": {"max_context_tokens": 163800, "compression_threshold_tokens": 114660}, + "modelscope-deepseek-ai/DeepSeek-R1-0528": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfdeepseek-deepseek": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + + # Kimi (Moonshot) + "cfkimi-kimi-k2-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfkimi-kimi-k1.5-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfkimi-kimi-k1.5-thinking-search": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfkimi-kimi-research": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "openrouter-moonshotai/kimi-k2:free": {"max_context_tokens": 32768, "compression_threshold_tokens": 22937}, + "cfkimi-kimi-k2": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "cfkimi-kimi-k1.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + + # GPT / OpenAI + "gpt-4.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gpt-4o": {"max_context_tokens": 64000, "compression_threshold_tokens": 44800}, + "gpt-5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "github-gpt-4.1": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250}, + "gpt-5-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gpt-5.1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gpt-5.1-codex": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gpt-5.1-codex-mini": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "gpt-5-codex": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000}, + "github-gpt-4.1-mini": {"max_context_tokens": 7500, "compression_threshold_tokens": 5250}, + "openrouter-openai/gpt-oss-20b:free": 
{"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + + # Claude / Anthropic + "claude-sonnet-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "claude-haiku-4.5": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "copilot-claude-opus-41": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000}, + "copilot-claude-sonnet-4": {"max_context_tokens": 80000, "compression_threshold_tokens": 56000}, + + # Other / OpenRouter / OSWE + "oswe-vscode-insiders": {"max_context_tokens": 256000, "compression_threshold_tokens": 179200}, + "modelscope-MiniMax/MiniMax-M2": {"max_context_tokens": 204800, "compression_threshold_tokens": 143360}, + "oswe-vscode-prime": {"max_context_tokens": 200000, "compression_threshold_tokens": 140000}, + "grok-code-fast-1": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "copilot-auto": {"max_context_tokens": 128000, "compression_threshold_tokens": 89600}, + "modelscope-ZhipuAI/GLM-4.6": {"max_context_tokens": 32000, "compression_threshold_tokens": 22400}, + "openrouter-x-ai/grok-4.1-fast:free": {"max_context_tokens": 2000000, "compression_threshold_tokens": 1400000}, + "openrouter-qwen/qwen3-coder:free": {"max_context_tokens": 262000, "compression_threshold_tokens": 183400}, + "openrouter-qwen/qwen3-235b-a22b:free": {"max_context_tokens": 40960, "compression_threshold_tokens": 28672}, + }, + description="针对特定模型的阈值覆盖配置。仅包含需要特殊配置的模型。" + ) + + keep_first: int = Field( + default=1, ge=0, description="始终保留最初的 N 条消息。设置为 0 则不保留。" + ) + keep_last: int = Field(default=6, ge=0, description="始终保留最近的 N 条完整消息。") + summary_model: str = Field( + default=None, + description="用于生成摘要的模型 ID。留空则使用当前对话的模型。用于匹配 model_thresholds 中的配置。", + ) + max_summary_tokens: int = Field( + default=16384, ge=1, description="摘要的最大 token 数" + ) + summary_temperature: float = Field( + default=0.1, ge=0.0, le=2.0, description="摘要生成的温度参数" + ) + debug_mode: bool = Field(default=True, 
description="调试模式,打印详细日志") + + def _save_summary(self, chat_id: str, summary: str, compressed_count: int): + """保存摘要到数据库""" + if not self._SessionLocal: + if self.valves.debug_mode: + print("[存储] 数据库未初始化,跳过保存摘要") + return + + try: + session = self._SessionLocal() + try: + # 查找现有记录 + existing = ( + session.query(ChatSummary).filter_by(chat_id=chat_id).first() + ) + + if existing: + # [优化] 乐观锁检查:只有进度向前推进时才更新 + if compressed_count <= existing.compressed_message_count: + if self.valves.debug_mode: + print(f"[存储] 跳过更新:新进度 ({compressed_count}) 不大于现有进度 ({existing.compressed_message_count})") + return + + # 更新现有记录 + existing.summary = summary + existing.compressed_message_count = compressed_count + existing.updated_at = datetime.utcnow() + else: + # 创建新记录 + new_summary = ChatSummary( + chat_id=chat_id, + summary=summary, + compressed_message_count=compressed_count, + ) + session.add(new_summary) + + session.commit() + + if self.valves.debug_mode: + action = "更新" if existing else "创建" + print(f"[存储] 摘要已{action}到数据库 (Chat ID: {chat_id})") + + finally: + session.close() + + except Exception as e: + print(f"[存储] ❌ 数据库保存失败: {str(e)}") + + def _load_summary_record(self, chat_id: str) -> Optional[ChatSummary]: + """从数据库加载摘要记录对象""" + if not self._SessionLocal: + return None + + try: + session = self._SessionLocal() + try: + record = session.query(ChatSummary).filter_by(chat_id=chat_id).first() + if record: + # Detach the object from the session so it can be used after session close + session.expunge(record) + return record + finally: + session.close() + except Exception as e: + print(f"[加载] ❌ 数据库读取失败: {str(e)}") + return None + + def _load_summary(self, chat_id: str, body: dict) -> Optional[str]: + """从数据库加载摘要文本 (兼容旧接口)""" + record = self._load_summary_record(chat_id) + if record: + if self.valves.debug_mode: + print(f"[加载] 从数据库加载摘要 (Chat ID: {chat_id})") + print( + f"[加载] 更新时间: {record.updated_at}, 已压缩消息数: {record.compressed_message_count}" + ) + return record.summary + return 
None + + def _count_tokens(self, text: str, model: str = "gpt-3.5-turbo") -> int: + """计算文本的 Token 数量""" + if not text: + return 0 + + if tiktoken: + try: + # 统一使用 o200k_base 编码 (适配最新模型) + encoding = tiktoken.get_encoding("o200k_base") + return len(encoding.encode(text)) + except Exception as e: + if self.valves.debug_mode: + print(f"[Token计数] tiktoken 错误: {e},回退到字符估算") + + # 回退策略:粗略估算 (1 token ≈ 4 chars) + return len(text) // 4 + + def _calculate_messages_tokens(self, messages: List[Dict], model: str = "gpt-3.5-turbo") -> int: + """计算消息列表的总 Token 数""" + total_tokens = 0 + for msg in messages: + content = msg.get("content", "") + if isinstance(content, list): + # 多模态内容处理 + text_content = "" + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text_content += part.get("text", "") + total_tokens += self._count_tokens(text_content, model) + else: + total_tokens += self._count_tokens(str(content), model) + return total_tokens + + def _get_model_thresholds(self, model_id: str) -> Dict[str, int]: + """获取特定模型的阈值配置 + + 优先级: + 1. 如果 model_thresholds 中存在该模型ID的配置,使用该配置 + 2. 
否则使用全局参数 compression_threshold_tokens 和 max_context_tokens + """ + # 尝试从模型特定配置中匹配 + if model_id in self.valves.model_thresholds: + if self.valves.debug_mode: + print(f"[配置] 使用模型特定配置: {model_id}") + return self.valves.model_thresholds[model_id] + + # 使用全局默认配置 + if self.valves.debug_mode: + print(f"[配置] 模型 {model_id} 未在 model_thresholds 中,使用全局参数") + + return { + "compression_threshold_tokens": self.valves.compression_threshold_tokens, + "max_context_tokens": self.valves.max_context_tokens + } + + def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict: + """将摘要注入到第一条消息中(追加到内容前面)""" + content = message.get("content", "") + summary_block = f"【历史对话摘要】\n{summary}\n\n---\n以下是最近的对话:\n\n" + + # 处理不同内容类型 + if isinstance(content, list): # 多模态内容 + # 查找第一个文本部分并在其前面插入摘要 + new_content = [] + summary_inserted = False + + for part in content: + if ( + isinstance(part, dict) + and part.get("type") == "text" + and not summary_inserted + ): + # 在第一个文本部分前插入摘要 + new_content.append( + {"type": "text", "text": summary_block + part.get("text", "")} + ) + summary_inserted = True + else: + new_content.append(part) + + # 如果没有文本部分,在开头插入 + if not summary_inserted: + new_content.insert(0, {"type": "text", "text": summary_block}) + + message["content"] = new_content + + elif isinstance(content, str): # 纯文本 + message["content"] = summary_block + content + + return message + + async def inlet( + self, + body: dict, + __user__: Optional[dict] = None, + __metadata__: dict = None, + __event_emitter__: Callable[[Any], Awaitable[None]] = None, + ) -> dict: + """ + 在发送到 LLM 之前执行 + 压缩策略:只负责注入已有的摘要,不进行 Token 计算 + """ + messages = body.get("messages", []) + chat_id = __metadata__["chat_id"] + + if self.valves.debug_mode: + print(f"\n{'='*60}") + print(f"[Inlet] Chat ID: {chat_id}") + print(f"[Inlet] 收到 {len(messages)} 条消息") + + # 记录原始消息的目标压缩进度,供 outlet 使用 + # 目标是压缩到倒数第 keep_last 条之前 + target_compressed_count = max(0, len(messages) - self.valves.keep_last) + + # [优化] 简单的状态清理检查 + if 
chat_id in self.temp_state: + if self.valves.debug_mode: + print(f"[Inlet] ⚠️ 覆盖未消费的旧状态 (Chat ID: {chat_id})") + + self.temp_state[chat_id] = target_compressed_count + + if self.valves.debug_mode: + print(f"[Inlet] 记录目标压缩进度: {target_compressed_count}") + + # 加载摘要记录 + summary_record = await asyncio.to_thread(self._load_summary_record, chat_id) + + final_messages = [] + + if summary_record: + # 存在摘要,构建视图:[Head] + [Summary Message] + [Tail] + # Tail 是从上次压缩点之后的所有消息 + compressed_count = summary_record.compressed_message_count + + # 确保 compressed_count 合理 + if compressed_count > len(messages): + compressed_count = max(0, len(messages) - self.valves.keep_last) + + # 1. 头部消息 (Keep First) + head_messages = [] + if self.valves.keep_first > 0: + head_messages = messages[:self.valves.keep_first] + + # 2. 摘要消息 (作为 User 消息插入) + summary_content = ( + f"【系统提示:以下是历史对话的摘要,仅供参考上下文,请勿对摘要内容进行回复,直接回答后续的最新问题】\n\n" + f"{summary_record.summary}\n\n" + f"---\n" + f"以下是最近的对话:" + ) + summary_msg = {"role": "user", "content": summary_content} + + # 3. 
async def outlet(
    self,
    body: dict,
    __user__: Optional[dict] = None,
    __metadata__: dict = None,
    __event_emitter__: Callable[[Any], Awaitable[None]] = None,
) -> dict:
    """Schedule the background token check / summary job and return ``body``.

    The job (``self._check_and_generate_summary_async``) is started as an
    asyncio task and is not awaited, so this method returns immediately and
    never alters ``body``.

    Args:
        body: Response payload; ``body["model"]`` (default ``"gpt-3.5-turbo"``)
            is passed to the background job together with ``body`` itself.
        __user__: User dict forwarded to the background job.
        __metadata__: Request metadata; ``chat_id`` is read from it. When the
            metadata or the chat id is missing, no job is scheduled.
        __event_emitter__: Status emitter forwarded to the background job.

    Returns:
        The same ``body`` object that was passed in.
    """
    chat_id = (__metadata__ or {}).get("chat_id")
    model = body.get("model", "gpt-3.5-turbo")

    if not chat_id:
        # Without a chat id there is no key to store a summary under.
        if self.valves.debug_mode:
            print("[Outlet] ⚠️ 缺少 chat_id,跳过后台处理")
        return body

    if self.valves.debug_mode:
        print(f"\n{'='*60}")
        print(f"[Outlet] Chat ID: {chat_id}")
        print(f"[Outlet] 响应完成")

    # Fire-and-forget: the job runs after this method has returned.
    task = asyncio.create_task(
        self._check_and_generate_summary_async(
            chat_id, model, body, __user__, __event_emitter__
        )
    )

    # The event loop only keeps weak references to tasks; hold a strong one
    # until the task finishes so it cannot be garbage-collected mid-run.
    background_tasks = getattr(self, "_background_tasks", None)
    if background_tasks is None:
        background_tasks = set()
        self._background_tasks = background_tasks
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)

    if self.valves.debug_mode:
        print(f"[Outlet] 后台处理已启动")
        print(f"{'='*60}\n")

    return body
计数...") + + # 在后台线程中计算 Token 数 + current_tokens = await asyncio.to_thread( + self._calculate_messages_tokens, messages, model + ) + + if self.valves.debug_mode: + print(f"[🔍 后台计算] Token 数: {current_tokens}") + + # 检查是否需要压缩 + if current_tokens >= compression_threshold_tokens: + if self.valves.debug_mode: + print( + f"[🔍 后台计算] ⚡ 触发压缩阈值 (Token: {current_tokens} >= {compression_threshold_tokens})" + ) + + # 继续生成摘要 + await self._generate_summary_async( + messages, chat_id, body, user_data, __event_emitter__ + ) + else: + if self.valves.debug_mode: + print( + f"[🔍 后台计算] 未触发压缩阈值 (Token: {current_tokens} < {compression_threshold_tokens})" + ) + + except Exception as e: + print(f"[🔍 后台计算] ❌ 错误: {str(e)}") + + async def _generate_summary_async( + self, + messages: list, + chat_id: str, + body: dict, + user_data: Optional[dict], + __event_emitter__: Callable[[Any], Awaitable[None]] = None, + ): + """ + 异步生成摘要(后台执行,不阻塞响应) + 逻辑: + 1. 提取中间消息(去除 keep_first 和 keep_last)。 + 2. 检查 Token 上限,如果超过 max_context_tokens,从中间消息头部移除。 + 3. 对剩余的中间消息生成摘要。 + """ + try: + if self.valves.debug_mode: + print(f"\n[🤖 异步摘要任务] 开始...") + + # 1. 获取目标压缩进度 + # 优先从 temp_state 获取(由 inlet 计算),如果获取不到(例如重启后),则假设当前是完整历史 + target_compressed_count = self.temp_state.pop(chat_id, None) + if target_compressed_count is None: + target_compressed_count = max(0, len(messages) - self.valves.keep_last) + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] ⚠️ 无法获取 inlet 状态,使用当前消息数估算进度: {target_compressed_count}") + + # 2. 确定待压缩的消息范围 (Middle) + start_index = self.valves.keep_first + end_index = len(messages) - self.valves.keep_last + if self.valves.keep_last == 0: + end_index = len(messages) + + # 确保索引有效 + if start_index >= end_index: + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] 中间消息为空 (Start: {start_index}, End: {end_index}),跳过") + return + + middle_messages = messages[start_index:end_index] + + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] 待处理中间消息: {len(middle_messages)} 条") + + # 3. 
检查 Token 上限并截断 (Max Context Truncation) + # [优化] 使用摘要模型(如果有)的阈值来决定能处理多少中间消息 + # 这样可以用长窗口模型(如 gemini-flash)来压缩超过当前模型窗口的历史记录 + summary_model_id = self.valves.summary_model or body.get("model", "gpt-3.5-turbo") + + thresholds = self._get_model_thresholds(summary_model_id) + # 注意:这里使用的是摘要模型的最大上下文限制 + max_context_tokens = thresholds.get("max_context_tokens", self.valves.max_context_tokens) + + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] 使用模型 {summary_model_id} 的上限: {max_context_tokens} Tokens") + + # 计算当前总 Token (使用摘要模型进行计数) + total_tokens = await asyncio.to_thread(self._calculate_messages_tokens, messages, summary_model_id) + + if total_tokens > max_context_tokens: + excess_tokens = total_tokens - max_context_tokens + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] ⚠️ 总 Token ({total_tokens}) 超过摘要模型上限 ({max_context_tokens}),需要移除约 {excess_tokens} Token") + + # 从 middle_messages 头部开始移除 + removed_tokens = 0 + removed_count = 0 + + while removed_tokens < excess_tokens and middle_messages: + msg_to_remove = middle_messages.pop(0) + msg_tokens = self._count_tokens(str(msg_to_remove.get("content", "")), summary_model_id) + removed_tokens += msg_tokens + removed_count += 1 + + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] 已移除 {removed_count} 条消息,共 {removed_tokens} Token") + + if not middle_messages: + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] 截断后中间消息为空,跳过摘要生成") + return + + # 4. 构建对话文本 + conversation_text = self._format_messages_for_summary(middle_messages) + + # 5. 调用 LLM 生成新摘要 + # 注意:这里不再传入 previous_summary,因为旧摘要(如果有)已经包含在 middle_messages 里了 + + # 发送开始生成摘要的状态通知 + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": "正在后台生成上下文摘要...", + "done": False, + }, + } + ) + + new_summary = await self._call_summary_llm(None, conversation_text, body, user_data) + + # 6. 
保存新摘要 + if self.valves.debug_mode: + print("[优化] 正在后台线程中保存摘要,以避免阻塞事件循环。") + + await asyncio.to_thread(self._save_summary, chat_id, new_summary, target_compressed_count) + + # 发送完成状态通知 + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"上下文摘要已更新 (节省 {len(middle_messages)} 条消息)", + "done": True, + }, + } + ) + + if self.valves.debug_mode: + print(f"[🤖 异步摘要任务] ✅ 完成!新摘要长度: {len(new_summary)} 字符") + print(f"[🤖 异步摘要任务] 进度更新: 已压缩至原始第 {target_compressed_count} 条消息") + + except Exception as e: + print(f"[🤖 异步摘要任务] ❌ 错误: {str(e)}") + import traceback + traceback.print_exc() + + def _format_messages_for_summary(self, messages: list) -> str: + """格式化消息用于摘要""" + formatted = [] + for i, msg in enumerate(messages, 1): + role = msg.get("role", "unknown") + content = msg.get("content", "") + + # 处理多模态内容 + if isinstance(content, list): + text_parts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + text_parts.append(part.get("text", "")) + content = " ".join(text_parts) + + # 处理角色名称 + role_name = {"user": "用户", "assistant": "助手"}.get(role, role) + + # 限制每条消息的长度,避免过长 + if len(content) > 500: + content = content[:500] + "..." + + formatted.append(f"[{i}] {role_name}: {content}") + + return "\n\n".join(formatted) + + async def _call_summary_llm( + self, previous_summary: Optional[str], new_conversation_text: str, body: dict, user_data: dict + ) -> str: + """ + 使用 Open WebUI 内置方法调用 LLM 生成摘要 + """ + if self.valves.debug_mode: + print(f"[🤖 LLM 调用] 使用 Open WebUI 内置方法") + + # 构建摘要提示词 (优化版) + summary_prompt = f""" +你是一个专业的对话上下文压缩专家。你的任务是对以下对话内容进行高保真摘要。 +这段对话可能包含之前的摘要(作为系统消息或文本)以及后续的对话内容。 + +### 核心目标 +1. **全面总结**:将对话中的关键信息、用户意图、助手回复进行精炼总结。 +2. **去噪提纯**:移除寒暄、重复、确认性回复等无用信息。 +3. **关键保留**: + * **代码片段、命令、技术参数必须逐字保留,严禁修改或概括。** + * 用户意图、核心需求、决策结论、待办事项必须清晰保留。 +4. **连贯性**:生成的摘要应作为一个整体,能够替代原始对话作为上下文。 +5. 
**详尽记录**:由于允许的篇幅较长,请尽可能保留对话中的细节、论证过程和多轮交互的细微差别,而不仅仅是宏观概括。 + +### 输出要求 +* **格式**:结构化文本,逻辑清晰。 +* **语言**:与对话语言保持一致(通常为中文)。 +* **长度**:严格控制在 {self.valves.max_summary_tokens} Token 以内。 +* **严禁**:不要输出"根据对话..."、"摘要如下..."等废话。直接输出摘要内容。 + +### 摘要结构建议 +* **当前目标/主题**:一句话概括当前正在解决的问题。 +* **关键信息与上下文**: + * 已确认的事实/参数。 + * **代码/技术细节** (使用代码块包裹)。 +* **进展与结论**:已完成的步骤和达成的共识。 +* **待办/下一步**:明确的后续行动。 + +--- +{new_conversation_text} +--- + +请根据上述内容,生成摘要: +""" + # 确定使用的模型 + model = self.valves.summary_model or body.get("model", "") + + if self.valves.debug_mode: + print(f"[🤖 LLM 调用] 模型: {model}") + + # 构建 payload + payload = { + "model": model, + "messages": [{"role": "user", "content": summary_prompt}], + "stream": False, + "max_tokens": self.valves.max_summary_tokens, + "temperature": self.valves.summary_temperature, + } + + try: + # 获取用户对象 + user_id = user_data.get("id") if user_data else None + if not user_id: + raise ValueError("无法获取用户 ID") + + # [优化] 在后台线程中获取用户对象,以避免阻塞事件循环 + if self.valves.debug_mode: + print("[优化] 正在后台线程中获取用户对象,以避免阻塞事件循环。") + user = await asyncio.to_thread(Users.get_user_by_id, user_id) + + if not user: + raise ValueError(f"无法找到用户: {user_id}") + + if self.valves.debug_mode: + print(f"[🤖 LLM 调用] 用户: {user.email}") + print(f"[🤖 LLM 调用] 发送请求...") + + # 创建 Request 对象 + request = Request(scope={"type": "http", "app": webui_app}) + + # 调用 generate_chat_completion + response = await generate_chat_completion(request, payload, user) + + if not response or "choices" not in response or not response["choices"]: + raise ValueError("LLM 响应格式不正确或为空") + + summary = response["choices"][0]["message"]["content"].strip() + + if self.valves.debug_mode: + print(f"[🤖 LLM 调用] ✅ 成功获取摘要") + + return summary + + except Exception as e: + error_message = f"调用 LLM ({model}) 生成摘要时发生错误: {str(e)}" + if not self.valves.summary_model: + error_message += ( + "\n[提示] 您没有指定摘要模型 (summary_model),因此尝试使用当前对话的模型。" + "如果这是一个流水线(Pipe)模型或不兼容的模型,请在配置中指定一个兼容的摘要模型(如 'gemini-2.5-flash')。" + ) + + if 
self.valves.debug_mode: + print(f"[🤖 LLM 调用] ❌ {error_message}") + + raise Exception(error_message) diff --git a/plugins/filters/async-context-compression/异步上下文压缩优化.md b/plugins/filters/async-context-compression/异步上下文压缩优化.md new file mode 100644 index 0000000..6aa62e3 --- /dev/null +++ b/plugins/filters/async-context-compression/异步上下文压缩优化.md @@ -0,0 +1,45 @@ +需求文档:异步上下文压缩插件优化 (Async Context Compression Optimization) +1. 核心目标 将现有的基于消息数量的压缩逻辑升级为基于 Token 数量的压缩逻辑,并引入递归摘要机制,以更精准地控制上下文窗口,提高摘要质量,并防止历史信息丢失。 + +2. 功能需求 + +Token 计数与阈值控制 +引入 tiktoken: 使用 tiktoken 库进行精确的 Token 计数。如果环境不支持,则回退到字符估算 (1 token ≈ 4 chars)。 +新配置参数 (Valves): +compression_threshold_tokens (默认: 64000): 当上下文总 Token 数超过此值时,触发压缩(生成摘要)。 +max_context_tokens (默认: 128000): 上下文的硬性上限。如果超过此值,强制移除最早的消息(保留受保护消息除外)。 +model_thresholds (字典): 支持针对不同模型 ID 配置不同的阈值。例如:{'gpt-4': {'compression_threshold_tokens': 8000, ...}}。 +废弃旧参数: compression_threshold (基于消息数) 将被标记为废弃,优先使用 Token 阈值。 +递归摘要 (Recursive Summarization) +机制: 在生成新摘要时,必须读取并包含上一次的摘要。 +逻辑: 新摘要 = LLM(上一次摘要 + 新产生的对话消息)。 +目的: 防止随着对话进行,最早期的摘要信息被丢弃,确保长期记忆的连续性。 +消息保护与修剪策略 +保护机制: keep_first (保留头部 N 条) 和 keep_last (保留尾部 N 条) 的消息绝对不参与压缩,也不被移除。 +修剪逻辑: 当触发 max_context_tokens 限制时,优先移除 keep_first 之后、keep_last 之前的最早消息。 +优化的提示词 (Prompt Engineering) +目标: 去除无用信息(寒暄、重复),保留关键信号(事实、代码、决策)。 +指令: +提炼与净化: 明确要求移除噪音。 +关键保留: 强调代码片段必须逐字保留。 +合并与更新: 明确指示将新信息合并到旧摘要中。 +语言一致性: 输出语言必须与对话语言保持一致。 +3. 实现细节 + +文件: +async_context_compression.py +类: +Filter +关键方法: +_count_tokens(text): 实现 Token 计数。 +_calculate_messages_tokens(messages): 计算消息列表总 Token。 +_generate_summary_async(...) +: 修改为加载旧摘要,并传入 LLM。 +_call_summary_llm(...) +: 更新 Prompt,接受 previous_summary 和 new_messages。 +inlet(...) +: +使用 compression_threshold_tokens 判断是否注入摘要。 +实现 max_context_tokens 的强制修剪逻辑。 +outlet(...) 
@dataclass
class NormalizerConfig:
    """Normalization settings used to switch individual rules on or off."""
    enable_escape_fix: bool = True  # Repair over-escaped characters
    enable_thought_tag_fix: bool = True  # Normalize chain-of-thought tags
    enable_code_block_fix: bool = True  # Repair code-fence formatting
    enable_latex_fix: bool = True  # Normalize LaTeX formula delimiters
    enable_list_fix: bool = False  # Repair missing line breaks before list items
    enable_unclosed_block_fix: bool = True  # Close an unterminated code fence
    enable_fullwidth_symbol_fix: bool = False  # Convert full-width symbols inside code
    enable_xml_tag_cleanup: bool = True  # Strip leftover XML tags

    # Extra user-supplied cleaners (advanced extension point); each one takes
    # the text and returns the cleaned text.
    custom_cleaners: List[Callable[[str], str]] = field(default_factory=list)
输出内容规范化器 - 生产级实现""" + + # --- 1. 预编译正则表达式(性能优化) --- + _PATTERNS = { + # 代码块前缀:如果 ``` 前面不是行首也不是换行符 + 'code_block_prefix': re.compile(r'(? 后可能跟空格或换行 + 'thought_tag': re.compile(r'[ \t]*\n*'), + + # LaTeX 块级公式:\[ ... \] + 'latex_bracket_block': re.compile(r'\\\[(.+?)\\\]', re.DOTALL), + # LaTeX 行内公式:\( ... \) + 'latex_paren_inline': re.compile(r'\\\((.+?)\\\)'), + + # 列表项:非换行符 + 数字 + 点 + 空格 (e.g. "Text1. Item") + 'list_item': re.compile(r'([^\n])(\d+\. )'), + + # XML 残留标签 (如 Claude 的 artifacts) + 'xml_artifacts': re.compile(r']*>', re.IGNORECASE), + } + + def __init__(self, config: Optional[NormalizerConfig] = None): + self.config = config or NormalizerConfig() + self.applied_fixes = [] + + def normalize(self, content: str) -> str: + """主入口:按顺序应用所有规范化规则""" + self.applied_fixes = [] + if not content: + return content + + try: + # 1. 转义字符修复(必须最先执行,否则影响后续正则) + if self.config.enable_escape_fix: + original = content + content = self._fix_escape_characters(content) + if content != original: + self.applied_fixes.append("修复转义字符") + + # 2. 思考链标签规范化 + if self.config.enable_thought_tag_fix: + original = content + content = self._fix_thought_tags(content) + if content != original: + self.applied_fixes.append("规范化思考链") + + # 3. 代码块格式修复 + if self.config.enable_code_block_fix: + original = content + content = self._fix_code_blocks(content) + if content != original: + self.applied_fixes.append("修复代码块格式") + + # 4. LaTeX 公式规范化 + if self.config.enable_latex_fix: + original = content + content = self._fix_latex_formulas(content) + if content != original: + self.applied_fixes.append("规范化 LaTeX 公式") + + # 5. 列表格式修复 + if self.config.enable_list_fix: + original = content + content = self._fix_list_formatting(content) + if content != original: + self.applied_fixes.append("修复列表格式") + + # 6. 未闭合代码块检测与修复 + if self.config.enable_unclosed_block_fix: + original = content + content = self._fix_unclosed_code_blocks(content) + if content != original: + self.applied_fixes.append("闭合未结束代码块") + + # 7. 
全角符号转半角(仅代码块内) + if self.config.enable_fullwidth_symbol_fix: + original = content + content = self._fix_fullwidth_symbols_in_code(content) + if content != original: + self.applied_fixes.append("全角符号转半角") + + # 8. XML 标签残留清理 + if self.config.enable_xml_tag_cleanup: + original = content + content = self._cleanup_xml_tags(content) + if content != original: + self.applied_fixes.append("清理 XML 标签") + + # 9. 执行自定义清理函数 + for cleaner in self.config.custom_cleaners: + original = content + content = cleaner(content) + if content != original: + self.applied_fixes.append("执行自定义清理") + + return content + + except Exception as e: + # 生产环境保底机制:如果清洗过程报错,返回原始内容,避免阻断服务 + logger.error(f"内容规范化失败: {e}", exc_info=True) + return content + + def _fix_escape_characters(self, content: str) -> str: + """修复过度转义的字符""" + # 注意:先处理具体的转义序列,再处理通用的双反斜杠 + content = content.replace("\\r\\n", "\n") + content = content.replace("\\n", "\n") + content = content.replace("\\t", "\t") + # 修复过度转义的反斜杠 (例如路径 C:\\Users) + content = content.replace("\\\\", "\\") + return content + + def _fix_thought_tags(self, content: str) -> str: + """规范化 标签,统一为空两行""" + return self._PATTERNS['thought_tag'].sub("\n\n", content) + + def _fix_code_blocks(self, content: str) -> str: + """修复代码块格式(独占行、换行、去缩进)""" + # C: 移除代码块前的缩进(必须先执行,否则影响下面的判断) + content = self._PATTERNS['code_block_indent'].sub(r"\1", content) + # A: 确保 ``` 前有换行 + content = self._PATTERNS['code_block_prefix'].sub(r"\n\1", content) + # B: 确保 ```语言标识 后有换行 + content = self._PATTERNS['code_block_suffix'].sub(r"\1\n\2", content) + return content + + def _fix_latex_formulas(self, content: str) -> str: + """规范化 LaTeX 公式:\[ -> $$ (块级), \( -> $ (行内)""" + content = self._PATTERNS['latex_bracket_block'].sub(r"$$\1$$", content) + content = self._PATTERNS['latex_paren_inline'].sub(r"$\1$", content) + return content + + def _fix_list_formatting(self, content: str) -> str: + """修复列表项缺少换行的问题 (如 'text1. item' -> 'text\\n1. 
item')""" + return self._PATTERNS['list_item'].sub(r"\1\n\2", content) + + def _fix_unclosed_code_blocks(self, content: str) -> str: + """检测并修复未闭合的代码块""" + if content.count("```") % 2 != 0: + logger.warning("检测到未闭合的代码块,自动补全") + content += "\n```" + return content + + def _fix_fullwidth_symbols_in_code(self, content: str) -> str: + """在代码块内将全角符号转为半角(精细化操作)""" + # 常见误用的全角符号映射 + FULLWIDTH_MAP = { + ',': ',', '。': '.', '(': '(', ')': ')', + '【': '[', '】': ']', ';': ';', ':': ':', + '?': '?', '!': '!', '"': '"', '"': '"', + ''': "'", ''': "'", + } + + parts = content.split("```") + # 代码块内容位于索引 1, 3, 5... (奇数位) + for i in range(1, len(parts), 2): + for full, half in FULLWIDTH_MAP.items(): + parts[i] = parts[i].replace(full, half) + + return "```".join(parts) + + def _cleanup_xml_tags(self, content: str) -> str: + """移除无关的 XML 标签""" + return self._PATTERNS['xml_artifacts'].sub("", content) + +class Filter: + class Valves(BaseModel): + priority: int = Field( + default=0, description="Priority level for the filter operations." + ) + + def __init__(self): + # Indicates custom file handling logic. This flag helps disengage default routines in favor of custom + # implementations, informing the WebUI to defer file-related operations to designated methods within this class. + # Alternatively, you can remove the files directly from the body in from the inlet hook + # self.file_handler = True + + # Initialize 'valves' with specific configurations. Using 'Valves' instance helps encapsulate settings, + # which ensures settings are managed cohesively and not confused with operational flags like 'file_handler'. + self.valves = self.Valves() + pass + + def inlet( + self, + body: dict, + __user__: Optional[dict] = None, + __metadata__: Optional[dict] = None, + __model__: Optional[dict] = None, + __event_emitter__=None, + ) -> dict: + # Modify the request body or validate it before processing by the chat completion API. 
def inlet_chat_id(self, model: dict, metadata: dict, body: dict) -> dict:
    """Copy the chat id (and, for aliases, the custom model id) into the body.

    The base model id is read from ``model["openai"]["id"]`` when the model
    has an ``"openai"`` entry, otherwise from
    ``model["info"]["base_model_id"]``; if that value is ``None`` the model's
    own ``"id"`` is used instead.

    * Base model starting with ``cfchatqwen``: ``body["chat_id"]`` is set.
    * Base model starting with ``webgemini``: ``body["chat_id"]`` is set and,
      when the selected model id itself does not start with ``webgemini``,
      ``body["custom_model_id"]`` is set to the selected model id.

    ``inlet`` passes ``__model__`` and ``__metadata__`` straight through and
    both default to ``None`` there, so missing or partial inputs are treated
    as "nothing to inject" instead of raising.

    Args:
        model: Model description dict, or ``None``.
        metadata: Request metadata dict expected to hold ``"chat_id"``, or ``None``.
        body: Request body; modified in place.

    Returns:
        The same ``body`` object.
    """
    if not model:
        return body

    if "openai" in model:
        base_model_id = (model["openai"] or {}).get("id")
    else:
        base_model_id = (model.get("info") or {}).get("base_model_id")

    model_id = model.get("id")
    base_model = model_id if base_model_id is None else base_model_id
    if not isinstance(base_model, str):
        return body

    is_qwen = base_model.startswith("cfchatqwen")
    is_webgemini = base_model.startswith("webgemini")

    if (is_qwen or is_webgemini) and metadata and "chat_id" in metadata:
        body["chat_id"] = metadata["chat_id"]

    # A user-defined alias on top of a webgemini base model: tell the backend
    # which alias was actually selected.
    if (
        is_webgemini
        and isinstance(model_id, str)
        and not model_id.startswith("webgemini")
    ):
        body["custom_model_id"] = model_id

    return body
def insert_user_env_info(
    self, __metadata__, messages, __event_emitter__=None, model_match_tags=None
):
    """
    Inject the user's environment variables into the first user message.

    A Markdown block listing user name, current date/time, weekday, time zone
    and language is placed in front of the first message's content. The
    operation is idempotent: if a previously injected block is found it is
    replaced with fresh values instead of being added a second time.

    Supported content shapes:
    - plain string: the block is prepended (or refreshed) in the string;
    - list of parts (multimodal): the first ``{"type": "text"}`` part is
      updated, or a new text part is inserted at index 0 when there is none;
    - anything else: converted with ``str()`` and handled like a string.

    Nothing happens when ``messages`` is empty, when the first message is not
    a user message, or when the metadata carries no variables.

    Args:
        __metadata__: Request metadata; variables are read from its
            ``"variables"`` entry. May be ``None``.
        messages: Chat message list; the first element is modified in place.
        __event_emitter__: Optional async callable used to send a status
            event to the front-end after a successful injection.
        model_match_tags: Reserved, currently unused.
    """
    variables = (__metadata__ or {}).get("variables") or {}
    if not messages or messages[0].get("role") != "user" or not variables:
        return

    # (label, variable key) pairs in the order they are rendered.
    fields = (
        ("用户姓名", "{{USER_NAME}}"),
        ("当前日期时间", "{{CURRENT_DATETIME}}"),
        ("当前星期", "{{CURRENT_WEEKDAY}}"),
        ("当前时区", "{{CURRENT_TIMEZONE}}"),
        ("用户语言", "{{USER_LANGUAGE}}"),
    )
    variable_markdown = (
        "## 用户环境变量\n"
        "以下信息为用户的环境变量,可用于为用户提供更个性化的服务或满足特定需求时作为参考:\n"
    ) + "".join(f"- **{label}**:{variables.get(key, '')}\n" for label, key in fields)

    # Matches a previously injected block, from its heading through the end of
    # the user-language line.
    env_var_pattern = r"(## 用户环境变量\n以下信息为用户的环境变量,可用于为用户提供更个性化的服务或满足特定需求时作为参考:\n.*?用户语言.*?\n)"

    def upsert(text: str) -> str:
        """Refresh an existing block in *text*, or prepend a new one."""
        # A callable replacement is used so that backslashes in user-supplied
        # values (e.g. a DOMAIN\\user name) are inserted literally rather than
        # being parsed as regex replacement escapes.
        updated, hits = re.subn(
            env_var_pattern, lambda _match: variable_markdown, text, flags=re.DOTALL
        )
        return updated if hits else f"{variable_markdown}\n{text}"

    first_message = messages[0]
    content = first_message.get("content", "")

    if isinstance(content, list):
        # Multimodal content: work on the first text part, if any.
        text_part = next(
            (
                part
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ),
            None,
        )
        if text_part is not None:
            text_part["text"] = upsert(text_part.get("text", ""))
        else:
            # Image-only message: add a text part that carries the block.
            content.insert(0, {"type": "text", "text": f"{variable_markdown}\n"})
        first_message["content"] = content
    else:
        text = content if isinstance(content, str) else str(content)
        first_message["content"] = upsert(text)

    # Best-effort status notification for every content shape.
    if __event_emitter__:
        import asyncio

        try:
            # Look the loop up first so that no coroutine object is created
            # (and left un-awaited) when there is no running event loop.
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return
        loop.create_task(self._emit_env_status(__event_emitter__))
await __event_emitter__( + { + "type": "status", + "data": { + "description": "✓ 用户环境变量已注入成功", + "done": True, + }, + } + ) + except Exception as e: + print(f"发送状态提示时出错: {e}") + + async def _emit_search_status(self, __event_emitter__, model_name): + """ + 发送模型搜索功能启用的状态提示给前端用户 + """ + try: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"🔍 已为 {model_name} 启用搜索能力", + "done": True, + }, + } + ) + except Exception as e: + print(f"发送搜索状态提示时出错: {e}") + + async def _emit_normalization_status(self, __event_emitter__, applied_fixes: List[str] = None): + """ + 发送内容规范化完成的状态提示 + """ + description = "✓ 内容已自动规范化" + if applied_fixes: + description += f":{', '.join(applied_fixes)}" + + try: + await __event_emitter__( + { + "type": "status", + "data": { + "description": description, + "done": True, + }, + } + ) + except Exception as e: + print(f"发送规范化状态提示时出错: {e}") + + def _contains_html(self, content: str) -> bool: + """ + 检测内容是否包含 HTML 标签 + """ + # 匹配常见的 HTML 标签 + pattern = r"<\s*/?\s*(?:html|head|body|div|span|p|br|hr|ul|ol|li|table|thead|tbody|tfoot|tr|td|th|img|a|b|i|strong|em|code|pre|blockquote|h[1-6]|script|style|form|input|button|label|select|option|iframe|link|meta|title)\b" + return bool(re.search(pattern, content, re.IGNORECASE)) + + def outlet(self, body: dict, __user__: Optional[dict] = None, __event_emitter__=None) -> dict: + """ + 处理传出响应体,通过修改最后一条助手消息的内容。 + 使用 ContentNormalizer 进行全面的内容规范化。 + """ + if "messages" in body and body["messages"]: + last = body["messages"][-1] + content = last.get("content", "") or "" + + if last.get("role") == "assistant" and isinstance(content, str): + # 如果包含 HTML,跳过规范化,为了防止错误格式化 + if self._contains_html(content): + return body + + # 初始化规范化器 + normalizer = ContentNormalizer() + + # 执行规范化 + new_content = normalizer.normalize(content) + + # 更新内容 + if new_content != content: + last["content"] = new_content + # 如果内容发生了改变,发送状态提示 + if __event_emitter__: + import asyncio + try: + # 传入 applied_fixes + 
asyncio.create_task(self._emit_normalization_status(__event_emitter__, normalizer.applied_fixes)) + except RuntimeError: + # 假如不在循环中,则忽略 + pass + + return body diff --git a/plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py b/plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py new file mode 100644 index 0000000..1a3ec58 --- /dev/null +++ b/plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py @@ -0,0 +1,1102 @@ +""" +title: Gemini Manifold Companion +id: gemini_manifold_companion +description: A companion filter for "Gemini Manifold google_genai" pipe providing enhanced functionality. +author: suurt8ll +author_url: https://github.com/suurt8ll +funding_url: https://github.com/suurt8ll/open_webui_functions +license: MIT +version: 1.7.0 +""" + +VERSION = "1.7.0" + +# This filter can detect that a feature like web search or code execution is enabled in the front-end, +# set the feature back to False so Open WebUI does not run it's own logic and then +# pass custom values to "Gemini Manifold google_genai" that signal which feature was enabled and intercepted. + +import copy +import json +from google.genai import types + +import sys +import time +import asyncio +import aiohttp +from fastapi import Request +from fastapi.datastructures import State +from loguru import logger +from pydantic import BaseModel, Field +import pydantic_core +from collections.abc import Awaitable, Callable +from typing import Any, Literal, TYPE_CHECKING, cast + +from open_webui.models.functions import Functions + +if TYPE_CHECKING: + from loguru import Record + from loguru._handler import Handler # type: ignore + from open_webui.utils.manifold_types import * # My personal types in a separate file for more robustness. 
# According to https://ai.google.dev/gemini-api/docs/models
# Canonical model names for which `Filter.inlet` intercepts the front-end
# "web_search" feature: the toggle is switched off and a Google-search flag is
# written into the metadata features instead.
ALLOWED_GROUNDING_MODELS = {
    "gemini-2.5-pro",
    "gemini-flash-latest",
    "gemini-2.5-flash-preview-09-2025",
    "gemini-2.5-flash",
    "gemini-flash-lite-latest",
    "gemini-2.5-flash-lite-preview-09-2025",
    "gemini-2.5-flash-lite",
    "gemini-2.5-flash-lite-preview-06-17",
    "gemini-2.5-pro-preview-06-05",
    "gemini-2.5-flash-preview-05-20",
    "gemini-2.5-pro-preview-05-06",
    "gemini-2.5-flash-preview-04-17",
    "gemini-2.5-pro-preview-03-25",
    "gemini-2.5-pro-exp-03-25",
    "gemini-2.0-pro-exp",
    "gemini-2.0-pro-exp-02-05",
    "gemini-exp-1206",
    "gemini-2.0-flash",
    "gemini-2.0-flash-exp",
    "gemini-2.0-flash-001",
    "gemini-1.5-pro",
    "gemini-1.5-flash",
    "gemini-1.0-pro",
}
# Canonical model names for which `Filter.inlet` intercepts the front-end
# "code_interpreter" feature and sets the "google_code_execution" flag instead.
ALLOWED_CODE_EXECUTION_MODELS = {
    "gemini-2.5-pro",
    "gemini-flash-latest",
    "gemini-2.5-flash-preview-09-2025",
    "gemini-2.5-flash",
    "gemini-flash-lite-latest",
    "gemini-2.5-flash-lite-preview-09-2025",
    "gemini-2.5-flash-lite",
    "gemini-2.5-flash-lite-preview-06-17",
    "gemini-2.5-pro-preview-06-05",
    "gemini-2.5-flash-preview-05-20",
    "gemini-2.5-pro-preview-05-06",
    "gemini-2.5-flash-preview-04-17",
    "gemini-2.5-pro-preview-03-25",
    "gemini-2.5-pro-exp-03-25",
    "gemini-2.0-pro-exp",
    "gemini-2.0-pro-exp-02-05",
    "gemini-exp-1206",
    "gemini-2.0-flash-thinking-exp-01-21",
    "gemini-2.0-flash",
    "gemini-2.0-flash-exp",
    "gemini-2.0-flash-001",
}

# Default timeout for URL resolution (used as the default `timeout` argument
# of `Filter._resolve_url`).
# TODO: Move to Pipe.Valves.
DEFAULT_URL_TIMEOUT = aiohttp.ClientTimeout(total=10)  # 10 seconds total timeout

# Setting auditable=False avoids duplicate output for log levels that would be printed out by the main log.
+log = logger.bind(auditable=False) + + +class Filter: + + class Valves(BaseModel): + FORCE_NON_STREAM_FOR_IMAGE_MODELS: bool = Field( + default=True, + description="""Automatically disable streaming for image generation models + (e.g., gemini-2.5-flash-image-preview) to prevent 'Chunk too big' errors. + Set to False to attempt streaming with these models.""", + ) + SET_TEMP_TO_ZERO: bool = Field( + default=False, + description="""Decide if you want to set the temperature to 0 for grounded answers, + Google reccomends it in their docs.""", + ) + GROUNDING_DYNAMIC_RETRIEVAL_THRESHOLD: float | None = Field( + default=None, + description="""See https://ai.google.dev/gemini-api/docs/grounding?lang=python#dynamic-threshold for more information. + Only supported for 1.0 and 1.5 models""", + ) + USE_PERMISSIVE_SAFETY: bool = Field( + default=False, + description="""Whether to request relaxed safety filtering. + Default value is False.""", + ) + BYPASS_BACKEND_RAG: bool = Field( + default=True, + description="""Decide if you want ot bypass Open WebUI's RAG and send your documents directly to Google API. + Default value is True.""", + ) + LOG_LEVEL: Literal[ + "TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL" + ] = Field( + default="INFO", + description="Select logging level. Use `docker logs -f open-webui` to view logs.", + ) + + # TODO: Support user settting through UserValves. + + def __init__(self): + # This hack makes the valves values available to the `__init__` method. + # TODO: Get the id from the frontmatter instead of hardcoding it. 
def inlet(self, body: "Body", __metadata__: dict[str, Any]) -> "Body":
    """Modifies the incoming request payload before it's sent to the LLM. Operates on the `form_data` dictionary.

    For requests that `_get_model_name` identifies as manifold models this:

    * records the companion version in ``body["metadata"]["features"]``;
    * intercepts the front-end ``web_search`` / ``code_interpreter`` toggles
      for supported models, switches them off and sets the matching Google
      feature flags for the pipe instead;
    * optionally adds permissive safety settings;
    * optionally empties ``body["files"]`` so that the pipe, not Open WebUI's
      RAG, handles uploaded documents;
    * stores the user's streaming preference in the features and forces
      ``body["stream"]`` to ``True``.

    Requests for other models are returned untouched.

    Args:
        body: The request payload; modified in place.
        __metadata__: Request metadata; only ``"chat_id"`` is read here.

    Returns:
        The same ``body`` object.
    """

    # Detect log level change inside self.valves
    if self.log_level != self.valves.LOG_LEVEL:
        log.info(
            f"Detected log level change: {self.log_level=} and {self.valves.LOG_LEVEL=}. "
            "Running the logging setup again."
        )
        self._add_log_handler()

    log.debug(
        f"inlet method has been called. Gemini Manifold Companion version is {VERSION}"
    )

    canonical_model_name, is_manifold = self._get_model_name(body)
    # Exit early if we are filtering an unsupported model.
    if not is_manifold:
        log.debug(
            "Returning the original body object because conditions for proceeding are not fulfilled."
        )
        return body

    # Check if it's a relevant model (supports either feature)
    is_grounding_model = canonical_model_name in ALLOWED_GROUNDING_MODELS
    is_code_exec_model = canonical_model_name in ALLOWED_CODE_EXECUTION_MODELS
    log.debug(f"{is_grounding_model=}, {is_code_exec_model=}")

    features = body.get("features", {})
    log.debug("body.features:", payload=features)

    # Ensure the metadata and its features field exist before writing to them.
    metadata = body.get("metadata")
    if metadata is None:
        metadata = {}
        body["metadata"] = metadata  # type: ignore
    metadata_features = metadata.get("features")
    if metadata_features is None:
        # The type name is passed as a string: `Features` is only imported
        # under TYPE_CHECKING and does not exist at runtime.
        metadata_features = cast("Features", {})
        metadata["features"] = metadata_features

    # Add the companion version to the payload for the pipe to consume.
    metadata_features["gemini_manifold_companion_version"] = VERSION

    if is_grounding_model:
        web_search_enabled = (
            features.get("web_search", False)
            if isinstance(features, dict)
            else False
        )
        if web_search_enabled:
            log.info(
                "Search feature is enabled, disabling it and adding custom feature called grounding_w_google_search."
            )
            # Disable web_search
            features["web_search"] = False
            # Use "Google Search Retrieval" for 1.0 and 1.5 models and "Google Search as a Tool for >=2.0 models".
            if "1.0" in canonical_model_name or "1.5" in canonical_model_name:
                metadata_features["google_search_retrieval"] = True
                metadata_features["google_search_retrieval_threshold"] = (
                    self.valves.GROUNDING_DYNAMIC_RETRIEVAL_THRESHOLD
                )
            else:
                metadata_features["google_search_tool"] = True
            # Google suggest setting temperature to 0 if using grounding:
            # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-with-google-search#:~:text=For%20ideal%20results%2C%20use%20a%20temperature%20of%200.0.
            if self.valves.SET_TEMP_TO_ZERO:
                log.info("Setting temperature to 0.")
                body["temperature"] = 0  # type: ignore
    if is_code_exec_model:
        code_execution_enabled = (
            features.get("code_interpreter", False)
            if isinstance(features, dict)
            else False
        )
        if code_execution_enabled:
            log.info(
                "Code interpreter feature is enabled, disabling it and adding custom feature called google_code_execution."
            )
            # Disable code_interpreter
            features["code_interpreter"] = False
            metadata_features["google_code_execution"] = True
    if self.valves.USE_PERMISSIVE_SAFETY:
        log.info("Adding permissive safety settings to body.metadata")
        metadata["safety_settings"] = self._get_permissive_safety_settings(
            canonical_model_name
        )
    if self.valves.BYPASS_BACKEND_RAG:
        # `.get` mirrors how `outlet` reads the chat id and avoids a KeyError
        # when the key is absent.
        if __metadata__.get("chat_id") == "local":
            # TODO toast notification
            log.warning(
                "Bypassing Open WebUI's RAG is not possible for temporary chats. "
                "The Manifold pipe requires a database entry to access uploaded files, "
                "which temporary chats do not have. Falling back to Open WebUI's RAG."
            )
            metadata_features["upload_documents"] = False
        else:
            log.info(
                "BYPASS_BACKEND_RAG is enabled, bypassing Open WebUI RAG to let the Manifold pipe handle documents."
            )
            if files := body.get("files"):
                log.info(
                    f"Removing {len(files)} files from the Open WebUI RAG pipeline."
                )
                body["files"] = []
            metadata_features["upload_documents"] = True
    else:
        log.info(
            "BYPASS_BACKEND_RAG is disabled. Open WebUI's RAG will be used if applicable."
        )
        metadata_features["upload_documents"] = False

    # The manifold pipe requires the backend to be in streaming mode to correctly
    # process the AsyncGenerator it returns. We save the user's original
    # streaming intent and then force the backend into streaming mode.

    user_stream_intent = body.get("stream", True)
    image_generation_models = {
        "gemini-2.0-flash-preview-image-generation",
        "gemini-2.5-flash-image-preview",
        "gemini-2.5-flash-image",
    }

    # Check if the current model is an image generation model and if the
    # user has enabled the non-streaming override for them.
    if (
        self.valves.FORCE_NON_STREAM_FOR_IMAGE_MODELS
        and canonical_model_name in image_generation_models
    ):
        log.info(
            f"Image generation model '{canonical_model_name}' detected. "
            "Forcing non-streaming mode to prevent potential 'Chunk too big' errors."
        )
        # Override the user's intent to ensure stability.
        user_stream_intent = False

    log.info(
        f"Storing user's stream intent ({user_stream_intent}) into __metadata__. "
        "Backend will be forced down the streaming path."
    )
    metadata_features["stream"] = user_stream_intent
    body["stream"] = True

    # TODO: Filter out the citation markers here.

    log.debug("inlet method has finished.")
    return body
+ gs_supports = stored_metadata.grounding_supports + gs_chunks = stored_metadata.grounding_chunks + if gs_supports and gs_chunks: + await self._resolve_and_emit_sources( + grounding_chunks=gs_chunks, + supports=gs_supports, + event_emitter=__event_emitter__, + pipe_start_time=pipe_start_time, + ) + else: + log.info( + "Grounding metadata missing supports or chunks (checked in outlet); " + "skipping source resolution and emission." + ) + + # Emit status event with search queries + await self._emit_status_event_w_queries(stored_metadata, __event_emitter__) + + # Clean up state + delattr(app_state, grounding_key) + if hasattr(app_state, time_key): + delattr(app_state, time_key) + else: + log.info("No grounding metadata found in request state.") + + log.debug("outlet method has finished.") + return body + + # region 1. Helper methods inside the Filter class + + # region 1.1 Add citations + + def _get_text_w_citation_markers( + self, + grounding_metadata: types.GroundingMetadata, + raw_str: str, + ) -> str | None: + """ + Returns the model response with citation markers. + Thoughts, if present as THOUGHT_START_TAG...THOUGHT_END_TAG at the beginning of raw_str, + are preserved but excluded from the citation indexing process. + Everything up to the *last* THOUGHT_END_TAG tag is considered part of the thought. + """ + + supports = grounding_metadata.grounding_supports + grounding_chunks = grounding_metadata.grounding_chunks + if not supports or not grounding_chunks: + log.info( + "Grounding metadata missing supports or chunks, can't insert citation markers. " + "Response was probably just not grounded." 
+ ) + return None + + log.trace("raw_str:", payload=raw_str, _log_truncation_enabled=False) + + thought_prefix = "" + content_for_citation_processing = raw_str + + THOUGHT_START_TAG = "= len(THOUGHT_START_TAG) - 1 + ): + thought_block_end_offset = last_end_thought_tag_idx + len( + THOUGHT_END_TAG + ) + thought_prefix = raw_str[:thought_block_end_offset] + content_for_citation_processing = raw_str[thought_block_end_offset:] + log.info( + "Model thoughts detected at the beginning of the response. " + "Citations will be processed on the content following the last thought block." + ) + else: + log.warning( + "Detected THOUGHT_START_TAG at the start of raw_str without a subsequent closing THOUGHT_END_TAG " + "or a malformed thought block. The entire raw_str will be processed for citations. " + "This might lead to incorrect marker placement if thoughts were intended and indices " + "are relative to content after thoughts." + ) + + processed_content_part_with_markers = content_for_citation_processing + + if content_for_citation_processing: + try: + modified_content_bytes = bytearray( + content_for_citation_processing.encode("utf-8") + ) + for support in reversed(supports): + segment = support.segment + indices = support.grounding_chunk_indices + if not ( + indices is not None + and segment + and segment.end_index is not None + ): + log.debug(f"Skipping support due to missing data: {support}") + continue + end_pos = segment.end_index + if not (0 <= end_pos <= len(modified_content_bytes)): + log.warning( + f"Support segment end_index ({end_pos}) is out of bounds for the processable content " + f"(length {len(modified_content_bytes)} bytes after potential thought stripping). " + f"Content (first 50 chars): '{content_for_citation_processing[:50]}...'. Skipping this support. 
async def _resolve_url(
    self,
    session: aiohttp.ClientSession,
    url: str,
    timeout: aiohttp.ClientTimeout = DEFAULT_URL_TIMEOUT,
    max_retries: int = 3,
    base_delay: float = 0.5,
) -> tuple[str, bool]:
    """
    Follow redirects for *url* with the given aiohttp session and report where it ends up.

    Timeouts and aiohttp client errors are retried up to ``max_retries`` times,
    waiting ``base_delay * 2**attempt`` seconds (capped at 10) between tries.
    Any other exception aborts immediately.

    Returns a ``(final_url, success)`` pair; on failure the original URL is
    returned with ``False``. An empty URL yields ``("", False)``.
    """
    if not url:
        return "", False

    for attempt in range(max_retries + 1):
        try:
            async with session.get(
                url,
                allow_redirects=True,
                timeout=timeout,
            ) as response:
                final_url = str(response.url)
                log.debug(
                    f"Resolved URL '{url}' to '{final_url}' after {attempt} retries"
                )
                return final_url, True
        except (asyncio.TimeoutError, aiohttp.ClientError) as e:
            out_of_retries = attempt == max_retries
            if not out_of_retries:
                # Exponential back-off before the next try.
                delay = min(base_delay * (2**attempt), 10.0)
                log.warning(
                    f"Retry {attempt + 1}/{max_retries + 1} for URL '{url}': {e}. Waiting {delay:.1f}s..."
                )
                await asyncio.sleep(delay)
                continue
            log.error(
                f"Failed to resolve URL '{url}' after {max_retries + 1} attempts: {e}"
            )
            return url, False
        except Exception as e:
            log.error(f"Unexpected error resolving URL '{url}': {e}")
            return url, False

    # Only reached when the loop body never ran (negative max_retries).
    return url, False
log.error(f"Error during URL resolution: {e}") + resolved_uris_map = {url: url for url in urls_to_resolve} + self._emit_status_update( + event_emitter, "URL resolution failed", pipe_start_time, done=True + ) + + source_metadatas_template: list["SourceMetadata"] = [ + {"source": None, "original_url": None, "supports": []} + for _ in grounding_chunks + ] + populated_metadatas = [m.copy() for m in source_metadatas_template] + + for chunk_index, original_uri in initial_metadatas: + final_uri = resolved_uris_map.get(original_uri, original_uri) + if 0 <= chunk_index < len(populated_metadatas): + populated_metadatas[chunk_index]["original_url"] = original_uri + populated_metadatas[chunk_index]["source"] = final_uri + else: + log.warning( + f"Chunk index {chunk_index} out of bounds when populating resolved URLs." + ) + + # Create a mapping from each chunk index to the text segments it supports. + chunk_index_to_segments: dict[int, list[types.Segment]] = {} + for support in supports: + segment = support.segment + indices = support.grounding_chunk_indices + if not (segment and segment.text and indices is not None): + continue + + for index in indices: + if index not in chunk_index_to_segments: + chunk_index_to_segments[index] = [] + chunk_index_to_segments[index].append(segment) + populated_metadatas[index]["supports"].append(support.model_dump()) # type: ignore + + valid_source_metadatas: list["SourceMetadata"] = [] + doc_list: list[str] = [] + + for i, meta in enumerate(populated_metadatas): + if meta.get("original_url") is not None: + valid_source_metadatas.append(meta) + + content_parts: list[str] = [] + chunk = grounding_chunks[i] + + if maps_info := chunk.maps: + title = maps_info.title or "N/A" + place_id = maps_info.place_id or "N/A" + content_parts.append(f"Title: {title}\nPlace ID: {place_id}") + + supported_segments = chunk_index_to_segments.get(i) + if supported_segments: + if content_parts: + content_parts.append("") # Add a blank line for separation + + # Use a 
async def _emit_status_event_w_queries(
    self,
    grounding_metadata: types.GroundingMetadata,
    event_emitter: Callable[["Event"], Awaitable[None]],
) -> None:
    """
    Emit a status event linking to the searches that grounded the response.

    One Google search URL is built per entry of
    ``grounding_metadata.web_search_queries``. This covers both Google Search
    and Google Maps grounding. Nothing is emitted when there are no queries.

    Args:
        grounding_metadata: Grounding metadata whose ``web_search_queries``
            attribute is read.
        event_emitter: Awaitable callback that receives the status event.
    """
    # Function-scope import so this method stays self-contained.
    from urllib.parse import quote_plus

    search_queries = grounding_metadata.web_search_queries
    if not search_queries:
        # Covers both a missing (None) and an empty query list.
        log.debug("Grounding metadata does not contain any search queries.")
        return

    # The queries are used for grounding, so we link them to a general Google
    # search page. Each query is URL-encoded: raw spaces, '&', '#' or '+'
    # would otherwise truncate or corrupt the `q` parameter.
    google_search_urls = [
        f"https://www.google.com/search?q={quote_plus(query)}"
        for query in search_queries
    ]

    status_event_data: StatusEventData = {
        "action": "web_search",
        "description": "This response was grounded with a Google tool",
        "urls": google_search_urls,
    }
    status_event: StatusEvent = {
        "type": "status",
        "data": status_event_data,
    }
    await event_emitter(status_event)
    log.info("Emitted grounding queries.")
    log.trace("StatusEvent:", payload=status_event)
f"Safety settings: {str({k.value: v.value for k, v in category_threshold_map.items()})}" + ) + + safety_settings = [ + types.SafetySetting(category=category, threshold=threshold) + for category, threshold in category_threshold_map.items() + ] + return safety_settings + + # endregion 1.3 Get permissive safety settings + + # region 1.4 Utility helpers + + def _emit_status_update( + self, + event_emitter: Callable[["Event"], Awaitable[None]], + description: str, + pipe_start_time: float | None, + *, + done: bool = False, + ): + """Constructs and emits a status event in a non-blocking task.""" + + async def emit_task(): + time_str = ( + f" (+{(time.monotonic() - pipe_start_time):.2f}s)" + if pipe_start_time is not None + else "" + ) + full_description = f"{description}{time_str}" + + status_event: "StatusEvent" = { + "type": "status", + "data": {"description": full_description, "done": done}, + } + + try: + await event_emitter(status_event) + log.debug(f"Emitted status:", payload=status_event) + except Exception: + log.exception("Error emitting status.") + + # Fire-and-forget the emission task. + asyncio.create_task(emit_task()) + + def _get_first_candidate( + self, candidates: list[types.Candidate] | None + ) -> types.Candidate | None: + """Selects the first candidate, logging a warning if multiple exist.""" + if not candidates: + log.warning("Received chunk with no candidates, skipping processing.") + return None + if len(candidates) > 1: + log.warning("Multiple candidates found, defaulting to first candidate.") + return candidates[0] + + def _get_model_name(self, body: "Body") -> tuple[str, bool]: + """ + Extracts the effective and canonical model name from the request body. + + Handles standard model names and custom workspace models by prioritizing + the base_model_id found in metadata. + + Args: + body: The request body dictionary. + + Returns: + A tuple containing: + - The canonical model name (prefix removed). 
+ - A boolean indicating if the effective model name contained the + 'gemini_manifold_google_genai.' prefix. + """ + # 1. Get the initially requested model name from the top level + effective_model_name: str = body.get("model", "") + initial_model_name = effective_model_name + base_model_name = None + + # 2. Check for a base model ID in the metadata for custom models + # If metadata exists, attempt to extract the base_model_id + if metadata := body.get("metadata"): + # Safely navigate the nested structure: metadata -> model -> info -> base_model_id + base_model_name = ( + metadata.get("model", {}).get("info", {}).get("base_model_id", None) + ) + # If a base model ID is found, it overrides the initially requested name + if base_model_name: + effective_model_name = base_model_name + + # 3. Determine if the effective model name contains the manifold prefix. + # This flag indicates if the model (after considering base_model_id) + # appears to be one defined or routed via the manifold pipe function. + is_manifold_model = "gemini_manifold_google_genai." in effective_model_name + + # 4. Create the canonical model name by removing the manifold prefix + # from the effective model name. + canonical_model_name = effective_model_name.replace( + "gemini_manifold_google_genai.", "" + ) + + # 5. Log the relevant names for debugging purposes + log.debug( + f"Model Name Extraction: initial='{initial_model_name}', " + f"base='{base_model_name}', effective='{effective_model_name}', " + f"canonical='{canonical_model_name}', is_manifold={is_manifold_model}" + ) + + # 6. Return the canonical name and the manifold flag + return canonical_model_name, is_manifold_model + + def _is_flat_dict(self, data: Any) -> bool: + """ + Checks if a dictionary contains only non-dict/non-list values (is one level deep). 
+ """ + if not isinstance(data, dict): + return False + return not any(isinstance(value, (dict, list)) for value in data.values()) + + def _truncate_long_strings( + self, data: Any, max_len: int, truncation_marker: str, truncation_enabled: bool + ) -> Any: + """ + Recursively traverses a data structure (dicts, lists) and truncates + long string values. Creates copies to avoid modifying original data. + + Args: + data: The data structure (dict, list, str, int, float, bool, None) to process. + max_len: The maximum allowed length for string values. + truncation_marker: The string to append to truncated values. + truncation_enabled: Whether truncation is enabled. + + Returns: + A potentially new data structure with long strings truncated. + """ + if not truncation_enabled or max_len <= len(truncation_marker): + # If truncation is disabled or max_len is too small, return original + # Make a copy only if it's a mutable type we might otherwise modify + if isinstance(data, (dict, list)): + return copy.deepcopy(data) # Ensure deep copy for nested structures + return data # Primitives are immutable + + if isinstance(data, str): + if len(data) > max_len: + return data[: max_len - len(truncation_marker)] + truncation_marker + return data # Return original string if not truncated + elif isinstance(data, dict): + # Process dictionary items, creating a new dict + return { + k: self._truncate_long_strings( + v, max_len, truncation_marker, truncation_enabled + ) + for k, v in data.items() + } + elif isinstance(data, list): + # Process list items, creating a new list + return [ + self._truncate_long_strings( + item, max_len, truncation_marker, truncation_enabled + ) + for item in data + ] + else: + # Return non-string, non-container types as is (they are immutable) + return data + + def plugin_stdout_format(self, record: "Record") -> str: + """ + Custom format function for the plugin's logs. + Serializes and truncates data passed under the 'payload' key in extra. 
+ """ + + # Configuration Keys + LOG_OPTIONS_PREFIX = "_log_" + TRUNCATION_ENABLED_KEY = f"{LOG_OPTIONS_PREFIX}truncation_enabled" + MAX_LENGTH_KEY = f"{LOG_OPTIONS_PREFIX}max_length" + TRUNCATION_MARKER_KEY = f"{LOG_OPTIONS_PREFIX}truncation_marker" + DATA_KEY = "payload" + + original_extra = record["extra"] + # Extract the data intended for serialization using the chosen key + data_to_process = original_extra.get(DATA_KEY) + + serialized_data_json = "" + if data_to_process is not None: + try: + serializable_data = pydantic_core.to_jsonable_python( + data_to_process, serialize_unknown=True + ) + + # Determine truncation settings + truncation_enabled = original_extra.get(TRUNCATION_ENABLED_KEY, True) + max_length = original_extra.get(MAX_LENGTH_KEY, 256) + truncation_marker = original_extra.get(TRUNCATION_MARKER_KEY, "[...]") + + # If max_length was explicitly provided, force truncation enabled + if MAX_LENGTH_KEY in original_extra: + truncation_enabled = True + + # Truncate long strings + truncated_data = self._truncate_long_strings( + serializable_data, + max_length, + truncation_marker, + truncation_enabled, + ) + + # Serialize the (potentially truncated) data + if self._is_flat_dict(truncated_data) and not isinstance( + truncated_data, list + ): + json_string = json.dumps( + truncated_data, separators=(",", ":"), default=str + ) + # Add a simple prefix if it's compact + serialized_data_json = " - " + json_string + else: + json_string = json.dumps(truncated_data, indent=2, default=str) + # Prepend with newline for readability + serialized_data_json = "\n" + json_string + + except (TypeError, ValueError) as e: # Catch specific serialization errors + serialized_data_json = f" - {{Serialization Error: {e}}}" + except ( + Exception + ) as e: # Catch any other unexpected errors during processing + serialized_data_json = f" - {{Processing Error: {e}}}" + + # Add the final JSON string (or error message) back into the record + record["extra"]["_plugin_serialized_data"] 
= serialized_data_json + + # Base template + base_template = ( + "{time:YYYY-MM-DD HH:mm:ss.SSS} | " + "{level: <8} | " + "{name}:{function}:{line} - " + "{message}" + ) + + # Append the serialized data + base_template += "{extra[_plugin_serialized_data]}" + # Append the exception part + base_template += "\n{exception}" + # Return the format string template + return base_template.rstrip() + + def _add_log_handler(self): + """ + Adds or updates the loguru handler specifically for this plugin. + Includes logic for serializing and truncating extra data. + """ + + def plugin_filter(record: "Record"): + """Filter function to only allow logs from this plugin (based on module name).""" + return record["name"] == __name__ + + # Get the desired level name and number + desired_level_name = self.valves.LOG_LEVEL + try: + # Use the public API to get level details + desired_level_info = log.level(desired_level_name) + desired_level_no = desired_level_info.no + except ValueError: + log.error( + f"Invalid LOG_LEVEL '{desired_level_name}' configured for plugin {__name__}. Cannot add/update handler." + ) + return # Stop processing if the level is invalid + + # Access the internal state of the log + handlers: dict[int, "Handler"] = log._core.handlers # type: ignore + handler_id_to_remove = None + found_correct_handler = False + + for handler_id, handler in handlers.items(): + existing_filter = handler._filter # Access internal attribute + + # Check if the filter matches our plugin_filter + # Comparing function objects directly can be fragile if they are recreated. + # Comparing by name and module is more robust for functions defined at module level. 
+ is_our_filter = ( + existing_filter is not None # Make sure a filter is set + and hasattr(existing_filter, "__name__") + and existing_filter.__name__ == plugin_filter.__name__ + and hasattr(existing_filter, "__module__") + and existing_filter.__module__ == plugin_filter.__module__ + ) + + if is_our_filter: + existing_level_no = handler.levelno + log.trace( + f"Found existing handler {handler_id} for {__name__} with level number {existing_level_no}." + ) + + # Check if the level matches the desired level + if existing_level_no == desired_level_no: + log.debug( + f"Handler {handler_id} for {__name__} already exists with the correct level '{desired_level_name}'." + ) + found_correct_handler = True + break # Found the correct handler, no action needed + else: + # Found our handler, but the level is wrong. Mark for removal. + log.info( + f"Handler {handler_id} for {__name__} found, but log level differs " + f"(existing: {existing_level_no}, desired: {desired_level_no}). " + f"Removing it to update." + ) + handler_id_to_remove = handler_id + break # Found the handler to replace, stop searching + + # Remove the old handler if marked for removal + if handler_id_to_remove is not None: + try: + log.remove(handler_id_to_remove) + log.debug(f"Removed handler {handler_id_to_remove} for {__name__}.") + except ValueError: + # This might happen if the handler was somehow removed between the check and now + log.warning( + f"Could not remove handler {handler_id_to_remove} for {__name__}. It might have already been removed." + ) + # If removal failed but we intended to remove, we should still proceed to add + # unless found_correct_handler is somehow True (which it shouldn't be if handler_id_to_remove was set). 
class Filter:
    """
    Inlet filter that consolidates multi-model answers in the chat history.

    When a user turn was answered by several models, the stored history holds
    several assistant messages sharing one ``parentId``. Before the request is
    sent on, this filter rebuilds ``body["messages"]`` from the stored history
    so that each such group becomes a single assistant message.
    """

    class Valves(BaseModel):
        # Prefix placed in front of every merged multi-model answer.
        CONTEXT_PREFIX: str = Field(
            default="下面是多个匿名AI模型给出的回答,使用标签包裹:\n\n",
            description="Prefix for the injected system message containing the raw merged context.",
        )

    def __init__(self):
        self.valves = self.Valves()
        self.toggle = True
        self.type = "filter"
        self.name = "合并回答"
        self.description = "在用户提问时,自动注入之前多个模型回答的上下文。"

    @staticmethod
    def _timestamp_of(message: Dict) -> float:
        """Return the message timestamp, treating a missing or None value as 0."""
        return message.get("timestamp") or 0

    def _merge_siblings(self, parent_id: str, siblings: List[Dict]) -> Optional[Dict]:
        """
        Collapse sibling assistant answers into one assistant message.

        Args:
            parent_id: Id of the user message all siblings answer.
            siblings: Assistant messages sharing ``parent_id`` (two or more).

        Returns:
            The merged assistant message, or None when no sibling carries
            usable content.
        """
        ordered = sorted(siblings, key=self._timestamp_of)
        merged_timestamp = self._timestamp_of(ordered[0])

        # Case A: a sibling already carries merged content
        # (merged.status truthy and merged.content non-empty).
        pre_merged = next(
            (
                s
                for s in ordered
                if (s.get("merged") or {}).get("status")
                and (s.get("merged") or {}).get("content")
            ),
            None,
        )

        if pre_merged:
            merged_content = pre_merged["merged"]["content"]
            merged_message_id = pre_merged["id"]
            merged_timestamp = pre_merged.get("timestamp", merged_timestamp)
            print(
                f"DEBUG: Using pre-merged content from message ID: {merged_message_id}"
            )
        else:
            # Case B: merge manually. Each answer is wrapped in a tag that
            # carries an anonymous letter id (a, b, c, ...), which is what the
            # context prefix announces to the model.
            wrapped: List[str] = []
            for s in ordered:
                content = s.get("content", "")
                if content and content != "The requested model is not supported.":
                    response_id = chr(ord("a") + len(wrapped))
                    wrapped.append(
                        f'<response id="{response_id}">\n{content}\n</response>'
                    )

            if not wrapped:
                return None

            merged_content = "\n\n".join(wrapped)
            first_sibling_id = next(
                (s.get("id") for s in ordered if s.get("id")), None
            )
            merged_message_id = first_sibling_id or parent_id

        if not merged_content:
            return None

        return {
            "id": merged_message_id,
            "parentId": parent_id,
            "role": "assistant",
            "content": f"{self.valves.CONTEXT_PREFIX}{merged_content}",
            "timestamp": merged_timestamp,
        }

    async def inlet(
        self,
        body: Dict,
        __user__: Dict,
        __metadata__: Dict,
        __request__: Request,
        __event_emitter__,
    ):
        """
        Rebuild the request's message list with multi-model turns merged.

        The stored chat is loaded via ``__metadata__["chat_id"]``. If it holds
        at least two user messages, ``body["messages"]`` is replaced by the
        stored history in timestamp order, with every group of sibling
        assistant answers collapsed into one message. System messages of the
        incoming request are kept at the front, and the newest request
        message is appended when it is not part of the stored history yet.

        Args:
            body: Request body; its ``messages`` list is replaced in place.
            __user__: User information (unused).
            __metadata__: Request metadata; ``chat_id`` is read from it.
            __request__: The FastAPI request (unused).
            __event_emitter__: Event emitter (unused).

        Returns:
            The (possibly modified) request body. It is returned untouched
            when the chat cannot be loaded or has fewer than two user turns.
        """
        print(f"*********** Filter '{self.name}' triggered ***********")
        chat_id = __metadata__.get("chat_id")
        if not chat_id:
            print(
                f"DEBUG: Filter '{self.name}' skipped: chat_id not found in metadata."
            )
            return body

        print(f"DEBUG: Chat ID found: {chat_id}")

        # 1. Load the full chat history from the database.
        try:
            chat = await asyncio.to_thread(Chats.get_chat_by_id, chat_id)

            if (
                not chat
                or not hasattr(chat, "chat")
                or not chat.chat.get("history")
                or not chat.chat.get("history").get("messages")
            ):
                print(
                    f"DEBUG: Filter '{self.name}' skipped: Chat history not found or empty for chat_id: {chat_id}"
                )
                return body

            messages_map = chat.chat["history"]["messages"]
            print(
                f"DEBUG: Successfully loaded {len(messages_map)} messages from history."
            )

            user_message_count = sum(
                1 for msg in messages_map.values() if msg.get("role") == "user"
            )

            # With fewer than 2 user messages there is no previous turn to merge.
            if user_message_count < 2:
                print(
                    f"DEBUG: Filter '{self.name}' skipped: Not enough user messages in history to have a previous turn (found {user_message_count}, required >= 2)."
                )
                return body

        except Exception as e:
            # Best effort: a history lookup failure must not break the request.
            print(
                f"ERROR: Filter '{self.name}' failed to get chat history from DB: {e}"
            )
            return body

        # 2. Order the history; a None timestamp sorts as 0 instead of raising.
        all_messages = sorted(messages_map.values(), key=self._timestamp_of)

        # 3. Group assistant messages by the user message they answer.
        assistant_groups: Dict[str, List[Dict]] = {}
        for msg in all_messages:
            if msg.get("role") == "assistant" and msg.get("parentId"):
                assistant_groups.setdefault(msg["parentId"], []).append(msg)

        # The stored history is only read for user/assistant roles, so system
        # messages of the incoming request are carried over explicitly.
        incoming_messages = body.get("messages") or []
        final_messages = [
            m for m in incoming_messages if m.get("role") == "system"
        ]
        processed_parent_ids = set()

        # 4. Walk the history and emit one entry per user message / answer group.
        for msg in all_messages:
            role = msg.get("role")

            if role == "user":
                final_messages.append(msg)
                continue
            if role != "assistant":
                continue

            parent_id = msg.get("parentId")
            siblings = assistant_groups.get(parent_id)
            if siblings is None:
                # No parent to group by: keep the answer rather than drop it.
                final_messages.append(msg)
                continue

            if parent_id in processed_parent_ids:
                continue
            processed_parent_ids.add(parent_id)

            if len(siblings) == 1:
                final_messages.append(siblings[0])
                continue

            print(
                f"DEBUG: Found a group of {len(siblings)} siblings for parent_id {parent_id}. Merging..."
            )
            merged_message = self._merge_siblings(parent_id, siblings)
            if merged_message:
                final_messages.append(merged_message)

        # 5. The new message of the current request is not in the stored
        #    history yet, so append it (system messages were already kept).
        if incoming_messages:
            newest = incoming_messages[-1]
            if (
                newest.get("role") != "system"
                and newest.get("id") not in messages_map
            ):
                final_messages.append(newest)

        # 6. Replace the original message list with the rebuilt one.
        body["messages"] = final_messages
        print(
            f"DEBUG: Rebuilt message history with {len(final_messages)} messages, consolidating all multi-response turns."
        )

        print(f"*********** Filter '{self.name}' finished successfully ***********")
        return body
"".join(text_parts) + elif isinstance(content, str): + user_message_content = content + + user_message_index = i + break + + if user_message_index == -1: + return body + + # 检查是否为MoE汇总请求 + if isinstance(user_message_content, str) and user_message_content.startswith( + "You have been provided with a set of responses from various models to the latest user query" + ): + print("检测到MoE汇总请求,正在更改提示词。") + + # 1. 提取原始查询 + query_start_phrase = 'the latest user query: "' + query_end_phrase = '"\n\nYour task is to' + start_index = user_message_content.find(query_start_phrase) + end_index = user_message_content.find(query_end_phrase) + + original_query = "" + if start_index != -1 and end_index != -1: + original_query = user_message_content[ + start_index + len(query_start_phrase) : end_index + ] + + # 2. 提取各个模型的响应 + responses_start_phrase = "Responses from models: " + responses_start_index = user_message_content.find(responses_start_phrase) + + responses_text = "" + if responses_start_index != -1: + responses_text = user_message_content[ + responses_start_index + len(responses_start_phrase) : + ] + + # 使用三重双引号作为分隔符来提取响应 + responses = [ + part.strip() for part in responses_text.split('"""') if part.strip() + ] + + # 3. 动态构建模型响应部分 + responses_section = "" + for i, response in enumerate(responses): + responses_section += f'''""" +[第 {i + 1} 个模型的完整回答] +{response} +""" +''' + + # 4. 构建新的提示词 + merge_prompt = f'''# 角色定位 +你是一位经验丰富的首席分析师,正在处理来自多个独立 AI 专家团队对同一问题的分析报告。你的任务是将这些报告进行深度整合、批判性分析,并提炼出一份结构清晰、洞察深刻、对决策者极具价值的综合报告。 + +# 原始用户问题 +{original_query} + +# 输入格式说明 ⚠️ 重要 +各模型的响应已通过 """ (三重引号)分隔符准确识别和分离。系统已将不同模型的回答分别提取,你现在需要基于以下分离后的内容进行分析。 + +**已分离的模型响应**: +{responses_section} +# 核心任务 +请勿简单地复制或拼接原始报告。你需要运用你的专业分析能力,完成以下步骤: + +## 1. 信息解析与评估 (Analysis & Evaluation) +- **准确分隔**: 已根据 """ 分隔符,准确识别每个模型的回答边界。 +- **可信度评估**: 批判性地审视每份报告,识别其中可能存在的偏见、错误或不一致之处。 +- **逻辑梳理**: 理清每份报告的核心论点、支撑论据和推理链条。 + +## 2. 
核心洞察提炼 (Insight Extraction) +- **识别共识**: 找出所有报告中共同提及、高度一致的观点或建议。这通常是问题的核心事实或最稳健的策略。 +- **突出差异**: 明确指出各报告在视角、方法、预测或结论上的关键分歧点。这些分歧往往蕴含着重要的战略考量。 +- **捕捉亮点**: 挖掘单个报告中独有的、具有创新性或深刻性的见解,这些"闪光点"可能是关键的差异化优势。 + +## 3. 综合报告撰写 (Synthesis) +基于以上分析,生成一份包含以下结构的综合报告: + +### **【核心共识】** +- 用清晰的要点列出所有模型一致认同的关键信息或建议。 +- 标注覆盖范围(如"所有模型均同意"或"多数模型提及")。 + +### **【关键分歧】** +- 清晰地对比不同模型在哪些核心问题上持有不同观点。 +- 用序号或描述性语言标识不同的观点阵营(如"观点 A 与观点 B 的分歧"或"方案 1 vs 方案 2")。 +- 简要说明其原因或侧重点的差异。 + +### **【独特洞察】** +- 提炼并呈现那些仅在单个报告中出现,但极具价值的独特建议或视角。 +- 用"某个模型提出"或"另一视角"等中立表述,避免因缺少显式来源标记而造成的混淆。 + +### **【综合分析与建议】** +- **整合**: 基于共识、差异和亮点,提供一个全面、平衡、且经过你专业判断优化的最终分析。 +- **建议**: 如果原始指令是寻求方案或策略,这里应提出一个或多个融合了各方优势的、可执行的建议。 + +# 格式要求 +- 语言精炼、逻辑清晰、结构分明。 +- 使用加粗、列表、标题等格式,确保报告易于阅读和理解。 +- 由于缺少显式的模型标识,**在呈现差异化观点时,使用描述性或序号化的方式**(如"第一种观点""另一个视角")而非具体的模型名称。 +- 始终以"为用户提供最高价值的决策依据"为目标。 + +# 输出结构示例 +根据以上要求,你的输出应该呈现如下结构: + +## 【核心共识】 +✓ [共识观点 1] —— 所有模型均同意 +✓ [共识观点 2] —— 多数模型同意 + +## 【关键分歧】 +⚡ **在[议题]上的分歧**: +- 观点阵营 A: ... +- 观点阵营 B: ... +- 观点阵营 C: ... + +## 【独特洞察】 +💡 [某个模型独有的深刻观点]: ... +💡 [另一个模型的创新视角]: ... + +## 【综合分析与建议】 +基于以上分析,推荐方案/策略: ... +''' + + # 5. 替换原始消息内容 + body["messages"][user_message_index]["content"] = merge_prompt + print("提示词已成功动态替换。") + + return body diff --git a/plugins/pipelines/moe_prompt_refiner/valves.json b/plugins/pipelines/moe_prompt_refiner/valves.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/plugins/pipelines/moe_prompt_refiner/valves.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/plugins/pipelines/requirements.txt b/plugins/pipelines/requirements.txt new file mode 100644 index 0000000..e69de29 diff --git a/plugins/pipes/README.md b/plugins/pipes/README.md new file mode 100644 index 0000000..09e887f --- /dev/null +++ b/plugins/pipes/README.md @@ -0,0 +1,60 @@ +# Pipes + +English | [中文](./README_CN.md) + +Pipes process and enhance LLM responses after they are generated and before they are displayed to the user. 
This directory contains various pipe plugins that can be used to extend OpenWebUI functionality. + +## 📋 Pipe Plugins List + +| Plugin Name | Description | Documentation | +| :--- | :--- | :--- | +| **Example Pipe** | A template/example for creating pipe plugins | [English](./example-pipe/README.md) / [中文](./example-pipe/README_CN.md) | +| **AI Agent Pipe** | Transforms AI responses into complete agent workflows with multiple thinking rounds and tool calls | [English](./ai-agent-pipe/README.md) / [中文](./ai-agent-pipe/README_CN.md) | + +## 🎯 What are Pipe Plugins? + +Pipe plugins process the output from the LLM and can: + +- Format responses (convert to markdown, JSON, tables, etc.) +- Enhance responses with additional information +- Translate or transform content +- Filter or modify content before display +- Add watermarks or metadata +- Integrate with external services + +Pipes are executed after the LLM generates a response but before the user sees it. + +## 🚀 Quick Start + +### Installing a Pipe Plugin + +1. Download the plugin file (`.py`) to your local machine +2. Open OpenWebUI Admin Settings and find the "Plugins" section +3. Select the "Pipes" type +4. Upload the downloaded file +5. Refresh the page and enable the pipe in your chat settings +6. The pipe will be applied to all subsequent LLM responses + +## 📖 Development Guide + +When adding a new pipe plugin, please follow these steps: + +1. **Create Plugin Directory**: Create a new folder under `plugins/pipes/` (e.g., `my_pipe/`) +2. **Write Plugin Code**: Create a `.py` file with clear documentation of functionality +3. **Write Documentation**: + - Create `README.md` (English version) + - Create `README_CN.md` (Chinese version) + - Include: feature description, configuration, usage examples, and troubleshooting +4. 
**Update This List**: Add your plugin to the table above + +## ⚙️ Best Practices for Pipe Development + +- **Non-blocking Operations**: Keep pipe processing fast to avoid UI delays +- **Error Handling**: Gracefully handle errors without breaking the response +- **Configuration**: Make pipes configurable for different use cases +- **Performance**: Test with large responses to ensure efficiency +- **Documentation**: Provide clear examples and troubleshooting guides + +--- + +> **Contributor Note**: We welcome contributions of new pipe plugins! Please provide clear and complete documentation for each new plugin, including features, configuration, usage examples, and troubleshooting guides. diff --git a/plugins/pipes/README_CN.md b/plugins/pipes/README_CN.md new file mode 100644 index 0000000..2913900 --- /dev/null +++ b/plugins/pipes/README_CN.md @@ -0,0 +1,60 @@ +# Pipes(管道插件) + +[English](./README.md) | 中文 + +管道插件(Pipes)在 LLM 生成响应后、展示给用户前对响应进行处理和增强。此目录包含可用于扩展 OpenWebUI 功能的各种管道插件。 + +## 📋 管道插件列表 + +| 插件名称 | 描述 | 文档 | +| :--- | :--- | :--- | +| **示例管道** | 创建管道插件的模板/示例 | [中文](./example-pipe/README_CN.md) / [English](./example-pipe/README.md) | +| **AI代理管道** | 将AI响应转换为完整的代理工作流程,包含多轮思考和工具调用 | [中文](./ai-agent-pipe/README_CN.md) / [English](./ai-agent-pipe/README.md) | + +## 🎯 什么是管道插件? + +管道插件对 LLM 的输出进行处理,可以: + +- 格式化响应(转换为 Markdown、JSON、表格等) +- 用附加信息增强响应 +- 翻译或转换内容 +- 在显示前过滤或修改内容 +- 添加水印或元数据 +- 与外部服务集成 + +管道在 LLM 生成响应之后、用户看到响应之前执行。 + +## 🚀 快速开始 + +### 安装管道插件 + +1. 将插件文件(`.py`)下载到本地 +2. 在 OpenWebUI 管理员设置中,找到"Plugins"部分 +3. 选择"Pipes"类型 +4. 上传下载的文件 +5. 刷新页面并在聊天设置中启用管道 +6. 该管道将应用于所有后续的 LLM 响应 + +## 📖 开发指南 + +添加新管道插件时,请遵循以下步骤: + +1. **创建插件目录**:在 `plugins/pipes/` 下创建新文件夹(例如 `my_pipe/`) +2. **编写插件代码**:创建 `.py` 文件,清晰记录功能说明 +3. **编写文档**: + - 创建 `README.md`(英文版) + - 创建 `README_CN.md`(中文版) + - 包含:功能说明、配置方法、使用示例和故障排除 +4. 
**更新此列表**:在上述表格中添加您的插件 + +## ⚙️ 管道开发最佳实践 + +- **非阻塞操作**:保持管道处理快速以避免 UI 延迟 +- **错误处理**:优雅地处理错误而不破坏响应 +- **配置灵活性**:使管道可配置以适应不同用例 +- **性能优化**:使用大型响应测试以确保效率 +- **文档完整**:提供清晰的示例和故障排除指南 + +--- + +> **贡献者注意**:我们欢迎贡献新的管道插件!请为每个新增插件提供清晰完整的文档,包括功能说明、配置方法、使用示例和故障排除指南。 diff --git a/plugins/pipes/gemini_mainfold/README.md b/plugins/pipes/gemini_mainfold/README.md new file mode 100644 index 0000000..0b70480 --- /dev/null +++ b/plugins/pipes/gemini_mainfold/README.md @@ -0,0 +1,54 @@ +# Example Pipe Plugin + +**Author:** OpenWebUI Community | **Version:** 1.0.0 | **License:** MIT + +This is a template/example for creating Pipe plugins in OpenWebUI. + +--- + +## Overview + +Pipes are plugins that process and enhance LLM responses after they are generated and before they are displayed to the user. + +## Core Features + +- ✅ **Response Processing**: Modify or enhance LLM output +- ✅ **Format Conversion**: Convert responses to different formats +- ✅ **Content Filtering**: Filter or sanitize content +- ✅ **Integration**: Connect with external services + +--- + +## Installation + +1. Download the `.py` file from this directory +2. Open OpenWebUI Admin Settings → Plugins +3. Select "Pipes" type +4. Upload the file +5. Refresh the page + +--- + +## Configuration + +Configure the pipe parameters in your chat settings as needed. + +--- + +## Usage + +Once enabled, this pipe will automatically process all LLM responses. + +--- + +## Troubleshooting + +- Check the logs for any errors during pipe execution +- Ensure the pipe is properly configured +- Verify the pipe is enabled in chat settings + +--- + +## Contributing + +Feel free to create your own pipe plugins! Follow the structure and documentation guidelines in this template. 
diff --git a/plugins/pipes/gemini_mainfold/README_CN.md b/plugins/pipes/gemini_mainfold/README_CN.md new file mode 100644 index 0000000..6437018 --- /dev/null +++ b/plugins/pipes/gemini_mainfold/README_CN.md @@ -0,0 +1,54 @@ +# 示例管道插件 + +**作者:** OpenWebUI 社区 | **版本:** 1.0.0 | **许可证:** MIT + +这是在 OpenWebUI 中创建管道插件的模板/示例。 + +--- + +## 概述 + +管道是在 LLM 生成响应后、显示给用户前对响应进行处理和增强的插件。 + +## 核心特性 + +- ✅ **响应处理**: 修改或增强 LLM 输出 +- ✅ **格式转换**: 将响应转换为不同格式 +- ✅ **内容过滤**: 过滤或清理内容 +- ✅ **集成**: 与外部服务连接 + +--- + +## 安装 + +1. 从此目录下载 `.py` 文件 +2. 打开 OpenWebUI 管理员设置 → 插件(Plugins) +3. 选择"Pipes"类型 +4. 上传文件 +5. 刷新页面 + +--- + +## 配置 + +根据需要在聊天设置中配置管道参数。 + +--- + +## 使用 + +启用后,该管道将自动处理所有 LLM 响应。 + +--- + +## 故障排除 + +- 查看日志了解管道执行过程中的任何错误 +- 确保管道配置正确 +- 验证管道在聊天设置中已启用 + +--- + +## 贡献 + +欢迎创建您自己的管道插件!请遵循此模板中的结构和文档指南。 diff --git a/plugins/pipes/gemini_mainfold/gemini_manifold.py b/plugins/pipes/gemini_mainfold/gemini_manifold.py new file mode 100644 index 0000000..2bc7b67 --- /dev/null +++ b/plugins/pipes/gemini_mainfold/gemini_manifold.py @@ -0,0 +1,3382 @@ +""" +title: Gemini Manifold google_genai +id: gemini_manifold_google_genai +description: Manifold function for Gemini Developer API and Vertex AI. Uses the newer google-genai SDK. Aims to support as many features from it as possible. +author: suurt8ll +author_url: https://github.com/suurt8ll +funding_url: https://github.com/suurt8ll/open_webui_functions +license: MIT +version: 1.26.0 +requirements: google-genai==1.49.0 +""" + +VERSION = "1.26.0" +# This is the recommended version for the companion filter. +# Older versions might still work, but backward compatibility is not guaranteed +# during the development of this personal use plugin. +RECOMMENDED_COMPANION_VERSION = "1.7.0" + + +# Keys `title`, `id` and `description` in the frontmatter above are used for my own development purposes. +# They don't have any effect on the plugin's functionality. 
+ + +# This is a helper function that provides a manifold for Google's Gemini Studio API and Vertex AI. +# Be sure to check out my GitHub repository for more information! Contributions, questions and suggestions are very welcome. + +from google import genai +from google.genai import types +from google.genai import errors as genai_errors +from google.cloud import storage +from google.api_core import exceptions + +import time +import copy +import json +from urllib.parse import urlparse, parse_qs +import xxhash +import asyncio +import aiofiles +from aiocache import cached +from aiocache.base import BaseCache +from aiocache.serializers import NullSerializer +from aiocache.backends.memory import SimpleMemoryCache +from functools import cache +from datetime import datetime, timezone +from fastapi.datastructures import State +import io +import mimetypes +import uuid +import base64 +import re +import fnmatch +import sys +from loguru import logger +from fastapi import Request +import pydantic_core +from pydantic import BaseModel, Field, field_validator +from collections.abc import AsyncIterator, Awaitable, Callable +from typing import ( + Any, + Final, + AsyncGenerator, + Literal, + TYPE_CHECKING, + cast, +) + +from open_webui.models.chats import Chats +from open_webui.models.files import FileForm, Files +from open_webui.storage.provider import Storage +from open_webui.models.functions import Functions +from open_webui.utils.misc import pop_system_message + +# This block is skipped at runtime. +if TYPE_CHECKING: + from loguru import Record + from loguru._handler import Handler # type: ignore + # Imports custom type definitions (TypedDicts) for static analysis purposes (mypy/pylance). + from utils.manifold_types import * + +# Setting auditable=False avoids duplicate output for log levels that would be printed out by the main log. 
+log = logger.bind(auditable=False) + +# FIXME: remove +COMPATIBLE_MODELS_FOR_URL_CONTEXT: Final = [ + "gemini-2.5-pro", + "gemini-flash-latest", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash", + "gemini-flash-lite-latest", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-lite", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-flash-preview-05-20", + "gemini-2.0-flash", + "gemini-2.0-flash-001", + "gemini-2.0-flash-live-001", +] + +# A mapping of finish reason names (str) to human-readable descriptions. +# This allows handling of reasons that may not be defined in the current SDK version. +FINISH_REASON_DESCRIPTIONS: Final = { + "FINISH_REASON_UNSPECIFIED": "The reason for finishing is not specified.", + "STOP": "Natural stopping point or stop sequence reached.", + "MAX_TOKENS": "The maximum number of tokens was reached.", + "SAFETY": "The response was blocked due to safety concerns.", + "RECITATION": "The response was blocked due to potential recitation of copyrighted material.", + "LANGUAGE": "The response was stopped because of an unsupported language.", + "OTHER": "The response was stopped for an unspecified reason.", + "BLOCKLIST": "The response was blocked due to a word on a blocklist.", + "PROHIBITED_CONTENT": "The response was blocked for containing prohibited content.", + "SPII": "The response was blocked for containing sensitive personally identifiable information.", + "MALFORMED_FUNCTION_CALL": "The model generated an invalid function call.", + "IMAGE_SAFETY": "Generated image was blocked due to safety concerns.", + "UNEXPECTED_TOOL_CALL": "The model generated an invalid tool call.", + "IMAGE_PROHIBITED_CONTENT": "Generated image was blocked for containing prohibited content.", + "NO_IMAGE": "The model was expected to generate an image, but it did not.", + "IMAGE_OTHER": ( + "Image generation stopped for other reasons, possibly related to safety or 
class EventEmitter:
    """A helper class to abstract web-socket event emissions to the front-end.

    Wraps the optional ``event_emitter`` callable handed to the plugin. When
    that callable is ``None`` nothing is sent to the front-end; the emit
    methods return early instead of raising.
    """

    def __init__(
        self,
        event_emitter: Callable[["Event"], Awaitable[None]] | None,
        *,
        hide_successful_status: bool = False,
    ):
        """
        Args:
            event_emitter: Async callable that delivers one event, or ``None``
                to disable all emissions.
            hide_successful_status: When true, status events flagged with
                ``is_successful_finish`` are sent with ``hidden=True``.
        """
        self.event_emitter = event_emitter
        self.hide_successful_status = hide_successful_status
        # Strong references to in-flight fire-and-forget toast tasks.
        # The event loop only keeps weak references to tasks, so a task that
        # nobody else references may be garbage-collected before it finishes.
        self._pending_toasts: set[asyncio.Task] = set()

    def emit_toast(
        self,
        msg: str,
        toastType: Literal["info", "success", "warning", "error"] = "info",
    ) -> None:
        """Emits a toast notification to the front-end. This is a fire-and-forget operation.

        The event is sent from a background task so this synchronous method
        never blocks the caller. It must be called while an event loop is
        running, because it schedules the task with ``asyncio.create_task``.

        Args:
            msg: Text shown in the toast.
            toastType: Severity of the toast.
        """
        if not self.event_emitter:
            return

        event: "NotificationEvent" = {
            "type": "notification",
            "data": {"type": toastType, "content": msg},
        }

        log.debug(f"Emitting toast: '{msg}'")
        log.trace("Toast payload:", payload=event)

        async def send_toast():
            try:
                # Re-check in case the event loop runs this later and state has changed.
                if self.event_emitter:
                    await self.event_emitter(event)
            except Exception:
                # Best-effort delivery: a failed toast must never break the request.
                log.exception("Error emitting toast notification.")

        task = asyncio.create_task(send_toast())
        # Hold the task until it completes, then drop the reference so the
        # set does not grow without bound.
        self._pending_toasts.add(task)
        task.add_done_callback(self._pending_toasts.discard)

    async def emit_status(
        self,
        message: str,
        done: bool = False,
        hidden: bool = False,
        *,
        is_successful_finish: bool = False,
    ) -> None:
        """Emit status updates asynchronously.

        Args:
            message: Status description shown to the user.
            done: Value of the ``done`` field in the status payload.
            hidden: Value of the ``hidden`` field in the status payload.
            is_successful_finish: Marks this status as the final "success"
                message; combined with ``hide_successful_status`` it forces
                ``hidden=True``.
        """
        if not self.event_emitter:
            return

        # If this is a successful finish status and the user wants to hide it,
        # we override the hidden flag.
        if is_successful_finish and self.hide_successful_status:
            hidden = True

        status_event: "StatusEvent" = {
            "type": "status",
            "data": {"description": message, "done": done, "hidden": hidden},
        }

        log.debug(f"Emitting status: '{message}'")
        log.trace("Status payload:", payload=status_event)

        try:
            await self.event_emitter(status_event)
        except Exception:
            # Emission failures are logged and swallowed on purpose.
            log.exception("Error emitting status.")

    async def emit_completion(
        self,
        content: str | None = None,
        done: bool = False,
        error: str | None = None,
        sources: list["Source"] | None = None,
        usage: dict[str, Any] | None = None,
    ) -> None:
        """Constructs and emits completion event.

        Only the arguments that are not ``None`` are added to the event's
        ``data`` dictionary; ``done`` is always included.

        Args:
            content: Message content to include.
            done: Value of the ``done`` field in the payload.
            error: Error text; sent as ``{"detail": error}``.
            sources: Source entries to attach.
            usage: Usage data to attach.
        """
        if not self.event_emitter:
            return

        emission: "ChatCompletionEvent" = {
            "type": "chat:completion",
            "data": {"done": done},
        }
        # Names of the optional fields that were set, used only for the log line.
        parts = []
        if content is not None:
            emission["data"]["content"] = content
            parts.append("content")
        if error is not None:
            emission["data"]["error"] = {"detail": error}
            parts.append("error")
        if sources is not None:
            emission["data"]["sources"] = sources
            parts.append("sources")
        if usage is not None:
            emission["data"]["usage"] = usage
            parts.append("usage")

        desc = f" with {', '.join(parts)}" if parts else ""
        log.debug(f"Emitting completion: done={done}{desc}")
        log.trace("Completion payload:", payload=emission)

        try:
            await self.event_emitter(emission)
        except Exception:
            # Emission failures are logged and swallowed on purpose.
            log.exception("Error emitting completion.")

    async def emit_usage(self, usage_data: dict[str, Any]) -> None:
        """A wrapper around emit_completion to specifically emit usage data."""
        await self.emit_completion(usage=usage_data)

    async def emit_error(
        self,
        error_msg: str,
        warning: bool = False,
        exception: bool = True,
    ) -> None:
        """Emits an event to the front-end that causes it to display a nice red error message.

        The message is logged first (even when no emitter is configured) and
        then sent as a completion event with ``done=True``.

        Args:
            error_msg: Text to log and to show to the user.
            warning: Log at WARNING level without exception info instead of
                at ERROR level.
            exception: Passed to ``log.opt(exception=...)`` for the ERROR
                case; ignored when ``warning`` is true.
        """
        # depth=1 is handed to log.opt so the record is taken from the
        # caller's stack frame rather than from this helper.
        if warning:
            log.opt(depth=1, exception=False).warning(error_msg)
        else:
            log.opt(depth=1, exception=exception).error(error_msg)
        await self.emit_completion(error=f"\n{error_msg}", done=True)
+ """ + while not ( + self.finalize_received + and self.total_uploads_expected == self.uploads_completed + ): + msg = await self.queue.get() + msg_type = msg[0] + + if msg_type == "REGISTER_UPLOAD": + self.is_active = True + self.total_uploads_expected += 1 + await self._emit_progress_update() + elif msg_type == "COMPLETE_UPLOAD": + self.uploads_completed += 1 + await self._emit_progress_update() + elif msg_type == "FINALIZE": + self.finalize_received = True + + self.queue.task_done() + + log.debug("UploadStatusManager finished its run.") + + async def _emit_progress_update(self) -> None: + """Emits the current progress to the front-end if uploads are active.""" + if not self.is_active: + return + + elapsed_time = time.monotonic() - self.start_time + time_str = f"(+{elapsed_time:.2f}s)" + + is_done = ( + self.total_uploads_expected > 0 + and self.uploads_completed == self.total_uploads_expected + ) + + if is_done: + message = f"- Upload complete. {self.uploads_completed} file(s) processed. {time_str}" + else: + # Show "Uploading 1 of N..." + message = f"- Uploading file {self.uploads_completed + 1} of {self.total_uploads_expected}... {time_str}" + + await self.event_emitter.emit_status(message, done=is_done) + + +class FilesAPIManager: + """ + Manages uploading, caching, and retrieving files using the Google Gemini Files API. + + This class provides a stateless and efficient way to handle files by using a fast, + non-cryptographic hash (xxHash) of the file's content as the primary identifier. + This enables content-addressable storage, preventing duplicate uploads of the + same file. It uses a multi-tiered approach: + + 1. Hot Path (In-Memory Caches): For instantly retrieving file objects and hashes + for recently used files. + 2. Warm Path (Stateless GET): For quickly recovering file state after a server + restart by using a deterministic name (derived from the content hash) and a + single `get` API call. + 3. 
    async def get_or_upload_file(
        self,
        file_bytes: bytes,
        mime_type: str,
        *,
        owui_file_id: str | None = None,
        status_queue: asyncio.Queue | None = None,
    ) -> types.File:
        """
        The main public method to get a file, using caching, recovery, or uploading.

        This method uses a fast content hash (xxHash) as the primary key for all
        caching and remote API interactions to ensure deduplication and performance.
        Concurrent calls for the same content are serialized with a per-hash
        `asyncio.Lock` so that only one of them performs the GET/upload.

        Lookup order:
            1. In-memory file cache keyed by content hash.
            2. `files.get` on the deterministic name `files/owui-v1-<hash>`.
            3. Upload via `_upload_and_process_file` when the GET fails with
               a 403 client error.

        Args:
            file_bytes: The raw byte content of the file. Required.
            mime_type: The MIME type of the file (e.g., 'image/png'). Required.
            owui_file_id: The unique ID of the file from Open WebUI, if available.
                          Used for logging and as a key for the hash cache optimization.
            status_queue: An optional asyncio.Queue to report upload lifecycle events.

        Returns:
            An `ACTIVE` `google.genai.types.File` object.

        Raises:
            FilesAPIError: If the file fails to upload or process, or if the
                status check fails for any reason other than a 403.
        """
        # Step 1: Get the fast content hash, using the ID cache as an optimization if possible.
        content_hash = await self._get_content_hash(file_bytes, owui_file_id)

        # Step 2: The Hot Path (Check Local File Cache)
        # A cache hit means the file is valid and we can return immediately.
        cached_file: types.File | None = await self.file_cache.get(content_hash)
        if cached_file:
            log_id = f"OWUI ID: {owui_file_id}" if owui_file_id else "anonymous file"
            log.debug(
                f"Cache HIT for file hash {content_hash} ({log_id}). Returning immediately."
            )
            return cached_file

        # On cache miss, acquire a lock specific to this file's content to prevent race conditions.
        # dict.setdefault is atomic, ensuring only one lock is created per hash.
        lock = self.upload_locks.setdefault(content_hash, asyncio.Lock())
        if lock.locked():
            # Purely informational: the actual waiting happens in `async with` below.
            log.debug(
                f"Lock for hash {content_hash} is held by another task. "
                f"This call will now wait for the lock to be released."
            )

        async with lock:
            # Step 2.5: Double-Checked Locking
            # After acquiring the lock, check the cache again. Another task might have
            # completed the upload while we were waiting for the lock.
            # This early return sits outside the try/finally below, so it does
            # not touch `self.upload_locks`.
            cached_file = await self.file_cache.get(content_hash)
            if cached_file:
                log.debug(
                    f"Cache HIT for file hash {content_hash} after acquiring lock. Returning."
                )
                return cached_file

            # Step 3: The Warm/Cold Path (On Cache Miss)
            deterministic_name = f"files/owui-v1-{content_hash}"
            log.debug(
                f"Cache MISS for hash {content_hash}. Attempting stateless recovery with GET: {deterministic_name}"
            )

            try:
                # Attempt to get the file (Warm Path)
                file = await self.client.aio.files.get(name=deterministic_name)
                if not file.name:
                    # Raised inside the try, so it is caught by the generic
                    # `except Exception` handler below and re-wrapped.
                    raise FilesAPIError(
                        f"Stateless recovery for {deterministic_name} returned a file without a name."
                    )

                log.debug(
                    f"Stateless recovery successful for {deterministic_name}. File exists on server."
                )
                # A FilesAPIError from polling is also caught by `except Exception` below.
                active_file = await self._poll_for_active_state(file.name, owui_file_id)

                # Cache the recovered file until the expiration time reported by the server.
                ttl_seconds = self._calculate_ttl(active_file.expiration_time)
                await self.file_cache.set(content_hash, active_file, ttl=ttl_seconds)

                return active_file
            except genai_errors.ClientError as e:
                if e.code == 403:  # "Not found" signal from the API.
                    log.info(
                        f"File {deterministic_name} not found on server (received 403). Proceeding to upload."
                    )
                    # Proceed to upload (Cold Path)
                    # Errors raised here propagate as-is: they originate inside
                    # an except handler, so the sibling `except Exception`
                    # clause does not intercept them.
                    return await self._upload_and_process_file(
                        content_hash,
                        file_bytes,
                        mime_type,
                        deterministic_name,
                        owui_file_id,
                        status_queue,
                    )
                else:
                    log.exception(
                        f"A non-403 client error occurred during stateless recovery for {deterministic_name}."
                    )
                    self.event_emitter.emit_toast(
                        f"API error for file: {e.code}. Please check permissions.",
                        "error",
                    )
                    raise FilesAPIError(
                        f"Failed to check file status for {deterministic_name}: {e}"
                    ) from e
            except Exception as e:
                log.exception(
                    f"An unexpected error occurred during stateless recovery for {deterministic_name}."
                )
                self.event_emitter.emit_toast(
                    "Unexpected error retrieving a file. Please try again.",
                    "error",
                )
                raise FilesAPIError(
                    f"Failed to check file status for {deterministic_name}: {e}"
                ) from e
            finally:
                # Clean up the lock from the dictionary once processing is complete
                # for this hash, preventing memory growth over time.
                # On success, any future request for this hash will hit the cache.
                # NOTE(review): on the failure paths nothing is cached, so a task
                # arriving after this cleanup creates a fresh lock while tasks
                # already waiting still hold the old one; the membership test
                # also does not check that the stored lock is *this* lock.
                # Confirm whether concurrent duplicate uploads after a failure
                # are acceptable.
                if content_hash in self.upload_locks:
                    del self.upload_locks[content_hash]
+ """ + if owui_file_id: + # First, check the ID-to-Hash cache for known files. + cached_hash: str | None = await self.id_hash_cache.get(owui_file_id) + if cached_hash: + log.trace(f"Hash cache HIT for OWUI ID {owui_file_id}.") + return cached_hash + + # If not in cache or if file is anonymous, compute the fast hash. + log.trace( + f"Hash cache MISS for OWUI ID {owui_file_id if owui_file_id else 'N/A'}. Computing hash." + ) + content_hash = xxhash.xxh64(file_bytes).hexdigest() + + # If there was an ID, store the newly computed hash for next time. + if owui_file_id: + await self.id_hash_cache.set(owui_file_id, content_hash) + + return content_hash + + def _calculate_ttl(self, expiration_time: datetime | None) -> float | None: + """Calculates the TTL in seconds from an expiration datetime.""" + if not expiration_time: + return None + + now_utc = datetime.now(timezone.utc) + if expiration_time <= now_utc: + return 0 + + return (expiration_time - now_utc).total_seconds() + + async def _upload_and_process_file( + self, + content_hash: str, + file_bytes: bytes, + mime_type: str, + deterministic_name: str, + owui_file_id: str | None, + status_queue: asyncio.Queue | None = None, + ) -> types.File: + """Handles the full upload and post-upload processing workflow.""" + + # Register with the manager that an actual upload is starting. + if status_queue: + await status_queue.put(("REGISTER_UPLOAD",)) + + log.info(f"Starting upload for {deterministic_name}...") + + try: + file_io = io.BytesIO(file_bytes) + upload_config = types.UploadFileConfig( + name=deterministic_name, mime_type=mime_type + ) + uploaded_file = await self.client.aio.files.upload( + file=file_io, config=upload_config + ) + if not uploaded_file.name: + raise FilesAPIError( + f"File upload for {deterministic_name} did not return a file name." + ) + + log.debug(f"{uploaded_file.name} uploaded.") + log.trace("Uploaded file details:", payload=uploaded_file) + + # Check if the file is already active. 
If so, we can skip polling. + if uploaded_file.state == types.FileState.ACTIVE: + log.debug( + f"File {uploaded_file.name} is already ACTIVE. Skipping poll." + ) + active_file = uploaded_file + else: + # If not active, proceed with the original polling logic. + log.debug( + f"{uploaded_file.name} uploaded with state {uploaded_file.state}. Polling for ACTIVE state." + ) + active_file = await self._poll_for_active_state( + uploaded_file.name, owui_file_id + ) + log.debug(f"File {active_file.name} is now ACTIVE.") + + # Calculate TTL and set in the main file cache using the content hash as the key. + ttl_seconds = self._calculate_ttl(active_file.expiration_time) + await self.file_cache.set(content_hash, active_file, ttl=ttl_seconds) + log.debug( + f"Cached new file object for hash {content_hash} with TTL: {ttl_seconds}s." + ) + + return active_file + except Exception as e: + log.exception(f"File upload or processing failed for {deterministic_name}.") + self.event_emitter.emit_toast( + "Upload failed for a file. Please check connection and try again.", + "error", + ) + raise FilesAPIError(f"Upload failed for {deterministic_name}: {e}") from e + finally: + # Report completion (success or failure) to the status manager. + # This ensures the progress counter always advances. + if status_queue: + await status_queue.put(("COMPLETE_UPLOAD",)) + + async def _poll_for_active_state( + self, + file_name: str, + owui_file_id: str | None, + timeout: int = 60, + poll_interval: int = 1, + ) -> types.File: + """Polls the file's status until it is ACTIVE or fails.""" + end_time = time.monotonic() + timeout + while time.monotonic() < end_time: + try: + file = await self.client.aio.files.get(name=file_name) + except Exception as e: + raise FilesAPIError( + f"Polling failed: Could not get status for {file_name}. 
Reason: {e}" + ) from e + + if file.state == types.FileState.ACTIVE: + return file + if file.state == types.FileState.FAILED: + log_id = f"'{owui_file_id}'" if owui_file_id else "an uploaded file" + error_message = f"File processing failed on server for {file_name}." + toast_message = f"Google could not process {log_id}." + if file.error: + reason = f"Reason: {file.error.message} (Code: {file.error.code})" + error_message += f" {reason}" + toast_message += f" Reason: {file.error.message}" + + self.event_emitter.emit_toast(toast_message, "error") + raise FilesAPIError(error_message) + + state_name = file.state.name if file.state else "UNKNOWN" + log.trace( + f"File {file_name} is still {state_name}. Waiting {poll_interval}s..." + ) + await asyncio.sleep(poll_interval) + + raise FilesAPIError( + f"File {file_name} did not become ACTIVE within {timeout} seconds." + ) + + +class GeminiContentBuilder: + """Builds a list of `google.genai.types.Content` objects from the OWUI's body payload.""" + + def __init__( + self, + messages_body: list["Message"], + metadata_body: "Metadata", + user_data: "UserData", + event_emitter: EventEmitter, + valves: "Pipe.Valves", + files_api_manager: "FilesAPIManager", + ): + self.messages_body = messages_body + self.upload_documents = (metadata_body.get("features", {}) or {}).get( + "upload_documents", False + ) + self.event_emitter = event_emitter + self.valves = valves + self.files_api_manager = files_api_manager + self.is_temp_chat = metadata_body.get("chat_id") == "local" + self.vertexai = self.files_api_manager.client.vertexai + + self.system_prompt, self.messages_body = self._extract_system_prompt( + self.messages_body + ) + self.messages_db = self._fetch_and_validate_chat_history( + metadata_body, user_data + ) + + async def build_contents(self, start_time: float) -> list[types.Content]: + """ + The main public method to generate the contents list by processing all + message turns concurrently and using a self-configuring status 
manager. + """ + if not self.messages_db: + warn_msg = ( + "There was a problem retrieving the messages from the backend database. " + "Check the console for more details. " + "Citation filtering and file uploads will not be available." + ) + self.event_emitter.emit_toast(warn_msg, "warning") + + # 1. Set up and launch the status manager. It will activate itself if needed. + status_manager = UploadStatusManager(self.event_emitter, start_time=start_time) + manager_task = asyncio.create_task(status_manager.run()) + + # 2. Create and run concurrent processing tasks for each message turn. + tasks = [ + self._process_message_turn(i, message, status_manager.queue) + for i, message in enumerate(self.messages_body) + ] + log.debug(f"Starting concurrent processing of {len(tasks)} message turns.") + results = await asyncio.gather(*tasks, return_exceptions=True) + + # 3. Signal to the manager that no more uploads will be registered. + await status_manager.queue.put(("FINALIZE",)) + + # 4. Wait for the manager to finish processing all reported uploads. + await manager_task + + # 5. Filter and assemble the final contents list. 
+ contents: list[types.Content] = [] + for i, res in enumerate(results): + if isinstance(res, types.Content): + contents.append(res) + elif isinstance(res, Exception): + log.error( + f"An error occurred while processing message {i} concurrently.", + payload=res, + ) + return contents + + @staticmethod + def _extract_system_prompt( + messages: list["Message"], + ) -> tuple[str | None, list["Message"]]: + """Extracts the system prompt and returns it along with the modified message list.""" + system_message, remaining_messages = pop_system_message(messages) # type: ignore + system_prompt: str | None = (system_message or {}).get("content") + return system_prompt, remaining_messages # type: ignore + + def _fetch_and_validate_chat_history( + self, metadata_body: "Metadata", user_data: "UserData" + ) -> list["ChatMessageTD"] | None: + """ + Fetches message history from the database and validates its length against the request body. + Returns the database messages or None if not found or if validation fails. + """ + # 1. Fetch from database + chat_id = metadata_body.get("chat_id", "") + if chat := Chats.get_chat_by_id_and_user_id( + id=chat_id, user_id=user_data["id"] + ): + chat_content: "ChatObjectDataTD" = chat.chat # type: ignore + # Last message is the upcoming assistant response, at this point in the logic it's empty. + messages_db = chat_content.get("messages", [])[:-1] + else: + log.warning( + f"Chat {chat_id} not found. Cannot process files or filter citations." + ) + return None + + # 2. Validate length against the current message body + if len(messages_db) != len(self.messages_body): + warn_msg = ( + f"Messages in the body ({len(self.messages_body)}) and " + f"messages in the database ({len(messages_db)}) do not match. " + "This is likely due to a bug in Open WebUI. " + "Cannot process files or filter citations." + ) + + # TODO: Emit a toast to the user in the front-end. 
async def _process_message_turn(
    self, i: int, message: "Message", status_queue: asyncio.Queue
) -> types.Content | None:
    """Convert one chat message into a `types.Content` object.

    Designed to be run concurrently with the other message turns.

    Args:
        i: Index of the message. Used to look up the matching entry in
            `self.messages_db` and to number the message in logs and toasts.
        message: The message dict; its `role` decides how it is handled.
        status_queue: Queue forwarded unchanged to the part-building helpers.

    Returns:
        A `types.Content` holding the generated parts (assistant turns are
        sent with the role name `"model"`), or `None` when the role is
        neither `"user"` nor `"assistant"` or when no parts were produced.
    """
    role = message.get("role")

    # Anything that is not a user or assistant turn is reported and skipped.
    if role not in ("user", "assistant"):
        warn_msg = f"Message {i} has an invalid role: {role}. Skipping to the next message."
        log.warning(warn_msg)
        self.event_emitter.emit_toast(warn_msg, "warning")
        return None

    # The stored copy of this message, when chat history is available.
    history = self.messages_db
    db_entry = history[i] if history else None

    if role == "assistant":
        sources = db_entry.get("sources") if history else None
        reply_parts = await self._process_assistant_message(
            cast("AssistantMessage", message), sources, status_queue
        )
        # Google API's assistant role is "model"
        return types.Content(parts=reply_parts, role="model") if reply_parts else None

    # --- role == "user" ---
    attachments = []
    if history and self.upload_documents:
        attachments = db_entry.get("files", [])
    parts = await self._process_user_message(
        cast("UserMessage", message), attachments, status_queue
    )

    if not parts:
        # Case 1: User content is completely empty (no text, no files).
        log.info(
            f"User message at index {i} is completely empty. "
            "Injecting a prompt to ask for clarification."
        )
        # Inform the user via a toast notification.
        self.event_emitter.emit_toast(
            f"Your message #{i + 1} was empty. The assistant will ask for clarification.",
            "info",
        )
        # The clarification request becomes the only part of this message.
        parts = await self._genai_parts_from_text(
            "The user sent an empty message. Please ask the user for "
            "clarification on what they would like to ask or discuss.",
            status_queue,
        )
    elif not any(p.text for p in parts):
        # Case 2: content was sent (e.g., files) but without any text.
        if self.vertexai:
            # Vertex AI requires a text part in multi-modal messages.
            log.info(
                f"User message at index {i} lacks a text component for Vertex AI. "
                "Adding default text prompt."
            )
            # Inform the user via a toast notification.
            self.event_emitter.emit_toast(
                f"For your message #{i + 1}, a default prompt was added as text is required "
                "for requests with attachments when using Vertex AI.",
                "info",
            )
            parts.extend(
                await self._genai_parts_from_text(
                    "The user did not send any text message with the additional context. "
                    "Answer by summarizing the newly added context.",
                    status_queue,
                )
            )
        else:
            # Google Developer API allows no-text user content.
            log.info(
                f"User message at index {i} lacks a text component for Google Developer API. "
                "Proceeding with non-text parts only."
            )

    # Only create a Content object if there are parts to include.
    return types.Content(parts=parts, role=role) if parts else None
+ if self.messages_db and files: + db_files_processed = True + log.info(f"Processing {len(files)} files from the database concurrently.") + + upload_tasks = [] + for file in files: + log.debug("Preparing DB file for concurrent upload:", payload=file) + uri = "" + if file.get("type") == "image": + uri = file.get("url", "") + elif file.get("type") == "file": + # Reconstruct the local API URI to be handled by our unified function + uri = f"/api/v1/files/{file.get('id', '')}/content" + + if uri: + # Create a coroutine for each file upload and add it to a list. + upload_tasks.append(self._genai_part_from_uri(uri, status_queue)) + else: + log.warning("Could not determine URI for file in DB.", payload=file) + + if upload_tasks: + # Run all upload tasks concurrently. asyncio.gather maintains the order of results. + results = await asyncio.gather(*upload_tasks) + # Filter out None results (from failed uploads) and add the successful parts to the list. + user_parts.extend(part for part in results if part) + + # Now, process the content from the message payload. + user_content = message.get("content") + if isinstance(user_content, str): + user_content_list: list["Content"] = [ + {"type": "text", "text": user_content} + ] + elif isinstance(user_content, list): + user_content_list = user_content + else: + warn_msg = "User message content is not a string or list, skipping." + log.warning(warn_msg) + self.event_emitter.emit_toast(warn_msg, "warning") + return user_parts + + for c in user_content_list: + c_type = c.get("type") + if c_type == "text": + c = cast("TextContent", c) + if c_text := c.get("text"): + user_parts.extend( + await self._genai_parts_from_text(c_text, status_queue) + ) + + # PATH 2: Temporary Chat Image Handling. 
async def _process_assistant_message(
    self,
    message: "AssistantMessage",
    sources: list["Source"] | None,
    status_queue: asyncio.Queue,
) -> list[types.Part]:
    """Turn a past assistant message into Gemini parts.

    Args:
        message: The assistant message dict; only its `content` is read.
        sources: Sources stored alongside the message, or `None`. When given,
            the citation markers they describe are removed from the text
            before conversion.
        status_queue: Queue forwarded to `_genai_parts_from_text`.

    Returns:
        The parts produced by `_genai_parts_from_text`, or an empty list when
        the message has no content.
    """
    assistant_text = message.get("content")
    if not assistant_text:
        # Nothing to convert. Return before citation stripping, which calls
        # `len()` on the text and would raise on a missing/None content.
        return []
    if sources:
        assistant_text = self._remove_citation_markers(assistant_text, sources)
    return await self._genai_parts_from_text(assistant_text, status_queue)
+ # elif uri.startswith("gs://"): ... + else: + warn_msg = f"Unsupported URI: '{uri[:64]}...' Links must be to YouTube or a supported file type." + log.warning(warn_msg) + self.event_emitter.emit_toast(warn_msg, "warning") + return None + + # Step 2: If we have bytes, decide how to create the Part + if file_bytes and mime_type: + # TODO: The Files API is strict about MIME types (e.g., text/plain, + # application/pdf). In the future, inspect the content of files + # with unsupported text-like MIME types (e.g., 'application/json', + # 'text/markdown'). If the content is detected as plaintext, + # override the `mime_type` variable to 'text/plain' to allow the upload. + + # Determine whether to use the Files API based on the specified conditions. + use_files_api = True + reason = "" + + if not self.valves.USE_FILES_API: + reason = "disabled by user setting (USE_FILES_API=False)" + use_files_api = False + elif self.vertexai: + reason = "the active client is configured for Vertex AI, which does not support the Files API" + use_files_api = False + elif self.is_temp_chat: + reason = "temporary chat mode is active" + use_files_api = False + + if use_files_api: + log.info(f"Using Files API for resource from URI: {uri[:64]}...") + gemini_file = await self.files_api_manager.get_or_upload_file( + file_bytes=file_bytes, + mime_type=mime_type, + owui_file_id=owui_file_id, + status_queue=status_queue, + ) + return types.Part( + file_data=types.FileData( + file_uri=gemini_file.uri, + mime_type=gemini_file.mime_type, + ) + ) + else: + log.info( + f"Sending raw bytes because {reason}. Resource from URI: {uri[:64]}..." 
+ ) + return types.Part.from_bytes(data=file_bytes, mime_type=mime_type) + + return None # Return None if bytes/mime_type could not be determined + + except FilesAPIError as e: + error_msg = f"Files API failed for URI '{uri[:64]}...': {e}" + log.error(error_msg) + self.event_emitter.emit_toast(error_msg, "error") + return None + except Exception: + log.exception(f"Error processing URI: {uri[:64]}[...]") + return None + + def _genai_part_from_youtube_uri(self, uri: str) -> types.Part | None: + """Creates a Gemini Part from a YouTube URL, with optional video metadata. + + Handles standard (`watch?v=`), short (`youtu.be/`), mobile (`shorts/`), + and live (`live/`) URLs. Metadata is parsed for the Gemini Developer API + but ignored for Vertex AI, which receives a simple URI Part. + + - **Start/End Time**: `?t=` and `#end=`. The value can be a + flexible duration (e.g., "1m30s", "90") and will be converted to seconds. + - **Frame Rate**: Can be specified in two ways (if both are present, + `interval` takes precedence): + - **Interval**: `#interval=` (e.g., `#interval=10s`, `#interval=0.5s`). + The value is a flexible duration converted to seconds, then to FPS (1/interval). + - **FPS**: `#fps=` (e.g., `#fps=2.5`). + The final FPS value must be in the range (0, 24]. + + Args: + uri: The raw YouTube URL from the user. + is_vertex_client: If True, creates a simple Part for Vertex AI. + + Returns: + A `types.Part` object, or `None` if the URI is not a valid YouTube link. + """ + # Convert YouTube Music URLs to standard YouTube URLs for consistent parsing. + if "music.youtube.com" in uri: + uri = uri.replace("music.youtube.com", "www.youtube.com") + log.info(f"Converted YouTube Music URL to standard URL: {uri}") + + # Regex to capture the 11-character video ID from various YouTube URL formats. 
+ video_id_pattern = re.compile( + r"(?:https?://)?(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/|live/)|youtu.be/)([a-zA-Z0-9_-]{11})" + ) + + match = video_id_pattern.search(uri) + if not match: + log.warning(f"Could not extract a valid YouTube video ID from URI: {uri}") + return None + + video_id = match.group(1) + canonical_uri = f"https://www.youtube.com/watch?v={video_id}" + + # --- Branching logic for Vertex AI vs. Gemini Developer API --- + if self.vertexai: + return types.Part.from_uri(file_uri=canonical_uri, mime_type="video/mp4") + else: + parsed_uri = urlparse(uri) + query_params = parse_qs(parsed_uri.query) + fragment_params = parse_qs(parsed_uri.fragment) + + start_offset: str | None = None + end_offset: str | None = None + fps: float | None = None + + # Start time from query `t`. Convert flexible format to "Ns". + if "t" in query_params: + raw_start = query_params["t"][0] + if ( + total_seconds := self._parse_duration_to_seconds(raw_start) + ) is not None: + start_offset = f"{total_seconds}s" + + # End time from fragment `end`. Convert flexible format to "Ns". + if "end" in fragment_params: + raw_end = fragment_params["end"][0] + if ( + total_seconds := self._parse_duration_to_seconds(raw_end) + ) is not None: + end_offset = f"{total_seconds}s" + + # Frame rate from fragment `interval` or `fps`. `interval` takes precedence. + if "interval" in fragment_params: + raw_interval = fragment_params["interval"][0] + if ( + interval_seconds := self._parse_duration_to_seconds(raw_interval) + ) is not None and interval_seconds > 0: + calculated_fps = 1.0 / interval_seconds + if 0.0 < calculated_fps <= 24.0: + fps = calculated_fps + else: + log.warning( + f"Interval '{raw_interval}' results in FPS '{calculated_fps}' which is outside the valid range (0.0, 24.0]. Ignoring." + ) + + # Fall back to `fps` param if not set by `interval`. 
+ if fps is None and "fps" in fragment_params: + try: + fps_val = float(fragment_params["fps"][0]) + if 0.0 < fps_val <= 24.0: + fps = fps_val + else: + log.warning( + f"FPS value '{fps_val}' is outside the valid range (0.0, 24.0]. Ignoring." + ) + except (ValueError, IndexError): + log.warning( + f"Invalid FPS value in fragment: {fragment_params.get('fps')}. Ignoring." + ) + + video_metadata: types.VideoMetadata | None = None + if start_offset or end_offset or fps is not None: + video_metadata = types.VideoMetadata( + start_offset=start_offset, + end_offset=end_offset, + fps=fps, + ) + + return types.Part( + file_data=types.FileData(file_uri=canonical_uri), + video_metadata=video_metadata, + ) + + def _parse_duration_to_seconds(self, duration_str: str) -> float | None: + """Converts a human-readable duration string to total seconds. + + Supports formats like "1h30m15s", "90m", "3600s", or just "90". + Also supports float values like "0.5s" or "90.5". + Returns total seconds as a float, or None if the string is invalid. + """ + # First, try to convert the whole string as a plain number (e.g., "90", "90.5"). + try: + return float(duration_str) + except ValueError: + # If it fails, it might be a composite duration like "1m30s", so we parse it below. + pass + + total_seconds = 0.0 + # Regex to find number-unit pairs (e.g., 1h, 30.5m, 15s). Supports floats. + parts = re.findall(r"(\d+(?:\.\d+)?)\s*(h|m|s)?", duration_str, re.IGNORECASE) + + if not parts: + # log.warning(f"Could not parse duration string: {duration_str}") + return None + + for value, unit in parts: + val = float(value) + unit = (unit or "s").lower() # Default to seconds if no unit + if unit == "h": + total_seconds += val * 3600 + elif unit == "m": + total_seconds += val * 60 + elif unit == "s": + total_seconds += val + + return total_seconds + + @staticmethod + def _enable_special_tags(text: str) -> str: + """ + Reverses the action of _disable_special_tags by removing the ZWS + from special tags. 
This is used to clean up history messages before + sending them to the model, so it can understand the context correctly. + """ + if not text: + return "" + + # The regex finds ' 0: + log.debug(f"Re-enabled {count} special tag(s) for model context.") + + return restored_text + + async def _genai_parts_from_text( + self, text: str, status_queue: asyncio.Queue + ) -> list[types.Part]: + if not text: + return [] + + text = self._enable_special_tags(text) + parts: list[types.Part] = [] + last_pos = 0 + + # Conditionally build a regex to find media links. + # If YouTube parsing is disabled, the regex will only find markdown image links, + # leaving YouTube URLs to be treated as plain text. + markdown_part = r"!\[.*?\]\(([^)]+)\)" # Group 1: Markdown URI + youtube_part = r"(https?://(?:(?:www|music)\.)?youtube\.com/(?:watch\?v=|shorts/|live/)[^\s)]+|https?://youtu\.be/[^\s)]+)" # Group 2: YouTube URL + if self.valves.PARSE_YOUTUBE_URLS: + pattern = re.compile(f"{markdown_part}|{youtube_part}") + process_youtube = True + else: + pattern = re.compile(markdown_part) + process_youtube = False + log.info( + "YouTube URL parsing is disabled. URLs will be treated as plain text." + ) + + for match in pattern.finditer(text): + # Add the text segment that precedes the media link + if text_segment := text[last_pos : match.start()].strip(): + parts.append(types.Part.from_text(text=text_segment)) + + # The URI is in group 1 for markdown, or group 2 for YouTube. + if process_youtube: + uri = match.group(1) or match.group(2) + else: + uri = match.group(1) + + if not uri: + log.warning( + f"Found unsupported URI format in text: {match.group(0)}. Skipping." 
@staticmethod
async def _get_file_data(file_id: str) -> tuple[bytes | None, str | None]:
    """Load a file's bytes and content type for a backend file id.

    The file record is fetched from the database, then the content is read
    either from Google Cloud Storage (paths starting with `gs://`) or from
    the local disk.

    Args:
        file_id: Identifier passed to `Files.get_file_by_id`.

    Returns:
        `(file_bytes, content_type)`. Both are `None` when the id is empty,
        the database lookup fails or finds nothing, or the record lacks
        `path`, `meta` or `meta.content_type`. Only `file_bytes` is `None`
        when a local file cannot be read.

    Raises:
        ValueError: If a `gs://` path is not of the form
            `gs://bucket-name/object-name`.
        Exception: Any error raised while reading from Google Cloud Storage
            is logged and re-raised.
    """
    # TODO: Emit toasts on unexpected conditions.
    if not file_id:
        log.warning("file_id is empty. Cannot continue.")
        return None, None

    # Run the synchronous, blocking database call in a separate thread
    # to avoid blocking the main asyncio event loop.
    try:
        file_model = await asyncio.to_thread(Files.get_file_by_id, file_id)
    except Exception as e:
        log.exception(
            f"An unexpected error occurred during database call for file_id {file_id}: {e}"
        )
        return None, None

    if file_model is None:
        # The get_file_by_id method already handles and logs the specific exception,
        # so we just need to handle the None return value.
        log.warning(f"File {file_id} not found in the backend's database.")
        return None, None

    if not (file_path := file_model.path):
        log.warning(
            f"File {file_id} was found in the database but it lacks `path` field. Cannot Continue."
        )
        return None, None
    if file_model.meta is None:
        log.warning(
            f"File {file_path} was found in the database but it lacks `meta` field. Cannot continue."
        )
        return None, None
    if not (content_type := file_model.meta.get("content_type")):
        log.warning(
            f"File {file_path} was found in the database but it lacks `meta.content_type` field. Cannot continue."
        )
        return None, None

    if file_path.startswith("gs://"):

        def _download_from_gcs() -> bytes:
            """Fetch the object with the synchronous GCS client."""
            # Initialize the GCS client
            storage_client = storage.Client()

            # The path should be in the format "gs://bucket-name/object-name"
            if len(file_path.split("/", 3)) < 4:
                raise ValueError(
                    f"Invalid GCS path: '{file_path}'. "
                    "Path must be in the format 'gs://bucket-name/object-name'."
                )
            bucket_name, blob_name = file_path.removeprefix("gs://").split("/", 1)

            # Get the bucket and blob (file object), then download its bytes.
            blob = storage_client.bucket(bucket_name).blob(blob_name)
            return blob.download_as_bytes()

        try:
            log.info(f"Reading from GCS: {file_path}")
            # The client calls above are synchronous, so they are offloaded
            # to a worker thread like the database call, instead of running
            # directly inside this coroutine.
            return await asyncio.to_thread(_download_from_gcs), content_type
        except exceptions.NotFound:
            log.error(f"Error: GCS object not found at {file_path}")
            raise
        except Exception as e:
            log.error(f"An error occurred while reading from GCS: {e}")
            raise

    try:
        async with aiofiles.open(file_path, "rb") as file:
            file_data = await file.read()
        return file_data, content_type
    except FileNotFoundError:
        log.exception(f"File {file_path} not found on disk.")
        return None, content_type
    except Exception:
        log.exception(f"Error processing file {file_path}")
        return None, content_type
+ # If citation segment get's edited, then the markers would not be removed. Shortening reduces the + # chances of this happening. + segment_end = segment_text[-32:] + if segment_end in processed: + continue + processed.add(segment_end) + citation_markers = "".join(f"[{index + 1}]" for index in indices) + # Find the position of the citation markers in the text + pos = text.find(segment_text + citation_markers) + if pos != -1: + # Remove the citation markers + text = ( + text[: pos + len(segment_text)] + + text[pos + len(segment_text) + len(citation_markers) :] + ) + trim = len(original_text) - len(text) + log.debug( + f"Citation removal finished. Returning text str that is {trim} character shorter than the original input." + ) + return text + + +class Pipe: + + @staticmethod + def _validate_coordinates_format(v: str | None) -> str | None: + """Reusable validator for 'latitude,longitude' format.""" + if v is not None and v != "": + try: + parts = v.split(",") + if len(parts) != 2: + raise ValueError( + "Must contain exactly two parts separated by a comma." + ) + + lat_str, lon_str = parts + lat = float(lat_str.strip()) + lon = float(lon_str.strip()) + + if not (-90 <= lat <= 90): + raise ValueError("Latitude must be between -90 and 90.") + if not (-180 <= lon <= 180): + raise ValueError("Longitude must be between -180 and 180.") + except (ValueError, TypeError) as e: + raise ValueError( + f"Invalid format for MAPS_GROUNDING_COORDINATES: '{v}'. " + f"Expected 'latitude,longitude' (e.g., '40.7128,-74.0060'). Original error: {e}" + ) + return v + + class Valves(BaseModel): + GEMINI_API_KEY: str | None = Field(default=None) + IMAGE_GEN_GEMINI_API_KEY: str | None = Field( + default=None, + description="""Optional separate API key for image generation models. + If not provided, the main GEMINI_API_KEY will be used. 
+ An image generation model is identified by the Image Model Pattern regex below.""", + ) + USER_MUST_PROVIDE_AUTH_CONFIG: bool = Field( + default=False, + description="""Whether to require users (including admins) to provide their own authentication configuration. + User can provide these through UserValves. Setting this to True will disallow users from using Vertex AI. + Default value is False.""", + ) + AUTH_WHITELIST: str | None = Field( + default=None, + description="""Comma separated list of user emails that are allowed to bypassUSER_MUST_PROVIDE_AUTH_CONFIG and use the default authentication configuration. + Default value is None (no users are whitelisted).""", + ) + GEMINI_API_BASE_URL: str | None = Field( + default=None, + description="""The base URL for calling the Gemini API. + Default value is None.""", + ) + USE_VERTEX_AI: bool = Field( + default=False, + description="""Whether to use Google Cloud Vertex AI instead of the standard Gemini API. + If VERTEX_PROJECT is not set then the plugin will use the Gemini Developer API. + Default value is False. + Users can opt out of this by setting USE_VERTEX_AI to False in their UserValves.""", + ) + VERTEX_PROJECT: str | None = Field( + default=None, + description="""The Google Cloud project ID to use with Vertex AI. + Default value is None.""", + ) + VERTEX_LOCATION: str = Field( + default="global", + description="""The Google Cloud region to use with Vertex AI. + Default value is 'global'.""", + ) + MODEL_WHITELIST: str = Field( + default="*", + description="""Comma-separated list of allowed model names. + Supports `fnmatch` patterns: *, ?, [seq], [!seq]. + Default value is * (all models allowed).""", + ) + MODEL_BLACKLIST: str | None = Field( + default=None, + description="""Comma-separated list of blacklisted model names. + Supports `fnmatch` patterns: *, ?, [seq], [!seq]. 
+ Default value is None (no blacklist).""", + ) + CACHE_MODELS: bool = Field( + default=True, + description="""Whether to request models only on first load and when white- or blacklist changes. + Default value is True.""", + ) + THINKING_BUDGET: int = Field( + default=8192, + ge=-1, + # The widest possible range is 0 (for Lite/Flash) to 32768 (for Pro). + # -1 is used for dynamic thinking budget. + # Model-specific constraints are detailed in the description. + le=32768, + description="""Specifies the token budget for the model's internal thinking process, + used for complex tasks like tool use. Applicable to Gemini 2.5 models. + Default value is 8192. If you want the model to control the thinking budget when using the API, set the thinking budget to -1. + + The valid token range depends on the specific model tier: + - **Pro models**: Must be a value between 128 and 32,768. + - **Flash and Lite models**: A value between 0 and 24,576. For these + models, a value of 0 disables the thinking feature. + + See for more details.""", + ) + SHOW_THINKING_SUMMARY: bool = Field( + default=True, + description="""Whether to show the thinking summary in the response. + This is only applicable for Gemini 2.5 models. + Default value is True.""", + ) + THINKING_MODEL_PATTERN: str = Field( + default=r"^(?=.*(?:gemini-2\.5|gemini-flash-latest|gemini-flash-lite-latest))(?!(.*live))(?!(.*image))", + description="""Regex pattern to identify thinking models. + Default value is r"^(?=.*(?:gemini-2\.5|gemini-flash-latest|gemini-flash-lite-latest))(?!(.*live))(?!(.*image))".""", + ) + IMAGE_MODEL_PATTERN: str = Field( + default=r"image", + description="""Regex pattern to identify image generation models. + Default value is r"image".""", + ) + # FIXME: remove + ENABLE_URL_CONTEXT_TOOL: bool = Field( + default=False, + description="""Enable the URL context tool to allow the model to fetch and use content from provided URLs. + This tool is only compatible with specific models. 
Default value is False.""", + ) + USE_FILES_API: bool = Field( + default=True, + description="""Whether to use the Google Files API for uploading files. + This provides caching and performance benefits, but can be disabled for privacy, cost, or compatibility reasons. + If disabled, files are sent as raw bytes in the request. + Default value is True.""", + ) + PARSE_YOUTUBE_URLS: bool = Field( + default=True, + description="""Whether to parse YouTube URLs from user messages and provide them as context to the model. + If disabled, YouTube links are treated as plain text. + This is only applicable for models that support video. + Default value is True.""", + ) + USE_ENTERPRISE_SEARCH: bool = Field( + default=False, + description="""Enable the Enterprise Search tool to allow the model to fetch and use content from provided URLs. """, + ) + MAPS_GROUNDING_COORDINATES: str | None = Field( + default=None, + description="""Optional latitude and longitude coordinates for location-aware results with Google Maps grounding. + Expected format: 'latitude,longitude' (e.g., '40.7128,-74.0060'). + Default value is None.""", + ) + HIDE_SUCCESSFUL_STATUS_MESSAGE: bool = Field( + default=False, + description="""Whether to hide the final 'Response finished' status message on success. + Error messages will always be shown. + Default value is False.""", + ) + LOG_LEVEL: Literal[ + "TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL" + ] = Field( + default="INFO", + description="""Select logging level. Use `docker logs -f open-webui` to view logs. + Default value is INFO.""", + ) + + @field_validator("MAPS_GROUNDING_COORDINATES", mode="after") + @classmethod + def validate_coordinates_format(cls, v: str | None): + return Pipe._validate_coordinates_format(v) + + class UserValves(BaseModel): + """Defines user-specific settings that can override the default `Valves`. 
+ + The `UserValves` class provides a mechanism for individual users to customize + their Gemini API settings for each request. This system is designed as a + practical workaround for backend/frontend limitations, enabling per-user + configurations. + + Think of the main `Valves` as the global, admin-configured template for the + plugin. `UserValves` acts as a user-provided "overlay" or "patch" that + is applied on top of that template at runtime. + + How it works: + 1. **Default Behavior:** At the start of a request, the system merges the + user's `UserValves` with the admin's `Valves`. If a field in + `UserValves` has a value (i.e., is not `None` or an empty string `""`), + it overrides the corresponding value from the main `Valves`. If a + field is `None` or `""`, the admin's default is used. + + 2. **Special Authentication Logic:** A critical exception exists to enforce + security and usage policies. If the admin sets `USER_MUST_PROVIDE_AUTH_CONFIG` + to `True` in the main `Valves`, the merging logic changes for any user + not on the `AUTH_WHITELIST`: + - The user's `GEMINI_API_KEY` is taken directly from their `UserValves`, + bypassing the admin's key entirely. + - The ability to use the admin-configured Vertex AI is disabled + (`USE_VERTEX_AI` is forced to `False`). + This ensures that when required, users must use their own credentials + and cannot fall back on the shared, system-level authentication. + + This two-tiered configuration allows administrators to set sensible defaults + and enforce policies, while still giving users the flexibility to tailor + certain parameters, like their API key or model settings, for their own use. + """ + + GEMINI_API_KEY: str | None = Field( + default=None, + description="""Gemini Developer API key. 
+ Default value is None (uses the default from Valves, same goes for other options below).""", + ) + IMAGE_GEN_GEMINI_API_KEY: str | None = Field( + default=None, + description="""Optional separate API key for image generation models. + If not provided, the main GEMINI_API_KEY will be used.""", + ) + GEMINI_API_BASE_URL: str | None = Field( + default=None, + description="""The base URL for calling the Gemini API + Default value is None.""", + ) + USE_VERTEX_AI: bool | None | Literal[""] = Field( + default=None, + description="""Whether to use Google Cloud Vertex AI instead of the standard Gemini API. + Default value is None.""", + ) + VERTEX_PROJECT: str | None = Field( + default=None, + description="""The Google Cloud project ID to use with Vertex AI. + Default value is None.""", + ) + VERTEX_LOCATION: str | None = Field( + default=None, + description="""The Google Cloud region to use with Vertex AI. + Default value is None.""", + ) + THINKING_BUDGET: int | None | Literal[""] = Field( + default=None, + description="""Specifies the token budget for the model's internal thinking process, + used for complex tasks like tool use. Applicable to Gemini 2.5 models. + Default value is None. If you want the model to control the thinking budget when using the API, set the thinking budget to -1. + + The valid token range depends on the specific model tier: + - **Pro models**: Must be a value between 128 and 32,768. + - **Flash and Lite models**: A value between 0 and 24,576. For these + models, a value of 0 disables the thinking feature. + + See for more details.""", + ) + SHOW_THINKING_SUMMARY: bool | None | Literal[""] = Field( + default=None, + description="""Whether to show the thinking summary in the response. + This is only applicable for Gemini 2.5 models. + Default value is None.""", + ) + THINKING_MODEL_PATTERN: str | None = Field( + default=None, + description="""Regex pattern to identify thinking models. 
+ Default value is None.""", + ) + ENABLE_URL_CONTEXT_TOOL: bool | None | Literal[""] = Field( + default=None, + description="""Enable the URL context tool to allow the model to fetch and use content from provided URLs. + This tool is only compatible with specific models. Default value is None.""", + ) + USE_FILES_API: bool | None | Literal[""] = Field( + default=None, + description="""Override the default setting for using the Google Files API. + Set to True to force use, False to disable. + Default is None (use the admin's setting).""", + ) + PARSE_YOUTUBE_URLS: bool | None | Literal[""] = Field( + default=None, + description="""Override the default setting for parsing YouTube URLs. + Set to True to enable, False to disable. + Default is None (use the admin's setting).""", + ) + MAPS_GROUNDING_COORDINATES: str | None | Literal[""] = Field( + default=None, + description="""Optional latitude and longitude coordinates for location-aware results with Google Maps grounding. + Overrides the admin setting. Expected format: 'latitude,longitude' (e.g., '40.7128,-74.0060'). + Default value is None.""", + ) + HIDE_SUCCESSFUL_STATUS_MESSAGE: bool | None | Literal[""] = Field( + default=None, + description="""Override the default setting for hiding the successful status message. + Set to True to hide, False to show. + Default is None (use the admin's setting).""", + ) + + @field_validator("THINKING_BUDGET", mode="after") + @classmethod + def validate_thinking_budget_range(cls, v): + if v is not None and v != "": + if not (-1 <= v <= 32768): + raise ValueError( + "THINKING_BUDGET must be between -1 and 32768, inclusive." 
async def pipes(self) -> list["ModelData"]:
    """Return the Google models to register with Open WebUI.

    The model cache is cleared first when `CACHE_MODELS` is disabled. The
    lookup arguments are the client arguments built from the valves followed
    by the model whitelist and blacklist.

    Returns:
        The filtered model list, or a single error model when the lookup
        raises `GenaiApiError`.
    """
    self._add_log_handler(self.valves.LOG_LEVEL)
    log.debug("pipes method has been called.")

    # Clear cache if caching is disabled
    if not self.valves.CACHE_MODELS:
        log.debug("CACHE_MODELS is False, clearing model cache.")
        model_cache = cast(BaseCache, getattr(self._get_genai_models, "cache"))
        await model_cache.clear()

    log.info("Fetching and filtering models from Google API.")
    # Get and filter models (potentially cached based on API key, base URL, white- and blacklist)
    try:
        lookup_args = self._prepare_client_args(self.valves)
        lookup_args.extend(
            (self.valves.MODEL_WHITELIST, self.valves.MODEL_BLACKLIST)
        )
        models = await self._get_genai_models(*lookup_args)
    except GenaiApiError:
        return [
            self._return_error_model(
                "Error getting the models from Google API, check the logs.",
                exception=True,
            )
        ]

    log.info(f"Returning {len(models)} models to Open WebUI.")
    log.debug("Model list:", payload=models, _log_truncation_enabled=False)
    log.debug("pipes method has finished.")

    return models
Gemini Manifold google_genai version is {VERSION}" + ) + log.trace("__metadata__:", payload=__metadata__) + features = __metadata__.get("features", {}) or {} + + # Check the version of the companion filter + self._check_companion_filter_version(features) + + # Apply settings from the user + valves: Pipe.Valves = self._get_merged_valves( + self.valves, __user__.get("valves"), __user__.get("email") + ) + + model_name = re.sub(r"^.*?[./]", "", body.get("model", "")) + is_image_model = self._is_image_model(model_name, valves.IMAGE_MODEL_PATTERN) + + if is_image_model and valves.IMAGE_GEN_GEMINI_API_KEY: + log.info("Using separate API key for image generation model.") + valves.GEMINI_API_KEY = valves.IMAGE_GEN_GEMINI_API_KEY + + # When using a separate key, assume it's for Gemini API, not Vertex AI + # TODO: check if it would work for Vertex AI as well + valves.USE_VERTEX_AI = False + valves.VERTEX_PROJECT = None + + log.debug( + f"USE_VERTEX_AI: {valves.USE_VERTEX_AI}, VERTEX_PROJECT set: {bool(valves.VERTEX_PROJECT)}, API_KEY set: {bool(valves.GEMINI_API_KEY)}" + ) + + log.debug( + f"Getting genai client (potentially cached) for user {__user__['email']}." + ) + client = self._get_user_client(valves, __user__["email"]) + __metadata__["is_vertex_ai"] = client.vertexai + + if __metadata__.get("task"): + log.info(f'{__metadata__["task"]=}, disabling event emissions.') # type: ignore + # Task model is not user facing, so we should not emit any events. + __event_emitter__ = None + + event_emitter = EventEmitter( + __event_emitter__, + hide_successful_status=valves.HIDE_SUCCESSFUL_STATUS_MESSAGE, + ) + + files_api_manager = FilesAPIManager( + client=client, + file_cache=self.file_content_cache, + id_hash_cache=self.file_id_to_hash_cache, + event_emitter=event_emitter, + ) + + # Check if user is chatting with an error model for some reason. 
+ if "error" in __metadata__["model"]["id"]: + error_msg = f"There has been an error during model retrival phase: {str(__metadata__['model'])}" + raise ValueError(error_msg) + + # NOTE: will be "local" if Temporary Chat is enabled. + chat_id = __metadata__.get("chat_id", "not_provided") + message_id = __metadata__.get("message_id", "not_provided") + + log.info( + "Converting Open WebUI's `body` dict into list of `Content` objects that `google-genai` understands." + ) + + builder = GeminiContentBuilder( + messages_body=body.get("messages"), + metadata_body=__metadata__, + user_data=__user__, + event_emitter=event_emitter, + valves=valves, + files_api_manager=files_api_manager, + ) + # This is our first timed event, marking the start of payload preparation. + asyncio.create_task(event_emitter.emit_status("Preparing request...")) + contents = await builder.build_contents(start_time=start_time) + + gen_content_conf = self._build_gen_content_config(body, __metadata__, valves) + gen_content_conf.system_instruction = builder.system_prompt + + # Some models (e.g., image generation, Gemma) do not support the system prompt message. + system_prompt_unsupported = is_image_model or "gemma" in model_name + if system_prompt_unsupported: + # TODO: append to user message instead. + if gen_content_conf.system_instruction: + gen_content_conf.system_instruction = None + log.warning( + f"Model '{model_name}' does not support the system prompt message! Removing the system prompt." + ) + + gen_content_args = { + "model": model_name, + "contents": contents, + "config": gen_content_conf, + } + log.debug("Passing these args to the Google API:", payload=gen_content_args) + + # Both streaming and non-streaming responses are now handled by the same + # unified processor, which returns an AsyncGenerator. For non-streaming, + # we adapt the single response object into a one-item async generator. 
+ + elapsed_time = time.monotonic() - start_time + time_str = f"(+{elapsed_time:.2f}s)" + + # Determine the request type to provide a more informative status message. + is_streaming = features.get("stream", True) + request_type_str = "streaming" if is_streaming else "non-streaming" + + # Emit a status update with timing before making the actual API call. + asyncio.create_task(event_emitter.emit_status(f"Sending {request_type_str} request to Google API... {time_str}")) + + if is_streaming: + # Streaming response + response_stream: AsyncIterator[types.GenerateContentResponse] = ( + await client.aio.models.generate_content_stream(**gen_content_args) # type: ignore + ) + + log.info( + "Streaming enabled. Returning AsyncGenerator from unified processor." + ) + log.debug("pipe method has finished.") + return self._unified_response_processor( + response_stream, + __request__, + model_name, + event_emitter, + __user__["id"], + chat_id, + message_id, + start_time=start_time, + ) + else: + # Non-streaming response. + res = await client.aio.models.generate_content(**gen_content_args) + + # Adapter: Create a simple, one-shot async generator that yields the + # single response object, making it behave like a stream. + async def single_item_stream( + response: types.GenerateContentResponse, + ) -> AsyncGenerator[types.GenerateContentResponse, None]: + yield response + + log.info( + "Streaming disabled. Adapting full response and returning " + "AsyncGenerator from unified processor." + ) + log.debug("pipe method has finished.") + return self._unified_response_processor( + single_item_stream(res), + __request__, + model_name, + event_emitter, + __user__["id"], + chat_id, + message_id, + start_time=start_time, + ) + + # region 2. 
# Helper methods inside the Pipe class
#
# NOTE: every definition in this span is a member of the `Pipe` class; the
# class header itself lies outside this span.

# region 2.1 Client initialization
@staticmethod
@cache
def _get_or_create_genai_client(
    api_key: str | None = None,
    base_url: str | None = None,
    use_vertex_ai: bool | None = None,
    vertex_project: str | None = None,
    vertex_location: str | None = None,
) -> genai.Client:
    """
    Create a `genai.Client`, or return the memoized one for the same arguments.

    The `@cache` decorator memoizes the result per argument combination, so
    repeated calls with identical settings reuse a single client.

    Args:
        api_key: Gemini Developer API key. Used only on the Gemini Developer
            API path.
        base_url: Base URL handed to `types.HttpOptions`. Used only on the
            Gemini Developer API path; the Vertex AI path ignores it.
        use_vertex_ai: Whether Vertex AI is preferred. Only honoured when
            `vertex_project` is also set.
        vertex_project: Vertex AI project (Vertex AI path only).
        vertex_location: Vertex AI location (Vertex AI path only).

    Returns:
        The initialized client.

    Raises:
        GenaiApiError: If neither `vertex_project` nor `api_key` is set, or if
            constructing `genai.Client` raises for any reason.
    """

    if not vertex_project and not api_key:
        # FIXME: More detailed reason in the exception (tell user to set the API key).
        msg = "Neither VERTEX_PROJECT nor GEMINI_API_KEY is set."
        raise GenaiApiError(msg)

    if use_vertex_ai and vertex_project:
        kwargs = {
            "vertexai": True,
            "project": vertex_project,
            "location": vertex_location,
        }
        api = "Vertex AI"
    else:  # Covers (use_vertex_ai and not vertex_project) OR (not use_vertex_ai)
        if use_vertex_ai and not vertex_project:
            log.warning(
                "Vertex AI is enabled but no project is set. "
                "Using Gemini Developer API."
            )
        # This also implicitly covers the case where api_key might be None,
        # which is handled by the initial check or the SDK.
        kwargs = {
            "api_key": api_key,
            "http_options": types.HttpOptions(base_url=base_url),
        }
        api = "Gemini Developer API"

    try:
        client = genai.Client(**kwargs)
        log.success(f"{api} Genai client successfully initialized.")
        return client
    except Exception as e:
        # Normalize every SDK failure into the plugin's own error type so that
        # callers only have to handle GenaiApiError.
        raise GenaiApiError(f"{api} Genai client initialization failed: {e}") from e


def _get_user_client(self, valves: "Pipe.Valves", user_email: str) -> genai.Client:
    """
    Return the genai client that matches the given (already merged) valves.

    Args:
        valves: Effective settings for this request.
        user_email: Email of the requesting user; compared against
            `valves.AUTH_WHITELIST` and used in log/error messages.

    Returns:
        A (possibly cached) `genai.Client`.

    Raises:
        ValueError: If `USER_MUST_PROVIDE_AUTH_CONFIG` is set, the user is not
            whitelisted and no `GEMINI_API_KEY` is present, or if the client
            could not be initialized.
    """
    # AUTH_WHITELIST is a comma-separated string. Trim whitespace around each
    # entry and drop empty ones so that "a@x.com, b@x.com" matches both
    # addresses (the model white-/blacklists are equally whitespace-tolerant).
    user_whitelist = (
        [entry.strip() for entry in valves.AUTH_WHITELIST.split(",") if entry.strip()]
        if valves.AUTH_WHITELIST
        else []
    )
    log.debug(
        f"User whitelist: {user_whitelist}, user email: {user_email}, "
        f"USER_MUST_PROVIDE_AUTH_CONFIG: {valves.USER_MUST_PROVIDE_AUTH_CONFIG}"
    )
    if valves.USER_MUST_PROVIDE_AUTH_CONFIG and user_email not in user_whitelist:
        if not valves.GEMINI_API_KEY:
            error_msg = (
                "User must provide their own authentication configuration. "
                "Please set GEMINI_API_KEY in your UserValves."
            )
            raise ValueError(error_msg)
    try:
        client_args = self._prepare_client_args(valves)
        client = self._get_or_create_genai_client(*client_args)
    except GenaiApiError as e:
        error_msg = f"Failed to initialize genai client for user {user_email}: {e}"
        # FIXME: include correct traceback.
        raise ValueError(error_msg) from e
    return client


@staticmethod
def _prepare_client_args(
    source_valves: "Pipe.Valves | Pipe.UserValves",
) -> list[str | bool | None]:
    """
    Collect the positional arguments for `_get_or_create_genai_client`.

    The attribute order below must stay in sync with the parameter order of
    `_get_or_create_genai_client`, because callers unpack the result with `*`.

    Args:
        source_valves: Valves object the five settings are read from.

    Returns:
        `[api_key, base_url, use_vertex_ai, vertex_project, vertex_location]`.
    """
    ATTRS = [
        "GEMINI_API_KEY",
        "GEMINI_API_BASE_URL",
        "USE_VERTEX_AI",
        "VERTEX_PROJECT",
        "VERTEX_LOCATION",
    ]
    return [getattr(source_valves, attr) for attr in ATTRS]


# endregion 2.1 Client initialization

# region 2.2 Model retrival from Google API
@cached()  # aiocache.cached for async method
async def _get_genai_models(
    self,
    api_key: str | None,
    base_url: str | None,
    use_vertex_ai: bool | None,  # User's preference from config
    vertex_project: str | None,
    vertex_location: str | None,
    whitelist_str: str,
    blacklist_str: str | None,
) -> list["ModelData"]:
    """
    Get valid Google models from the API(s) and filter them.

    If `use_vertex_ai`, `vertex_project` and `api_key` are all provided, models
    are fetched from both Vertex AI and the Gemini Developer API and merged;
    on an ID collision the Gemini Developer API entry wins. Otherwise a single
    source is queried.

    Afterwards the models are reduced to those that are unnamed-free and
    generative (`supported_actions` is None or contains "generateContent"),
    and finally filtered by the comma-separated `fnmatch` patterns in
    `whitelist_str` / `blacklist_str` (spaces inside those strings are ignored).

    Returns:
        A list of `{"id", "name", "description"}` dicts; empty if nothing
        survives the filters.

    Raises:
        GenaiApiError: If no source yields any model, or if the single-source
            fetch fails.
    """
    all_raw_models: list[types.Model] = []

    # Condition for fetching from both sources
    fetch_both = bool(use_vertex_ai and vertex_project and api_key)

    if fetch_both:
        log.info(
            "Attempting to fetch models from both Gemini Developer API and Vertex AI."
        )
        gemini_models_list: list[types.Model] = []
        vertex_models_list: list[types.Model] = []

        # TODO: perf, consider parallelizing these two fetches
        # 1. Fetch from Gemini Developer API
        # A failure here is only logged: the other source may still succeed.
        try:
            gemini_client = self._get_or_create_genai_client(
                api_key=api_key,
                base_url=base_url,
                use_vertex_ai=False,  # Explicitly target Gemini API
                vertex_project=None,
                vertex_location=None,
            )
            gemini_models_list = await self._fetch_models_from_client_internal(
                gemini_client, "Gemini Developer API"
            )
        except GenaiApiError as e:
            log.warning(
                f"Failed to initialize or retrieve models from Gemini Developer API: {e}"
            )
        except Exception as e:
            log.warning(
                f"An unexpected error occurred with Gemini Developer API models: {e}",
                exc_info=True,
            )

        # 2. Fetch from Vertex AI
        try:
            vertex_client = self._get_or_create_genai_client(
                use_vertex_ai=True,  # Explicitly target Vertex AI
                vertex_project=vertex_project,
                vertex_location=vertex_location,
                api_key=None,  # API key is not used for Vertex AI with project auth
                # NOTE: _get_or_create_genai_client does not use base_url on
                # the Vertex AI path; it is forwarded for signature symmetry.
                base_url=base_url,
            )
            vertex_models_list = await self._fetch_models_from_client_internal(
                vertex_client, "Vertex AI"
            )
        except GenaiApiError as e:
            log.warning(
                f"Failed to initialize or retrieve models from Vertex AI: {e}"
            )
        except Exception as e:
            log.warning(
                f"An unexpected error occurred with Vertex AI models: {e}",
                exc_info=True,
            )

        # 3. Combine and de-duplicate
        # Prioritize models from Gemini Developer API in case of ID collision
        combined_models_dict: dict[str, types.Model] = {}

        for model in gemini_models_list:
            if model.name:
                model_id = Pipe.strip_prefix(model.name)
                if model_id and model_id not in combined_models_dict:
                    combined_models_dict[model_id] = model
            else:
                log.trace(
                    f"Gemini model without a name encountered: {model.display_name or 'N/A'}"
                )

        for model in vertex_models_list:
            if model.name:
                model_id = Pipe.strip_prefix(model.name)
                if model_id:
                    if model_id not in combined_models_dict:
                        combined_models_dict[model_id] = model
                    else:
                        log.info(
                            f"Duplicate model ID '{model_id}' from Vertex AI already sourced from Gemini API. Keeping Gemini API version."
                        )
            else:
                log.trace(
                    f"Vertex AI model without a name encountered: {model.display_name or 'N/A'}"
                )

        all_raw_models = list(combined_models_dict.values())

        log.info(
            f"Fetched {len(gemini_models_list)} models from Gemini API, "
            f"{len(vertex_models_list)} from Vertex AI. "
            f"Combined to {len(all_raw_models)} unique models."
        )

        if not all_raw_models and (gemini_models_list or vertex_models_list):
            log.warning(
                "Models were fetched but resulted in an empty list after de-duplication, possibly due to missing names or empty/duplicate IDs."
            )

        if not all_raw_models and not gemini_models_list and not vertex_models_list:
            raise GenaiApiError(
                "Failed to retrieve models: Both Gemini Developer API and Vertex AI attempts yielded no models."
            )

    else:  # Single source logic
        # Determine if we are effectively using Vertex AI or Gemini API
        # This depends on user's config (use_vertex_ai) and availability of project/key
        client_target_is_vertex = bool(use_vertex_ai and vertex_project)
        client_source_name = (
            "Vertex AI" if client_target_is_vertex else "Gemini Developer API"
        )
        log.info(
            f"Attempting to fetch models from a single source: {client_source_name}."
        )

        try:
            client = self._get_or_create_genai_client(
                api_key=api_key,
                base_url=base_url,
                use_vertex_ai=client_target_is_vertex,  # Pass the determined target
                vertex_project=vertex_project if client_target_is_vertex else None,
                vertex_location=(
                    vertex_location if client_target_is_vertex else None
                ),
            )
            all_raw_models = await self._fetch_models_from_client_internal(
                client, client_source_name
            )

            if not all_raw_models:
                # Caught by the handler right below and re-raised with the
                # source name prepended.
                raise GenaiApiError(
                    f"No models retrieved from {client_source_name}. This could be due to an API error, network issue, or no models being available."
                )

        except GenaiApiError as e:
            raise GenaiApiError(
                f"Failed to get models from {client_source_name}: {e}"
            ) from e
        except Exception as e:
            log.error(
                f"An unexpected error occurred while configuring client or fetching models from {client_source_name}: {e}",
                exc_info=True,
            )
            raise GenaiApiError(
                f"An unexpected error occurred while retrieving models from {client_source_name}: {e}"
            ) from e

    # --- Common processing for all_raw_models ---

    if not all_raw_models:
        log.warning("No models available after attempting all configured sources.")
        return []

    log.info(f"Processing {len(all_raw_models)} unique raw models.")

    generative_models: list[types.Model] = []
    for model in all_raw_models:
        if model.name is None:
            log.trace(
                f"Skipping model with no name during generative filter: {model.display_name or 'N/A'}"
            )
            continue
        actions = model.supported_actions
        if (
            actions is None or "generateContent" in actions
        ):  # Includes models if actions is None (e.g., Vertex)
            generative_models.append(model)
        else:
            log.trace(
                f"Model '{model.name}' (ID: {Pipe.strip_prefix(model.name)}) skipped, not generative (actions: {actions})."
            )

    if not generative_models:
        log.warning(
            "No generative models found after filtering all retrieved models."
        )
        return []

    def parse_patterns(list_of_patterns_str: str | None) -> list[str]:
        """Split a comma-separated pattern string, ignoring spaces and empty items."""
        if not list_of_patterns_str:
            return []
        return [
            pat for pat in list_of_patterns_str.replace(" ", "").split(",") if pat
        ]

    def matches_any(name_to_check: str, patterns: list[str]) -> bool:
        """Return True if the name matches at least one fnmatch pattern."""
        return any(fnmatch.fnmatch(name_to_check, pat) for pat in patterns)

    # The pattern lists do not depend on the model, so parse them once instead
    # of once per model.
    whitelist_patterns = parse_patterns(whitelist_str)
    blacklist_patterns = parse_patterns(blacklist_str)

    filtered_models_data: list["ModelData"] = []
    for model in generative_models:
        # model.name is guaranteed non-None by generative_models filter logic
        stripped_name = Pipe.strip_prefix(model.name)  # type: ignore

        if not stripped_name:
            log.warning(
                f"Model '{model.name}' (display: {model.display_name}) resulted in an empty ID after stripping. Skipping."
            )
            continue

        # An empty whitelist admits everything; a non-empty whitelist string
        # that contains no usable pattern admits nothing.
        passes_whitelist = not whitelist_str or matches_any(
            stripped_name, whitelist_patterns
        )
        passes_blacklist = not blacklist_str or not matches_any(
            stripped_name, blacklist_patterns
        )

        if passes_whitelist and passes_blacklist:
            filtered_models_data.append(
                {
                    "id": stripped_name,
                    "name": model.display_name or stripped_name,
                    "description": model.description,
                }
            )
        else:
            log.trace(
                f"Model ID '{stripped_name}' filtered out by whitelist/blacklist. Whitelist match: {passes_whitelist}, Blacklist pass: {passes_blacklist}"
            )

    log.info(
        f"Filtered {len(generative_models)} generative models down to {len(filtered_models_data)} models based on white/blacklists."
    )
    return filtered_models_data


# TODO: Use cache for this method too?
+ async def _fetch_models_from_client_internal( + self, client: genai.Client, source_name: str + ) -> list[types.Model]: + """Helper to fetch models from a given client and handle common exceptions.""" + try: + google_models_pager = await client.aio.models.list( + config={"query_base": True} # Fetch base models by default + ) + models = [model async for model in google_models_pager] + log.info(f"Retrieved {len(models)} models from {source_name}.") + log.trace( + f"All models returned by {source_name}:", payload=models + ) # Can be verbose + return models + except Exception as e: + log.error(f"Retrieving models from {source_name} failed: {e}") + # Return empty list; caller decides if this is fatal for the whole operation. + return [] + + @staticmethod + def _return_error_model( + error_msg: str, warning: bool = False, exception: bool = True + ) -> "ModelData": + """Returns a placeholder model for communicating error inside the pipes method to the front-end.""" + if warning: + log.opt(depth=1, exception=False).warning(error_msg) + else: + log.opt(depth=1, exception=exception).error(error_msg) + return { + "id": "error", + "name": "[gemini_manifold] " + error_msg, + "description": error_msg, + } + + @staticmethod + def strip_prefix(model_name: str) -> str: + """ + Extract the model identifier using regex, handling various naming conventions. + e.g., "gemini_manifold_google_genai.gemini-2.5-flash-preview-04-17" -> "gemini-2.5-flash-preview-04-17" + e.g., "models/gemini-1.5-flash-001" -> "gemini-1.5-flash-001" + e.g., "publishers/google/models/gemini-1.5-pro" -> "gemini-1.5-pro" + """ + # Use regex to remove everything up to and including the last '/' or the first '.' 
+ stripped = re.sub(r"^(?:.*/|[^.]*\.)", "", model_name) + return stripped + + @staticmethod + def _is_image_model(model_name: str, pattern: str) -> bool: + return bool(re.search(pattern, model_name, re.IGNORECASE)) + + # endregion 2.2 Model retrival from Google API + + # region 2.3 GenerateContentConfig assembly + + def _build_gen_content_config( + self, + body: "Body", + __metadata__: "Metadata", + valves: "Valves", + ) -> types.GenerateContentConfig: + """Assembles the GenerateContentConfig for a Gemini API request.""" + model_name = re.sub(r"^.*?[./]", "", body.get("model", "")) + features = __metadata__.get("features", {}) or {} + is_vertex_ai = __metadata__.get("is_vertex_ai", False) + + log.debug( + "Features extracted from metadata (UI toggles and config):", + payload=features + ) + + safety_settings: list[types.SafetySetting] | None = __metadata__.get( + "safety_settings" + ) + + thinking_conf = None + # Use the user-configurable regex to determine if this is a thinking model. + is_thinking_model = re.search( + valves.THINKING_MODEL_PATTERN, model_name, re.IGNORECASE + ) + log.debug( + f"Model '{model_name}' is classified as a reasoning model: {bool(is_thinking_model)}. " + f"Pattern: '{valves.THINKING_MODEL_PATTERN}'" + ) + + if is_thinking_model: + # Start with the default thinking configuration from valves. + log.info(f"Setting `thinking_budget` to {valves.THINKING_BUDGET} and `include_thoughts` to {valves.SHOW_THINKING_SUMMARY}.") + thinking_conf = types.ThinkingConfig( + thinking_budget=valves.THINKING_BUDGET, + include_thoughts=valves.SHOW_THINKING_SUMMARY, + ) + + # Check if reasoning can be disabled. This happens if the toggle is available but turned OFF by the user. + is_avail, is_on = self._get_toggleable_feature_status( + "gemini_reasoning_toggle", __metadata__ + ) + if is_avail and not is_on: + # This toggle is only applicable to flash/lite models, which support a budget of 0. 
+ is_reasoning_toggleable = "flash" in model_name or "lite" in model_name + if is_reasoning_toggleable: + log.info( + f"Model '{model_name}' supports disabling reasoning, and it is toggled OFF in the UI. " + "Overwriting `thinking_budget` to 0 to disable reasoning." + ) + thinking_conf.thinking_budget = 0 + + # TODO: Take defaults from the general front-end config. + # system_instruction is intentionally left unset here. It will be set by the caller. + gen_content_conf = types.GenerateContentConfig( + temperature=body.get("temperature"), + top_p=body.get("top_p"), + top_k=body.get("top_k"), + max_output_tokens=body.get("max_tokens"), + stop_sequences=body.get("stop"), + safety_settings=safety_settings, + thinking_config=thinking_conf, + ) + + gen_content_conf.response_modalities = ["TEXT"] + if self._is_image_model(model_name, valves.IMAGE_MODEL_PATTERN): + gen_content_conf.response_modalities.append("IMAGE") + + gen_content_conf.tools = [] + + if features.get("google_search_tool"): + if valves.USE_ENTERPRISE_SEARCH and is_vertex_ai: + log.info("Using grounding with Enterprise Web Search as a Tool.") + gen_content_conf.tools.append( + types.Tool(enterprise_web_search=types.EnterpriseWebSearch()) + ) + else: + log.info("Using grounding with Google Search as a Tool.") + gen_content_conf.tools.append( + types.Tool(google_search=types.GoogleSearch()) + ) + elif features.get("google_search_retrieval"): + log.info("Using grounding with Google Search Retrieval.") + gs = types.GoogleSearchRetrieval( + dynamic_retrieval_config=types.DynamicRetrievalConfig( + dynamic_threshold=features.get("google_search_retrieval_threshold") + ) + ) + gen_content_conf.tools.append(types.Tool(google_search_retrieval=gs)) + + # NB: It is not possible to use both Search and Code execution at the same time, + # however, it can be changed later, so let's just handle it as a common error + if features.get("google_code_execution"): + log.info("Using code execution on Google side.") + 
gen_content_conf.tools.append( + types.Tool(code_execution=types.ToolCodeExecution()) + ) + + # Determine if URL context tool should be enabled. + is_avail, is_on = self._get_toggleable_feature_status( + "gemini_url_context_toggle", __metadata__ + ) + enable_url_context = valves.ENABLE_URL_CONTEXT_TOOL # Start with valve default. + if is_avail: + # If the toggle filter is configured, it overrides the valve setting. + enable_url_context = is_on + + if enable_url_context: + if model_name in COMPATIBLE_MODELS_FOR_URL_CONTEXT: + if is_vertex_ai and (len(gen_content_conf.tools) > 0): + log.warning( + "URL context tool is enabled, but Vertex AI is used with other tools. Skipping." + ) + else: + log.info( + f"Model {model_name} is compatible with URL context tool. Enabling." + ) + gen_content_conf.tools.append( + types.Tool(url_context=types.UrlContext()) + ) + else: + log.warning( + f"URL context tool is enabled, but model {model_name} is not in the compatible list. Skipping." + ) + + # Determine if Google Maps grounding should be enabled. + is_avail, is_on = self._get_toggleable_feature_status( + "gemini_maps_grounding_toggle", __metadata__ + ) + if is_avail and is_on: + log.info("Enabling Google Maps grounding tool.") + gen_content_conf.tools.append( + types.Tool(google_maps=types.GoogleMaps()) + ) + + if valves.MAPS_GROUNDING_COORDINATES: + try: + lat_str, lon_str = valves.MAPS_GROUNDING_COORDINATES.split(",") + latitude = float(lat_str.strip()) + longitude = float(lon_str.strip()) + + log.info( + "Using coordinates for Maps grounding: " + f"lat={latitude}, lon={longitude}" + ) + + lat_lng = types.LatLng(latitude=latitude, longitude=longitude) + + # Ensure tool_config and retrieval_config exist before assigning lat_lng. 
+ if not gen_content_conf.tool_config: + gen_content_conf.tool_config = types.ToolConfig() + if not gen_content_conf.tool_config.retrieval_config: + gen_content_conf.tool_config.retrieval_config = ( + types.RetrievalConfig() + ) + + gen_content_conf.tool_config.retrieval_config.lat_lng = lat_lng + + except (ValueError, TypeError) as e: + # This should not happen due to the Pydantic validator, but it's good practice to be safe. + log.error( + "Failed to parse MAPS_GROUNDING_COORDINATES: " + f"'{valves.MAPS_GROUNDING_COORDINATES}'. Error: {e}" + ) + + return gen_content_conf + + # endregion 2.3 GenerateContentConfig assembly + + # region 2.4 Model response processing + async def _unified_response_processor( + self, + response_stream: AsyncIterator[types.GenerateContentResponse], + __request__: Request, + model: str, + event_emitter: EventEmitter, + user_id: str, + chat_id: str, + message_id: str, + start_time: float, + ) -> AsyncGenerator[dict, None]: + """ + Processes an async iterator of GenerateContentResponse objects, yielding + structured dictionary chunks for the Open WebUI frontend. + + This single method handles both streaming and non-streaming (via an adapter) + responses, eliminating code duplication. It processes all parts within each + response chunk, counts tag substitutions for a final toast notification, + and handles post-processing in a finally block. + """ + final_response_chunk: types.GenerateContentResponse | None = None + error_occurred = False + total_substitutions = 0 + first_chunk_received = False + chunk_counter = 0 + + try: + async for chunk in response_stream: + log.trace(f"Processing response chunk #{chunk_counter}:", payload=chunk) + chunk_counter += 1 + final_response_chunk = chunk # Keep the latest chunk for metadata + + if not first_chunk_received: + # This is the first (and possibly only) chunk. 
+ elapsed_time = time.monotonic() - start_time + time_str = f"(+{elapsed_time:.2f}s)" + asyncio.create_task( + event_emitter.emit_status( + f"Response received {time_str}", + done=True, + ) + ) + first_chunk_received = True + + if not (parts := chunk.parts): + log.warning("Chunk has no parts, skipping.") + continue + + # This inner loop makes the method robust. It handles a single chunk + # with many parts (non-streaming) or many chunks with one part (streaming). + for part in parts: + payload, count = await self._process_part( + part, + __request__, + model, + user_id, + chat_id, + message_id, + is_stream=True, # We always yield chunks, so this is effectively true + ) + + if payload: + if count > 0: + total_substitutions += count + log.debug(f"Disabled {count} special tag(s) in a part.") + + structured_chunk = {"choices": [{"delta": payload}]} + yield structured_chunk + + except Exception as e: + error_occurred = True + error_msg = f"Response processing ended with error: {e}" + log.exception(error_msg) + await event_emitter.emit_error(error_msg) + + finally: + # The async for loop has completed, meaning we have received all data + # from the API. Now, we perform final internal processing. + + if total_substitutions > 0 and not error_occurred: + plural_s = "s" if total_substitutions > 1 else "" + toast_msg = ( + f"For clarity, {total_substitutions} special tag{plural_s} " + "were disabled in the response by injecting a zero-width space (ZWS)." 
+ ) + event_emitter.emit_toast(toast_msg, "info") + + if not error_occurred: + yield "data: [DONE]" + log.info("Response processing finished successfully!") + + try: + await self._do_post_processing( + final_response_chunk, + event_emitter, + __request__, + chat_id=chat_id, + message_id=message_id, + stream_error_happened=error_occurred, + start_time=start_time, + ) + except Exception as e: + error_msg = f"Post-processing failed with error:\n\n{e}" + event_emitter.emit_toast(error_msg, "error") + log.exception(error_msg) + + log.debug("Unified response processor has finished.") + + async def _process_part( + self, + part: types.Part, + __request__: Request, + model: str, + user_id: str, + chat_id: str, + message_id: str, + is_stream: bool, + ) -> tuple[dict | None, int]: + """ + Processes a single `types.Part` object and returns a payload dictionary + for the Open WebUI stream, along with a count of tag substitutions. + """ + # Initialize variables to ensure they always have a defined state. + payload: dict[str, str] | None = None + count: int = 0 + key: str = "content" + + match part: + case types.Part(text=str(text), thought=True): + # It's a thought, so we'll use the "reasoning" key. + key = "reasoning" + sanitized_text, count = self._disable_special_tags(text) + + # For non-streaming responses, wrap the thought/reasoning block + # in details block manually for nice front-end rendering. + if not is_stream: + sanitized_text = f'\n
\nThinking...\n{sanitized_text}\n
\n' + + payload = {key: sanitized_text} + case types.Part(text=str(text)): + # It's regular content, using the default "content" key. + sanitized_text, count = self._disable_special_tags(text) + payload = {key: sanitized_text} + case types.Part(inline_data=data) if data: + # Image parts don't need tag disabling. + processed_text = await self._process_image_part( + data, model, user_id, chat_id, message_id, __request__ + ) + payload = {"content": processed_text} + case types.Part(executable_code=code) if code: + # Code blocks are already formatted and safe. + if processed_text := self._process_executable_code_part(code): + payload = {"content": processed_text} + case types.Part(code_execution_result=result) if result: + # Code results are also safe. + if processed_text := self._process_code_execution_result_part(result): + payload = {"content": processed_text} + + return payload, count + + @staticmethod + def _disable_special_tags(text: str) -> tuple[str, int]: + """ + Finds special tags in a text chunk and inserts a Zero-Width Space (ZWS) + to prevent them from being parsed by the Open WebUI backend's legacy system. + This is a safeguard against accidental tag generation by the model. + """ + if not text: + return "", 0 + + # The regex finds '<' followed by an optional '/' and then one of the special tags. + # The inner parentheses group the tags, so the optional '/' applies to all of them. + TAG_REGEX = re.compile( + r"<(/?" + + "(" + + "|".join(re.escape(tag) for tag in SPECIAL_TAGS_TO_DISABLE) + + ")" + + r")" + ) + # The substitution injects a ZWS, e.g., '' becomes ' str: + """ + Handles image data by saving it to the Open WebUI backend and returning a markdown link. 
+ """ + mime_type = inline_data.mime_type + image_data = inline_data.data + + if mime_type and image_data: + image_url = await self._upload_image( + image_data=image_data, + mime_type=mime_type, + model=model, + user_id=user_id, + chat_id=chat_id, + message_id=message_id, + __request__=request, + ) + else: + log.warning( + "Image part has no mime_type or data, cannot upload image. " + "Returning a placeholder message." + ) + image_url = None + + return ( + f"![Generated Image]({image_url})" + if image_url + else "*An error occurred while trying to store this model generated image.*" + ) + + async def _upload_image( + self, + image_data: bytes, + mime_type: str, + model: str, + user_id: str, + chat_id: str, + message_id: str, + __request__: Request, + ) -> str | None: + """ + Helper method that uploads a generated image to the configured Open WebUI storage provider. + Returns the url to the uploaded image. + """ + image_format = mimetypes.guess_extension(mime_type) or ".png" + id = str(uuid.uuid4()) + name = f"generated-image{image_format}" + + # The final filename includes the unique ID to prevent collisions. + imagename = f"{id}_{name}" + image = io.BytesIO(image_data) + + # Create a clean, precise metadata object linking to the generation context. 
+ image_metadata = { + "model": model, + "chat_id": chat_id, + "message_id": message_id, + } + + log.info("Uploading the model-generated image to the Open WebUI backend.") + + try: + contents, image_path = await asyncio.to_thread( + Storage.upload_file, image, imagename, tags={} + ) + except Exception: + log.exception("Error occurred during upload to the storage provider.") + return None + + log.debug("Adding the image file to the Open WebUI files database.") + file_item = await asyncio.to_thread( + Files.insert_new_file, + user_id, + FileForm( + id=id, + filename=name, + path=image_path, + meta={ + "name": name, + "content_type": mime_type, + "size": len(contents), + "data": image_metadata, + }, + ), + ) + if not file_item: + log.warning("Image upload to Open WebUI database likely failed.") + return None + + image_url: str = __request__.app.url_path_for( + "get_file_content_by_id", id=file_item.id + ) + log.success("Image upload finished!") + return image_url + + def _process_executable_code_part( + self, executable_code_part: types.ExecutableCode | None + ) -> str | None: + """ + Processes an executable code part and returns the formatted string representation. + """ + + if not executable_code_part: + return None + + lang_name = "python" # Default language + if executable_code_part_lang_enum := executable_code_part.language: + if lang_name := executable_code_part_lang_enum.name: + lang_name = executable_code_part_lang_enum.name.lower() + else: + log.warning( + f"Could not extract language name from {executable_code_part_lang_enum}. Default to python." 
+ ) + else: + log.warning("Language Enum is None, defaulting to python.") + + if executable_code_part_code := executable_code_part.code: + return f"```{lang_name}\n{executable_code_part_code.rstrip()}\n```\n\n" + return "" + + def _process_code_execution_result_part( + self, code_execution_result_part: types.CodeExecutionResult | None + ) -> str | None: + """ + Processes a code execution result part and returns the formatted string representation. + """ + + if not code_execution_result_part: + return None + + if code_execution_result_part_output := code_execution_result_part.output: + return f"**Output:**\n\n```\n{code_execution_result_part_output.rstrip()}\n```\n\n" + else: + return None + + # endregion 2.4 Model response processing + + # region 2.5 Post-processing + async def _do_post_processing( + self, + model_response: types.GenerateContentResponse | None, + event_emitter: EventEmitter, + request: Request, + chat_id: str, + message_id: str, + *, + stream_error_happened: bool = False, + start_time: float, + ): + """Handles emitting usage, grounding, and sources after the main response/stream is done.""" + log.info("Post-processing the model response.") + + elapsed_time = time.monotonic() - start_time + time_str = f"(+{elapsed_time:.2f}s)" + + if stream_error_happened: + log.warning("Response processing failed due to stream error.") + await event_emitter.emit_status( + f"Response failed [Stream Error] {time_str}", done=True + ) + return + + if not model_response: + log.warning("Response processing skipped: Model response was empty.") + await event_emitter.emit_status( + f"Response failed [Empty Response] {time_str}", done=True + ) + return + + if not (candidate := self._get_first_candidate(model_response.candidates)): + log.warning("Response processing skipped: No candidates found.") + await event_emitter.emit_status( + f"Response failed [No Candidates] {time_str}", done=True + ) + return + + # --- Construct detailed finish reason message --- + reason_name = 
getattr(candidate.finish_reason, "name", "UNSPECIFIED") + reason_description = FINISH_REASON_DESCRIPTIONS.get(reason_name) + finish_message = ( + candidate.finish_message.strip() if candidate.finish_message else None + ) + + details_parts = [part for part in (reason_description, finish_message) if part] + details_str = f": {' '.join(details_parts)}" if details_parts else "" + full_finish_details = f"[{reason_name}]{details_str}" + + # --- Determine final status and emit toast for errors --- + is_normal_finish = candidate.finish_reason in NORMAL_REASONS + + if is_normal_finish: + log.debug(f"Response finished normally. {full_finish_details}") + status_prefix = "Response finished" + else: + log.error(f"Response finished with an error. {full_finish_details}") + status_prefix = "Response failed" + event_emitter.emit_toast( + f"An error occurred. {full_finish_details}", + "error", + ) + + # For the most common success case (STOP), we don't need to show the reason. + final_reason_str = "" if reason_name == "STOP" else f" [{reason_name}]" + await event_emitter.emit_status( + f"{status_prefix}{final_reason_str} {time_str}", + done=True, + is_successful_finish=is_normal_finish, + ) + + # TODO: Emit a toast message if url context retrieval was not successful. + + # --- Emit usage and grounding data --- + # Attempt to emit token usage data even if the finish reason was problematic, + # as usage data might still be available. + if usage_data := self._get_usage_data(model_response): + # Inject the total processing time into the usage payload. 
+ usage_data["completion_time"] = round(elapsed_time, 2) + await event_emitter.emit_usage(usage_data) + + self._add_grounding_data_to_state( + model_response, request, chat_id, message_id, start_time + ) + + def _add_grounding_data_to_state( + self, + response: types.GenerateContentResponse, + request: Request, + chat_id: str, + message_id: str, + pipe_start_time: float, + ): + candidate = self._get_first_candidate(response.candidates) + grounding_metadata_obj = candidate.grounding_metadata if candidate else None + + app_state: State = request.app.state + grounding_key = f"grounding_{chat_id}_{message_id}" + time_key = f"pipe_start_time_{chat_id}_{message_id}" + + if grounding_metadata_obj: + log.debug( + f"Found grounding metadata. Storing in request's app state using key {grounding_key}." + ) + # Using shared `request.app.state` to pass data to Filter.outlet. + # This is necessary because the Pipe and Filter operate on different requests. + app_state._state[grounding_key] = grounding_metadata_obj + app_state._state[time_key] = pipe_start_time + else: + log.debug(f"Response {message_id} does not have grounding metadata.") + + @staticmethod + def _get_usage_data( + response: types.GenerateContentResponse, + ) -> dict[str, Any] | None: + """ + Extracts and cleans usage data from a GenerateContentResponse object. + Returns None if usage metadata is not present. + """ + if not response.usage_metadata: + log.warning( + "Usage metadata is missing from the response. Cannot determine usage." + ) + return None + + usage_data = response.usage_metadata.model_dump() + + # 1. Rename the three core required fields. + usage_data["prompt_tokens"] = usage_data.pop("prompt_token_count") + usage_data["completion_tokens"] = usage_data.pop("candidates_token_count") + usage_data["total_tokens"] = usage_data.pop("total_token_count") + + CORE_KEYS = {"prompt_tokens", "completion_tokens", "total_tokens"} + + # 2. Remove auxiliary keys that have falsy values (None, empty list, etc.). 
+ # We must iterate over a copy of keys to safely delete items from the dict. + for k in list(usage_data.keys()): + if k in CORE_KEYS: + continue + + # If the value is falsy (None, 0, empty list), remove the key. + # This retains non-core data (like modality counts) if it exists. + if not usage_data[k]: + del usage_data[k] + + return usage_data + + # endregion 2.5 Post-processing + + # region 2.6 Logging + # TODO: Move to a separate plugin that does not have any Open WebUI funcitonlity and is only imported by this plugin. + + def _is_flat_dict(self, data: Any) -> bool: + """ + Checks if a dictionary contains only non-dict/non-list values (is one level deep). + """ + if not isinstance(data, dict): + return False + return not any(isinstance(value, (dict, list)) for value in data.values()) + + def _truncate_long_strings( + self, data: Any, max_len: int, truncation_marker: str, truncation_enabled: bool + ) -> Any: + """ + Recursively traverses a data structure (dicts, lists) and truncates + long string values. Creates copies to avoid modifying original data. + + Args: + data: The data structure (dict, list, str, int, float, bool, None) to process. + max_len: The maximum allowed length for string values. + truncation_marker: The string to append to truncated values. + truncation_enabled: Whether truncation is enabled. + + Returns: + A potentially new data structure with long strings truncated. 
+ """ + if not truncation_enabled or max_len <= len(truncation_marker): + # If truncation is disabled or max_len is too small, return original + # Make a copy only if it's a mutable type we might otherwise modify + if isinstance(data, (dict, list)): + return copy.deepcopy(data) # Ensure deep copy for nested structures + return data # Primitives are immutable + + if isinstance(data, str): + if len(data) > max_len: + return data[: max_len - len(truncation_marker)] + truncation_marker + return data # Return original string if not truncated + elif isinstance(data, dict): + # Process dictionary items, creating a new dict + return { + k: self._truncate_long_strings( + v, max_len, truncation_marker, truncation_enabled + ) + for k, v in data.items() + } + elif isinstance(data, list): + # Process list items, creating a new list + return [ + self._truncate_long_strings( + item, max_len, truncation_marker, truncation_enabled + ) + for item in data + ] + else: + # Return non-string, non-container types as is (they are immutable) + return data + + def plugin_stdout_format(self, record: "Record") -> str: + """ + Custom format function for the plugin's logs. + Serializes and truncates data passed under the 'payload' key in extra. 
+ """ + + # Configuration Keys + LOG_OPTIONS_PREFIX = "_log_" + TRUNCATION_ENABLED_KEY = f"{LOG_OPTIONS_PREFIX}truncation_enabled" + MAX_LENGTH_KEY = f"{LOG_OPTIONS_PREFIX}max_length" + TRUNCATION_MARKER_KEY = f"{LOG_OPTIONS_PREFIX}truncation_marker" + DATA_KEY = "payload" + + original_extra = record["extra"] + # Extract the data intended for serialization using the chosen key + data_to_process = original_extra.get(DATA_KEY) + + serialized_data_json = "" + if data_to_process is not None: + try: + serializable_data = pydantic_core.to_jsonable_python( + data_to_process, serialize_unknown=True + ) + + # Determine truncation settings + truncation_enabled = original_extra.get(TRUNCATION_ENABLED_KEY, True) + max_length = original_extra.get(MAX_LENGTH_KEY, 256) + truncation_marker = original_extra.get(TRUNCATION_MARKER_KEY, "[...]") + + # If max_length was explicitly provided, force truncation enabled + if MAX_LENGTH_KEY in original_extra: + truncation_enabled = True + + # Truncate long strings + truncated_data = self._truncate_long_strings( + serializable_data, + max_length, + truncation_marker, + truncation_enabled, + ) + + # Serialize the (potentially truncated) data + if self._is_flat_dict(truncated_data) and not isinstance( + truncated_data, list + ): + json_string = json.dumps( + truncated_data, separators=(",", ":"), default=str + ) + # Add a simple prefix if it's compact + serialized_data_json = " - " + json_string + else: + json_string = json.dumps(truncated_data, indent=2, default=str) + # Prepend with newline for readability + serialized_data_json = "\n" + json_string + + except (TypeError, ValueError) as e: # Catch specific serialization errors + serialized_data_json = f" - {{Serialization Error: {e}}}" + except ( + Exception + ) as e: # Catch any other unexpected errors during processing + serialized_data_json = f" - {{Processing Error: {e}}}" + + # Add the final JSON string (or error message) back into the record + record["extra"]["_plugin_serialized_data"] 
= serialized_data_json + + # Base template + base_template = ( + "{time:YYYY-MM-DD HH:mm:ss.SSS} | " + "{level: <8} | " + "{name}:{function}:{line} - " + "{message}" + ) + + # Append the serialized data + base_template += "{extra[_plugin_serialized_data]}" + # Append the exception part + base_template += "\n{exception}" + # Return the format string template + return base_template.rstrip() + + @cache + def _add_log_handler(self, log_level: str): + """ + Adds or updates the loguru handler specifically for this plugin. + Includes logic for serializing and truncating extra data. + The handler is added only if the log_level has changed since the last call. + """ + + def plugin_filter(record: "Record"): + """Filter function to only allow logs from this plugin (based on module name).""" + return record["name"] == __name__ + + # Get the desired level name and number + desired_level_name = log_level + try: + # Use the public API to get level details + desired_level_info = log.level(desired_level_name) + desired_level_no = desired_level_info.no + except ValueError: + log.error( + f"Invalid LOG_LEVEL '{desired_level_name}' configured for plugin {__name__}. Cannot add/update handler." + ) + return # Stop processing if the level is invalid + + # Access the internal state of the log + handlers: dict[int, "Handler"] = log._core.handlers # type: ignore + handler_id_to_remove = None + found_correct_handler = False + + for handler_id, handler in handlers.items(): + existing_filter = handler._filter # Access internal attribute + + # Check if the filter matches our plugin_filter + # Comparing function objects directly can be fragile if they are recreated. + # Comparing by name and module is more robust for functions defined at module level. 
+ is_our_filter = ( + existing_filter is not None # Make sure a filter is set + and hasattr(existing_filter, "__name__") + and existing_filter.__name__ == plugin_filter.__name__ + and hasattr(existing_filter, "__module__") + and existing_filter.__module__ == plugin_filter.__module__ + ) + + if is_our_filter: + existing_level_no = handler.levelno + log.trace( + f"Found existing handler {handler_id} for {__name__} with level number {existing_level_no}." + ) + + # Check if the level matches the desired level + if existing_level_no == desired_level_no: + log.debug( + f"Handler {handler_id} for {__name__} already exists with the correct level '{desired_level_name}'." + ) + found_correct_handler = True + break # Found the correct handler, no action needed + else: + # Found our handler, but the level is wrong. Mark for removal. + log.info( + f"Handler {handler_id} for {__name__} found, but log level differs " + f"(existing: {existing_level_no}, desired: {desired_level_no}). " + f"Removing it to update." + ) + handler_id_to_remove = handler_id + break # Found the handler to replace, stop searching + + # Remove the old handler if marked for removal + if handler_id_to_remove is not None: + try: + log.remove(handler_id_to_remove) + log.debug(f"Removed handler {handler_id_to_remove} for {__name__}.") + except ValueError: + # This might happen if the handler was somehow removed between the check and now + log.warning( + f"Could not remove handler {handler_id_to_remove} for {__name__}. It might have already been removed." + ) + # If removal failed but we intended to remove, we should still proceed to add + # unless found_correct_handler is somehow True (which it shouldn't be if handler_id_to_remove was set). 
+ + # Add a new handler if no correct one was found OR if we just removed an incorrect one + if not found_correct_handler: + log.add( + sys.stdout, + level=desired_level_name, + format=self.plugin_stdout_format, + filter=plugin_filter, + ) + log.debug( + f"Added new handler to loguru for {__name__} with level {desired_level_name}." + ) + + # endregion 2.6 Logging + + # region 2.7 Utility helpers + + @staticmethod + def _get_toggleable_feature_status( + filter_id: str, + __metadata__: "Metadata", + ) -> tuple[bool, bool]: + """ + Checks the complete status of a toggleable filter (function). + + This function performs a series of checks to determine if a feature + is available for use and if the user has activated it. + + 1. Checks if the filter is installed. + 2. Checks if the filter's master toggle is active in the Functions dashboard. + 3. Checks if the filter is enabled for the current model (or is global). + 4. Checks if the user has toggled the feature ON for the current request. + + Args: + filter_id: The ID of the filter to check. + __metadata__: The metadata object for the current request. + + Returns: + A tuple (is_available: bool, is_toggled_on: bool). + - is_available: True if the filter is installed, active, and configured for the model. + - is_toggled_on: True if the user has the toggle ON in the UI for this request. + """ + # 1. Check if the filter is installed + f = Functions.get_function_by_id(filter_id) + if not f: + log.warning( + f"The '{filter_id}' filter is not installed. " + "Install it to use the corresponding front-end toggle." + ) + return (False, False) + + # 2. Check if the master toggle is active + if not f.is_active: + log.warning( + f"The '{filter_id}' filter is installed but is currently disabled in the " + "Functions dashboard (master toggle is off). Enable it to make it available." + ) + return (False, False) + + # 3. 
Check if the filter is enabled for the model or is global + model_info = __metadata__.get("model", {}).get("info", {}) + model_filter_ids = model_info.get("meta", {}).get("filterIds", []) + is_enabled_for_model = filter_id in model_filter_ids or f.is_global + + log.debug( + f"Checking model enablement for '{filter_id}': in_model_filters={filter_id in model_filter_ids}, " + f"is_global={f.is_global} -> is_enabled={is_enabled_for_model}" + ) + + if not is_enabled_for_model: + # This is a configuration issue, not a user-facing warning. Debug is appropriate. + model_id = __metadata__.get("model", {}).get("id", "Unknown") + log.debug(f"Filter '{filter_id}' is not enabled for model '{model_id}' and is not global.") + return (False, False) + + # 4. Check if the user has toggled the feature ON for this request + user_toggled_ids = __metadata__.get("filter_ids", []) + is_toggled_on = filter_id in user_toggled_ids + + if is_toggled_on: + log.info( + f"Feature '{filter_id}' is available and enabled by the front-end toggle for this request." + ) + else: + log.debug( + f"Feature '{filter_id}' is available but not enabled by the front-end toggle for this request." + ) + + return (True, is_toggled_on) + + @staticmethod + def _get_merged_valves( + default_valves: "Pipe.Valves", + user_valves: "Pipe.UserValves | None", + user_email: str, + ) -> "Pipe.Valves": + """ + Merges UserValves into a base Valves configuration. + + The general rule is that if a field in UserValves is not None, it overrides + the corresponding field in the default_valves. Otherwise, the default_valves + field value is used. + + Exceptions: + - If default_valves.USER_MUST_PROVIDE_AUTH_CONFIG is True, then GEMINI_API_KEY and + VERTEX_PROJECT in the merged result will be taken directly from + user_valves (even if they are None), ignoring the values in default_valves. + + Args: + default_valves: The base Valves object with default configurations. 
+ user_valves: An optional UserValves object with user-specific overrides. + If None, a copy of default_valves is returned. + + Returns: + A new Valves object representing the merged configuration. + """ + if user_valves is None: + # If no user-specific valves are provided, return a copy of the default valves. + return default_valves.model_copy(deep=True) + + # Start with the values from the base `Valves` + merged_data = default_valves.model_dump() + + # Override with non-None values from `UserValves` + # Iterate over fields defined in the UserValves model + for field_name in Pipe.UserValves.model_fields: + # getattr is safe as field_name comes from model_fields of user_valves' type + user_value = getattr(user_valves, field_name) + if user_value is not None and user_value != "": + # Only update if the field is also part of the main Valves model + # (keys of merged_data are fields of default_valves) + if field_name in merged_data: + merged_data[field_name] = user_value + + user_whitelist = ( + default_valves.AUTH_WHITELIST.split(",") + if default_valves.AUTH_WHITELIST + else [] + ) + + # Apply special logic based on default_valves.USER_MUST_PROVIDE_AUTH_CONFIG + if ( + default_valves.USER_MUST_PROVIDE_AUTH_CONFIG + and user_email not in user_whitelist + ): + # If USER_MUST_PROVIDE_AUTH_CONFIG is True and user is not in the whitelist, + # then user must provide their own GEMINI_API_KEY + # User is disallowed from using Vertex AI in this case. + merged_data["GEMINI_API_KEY"] = user_valves.GEMINI_API_KEY + merged_data["VERTEX_PROJECT"] = None + merged_data["USE_VERTEX_AI"] = False + + # Create a new Valves instance with the merged data. + # Pydantic will validate the data against the Valves model definition during instantiation. 
+ return Pipe.Valves(**merged_data) + + def _get_first_candidate( + self, candidates: list[types.Candidate] | None + ) -> types.Candidate | None: + """Selects the first candidate, logging a warning if multiple exist.""" + if not candidates: + # Logging warnings is handled downstream. + return None + if len(candidates) > 1: + log.warning("Multiple candidates found, defaulting to first candidate.") + return candidates[0] + + def _check_companion_filter_version(self, features: "Features | dict") -> None: + """ + Checks for the presence and version compatibility of the Gemini Manifold Companion filter. + Logs warnings if the filter is missing or outdated. + """ + companion_version = features.get("gemini_manifold_companion_version") + + if companion_version is None: + log.warning( + "Gemini Manifold Companion filter not detected. " + "While this pipe can function without it, you are missing out on key features like native Google Search, " + "Code Execution, and direct document uploads. Please install the companion filter or ensure it is active " + "for this model to unlock the full functionality." + ) + else: + # Comparing tuples of integers is a robust way to handle versions like '1.10.0' vs '1.2.0'. + try: + companion_v_tuple = tuple(map(int, companion_version.split("."))) + recommended_v_tuple = tuple( + map(int, RECOMMENDED_COMPANION_VERSION.split(".")) + ) + + if companion_v_tuple < recommended_v_tuple: + log.warning( + f"The installed Gemini Manifold Companion filter version ({companion_version}) is older than " + f"the recommended version ({RECOMMENDED_COMPANION_VERSION}). " + "Some features may not work as expected. Please update the filter." + ) + else: + log.debug( + f"Gemini Manifold Companion filter detected with version: {companion_version}" + ) + except (ValueError, TypeError): + # This handles cases where the version string is malformed (e.g., '1.a.0'). + log.error( + f"Could not parse companion version string: '{companion_version}'. 
Version check skipped." + ) + + # endregion 2.7 Utility helpers + + # endregion 2. Helper methods inside the Pipe class