From ab0daba80dd16b7265dcac4fd761dab9a55a0d7f Mon Sep 17 00:00:00 2001 From: fujie Date: Wed, 14 Jan 2026 23:31:58 +0800 Subject: [PATCH] docs: update documentation, add new filters, remove deprecated plugins --- .github/copilot-instructions.md | 2054 +++------- .../{knowledge-card.md => flash-card.md} | 6 +- ...{knowledge-card.zh.md => flash-card.zh.md} | 2 +- docs/plugins/actions/index.md | 18 +- docs/plugins/actions/smart-mind-map.md | 2 +- docs/plugins/actions/smart-mind-map.zh.md | 2 +- .../filters/gemini-manifold-companion.md | 54 - .../filters/gemini-manifold-companion.zh.md | 54 - docs/plugins/filters/index.md | 30 +- .../filters/multi-model-context-merger.md | 35 + .../filters/multi-model-context-merger.zh.md | 35 + docs/plugins/filters/web-gemini-multimodel.md | 51 + .../filters/web-gemini-multimodel.zh.md | 51 + docs/plugins/pipes/gemini-manifold.md | 106 - docs/plugins/pipes/gemini-manifold.zh.md | 106 - docs/plugins/pipes/index.md | 12 - .../gemini_manifold_companion.py | 1102 ------ plugins/pipes/gemini_mainfold/README.md | 54 - plugins/pipes/gemini_mainfold/README_CN.md | 54 - .../pipes/gemini_mainfold/gemini_manifold.py | 3382 ----------------- 20 files changed, 779 insertions(+), 6431 deletions(-) rename docs/plugins/actions/{knowledge-card.md => flash-card.md} (94%) rename docs/plugins/actions/{knowledge-card.zh.md => flash-card.zh.md} (98%) delete mode 100644 docs/plugins/filters/gemini-manifold-companion.md delete mode 100644 docs/plugins/filters/gemini-manifold-companion.zh.md create mode 100644 docs/plugins/filters/multi-model-context-merger.md create mode 100644 docs/plugins/filters/multi-model-context-merger.zh.md create mode 100644 docs/plugins/filters/web-gemini-multimodel.md create mode 100644 docs/plugins/filters/web-gemini-multimodel.zh.md delete mode 100644 docs/plugins/pipes/gemini-manifold.md delete mode 100644 docs/plugins/pipes/gemini-manifold.zh.md delete mode 100644 
plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py delete mode 100644 plugins/pipes/gemini_mainfold/README.md delete mode 100644 plugins/pipes/gemini_mainfold/README_CN.md delete mode 100644 plugins/pipes/gemini_mainfold/gemini_manifold.py diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index c8114c5..abc0f2e 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -6,9 +6,11 @@ This document defines the standard conventions and best practices for OpenWebUI --- -## 📚 双语版本要求 (Bilingual Version Requirements) +## 🏗️ 项目结构与命名 (Project Structure & Naming) -### 插件代码 (Plugin Code) +### 1. 双语版本要求 (Bilingual Version Requirements) + +#### 插件代码 (Plugin Code) 每个插件必须提供两个版本: @@ -24,14 +26,14 @@ plugins/actions/export_to_docx/ └── README_CN.md # Chinese documentation ``` -### 文档 (Documentation) +#### 文档 (Documentation) 每个插件目录必须包含双语 README 文件: - `README.md` - English documentation - `README_CN.md` - 中文文档 -### README 结构规范 (README Structure Standard) +#### README 结构规范 (README Structure Standard) 所有插件 README 必须遵循以下统一结构顺序: @@ -46,118 +48,7 @@ plugins/actions/export_to_docx/ 7. **配置参数 (Configuration/Valves)**: 使用表格格式,包含参数名、默认值、描述 8. **其他 (Others)**: 支持的模板类型、语法示例、故障排除等 -完整示例 (Full Example): - -```markdown -# 📊 Smart Plugin - -**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.0.0 | **Project:** [Awesome OpenWebUI](https://github.com/Fu-Jie/awesome-openwebui) - -A one-sentence description of this plugin. - -## 🔥 What's New in v1.0.0 - -- ✨ **Feature Name**: Brief description of the feature. -- 🔧 **Configuration Change**: What changed in settings. -- 🐛 **Bug Fix**: What was fixed. - -## ✨ Key Features - -- 🚀 **Feature A**: Description of feature A. -- 🎨 **Feature B**: Description of feature B. -- 📥 **Feature C**: Description of feature C. - -## 🚀 How to Use - -1. **Install**: Search for "Plugin Name" in the Open WebUI Community and install. -2. 
**Trigger**: Enter your text in the chat, then click the **Action Button**. -3. **Result**: View the generated result. - -## ⚙️ Configuration (Valves) - -| Parameter | Default | Description | -| :--- | :--- | :--- | -| **Show Status (SHOW_STATUS)** | `True` | Whether to show status updates. | -| **Model ID (MODEL_ID)** | `Empty` | LLM model for processing. | -| **Output Mode (OUTPUT_MODE)** | `image` | `image` for static, `html` for interactive. | - -## 🛠️ Supported Types (Optional) - -| Category | Type Name | Use Case | -| :--- | :--- | :--- | -| **Category A** | `type-a`, `type-b` | Use case description | - -## 📝 Advanced Example (Optional) - -\`\`\`syntax -example code or syntax here -\`\`\` -``` - -### 文档内容要求 (Content Requirements) - -- **新增功能**: 必须在 "What's New" 章节中明确列出,使用 Emoji + 粗体标题格式 (仅保留最近 3 个版本的更新记录)。 -- **双语**: 必须提供 `README.md` (英文) 和 `README_CN.md` (中文)。 -- **表格对齐**: 配置参数表格使用左对齐 `:---`。 -- **Emoji 规范**: 标题使用合适的 Emoji 增强可读性。 - -### 官方文档 (Official Documentation) - -如果插件被合并到主仓库,还需更新 `docs/` 目录下的相关文档: -- `docs/plugins/{type}/plugin-name.md` -- `docs/plugins/{type}/plugin-name.zh.md` - -其中 `{type}` 对应插件类型(如 `actions`, `filters`, `pipes` 等)。 - ---- - -## 📝 文档字符串规范 (Docstring Standard) - -每个插件文件必须以标准化的文档字符串开头: - -```python -""" -title: 插件名称 (Plugin Name) -author: Fu-Jie -author_url: https://github.com/Fu-Jie -funding_url: https://github.com/Fu-Jie/awesome-openwebui -version: 0.1.0 -icon_url: data:image/svg+xml;base64, -requirements: dependency1==1.0.0, dependency2>=2.0.0 -description: 插件功能的简短描述。Brief description of plugin functionality. 
-""" -``` - -### 字段说明 (Field Descriptions) - -| 字段 (Field) | 说明 (Description) | 示例 (Example) | -|--------------|---------------------|----------------| -| `title` | 插件显示名称 | `Export to Word` / `导出为 Word` | -| `author` | 作者名称 | `Fu-Jie` | -| `author_url` | 作者主页链接 | `https://github.com/Fu-Jie` | -| `funding_url` | 赞助/项目链接 | `https://github.com/Fu-Jie/awesome-openwebui` | -| `version` | 语义化版本号 | `0.1.0`, `1.2.3` | -| `icon_url` | 图标 (Base64 编码的 SVG) | 见下方图标规范 | -| `requirements` | 额外依赖 (仅 OpenWebUI 环境未安装的) | `python-docx==1.1.2` | -| `description` | 功能描述 | `将对话导出为 Word 文档` | - -### 图标规范 (Icon Guidelines) - -- 图标来源:从 [Lucide Icons](https://lucide.dev/icons/) 获取符合插件功能的图标 -- 格式:Base64 编码的 SVG -- 获取方法:从 Lucide 下载 SVG,然后使用 Base64 编码 -- 示例格式: -``` -icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0i...(完整的 Base64 编码字符串) -``` - ---- - -(Author info is now part of the top metadata section, see "README Structure Standard" above) - ---- - -## 🏗️ 插件目录结构 (Plugin Directory Structure) +### 2. 插件目录结构 (Plugin Directory Structure) ``` plugins/ @@ -183,9 +74,71 @@ plugins/ └── ... ``` +### 3. 文档字符串规范 (Docstring Standard) + +每个插件文件必须以标准化的文档字符串开头: + +```python +""" +title: 插件名称 (Plugin Name) +author: Fu-Jie +author_url: https://github.com/Fu-Jie/awesome-openwebui +funding_url: https://github.com/open-webui +version: 0.1.0 +icon_url: data:image/svg+xml;base64, +requirements: dependency1==1.0.0, dependency2>=2.0.0 +description: 插件功能的简短描述。Brief description of plugin functionality. 
+""" +``` + +#### 字段说明 (Field Descriptions) + +| 字段 (Field) | 说明 (Description) | 示例 (Example) | +|--------------|---------------------|----------------| +| `title` | 插件显示名称 | `Export to Word` / `导出为 Word` | +| `author` | 作者名称 | `Fu-Jie` | +| `author_url` | 作者主页链接 | `https://github.com/Fu-Jie/awesome-openwebui` | +| `funding_url` | 赞助/项目链接 | `https://github.com/open-webui` | +| `version` | 语义化版本号 | `0.1.0`, `1.2.3` | +| `icon_url` | 图标 (Base64 编码的 SVG) | 见下方图标规范 | +| `requirements` | 额外依赖 (仅 OpenWebUI 环境未安装的) | `python-docx==1.1.2` | +| `description` | 功能描述 | `将对话导出为 Word 文档` | + +#### 图标规范 (Icon Guidelines) + +- 图标来源:从 [Lucide Icons](https://lucide.dev/icons/) 获取符合插件功能的图标 +- 格式:Base64 编码的 SVG +- 获取方法:从 Lucide 下载 SVG,然后使用 Base64 编码 +- 示例格式: +``` +icon_url: data:image/svg+xml;base64,PHN2ZyB4bWxucz0i...(完整的 Base64 编码字符串) +``` + +### 4. 依赖管理 (Dependencies) + +#### requirements 字段规则 + +- 仅列出 OpenWebUI 环境中**未安装**的依赖 +- 使用精确版本号 +- 多个依赖用逗号分隔 + +```python +""" +requirements: python-docx==1.1.2, openpyxl==3.1.2 +""" +``` + +常见 OpenWebUI 已安装依赖(无需在 requirements 中声明): +- `pydantic` +- `fastapi` +- `logging` +- `re`, `json`, `datetime`, `io`, `base64` + --- -## ⚙️ Valves 配置规范 (Valves Configuration) +## 💻 核心开发规范 (Core Development Standards) + +### 1. Valves 配置规范 (Valves Configuration) 使用 Pydantic BaseModel 定义可配置参数: @@ -198,6 +151,10 @@ class Action: default=True, description="Whether to show operation status updates." ) + SHOW_DEBUG_LOG: bool = Field( + default=False, + description="Whether to print debug logs in the browser console." + ) MODEL_ID: str = Field( default="", description="Built-in LLM Model ID. If empty, uses current conversation model." @@ -206,27 +163,92 @@ class Action: default=50, description="Minimum text length required for processing (characters)." ) - CLEAR_PREVIOUS_HTML: bool = Field( - default=False, - description="Whether to clear previous plugin results." 
- ) - MESSAGE_COUNT: int = Field( - default=1, - description="Number of recent messages to use for generation." - ) def __init__(self): self.valves = self.Valves() ``` -### 命名规则 (Naming Convention) +#### 命名规则 (Naming Convention) - 所有 Valves 字段使用 **大写下划线** (UPPER_SNAKE_CASE) - 示例:`SHOW_STATUS`, `MODEL_ID`, `MIN_TEXT_LENGTH` ---- +### 2. 上下文获取规范 (Context Access) -## 📤 事件发送规范 (Event Emission) +所有插件**必须**使用 `_get_user_context` 和 `_get_chat_context` 方法来安全获取信息,而不是直接访问 `__user__` 或 `body`。 + +#### 用户上下文 (User Context) + +```python +def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]: + """安全提取用户上下文信息。""" + if isinstance(__user__, (list, tuple)): + user_data = __user__[0] if __user__ else {} + elif isinstance(__user__, dict): + user_data = __user__ + else: + user_data = {} + + return { + "user_id": user_data.get("id", "unknown_user"), + "user_name": user_data.get("name", "User"), + "user_language": user_data.get("language", "en-US"), + } +``` + +#### 聊天上下文 (Chat Context) + +```python +def _get_chat_context(self, body: dict, __metadata__: Optional[dict] = None) -> Dict[str, str]: + """ + 统一提取聊天上下文信息 (chat_id, message_id)。 + 优先从 body 中提取,其次从 metadata 中提取。 + """ + chat_id = "" + message_id = "" + + # 1. 尝试从 body 获取 + if isinstance(body, dict): + chat_id = body.get("chat_id", "") + message_id = body.get("id", "") # message_id 在 body 中通常是 id + + # 再次检查 body.metadata + if not chat_id or not message_id: + body_metadata = body.get("metadata", {}) + if isinstance(body_metadata, dict): + if not chat_id: + chat_id = body_metadata.get("chat_id", "") + if not message_id: + message_id = body_metadata.get("message_id", "") + + # 2. 
尝试从 __metadata__ 获取 (作为补充) + if (__metadata__ and isinstance(__metadata__, dict)): + if not chat_id: + chat_id = __metadata__.get("chat_id", "") + if not message_id: + message_id = __metadata__.get("message_id", "") + + return { + "chat_id": str(chat_id).strip(), + "message_id": str(message_id).strip(), + } +``` + +#### 使用示例 + +```python +async def action(self, body: dict, __user__: Optional[Dict[str, Any]] = None, __metadata__: Optional[dict] = None, ...): + user_ctx = self._get_user_context(__user__) + chat_ctx = self._get_chat_context(body, __metadata__) + + user_id = user_ctx["user_id"] + chat_id = chat_ctx["chat_id"] + message_id = chat_ctx["message_id"] +``` + +### 3. 事件发送与日志规范 (Event Emission & Logging) + +#### 事件发送 (Event Emission) 必须实现以下辅助方法: @@ -247,31 +269,28 @@ async def _emit_notification( self, emitter: Optional[Callable[[Any], Awaitable[None]]], content: str, - type: str = "info", + ntype: str = "info", ): """Emits a notification event (info, success, warning, error).""" if emitter: await emitter( - {"type": "notification", "data": {"type": type, "content": content}} + {"type": "notification", "data": {"type": ntype, "content": content}} ) ``` ---- +#### 前端控制台调试 (Frontend Console Debugging) - **优先推荐** -## 📋 日志规范 (Logging Standard) - -### 1. 
前端控制台调试 (Frontend Console Debugging) - **优先推荐 (Preferred)** - -对于需要实时查看数据流、排查 UI 交互或内容变更的场景,**优先使用**前端控制台日志。这种方式可以直接在浏览器 DevTools (F12) 中查看,无需访问服务端日志。 - -**实现方式**: 通过 `__event_emitter__` 发送 `type: "execute"` 事件执行 JS 代码。 +对于需要实时查看数据流、排查 UI 交互或内容变更的场景,**优先使用**前端控制台日志。 ```python -import json - -async def _emit_debug_log(self, __event_emitter__, title: str, data: dict): - """在浏览器控制台打印结构化调试日志""" - if not self.valves.show_debug_log or not __event_emitter__: +async def _emit_debug_log( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + title: str, + data: dict, +): + """Print structured debug logs in the browser console.""" + if not self.valves.SHOW_DEBUG_LOG or not emitter: return try: @@ -282,20 +301,13 @@ async def _emit_debug_log(self, __event_emitter__, title: str, data: dict): console.groupEnd(); }})(); """ - - await __event_emitter__({ - "type": "execute", - "data": {"code": js_code} - }) + + await emitter({"type": "execute", "data": {"code": js_code}}) except Exception as e: print(f"Error emitting debug log: {e}") ``` -**配置要求**: -- 在 `Valves` 中添加 `show_debug_log: bool` 开关,默认关闭。 -- 仅在开关开启时发送日志。 - -### 2. 服务端日志 (Server-side Logging) +#### 服务端日志 (Server-side Logging) 用于记录系统级错误、异常堆栈或无需前端感知的后台任务。 @@ -318,46 +330,89 @@ logger.info(f"Action: {__name__} started") logger.error(f"Processing failed: {e}", exc_info=True) ``` ---- +### 4. 数据库连接规范 (Database Connection) -## 🛡️ Filter 插件开发规范 (Filter Plugin Standards) +当插件需要持久化存储时,**必须**复用 Open WebUI 的内部数据库连接,而不是创建新的数据库引擎。 -### 1. 状态管理 (State Management) - **关键 (Critical)** +```python +# Open WebUI internal database (re-use shared connection) +from open_webui.internal.db import engine as owui_engine +from open_webui.internal.db import Session as owui_Session +from open_webui.internal.db import Base as owui_Base -Filter 实例在 OpenWebUI 生命周期中是**单例 (Singleton)**。这意味着同一个 Filter 实例会处理所有并发请求。 +class PluginTable(owui_Base): + # ... definition ... 
+ pass -- **❌ 禁止 (Prohibited)**: 使用 `self` 存储请求级别的临时状态(如 `self.temp_state`)。这会导致严重的**竞态条件 (Race Conditions)**,即一个请求的数据被另一个请求覆盖。 -- **✅ 推荐 (Recommended)**: - - **无状态设计**: `inlet` 和 `outlet` 应该尽可能独立。 - - **重新计算**: 在 `outlet` 中根据 `body['messages']` 重新计算所需的状态,而不是依赖 `inlet` 传递。 - - **元数据传递**: 如果必须传递状态,尝试使用 `body` 中的临时字段(需谨慎处理清理)或 `__metadata__`(如果可写)。 +class Filter: + def __init__(self): + self._db_engine = owui_engine + self._SessionLocal = owui_Session + # ... +``` -### 2. 摘要注入角色 (Summary Injection Role) +### 5. 文件存储访问规范 (File Storage Access) -当注入历史摘要或上下文时: +插件在访问用户上传的文件或生成的图片时,必须实现多级回退机制以兼容所有存储配置(本地磁盘、S3/MinIO 等)。 -- **❌ 避免 (Avoid)**: 使用 `system` 角色(部分模型对 system prompt 位置敏感或不支持中间插入)。 -- **❌ 避免 (Avoid)**: 使用 `user` 角色(容易混淆用户真实意图)。 -- **✅ 推荐 (Recommended)**: 使用 **`assistant`** 角色。这通常被模型视为上下文历史的一部分,兼容性最好。 +推荐实现以下优先级的文件获取策略: +1. 数据库直接存储 (小文件) +2. S3 直连 (对象存储 - 最快) +3. 本地文件系统 (磁盘存储) +4. 公共 URL 下载 +5. 内部 API 回调 (通用兜底方案) -### 3. 模型默认值 (Model Defaults) +(详细实现参考 `plugins/actions/export_to_docx/export_to_word.py` 中的 `_image_bytes_from_owui_file_id` 方法) -- **❌ 禁止 (Prohibited)**: 硬编码特定模型 ID(如 `gpt-3.5-turbo`)作为默认值。这会导致非 OpenAI 用户出错。 -- **✅ 推荐 (Recommended)**: - - 默认值设为 `None` 或空字符串。 - - 优先使用当前对话的模型 (`body.get("model")`)。 - - 如果必须指定,通过 `Valves` 让用户配置。 +### 6. 长时间运行任务通知 (Long-running Task Notifications) -### 4. 异步处理 (Async Processing) +如果一个前台任务的运行时间预计超过 **3秒**,必须实现用户通知机制。 -对于耗时的后台任务(如摘要生成、日志记录): +```python +import asyncio -- **✅ 推荐 (Recommended)**: 在 `outlet` 中使用 `asyncio.create_task()` 启动后台任务,确保不阻塞用户响应。 -- **✅ 推荐 (Recommended)**: 在后台任务中捕获所有异常,防止崩溃影响主进程。 +async def long_running_task_with_notification(self, event_emitter, ...): + # 定义实际任务 + async def actual_task(): + # ... 执行耗时操作 ... 
+ return result + + # 定义通知任务 + async def notification_task(): + # 立即发送首次通知 + if event_emitter: + await self._emit_notification(event_emitter, "正在使用 AI 生成中...", "info") + + # 之后每5秒通知一次 + while True: + await asyncio.sleep(5) + if event_emitter: + await self._emit_notification(event_emitter, "仍在处理中,请耐心等待...", "info") + + # 并发运行任务 + task_future = asyncio.ensure_future(actual_task()) + notify_future = asyncio.ensure_future(notification_task()) + + # 等待任务完成 + done, pending = await asyncio.wait( + [task_future, notify_future], + return_when=asyncio.FIRST_COMPLETED + ) + + # 取消通知任务 + if not notify_future.done(): + notify_future.cancel() + + # 获取结果 + if task_future in done: + return task_future.result() +``` --- -## 🎨 HTML 注入规范 (HTML Injection) +## ⚡ Action 插件规范 (Action Plugin Standards) + +### 1. HTML 注入规范 (HTML Injection) 使用统一的标记和结构: @@ -383,433 +438,219 @@ HTML_WRAPPER_TEMPLATE = """ """ ``` -必须实现 HTML 合并方法以支持多次运行插件: +必须实现 HTML 合并方法 `_remove_existing_html` 和 `_merge_html` 以支持多次运行插件。 + +### 2. HTML 生成插件的完整模板 (Complete Template) + +以下是生成 HTML 输出的 Action 插件需要包含的完整公共代码: ```python -def _remove_existing_html(self, content: str) -> str: - """Removes existing plugin-generated HTML code blocks.""" - pattern = r"```html\s*[\s\S]*?```" - return re.sub(pattern, "", content).strip() - -def _merge_html( - self, - existing_html: str, - new_content: str, - new_styles: str = "", - new_scripts: str = "", - user_language: str = "en-US", -) -> str: - """ - Merges new content into existing HTML container. - See ACTION_PLUGIN_TEMPLATE.py for full implementation. 
- """ - pass # Implement based on template -``` - ---- - -## 🌍 多语言支持 (Internationalization) - -从用户上下文获取语言偏好: - -```python -def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]: - """Extracts user context information.""" - if isinstance(__user__, (list, tuple)): - user_data = __user__[0] if __user__ else {} - elif isinstance(__user__, dict): - user_data = __user__ - else: - user_data = {} - - return { - "user_id": user_data.get("id", "unknown_user"), - "user_name": user_data.get("name", "User"), - "user_language": user_data.get("language", "en-US"), - } -``` - -中文版插件默认值: -- `user_language`: `"zh-CN"` -- `user_name`: `"用户"` - -英文版插件默认值: -- `user_language`: `"en-US"` -- `user_name`: `"User"` - -### 用户上下文获取规范 (User Context Retrieval) - -所有插件**必须**使用 `_get_user_context` 方法来安全获取用户信息,而不是直接访问 `__user__` 参数。这是因为 `__user__` 的类型可能是 `dict`、`list`、`tuple` 或其他类型,直接调用 `.get()` 可能导致 `AttributeError`。 - -All plugins **MUST** use the `_get_user_context` method to safely retrieve user information instead of directly accessing the `__user__` parameter. This is because `__user__` can be of type `dict`, `list`, `tuple`, or other types, and directly calling `.get()` may cause `AttributeError`. 
- -**正确做法 (Correct):** - -```python -def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]: - """安全提取用户上下文信息。""" - if isinstance(__user__, (list, tuple)): - user_data = __user__[0] if __user__ else {} - elif isinstance(__user__, dict): - user_data = __user__ - else: - user_data = {} - - return { - "user_id": user_data.get("id", "unknown_user"), - "user_name": user_data.get("name", "User"), - "user_language": user_data.get("language", "en-US"), - } - -async def action(self, body: dict, __user__: Optional[Dict[str, Any]] = None, ...): - user_ctx = self._get_user_context(__user__) - user_id = user_ctx["user_id"] - user_name = user_ctx["user_name"] - user_language = user_ctx["user_language"] -``` - -**禁止的做法 (Prohibited):** - -```python -# ❌ 禁止: 直接调用 __user__.get() -# ❌ Prohibited: Directly calling __user__.get() -user_id = __user__.get("id") if __user__ else "default" - -# ❌ 禁止: 假设 __user__ 一定是 dict -# ❌ Prohibited: Assuming __user__ is always a dict -user_name = __user__["name"] -``` - ---- - -## 📦 依赖管理 (Dependencies) - -### requirements 字段规则 - -- 仅列出 OpenWebUI 环境中**未安装**的依赖 -- 使用精确版本号 -- 多个依赖用逗号分隔 - -```python -""" -requirements: python-docx==1.1.2, openpyxl==3.1.2 -""" -``` - -常见 OpenWebUI 已安装依赖(无需在 requirements 中声明): -- `pydantic` -- `fastapi` -- `logging` -- `re`, `json`, `datetime`, `io`, `base64` - ---- - -## 🗄️ 数据库连接规范 (Database Connection) - -### 复用 OpenWebUI 内部连接 (Re-use OpenWebUI's Internal Connection) - -当插件需要持久化存储时,**必须**复用 Open WebUI 的内部数据库连接,而不是创建新的数据库引擎。这确保了: - -- 插件与数据库类型无关(自动支持 PostgreSQL、SQLite 等) -- 自动继承 Open WebUI 的数据库配置 -- 避免连接池资源浪费 -- 保持与 Open WebUI 核心的兼容性 - -When a plugin requires persistent storage, it **MUST** re-use Open WebUI's internal database connection instead of creating a new database engine. This ensures: - -- The plugin is database-agnostic (automatically supports PostgreSQL, SQLite, etc.) 
-- Automatic inheritance of Open WebUI's database configuration -- No wasted connection pool resources -- Compatibility with Open WebUI's core - -### 实现示例 (Implementation Example) - -```python -# Open WebUI internal database (re-use shared connection) -from open_webui.internal.db import engine as owui_engine -from open_webui.internal.db import Session as owui_Session -from open_webui.internal.db import Base as owui_Base - -from sqlalchemy import Column, String, Text, DateTime, Integer, inspect -from datetime import datetime - - -class PluginTable(owui_Base): - """Plugin storage table - inherits from OpenWebUI's Base""" - - __tablename__ = "plugin_table_name" - __table_args__ = {"extend_existing": True} # Required to avoid conflicts on plugin reload - - id = Column(Integer, primary_key=True, autoincrement=True) - unique_id = Column(String(255), unique=True, nullable=False, index=True) - data = Column(Text, nullable=False) - created_at = Column(DateTime, default=datetime.utcnow) - updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) - - -class Filter: # or Pipe, Action, etc. - def __init__(self): - self.valves = self.Valves() - self._db_engine = owui_engine - self._SessionLocal = owui_Session - self._init_database() - - def _init_database(self): - """Initialize the database table using OpenWebUI's shared connection.""" - try: - inspector = inspect(self._db_engine) - if not inspector.has_table("plugin_table_name"): - PluginTable.__table__.create(bind=self._db_engine, checkfirst=True) - print("[Database] ✅ Created plugin table using OpenWebUI's shared connection.") - else: - print("[Database] ✅ Using OpenWebUI's shared connection. 
Table already exists.") - except Exception as e: - print(f"[Database] ❌ Initialization failed: {str(e)}") - - def _save_data(self, unique_id: str, data: str): - """Save data using context manager pattern.""" - try: - with self._SessionLocal() as session: - # Your database operations here - session.commit() - except Exception as e: - print(f"[Storage] ❌ Database save failed: {str(e)}") - - def _load_data(self, unique_id: str): - """Load data using context manager pattern.""" - try: - with self._SessionLocal() as session: - record = session.query(PluginTable).filter_by(unique_id=unique_id).first() - if record: - session.expunge(record) # Detach from session for use after close - return record - except Exception as e: - print(f"[Load] ❌ Database read failed: {str(e)}") - return None -``` - -### 禁止的做法 (Prohibited Practices) - -以下做法**已被弃用**,不应在新插件中使用: - -The following practices are **deprecated** and should NOT be used in new plugins: - -```python -# ❌ 禁止: 读取 DATABASE_URL 环境变量 -# ❌ Prohibited: Reading DATABASE_URL environment variable -database_url = os.getenv("DATABASE_URL") - -# ❌ 禁止: 创建独立的数据库引擎 -# ❌ Prohibited: Creating a separate database engine -from sqlalchemy import create_engine -self._db_engine = create_engine(database_url, **engine_args) - -# ❌ 禁止: 创建独立的会话工厂 -# ❌ Prohibited: Creating a separate session factory -from sqlalchemy.orm import sessionmaker -self._SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=self._db_engine) - -# ❌ 禁止: 使用独立的 Base -# ❌ Prohibited: Using a separate Base -from sqlalchemy.ext.declarative import declarative_base -Base = declarative_base() -``` - ---- - -## 📂 文件存储访问规范 (File Storage Access) - -OpenWebUI 支持多种文件存储后端(本地磁盘、S3/MinIO 对象存储等)。插件在访问用户上传的文件或生成的图片时,必须实现多级回退机制以兼容所有存储配置。 - -### 存储类型检测 (Storage Type Detection) - -通过 `Files.get_file_by_id()` 获取的文件对象,其 `path` 属性决定了存储位置: - -| Path 格式 | 存储类型 | 访问方式 | -|-----------|----------|----------| -| `s3://bucket/key` | S3/MinIO 对象存储 | boto3 直连或 API 回调 | -| 
`/app/backend/data/...` | Docker 卷存储 | 本地文件系统读取 | -| `./uploads/...` | 本地相对路径 | 本地文件系统读取 | -| `gs://bucket/key` | Google Cloud Storage | API 回调 | - -### 多级回退机制 (Multi-level Fallback) - -推荐实现以下优先级的文件获取策略: - -```python -def _get_file_content(self, file_id: str, max_bytes: int) -> Optional[bytes]: - """获取文件内容,支持多种存储后端""" - file_obj = Files.get_file_by_id(file_id) - if not file_obj: - return None - - # 1️⃣ 数据库直接存储 (小文件) - data_field = getattr(file_obj, "data", None) - if isinstance(data_field, dict): - if "bytes" in data_field: - return data_field["bytes"] - if "base64" in data_field: - return base64.b64decode(data_field["base64"]) - - # 2️⃣ S3 直连 (对象存储 - 最快) - s3_path = getattr(file_obj, "path", None) - if isinstance(s3_path, str) and s3_path.startswith("s3://"): - data = self._read_from_s3(s3_path, max_bytes) - if data: - return data - - # 3️⃣ 本地文件系统 (磁盘存储) - for attr in ("path", "file_path"): - path = getattr(file_obj, attr, None) - if path and not path.startswith(("s3://", "gs://", "http")): - # 尝试多个常见路径 - for base in ["", "./data", "/app/backend/data"]: - full_path = Path(base) / path if base else Path(path) - if full_path.exists(): - return full_path.read_bytes()[:max_bytes] - - # 4️⃣ 公共 URL 下载 - url = getattr(file_obj, "url", None) - if url and url.startswith("http"): - return self._download_from_url(url, max_bytes) - - # 5️⃣ 内部 API 回调 (通用兜底方案) - if self._api_base_url: - api_url = f"{self._api_base_url}/api/v1/files/{file_id}/content" - return self._download_from_api(api_url, self._api_token, max_bytes) - - return None -``` - -### S3 直连实现 (S3 Direct Access) - -当检测到 `s3://` 路径时,使用 `boto3` 直接访问对象存储,读取以下环境变量: - -| 环境变量 | 说明 | 示例 | -|----------|------|------| -| `S3_ENDPOINT_URL` | S3 兼容服务端点 | `https://minio.example.com` | -| `S3_ACCESS_KEY_ID` | 访问密钥 ID | `minioadmin` | -| `S3_SECRET_ACCESS_KEY` | 访问密钥 | `minioadmin` | -| `S3_ADDRESSING_STYLE` | 寻址样式 | `auto`, `path`, `virtual` | - -```python -# S3 直连示例 -import boto3 -from botocore.config import Config as 
BotoConfig -import os - -def _read_from_s3(self, s3_path: str, max_bytes: int) -> Optional[bytes]: - """从 S3 直接读取文件 (比 API 回调更快)""" - if not s3_path.startswith("s3://"): - return None - - # 解析 s3://bucket/key - parts = s3_path[5:].split("/", 1) - bucket, key = parts[0], parts[1] - - # 从环境变量读取配置 - endpoint = os.environ.get("S3_ENDPOINT_URL") - access_key = os.environ.get("S3_ACCESS_KEY_ID") - secret_key = os.environ.get("S3_SECRET_ACCESS_KEY") - - if not all([endpoint, access_key, secret_key]): - return None # 回退到 API 方式 - - s3_client = boto3.client( - "s3", - endpoint_url=endpoint, - aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - config=BotoConfig(s3={"addressing_style": os.environ.get("S3_ADDRESSING_STYLE", "auto")}) - ) - - response = s3_client.get_object(Bucket=bucket, Key=key) - return response["Body"].read(max_bytes) -``` - -### API 回调实现 (API Fallback) - -当其他方式失败时,通过 OpenWebUI 内部 API 获取文件: - -```python -def _download_from_api(self, api_url: str, token: str, max_bytes: int) -> Optional[bytes]: - """通过 OpenWebUI API 获取文件内容""" - import urllib.request - - headers = {"User-Agent": "OpenWebUI-Plugin"} - if token: - headers["Authorization"] = token - - req = urllib.request.Request(api_url, headers=headers) - with urllib.request.urlopen(req, timeout=15) as response: - if 200 <= response.status < 300: - return response.read(max_bytes) - return None -``` - -### 获取 API 上下文 (API Context Extraction) - -在 `action()` 方法中捕获请求上下文,用于 API 回调: - -```python -async def action(self, body: dict, __request__=None, ...): - # 从请求对象获取 API 凭证 - if __request__: - self._api_token = __request__.headers.get("Authorization") - self._api_base_url = str(__request__.base_url).rstrip("/") - else: - # 从环境变量获取端口作为备用 - port = os.environ.get("PORT") or "8080" - self._api_base_url = f"http://localhost:{port}" - self._api_token = None -``` - -### 性能对比 (Performance Comparison) - -| 方式 | 网络跳数 | 适用场景 | -|------|----------|----------| -| S3 直连 | 1 (插件 → S3) | 对象存储,最快 | -| 本地文件 | 0 | 
磁盘存储,最快 | -| API 回调 | 2 (插件 → OpenWebUI → S3/磁盘) | 通用兜底 | - -### 参考实现 (Reference Implementation) - -- `plugins/actions/export_to_docx/export_to_word.py` - `_image_bytes_from_owui_file_id` 方法 - -### Python 规范 - -- 遵循 **PEP 8** 规范 -- 使用 **Black** 格式化代码 -- 关键逻辑添加注释 - -### 导入顺序 - -```python -# 1. Standard library imports -import os import re import json import logging from typing import Optional, Dict, Any, Callable, Awaitable - -# 2. Third-party imports from pydantic import BaseModel, Field -from fastapi import Request -# 3. OpenWebUI imports -from open_webui.utils.chat import generate_chat_completion -from open_webui.models.users import Users +# Logging setup +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# HTML Template with insertion points +HTML_WRAPPER_TEMPLATE = """ + + + + + + + + + +
+ +
+ + + +""" + +class Action: + class Valves(BaseModel): + SHOW_STATUS: bool = Field( + default=True, + description="Whether to show operation status updates." + ) + SHOW_DEBUG_LOG: bool = Field( + default=False, + description="Whether to print debug logs in the browser console." + ) + # ... other valves ... + + def __init__(self): + self.valves = self.Valves() + + # ==================== Common Helper Methods ==================== + + def _get_user_context(self, __user__: Optional[Dict[str, Any]]) -> Dict[str, str]: + """Safely extracts user context information.""" + if isinstance(__user__, (list, tuple)): + user_data = __user__[0] if __user__ else {} + elif isinstance(__user__, dict): + user_data = __user__ + else: + user_data = {} + + return { + "user_id": user_data.get("id", "unknown_user"), + "user_name": user_data.get("name", "User"), + "user_language": user_data.get("language", "en-US"), + } + + def _get_chat_context( + self, body: dict, __metadata__: Optional[dict] = None + ) -> Dict[str, str]: + """ + Unified extraction of chat context information (chat_id, message_id). + Prioritizes extraction from body, then metadata. 
+ """ + chat_id = "" + message_id = "" + + if isinstance(body, dict): + chat_id = body.get("chat_id", "") + message_id = body.get("id", "") + + if not chat_id or not message_id: + body_metadata = body.get("metadata", {}) + if isinstance(body_metadata, dict): + if not chat_id: + chat_id = body_metadata.get("chat_id", "") + if not message_id: + message_id = body_metadata.get("message_id", "") + + if __metadata__ and isinstance(__metadata__, dict): + if not chat_id: + chat_id = __metadata__.get("chat_id", "") + if not message_id: + message_id = __metadata__.get("message_id", "") + + return { + "chat_id": str(chat_id).strip(), + "message_id": str(message_id).strip(), + } + + async def _emit_status( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + description: str, + done: bool = False, + ): + """Emits a status update event.""" + if self.valves.SHOW_STATUS and emitter: + await emitter( + {"type": "status", "data": {"description": description, "done": done}} + ) + + async def _emit_notification( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + content: str, + ntype: str = "info", + ): + """Emits a notification event (info, success, warning, error).""" + if emitter: + await emitter( + {"type": "notification", "data": {"type": ntype, "content": content}} + ) + + async def _emit_debug_log( + self, + emitter: Optional[Callable[[Any], Awaitable[None]]], + title: str, + data: dict, + ): + """Print structured debug logs in the browser console.""" + if not self.valves.SHOW_DEBUG_LOG or not emitter: + return + + try: + js_code = f""" + (async function() {{ + console.group("🛠️ {title}"); + console.log({json.dumps(data, ensure_ascii=False)}); + console.groupEnd(); + }})(); + """ + + await emitter({"type": "execute", "data": {"code": js_code}}) + except Exception as e: + print(f"Error emitting debug log: {e}") + + # ==================== HTML Helper Methods ==================== + + def _remove_existing_html(self, content: str) -> str: + """Removes 
existing plugin-generated HTML code blocks.""" + pattern = r"```html\s*[\s\S]*?```" + return re.sub(pattern, "", content).strip() + + def _merge_html( + self, + existing_html: str, + new_content: str, + new_styles: str = "", + new_scripts: str = "", + user_language: str = "en-US", + ) -> str: + """Merges new content into existing HTML container.""" + if not existing_html: + base_html = HTML_WRAPPER_TEMPLATE.replace("{user_language}", user_language) + else: + base_html = existing_html + + if "" in base_html: + base_html = base_html.replace( + "", + f"{new_content}\n " + ) + + if new_styles and "/* STYLES_INSERTION_POINT */" in base_html: + base_html = base_html.replace( + "/* STYLES_INSERTION_POINT */", + f"{new_styles}\n /* STYLES_INSERTION_POINT */" + ) + + if new_scripts and "" in base_html: + base_html = base_html.replace( + "", + f"{new_scripts}\n " + ) + + return base_html ``` ---- +### 3. 文件导出与命名规范 (File Export and Naming) -## 📄 文件导出与命名规范 (File Export and Naming) +对于涉及文件导出的插件,必须提供灵活的标题生成策略。 -对于涉及文件导出的插件(通常是 Action),必须提供灵活的标题生成策略。 - -### Valves 配置 (Valves Configuration) - -应包含 `TITLE_SOURCE` 选项: +#### Valves 配置 ```python class Valves(BaseModel): @@ -819,817 +660,181 @@ class Valves(BaseModel): ) ``` -### 标题获取逻辑 (Title Retrieval Logic) +#### 优先级与回退 (Priority & Fallback) -1. **chat_title**: 尝试从 `body` 获取,若失败且有 `chat_id`,则从数据库获取 (`Chats.get_chat_by_id`)。 -2. **markdown_title**: 从 Markdown 内容提取第一个 H1 或 H2。 -3. **ai_generated**: 使用轻量级 Prompt 让 AI 生成简短标题。 - -### 优先级与回退 (Priority and Fallback) - -代码应根据 `TITLE_SOURCE` 优先尝试指定方法,若失败则按以下顺序回退: `chat_title` -> `markdown_title` -> `user_name + date` -```python -# 核心逻辑示例 -if self.valves.TITLE_SOURCE == "chat_title": - title = chat_title -elif self.valves.TITLE_SOURCE == "markdown_title": - title = self.extract_title(content) -elif self.valves.TITLE_SOURCE == "ai_generated": - title = await self.generate_title_using_ai(...) 
-``` - -### AI 标题生成实现 (AI Title Generation Implementation) - -如果支持 `ai_generated` 选项,应实现类似以下的方法: +#### 实现示例 (Implementation Example) ```python -async def generate_title_using_ai( - self, - body: dict, - content: str, - user_id: str, - request: Any -) -> str: - """Generates a short title using the current LLM model.""" - if not request: - return "" - - try: - # 获取当前用户和模型 - user_obj = Users.get_user_by_id(user_id) - model = body.get("model") - - # 构造请求 - payload = { - "model": model, - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant. Generate a short, concise title (max 10 words) for the following text. Do not use quotes. Only output the title." - }, - { - "role": "user", - "content": content[:2000] # 限制上下文长度以节省 Token - } - ], - "stream": False, - } - - # 调用 OpenWebUI 内部生成接口 - response = await generate_chat_completion(request, payload, user_obj) + async def _get_filename( + self, + body: dict, + content: str, + user_id: str, + request: Optional[Any] = None, + ) -> str: + """ + Generate filename based on priority: + 1. TITLE_SOURCE (chat_title / markdown_title / ai_generated) + 2. Fallback: chat_title -> markdown_title -> user_name + date + """ + title = "" + chat_title = "" - if response and "choices" in response: - return response["choices"][0]["message"]["content"].strip() - - except Exception as e: - logger.error(f"Error generating title: {e}") + # 1. Get Chat Title + chat_ctx = self._get_chat_context(body) + chat_id = chat_ctx["chat_id"] + if chat_id: + chat_title = await self._fetch_chat_title(chat_id, user_id) - return "" + # 2. Determine Title based on Valve + source = self.valves.TITLE_SOURCE + if source == "chat_title": + title = chat_title + elif source == "markdown_title": + title = self._extract_title(content) + elif source == "ai_generated": + # Optional: Implement AI title generation + # title = await self._generate_title_using_ai(body, content, user_id, request) + pass + + # 3. 
Fallback Logic + if not title: + # Fallback to chat_title if not already tried + if source != "chat_title" and chat_title: + title = chat_title + # Fallback to markdown_title if not already tried + elif source != "markdown_title": + title = self._extract_title(content) + + # 4. Final Fallback: User + Date + if not title: + user_ctx = self._get_user_context(body.get("user")) + user_name = user_ctx["user_name"] + date_str = datetime.datetime.now().strftime("%Y%m%d") + title = f"{user_name}_{date_str}" + + return self._clean_filename(title) + + async def _fetch_chat_title(self, chat_id: str, user_id: str) -> str: + try: + from open_webui.apps.webui.models.chats import Chats + chat = Chats.get_chat_by_id_and_user_id(chat_id, user_id) + return chat.title if chat else "" + except Exception: + return "" + + def _extract_title(self, content: str) -> str: + """Extract title from Markdown h1 (# Title)""" + match = re.search(r"^#\s+(.+)$", content, re.MULTILINE) + return match.group(1).strip() if match else "" + + def _clean_filename(self, filename: str) -> str: + """Remove invalid characters for filenames""" + return re.sub(r'[\\/*?:"<>|]', "", filename).strip() ``` ---- - -## 🎭 iframe 主题检测规范 (iframe Theme Detection) +### 4. iframe 主题检测规范 (iframe Theme Detection) 当插件在 iframe 中运行(特别是使用 `srcdoc` 属性)时,需要检测应用程序的主题以保持视觉一致性。 -### 检测优先级 (Priority Order) +优先级: +1. 显式切换 (Explicit Toggle) +2. 父文档 Meta 标签 (Parent Meta Theme-Color) +3. 父文档 Class/Data-Theme (Parent HTML/Body Class) +4. 系统偏好 (System Preference) -按以下顺序尝试检测主题,直到找到有效结果: +### 5. 高级开发模式 (Advanced Development Patterns) -1. **显式切换** (Explicit Toggle) - 用户手动点击主题按钮 -2. **父文档 Meta 标签** (Parent Meta Theme-Color) - 从 `window.parent.document` 的 `` 读取 -3. **父文档 Class/Data-Theme** (Parent HTML/Body Class) - 检查父文档 html/body 的 class 或 data-theme 属性 -4. 
**系统偏好** (System Preference) - `prefers-color-scheme: dark` 媒体查询 +#### 混合服务端-客户端生成 (Hybrid Server-Client Generation) +服务端生成半成品(如 ZIP),客户端渲染复杂组件(如 Mermaid)并回填。 -### 核心实现代码 (Implementation) +#### 原生 Word 公式支持 (Native Word Math Support) +使用 `latex2mathml` + `mathml2omml`。 -```javascript -// 1. 颜色亮度解析(支持 hex 和 rgb) -const parseColorLuma = (colorStr) => { - if (!colorStr) return null; - // hex #rrggbb or rrggbb - let m = colorStr.match(/^#?([0-9a-f]{6})$/i); - if (m) { - const hex = m[1]; - const r = parseInt(hex.slice(0, 2), 16); - const g = parseInt(hex.slice(2, 4), 16); - const b = parseInt(hex.slice(4, 6), 16); - return (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255; - } - // rgb(r, g, b) or rgba(r, g, b, a) - m = colorStr.match(/rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)/i); - if (m) { - const r = parseInt(m[1], 10); - const g = parseInt(m[2], 10); - const b = parseInt(m[3], 10); - return (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255; - } - return null; -}; +#### JS 渲染并嵌入 Markdown (JS Render to Markdown) +利用浏览器渲染图表,导出为 Data URL 图片,回写到 Markdown 中。 -// 2. 从 meta 标签提取主题 -const getThemeFromMeta = (doc, scope = 'self') => { - const metas = Array.from((doc || document).querySelectorAll('meta[name="theme-color"]')); - if (!metas.length) return null; - const color = metas[metas.length - 1].content.trim(); - const luma = parseColorLuma(color); - if (luma === null) return null; - return luma < 0.5 ? 'dark' : 'light'; -}; +#### OpenWebUI Chat API 更新规范 (Chat API Update Specification) +当插件需要修改消息内容并持久化到数据库时,必须遵循 OpenWebUI 的 Backend-Controlled API 流程。 -// 3. 安全地访问父文档 -const getParentDocumentSafe = () => { - try { - if (!window.parent || window.parent === window) return null; - const pDoc = window.parent.document; - void pDoc.title; // 触发跨域检查 - return pDoc; - } catch (err) { - console.log(`Parent document not accessible: ${err.name}`); - return null; - } -}; - -// 4. 
从父文档的 class/data-theme 检测主题 -const getThemeFromParentClass = () => { - try { - if (!window.parent || window.parent === window) return null; - const pDoc = window.parent.document; - const html = pDoc.documentElement; - const body = pDoc.body; - const htmlClass = html ? html.className : ''; - const bodyClass = body ? body.className : ''; - const htmlDataTheme = html ? html.getAttribute('data-theme') : ''; - - if (htmlDataTheme === 'dark' || bodyClass.includes('dark') || htmlClass.includes('dark')) - return 'dark'; - if (htmlDataTheme === 'light' || bodyClass.includes('light') || htmlClass.includes('light')) - return 'light'; - return null; - } catch (err) { - return null; - } -}; - -// 5. 主题设置及检测 -const setTheme = (wrapperEl, explicitTheme) => { - const parentDoc = getParentDocumentSafe(); - const metaThemeParent = parentDoc ? getThemeFromMeta(parentDoc, 'parent') : null; - const parentClassTheme = getThemeFromParentClass(); - const prefersDark = window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches; - - // 按优先级选择 - const chosen = explicitTheme || metaThemeParent || parentClassTheme || (prefersDark ? 
'dark' : 'light'); - wrapperEl.classList.toggle('theme-dark', chosen === 'dark'); - return chosen; -}; -``` - -### CSS 变量定义 (CSS Variables) - -使用 CSS 变量实现主题切换,避免硬编码颜色: - -```css -:root { - --primary-color: #1e88e5; - --background-color: #f4f6f8; - --text-color: #263238; - --border-color: #e0e0e0; -} - -.theme-dark { - --primary-color: #64b5f6; - --background-color: #111827; - --text-color: #e5e7eb; - --border-color: #374151; -} - -.container { - background-color: var(--background-color); - color: var(--text-color); - border-color: var(--border-color); -} -``` - -### 调试与日志 (Debugging) - -添加详细日志便于排查主题检测问题: - -```javascript -console.log(`[plugin] [parent] meta theme-color count: ${metas.length}`); -console.log(`[plugin] [parent] meta theme-color picked: "${color}"`); -console.log(`[plugin] [parent] meta theme-color luma=${luma.toFixed(3)}, inferred=${inferred}`); -console.log(`[plugin] parent html.class="${htmlClass}", data-theme="${htmlDataTheme}"`); -console.log(`[plugin] final chosen theme: ${chosen}`); -``` - -### 最佳实践 (Best Practices) - -- 仅尝试访问**父文档**的主题信息,不依赖 srcdoc iframe 自身的 meta(通常为空) -- 在跨域 iframe 中使用 class/data-theme 作为备选方案 -- 使用 try-catch 包裹所有父文档访问,避免跨域异常中断 -- 提供用户手动切换主题的按钮作为最高优先级 -- 记录详细日志便于用户反馈主题检测问题 - -### OpenWebUI Configuration Requirement (OpenWebUI Configuration) - -For iframe plugins to access parent document theme information, users need to configure: - -1. **Enable Artifact Same-Origin Access** - In User Settings: **Interface** → **Artifacts** → Check **iframe Sandbox Allow Same Origin** -2. **Configure Sandbox Attributes** - Ensure iframe's sandbox attribute includes both `allow-same-origin` and `allow-scripts` -3. 
**Verify Meta Tag** - Ensure OpenWebUI page head contains `` tag - -**Important Notes**: -- Same-origin access allows iframe to read theme information via `window.parent.document` -- Cross-origin iframes cannot access parent document and should implement class/data-theme detection as fallback -- Using same-origin access in srcdoc iframe is safe (origin is null, doesn't bypass CORS policy) -- Users can provide manual theme toggle button in plugin as highest priority option +1. **Event API**: 即时更新前端显示。 +2. **Chat Persistence API**: 持久化到数据库(必须同时更新 `messages[]` 和 `history.messages`)。 --- -## ✅ 开发检查清单 (Development Checklist) +## 🛡️ Filter 插件规范 (Filter Plugin Standards) -开发新插件时,请确保完成以下检查: +### 1. 状态管理 (State Management) - **关键** + +Filter 实例是**单例 (Singleton)**。 + +- **❌ 禁止**: 使用 `self` 存储请求级别的临时状态。 +- **✅ 推荐**: 无状态设计,或使用 `body` 传递临时数据。 + +### 2. 摘要注入角色 (Summary Injection Role) + +- **✅ 推荐**: 使用 **`assistant`** 角色。 + +### 3. 模型默认值 (Model Defaults) + +- **❌ 禁止**: 硬编码特定模型 ID。 +- **✅ 推荐**: 默认值为 `None`,优先使用当前对话模型。 + +### 4. 异步处理 (Async Processing) + +- **✅ 推荐**: 在 `outlet` 中使用 `asyncio.create_task()` 启动后台任务。 + +--- + +## 🔄 工作流与流程 (Workflow & Process) + +### 1. ✅ 开发检查清单 (Development Checklist) - [ ] 创建英文版插件代码 (`plugin_name.py`) -- [ ] 创建中文版插件代码 (`插件名.py` 或 `plugin_name_cn.py`) +- [ ] 创建中文版插件代码 (`plugin_name_cn.py`) - [ ] 编写英文 README (`README.md`) - [ ] 编写中文 README (`README_CN.md`) - [ ] 包含标准化文档字符串 - [ ] 添加 Author 和 License 信息 -- [ ] 使用 Lucide 图标 (Base64 编码) +- [ ] 使用 Lucide 图标 - [ ] 实现 Valves 配置 - [ ] 使用 logging 而非 print - [ ] 测试双语界面 -- [ ] **一致性检查 (Consistency Check)**: +- [ ] **一致性检查**: 确保文档、代码、README 同步 + +### 2. 🔄 一致性维护 (Consistency Maintenance) + +任何插件的**新增、修改或移除**,必须同时更新: +1. **插件代码** (version) +2. **项目文档** (`docs/`) +3. **自述文件** (`README.md`) + +### 3. 
发布工作流 (Release Workflow) + +#### 自动发布 (Automatic Release) +推送到 `main` 分支会自动触发发布。 + +#### 发布前必须完成 +- 更新版本号(中英文同步) +- 遵循语义化版本 (SemVer) + +#### Commit Message 规范 +使用 Conventional Commits 格式 (`feat`, `fix`, `docs`, etc.)。 + +### 4. 🤖 Git Operations (Agent Rules) + +- **允许**: 创建功能分支 (`feature/xxx`),推送到功能分支。 +- **禁止**: 直接推送到 `main`,自动合并到 `main`。 + +### 5. 🤝 贡献者认可规范 (Contributor Recognition) + +使用 `@all-contributors please add @username for ` 指令。 --- -## 🚀 高级开发模式 (Advanced Development Patterns) - -### 混合服务端-客户端生成 (Hybrid Server-Client Generation) - -对于需要复杂前端渲染(如 Mermaid 图表、ECharts)但最终生成文件(如 DOCX、PDF)的场景,建议采用混合模式: - -1. **服务端 (Python)**: - * 处理文本解析、Markdown 转换、文档结构构建。 - * 为复杂组件生成**占位符**(如带有特定 ID 或元数据的图片/文本块)。 - * 将半成品文件(如 Base64 编码的 ZIP/DOCX)发送给前端。 - -2. **客户端 (JavaScript)**: - * 在浏览器中加载半成品文件(使用 JSZip 等库)。 - * 利用浏览器能力渲染复杂组件(如 `mermaid.render`)。 - * 将渲染结果(SVG/PNG)回填到占位符位置。 - * 触发最终文件的下载。 - -**优势**: -* 无需在服务端安装 Headless Browser(如 Puppeteer),降低部署复杂度。 -* 利用用户浏览器的计算能力。 -* 支持动态、交互式内容的静态化导出。 - -### 原生 Word 公式支持 (Native Word Math Support) - -对于需要生成高质量数学公式的 Word 文档,推荐使用 `latex2mathml` + `mathml2omml` 组合: - -1. **LaTeX -> MathML**: 使用 `latex2mathml` 将 LaTeX 字符串转换为标准 MathML。 -2. **MathML -> OMML**: 使用 `mathml2omml` 将 MathML 转换为 Office Math Markup Language (OMML)。 -3. **插入 Word**: 将 OMML XML 插入到 `python-docx` 的段落中。 - -```python -# 示例代码 -from latex2mathml.converter import convert as latex2mathml -from mathml2omml import convert as mathml2omml - -def add_math(paragraph, latex_str): - mathml = latex2mathml(latex_str) - omml = mathml2omml(mathml) - # ... 插入 OMML 到 paragraph._element ... -``` - -### JS 渲染并嵌入 Markdown (JS Render to Markdown) - -对于需要复杂前端渲染(如 AntV 图表、Mermaid 图表、ECharts)但希望结果**持久化为纯 Markdown 格式**的场景,推荐使用 Data URL 嵌入模式: - -#### 工作流程 - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Plugin Workflow │ -├─────────────────────────────────────────────────────────────┤ -│ 1. 
Python Action │ -│ ├── 分析消息内容 │ -│ ├── 调用 LLM 生成结构化数据(可选) │ -│ └── 通过 __event_call__ 发送 JS 代码到前端 │ -├─────────────────────────────────────────────────────────────┤ -│ 2. Browser JS (via __event_call__) │ -│ ├── 动态加载可视化库(如 AntV、Mermaid) │ -│ ├── 离屏渲染 SVG/Canvas │ -│ ├── 使用 toDataURL() 导出 Base64 Data URL │ -│ └── 通过 REST API 更新消息内容 │ -├─────────────────────────────────────────────────────────────┤ -│ 3. Markdown 渲染 │ -│ └── 显示 ![描述](data:image/svg+xml;base64,...) │ -└─────────────────────────────────────────────────────────────┘ -``` - -#### 核心实现代码 - -**Python 端(发送 JS 执行):** - -```python -async def action(self, body, __event_call__, __metadata__, ...): - chat_id = self._extract_chat_id(body, __metadata__) - message_id = self._extract_message_id(body, __metadata__) - - # 生成 JS 代码 - js_code = self._generate_js_code( - chat_id=chat_id, - message_id=message_id, - data=processed_data, # 可视化所需数据 - ) - - # 执行 JS - if __event_call__: - await __event_call__({ - "type": "execute", - "data": {"code": js_code} - }) -``` - -**JavaScript 端(渲染并回写):** - -```javascript -(async function() { - // 1. 动态加载可视化库 - if (typeof VisualizationLib === 'undefined') { - await new Promise((resolve, reject) => { - const script = document.createElement('script'); - script.src = 'https://cdn.example.com/lib.min.js'; - script.onload = resolve; - script.onerror = reject; - document.head.appendChild(script); - }); - } - - // 2. 创建离屏容器 - const container = document.createElement('div'); - container.style.cssText = 'position:absolute;left:-9999px;'; - document.body.appendChild(container); - - // 3. 渲染可视化 - const instance = new VisualizationLib({ container, ... }); - instance.render(data); - - // 4. 
导出为 Data URL - const dataUrl = await instance.toDataURL({ type: 'svg', embedResources: true }); - // 或手动转换 SVG: - // const svgData = new XMLSerializer().serializeToString(svgElement); - // const base64 = btoa(unescape(encodeURIComponent(svgData))); - // const dataUrl = "data:image/svg+xml;base64," + base64; - - // 5. 清理 - instance.destroy(); - document.body.removeChild(container); - - // 6. 生成 Markdown 图片 - const markdownImage = `![描述](${dataUrl})`; - - // 7. 通过 API 更新消息 - const token = localStorage.getItem("token"); - await fetch(`/api/v1/chats/${chatId}/messages/${messageId}/event`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "Authorization": `Bearer ${token}` - }, - body: JSON.stringify({ - type: "chat:message", - data: { content: originalContent + "\n\n" + markdownImage } - }) - }); -})(); -``` - -#### 优势 - -- **纯 Markdown 输出**:结果是标准的 Markdown 图片语法,无需 HTML 代码块 -- **高效存储**:图片上传至 `/api/v1/files`,避免 Base64 字符串膨胀聊天记录 -- **持久化**:通过 API 回写,消息重新加载后图片仍然存在 -- **跨平台**:任何支持 Markdown 图片的客户端都能显示 -- **无服务端渲染依赖**:利用用户浏览器的渲染能力 - -#### 与 HTML 注入模式对比 - -| 特性 | HTML 注入 (`\`\`\`html`) | JS 渲染 + Markdown 图片 | -|------|-------------------------|------------------------| -| 输出格式 | HTML 代码块 | Markdown 图片 | -| 交互性 | ✅ 支持按钮、动画 | ❌ 静态图片 | -| 外部依赖 | 需要加载 JS 库 | 依赖 `/api/v1/files` 存储 | -| 持久化 | 依赖浏览器渲染 | ✅ 永久可见 | -| 文件导出 | 需特殊处理 | ✅ 直接导出 | -| 适用场景 | 交互式内容 | 信息图、图表快照 | - -#### 参考实现 - -- `plugins/actions/js-render-poc/infographic_markdown.py` - AntV Infographic 生成并嵌入 -- `plugins/actions/js-render-poc/js_render_poc.py` - 基础概念验证 - -### OpenWebUI Chat API 更新规范 (Chat API Update Specification) - -当插件需要修改消息内容并持久化到数据库时,必须遵循 OpenWebUI 的 Backend-Controlled API 流程。 - -When a plugin needs to modify message content and persist it to the database, follow OpenWebUI's Backend-Controlled API flow. - -#### 核心概念 (Core Concepts) - -1. **Event API** (`/api/v1/chats/{chatId}/messages/{messageId}/event`) - - 用于**即时更新前端显示**,用户无需刷新页面 - - 是可选的,部分版本可能不支持 - - 仅影响当前会话的 UI,不持久化 - -2. 
**Chat Persistence API** (`/api/v1/chats/{chatId}`) - - 用于**持久化到数据库**,确保刷新页面后数据仍存在 - - 必须同时更新 `messages[]` 数组和 `history.messages` 对象 - - 是消息持久化的唯一可靠方式 - -#### 数据结构 (Data Structure) - -OpenWebUI 的 Chat 对象包含两个关键位置存储消息内容: - -```javascript -{ - "chat": { - "id": "chat-uuid", - "title": "Chat Title", - "messages": [ // 1️⃣ 消息数组 - { "id": "msg-1", "role": "user", "content": "..." }, - { "id": "msg-2", "role": "assistant", "content": "..." } - ], - "history": { - "current_id": "msg-2", - "messages": { // 2️⃣ 消息索引对象 - "msg-1": { "id": "msg-1", "role": "user", "content": "..." }, - "msg-2": { "id": "msg-2", "role": "assistant", "content": "..." } - } - } - } -} -``` - -> **重要**:修改消息时,**必须同时更新两个位置**,否则可能导致数据不一致。 - -#### 标准实现流程 (Standard Implementation) - -```javascript -(async function() { - const chatId = "{chat_id}"; - const messageId = "{message_id}"; - const token = localStorage.getItem("token"); - - // 1️⃣ 获取当前 Chat 数据 - const getResponse = await fetch(`/api/v1/chats/${chatId}`, { - method: "GET", - headers: { "Authorization": `Bearer ${token}` } - }); - const chatData = await getResponse.json(); - - // 2️⃣ 使用 map 遍历 messages,只修改目标消息 - let newContent = ""; - const updatedMessages = chatData.chat.messages.map(m => { - if (m.id === messageId) { - const originalContent = m.content || ""; - newContent = originalContent + "\n\n" + newMarkdown; - - // 3️⃣ 同时更新 history.messages 中对应的消息 - if (chatData.chat.history && chatData.chat.history.messages) { - if (chatData.chat.history.messages[messageId]) { - chatData.chat.history.messages[messageId].content = newContent; - } - } - - // 4️⃣ 保留消息的其他属性,只修改 content - return { ...m, content: newContent }; - } - return m; // 其他消息原样返回 - }); - - // 5️⃣ 通过 Event API 即时更新前端(可选) - try { - await fetch(`/api/v1/chats/${chatId}/messages/${messageId}/event`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "Authorization": `Bearer ${token}` - }, - body: JSON.stringify({ - type: "chat:message", - data: { content: newContent } 
- }) - }); - } catch (e) { - // Event API 是可选的,继续执行持久化 - console.log("Event API not available, continuing..."); - } - - // 6️⃣ 持久化到数据库(必须) - const updatePayload = { - chat: { - ...chatData.chat, // 保留所有原有属性 - messages: updatedMessages - // history 已在上面原地修改 - } - }; - - await fetch(`/api/v1/chats/${chatId}`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "Authorization": `Bearer ${token}` - }, - body: JSON.stringify(updatePayload) - }); -})(); -``` - -#### 最佳实践 (Best Practices) - -1. **保留原有结构**:使用展开运算符 `...chatData.chat` 和 `...m` 确保不丢失任何原有属性 -2. **双位置更新**:必须同时更新 `messages[]` 和 `history.messages[id]` -3. **错误处理**:Event API 调用应包裹在 try-catch 中,失败时继续持久化 -4. **重试机制**:对持久化 API 实现重试逻辑,提高可靠性 - -```javascript -// 带重试的请求函数 -const fetchWithRetry = async (url, options, retries = 3) => { - for (let i = 0; i < retries; i++) { - try { - const response = await fetch(url, options); - if (response.ok) return response; - if (i < retries - 1) { - await new Promise(r => setTimeout(r, 1000 * (i + 1))); // 指数退避 - } - } catch (e) { - if (i === retries - 1) throw e; - await new Promise(r => setTimeout(r, 1000 * (i + 1))); - } - } - return null; -}; -``` - -5. 
**禁止使用的 API**:不要使用 `/api/v1/chats/{chatId}/share` 作为持久化备用方案,该 API 用于分享功能,不是更新功能 - -#### 提取 Chat ID 和 Message ID (Extracting IDs) - -```python -def _extract_chat_id(self, body: dict, metadata: Optional[dict]) -> str: - """从 body 或 metadata 中提取 chat_id""" - if isinstance(body, dict): - chat_id = body.get("chat_id") - if isinstance(chat_id, str) and chat_id.strip(): - return chat_id.strip() - - body_metadata = body.get("metadata", {}) - if isinstance(body_metadata, dict): - chat_id = body_metadata.get("chat_id") - if isinstance(chat_id, str) and chat_id.strip(): - return chat_id.strip() - - if isinstance(metadata, dict): - chat_id = metadata.get("chat_id") - if isinstance(chat_id, str) and chat_id.strip(): - return chat_id.strip() - - return "" - -def _extract_message_id(self, body: dict, metadata: Optional[dict]) -> str: - """从 body 或 metadata 中提取 message_id""" - if isinstance(body, dict): - message_id = body.get("id") - if isinstance(message_id, str) and message_id.strip(): - return message_id.strip() - - body_metadata = body.get("metadata", {}) - if isinstance(body_metadata, dict): - message_id = body_metadata.get("message_id") - if isinstance(message_id, str) and message_id.strip(): - return message_id.strip() - - if isinstance(metadata, dict): - message_id = metadata.get("message_id") - if isinstance(message_id, str) and message_id.strip(): - return message_id.strip() - - return "" -``` - -#### 参考实现 - -- `plugins/actions/smart-mind-map/smart_mind_map.py` - 思维导图图片模式实现 -- 官方文档: [Backend-Controlled, UI-Compatible API Flow](https://docs.openwebui.com/tutorials/tips/backend-controlled-ui-compatible-api-flow) - ---- - -## 🔄 一致性维护 (Consistency Maintenance) - -任何插件的**新增、修改或移除**,必须同时更新以下三个位置,保持完全一致: - -1. **插件代码 (Plugin Code)**: 更新 `version` 和功能实现。 -2. **项目文档 (Docs)**: 更新 `docs/` 下对应的文档文件(版本号、功能描述)。 -3. 
**自述文件 (README)**: 更新根目录下的 `README.md` 和 `README_CN.md` 中的插件列表。 - -> [!IMPORTANT] -> 提交 PR 前,请务必检查这三处是否同步。例如:如果删除了一个插件,必须同时从 README 列表中移除,并删除对应的 docs 文档。 - ---- - -## � 发布工作流 (Release Workflow) - -### 自动发布 (Automatic Release) - -当插件更新推送到 `main` 分支时,会**自动触发**发布流程: - -1. 🔍 检测版本变化(与上次 release 对比) -2. 📝 生成发布说明(包含更新内容和提交记录) -3. 📦 创建 GitHub Release(包含可下载的插件文件) -4. 🏷️ 自动生成版本号(格式:`vYYYY.MM.DD-运行号`) - -**注意**:仅**移除插件**(删除文件)**不会触发**自动发布。只有新增或修改插件(且更新了版本号)才会触发发布。移除的插件将不会出现在发布日志中。 - -### 发布前必须完成 (Pre-release Requirements) - -> [!IMPORTANT] -> 版本号**仅在用户明确要求发布时**才需要更新。日常代码更改**无需**更新版本号。 - -**触发版本更新的关键词**: -- 用户说 "发布"、"release"、"bump version" -- 用户明确要求准备发布 - -**Agent 主动询问发布 (Agent-Initiated Release Prompt)**: - -当 Agent 完成以下类型的更改后,**应主动询问**用户是否需要发布新版本: - -| 更改类型 | 示例 | 是否询问发布 | -|---------|------|-------------| -| 新功能 | 新增导出格式、新的配置选项 | ✅ 询问 | -| 重要 Bug 修复 | 修复导致崩溃或数据丢失的问题 | ✅ 询问 | -| 累积多次更改 | 同一插件在会话中被修改 >= 3 次 | ✅ 询问 | -| 小优化 | 代码清理、格式符号处理 | ❌ 不询问 | -| 文档更新 | 只改 README、注释 | ❌ 不询问 | - -如果用户确认发布,Agent 需要更新所有版本相关的文件(代码、README、docs 等)。 - -**发布时需要完成**: -1. ✅ **更新版本号** - 修改插件文档字符串中的 `version` 字段 -2. ✅ **中英文版本同步** - 确保两个版本的版本号一致 - -```python -""" -title: My Plugin -version: 0.2.0 # <- 发布时更新这里! -... -""" -``` - -### 版本编号规则 (Versioning) - -遵循[语义化版本](https://semver.org/lang/zh-CN/): - -| 变更类型 | 版本变化 | 示例 | -|---------|---------|------| -| Bug 修复 | PATCH +1 | 0.1.0 → 0.1.1 | -| 新功能 | MINOR +1 | 0.1.1 → 0.2.0 | -| 不兼容变更 | MAJOR +1 | 0.2.0 → 1.0.0 | - -### 发布方式 (Release Methods) - -**方式 A:直接推送到 main(推荐)** - -```bash -# 1. 暂存更改 -git add plugins/actions/my-plugin/ - -# 2. 提交(使用规范的 commit message) -git commit -m "feat(my-plugin): add new feature X - -- Add feature X for better user experience -- Fix bug Y -- Update version to 0.2.0" - -# 3. 推送到 main -git push origin main - -# GitHub Actions 会自动创建 Release -``` - -**方式 B:创建 PR(团队协作)** - -```bash -# 1. 创建功能分支 -git checkout -b feature/my-plugin-v0.2.0 - -# 2. 提交更改 -git commit -m "feat(my-plugin): add new feature X" - -# 3. 
推送并创建 PR -git push origin feature/my-plugin-v0.2.0 - -# 4. PR 合并后自动触发发布 -``` - -**方式 C:手动触发发布** - -1. 前往 GitHub Actions → "Plugin Release / 插件发布" -2. 点击 "Run workflow" -3. 填写版本号和发布说明 - -### Commit Message 规范 (Commit Convention) - -使用 [Conventional Commits](https://www.conventionalcommits.org/) 格式: - -``` -(): - -[optional body] - -[optional footer] -``` - -常用类型: -- `feat`: 新功能 -- `fix`: Bug 修复 -- `docs`: 文档更新 -- `refactor`: 代码重构 -- `style`: 代码格式调整 -- `perf`: 性能优化 - -示例: -``` -feat(flash-card): add _get_user_context for safer user info retrieval - -- Add _get_user_context method to handle various __user__ types -- Prevent AttributeError when __user__ is not a dict -- Update version to 0.2.2 for both English and Chinese versions -``` - -### 发布检查清单 (Release Checklist) - -发布前确保完成以下检查: - -- [ ] 更新插件版本号(英文版 + 中文版) -- [ ] 测试插件功能正常 -- [ ] 确保代码通过格式检查 -- [ ] 编写清晰的 commit message -- [ ] 推送到 main 分支或合并 PR - ---- - -## 🤝 贡献者认可规范 (Contributor Recognition Standards) - -本项目使用 [All Contributors](https://allcontributors.org/) 规范来认可所有形式的贡献。 - -### 1. 如何添加贡献者 (How to Add) - -在 GitHub 的 **Issue** 或 **Pull Request** 评论区发送以下指令,Bot 会自动创建 PR 更新 README: - -```text -@all-contributors please add @username for -``` - -### 2. 常用贡献类型 (Common Contribution Types) - -| 类型 (Type) | 含义 (Meaning) | 图标 (Icon) | -| :--- | :--- | :---: | -| **`ideas`** | 提供想法、功能建议或改进思路 | 🤔 | -| **`code`** | 编写并提交代码实现 | 💻 | -| **`bug`** | 报告 Bug 或发现逻辑缺陷 | 🐛 | -| **`doc`** | 改进文档、README 或注释 | 📖 | -| **`translation`** | 提供多语言翻译支持 | 🌍 | -| **`review`** | 进行代码审查 (Code Review) | 👀 | -| **`design`** | 提供 UI/UX 设计或图标 | 🎨 | -| **`question`** | 在讨论区回答用户问题 | 💬 | -| **`tutorial`** | 编写教程或使用指南 | ✅ | - -### 3. 核心区别:`ideas` vs `code` - -- **`ideas`**: 贡献者提供了核心思路、逻辑优化方案或功能需求,但未直接编写代码。 -- **`code`**: 贡献者直接编写并提交了 Pull Request。 -- **组合使用**: 如果贡献者既提出了方案又完成了实现,应同时添加:`for ideas, code`。 - -### 4. 
多次贡献处理 (Multiple Contributions) - -All Contributors 支持勋章累加,无需担心重复添加: - -- **累加勋章**: 如果贡献者已在列表中,再次发送指令指定新类型(如 `@all-contributors please add @user for doc`),Bot 会自动将新勋章追加到该用户头像下方。 -- **一次性添加**: 支持在单条指令中列出所有类型:`for code, doc, ideas`。 -- **手动修正**: 若需删除或修正勋章,需手动编辑 `.all-contributorsrc` 文件中的 `contributions` 数组。 - ---- - -## �📚 参考资源 (Reference Resources) +## 📚 参考资源 (Reference Resources) - [Action 插件模板 (英文)](plugins/actions/ACTION_PLUGIN_TEMPLATE.py) - [Action 插件模板 (中文)](plugins/actions/ACTION_PLUGIN_TEMPLATE_CN.py) @@ -1647,112 +852,3 @@ GitHub: [Fu-Jie/awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui) ## License MIT License - ---- - -## 📝 Commit Message Guidelines - -**Commit messages MUST be in English.** Do not use Chinese. - -### Format -Follow the [Conventional Commits](https://www.conventionalcommits.org/) specification: - -- `feat`: New feature -- `fix`: Bug fix -- `docs`: Documentation only changes -- `style`: Changes that do not affect the meaning of the code (white-space, formatting, etc) -- `refactor`: A code change that neither fixes a bug nor adds a feature -- `perf`: A code change that improves performance -- `test`: Adding missing tests or correcting existing tests -- `chore`: Changes to the build process or auxiliary tools and libraries such as documentation generation - -### Examples - -✅ **Good:** -- `feat: add new export to pdf plugin` -- `fix: resolve icon rendering issue in documentation` -- `docs: update README with installation steps` - -❌ **Bad:** -- `新增导出PDF插件` (Chinese is not allowed) -- `update code` (Too vague) - ---- - -## 🤖 Git Operations (Agent Rules) - -**重要规则 (CRITICAL RULES FOR AI AGENTS)**: - -AI Agent(如 Copilot、Gemini、Claude 等)在执行 Git 操作时必须遵守以下规则: - -| 操作 (Operation) | 允许 (Allowed) | 说明 (Description) | -|-----------------|---------------|---------------------| -| 创建功能分支 | ✅ 允许 | `git checkout -b feature/xxx` | -| 推送到功能分支 | ✅ 允许 | `git push origin feature/xxx` | -| 直接推送到 main | ❌ 禁止 | `git push origin main` 需要用户手动执行 | -| 
合并到 main | ❌ 禁止 | 任何合并操作需要用户明确批准 | -| Rebase 到 main | ❌ 禁止 | 任何 rebase 操作需要用户明确批准 | - -**规则详解 (Rule Details)**: - -1. **Feature Branches Allowed**: Agent **可以**创建新的功能分支并推送到远程仓库 -2. **No Direct Push to Main**: Agent **禁止**直接推送任何更改到 `main` 分支 -3. **No Auto-Merge**: Agent **禁止**在未经用户明确批准的情况下合并任何分支到 `main` -4. **User Approval Required**: 任何影响 `main` 分支的操作(push、merge、rebase)都需要用户明确批准 - -> [!CAUTION] -> 违反上述规则可能导致代码库不稳定或触发意外的 CI/CD 流程。Agent 应始终在功能分支上工作,并让用户决定何时合并到主分支。 - ---- - -## ⏳ 长时间运行任务通知 (Long-running Task Notifications) - -如果一个前台任务(Foreground Task)的运行时间预计超过 **3秒**,必须实现用户通知机制,以避免用户感到困惑。 - -**要求 (Requirements):** - -1. **初始通知 (Initial Notification)**: 任务开始时**立即**发送第一条通知,告知用户正在处理中(例如:“正在使用 AI 生成中...”)。 -2. **周期性通知 (Periodic Notification)**: 之后每隔 **5秒** 发送一次通知,告知用户任务仍在运行中。 -3. **完成清理 (Cleanup)**: 任务完成后,应自动取消通知任务。 - -**代码示例 (Code Example):** - -```python -import asyncio - -async def long_running_task_with_notification(self, event_emitter, ...): - # 定义实际任务 - async def actual_task(): - # ... 执行耗时操作 ... 
- return result - - # 定义通知任务 - async def notification_task(): - # 立即发送首次通知 - if event_emitter: - await self._send_notification(event_emitter, "info", "正在使用 AI 生成中...") - - # 之后每5秒通知一次 - while True: - await asyncio.sleep(5) - if event_emitter: - await self._send_notification(event_emitter, "info", "仍在处理中,请耐心等待...") - - # 并发运行任务 - task_future = asyncio.ensure_future(actual_task()) - notify_future = asyncio.ensure_future(notification_task()) - - # 等待任务完成 - done, pending = await asyncio.wait( - [task_future, notify_future], - return_when=asyncio.FIRST_COMPLETED - ) - - # 取消通知任务 - if not notify_future.done(): - notify_future.cancel() - - # 获取结果 - if task_future in done: - return task_future.result() -``` diff --git a/docs/plugins/actions/knowledge-card.md b/docs/plugins/actions/flash-card.md similarity index 94% rename from docs/plugins/actions/knowledge-card.md rename to docs/plugins/actions/flash-card.md index cc4a832..602c113 100644 --- a/docs/plugins/actions/knowledge-card.md +++ b/docs/plugins/actions/flash-card.md @@ -1,9 +1,9 @@ -# Knowledge Card +# Flash Card Action -v0.2.2 +v0.2.4 -Quickly generates beautiful learning memory cards, perfect for studying and quick memorization. +Quickly generates beautiful flashcards from text, extracting key points and categories. 
--- diff --git a/docs/plugins/actions/knowledge-card.zh.md b/docs/plugins/actions/flash-card.zh.md similarity index 98% rename from docs/plugins/actions/knowledge-card.zh.md rename to docs/plugins/actions/flash-card.zh.md index 6c55446..bfd4293 100644 --- a/docs/plugins/actions/knowledge-card.zh.md +++ b/docs/plugins/actions/flash-card.zh.md @@ -1,7 +1,7 @@ # Knowledge Card(知识卡片) Action -v0.2.0 +v0.2.4 快速生成精美的学习记忆卡片,适合学习和速记。 diff --git a/docs/plugins/actions/index.md b/docs/plugins/actions/index.md index dc6dd46..643d53f 100644 --- a/docs/plugins/actions/index.md +++ b/docs/plugins/actions/index.md @@ -23,7 +23,7 @@ Actions are interactive plugins that: Intelligently analyzes text content and generates interactive mind maps with beautiful visualizations. - **Version:** 0.8.0 + **Version:** 0.9.1 [:octicons-arrow-right-24: Documentation](smart-mind-map.md) @@ -37,15 +37,15 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](smart-infographic.md) -- :material-card-text:{ .lg .middle } **Knowledge Card** +- :material-card-text:{ .lg .middle } **Flash Card** --- - Quickly generates beautiful learning memory cards, perfect for studying and memorization. + Quickly generates beautiful flashcards from text, extracting key points and categories. - **Version:** 0.2.2 + **Version:** 0.2.4 - [:octicons-arrow-right-24: Documentation](knowledge-card.md) + [:octicons-arrow-right-24: Documentation](flash-card.md) - :material-file-excel:{ .lg .middle } **Export to Excel** @@ -77,15 +77,7 @@ Actions are interactive plugins that: [:octicons-arrow-right-24: Documentation](deep-dive.md) -- :material-image-text:{ .lg .middle } **Infographic to Markdown** - --- - - AI-powered infographic generator that renders SVG and embeds it as Markdown Data URL image. 
- - **Version:** 1.0.0 - - [:octicons-arrow-right-24: Documentation](infographic-markdown.md) diff --git a/docs/plugins/actions/smart-mind-map.md b/docs/plugins/actions/smart-mind-map.md index 4646d3e..3f11cf2 100644 --- a/docs/plugins/actions/smart-mind-map.md +++ b/docs/plugins/actions/smart-mind-map.md @@ -1,7 +1,7 @@ # Smart Mind Map Action -v0.8.0 +v0.9.1 Intelligently analyzes text content and generates interactive mind maps for better visualization and understanding. diff --git a/docs/plugins/actions/smart-mind-map.zh.md b/docs/plugins/actions/smart-mind-map.zh.md index 43a1f06..7c6bdba 100644 --- a/docs/plugins/actions/smart-mind-map.zh.md +++ b/docs/plugins/actions/smart-mind-map.zh.md @@ -1,7 +1,7 @@ # Smart Mind Map(智能思维导图) Action -v0.8.0 +v0.9.1 智能分析文本内容,生成交互式思维导图,帮助你更直观地理解信息结构。 diff --git a/docs/plugins/filters/gemini-manifold-companion.md b/docs/plugins/filters/gemini-manifold-companion.md deleted file mode 100644 index 387901c..0000000 --- a/docs/plugins/filters/gemini-manifold-companion.md +++ /dev/null @@ -1,54 +0,0 @@ -# Gemini Manifold Companion - -Filter -v0.3.2 - -Companion filter for the Gemini Manifold pipe plugin, providing enhanced functionality. - ---- - -## Overview - -The Gemini Manifold Companion works alongside the [Gemini Manifold Pipe](../pipes/gemini-manifold.md) to provide additional processing and enhancement for Gemini model integrations. - -## Features - -- :material-handshake: **Seamless Integration**: Works with Gemini Manifold pipe -- :material-format-text: **Message Formatting**: Optimizes messages for Gemini -- :material-shield: **Error Handling**: Graceful handling of API issues -- :material-tune: **Fine-tuning**: Additional configuration options - ---- - -## Installation - -1. First, install the [Gemini Manifold Pipe](../pipes/gemini-manifold.md) -2. Download the companion filter: [`gemini_manifold_companion.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters/gemini_manifold_companion) -3. 
Upload to OpenWebUI: **Admin Panel** → **Settings** → **Functions** -4. Enable the filter - ---- - -## Configuration - -| Option | Type | Default | Description | -|--------|------|---------|-------------| -| `auto_format` | boolean | `true` | Auto-format messages for Gemini | -| `handle_errors` | boolean | `true` | Enable error handling | - ---- - -## Requirements - -!!! warning "Dependency" - This filter requires the **Gemini Manifold Pipe** to be installed and configured. - -!!! note "Prerequisites" - - OpenWebUI v0.3.0 or later - - Gemini Manifold Pipe installed - ---- - -## Source Code - -[:fontawesome-brands-github: View on GitHub](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters/gemini_manifold_companion){ .md-button } diff --git a/docs/plugins/filters/gemini-manifold-companion.zh.md b/docs/plugins/filters/gemini-manifold-companion.zh.md deleted file mode 100644 index 11c1dba..0000000 --- a/docs/plugins/filters/gemini-manifold-companion.zh.md +++ /dev/null @@ -1,54 +0,0 @@ -# Gemini Manifold Companion - -Filter -v0.3.2 - -Gemini Manifold Pipe 的伴随过滤器,用于增强 Gemini 集成的处理效果。 - ---- - -## 概览 - -Gemini Manifold Companion 与 [Gemini Manifold Pipe](../pipes/gemini-manifold.md) 搭配使用,为 Gemini 模型集成提供额外的处理与优化。 - -## 功能特性 - -- :material-handshake: **无缝协同**:与 Gemini Manifold Pipe 配合工作 -- :material-format-text: **消息格式化**:针对 Gemini 优化消息 -- :material-shield: **错误处理**:更友好的 API 异常处理 -- :material-tune: **精细配置**:提供额外调优选项 - ---- - -## 安装 - -1. 先安装 [Gemini Manifold Pipe](../pipes/gemini-manifold.md) -2. 下载伴随过滤器:[`gemini_manifold_companion.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters/gemini_manifold_companion) -3. 上传到 OpenWebUI:**Admin Panel** → **Settings** → **Functions** -4. 启用过滤器 - ---- - -## 配置项 - -| 选项 | 类型 | 默认值 | 说明 | -|--------|------|---------|-------------| -| `auto_format` | boolean | `true` | 为 Gemini 自动格式化消息 | -| `handle_errors` | boolean | `true` | 开启错误处理 | - ---- - -## 运行要求 - -!!! 
warning "依赖" - 本过滤器需要先安装并配置 **Gemini Manifold Pipe**。 - -!!! note "前置条件" - - OpenWebUI v0.3.0 及以上 - - 已安装 Gemini Manifold Pipe - ---- - -## 源码 - -[:fontawesome-brands-github: 在 GitHub 查看](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters/gemini_manifold_companion){ .md-button } diff --git a/docs/plugins/filters/index.md b/docs/plugins/filters/index.md index 5d125a9..c48c6a7 100644 --- a/docs/plugins/filters/index.md +++ b/docs/plugins/filters/index.md @@ -36,15 +36,7 @@ Filters act as middleware in the message pipeline: [:octicons-arrow-right-24: Documentation](context-enhancement.md) -- :material-google:{ .lg .middle } **Gemini Manifold Companion** - --- - - Companion filter for the Gemini Manifold pipe plugin. - - **Version:** 1.7.0 - - [:octicons-arrow-right-24: Documentation](gemini-manifold-companion.md) - :material-format-paint:{ .lg .middle } **Markdown Normalizer** @@ -52,10 +44,30 @@ Filters act as middleware in the message pipeline: Fixes common Markdown formatting issues in LLM outputs, including Mermaid syntax, code blocks, and LaTeX formulas. - **Version:** 1.0.1 + **Version:** 1.1.2 [:octicons-arrow-right-24: Documentation](markdown_normalizer.md) +- :material-merge:{ .lg .middle } **Multi-Model Context Merger** + + --- + + Automatically merges context from multiple model responses in the previous turn, enabling collaborative answers. + + **Version:** 0.1.0 + + [:octicons-arrow-right-24: Documentation](multi-model-context-merger.md) + +- :material-file-document-multiple:{ .lg .middle } **Web Gemini Multimodal Filter** + + --- + + A powerful filter that provides multimodal capabilities (PDF, Office, Images, Audio, Video) to any model in OpenWebUI. 
+ + **Version:** 0.3.2 + + [:octicons-arrow-right-24: Documentation](web-gemini-multimodel.md) + --- diff --git a/docs/plugins/filters/multi-model-context-merger.md b/docs/plugins/filters/multi-model-context-merger.md new file mode 100644 index 0000000..e590db9 --- /dev/null +++ b/docs/plugins/filters/multi-model-context-merger.md @@ -0,0 +1,35 @@ +# Multi-Model Context Merger + +Filter +v0.1.0 + +Automatically merges context from multiple model responses in the previous turn, enabling collaborative answers. + +--- + +## Overview + +This filter detects when multiple models have responded in the previous turn (e.g., using "Arena" mode or multiple models selected). It consolidates these responses and injects them as context for the current turn, allowing the next model to see what others have said. + +## Features + +- :material-merge: **Auto-Merge**: Consolidates responses from multiple models into a single context block. +- :material-format-list-group: **Structured Injection**: Uses XML-like tags (`<model_response>`) to separate different model outputs. +- :material-robot-confused: **Collaboration**: Enables models to build upon or critique each other's answers. + +--- + +## Installation + +1. Download the plugin file: [`multi_model_context_merger.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters) +2. Upload to OpenWebUI: **Admin Panel** → **Settings** → **Functions** +3. Enable the filter. + +--- + +## Usage + +1. Select **multiple models** in the chat (or use Arena mode). +2. Ask a question. All models will respond. +3. Ask a follow-up question. +4. The filter will inject the previous responses from ALL models into the context of the current model(s). 
diff --git a/docs/plugins/filters/multi-model-context-merger.zh.md b/docs/plugins/filters/multi-model-context-merger.zh.md new file mode 100644 index 0000000..7e237f6 --- /dev/null +++ b/docs/plugins/filters/multi-model-context-merger.zh.md @@ -0,0 +1,35 @@ +# 多模型上下文合并 (Multi-Model Context Merger) + +Filter +v0.1.0 + +自动合并上一轮中多个模型的回答上下文,实现协作问答。 + +--- + +## 概述 + +此过滤器检测上一轮是否由多个模型回复(例如使用“竞技场”模式或选择了多个模型)。它将这些回复合并并作为上下文注入到当前轮次,使下一个模型能够看到其他模型之前所说的内容。 + +## 功能特性 + +- :material-merge: **自动合并**: 将多个模型的回复合并为单个上下文块。 +- :material-format-list-group: **结构化注入**: 使用类似 XML 的标签 (`<model_response>`) 分隔不同模型的输出。 +- :material-robot-confused: **协作**: 允许模型基于彼此的回答进行构建或评论。 + +--- + +## 安装 + +1. 下载插件文件: [`multi_model_context_merger.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters) +2. 上传到 OpenWebUI: **管理员面板** → **设置** → **函数** +3. 启用过滤器。 + +--- + +## 使用方法 + +1. 在聊天中选择 **多个模型** (或使用竞技场模式)。 +2. 提问。所有模型都会回答。 +3. 提出后续问题。 +4. 过滤器会将所有模型之前的回答注入到当前模型的上下文中。 diff --git a/docs/plugins/filters/web-gemini-multimodel.md b/docs/plugins/filters/web-gemini-multimodel.md new file mode 100644 index 0000000..62299b6 --- /dev/null +++ b/docs/plugins/filters/web-gemini-multimodel.md @@ -0,0 +1,51 @@ +# Web Gemini Multimodal Filter + +Filter +v0.3.2 + +A powerful filter that provides multimodal capabilities (PDF, Office, Images, Audio, Video) to any model in OpenWebUI. + +--- + +## Overview + +This plugin enables multimodal processing for any model by leveraging Gemini as an analyzer. It supports direct file processing for Gemini models and "Analyzer Mode" for other models (like DeepSeek, Llama), where Gemini analyzes the file and injects the result as context. + +## Features + +- :material-file-document-multiple: **Multimodal Support**: Process PDF, Word, Excel, PowerPoint, EPUB, MP3, MP4, and Images. +- :material-router-network: **Smart Routing**: + - **Direct Mode**: Files are passed directly to Gemini models. 
+ - **Analyzer Mode**: Files are analyzed by Gemini, and results are injected into the context for other models. +- :material-history: **Persistent Context**: Maintains session history across multiple turns using OpenWebUI Chat ID. +- :material-database-check: **Deduplication**: Automatically tracks analyzed file hashes to prevent redundant processing. +- :material-subtitles: **Subtitle Enhancement**: Specialized mode for generating high-quality SRT subtitles from video/audio. + +--- + +## Installation + +1. Download the plugin file: [`web_gemini_multimodel.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters/web_gemini_multimodel_filter) +2. Upload to OpenWebUI: **Admin Panel** → **Settings** → **Functions** +3. Configure the Gemini Adapter URL and other settings. +4. Enable the filter globally or per chat. + +--- + +## Configuration + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `gemini_adapter_url` | string | `http://...` | URL of the Gemini Adapter service | +| `target_model_keyword` | string | `"webgemini"` | Keyword to identify Gemini models | +| `mode` | string | `"auto"` | `auto`, `direct`, or `analyzer` | +| `analyzer_base_model_id` | string | `"gemini-3.0-pro"` | Model used for document analysis | +| `subtitle_keywords` | string | `"字幕,srt"` | Keywords to trigger subtitle flow | + +--- + +## Usage + +1. **Upload a file** (PDF, Image, Video, etc.) in the chat. +2. **Ask a question** about the file. +3. The plugin will automatically process the file and provide context to your selected model. 
diff --git a/docs/plugins/filters/web-gemini-multimodel.zh.md b/docs/plugins/filters/web-gemini-multimodel.zh.md new file mode 100644 index 0000000..49768af --- /dev/null +++ b/docs/plugins/filters/web-gemini-multimodel.zh.md @@ -0,0 +1,51 @@ +# Web Gemini 多模态过滤器 + +Filter +v0.3.2 + +一个强大的过滤器,为 OpenWebUI 中的任何模型提供多模态能力:PDF、Office、图片、音频、视频等。 + +--- + +## 概述 + +此插件利用 Gemini 作为分析器,为任何模型提供多模态处理能力。它支持 Gemini 模型的直接文件处理,以及其他模型(如 DeepSeek, Llama)的“分析器模式”,即由 Gemini 分析文件并将结果注入上下文。 + +## 功能特性 + +- :material-file-document-multiple: **多模态支持**: 处理 PDF, Word, Excel, PowerPoint, EPUB, MP3, MP4 和图片。 +- :material-router-network: **智能路由**: + - **直连模式 (Direct Mode)**: 对于 Gemini 模型,文件直接传递(原生多模态)。 + - **分析器模式 (Analyzer Mode)**: 对于非 Gemini 模型,文件由 Gemini 分析,结果注入为上下文。 +- :material-history: **持久上下文**: 利用 OpenWebUI 的 Chat ID 跨多轮对话维护会话历史。 +- :material-database-check: **数据库去重**: 自动记录已分析文件的哈希值,防止重复上传和分析。 +- :material-subtitles: **字幕增强**: 针对视频/音频上传的专用模式,生成高质量 SRT 字幕。 + +--- + +## 安装 + +1. 下载插件文件: [`web_gemini_multimodel.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/filters/web_gemini_multimodel_filter) +2. 上传到 OpenWebUI: **管理员面板** → **设置** → **函数** +3. 配置 Gemini Adapter URL 和其他设置。 +4. 启用过滤器。 + +--- + +## 配置 + +| 选项 | 类型 | 默认值 | 描述 | +|------|------|--------|------| +| `gemini_adapter_url` | string | `http://...` | Gemini Adapter 服务的 URL | +| `target_model_keyword` | string | `"webgemini"` | 识别 Gemini 模型的关键字 | +| `mode` | string | `"auto"` | `auto` (自动), `direct` (直连), 或 `analyzer` (分析器) | +| `analyzer_base_model_id` | string | `"gemini-3.0-pro"` | 用于文档分析的模型 | +| `subtitle_keywords` | string | `"字幕,srt"` | 触发字幕流程的关键字 | + +--- + +## 使用方法 + +1. 在聊天中 **上传文件** (PDF, 图片, 视频等)。 +2. 关于文件 **提问**。 +3. 
插件会自动处理文件并为所选模型提供上下文。 diff --git a/docs/plugins/pipes/gemini-manifold.md b/docs/plugins/pipes/gemini-manifold.md deleted file mode 100644 index 2d354a5..0000000 --- a/docs/plugins/pipes/gemini-manifold.md +++ /dev/null @@ -1,106 +0,0 @@ -# Gemini Manifold - -Pipe -v1.0.0 - -Integration pipeline for Google's Gemini models with full streaming support. - ---- - -## Overview - -The Gemini Manifold pipe provides seamless integration with Google's Gemini AI models. It exposes Gemini models as selectable options in OpenWebUI, allowing you to use them just like any other model. - -## Features - -- :material-google: **Full Gemini Support**: Access all Gemini model variants -- :material-stream: **Streaming**: Real-time response streaming -- :material-image: **Multimodal**: Support for images and text -- :material-shield: **Error Handling**: Robust error management -- :material-tune: **Configurable**: Customize model parameters - ---- - -## Installation - -1. Download the plugin file: [`gemini_manifold.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/pipes/gemini_mainfold) -2. Upload to OpenWebUI: **Admin Panel** → **Settings** → **Functions** -3. Configure your Gemini API key -4. Select Gemini models from the model dropdown - ---- - -## Configuration - -| Option | Type | Required | Description | -|--------|------|----------|-------------| -| `GEMINI_API_KEY` | string | Yes | Your Google AI Studio API key | -| `DEFAULT_MODEL` | string | No | Default Gemini model to use | -| `TEMPERATURE` | float | No | Response temperature (0-1) | -| `MAX_TOKENS` | integer | No | Maximum response tokens | - ---- - -## Available Models - -When configured, the following models become available: - -- `gemini-pro` - Text-only model -- `gemini-pro-vision` - Multimodal model -- `gemini-1.5-pro` - Latest Pro model -- `gemini-1.5-flash` - Fast response model - ---- - -## Usage - -1. After installation, go to any chat -2. Open the model selector dropdown -3. 
Look for models prefixed with your pipe name -4. Select a Gemini model -5. Start chatting! - ---- - -## Getting an API Key - -1. Visit [Google AI Studio](https://makersuite.google.com/app/apikey) -2. Create a new API key -3. Copy the key and paste it in the plugin configuration - -!!! warning "API Key Security" - Keep your API key secure. Never share it publicly or commit it to version control. - ---- - -## Companion Filter - -For enhanced functionality, consider installing the [Gemini Manifold Companion](../filters/gemini-manifold-companion.md) filter. - ---- - -## Requirements - -!!! note "Prerequisites" - - OpenWebUI v0.3.0 or later - - Valid Gemini API key - - Internet access to Google AI APIs - ---- - -## Troubleshooting - -??? question "Models not appearing?" - Ensure your API key is correctly configured and the plugin is enabled. - -??? question "API errors?" - Check your API key validity and quota limits in Google AI Studio. - -??? question "Slow responses?" - Consider using `gemini-1.5-flash` for faster response times. - ---- - -## Source Code - -[:fontawesome-brands-github: View on GitHub](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/pipes/gemini_mainfold){ .md-button } diff --git a/docs/plugins/pipes/gemini-manifold.zh.md b/docs/plugins/pipes/gemini-manifold.zh.md deleted file mode 100644 index d9e0c3c..0000000 --- a/docs/plugins/pipes/gemini-manifold.zh.md +++ /dev/null @@ -1,106 +0,0 @@ -# Gemini Manifold - -Pipe -v1.0.0 - -面向 Google Gemini 模型的集成流水线,支持完整流式返回。 - ---- - -## 概览 - -Gemini Manifold Pipe 提供与 Google Gemini AI 模型的无缝集成。它会将 Gemini 模型作为可选项暴露在 OpenWebUI 中,你可以像使用其他模型一样使用它们。 - -## 功能特性 - -- :material-google: **完整 Gemini 支持**:可使用所有 Gemini 模型变体 -- :material-stream: **流式输出**:实时流式响应 -- :material-image: **多模态**:支持图像与文本 -- :material-shield: **错误处理**:健壮的错误管理 -- :material-tune: **可配置**:可自定义模型参数 - ---- - -## 安装 - -1. 下载插件文件:[`gemini_manifold.py`](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/pipes/gemini_mainfold) -2. 
上传到 OpenWebUI:**Admin Panel** → **Settings** → **Functions** -3. 配置你的 Gemini API Key -4. 在模型下拉中选择 Gemini 模型 - ---- - -## 配置 - -| 选项 | 类型 | 是否必填 | 说明 | -|--------|------|----------|-------------| -| `GEMINI_API_KEY` | string | 是 | 你的 Google AI Studio API Key | -| `DEFAULT_MODEL` | string | 否 | 默认使用的 Gemini 模型 | -| `TEMPERATURE` | float | 否 | 输出温度(0-1) | -| `MAX_TOKENS` | integer | 否 | 最大回复 token 数 | - ---- - -## 可用模型 - -配置完成后,你可以选择以下模型: - -- `gemini-pro` —— 纯文本模型 -- `gemini-pro-vision` —— 多模态模型 -- `gemini-1.5-pro` —— 最新 Pro 模型 -- `gemini-1.5-flash` —— 快速响应模型 - ---- - -## 使用方法 - -1. 安装后进入任意对话 -2. 打开模型选择下拉 -3. 查找以 Pipe 名称前缀的模型 -4. 选择 Gemini 模型 -5. 开始聊天! - ---- - -## 获取 API Key - -1. 访问 [Google AI Studio](https://makersuite.google.com/app/apikey) -2. 创建新的 API Key -3. 复制并粘贴到插件配置中 - -!!! warning "API Key 安全" - 请妥善保管你的 API Key,不要公开或提交到版本库。 - ---- - -## 伴随过滤器 - -如需增强功能,可安装 [Gemini Manifold Companion](../filters/gemini-manifold-companion.md) 过滤器。 - ---- - -## 运行要求 - -!!! note "前置条件" - - OpenWebUI v0.3.0 及以上 - - 有效的 Gemini API Key - - 可访问 Google AI API 的网络 - ---- - -## 常见问题 - -??? question "模型没有出现?" - 请确认 API Key 配置正确且插件已启用。 - -??? question "出现 API 错误?" - 检查 Google AI Studio 中的 Key 有效性和额度限制。 - -??? question "响应较慢?" - 可尝试使用 `gemini-1.5-flash` 获得更快速度。 - ---- - -## 源码 - -[:fontawesome-brands-github: 在 GitHub 查看](https://github.com/Fu-Jie/awesome-openwebui/tree/main/plugins/pipes/gemini_mainfold){ .md-button } diff --git a/docs/plugins/pipes/index.md b/docs/plugins/pipes/index.md index 4596567..5b6346f 100644 --- a/docs/plugins/pipes/index.md +++ b/docs/plugins/pipes/index.md @@ -15,19 +15,7 @@ Pipes allow you to: ## Available Pipe Plugins -
-- :material-google:{ .lg .middle } **Gemini Manifold** - - --- - - Integration pipeline for Google's Gemini models with full streaming support. - - **Version:** 1.0.0 - - [:octicons-arrow-right-24: Documentation](gemini-manifold.md) - -
--- diff --git a/plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py b/plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py deleted file mode 100644 index 1a3ec58..0000000 --- a/plugins/filters/gemini_manifold_companion/gemini_manifold_companion.py +++ /dev/null @@ -1,1102 +0,0 @@ -""" -title: Gemini Manifold Companion -id: gemini_manifold_companion -description: A companion filter for "Gemini Manifold google_genai" pipe providing enhanced functionality. -author: suurt8ll -author_url: https://github.com/suurt8ll -funding_url: https://github.com/suurt8ll/open_webui_functions -license: MIT -version: 1.7.0 -""" - -VERSION = "1.7.0" - -# This filter can detect that a feature like web search or code execution is enabled in the front-end, -# set the feature back to False so Open WebUI does not run it's own logic and then -# pass custom values to "Gemini Manifold google_genai" that signal which feature was enabled and intercepted. - -import copy -import json -from google.genai import types - -import sys -import time -import asyncio -import aiohttp -from fastapi import Request -from fastapi.datastructures import State -from loguru import logger -from pydantic import BaseModel, Field -import pydantic_core -from collections.abc import Awaitable, Callable -from typing import Any, Literal, TYPE_CHECKING, cast - -from open_webui.models.functions import Functions - -if TYPE_CHECKING: - from loguru import Record - from loguru._handler import Handler # type: ignore - from open_webui.utils.manifold_types import * # My personal types in a separate file for more robustness. 
- -# According to https://ai.google.dev/gemini-api/docs/models -ALLOWED_GROUNDING_MODELS = { - "gemini-2.5-pro", - "gemini-flash-latest", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash", - "gemini-flash-lite-latest", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-lite", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-pro-preview-06-05", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-flash-preview-04-17", - "gemini-2.5-pro-preview-03-25", - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-pro-exp", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash", - "gemini-2.0-flash-exp", - "gemini-2.0-flash-001", - "gemini-1.5-pro", - "gemini-1.5-flash", - "gemini-1.0-pro", -} -ALLOWED_CODE_EXECUTION_MODELS = { - "gemini-2.5-pro", - "gemini-flash-latest", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash", - "gemini-flash-lite-latest", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-lite", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-pro-preview-06-05", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-flash-preview-04-17", - "gemini-2.5-pro-preview-03-25", - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-pro-exp", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash", - "gemini-2.0-flash-exp", - "gemini-2.0-flash-001", -} - -# Default timeout for URL resolution -# TODO: Move to Pipe.Valves. -DEFAULT_URL_TIMEOUT = aiohttp.ClientTimeout(total=10) # 10 seconds total timeout - -# Setting auditable=False avoids duplicate output for log levels that would be printed out by the main log. 
-log = logger.bind(auditable=False) - - -class Filter: - - class Valves(BaseModel): - FORCE_NON_STREAM_FOR_IMAGE_MODELS: bool = Field( - default=True, - description="""Automatically disable streaming for image generation models - (e.g., gemini-2.5-flash-image-preview) to prevent 'Chunk too big' errors. - Set to False to attempt streaming with these models.""", - ) - SET_TEMP_TO_ZERO: bool = Field( - default=False, - description="""Decide if you want to set the temperature to 0 for grounded answers, - Google reccomends it in their docs.""", - ) - GROUNDING_DYNAMIC_RETRIEVAL_THRESHOLD: float | None = Field( - default=None, - description="""See https://ai.google.dev/gemini-api/docs/grounding?lang=python#dynamic-threshold for more information. - Only supported for 1.0 and 1.5 models""", - ) - USE_PERMISSIVE_SAFETY: bool = Field( - default=False, - description="""Whether to request relaxed safety filtering. - Default value is False.""", - ) - BYPASS_BACKEND_RAG: bool = Field( - default=True, - description="""Decide if you want ot bypass Open WebUI's RAG and send your documents directly to Google API. - Default value is True.""", - ) - LOG_LEVEL: Literal[ - "TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL" - ] = Field( - default="INFO", - description="Select logging level. Use `docker logs -f open-webui` to view logs.", - ) - - # TODO: Support user settting through UserValves. - - def __init__(self): - # This hack makes the valves values available to the `__init__` method. - # TODO: Get the id from the frontmatter instead of hardcoding it. 
- valves = Functions.get_function_valves_by_id("gemini_manifold_companion") - self.valves = self.Valves(**(valves if valves else {})) - self.log_level = self.valves.LOG_LEVEL - self._add_log_handler() - log.success("Function has been initialized.") - log.trace("Full self object:", payload=self.__dict__) - - def inlet(self, body: "Body", __metadata__: dict[str, Any]) -> "Body": - """Modifies the incoming request payload before it's sent to the LLM. Operates on the `form_data` dictionary.""" - - # Detect log level change inside self.valves - if self.log_level != self.valves.LOG_LEVEL: - log.info( - f"Detected log level change: {self.log_level=} and {self.valves.LOG_LEVEL=}. " - "Running the logging setup again." - ) - self._add_log_handler() - - log.debug( - f"inlet method has been called. Gemini Manifold Companion version is {VERSION}" - ) - - canonical_model_name, is_manifold = self._get_model_name(body) - # Exit early if we are filtering an unsupported model. - if not is_manifold: - log.debug( - "Returning the original body object because conditions for proceeding are not fulfilled." - ) - return body - - # Check if it's a relevant model (supports either feature) - is_grounding_model = canonical_model_name in ALLOWED_GROUNDING_MODELS - is_code_exec_model = canonical_model_name in ALLOWED_CODE_EXECUTION_MODELS - log.debug(f"{is_grounding_model=}, {is_code_exec_model=}") - - features = body.get("features", {}) - log.debug(f"body.features:", payload=features) - - # Ensure features field exists - metadata = body.get("metadata") - metadata_features = metadata.get("features") - if metadata_features is None: - metadata_features = cast(Features, {}) - metadata["features"] = metadata_features - - # Add the companion version to the payload for the pipe to consume. 
- metadata_features["gemini_manifold_companion_version"] = VERSION - - if is_grounding_model: - web_search_enabled = ( - features.get("web_search", False) - if isinstance(features, dict) - else False - ) - if web_search_enabled: - log.info( - "Search feature is enabled, disabling it and adding custom feature called grounding_w_google_search." - ) - # Disable web_search - features["web_search"] = False - # Use "Google Search Retrieval" for 1.0 and 1.5 models and "Google Search as a Tool for >=2.0 models". - if "1.0" in canonical_model_name or "1.5" in canonical_model_name: - metadata_features["google_search_retrieval"] = True - metadata_features["google_search_retrieval_threshold"] = ( - self.valves.GROUNDING_DYNAMIC_RETRIEVAL_THRESHOLD - ) - else: - metadata_features["google_search_tool"] = True - # Google suggest setting temperature to 0 if using grounding: - # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-with-google-search#:~:text=For%20ideal%20results%2C%20use%20a%20temperature%20of%200.0. - if self.valves.SET_TEMP_TO_ZERO: - log.info("Setting temperature to 0.") - body["temperature"] = 0 # type: ignore - if is_code_exec_model: - code_execution_enabled = ( - features.get("code_interpreter", False) - if isinstance(features, dict) - else False - ) - if code_execution_enabled: - log.info( - "Code interpreter feature is enabled, disabling it and adding custom feature called google_code_execution." - ) - # Disable code_interpreter - features["code_interpreter"] = False - metadata_features["google_code_execution"] = True - if self.valves.USE_PERMISSIVE_SAFETY: - log.info("Adding permissive safety settings to body.metadata") - metadata["safety_settings"] = self._get_permissive_safety_settings( - canonical_model_name - ) - if self.valves.BYPASS_BACKEND_RAG: - if __metadata__["chat_id"] == "local": - # TODO toast notification - log.warning( - "Bypassing Open WebUI's RAG is not possible for temporary chats. 
" - "The Manifold pipe requires a database entry to access uploaded files, " - "which temporary chats do not have. Falling back to Open WebUI's RAG." - ) - metadata_features["upload_documents"] = False - else: - log.info( - "BYPASS_BACKEND_RAG is enabled, bypassing Open WebUI RAG to let the Manifold pipe handle documents." - ) - if files := body.get("files"): - log.info( - f"Removing {len(files)} files from the Open WebUI RAG pipeline." - ) - body["files"] = [] - metadata_features["upload_documents"] = True - else: - log.info( - "BYPASS_BACKEND_RAG is disabled. Open WebUI's RAG will be used if applicable." - ) - metadata_features["upload_documents"] = False - - # The manifold pipe requires the backend to be in streaming mode to correctly - # process the AsyncGenerator it returns. We save the user's original - # streaming intent and then force the backend into streaming mode. - - user_stream_intent = body.get("stream", True) - image_generation_models = { - "gemini-2.0-flash-preview-image-generation", - "gemini-2.5-flash-image-preview", - "gemini-2.5-flash-image", - } - - # Check if the current model is an image generation model and if the - # user has enabled the non-streaming override for them. - if ( - self.valves.FORCE_NON_STREAM_FOR_IMAGE_MODELS - and canonical_model_name in image_generation_models - ): - log.info( - f"Image generation model '{canonical_model_name}' detected. " - "Forcing non-streaming mode to prevent potential 'Chunk too big' errors." - ) - # Override the user's intent to ensure stability. - user_stream_intent = False - - log.info( - f"Storing user's stream intent ({user_stream_intent}) into __metadata__. " - "Backend will be forced down the streaming path." - ) - metadata_features["stream"] = user_stream_intent - body["stream"] = True - - # TODO: Filter out the citation markers here. 
- - log.debug("inlet method has finished.") - return body - - def stream(self, event: dict) -> dict: - """Modifies the streaming response from the LLM in real-time. Operates on individual chunks of data.""" - return event - - async def outlet( - self, - body: "Body", - __request__: Request, - __metadata__: dict[str, Any], - __event_emitter__: Callable[["Event"], Awaitable[None]], - ) -> "Body": - """Modifies the complete response payload after it's received from the LLM. Operates on the final `body` dictionary.""" - - log.debug("outlet method has been called.") - - chat_id: str = __metadata__.get("chat_id", "") - message_id: str = __metadata__.get("message_id", "") - grounding_key = f"grounding_{chat_id}_{message_id}" - time_key = f"pipe_start_time_{chat_id}_{message_id}" - - app_state: State = __request__.app.state - log.debug(f"Checking for attributes for message {message_id} in request state.") - stored_metadata: types.GroundingMetadata | None = getattr( - app_state, grounding_key, None - ) - pipe_start_time: float | None = getattr(app_state, time_key, None) - - if stored_metadata: - log.info("Found grounding metadata, processing citations.") - log.trace("Stored grounding metadata:", payload=stored_metadata) - - current_content = body["messages"][-1]["content"] - if isinstance(current_content, list): - text_to_use = "" - for item in current_content: - if item.get("type") == "text": - item = cast("TextContent", item) - text_to_use = item["text"] - break - else: - text_to_use = current_content - - # Insert citation markers into the response text - cited_text = self._get_text_w_citation_markers( - stored_metadata, - text_to_use, - ) - - if cited_text: - content = body["messages"][-1]["content"] - if isinstance(content, list): - for item in content: - if item.get("type") == "text": - item = cast("TextContent", item) - item["text"] = cited_text - break - else: - body["messages"][-1]["content"] = cited_text - - # Emit sources to the front-end. 
- gs_supports = stored_metadata.grounding_supports - gs_chunks = stored_metadata.grounding_chunks - if gs_supports and gs_chunks: - await self._resolve_and_emit_sources( - grounding_chunks=gs_chunks, - supports=gs_supports, - event_emitter=__event_emitter__, - pipe_start_time=pipe_start_time, - ) - else: - log.info( - "Grounding metadata missing supports or chunks (checked in outlet); " - "skipping source resolution and emission." - ) - - # Emit status event with search queries - await self._emit_status_event_w_queries(stored_metadata, __event_emitter__) - - # Clean up state - delattr(app_state, grounding_key) - if hasattr(app_state, time_key): - delattr(app_state, time_key) - else: - log.info("No grounding metadata found in request state.") - - log.debug("outlet method has finished.") - return body - - # region 1. Helper methods inside the Filter class - - # region 1.1 Add citations - - def _get_text_w_citation_markers( - self, - grounding_metadata: types.GroundingMetadata, - raw_str: str, - ) -> str | None: - """ - Returns the model response with citation markers. - Thoughts, if present as THOUGHT_START_TAG...THOUGHT_END_TAG at the beginning of raw_str, - are preserved but excluded from the citation indexing process. - Everything up to the *last* THOUGHT_END_TAG tag is considered part of the thought. - """ - - supports = grounding_metadata.grounding_supports - grounding_chunks = grounding_metadata.grounding_chunks - if not supports or not grounding_chunks: - log.info( - "Grounding metadata missing supports or chunks, can't insert citation markers. " - "Response was probably just not grounded." 
- ) - return None - - log.trace("raw_str:", payload=raw_str, _log_truncation_enabled=False) - - thought_prefix = "" - content_for_citation_processing = raw_str - - THOUGHT_START_TAG = "= len(THOUGHT_START_TAG) - 1 - ): - thought_block_end_offset = last_end_thought_tag_idx + len( - THOUGHT_END_TAG - ) - thought_prefix = raw_str[:thought_block_end_offset] - content_for_citation_processing = raw_str[thought_block_end_offset:] - log.info( - "Model thoughts detected at the beginning of the response. " - "Citations will be processed on the content following the last thought block." - ) - else: - log.warning( - "Detected THOUGHT_START_TAG at the start of raw_str without a subsequent closing THOUGHT_END_TAG " - "or a malformed thought block. The entire raw_str will be processed for citations. " - "This might lead to incorrect marker placement if thoughts were intended and indices " - "are relative to content after thoughts." - ) - - processed_content_part_with_markers = content_for_citation_processing - - if content_for_citation_processing: - try: - modified_content_bytes = bytearray( - content_for_citation_processing.encode("utf-8") - ) - for support in reversed(supports): - segment = support.segment - indices = support.grounding_chunk_indices - if not ( - indices is not None - and segment - and segment.end_index is not None - ): - log.debug(f"Skipping support due to missing data: {support}") - continue - end_pos = segment.end_index - if not (0 <= end_pos <= len(modified_content_bytes)): - log.warning( - f"Support segment end_index ({end_pos}) is out of bounds for the processable content " - f"(length {len(modified_content_bytes)} bytes after potential thought stripping). " - f"Content (first 50 chars): '{content_for_citation_processing[:50]}...'. Skipping this support. 
Support: {support}" - ) - continue - citation_markers = "".join(f"[{index + 1}]" for index in indices) - encoded_citation_markers = citation_markers.encode("utf-8") - modified_content_bytes[end_pos:end_pos] = encoded_citation_markers - processed_content_part_with_markers = modified_content_bytes.decode( - "utf-8" - ) - except Exception as e: - log.error( - f"Error injecting citation markers into content: {e}. " - f"Using content part (after potential thought stripping) without new markers." - ) - else: - if raw_str and not content_for_citation_processing: - log.info( - "Content for citation processing is empty (e.g., raw_str contained only thoughts). " - "No citation markers will be injected." - ) - elif not raw_str: - log.warning("Raw string is empty, cannot inject citation markers.") - - final_result_str = thought_prefix + processed_content_part_with_markers - return final_result_str - - async def _resolve_url( - self, - session: aiohttp.ClientSession, - url: str, - timeout: aiohttp.ClientTimeout = DEFAULT_URL_TIMEOUT, - max_retries: int = 3, - base_delay: float = 0.5, - ) -> tuple[str, bool]: - """ - Resolves a given URL using the provided aiohttp session, with multiple retries on failure. - Returns the final URL and a boolean indicating success. - """ - if not url: - return "", False - for attempt in range(max_retries + 1): - try: - async with session.get( - url, - allow_redirects=True, - timeout=timeout, - ) as response: - final_url = str(response.url) - log.debug( - f"Resolved URL '{url}' to '{final_url}' after {attempt} retries" - ) - return final_url, True - except (asyncio.TimeoutError, aiohttp.ClientError) as e: - if attempt == max_retries: - log.error( - f"Failed to resolve URL '{url}' after {max_retries + 1} attempts: {e}" - ) - return url, False - else: - delay = min(base_delay * (2**attempt), 10.0) - log.warning( - f"Retry {attempt + 1}/{max_retries + 1} for URL '{url}': {e}. Waiting {delay:.1f}s..." 
- ) - await asyncio.sleep(delay) - except Exception as e: - log.error(f"Unexpected error resolving URL '{url}': {e}") - return url, False - return url, False - - async def _resolve_and_emit_sources( - self, - grounding_chunks: list[types.GroundingChunk], - supports: list[types.GroundingSupport], - event_emitter: Callable[["Event"], Awaitable[None]], - pipe_start_time: float | None, - ): - """ - Resolves URLs in the background and emits a chat completion event - containing only the source information, along with status updates. - """ - initial_metadatas: list[tuple[int, str]] = [] - for i, g_c in enumerate(grounding_chunks): - uri = None - if (web_info := g_c.web) and web_info.uri: - uri = web_info.uri - elif (maps_info := g_c.maps) and maps_info.uri: - uri = maps_info.uri - - if uri: - initial_metadatas.append((i, uri)) - - if not initial_metadatas: - log.info("No source URIs found, skipping source emission.") - return - - urls_to_resolve = [ - uri - for _, uri in initial_metadatas - if uri.startswith( - "https://vertexaisearch.cloud.google.com/grounding-api-redirect/" - ) - ] - resolved_uris_map = {} - - if urls_to_resolve: - num_urls = len(urls_to_resolve) - self._emit_status_update( - event_emitter, - f"Resolving {num_urls} source URLs...", - pipe_start_time, - ) - - try: - log.info(f"Resolving {num_urls} source URLs...") - async with aiohttp.ClientSession() as session: - tasks = [self._resolve_url(session, url) for url in urls_to_resolve] - results = await asyncio.gather(*tasks) - log.info("URL resolution completed.") - - resolved_uris = [res[0] for res in results] - resolved_uris_map = dict(zip(urls_to_resolve, resolved_uris)) - - success_count = sum(1 for _, success in results if success) - final_status_msg = ( - "URL resolution complete" - if success_count == num_urls - else f"Resolved {success_count}/{num_urls} URLs" - ) - self._emit_status_update( - event_emitter, final_status_msg, pipe_start_time, done=True - ) - - except Exception as e: - 
log.error(f"Error during URL resolution: {e}") - resolved_uris_map = {url: url for url in urls_to_resolve} - self._emit_status_update( - event_emitter, "URL resolution failed", pipe_start_time, done=True - ) - - source_metadatas_template: list["SourceMetadata"] = [ - {"source": None, "original_url": None, "supports": []} - for _ in grounding_chunks - ] - populated_metadatas = [m.copy() for m in source_metadatas_template] - - for chunk_index, original_uri in initial_metadatas: - final_uri = resolved_uris_map.get(original_uri, original_uri) - if 0 <= chunk_index < len(populated_metadatas): - populated_metadatas[chunk_index]["original_url"] = original_uri - populated_metadatas[chunk_index]["source"] = final_uri - else: - log.warning( - f"Chunk index {chunk_index} out of bounds when populating resolved URLs." - ) - - # Create a mapping from each chunk index to the text segments it supports. - chunk_index_to_segments: dict[int, list[types.Segment]] = {} - for support in supports: - segment = support.segment - indices = support.grounding_chunk_indices - if not (segment and segment.text and indices is not None): - continue - - for index in indices: - if index not in chunk_index_to_segments: - chunk_index_to_segments[index] = [] - chunk_index_to_segments[index].append(segment) - populated_metadatas[index]["supports"].append(support.model_dump()) # type: ignore - - valid_source_metadatas: list["SourceMetadata"] = [] - doc_list: list[str] = [] - - for i, meta in enumerate(populated_metadatas): - if meta.get("original_url") is not None: - valid_source_metadatas.append(meta) - - content_parts: list[str] = [] - chunk = grounding_chunks[i] - - if maps_info := chunk.maps: - title = maps_info.title or "N/A" - place_id = maps_info.place_id or "N/A" - content_parts.append(f"Title: {title}\nPlace ID: {place_id}") - - supported_segments = chunk_index_to_segments.get(i) - if supported_segments: - if content_parts: - content_parts.append("") # Add a blank line for separation - - # Use a 
set to show each unique snippet only once per source. - unique_snippets = { - (seg.text, seg.start_index, seg.end_index) - for seg in supported_segments - if seg.text is not None - } - - # Sort snippets by their appearance in the text. - sorted_snippets = sorted(unique_snippets, key=lambda s: s[1] or 0) - - snippet_strs = [ - f'- "{text}" (Indices: {start}-{end})' - for text, start, end in sorted_snippets - ] - content_parts.append("Supported text snippets:") - content_parts.extend(snippet_strs) - - doc_list.append("\n".join(content_parts)) - - sources_list: list["Source"] = [] - if valid_source_metadatas: - sources_list.append( - { - "source": {"name": "web_search"}, - "document": doc_list, - "metadata": valid_source_metadatas, - } - ) - - event: "ChatCompletionEvent" = { - "type": "chat:completion", - "data": {"sources": sources_list}, - } - await event_emitter(event) - log.info("Emitted sources event.") - log.trace("ChatCompletionEvent:", payload=event) - - async def _emit_status_event_w_queries( - self, - grounding_metadata: types.GroundingMetadata, - event_emitter: Callable[["Event"], Awaitable[None]], - ) -> None: - """ - Creates a StatusEvent with search URLs based on the web_search_queries - in the GroundingMetadata. This covers both Google Search and Google Maps grounding. - """ - if not grounding_metadata.web_search_queries: - log.debug("Grounding metadata does not contain any search queries.") - return - - search_queries = grounding_metadata.web_search_queries - if not search_queries: - log.debug("web_search_queries list is empty.") - return - - # The queries are used for grounding, so we link them to a general Google search page. 
- google_search_urls = [ - f"https://www.google.com/search?q={query}" for query in search_queries - ] - - status_event_data: StatusEventData = { - "action": "web_search", - "description": "This response was grounded with a Google tool", - "urls": google_search_urls, - } - status_event: StatusEvent = { - "type": "status", - "data": status_event_data, - } - await event_emitter(status_event) - log.info("Emitted grounding queries.") - log.trace("StatusEvent:", payload=status_event) - - # endregion 1.1 Add citations - - # region 1.2 Remove citation markers - # TODO: Remove citation markers from model input. - # endregion 1.2 Remove citation markers - - # region 1.3 Get permissive safety settings - - def _get_permissive_safety_settings( - self, model_name: str - ) -> list[types.SafetySetting]: - """Get safety settings based on model name and permissive setting.""" - - # Settings supported by most models - category_threshold_map = { - types.HarmCategory.HARM_CATEGORY_HARASSMENT: types.HarmBlockThreshold.OFF, - types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: types.HarmBlockThreshold.OFF, - types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: types.HarmBlockThreshold.OFF, - types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: types.HarmBlockThreshold.OFF, - types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: types.HarmBlockThreshold.BLOCK_NONE, - } - - # Older models use BLOCK_NONE - if model_name in [ - "gemini-1.5-pro-001", - "gemini-1.5-flash-001", - "gemini-1.5-flash-8b-exp-0827", - "gemini-1.5-flash-8b-exp-0924", - "gemini-pro", - "gemini-1.0-pro", - "gemini-1.0-pro-001", - ]: - for category in category_threshold_map: - category_threshold_map[category] = types.HarmBlockThreshold.BLOCK_NONE - - # Gemini 2.0 Flash supports CIVIC_INTEGRITY OFF - if model_name in [ - "gemini-2.0-flash", - "gemini-2.0-flash-001", - "gemini-2.0-flash-exp", - ]: - category_threshold_map[types.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY] = ( - types.HarmBlockThreshold.OFF - ) - - log.debug( - 
f"Safety settings: {str({k.value: v.value for k, v in category_threshold_map.items()})}" - ) - - safety_settings = [ - types.SafetySetting(category=category, threshold=threshold) - for category, threshold in category_threshold_map.items() - ] - return safety_settings - - # endregion 1.3 Get permissive safety settings - - # region 1.4 Utility helpers - - def _emit_status_update( - self, - event_emitter: Callable[["Event"], Awaitable[None]], - description: str, - pipe_start_time: float | None, - *, - done: bool = False, - ): - """Constructs and emits a status event in a non-blocking task.""" - - async def emit_task(): - time_str = ( - f" (+{(time.monotonic() - pipe_start_time):.2f}s)" - if pipe_start_time is not None - else "" - ) - full_description = f"{description}{time_str}" - - status_event: "StatusEvent" = { - "type": "status", - "data": {"description": full_description, "done": done}, - } - - try: - await event_emitter(status_event) - log.debug(f"Emitted status:", payload=status_event) - except Exception: - log.exception("Error emitting status.") - - # Fire-and-forget the emission task. - asyncio.create_task(emit_task()) - - def _get_first_candidate( - self, candidates: list[types.Candidate] | None - ) -> types.Candidate | None: - """Selects the first candidate, logging a warning if multiple exist.""" - if not candidates: - log.warning("Received chunk with no candidates, skipping processing.") - return None - if len(candidates) > 1: - log.warning("Multiple candidates found, defaulting to first candidate.") - return candidates[0] - - def _get_model_name(self, body: "Body") -> tuple[str, bool]: - """ - Extracts the effective and canonical model name from the request body. - - Handles standard model names and custom workspace models by prioritizing - the base_model_id found in metadata. - - Args: - body: The request body dictionary. - - Returns: - A tuple containing: - - The canonical model name (prefix removed). 
- - A boolean indicating if the effective model name contained the - 'gemini_manifold_google_genai.' prefix. - """ - # 1. Get the initially requested model name from the top level - effective_model_name: str = body.get("model", "") - initial_model_name = effective_model_name - base_model_name = None - - # 2. Check for a base model ID in the metadata for custom models - # If metadata exists, attempt to extract the base_model_id - if metadata := body.get("metadata"): - # Safely navigate the nested structure: metadata -> model -> info -> base_model_id - base_model_name = ( - metadata.get("model", {}).get("info", {}).get("base_model_id", None) - ) - # If a base model ID is found, it overrides the initially requested name - if base_model_name: - effective_model_name = base_model_name - - # 3. Determine if the effective model name contains the manifold prefix. - # This flag indicates if the model (after considering base_model_id) - # appears to be one defined or routed via the manifold pipe function. - is_manifold_model = "gemini_manifold_google_genai." in effective_model_name - - # 4. Create the canonical model name by removing the manifold prefix - # from the effective model name. - canonical_model_name = effective_model_name.replace( - "gemini_manifold_google_genai.", "" - ) - - # 5. Log the relevant names for debugging purposes - log.debug( - f"Model Name Extraction: initial='{initial_model_name}', " - f"base='{base_model_name}', effective='{effective_model_name}', " - f"canonical='{canonical_model_name}', is_manifold={is_manifold_model}" - ) - - # 6. Return the canonical name and the manifold flag - return canonical_model_name, is_manifold_model - - def _is_flat_dict(self, data: Any) -> bool: - """ - Checks if a dictionary contains only non-dict/non-list values (is one level deep). 
- """ - if not isinstance(data, dict): - return False - return not any(isinstance(value, (dict, list)) for value in data.values()) - - def _truncate_long_strings( - self, data: Any, max_len: int, truncation_marker: str, truncation_enabled: bool - ) -> Any: - """ - Recursively traverses a data structure (dicts, lists) and truncates - long string values. Creates copies to avoid modifying original data. - - Args: - data: The data structure (dict, list, str, int, float, bool, None) to process. - max_len: The maximum allowed length for string values. - truncation_marker: The string to append to truncated values. - truncation_enabled: Whether truncation is enabled. - - Returns: - A potentially new data structure with long strings truncated. - """ - if not truncation_enabled or max_len <= len(truncation_marker): - # If truncation is disabled or max_len is too small, return original - # Make a copy only if it's a mutable type we might otherwise modify - if isinstance(data, (dict, list)): - return copy.deepcopy(data) # Ensure deep copy for nested structures - return data # Primitives are immutable - - if isinstance(data, str): - if len(data) > max_len: - return data[: max_len - len(truncation_marker)] + truncation_marker - return data # Return original string if not truncated - elif isinstance(data, dict): - # Process dictionary items, creating a new dict - return { - k: self._truncate_long_strings( - v, max_len, truncation_marker, truncation_enabled - ) - for k, v in data.items() - } - elif isinstance(data, list): - # Process list items, creating a new list - return [ - self._truncate_long_strings( - item, max_len, truncation_marker, truncation_enabled - ) - for item in data - ] - else: - # Return non-string, non-container types as is (they are immutable) - return data - - def plugin_stdout_format(self, record: "Record") -> str: - """ - Custom format function for the plugin's logs. - Serializes and truncates data passed under the 'payload' key in extra. 
- """ - - # Configuration Keys - LOG_OPTIONS_PREFIX = "_log_" - TRUNCATION_ENABLED_KEY = f"{LOG_OPTIONS_PREFIX}truncation_enabled" - MAX_LENGTH_KEY = f"{LOG_OPTIONS_PREFIX}max_length" - TRUNCATION_MARKER_KEY = f"{LOG_OPTIONS_PREFIX}truncation_marker" - DATA_KEY = "payload" - - original_extra = record["extra"] - # Extract the data intended for serialization using the chosen key - data_to_process = original_extra.get(DATA_KEY) - - serialized_data_json = "" - if data_to_process is not None: - try: - serializable_data = pydantic_core.to_jsonable_python( - data_to_process, serialize_unknown=True - ) - - # Determine truncation settings - truncation_enabled = original_extra.get(TRUNCATION_ENABLED_KEY, True) - max_length = original_extra.get(MAX_LENGTH_KEY, 256) - truncation_marker = original_extra.get(TRUNCATION_MARKER_KEY, "[...]") - - # If max_length was explicitly provided, force truncation enabled - if MAX_LENGTH_KEY in original_extra: - truncation_enabled = True - - # Truncate long strings - truncated_data = self._truncate_long_strings( - serializable_data, - max_length, - truncation_marker, - truncation_enabled, - ) - - # Serialize the (potentially truncated) data - if self._is_flat_dict(truncated_data) and not isinstance( - truncated_data, list - ): - json_string = json.dumps( - truncated_data, separators=(",", ":"), default=str - ) - # Add a simple prefix if it's compact - serialized_data_json = " - " + json_string - else: - json_string = json.dumps(truncated_data, indent=2, default=str) - # Prepend with newline for readability - serialized_data_json = "\n" + json_string - - except (TypeError, ValueError) as e: # Catch specific serialization errors - serialized_data_json = f" - {{Serialization Error: {e}}}" - except ( - Exception - ) as e: # Catch any other unexpected errors during processing - serialized_data_json = f" - {{Processing Error: {e}}}" - - # Add the final JSON string (or error message) back into the record - record["extra"]["_plugin_serialized_data"] 
= serialized_data_json - - # Base template - base_template = ( - "{time:YYYY-MM-DD HH:mm:ss.SSS} | " - "{level: <8} | " - "{name}:{function}:{line} - " - "{message}" - ) - - # Append the serialized data - base_template += "{extra[_plugin_serialized_data]}" - # Append the exception part - base_template += "\n{exception}" - # Return the format string template - return base_template.rstrip() - - def _add_log_handler(self): - """ - Adds or updates the loguru handler specifically for this plugin. - Includes logic for serializing and truncating extra data. - """ - - def plugin_filter(record: "Record"): - """Filter function to only allow logs from this plugin (based on module name).""" - return record["name"] == __name__ - - # Get the desired level name and number - desired_level_name = self.valves.LOG_LEVEL - try: - # Use the public API to get level details - desired_level_info = log.level(desired_level_name) - desired_level_no = desired_level_info.no - except ValueError: - log.error( - f"Invalid LOG_LEVEL '{desired_level_name}' configured for plugin {__name__}. Cannot add/update handler." - ) - return # Stop processing if the level is invalid - - # Access the internal state of the log - handlers: dict[int, "Handler"] = log._core.handlers # type: ignore - handler_id_to_remove = None - found_correct_handler = False - - for handler_id, handler in handlers.items(): - existing_filter = handler._filter # Access internal attribute - - # Check if the filter matches our plugin_filter - # Comparing function objects directly can be fragile if they are recreated. - # Comparing by name and module is more robust for functions defined at module level. 
- is_our_filter = ( - existing_filter is not None # Make sure a filter is set - and hasattr(existing_filter, "__name__") - and existing_filter.__name__ == plugin_filter.__name__ - and hasattr(existing_filter, "__module__") - and existing_filter.__module__ == plugin_filter.__module__ - ) - - if is_our_filter: - existing_level_no = handler.levelno - log.trace( - f"Found existing handler {handler_id} for {__name__} with level number {existing_level_no}." - ) - - # Check if the level matches the desired level - if existing_level_no == desired_level_no: - log.debug( - f"Handler {handler_id} for {__name__} already exists with the correct level '{desired_level_name}'." - ) - found_correct_handler = True - break # Found the correct handler, no action needed - else: - # Found our handler, but the level is wrong. Mark for removal. - log.info( - f"Handler {handler_id} for {__name__} found, but log level differs " - f"(existing: {existing_level_no}, desired: {desired_level_no}). " - f"Removing it to update." - ) - handler_id_to_remove = handler_id - break # Found the handler to replace, stop searching - - # Remove the old handler if marked for removal - if handler_id_to_remove is not None: - try: - log.remove(handler_id_to_remove) - log.debug(f"Removed handler {handler_id_to_remove} for {__name__}.") - except ValueError: - # This might happen if the handler was somehow removed between the check and now - log.warning( - f"Could not remove handler {handler_id_to_remove} for {__name__}. It might have already been removed." - ) - # If removal failed but we intended to remove, we should still proceed to add - # unless found_correct_handler is somehow True (which it shouldn't be if handler_id_to_remove was set). 
- - # Add a new handler if no correct one was found OR if we just removed an incorrect one - if not found_correct_handler: - self.log_level = desired_level_name - log.add( - sys.stdout, - level=desired_level_name, - format=self.plugin_stdout_format, - filter=plugin_filter, - ) - log.debug( - f"Added new handler to loguru for {__name__} with level {desired_level_name}." - ) - - # endregion 1.4 Utility helpers - - # endregion 1. Helper methods inside the Filter class diff --git a/plugins/pipes/gemini_mainfold/README.md b/plugins/pipes/gemini_mainfold/README.md deleted file mode 100644 index 3e97e5d..0000000 --- a/plugins/pipes/gemini_mainfold/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Example Pipe Plugin - -**Author:** OpenWebUI Community | **Version:** 1.26.0 | **License:** MIT - -This is a template/example for creating Pipe plugins in OpenWebUI. - ---- - -## Overview - -Pipes are plugins that process and enhance LLM responses after they are generated and before they are displayed to the user. - -## Core Features - -- ✅ **Response Processing**: Modify or enhance LLM output -- ✅ **Format Conversion**: Convert responses to different formats -- ✅ **Content Filtering**: Filter or sanitize content -- ✅ **Integration**: Connect with external services - ---- - -## Installation - -1. Download the `.py` file from this directory -2. Open OpenWebUI Admin Settings → Plugins -3. Select "Pipes" type -4. Upload the file -5. Refresh the page - ---- - -## Configuration - -Configure the pipe parameters in your chat settings as needed. - ---- - -## Usage - -Once enabled, this pipe will automatically process all LLM responses. - ---- - -## Troubleshooting - -- Check the logs for any errors during pipe execution -- Ensure the pipe is properly configured -- Verify the pipe is enabled in chat settings - ---- - -## Contributing - -Feel free to create your own pipe plugins! Follow the structure and documentation guidelines in this template. 
diff --git a/plugins/pipes/gemini_mainfold/README_CN.md b/plugins/pipes/gemini_mainfold/README_CN.md deleted file mode 100644 index 6437018..0000000 --- a/plugins/pipes/gemini_mainfold/README_CN.md +++ /dev/null @@ -1,54 +0,0 @@ -# 示例管道插件 - -**作者:** OpenWebUI 社区 | **版本:** 1.0.0 | **许可证:** MIT - -这是在 OpenWebUI 中创建管道插件的模板/示例。 - ---- - -## 概述 - -管道是在 LLM 生成响应后、显示给用户前对响应进行处理和增强的插件。 - -## 核心特性 - -- ✅ **响应处理**: 修改或增强 LLM 输出 -- ✅ **格式转换**: 将响应转换为不同格式 -- ✅ **内容过滤**: 过滤或清理内容 -- ✅ **集成**: 与外部服务连接 - ---- - -## 安装 - -1. 从此目录下载 `.py` 文件 -2. 打开 OpenWebUI 管理员设置 → 插件(Plugins) -3. 选择"Pipes"类型 -4. 上传文件 -5. 刷新页面 - ---- - -## 配置 - -根据需要在聊天设置中配置管道参数。 - ---- - -## 使用 - -启用后,该管道将自动处理所有 LLM 响应。 - ---- - -## 故障排除 - -- 查看日志了解管道执行过程中的任何错误 -- 确保管道配置正确 -- 验证管道在聊天设置中已启用 - ---- - -## 贡献 - -欢迎创建您自己的管道插件!请遵循此模板中的结构和文档指南。 diff --git a/plugins/pipes/gemini_mainfold/gemini_manifold.py b/plugins/pipes/gemini_mainfold/gemini_manifold.py deleted file mode 100644 index 2bc7b67..0000000 --- a/plugins/pipes/gemini_mainfold/gemini_manifold.py +++ /dev/null @@ -1,3382 +0,0 @@ -""" -title: Gemini Manifold google_genai -id: gemini_manifold_google_genai -description: Manifold function for Gemini Developer API and Vertex AI. Uses the newer google-genai SDK. Aims to support as many features from it as possible. -author: suurt8ll -author_url: https://github.com/suurt8ll -funding_url: https://github.com/suurt8ll/open_webui_functions -license: MIT -version: 1.26.0 -requirements: google-genai==1.49.0 -""" - -VERSION = "1.26.0" -# This is the recommended version for the companion filter. -# Older versions might still work, but backward compatibility is not guaranteed -# during the development of this personal use plugin. -RECOMMENDED_COMPANION_VERSION = "1.7.0" - - -# Keys `title`, `id` and `description` in the frontmatter above are used for my own development purposes. -# They don't have any effect on the plugin's functionality. 
- - -# This is a helper function that provides a manifold for Google's Gemini Studio API and Vertex AI. -# Be sure to check out my GitHub repository for more information! Contributions, questions and suggestions are very welcome. - -from google import genai -from google.genai import types -from google.genai import errors as genai_errors -from google.cloud import storage -from google.api_core import exceptions - -import time -import copy -import json -from urllib.parse import urlparse, parse_qs -import xxhash -import asyncio -import aiofiles -from aiocache import cached -from aiocache.base import BaseCache -from aiocache.serializers import NullSerializer -from aiocache.backends.memory import SimpleMemoryCache -from functools import cache -from datetime import datetime, timezone -from fastapi.datastructures import State -import io -import mimetypes -import uuid -import base64 -import re -import fnmatch -import sys -from loguru import logger -from fastapi import Request -import pydantic_core -from pydantic import BaseModel, Field, field_validator -from collections.abc import AsyncIterator, Awaitable, Callable -from typing import ( - Any, - Final, - AsyncGenerator, - Literal, - TYPE_CHECKING, - cast, -) - -from open_webui.models.chats import Chats -from open_webui.models.files import FileForm, Files -from open_webui.storage.provider import Storage -from open_webui.models.functions import Functions -from open_webui.utils.misc import pop_system_message - -# This block is skipped at runtime. -if TYPE_CHECKING: - from loguru import Record - from loguru._handler import Handler # type: ignore - # Imports custom type definitions (TypedDicts) for static analysis purposes (mypy/pylance). - from utils.manifold_types import * - -# Setting auditable=False avoids duplicate output for log levels that would be printed out by the main log. 
-log = logger.bind(auditable=False) - -# FIXME: remove -COMPATIBLE_MODELS_FOR_URL_CONTEXT: Final = [ - "gemini-2.5-pro", - "gemini-flash-latest", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash", - "gemini-flash-lite-latest", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-lite", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-flash-preview-05-20", - "gemini-2.0-flash", - "gemini-2.0-flash-001", - "gemini-2.0-flash-live-001", -] - -# A mapping of finish reason names (str) to human-readable descriptions. -# This allows handling of reasons that may not be defined in the current SDK version. -FINISH_REASON_DESCRIPTIONS: Final = { - "FINISH_REASON_UNSPECIFIED": "The reason for finishing is not specified.", - "STOP": "Natural stopping point or stop sequence reached.", - "MAX_TOKENS": "The maximum number of tokens was reached.", - "SAFETY": "The response was blocked due to safety concerns.", - "RECITATION": "The response was blocked due to potential recitation of copyrighted material.", - "LANGUAGE": "The response was stopped because of an unsupported language.", - "OTHER": "The response was stopped for an unspecified reason.", - "BLOCKLIST": "The response was blocked due to a word on a blocklist.", - "PROHIBITED_CONTENT": "The response was blocked for containing prohibited content.", - "SPII": "The response was blocked for containing sensitive personally identifiable information.", - "MALFORMED_FUNCTION_CALL": "The model generated an invalid function call.", - "IMAGE_SAFETY": "Generated image was blocked due to safety concerns.", - "UNEXPECTED_TOOL_CALL": "The model generated an invalid tool call.", - "IMAGE_PROHIBITED_CONTENT": "Generated image was blocked for containing prohibited content.", - "NO_IMAGE": "The model was expected to generate an image, but it did not.", - "IMAGE_OTHER": ( - "Image generation stopped for other reasons, possibly related to safety or 
quality. " - "Try a different image or prompt." - ), -} - -# Finish reasons that are considered normal and do not require user notification. -NORMAL_REASONS: Final = {types.FinishReason.STOP, types.FinishReason.MAX_TOKENS} - -# These tags will be "disabled" in the response, meaning that they will not be parsed by the backend. -SPECIAL_TAGS_TO_DISABLE = [ - "details", - "think", - "thinking", - "reason", - "reasoning", - "thought", - "Thought", - "|begin_of_thought|", - "code_interpreter", - "|begin_of_solution|", -] -ZWS = "\u200b" - - -class GenaiApiError(Exception): - """Custom exception for errors during Genai API interactions.""" - - pass - - -class FilesAPIError(Exception): - """Custom exception for errors during Files API operations.""" - - pass - - -class EventEmitter: - """A helper class to abstract web-socket event emissions to the front-end.""" - - def __init__( - self, - event_emitter: Callable[["Event"], Awaitable[None]] | None, - *, - hide_successful_status: bool = False, - ): - self.event_emitter = event_emitter - self.hide_successful_status = hide_successful_status - - def emit_toast( - self, - msg: str, - toastType: Literal["info", "success", "warning", "error"] = "info", - ) -> None: - """Emits a toast notification to the front-end. This is a fire-and-forget operation.""" - if not self.event_emitter: - return - - event: "NotificationEvent" = { - "type": "notification", - "data": {"type": toastType, "content": msg}, - } - - log.debug(f"Emitting toast: '{msg}'") - log.trace("Toast payload:", payload=event) - - async def send_toast(): - try: - # Re-check in case the event loop runs this later and state has changed. 
- if self.event_emitter: - await self.event_emitter(event) - except Exception: - log.exception("Error emitting toast notification.") - - asyncio.create_task(send_toast()) - - async def emit_status( - self, - message: str, - done: bool = False, - hidden: bool = False, - *, - is_successful_finish: bool = False, - ) -> None: - """Emit status updates asynchronously.""" - if not self.event_emitter: - return - - # If this is a successful finish status and the user wants to hide it, - # we override the hidden flag. - if is_successful_finish and self.hide_successful_status: - hidden = True - - status_event: "StatusEvent" = { - "type": "status", - "data": {"description": message, "done": done, "hidden": hidden}, - } - - log.debug(f"Emitting status: '{message}'") - log.trace("Status payload:", payload=status_event) - - try: - await self.event_emitter(status_event) - except Exception: - log.exception("Error emitting status.") - - async def emit_completion( - self, - content: str | None = None, - done: bool = False, - error: str | None = None, - sources: list["Source"] | None = None, - usage: dict[str, Any] | None = None, - ) -> None: - """Constructs and emits completion event.""" - if not self.event_emitter: - return - - emission: "ChatCompletionEvent" = { - "type": "chat:completion", - "data": {"done": done}, - } - parts = [] - if content is not None: - emission["data"]["content"] = content - parts.append("content") - if error is not None: - emission["data"]["error"] = {"detail": error} - parts.append("error") - if sources is not None: - emission["data"]["sources"] = sources - parts.append("sources") - if usage is not None: - emission["data"]["usage"] = usage - parts.append("usage") - - desc = f" with {', '.join(parts)}" if parts else "" - log.debug(f"Emitting completion: done={done}{desc}") - log.trace("Completion payload:", payload=emission) - - try: - await self.event_emitter(emission) - except Exception: - log.exception("Error emitting completion.") - - async def 
emit_usage(self, usage_data: dict[str, Any]) -> None: - """A wrapper around emit_completion to specifically emit usage data.""" - await self.emit_completion(usage=usage_data) - - async def emit_error( - self, - error_msg: str, - warning: bool = False, - exception: bool = True, - ) -> None: - """Emits an event to the front-end that causes it to display a nice red error message.""" - if warning: - log.opt(depth=1, exception=False).warning(error_msg) - else: - log.opt(depth=1, exception=exception).error(error_msg) - await self.emit_completion(error=f"\n{error_msg}", done=True) - - -class UploadStatusManager: - """ - Manages and centralizes status updates for concurrent file uploads. - - This manager is self-configuring. It discovers the number of files that - require an actual upload at runtime, only showing a status message to the - user when network activity is necessary. - - The communication protocol uses tuples sent via an asyncio.Queue: - - ('REGISTER_UPLOAD',): Sent by a worker when it determines an upload is needed. - - ('COMPLETE_UPLOAD',): Sent by a worker when its upload is finished. - - ('FINALIZE',): Sent by the orchestrator when all workers are done. - """ - - def __init__( - self, - event_emitter: EventEmitter, - start_time: float, - ): - self.event_emitter = event_emitter - self.start_time = start_time - self.queue = asyncio.Queue() - self.total_uploads_expected = 0 - self.uploads_completed = 0 - self.finalize_received = False - self.is_active = False - - async def run(self) -> None: - """ - Runs the manager loop, listening for updates and emitting status to the UI. - This should be started as a background task using asyncio.create_task(). 
- """ - while not ( - self.finalize_received - and self.total_uploads_expected == self.uploads_completed - ): - msg = await self.queue.get() - msg_type = msg[0] - - if msg_type == "REGISTER_UPLOAD": - self.is_active = True - self.total_uploads_expected += 1 - await self._emit_progress_update() - elif msg_type == "COMPLETE_UPLOAD": - self.uploads_completed += 1 - await self._emit_progress_update() - elif msg_type == "FINALIZE": - self.finalize_received = True - - self.queue.task_done() - - log.debug("UploadStatusManager finished its run.") - - async def _emit_progress_update(self) -> None: - """Emits the current progress to the front-end if uploads are active.""" - if not self.is_active: - return - - elapsed_time = time.monotonic() - self.start_time - time_str = f"(+{elapsed_time:.2f}s)" - - is_done = ( - self.total_uploads_expected > 0 - and self.uploads_completed == self.total_uploads_expected - ) - - if is_done: - message = f"- Upload complete. {self.uploads_completed} file(s) processed. {time_str}" - else: - # Show "Uploading 1 of N..." - message = f"- Uploading file {self.uploads_completed + 1} of {self.total_uploads_expected}... {time_str}" - - await self.event_emitter.emit_status(message, done=is_done) - - -class FilesAPIManager: - """ - Manages uploading, caching, and retrieving files using the Google Gemini Files API. - - This class provides a stateless and efficient way to handle files by using a fast, - non-cryptographic hash (xxHash) of the file's content as the primary identifier. - This enables content-addressable storage, preventing duplicate uploads of the - same file. It uses a multi-tiered approach: - - 1. Hot Path (In-Memory Caches): For instantly retrieving file objects and hashes - for recently used files. - 2. Warm Path (Stateless GET): For quickly recovering file state after a server - restart by using a deterministic name (derived from the content hash) and a - single `get` API call. - 3. 
Cold Path (Upload): As a last resort, for uploading new files or re-uploading - expired ones. - """ - - def __init__( - self, - client: genai.Client, - file_cache: SimpleMemoryCache, - id_hash_cache: SimpleMemoryCache, - event_emitter: EventEmitter, - ): - """ - Initializes the FilesAPIManager. - - Args: - client: An initialized `google.genai.Client` instance. - file_cache: An aiocache instance for mapping `content_hash -> types.File`. - Must be configured with `aiocache.serializers.NullSerializer`. - id_hash_cache: An aiocache instance for mapping `owui_file_id -> content_hash`. - This is an optimization to avoid re-hashing known files. - event_emitter: An abstract class for emitting events to the front-end. - """ - self.client = client - self.file_cache = file_cache - self.id_hash_cache = id_hash_cache - self.event_emitter = event_emitter - # A dictionary to manage locks for concurrent uploads. - # The key is the content_hash, the value is an asyncio.Lock. - self.upload_locks: dict[str, asyncio.Lock] = {} - - async def get_or_upload_file( - self, - file_bytes: bytes, - mime_type: str, - *, - owui_file_id: str | None = None, - status_queue: asyncio.Queue | None = None, - ) -> types.File: - """ - The main public method to get a file, using caching, recovery, or uploading. - - This method uses a fast content hash (xxHash) as the primary key for all - caching and remote API interactions to ensure deduplication and performance. - It is safe from race conditions during concurrent uploads. - - Args: - file_bytes: The raw byte content of the file. Required. - mime_type: The MIME type of the file (e.g., 'image/png'). Required. - owui_file_id: The unique ID of the file from Open WebUI, if available. - Used for logging and as a key for the hash cache optimization. - status_queue: An optional asyncio.Queue to report upload lifecycle events. - - Returns: - An `ACTIVE` `google.genai.types.File` object. - - Raises: - FilesAPIError: If the file fails to upload or process. 
- """ - # Step 1: Get the fast content hash, using the ID cache as an optimization if possible. - content_hash = await self._get_content_hash(file_bytes, owui_file_id) - - # Step 2: The Hot Path (Check Local File Cache) - # A cache hit means the file is valid and we can return immediately. - cached_file: types.File | None = await self.file_cache.get(content_hash) - if cached_file: - log_id = f"OWUI ID: {owui_file_id}" if owui_file_id else "anonymous file" - log.debug( - f"Cache HIT for file hash {content_hash} ({log_id}). Returning immediately." - ) - return cached_file - - # On cache miss, acquire a lock specific to this file's content to prevent race conditions. - # dict.setdefault is atomic, ensuring only one lock is created per hash. - lock = self.upload_locks.setdefault(content_hash, asyncio.Lock()) - if lock.locked(): - log.debug( - f"Lock for hash {content_hash} is held by another task. " - f"This call will now wait for the lock to be released." - ) - - async with lock: - # Step 2.5: Double-Checked Locking - # After acquiring the lock, check the cache again. Another task might have - # completed the upload while we were waiting for the lock. - cached_file = await self.file_cache.get(content_hash) - if cached_file: - log.debug( - f"Cache HIT for file hash {content_hash} after acquiring lock. Returning." - ) - return cached_file - - # Step 3: The Warm/Cold Path (On Cache Miss) - deterministic_name = f"files/owui-v1-{content_hash}" - log.debug( - f"Cache MISS for hash {content_hash}. Attempting stateless recovery with GET: {deterministic_name}" - ) - - try: - # Attempt to get the file (Warm Path) - file = await self.client.aio.files.get(name=deterministic_name) - if not file.name: - raise FilesAPIError( - f"Stateless recovery for {deterministic_name} returned a file without a name." - ) - - log.debug( - f"Stateless recovery successful for {deterministic_name}. File exists on server." 
- ) - active_file = await self._poll_for_active_state(file.name, owui_file_id) - - ttl_seconds = self._calculate_ttl(active_file.expiration_time) - await self.file_cache.set(content_hash, active_file, ttl=ttl_seconds) - - return active_file - except genai_errors.ClientError as e: - if e.code == 403: # "Not found" signal from the API. - log.info( - f"File {deterministic_name} not found on server (received 403). Proceeding to upload." - ) - # Proceed to upload (Cold Path) - return await self._upload_and_process_file( - content_hash, - file_bytes, - mime_type, - deterministic_name, - owui_file_id, - status_queue, - ) - else: - log.exception( - f"A non-403 client error occurred during stateless recovery for {deterministic_name}." - ) - self.event_emitter.emit_toast( - f"API error for file: {e.code}. Please check permissions.", - "error", - ) - raise FilesAPIError( - f"Failed to check file status for {deterministic_name}: {e}" - ) from e - except Exception as e: - log.exception( - f"An unexpected error occurred during stateless recovery for {deterministic_name}." - ) - self.event_emitter.emit_toast( - "Unexpected error retrieving a file. Please try again.", - "error", - ) - raise FilesAPIError( - f"Failed to check file status for {deterministic_name}: {e}" - ) from e - finally: - # Clean up the lock from the dictionary once processing is complete - # for this hash, preventing memory growth over time. - # This is safe because any future request for this hash will hit the cache. - if content_hash in self.upload_locks: - del self.upload_locks[content_hash] - - async def _get_content_hash( - self, file_bytes: bytes, owui_file_id: str | None - ) -> str: - """ - Retrieves the file's content hash, using a cache for known IDs or computing it. - - This acts as a memoization layer for the hashing process, avoiding - re-computation for files with a known Open WebUI ID. For anonymous files - (owui_file_id=None), it will always compute the hash. 
- """ - if owui_file_id: - # First, check the ID-to-Hash cache for known files. - cached_hash: str | None = await self.id_hash_cache.get(owui_file_id) - if cached_hash: - log.trace(f"Hash cache HIT for OWUI ID {owui_file_id}.") - return cached_hash - - # If not in cache or if file is anonymous, compute the fast hash. - log.trace( - f"Hash cache MISS for OWUI ID {owui_file_id if owui_file_id else 'N/A'}. Computing hash." - ) - content_hash = xxhash.xxh64(file_bytes).hexdigest() - - # If there was an ID, store the newly computed hash for next time. - if owui_file_id: - await self.id_hash_cache.set(owui_file_id, content_hash) - - return content_hash - - def _calculate_ttl(self, expiration_time: datetime | None) -> float | None: - """Calculates the TTL in seconds from an expiration datetime.""" - if not expiration_time: - return None - - now_utc = datetime.now(timezone.utc) - if expiration_time <= now_utc: - return 0 - - return (expiration_time - now_utc).total_seconds() - - async def _upload_and_process_file( - self, - content_hash: str, - file_bytes: bytes, - mime_type: str, - deterministic_name: str, - owui_file_id: str | None, - status_queue: asyncio.Queue | None = None, - ) -> types.File: - """Handles the full upload and post-upload processing workflow.""" - - # Register with the manager that an actual upload is starting. - if status_queue: - await status_queue.put(("REGISTER_UPLOAD",)) - - log.info(f"Starting upload for {deterministic_name}...") - - try: - file_io = io.BytesIO(file_bytes) - upload_config = types.UploadFileConfig( - name=deterministic_name, mime_type=mime_type - ) - uploaded_file = await self.client.aio.files.upload( - file=file_io, config=upload_config - ) - if not uploaded_file.name: - raise FilesAPIError( - f"File upload for {deterministic_name} did not return a file name." - ) - - log.debug(f"{uploaded_file.name} uploaded.") - log.trace("Uploaded file details:", payload=uploaded_file) - - # Check if the file is already active. 
If so, we can skip polling. - if uploaded_file.state == types.FileState.ACTIVE: - log.debug( - f"File {uploaded_file.name} is already ACTIVE. Skipping poll." - ) - active_file = uploaded_file - else: - # If not active, proceed with the original polling logic. - log.debug( - f"{uploaded_file.name} uploaded with state {uploaded_file.state}. Polling for ACTIVE state." - ) - active_file = await self._poll_for_active_state( - uploaded_file.name, owui_file_id - ) - log.debug(f"File {active_file.name} is now ACTIVE.") - - # Calculate TTL and set in the main file cache using the content hash as the key. - ttl_seconds = self._calculate_ttl(active_file.expiration_time) - await self.file_cache.set(content_hash, active_file, ttl=ttl_seconds) - log.debug( - f"Cached new file object for hash {content_hash} with TTL: {ttl_seconds}s." - ) - - return active_file - except Exception as e: - log.exception(f"File upload or processing failed for {deterministic_name}.") - self.event_emitter.emit_toast( - "Upload failed for a file. Please check connection and try again.", - "error", - ) - raise FilesAPIError(f"Upload failed for {deterministic_name}: {e}") from e - finally: - # Report completion (success or failure) to the status manager. - # This ensures the progress counter always advances. - if status_queue: - await status_queue.put(("COMPLETE_UPLOAD",)) - - async def _poll_for_active_state( - self, - file_name: str, - owui_file_id: str | None, - timeout: int = 60, - poll_interval: int = 1, - ) -> types.File: - """Polls the file's status until it is ACTIVE or fails.""" - end_time = time.monotonic() + timeout - while time.monotonic() < end_time: - try: - file = await self.client.aio.files.get(name=file_name) - except Exception as e: - raise FilesAPIError( - f"Polling failed: Could not get status for {file_name}. 
Reason: {e}" - ) from e - - if file.state == types.FileState.ACTIVE: - return file - if file.state == types.FileState.FAILED: - log_id = f"'{owui_file_id}'" if owui_file_id else "an uploaded file" - error_message = f"File processing failed on server for {file_name}." - toast_message = f"Google could not process {log_id}." - if file.error: - reason = f"Reason: {file.error.message} (Code: {file.error.code})" - error_message += f" {reason}" - toast_message += f" Reason: {file.error.message}" - - self.event_emitter.emit_toast(toast_message, "error") - raise FilesAPIError(error_message) - - state_name = file.state.name if file.state else "UNKNOWN" - log.trace( - f"File {file_name} is still {state_name}. Waiting {poll_interval}s..." - ) - await asyncio.sleep(poll_interval) - - raise FilesAPIError( - f"File {file_name} did not become ACTIVE within {timeout} seconds." - ) - - -class GeminiContentBuilder: - """Builds a list of `google.genai.types.Content` objects from the OWUI's body payload.""" - - def __init__( - self, - messages_body: list["Message"], - metadata_body: "Metadata", - user_data: "UserData", - event_emitter: EventEmitter, - valves: "Pipe.Valves", - files_api_manager: "FilesAPIManager", - ): - self.messages_body = messages_body - self.upload_documents = (metadata_body.get("features", {}) or {}).get( - "upload_documents", False - ) - self.event_emitter = event_emitter - self.valves = valves - self.files_api_manager = files_api_manager - self.is_temp_chat = metadata_body.get("chat_id") == "local" - self.vertexai = self.files_api_manager.client.vertexai - - self.system_prompt, self.messages_body = self._extract_system_prompt( - self.messages_body - ) - self.messages_db = self._fetch_and_validate_chat_history( - metadata_body, user_data - ) - - async def build_contents(self, start_time: float) -> list[types.Content]: - """ - The main public method to generate the contents list by processing all - message turns concurrently and using a self-configuring status 
manager. - """ - if not self.messages_db: - warn_msg = ( - "There was a problem retrieving the messages from the backend database. " - "Check the console for more details. " - "Citation filtering and file uploads will not be available." - ) - self.event_emitter.emit_toast(warn_msg, "warning") - - # 1. Set up and launch the status manager. It will activate itself if needed. - status_manager = UploadStatusManager(self.event_emitter, start_time=start_time) - manager_task = asyncio.create_task(status_manager.run()) - - # 2. Create and run concurrent processing tasks for each message turn. - tasks = [ - self._process_message_turn(i, message, status_manager.queue) - for i, message in enumerate(self.messages_body) - ] - log.debug(f"Starting concurrent processing of {len(tasks)} message turns.") - results = await asyncio.gather(*tasks, return_exceptions=True) - - # 3. Signal to the manager that no more uploads will be registered. - await status_manager.queue.put(("FINALIZE",)) - - # 4. Wait for the manager to finish processing all reported uploads. - await manager_task - - # 5. Filter and assemble the final contents list. 
- contents: list[types.Content] = [] - for i, res in enumerate(results): - if isinstance(res, types.Content): - contents.append(res) - elif isinstance(res, Exception): - log.error( - f"An error occurred while processing message {i} concurrently.", - payload=res, - ) - return contents - - @staticmethod - def _extract_system_prompt( - messages: list["Message"], - ) -> tuple[str | None, list["Message"]]: - """Extracts the system prompt and returns it along with the modified message list.""" - system_message, remaining_messages = pop_system_message(messages) # type: ignore - system_prompt: str | None = (system_message or {}).get("content") - return system_prompt, remaining_messages # type: ignore - - def _fetch_and_validate_chat_history( - self, metadata_body: "Metadata", user_data: "UserData" - ) -> list["ChatMessageTD"] | None: - """ - Fetches message history from the database and validates its length against the request body. - Returns the database messages or None if not found or if validation fails. - """ - # 1. Fetch from database - chat_id = metadata_body.get("chat_id", "") - if chat := Chats.get_chat_by_id_and_user_id( - id=chat_id, user_id=user_data["id"] - ): - chat_content: "ChatObjectDataTD" = chat.chat # type: ignore - # Last message is the upcoming assistant response, at this point in the logic it's empty. - messages_db = chat_content.get("messages", [])[:-1] - else: - log.warning( - f"Chat {chat_id} not found. Cannot process files or filter citations." - ) - return None - - # 2. Validate length against the current message body - if len(messages_db) != len(self.messages_body): - warn_msg = ( - f"Messages in the body ({len(self.messages_body)}) and " - f"messages in the database ({len(messages_db)}) do not match. " - "This is likely due to a bug in Open WebUI. " - "Cannot process files or filter citations." - ) - - # TODO: Emit a toast to the user in the front-end. 
- log.warning(warn_msg) - # Invalidate the db messages if they don't match - return None - - return messages_db - - async def _process_message_turn( - self, i: int, message: "Message", status_queue: asyncio.Queue - ) -> types.Content | None: - """ - Processes a single message turn, handling user and assistant roles, - and returns a complete `types.Content` object. Designed to be run concurrently. - """ - role = message.get("role") - parts: list[types.Part] = [] - - if role == "user": - message = cast("UserMessage", message) - files = [] - if self.messages_db: - message_db = self.messages_db[i] - if self.upload_documents: - files = message_db.get("files", []) - parts = await self._process_user_message(message, files, status_queue) - # Case 1: User content is completely empty (no text, no files). - if not parts: - log.info( - f"User message at index {i} is completely empty. " - "Injecting a prompt to ask for clarification." - ) - # Inform the user via a toast notification. - toast_msg = f"Your message #{i + 1} was empty. The assistant will ask for clarification." - self.event_emitter.emit_toast(toast_msg, "info") - - clarification_prompt = ( - "The user sent an empty message. Please ask the user for " - "clarification on what they would like to ask or discuss." - ) - # This will become the only part for this user message. - parts = await self._genai_parts_from_text( - clarification_prompt, status_queue - ) - else: - # Case 2: User has sent content, check if it includes text. - has_text_component = any(p.text for p in parts if p.text) - if not has_text_component: - # The user sent content (e.g., files) but no accompanying text. - if self.vertexai: - # Vertex AI requires a text part in multi-modal messages. - log.info( - f"User message at index {i} lacks a text component for Vertex AI. " - "Adding default text prompt." - ) - # Inform the user via a toast notification. 
- toast_msg = ( - f"For your message #{i + 1}, a default prompt was added as text is required " - "for requests with attachments when using Vertex AI." - ) - self.event_emitter.emit_toast(toast_msg, "info") - - default_prompt_text = ( - "The user did not send any text message with the additional context. " - "Answer by summarizing the newly added context." - ) - default_text_parts = await self._genai_parts_from_text( - default_prompt_text, status_queue - ) - parts.extend(default_text_parts) - else: - # Google Developer API allows no-text user content. - log.info( - f"User message at index {i} lacks a text component for Google Developer API. " - "Proceeding with non-text parts only." - ) - elif role == "assistant": - message = cast("AssistantMessage", message) - # Google API's assistant role is "model" - role = "model" - sources = None - if self.messages_db: - message_db = self.messages_db[i] - sources = message_db.get("sources") - parts = await self._process_assistant_message( - message, sources, status_queue - ) - else: - warn_msg = f"Message {i} has an invalid role: {role}. Skipping to the next message." - log.warning(warn_msg) - self.event_emitter.emit_toast(warn_msg, "warning") - return None - - # Only create a Content object if there are parts to include. - if parts: - return types.Content(parts=parts, role=role) - return None - - async def _process_user_message( - self, - message: "UserMessage", - files: list["FileAttachmentTD"], - status_queue: asyncio.Queue, - ) -> list[types.Part]: - user_parts: list[types.Part] = [] - db_files_processed = False - - # PATH 1: Database is available (Normal Chat). 
- if self.messages_db and files: - db_files_processed = True - log.info(f"Processing {len(files)} files from the database concurrently.") - - upload_tasks = [] - for file in files: - log.debug("Preparing DB file for concurrent upload:", payload=file) - uri = "" - if file.get("type") == "image": - uri = file.get("url", "") - elif file.get("type") == "file": - # Reconstruct the local API URI to be handled by our unified function - uri = f"/api/v1/files/{file.get('id', '')}/content" - - if uri: - # Create a coroutine for each file upload and add it to a list. - upload_tasks.append(self._genai_part_from_uri(uri, status_queue)) - else: - log.warning("Could not determine URI for file in DB.", payload=file) - - if upload_tasks: - # Run all upload tasks concurrently. asyncio.gather maintains the order of results. - results = await asyncio.gather(*upload_tasks) - # Filter out None results (from failed uploads) and add the successful parts to the list. - user_parts.extend(part for part in results if part) - - # Now, process the content from the message payload. - user_content = message.get("content") - if isinstance(user_content, str): - user_content_list: list["Content"] = [ - {"type": "text", "text": user_content} - ] - elif isinstance(user_content, list): - user_content_list = user_content - else: - warn_msg = "User message content is not a string or list, skipping." - log.warning(warn_msg) - self.event_emitter.emit_toast(warn_msg, "warning") - return user_parts - - for c in user_content_list: - c_type = c.get("type") - if c_type == "text": - c = cast("TextContent", c) - if c_text := c.get("text"): - user_parts.extend( - await self._genai_parts_from_text(c_text, status_queue) - ) - - # PATH 2: Temporary Chat Image Handling. 
- elif c_type == "image_url" and not db_files_processed: - log.info("Processing image from payload (temporary chat mode).") - c = cast("ImageContent", c) - if uri := c.get("image_url", {}).get("url"): - if part := await self._genai_part_from_uri(uri, status_queue): - user_parts.append(part) - - return user_parts - - async def _process_assistant_message( - self, - message: "AssistantMessage", - sources: list["Source"] | None, - status_queue: asyncio.Queue, - ) -> list[types.Part]: - assistant_text = message.get("content") - if sources: - assistant_text = self._remove_citation_markers(assistant_text, sources) - return await self._genai_parts_from_text(assistant_text, status_queue) - - async def _genai_part_from_uri( - self, uri: str, status_queue: asyncio.Queue - ) -> types.Part | None: - """ - Processes any resource URI and returns a genai.types.Part. - This is the central dispatcher for all media processing, handling data URIs, - local API file paths, and YouTube URLs. It decides whether to use the - Files API or send raw bytes based on configuration and context. - """ - if not uri: - log.warning("Received an empty URI, skipping.") - return None - - try: - file_bytes: bytes | None = None - mime_type: str | None = None - owui_file_id: str | None = None - - # Step 1: Extract bytes and mime_type from the URI if applicable - if uri.startswith("data:image"): - match = re.match(r"data:(image/\w+);base64,(.+)", uri) - if not match: - raise ValueError("Invalid data URI for image.") - mime_type, base64_data = match.group(1), match.group(2) - file_bytes = base64.b64decode(base64_data) - elif uri.startswith("/api/v1/files/"): - log.info(f"Processing local API file URI: {uri}") - file_id = uri.split("/")[4] - owui_file_id = file_id - file_bytes, mime_type = await self._get_file_data(file_id) - elif "youtube.com/" in uri or "youtu.be/" in uri: - log.info(f"Found YouTube URL: {uri}") - return self._genai_part_from_youtube_uri(uri) - # TODO: Google Cloud Storage bucket support. 
- # elif uri.startswith("gs://"): ... - else: - warn_msg = f"Unsupported URI: '{uri[:64]}...' Links must be to YouTube or a supported file type." - log.warning(warn_msg) - self.event_emitter.emit_toast(warn_msg, "warning") - return None - - # Step 2: If we have bytes, decide how to create the Part - if file_bytes and mime_type: - # TODO: The Files API is strict about MIME types (e.g., text/plain, - # application/pdf). In the future, inspect the content of files - # with unsupported text-like MIME types (e.g., 'application/json', - # 'text/markdown'). If the content is detected as plaintext, - # override the `mime_type` variable to 'text/plain' to allow the upload. - - # Determine whether to use the Files API based on the specified conditions. - use_files_api = True - reason = "" - - if not self.valves.USE_FILES_API: - reason = "disabled by user setting (USE_FILES_API=False)" - use_files_api = False - elif self.vertexai: - reason = "the active client is configured for Vertex AI, which does not support the Files API" - use_files_api = False - elif self.is_temp_chat: - reason = "temporary chat mode is active" - use_files_api = False - - if use_files_api: - log.info(f"Using Files API for resource from URI: {uri[:64]}...") - gemini_file = await self.files_api_manager.get_or_upload_file( - file_bytes=file_bytes, - mime_type=mime_type, - owui_file_id=owui_file_id, - status_queue=status_queue, - ) - return types.Part( - file_data=types.FileData( - file_uri=gemini_file.uri, - mime_type=gemini_file.mime_type, - ) - ) - else: - log.info( - f"Sending raw bytes because {reason}. Resource from URI: {uri[:64]}..." 
- ) - return types.Part.from_bytes(data=file_bytes, mime_type=mime_type) - - return None # Return None if bytes/mime_type could not be determined - - except FilesAPIError as e: - error_msg = f"Files API failed for URI '{uri[:64]}...': {e}" - log.error(error_msg) - self.event_emitter.emit_toast(error_msg, "error") - return None - except Exception: - log.exception(f"Error processing URI: {uri[:64]}[...]") - return None - - def _genai_part_from_youtube_uri(self, uri: str) -> types.Part | None: - """Creates a Gemini Part from a YouTube URL, with optional video metadata. - - Handles standard (`watch?v=`), short (`youtu.be/`), mobile (`shorts/`), - and live (`live/`) URLs. Metadata is parsed for the Gemini Developer API - but ignored for Vertex AI, which receives a simple URI Part. - - - **Start/End Time**: `?t=` and `#end=`. The value can be a - flexible duration (e.g., "1m30s", "90") and will be converted to seconds. - - **Frame Rate**: Can be specified in two ways (if both are present, - `interval` takes precedence): - - **Interval**: `#interval=` (e.g., `#interval=10s`, `#interval=0.5s`). - The value is a flexible duration converted to seconds, then to FPS (1/interval). - - **FPS**: `#fps=` (e.g., `#fps=2.5`). - The final FPS value must be in the range (0, 24]. - - Args: - uri: The raw YouTube URL from the user. - is_vertex_client: If True, creates a simple Part for Vertex AI. - - Returns: - A `types.Part` object, or `None` if the URI is not a valid YouTube link. - """ - # Convert YouTube Music URLs to standard YouTube URLs for consistent parsing. - if "music.youtube.com" in uri: - uri = uri.replace("music.youtube.com", "www.youtube.com") - log.info(f"Converted YouTube Music URL to standard URL: {uri}") - - # Regex to capture the 11-character video ID from various YouTube URL formats. 
- video_id_pattern = re.compile( - r"(?:https?://)?(?:www\.)?(?:youtube\.com/(?:watch\?v=|shorts/|live/)|youtu.be/)([a-zA-Z0-9_-]{11})" - ) - - match = video_id_pattern.search(uri) - if not match: - log.warning(f"Could not extract a valid YouTube video ID from URI: {uri}") - return None - - video_id = match.group(1) - canonical_uri = f"https://www.youtube.com/watch?v={video_id}" - - # --- Branching logic for Vertex AI vs. Gemini Developer API --- - if self.vertexai: - return types.Part.from_uri(file_uri=canonical_uri, mime_type="video/mp4") - else: - parsed_uri = urlparse(uri) - query_params = parse_qs(parsed_uri.query) - fragment_params = parse_qs(parsed_uri.fragment) - - start_offset: str | None = None - end_offset: str | None = None - fps: float | None = None - - # Start time from query `t`. Convert flexible format to "Ns". - if "t" in query_params: - raw_start = query_params["t"][0] - if ( - total_seconds := self._parse_duration_to_seconds(raw_start) - ) is not None: - start_offset = f"{total_seconds}s" - - # End time from fragment `end`. Convert flexible format to "Ns". - if "end" in fragment_params: - raw_end = fragment_params["end"][0] - if ( - total_seconds := self._parse_duration_to_seconds(raw_end) - ) is not None: - end_offset = f"{total_seconds}s" - - # Frame rate from fragment `interval` or `fps`. `interval` takes precedence. - if "interval" in fragment_params: - raw_interval = fragment_params["interval"][0] - if ( - interval_seconds := self._parse_duration_to_seconds(raw_interval) - ) is not None and interval_seconds > 0: - calculated_fps = 1.0 / interval_seconds - if 0.0 < calculated_fps <= 24.0: - fps = calculated_fps - else: - log.warning( - f"Interval '{raw_interval}' results in FPS '{calculated_fps}' which is outside the valid range (0.0, 24.0]. Ignoring." - ) - - # Fall back to `fps` param if not set by `interval`. 
- if fps is None and "fps" in fragment_params: - try: - fps_val = float(fragment_params["fps"][0]) - if 0.0 < fps_val <= 24.0: - fps = fps_val - else: - log.warning( - f"FPS value '{fps_val}' is outside the valid range (0.0, 24.0]. Ignoring." - ) - except (ValueError, IndexError): - log.warning( - f"Invalid FPS value in fragment: {fragment_params.get('fps')}. Ignoring." - ) - - video_metadata: types.VideoMetadata | None = None - if start_offset or end_offset or fps is not None: - video_metadata = types.VideoMetadata( - start_offset=start_offset, - end_offset=end_offset, - fps=fps, - ) - - return types.Part( - file_data=types.FileData(file_uri=canonical_uri), - video_metadata=video_metadata, - ) - - def _parse_duration_to_seconds(self, duration_str: str) -> float | None: - """Converts a human-readable duration string to total seconds. - - Supports formats like "1h30m15s", "90m", "3600s", or just "90". - Also supports float values like "0.5s" or "90.5". - Returns total seconds as a float, or None if the string is invalid. - """ - # First, try to convert the whole string as a plain number (e.g., "90", "90.5"). - try: - return float(duration_str) - except ValueError: - # If it fails, it might be a composite duration like "1m30s", so we parse it below. - pass - - total_seconds = 0.0 - # Regex to find number-unit pairs (e.g., 1h, 30.5m, 15s). Supports floats. - parts = re.findall(r"(\d+(?:\.\d+)?)\s*(h|m|s)?", duration_str, re.IGNORECASE) - - if not parts: - # log.warning(f"Could not parse duration string: {duration_str}") - return None - - for value, unit in parts: - val = float(value) - unit = (unit or "s").lower() # Default to seconds if no unit - if unit == "h": - total_seconds += val * 3600 - elif unit == "m": - total_seconds += val * 60 - elif unit == "s": - total_seconds += val - - return total_seconds - - @staticmethod - def _enable_special_tags(text: str) -> str: - """ - Reverses the action of _disable_special_tags by removing the ZWS - from special tags. 
This is used to clean up history messages before - sending them to the model, so it can understand the context correctly. - """ - if not text: - return "" - - # The regex finds ' 0: - log.debug(f"Re-enabled {count} special tag(s) for model context.") - - return restored_text - - async def _genai_parts_from_text( - self, text: str, status_queue: asyncio.Queue - ) -> list[types.Part]: - if not text: - return [] - - text = self._enable_special_tags(text) - parts: list[types.Part] = [] - last_pos = 0 - - # Conditionally build a regex to find media links. - # If YouTube parsing is disabled, the regex will only find markdown image links, - # leaving YouTube URLs to be treated as plain text. - markdown_part = r"!\[.*?\]\(([^)]+)\)" # Group 1: Markdown URI - youtube_part = r"(https?://(?:(?:www|music)\.)?youtube\.com/(?:watch\?v=|shorts/|live/)[^\s)]+|https?://youtu\.be/[^\s)]+)" # Group 2: YouTube URL - if self.valves.PARSE_YOUTUBE_URLS: - pattern = re.compile(f"{markdown_part}|{youtube_part}") - process_youtube = True - else: - pattern = re.compile(markdown_part) - process_youtube = False - log.info( - "YouTube URL parsing is disabled. URLs will be treated as plain text." - ) - - for match in pattern.finditer(text): - # Add the text segment that precedes the media link - if text_segment := text[last_pos : match.start()].strip(): - parts.append(types.Part.from_text(text=text_segment)) - - # The URI is in group 1 for markdown, or group 2 for YouTube. - if process_youtube: - uri = match.group(1) or match.group(2) - else: - uri = match.group(1) - - if not uri: - log.warning( - f"Found unsupported URI format in text: {match.group(0)}. Skipping." 
- ) - continue - - # Delegate all URI processing to the unified helper - if media_part := await self._genai_part_from_uri(uri, status_queue): - parts.append(media_part) - - last_pos = match.end() - - # Add any remaining text after the last media link - if remaining_text := text[last_pos:].strip(): - parts.append(types.Part.from_text(text=remaining_text)) - - # If no media links were found, the whole text is a single part - if not parts and text.strip(): - parts.append(types.Part.from_text(text=text.strip())) - - return parts - - @staticmethod - async def _get_file_data(file_id: str) -> tuple[bytes | None, str | None]: - """ - Asynchronously retrieves file metadata from the database and its content from disk. - """ - # TODO: Emit toasts on unexpected conditions. - if not file_id: - log.warning("file_id is empty. Cannot continue.") - return None, None - - # Run the synchronous, blocking database call in a separate thread - # to avoid blocking the main asyncio event loop. - try: - file_model = await asyncio.to_thread(Files.get_file_by_id, file_id) - except Exception as e: - log.exception( - f"An unexpected error occurred during database call for file_id {file_id}: {e}" - ) - return None, None - - if file_model is None: - # The get_file_by_id method already handles and logs the specific exception, - # so we just need to handle the None return value. - log.warning(f"File {file_id} not found in the backend's database.") - return None, None - - if not (file_path := file_model.path): - log.warning( - f"File {file_id} was found in the database but it lacks `path` field. Cannot Continue." - ) - return None, None - if file_model.meta is None: - log.warning( - f"File {file_path} was found in the database but it lacks `meta` field. Cannot continue." - ) - return None, None - if not (content_type := file_model.meta.get("content_type")): - log.warning( - f"File {file_path} was found in the database but it lacks `meta.content_type` field. Cannot continue." 
- ) - return None, None - - if file_path.startswith("gs://"): - try: - # Initialize the GCS client - storage_client = storage.Client() - - # Parse the GCS path - # The path should be in the format "gs://bucket-name/object-name" - if len(file_path.split("/", 3)) < 4: - raise ValueError( - f"Invalid GCS path: '{file_path}'. " - "Path must be in the format 'gs://bucket-name/object-name'." - ) - - bucket_name, blob_name = file_path.removeprefix("gs://").split("/", 1) - - # Get the bucket and blob (file object) - bucket = storage_client.bucket(bucket_name) - blob = bucket.blob(blob_name) - - # Download the file's content as bytes - print(f"Reading from GCS: {file_path}") - return blob.download_as_bytes(), content_type - except exceptions.NotFound: - print(f"Error: GCS object not found at {file_path}") - raise - except Exception as e: - print(f"An error occurred while reading from GCS: {e}") - raise - try: - async with aiofiles.open(file_path, "rb") as file: - file_data = await file.read() - return file_data, content_type - except FileNotFoundError: - log.exception(f"File {file_path} not found on disk.") - return None, content_type - except Exception: - log.exception(f"Error processing file {file_path}") - return None, content_type - - @staticmethod - def _remove_citation_markers(text: str, sources: list["Source"]) -> str: - original_text = text - processed: set[str] = set() - for source in sources: - supports = [ - metadata["supports"] - for metadata in source.get("metadata", []) - if "supports" in metadata - ] - supports = [item for sublist in supports for item in sublist] - for support in supports: - support = types.GroundingSupport(**support) - indices = support.grounding_chunk_indices - segment = support.segment - if not (indices and segment): - continue - segment_text = segment.text - if not segment_text: - continue - # Using a shortened version because user could edit the assistant message in the front-end. 
- # If citation segment get's edited, then the markers would not be removed. Shortening reduces the - # chances of this happening. - segment_end = segment_text[-32:] - if segment_end in processed: - continue - processed.add(segment_end) - citation_markers = "".join(f"[{index + 1}]" for index in indices) - # Find the position of the citation markers in the text - pos = text.find(segment_text + citation_markers) - if pos != -1: - # Remove the citation markers - text = ( - text[: pos + len(segment_text)] - + text[pos + len(segment_text) + len(citation_markers) :] - ) - trim = len(original_text) - len(text) - log.debug( - f"Citation removal finished. Returning text str that is {trim} character shorter than the original input." - ) - return text - - -class Pipe: - - @staticmethod - def _validate_coordinates_format(v: str | None) -> str | None: - """Reusable validator for 'latitude,longitude' format.""" - if v is not None and v != "": - try: - parts = v.split(",") - if len(parts) != 2: - raise ValueError( - "Must contain exactly two parts separated by a comma." - ) - - lat_str, lon_str = parts - lat = float(lat_str.strip()) - lon = float(lon_str.strip()) - - if not (-90 <= lat <= 90): - raise ValueError("Latitude must be between -90 and 90.") - if not (-180 <= lon <= 180): - raise ValueError("Longitude must be between -180 and 180.") - except (ValueError, TypeError) as e: - raise ValueError( - f"Invalid format for MAPS_GROUNDING_COORDINATES: '{v}'. " - f"Expected 'latitude,longitude' (e.g., '40.7128,-74.0060'). Original error: {e}" - ) - return v - - class Valves(BaseModel): - GEMINI_API_KEY: str | None = Field(default=None) - IMAGE_GEN_GEMINI_API_KEY: str | None = Field( - default=None, - description="""Optional separate API key for image generation models. - If not provided, the main GEMINI_API_KEY will be used. 
- An image generation model is identified by the Image Model Pattern regex below.""", - ) - USER_MUST_PROVIDE_AUTH_CONFIG: bool = Field( - default=False, - description="""Whether to require users (including admins) to provide their own authentication configuration. - User can provide these through UserValves. Setting this to True will disallow users from using Vertex AI. - Default value is False.""", - ) - AUTH_WHITELIST: str | None = Field( - default=None, - description="""Comma separated list of user emails that are allowed to bypassUSER_MUST_PROVIDE_AUTH_CONFIG and use the default authentication configuration. - Default value is None (no users are whitelisted).""", - ) - GEMINI_API_BASE_URL: str | None = Field( - default=None, - description="""The base URL for calling the Gemini API. - Default value is None.""", - ) - USE_VERTEX_AI: bool = Field( - default=False, - description="""Whether to use Google Cloud Vertex AI instead of the standard Gemini API. - If VERTEX_PROJECT is not set then the plugin will use the Gemini Developer API. - Default value is False. - Users can opt out of this by setting USE_VERTEX_AI to False in their UserValves.""", - ) - VERTEX_PROJECT: str | None = Field( - default=None, - description="""The Google Cloud project ID to use with Vertex AI. - Default value is None.""", - ) - VERTEX_LOCATION: str = Field( - default="global", - description="""The Google Cloud region to use with Vertex AI. - Default value is 'global'.""", - ) - MODEL_WHITELIST: str = Field( - default="*", - description="""Comma-separated list of allowed model names. - Supports `fnmatch` patterns: *, ?, [seq], [!seq]. - Default value is * (all models allowed).""", - ) - MODEL_BLACKLIST: str | None = Field( - default=None, - description="""Comma-separated list of blacklisted model names. - Supports `fnmatch` patterns: *, ?, [seq], [!seq]. 
- Default value is None (no blacklist).""", - ) - CACHE_MODELS: bool = Field( - default=True, - description="""Whether to request models only on first load and when white- or blacklist changes. - Default value is True.""", - ) - THINKING_BUDGET: int = Field( - default=8192, - ge=-1, - # The widest possible range is 0 (for Lite/Flash) to 32768 (for Pro). - # -1 is used for dynamic thinking budget. - # Model-specific constraints are detailed in the description. - le=32768, - description="""Specifies the token budget for the model's internal thinking process, - used for complex tasks like tool use. Applicable to Gemini 2.5 models. - Default value is 8192. If you want the model to control the thinking budget when using the API, set the thinking budget to -1. - - The valid token range depends on the specific model tier: - - **Pro models**: Must be a value between 128 and 32,768. - - **Flash and Lite models**: A value between 0 and 24,576. For these - models, a value of 0 disables the thinking feature. - - See for more details.""", - ) - SHOW_THINKING_SUMMARY: bool = Field( - default=True, - description="""Whether to show the thinking summary in the response. - This is only applicable for Gemini 2.5 models. - Default value is True.""", - ) - THINKING_MODEL_PATTERN: str = Field( - default=r"^(?=.*(?:gemini-2\.5|gemini-flash-latest|gemini-flash-lite-latest))(?!(.*live))(?!(.*image))", - description="""Regex pattern to identify thinking models. - Default value is r"^(?=.*(?:gemini-2\.5|gemini-flash-latest|gemini-flash-lite-latest))(?!(.*live))(?!(.*image))".""", - ) - IMAGE_MODEL_PATTERN: str = Field( - default=r"image", - description="""Regex pattern to identify image generation models. - Default value is r"image".""", - ) - # FIXME: remove - ENABLE_URL_CONTEXT_TOOL: bool = Field( - default=False, - description="""Enable the URL context tool to allow the model to fetch and use content from provided URLs. - This tool is only compatible with specific models. 
Default value is False.""", - ) - USE_FILES_API: bool = Field( - default=True, - description="""Whether to use the Google Files API for uploading files. - This provides caching and performance benefits, but can be disabled for privacy, cost, or compatibility reasons. - If disabled, files are sent as raw bytes in the request. - Default value is True.""", - ) - PARSE_YOUTUBE_URLS: bool = Field( - default=True, - description="""Whether to parse YouTube URLs from user messages and provide them as context to the model. - If disabled, YouTube links are treated as plain text. - This is only applicable for models that support video. - Default value is True.""", - ) - USE_ENTERPRISE_SEARCH: bool = Field( - default=False, - description="""Enable the Enterprise Search tool to allow the model to fetch and use content from provided URLs. """, - ) - MAPS_GROUNDING_COORDINATES: str | None = Field( - default=None, - description="""Optional latitude and longitude coordinates for location-aware results with Google Maps grounding. - Expected format: 'latitude,longitude' (e.g., '40.7128,-74.0060'). - Default value is None.""", - ) - HIDE_SUCCESSFUL_STATUS_MESSAGE: bool = Field( - default=False, - description="""Whether to hide the final 'Response finished' status message on success. - Error messages will always be shown. - Default value is False.""", - ) - LOG_LEVEL: Literal[ - "TRACE", "DEBUG", "INFO", "SUCCESS", "WARNING", "ERROR", "CRITICAL" - ] = Field( - default="INFO", - description="""Select logging level. Use `docker logs -f open-webui` to view logs. - Default value is INFO.""", - ) - - @field_validator("MAPS_GROUNDING_COORDINATES", mode="after") - @classmethod - def validate_coordinates_format(cls, v: str | None): - return Pipe._validate_coordinates_format(v) - - class UserValves(BaseModel): - """Defines user-specific settings that can override the default `Valves`. 
- - The `UserValves` class provides a mechanism for individual users to customize - their Gemini API settings for each request. This system is designed as a - practical workaround for backend/frontend limitations, enabling per-user - configurations. - - Think of the main `Valves` as the global, admin-configured template for the - plugin. `UserValves` acts as a user-provided "overlay" or "patch" that - is applied on top of that template at runtime. - - How it works: - 1. **Default Behavior:** At the start of a request, the system merges the - user's `UserValves` with the admin's `Valves`. If a field in - `UserValves` has a value (i.e., is not `None` or an empty string `""`), - it overrides the corresponding value from the main `Valves`. If a - field is `None` or `""`, the admin's default is used. - - 2. **Special Authentication Logic:** A critical exception exists to enforce - security and usage policies. If the admin sets `USER_MUST_PROVIDE_AUTH_CONFIG` - to `True` in the main `Valves`, the merging logic changes for any user - not on the `AUTH_WHITELIST`: - - The user's `GEMINI_API_KEY` is taken directly from their `UserValves`, - bypassing the admin's key entirely. - - The ability to use the admin-configured Vertex AI is disabled - (`USE_VERTEX_AI` is forced to `False`). - This ensures that when required, users must use their own credentials - and cannot fall back on the shared, system-level authentication. - - This two-tiered configuration allows administrators to set sensible defaults - and enforce policies, while still giving users the flexibility to tailor - certain parameters, like their API key or model settings, for their own use. - """ - - GEMINI_API_KEY: str | None = Field( - default=None, - description="""Gemini Developer API key. 
- Default value is None (uses the default from Valves, same goes for other options below).""", - ) - IMAGE_GEN_GEMINI_API_KEY: str | None = Field( - default=None, - description="""Optional separate API key for image generation models. - If not provided, the main GEMINI_API_KEY will be used.""", - ) - GEMINI_API_BASE_URL: str | None = Field( - default=None, - description="""The base URL for calling the Gemini API - Default value is None.""", - ) - USE_VERTEX_AI: bool | None | Literal[""] = Field( - default=None, - description="""Whether to use Google Cloud Vertex AI instead of the standard Gemini API. - Default value is None.""", - ) - VERTEX_PROJECT: str | None = Field( - default=None, - description="""The Google Cloud project ID to use with Vertex AI. - Default value is None.""", - ) - VERTEX_LOCATION: str | None = Field( - default=None, - description="""The Google Cloud region to use with Vertex AI. - Default value is None.""", - ) - THINKING_BUDGET: int | None | Literal[""] = Field( - default=None, - description="""Specifies the token budget for the model's internal thinking process, - used for complex tasks like tool use. Applicable to Gemini 2.5 models. - Default value is None. If you want the model to control the thinking budget when using the API, set the thinking budget to -1. - - The valid token range depends on the specific model tier: - - **Pro models**: Must be a value between 128 and 32,768. - - **Flash and Lite models**: A value between 0 and 24,576. For these - models, a value of 0 disables the thinking feature. - - See for more details.""", - ) - SHOW_THINKING_SUMMARY: bool | None | Literal[""] = Field( - default=None, - description="""Whether to show the thinking summary in the response. - This is only applicable for Gemini 2.5 models. - Default value is None.""", - ) - THINKING_MODEL_PATTERN: str | None = Field( - default=None, - description="""Regex pattern to identify thinking models. 
- Default value is None.""", - ) - ENABLE_URL_CONTEXT_TOOL: bool | None | Literal[""] = Field( - default=None, - description="""Enable the URL context tool to allow the model to fetch and use content from provided URLs. - This tool is only compatible with specific models. Default value is None.""", - ) - USE_FILES_API: bool | None | Literal[""] = Field( - default=None, - description="""Override the default setting for using the Google Files API. - Set to True to force use, False to disable. - Default is None (use the admin's setting).""", - ) - PARSE_YOUTUBE_URLS: bool | None | Literal[""] = Field( - default=None, - description="""Override the default setting for parsing YouTube URLs. - Set to True to enable, False to disable. - Default is None (use the admin's setting).""", - ) - MAPS_GROUNDING_COORDINATES: str | None | Literal[""] = Field( - default=None, - description="""Optional latitude and longitude coordinates for location-aware results with Google Maps grounding. - Overrides the admin setting. Expected format: 'latitude,longitude' (e.g., '40.7128,-74.0060'). - Default value is None.""", - ) - HIDE_SUCCESSFUL_STATUS_MESSAGE: bool | None | Literal[""] = Field( - default=None, - description="""Override the default setting for hiding the successful status message. - Set to True to hide, False to show. - Default is None (use the admin's setting).""", - ) - - @field_validator("THINKING_BUDGET", mode="after") - @classmethod - def validate_thinking_budget_range(cls, v): - if v is not None and v != "": - if not (-1 <= v <= 32768): - raise ValueError( - "THINKING_BUDGET must be between -1 and 32768, inclusive." 
- ) - return v - - @field_validator("MAPS_GROUNDING_COORDINATES", mode="after") - @classmethod - def validate_coordinates_format(cls, v: str | None): - return Pipe._validate_coordinates_format(v) - - def __init__(self): - self.valves = self.Valves() - self.file_content_cache = SimpleMemoryCache(serializer=NullSerializer()) - self.file_id_to_hash_cache = SimpleMemoryCache(serializer=NullSerializer()) - log.success("Function has been initialized.") - - async def pipes(self) -> list["ModelData"]: - """Register all available Google models.""" - self._add_log_handler(self.valves.LOG_LEVEL) - log.debug("pipes method has been called.") - - # Clear cache if caching is disabled - if not self.valves.CACHE_MODELS: - log.debug("CACHE_MODELS is False, clearing model cache.") - cache_instance = getattr(self._get_genai_models, "cache") - await cast(BaseCache, cache_instance).clear() - - log.info("Fetching and filtering models from Google API.") - # Get and filter models (potentially cached based on API key, base URL, white- and blacklist) - try: - client_args = self._prepare_client_args(self.valves) - client_args += [self.valves.MODEL_WHITELIST, self.valves.MODEL_BLACKLIST] - filtered_models = await self._get_genai_models(*client_args) - except GenaiApiError: - error_msg = "Error getting the models from Google API, check the logs." - return [self._return_error_model(error_msg, exception=True)] - - log.info(f"Returning {len(filtered_models)} models to Open WebUI.") - log.debug("Model list:", payload=filtered_models, _log_truncation_enabled=False) - log.debug("pipes method has finished.") - - return filtered_models - - async def pipe( - self, - body: "Body", - __user__: "UserData", - __request__: Request, - __event_emitter__: Callable[["Event"], Awaitable[None]] | None, - __metadata__: "Metadata", - ) -> AsyncGenerator[dict, None] | str: - - start_time = time.monotonic() - self._add_log_handler(self.valves.LOG_LEVEL) - - log.debug( - f"pipe method has been called. 
Gemini Manifold google_genai version is {VERSION}" - ) - log.trace("__metadata__:", payload=__metadata__) - features = __metadata__.get("features", {}) or {} - - # Check the version of the companion filter - self._check_companion_filter_version(features) - - # Apply settings from the user - valves: Pipe.Valves = self._get_merged_valves( - self.valves, __user__.get("valves"), __user__.get("email") - ) - - model_name = re.sub(r"^.*?[./]", "", body.get("model", "")) - is_image_model = self._is_image_model(model_name, valves.IMAGE_MODEL_PATTERN) - - if is_image_model and valves.IMAGE_GEN_GEMINI_API_KEY: - log.info("Using separate API key for image generation model.") - valves.GEMINI_API_KEY = valves.IMAGE_GEN_GEMINI_API_KEY - - # When using a separate key, assume it's for Gemini API, not Vertex AI - # TODO: check if it would work for Vertex AI as well - valves.USE_VERTEX_AI = False - valves.VERTEX_PROJECT = None - - log.debug( - f"USE_VERTEX_AI: {valves.USE_VERTEX_AI}, VERTEX_PROJECT set: {bool(valves.VERTEX_PROJECT)}, API_KEY set: {bool(valves.GEMINI_API_KEY)}" - ) - - log.debug( - f"Getting genai client (potentially cached) for user {__user__['email']}." - ) - client = self._get_user_client(valves, __user__["email"]) - __metadata__["is_vertex_ai"] = client.vertexai - - if __metadata__.get("task"): - log.info(f'{__metadata__["task"]=}, disabling event emissions.') # type: ignore - # Task model is not user facing, so we should not emit any events. - __event_emitter__ = None - - event_emitter = EventEmitter( - __event_emitter__, - hide_successful_status=valves.HIDE_SUCCESSFUL_STATUS_MESSAGE, - ) - - files_api_manager = FilesAPIManager( - client=client, - file_cache=self.file_content_cache, - id_hash_cache=self.file_id_to_hash_cache, - event_emitter=event_emitter, - ) - - # Check if user is chatting with an error model for some reason. 
- if "error" in __metadata__["model"]["id"]: - error_msg = f"There has been an error during model retrival phase: {str(__metadata__['model'])}" - raise ValueError(error_msg) - - # NOTE: will be "local" if Temporary Chat is enabled. - chat_id = __metadata__.get("chat_id", "not_provided") - message_id = __metadata__.get("message_id", "not_provided") - - log.info( - "Converting Open WebUI's `body` dict into list of `Content` objects that `google-genai` understands." - ) - - builder = GeminiContentBuilder( - messages_body=body.get("messages"), - metadata_body=__metadata__, - user_data=__user__, - event_emitter=event_emitter, - valves=valves, - files_api_manager=files_api_manager, - ) - # This is our first timed event, marking the start of payload preparation. - asyncio.create_task(event_emitter.emit_status("Preparing request...")) - contents = await builder.build_contents(start_time=start_time) - - gen_content_conf = self._build_gen_content_config(body, __metadata__, valves) - gen_content_conf.system_instruction = builder.system_prompt - - # Some models (e.g., image generation, Gemma) do not support the system prompt message. - system_prompt_unsupported = is_image_model or "gemma" in model_name - if system_prompt_unsupported: - # TODO: append to user message instead. - if gen_content_conf.system_instruction: - gen_content_conf.system_instruction = None - log.warning( - f"Model '{model_name}' does not support the system prompt message! Removing the system prompt." - ) - - gen_content_args = { - "model": model_name, - "contents": contents, - "config": gen_content_conf, - } - log.debug("Passing these args to the Google API:", payload=gen_content_args) - - # Both streaming and non-streaming responses are now handled by the same - # unified processor, which returns an AsyncGenerator. For non-streaming, - # we adapt the single response object into a one-item async generator. 
- - elapsed_time = time.monotonic() - start_time - time_str = f"(+{elapsed_time:.2f}s)" - - # Determine the request type to provide a more informative status message. - is_streaming = features.get("stream", True) - request_type_str = "streaming" if is_streaming else "non-streaming" - - # Emit a status update with timing before making the actual API call. - asyncio.create_task(event_emitter.emit_status(f"Sending {request_type_str} request to Google API... {time_str}")) - - if is_streaming: - # Streaming response - response_stream: AsyncIterator[types.GenerateContentResponse] = ( - await client.aio.models.generate_content_stream(**gen_content_args) # type: ignore - ) - - log.info( - "Streaming enabled. Returning AsyncGenerator from unified processor." - ) - log.debug("pipe method has finished.") - return self._unified_response_processor( - response_stream, - __request__, - model_name, - event_emitter, - __user__["id"], - chat_id, - message_id, - start_time=start_time, - ) - else: - # Non-streaming response. - res = await client.aio.models.generate_content(**gen_content_args) - - # Adapter: Create a simple, one-shot async generator that yields the - # single response object, making it behave like a stream. - async def single_item_stream( - response: types.GenerateContentResponse, - ) -> AsyncGenerator[types.GenerateContentResponse, None]: - yield response - - log.info( - "Streaming disabled. Adapting full response and returning " - "AsyncGenerator from unified processor." - ) - log.debug("pipe method has finished.") - return self._unified_response_processor( - single_item_stream(res), - __request__, - model_name, - event_emitter, - __user__["id"], - chat_id, - message_id, - start_time=start_time, - ) - - # region 2. 
Helper methods inside the Pipe class - - # region 2.1 Client initialization - @staticmethod - @cache - def _get_or_create_genai_client( - api_key: str | None = None, - base_url: str | None = None, - use_vertex_ai: bool | None = None, - vertex_project: str | None = None, - vertex_location: str | None = None, - ) -> genai.Client: - """ - Creates a genai.Client instance or retrieves it from cache. - Raises GenaiApiError on failure. - """ - - if not vertex_project and not api_key: - # FIXME: More detailed reason in the exception (tell user to set the API key). - msg = "Neither VERTEX_PROJECT nor GEMINI_API_KEY is set." - raise GenaiApiError(msg) - - if use_vertex_ai and vertex_project: - kwargs = { - "vertexai": True, - "project": vertex_project, - "location": vertex_location, - } - api = "Vertex AI" - else: # Covers (use_vertex_ai and not vertex_project) OR (not use_vertex_ai) - if use_vertex_ai and not vertex_project: - log.warning( - "Vertex AI is enabled but no project is set. " - "Using Gemini Developer API." - ) - # This also implicitly covers the case where api_key might be None, - # which is handled by the initial check or the SDK. 
- kwargs = { - "api_key": api_key, - "http_options": types.HttpOptions(base_url=base_url), - } - api = "Gemini Developer API" - - try: - client = genai.Client(**kwargs) - log.success(f"{api} Genai client successfully initialized.") - return client - except Exception as e: - raise GenaiApiError(f"{api} Genai client initialization failed: {e}") from e - - def _get_user_client(self, valves: "Pipe.Valves", user_email: str) -> genai.Client: - user_whitelist = ( - valves.AUTH_WHITELIST.split(",") if valves.AUTH_WHITELIST else [] - ) - log.debug( - f"User whitelist: {user_whitelist}, user email: {user_email}, " - f"USER_MUST_PROVIDE_AUTH_CONFIG: {valves.USER_MUST_PROVIDE_AUTH_CONFIG}" - ) - if valves.USER_MUST_PROVIDE_AUTH_CONFIG and user_email not in user_whitelist: - if not valves.GEMINI_API_KEY: - error_msg = ( - "User must provide their own authentication configuration. " - "Please set GEMINI_API_KEY in your UserValves." - ) - raise ValueError(error_msg) - try: - client_args = self._prepare_client_args(valves) - client = self._get_or_create_genai_client(*client_args) - except GenaiApiError as e: - error_msg = f"Failed to initialize genai client for user {user_email}: {e}" - # FIXME: include correct traceback. 
- raise ValueError(error_msg) from e - return client - - @staticmethod - def _prepare_client_args( - source_valves: "Pipe.Valves | Pipe.UserValves", - ) -> list[str | bool | None]: - """Prepares arguments for _get_or_create_genai_client from source_valves.""" - ATTRS = [ - "GEMINI_API_KEY", - "GEMINI_API_BASE_URL", - "USE_VERTEX_AI", - "VERTEX_PROJECT", - "VERTEX_LOCATION", - ] - return [getattr(source_valves, attr) for attr in ATTRS] - - # endregion 2.1 Client initialization - - # region 2.2 Model retrival from Google API - @cached() # aiocache.cached for async method - async def _get_genai_models( - self, - api_key: str | None, - base_url: str | None, - use_vertex_ai: bool | None, # User's preference from config - vertex_project: str | None, - vertex_location: str | None, - whitelist_str: str, - blacklist_str: str | None, - ) -> list["ModelData"]: - """ - Gets valid Google models from API(s) and filters them. - If use_vertex_ai, vertex_project, and api_key are all provided, - models are fetched from both Vertex AI and Gemini Developer API and merged. - """ - all_raw_models: list[types.Model] = [] - - # Condition for fetching from both sources - fetch_both = bool(use_vertex_ai and vertex_project and api_key) - - if fetch_both: - log.info( - "Attempting to fetch models from both Gemini Developer API and Vertex AI." - ) - gemini_models_list: list[types.Model] = [] - vertex_models_list: list[types.Model] = [] - - # TODO: perf, consider parallelizing these two fetches - # 1. 
Fetch from Gemini Developer API - try: - gemini_client = self._get_or_create_genai_client( - api_key=api_key, - base_url=base_url, - use_vertex_ai=False, # Explicitly target Gemini API - vertex_project=None, - vertex_location=None, - ) - gemini_models_list = await self._fetch_models_from_client_internal( - gemini_client, "Gemini Developer API" - ) - except GenaiApiError as e: - log.warning( - f"Failed to initialize or retrieve models from Gemini Developer API: {e}" - ) - except Exception as e: - log.warning( - f"An unexpected error occurred with Gemini Developer API models: {e}", - exc_info=True, - ) - - # 2. Fetch from Vertex AI - try: - vertex_client = self._get_or_create_genai_client( - use_vertex_ai=True, # Explicitly target Vertex AI - vertex_project=vertex_project, - vertex_location=vertex_location, - api_key=None, # API key is not used for Vertex AI with project auth - base_url=base_url, # Pass base_url for potential Vertex custom endpoints - ) - vertex_models_list = await self._fetch_models_from_client_internal( - vertex_client, "Vertex AI" - ) - except GenaiApiError as e: - log.warning( - f"Failed to initialize or retrieve models from Vertex AI: {e}" - ) - except Exception as e: - log.warning( - f"An unexpected error occurred with Vertex AI models: {e}", - exc_info=True, - ) - - # 3. 
Combine and de-duplicate - # Prioritize models from Gemini Developer API in case of ID collision - combined_models_dict: dict[str, types.Model] = {} - - for model in gemini_models_list: - if model.name: - model_id = Pipe.strip_prefix(model.name) - if model_id and model_id not in combined_models_dict: - combined_models_dict[model_id] = model - else: - log.trace( - f"Gemini model without a name encountered: {model.display_name or 'N/A'}" - ) - - for model in vertex_models_list: - if model.name: - model_id = Pipe.strip_prefix(model.name) - if model_id: - if model_id not in combined_models_dict: - combined_models_dict[model_id] = model - else: - log.info( - f"Duplicate model ID '{model_id}' from Vertex AI already sourced from Gemini API. Keeping Gemini API version." - ) - else: - log.trace( - f"Vertex AI model without a name encountered: {model.display_name or 'N/A'}" - ) - - all_raw_models = list(combined_models_dict.values()) - - log.info( - f"Fetched {len(gemini_models_list)} models from Gemini API, " - f"{len(vertex_models_list)} from Vertex AI. " - f"Combined to {len(all_raw_models)} unique models." - ) - - if not all_raw_models and (gemini_models_list or vertex_models_list): - log.warning( - "Models were fetched but resulted in an empty list after de-duplication, possibly due to missing names or empty/duplicate IDs." - ) - - if not all_raw_models and not gemini_models_list and not vertex_models_list: - raise GenaiApiError( - "Failed to retrieve models: Both Gemini Developer API and Vertex AI attempts yielded no models." - ) - - else: # Single source logic - # Determine if we are effectively using Vertex AI or Gemini API - # This depends on user's config (use_vertex_ai) and availability of project/key - client_target_is_vertex = bool(use_vertex_ai and vertex_project) - client_source_name = ( - "Vertex AI" if client_target_is_vertex else "Gemini Developer API" - ) - log.info( - f"Attempting to fetch models from a single source: {client_source_name}." 
- ) - - try: - client = self._get_or_create_genai_client( - api_key=api_key, - base_url=base_url, - use_vertex_ai=client_target_is_vertex, # Pass the determined target - vertex_project=vertex_project if client_target_is_vertex else None, - vertex_location=( - vertex_location if client_target_is_vertex else None - ), - ) - all_raw_models = await self._fetch_models_from_client_internal( - client, client_source_name - ) - - if not all_raw_models: - raise GenaiApiError( - f"No models retrieved from {client_source_name}. This could be due to an API error, network issue, or no models being available." - ) - - except GenaiApiError as e: - raise GenaiApiError( - f"Failed to get models from {client_source_name}: {e}" - ) from e - except Exception as e: - log.error( - f"An unexpected error occurred while configuring client or fetching models from {client_source_name}: {e}", - exc_info=True, - ) - raise GenaiApiError( - f"An unexpected error occurred while retrieving models from {client_source_name}: {e}" - ) from e - - # --- Common processing for all_raw_models --- - - if not all_raw_models: - log.warning("No models available after attempting all configured sources.") - return [] - - log.info(f"Processing {len(all_raw_models)} unique raw models.") - - generative_models: list[types.Model] = [] - for model in all_raw_models: - if model.name is None: - log.trace( - f"Skipping model with no name during generative filter: {model.display_name or 'N/A'}" - ) - continue - actions = model.supported_actions - if ( - actions is None or "generateContent" in actions - ): # Includes models if actions is None (e.g., Vertex) - generative_models.append(model) - else: - log.trace( - f"Model '{model.name}' (ID: {Pipe.strip_prefix(model.name)}) skipped, not generative (actions: {actions})." - ) - - if not generative_models: - log.warning( - "No generative models found after filtering all retrieved models." 
- ) - return [] - - def match_patterns( - name_to_check: str, list_of_patterns_str: str | None - ) -> bool: - if not list_of_patterns_str: - return False - patterns = [ - pat for pat in list_of_patterns_str.replace(" ", "").split(",") if pat - ] # Ensure pat is not empty - return any(fnmatch.fnmatch(name_to_check, pat) for pat in patterns) - - filtered_models_data: list["ModelData"] = [] - for model in generative_models: - # model.name is guaranteed non-None by generative_models filter logic - stripped_name = Pipe.strip_prefix(model.name) # type: ignore - - if not stripped_name: - log.warning( - f"Model '{model.name}' (display: {model.display_name}) resulted in an empty ID after stripping. Skipping." - ) - continue - - passes_whitelist = not whitelist_str or match_patterns( - stripped_name, whitelist_str - ) - passes_blacklist = not blacklist_str or not match_patterns( - stripped_name, blacklist_str - ) - - if passes_whitelist and passes_blacklist: - filtered_models_data.append( - { - "id": stripped_name, - "name": model.display_name or stripped_name, - "description": model.description, - } - ) - else: - log.trace( - f"Model ID '{stripped_name}' filtered out by whitelist/blacklist. Whitelist match: {passes_whitelist}, Blacklist pass: {passes_blacklist}" - ) - - log.info( - f"Filtered {len(generative_models)} generative models down to {len(filtered_models_data)} models based on white/blacklists." - ) - return filtered_models_data - - # TODO: Use cache for this method too? 
- async def _fetch_models_from_client_internal( - self, client: genai.Client, source_name: str - ) -> list[types.Model]: - """Helper to fetch models from a given client and handle common exceptions.""" - try: - google_models_pager = await client.aio.models.list( - config={"query_base": True} # Fetch base models by default - ) - models = [model async for model in google_models_pager] - log.info(f"Retrieved {len(models)} models from {source_name}.") - log.trace( - f"All models returned by {source_name}:", payload=models - ) # Can be verbose - return models - except Exception as e: - log.error(f"Retrieving models from {source_name} failed: {e}") - # Return empty list; caller decides if this is fatal for the whole operation. - return [] - - @staticmethod - def _return_error_model( - error_msg: str, warning: bool = False, exception: bool = True - ) -> "ModelData": - """Returns a placeholder model for communicating error inside the pipes method to the front-end.""" - if warning: - log.opt(depth=1, exception=False).warning(error_msg) - else: - log.opt(depth=1, exception=exception).error(error_msg) - return { - "id": "error", - "name": "[gemini_manifold] " + error_msg, - "description": error_msg, - } - - @staticmethod - def strip_prefix(model_name: str) -> str: - """ - Extract the model identifier using regex, handling various naming conventions. - e.g., "gemini_manifold_google_genai.gemini-2.5-flash-preview-04-17" -> "gemini-2.5-flash-preview-04-17" - e.g., "models/gemini-1.5-flash-001" -> "gemini-1.5-flash-001" - e.g., "publishers/google/models/gemini-1.5-pro" -> "gemini-1.5-pro" - """ - # Use regex to remove everything up to and including the last '/' or the first '.' 
- stripped = re.sub(r"^(?:.*/|[^.]*\.)", "", model_name) - return stripped - - @staticmethod - def _is_image_model(model_name: str, pattern: str) -> bool: - return bool(re.search(pattern, model_name, re.IGNORECASE)) - - # endregion 2.2 Model retrival from Google API - - # region 2.3 GenerateContentConfig assembly - - def _build_gen_content_config( - self, - body: "Body", - __metadata__: "Metadata", - valves: "Valves", - ) -> types.GenerateContentConfig: - """Assembles the GenerateContentConfig for a Gemini API request.""" - model_name = re.sub(r"^.*?[./]", "", body.get("model", "")) - features = __metadata__.get("features", {}) or {} - is_vertex_ai = __metadata__.get("is_vertex_ai", False) - - log.debug( - "Features extracted from metadata (UI toggles and config):", - payload=features - ) - - safety_settings: list[types.SafetySetting] | None = __metadata__.get( - "safety_settings" - ) - - thinking_conf = None - # Use the user-configurable regex to determine if this is a thinking model. - is_thinking_model = re.search( - valves.THINKING_MODEL_PATTERN, model_name, re.IGNORECASE - ) - log.debug( - f"Model '{model_name}' is classified as a reasoning model: {bool(is_thinking_model)}. " - f"Pattern: '{valves.THINKING_MODEL_PATTERN}'" - ) - - if is_thinking_model: - # Start with the default thinking configuration from valves. - log.info(f"Setting `thinking_budget` to {valves.THINKING_BUDGET} and `include_thoughts` to {valves.SHOW_THINKING_SUMMARY}.") - thinking_conf = types.ThinkingConfig( - thinking_budget=valves.THINKING_BUDGET, - include_thoughts=valves.SHOW_THINKING_SUMMARY, - ) - - # Check if reasoning can be disabled. This happens if the toggle is available but turned OFF by the user. - is_avail, is_on = self._get_toggleable_feature_status( - "gemini_reasoning_toggle", __metadata__ - ) - if is_avail and not is_on: - # This toggle is only applicable to flash/lite models, which support a budget of 0. 
- is_reasoning_toggleable = "flash" in model_name or "lite" in model_name - if is_reasoning_toggleable: - log.info( - f"Model '{model_name}' supports disabling reasoning, and it is toggled OFF in the UI. " - "Overwriting `thinking_budget` to 0 to disable reasoning." - ) - thinking_conf.thinking_budget = 0 - - # TODO: Take defaults from the general front-end config. - # system_instruction is intentionally left unset here. It will be set by the caller. - gen_content_conf = types.GenerateContentConfig( - temperature=body.get("temperature"), - top_p=body.get("top_p"), - top_k=body.get("top_k"), - max_output_tokens=body.get("max_tokens"), - stop_sequences=body.get("stop"), - safety_settings=safety_settings, - thinking_config=thinking_conf, - ) - - gen_content_conf.response_modalities = ["TEXT"] - if self._is_image_model(model_name, valves.IMAGE_MODEL_PATTERN): - gen_content_conf.response_modalities.append("IMAGE") - - gen_content_conf.tools = [] - - if features.get("google_search_tool"): - if valves.USE_ENTERPRISE_SEARCH and is_vertex_ai: - log.info("Using grounding with Enterprise Web Search as a Tool.") - gen_content_conf.tools.append( - types.Tool(enterprise_web_search=types.EnterpriseWebSearch()) - ) - else: - log.info("Using grounding with Google Search as a Tool.") - gen_content_conf.tools.append( - types.Tool(google_search=types.GoogleSearch()) - ) - elif features.get("google_search_retrieval"): - log.info("Using grounding with Google Search Retrieval.") - gs = types.GoogleSearchRetrieval( - dynamic_retrieval_config=types.DynamicRetrievalConfig( - dynamic_threshold=features.get("google_search_retrieval_threshold") - ) - ) - gen_content_conf.tools.append(types.Tool(google_search_retrieval=gs)) - - # NB: It is not possible to use both Search and Code execution at the same time, - # however, it can be changed later, so let's just handle it as a common error - if features.get("google_code_execution"): - log.info("Using code execution on Google side.") - 
gen_content_conf.tools.append( - types.Tool(code_execution=types.ToolCodeExecution()) - ) - - # Determine if URL context tool should be enabled. - is_avail, is_on = self._get_toggleable_feature_status( - "gemini_url_context_toggle", __metadata__ - ) - enable_url_context = valves.ENABLE_URL_CONTEXT_TOOL # Start with valve default. - if is_avail: - # If the toggle filter is configured, it overrides the valve setting. - enable_url_context = is_on - - if enable_url_context: - if model_name in COMPATIBLE_MODELS_FOR_URL_CONTEXT: - if is_vertex_ai and (len(gen_content_conf.tools) > 0): - log.warning( - "URL context tool is enabled, but Vertex AI is used with other tools. Skipping." - ) - else: - log.info( - f"Model {model_name} is compatible with URL context tool. Enabling." - ) - gen_content_conf.tools.append( - types.Tool(url_context=types.UrlContext()) - ) - else: - log.warning( - f"URL context tool is enabled, but model {model_name} is not in the compatible list. Skipping." - ) - - # Determine if Google Maps grounding should be enabled. - is_avail, is_on = self._get_toggleable_feature_status( - "gemini_maps_grounding_toggle", __metadata__ - ) - if is_avail and is_on: - log.info("Enabling Google Maps grounding tool.") - gen_content_conf.tools.append( - types.Tool(google_maps=types.GoogleMaps()) - ) - - if valves.MAPS_GROUNDING_COORDINATES: - try: - lat_str, lon_str = valves.MAPS_GROUNDING_COORDINATES.split(",") - latitude = float(lat_str.strip()) - longitude = float(lon_str.strip()) - - log.info( - "Using coordinates for Maps grounding: " - f"lat={latitude}, lon={longitude}" - ) - - lat_lng = types.LatLng(latitude=latitude, longitude=longitude) - - # Ensure tool_config and retrieval_config exist before assigning lat_lng. 
- if not gen_content_conf.tool_config: - gen_content_conf.tool_config = types.ToolConfig() - if not gen_content_conf.tool_config.retrieval_config: - gen_content_conf.tool_config.retrieval_config = ( - types.RetrievalConfig() - ) - - gen_content_conf.tool_config.retrieval_config.lat_lng = lat_lng - - except (ValueError, TypeError) as e: - # This should not happen due to the Pydantic validator, but it's good practice to be safe. - log.error( - "Failed to parse MAPS_GROUNDING_COORDINATES: " - f"'{valves.MAPS_GROUNDING_COORDINATES}'. Error: {e}" - ) - - return gen_content_conf - - # endregion 2.3 GenerateContentConfig assembly - - # region 2.4 Model response processing - async def _unified_response_processor( - self, - response_stream: AsyncIterator[types.GenerateContentResponse], - __request__: Request, - model: str, - event_emitter: EventEmitter, - user_id: str, - chat_id: str, - message_id: str, - start_time: float, - ) -> AsyncGenerator[dict, None]: - """ - Processes an async iterator of GenerateContentResponse objects, yielding - structured dictionary chunks for the Open WebUI frontend. - - This single method handles both streaming and non-streaming (via an adapter) - responses, eliminating code duplication. It processes all parts within each - response chunk, counts tag substitutions for a final toast notification, - and handles post-processing in a finally block. - """ - final_response_chunk: types.GenerateContentResponse | None = None - error_occurred = False - total_substitutions = 0 - first_chunk_received = False - chunk_counter = 0 - - try: - async for chunk in response_stream: - log.trace(f"Processing response chunk #{chunk_counter}:", payload=chunk) - chunk_counter += 1 - final_response_chunk = chunk # Keep the latest chunk for metadata - - if not first_chunk_received: - # This is the first (and possibly only) chunk. 
- elapsed_time = time.monotonic() - start_time - time_str = f"(+{elapsed_time:.2f}s)" - asyncio.create_task( - event_emitter.emit_status( - f"Response received {time_str}", - done=True, - ) - ) - first_chunk_received = True - - if not (parts := chunk.parts): - log.warning("Chunk has no parts, skipping.") - continue - - # This inner loop makes the method robust. It handles a single chunk - # with many parts (non-streaming) or many chunks with one part (streaming). - for part in parts: - payload, count = await self._process_part( - part, - __request__, - model, - user_id, - chat_id, - message_id, - is_stream=True, # We always yield chunks, so this is effectively true - ) - - if payload: - if count > 0: - total_substitutions += count - log.debug(f"Disabled {count} special tag(s) in a part.") - - structured_chunk = {"choices": [{"delta": payload}]} - yield structured_chunk - - except Exception as e: - error_occurred = True - error_msg = f"Response processing ended with error: {e}" - log.exception(error_msg) - await event_emitter.emit_error(error_msg) - - finally: - # The async for loop has completed, meaning we have received all data - # from the API. Now, we perform final internal processing. - - if total_substitutions > 0 and not error_occurred: - plural_s = "s" if total_substitutions > 1 else "" - toast_msg = ( - f"For clarity, {total_substitutions} special tag{plural_s} " - "were disabled in the response by injecting a zero-width space (ZWS)." 
- ) - event_emitter.emit_toast(toast_msg, "info") - - if not error_occurred: - yield "data: [DONE]" - log.info("Response processing finished successfully!") - - try: - await self._do_post_processing( - final_response_chunk, - event_emitter, - __request__, - chat_id=chat_id, - message_id=message_id, - stream_error_happened=error_occurred, - start_time=start_time, - ) - except Exception as e: - error_msg = f"Post-processing failed with error:\n\n{e}" - event_emitter.emit_toast(error_msg, "error") - log.exception(error_msg) - - log.debug("Unified response processor has finished.") - - async def _process_part( - self, - part: types.Part, - __request__: Request, - model: str, - user_id: str, - chat_id: str, - message_id: str, - is_stream: bool, - ) -> tuple[dict | None, int]: - """ - Processes a single `types.Part` object and returns a payload dictionary - for the Open WebUI stream, along with a count of tag substitutions. - """ - # Initialize variables to ensure they always have a defined state. - payload: dict[str, str] | None = None - count: int = 0 - key: str = "content" - - match part: - case types.Part(text=str(text), thought=True): - # It's a thought, so we'll use the "reasoning" key. - key = "reasoning" - sanitized_text, count = self._disable_special_tags(text) - - # For non-streaming responses, wrap the thought/reasoning block - # in details block manually for nice front-end rendering. - if not is_stream: - sanitized_text = f'\n
\nThinking...\n{sanitized_text}\n
\n' - - payload = {key: sanitized_text} - case types.Part(text=str(text)): - # It's regular content, using the default "content" key. - sanitized_text, count = self._disable_special_tags(text) - payload = {key: sanitized_text} - case types.Part(inline_data=data) if data: - # Image parts don't need tag disabling. - processed_text = await self._process_image_part( - data, model, user_id, chat_id, message_id, __request__ - ) - payload = {"content": processed_text} - case types.Part(executable_code=code) if code: - # Code blocks are already formatted and safe. - if processed_text := self._process_executable_code_part(code): - payload = {"content": processed_text} - case types.Part(code_execution_result=result) if result: - # Code results are also safe. - if processed_text := self._process_code_execution_result_part(result): - payload = {"content": processed_text} - - return payload, count - - @staticmethod - def _disable_special_tags(text: str) -> tuple[str, int]: - """ - Finds special tags in a text chunk and inserts a Zero-Width Space (ZWS) - to prevent them from being parsed by the Open WebUI backend's legacy system. - This is a safeguard against accidental tag generation by the model. - """ - if not text: - return "", 0 - - # The regex finds '<' followed by an optional '/' and then one of the special tags. - # The inner parentheses group the tags, so the optional '/' applies to all of them. - TAG_REGEX = re.compile( - r"<(/?" - + "(" - + "|".join(re.escape(tag) for tag in SPECIAL_TAGS_TO_DISABLE) - + ")" - + r")" - ) - # The substitution injects a ZWS, e.g., '' becomes ' str: - """ - Handles image data by saving it to the Open WebUI backend and returning a markdown link. 
- """ - mime_type = inline_data.mime_type - image_data = inline_data.data - - if mime_type and image_data: - image_url = await self._upload_image( - image_data=image_data, - mime_type=mime_type, - model=model, - user_id=user_id, - chat_id=chat_id, - message_id=message_id, - __request__=request, - ) - else: - log.warning( - "Image part has no mime_type or data, cannot upload image. " - "Returning a placeholder message." - ) - image_url = None - - return ( - f"![Generated Image]({image_url})" - if image_url - else "*An error occurred while trying to store this model generated image.*" - ) - - async def _upload_image( - self, - image_data: bytes, - mime_type: str, - model: str, - user_id: str, - chat_id: str, - message_id: str, - __request__: Request, - ) -> str | None: - """ - Helper method that uploads a generated image to the configured Open WebUI storage provider. - Returns the url to the uploaded image. - """ - image_format = mimetypes.guess_extension(mime_type) or ".png" - id = str(uuid.uuid4()) - name = f"generated-image{image_format}" - - # The final filename includes the unique ID to prevent collisions. - imagename = f"{id}_{name}" - image = io.BytesIO(image_data) - - # Create a clean, precise metadata object linking to the generation context. 
- image_metadata = { - "model": model, - "chat_id": chat_id, - "message_id": message_id, - } - - log.info("Uploading the model-generated image to the Open WebUI backend.") - - try: - contents, image_path = await asyncio.to_thread( - Storage.upload_file, image, imagename, tags={} - ) - except Exception: - log.exception("Error occurred during upload to the storage provider.") - return None - - log.debug("Adding the image file to the Open WebUI files database.") - file_item = await asyncio.to_thread( - Files.insert_new_file, - user_id, - FileForm( - id=id, - filename=name, - path=image_path, - meta={ - "name": name, - "content_type": mime_type, - "size": len(contents), - "data": image_metadata, - }, - ), - ) - if not file_item: - log.warning("Image upload to Open WebUI database likely failed.") - return None - - image_url: str = __request__.app.url_path_for( - "get_file_content_by_id", id=file_item.id - ) - log.success("Image upload finished!") - return image_url - - def _process_executable_code_part( - self, executable_code_part: types.ExecutableCode | None - ) -> str | None: - """ - Processes an executable code part and returns the formatted string representation. - """ - - if not executable_code_part: - return None - - lang_name = "python" # Default language - if executable_code_part_lang_enum := executable_code_part.language: - if lang_name := executable_code_part_lang_enum.name: - lang_name = executable_code_part_lang_enum.name.lower() - else: - log.warning( - f"Could not extract language name from {executable_code_part_lang_enum}. Default to python." 
- ) - else: - log.warning("Language Enum is None, defaulting to python.") - - if executable_code_part_code := executable_code_part.code: - return f"```{lang_name}\n{executable_code_part_code.rstrip()}\n```\n\n" - return "" - - def _process_code_execution_result_part( - self, code_execution_result_part: types.CodeExecutionResult | None - ) -> str | None: - """ - Processes a code execution result part and returns the formatted string representation. - """ - - if not code_execution_result_part: - return None - - if code_execution_result_part_output := code_execution_result_part.output: - return f"**Output:**\n\n```\n{code_execution_result_part_output.rstrip()}\n```\n\n" - else: - return None - - # endregion 2.4 Model response processing - - # region 2.5 Post-processing - async def _do_post_processing( - self, - model_response: types.GenerateContentResponse | None, - event_emitter: EventEmitter, - request: Request, - chat_id: str, - message_id: str, - *, - stream_error_happened: bool = False, - start_time: float, - ): - """Handles emitting usage, grounding, and sources after the main response/stream is done.""" - log.info("Post-processing the model response.") - - elapsed_time = time.monotonic() - start_time - time_str = f"(+{elapsed_time:.2f}s)" - - if stream_error_happened: - log.warning("Response processing failed due to stream error.") - await event_emitter.emit_status( - f"Response failed [Stream Error] {time_str}", done=True - ) - return - - if not model_response: - log.warning("Response processing skipped: Model response was empty.") - await event_emitter.emit_status( - f"Response failed [Empty Response] {time_str}", done=True - ) - return - - if not (candidate := self._get_first_candidate(model_response.candidates)): - log.warning("Response processing skipped: No candidates found.") - await event_emitter.emit_status( - f"Response failed [No Candidates] {time_str}", done=True - ) - return - - # --- Construct detailed finish reason message --- - reason_name = 
getattr(candidate.finish_reason, "name", "UNSPECIFIED") - reason_description = FINISH_REASON_DESCRIPTIONS.get(reason_name) - finish_message = ( - candidate.finish_message.strip() if candidate.finish_message else None - ) - - details_parts = [part for part in (reason_description, finish_message) if part] - details_str = f": {' '.join(details_parts)}" if details_parts else "" - full_finish_details = f"[{reason_name}]{details_str}" - - # --- Determine final status and emit toast for errors --- - is_normal_finish = candidate.finish_reason in NORMAL_REASONS - - if is_normal_finish: - log.debug(f"Response finished normally. {full_finish_details}") - status_prefix = "Response finished" - else: - log.error(f"Response finished with an error. {full_finish_details}") - status_prefix = "Response failed" - event_emitter.emit_toast( - f"An error occurred. {full_finish_details}", - "error", - ) - - # For the most common success case (STOP), we don't need to show the reason. - final_reason_str = "" if reason_name == "STOP" else f" [{reason_name}]" - await event_emitter.emit_status( - f"{status_prefix}{final_reason_str} {time_str}", - done=True, - is_successful_finish=is_normal_finish, - ) - - # TODO: Emit a toast message if url context retrieval was not successful. - - # --- Emit usage and grounding data --- - # Attempt to emit token usage data even if the finish reason was problematic, - # as usage data might still be available. - if usage_data := self._get_usage_data(model_response): - # Inject the total processing time into the usage payload. 
- usage_data["completion_time"] = round(elapsed_time, 2) - await event_emitter.emit_usage(usage_data) - - self._add_grounding_data_to_state( - model_response, request, chat_id, message_id, start_time - ) - - def _add_grounding_data_to_state( - self, - response: types.GenerateContentResponse, - request: Request, - chat_id: str, - message_id: str, - pipe_start_time: float, - ): - candidate = self._get_first_candidate(response.candidates) - grounding_metadata_obj = candidate.grounding_metadata if candidate else None - - app_state: State = request.app.state - grounding_key = f"grounding_{chat_id}_{message_id}" - time_key = f"pipe_start_time_{chat_id}_{message_id}" - - if grounding_metadata_obj: - log.debug( - f"Found grounding metadata. Storing in request's app state using key {grounding_key}." - ) - # Using shared `request.app.state` to pass data to Filter.outlet. - # This is necessary because the Pipe and Filter operate on different requests. - app_state._state[grounding_key] = grounding_metadata_obj - app_state._state[time_key] = pipe_start_time - else: - log.debug(f"Response {message_id} does not have grounding metadata.") - - @staticmethod - def _get_usage_data( - response: types.GenerateContentResponse, - ) -> dict[str, Any] | None: - """ - Extracts and cleans usage data from a GenerateContentResponse object. - Returns None if usage metadata is not present. - """ - if not response.usage_metadata: - log.warning( - "Usage metadata is missing from the response. Cannot determine usage." - ) - return None - - usage_data = response.usage_metadata.model_dump() - - # 1. Rename the three core required fields. - usage_data["prompt_tokens"] = usage_data.pop("prompt_token_count") - usage_data["completion_tokens"] = usage_data.pop("candidates_token_count") - usage_data["total_tokens"] = usage_data.pop("total_token_count") - - CORE_KEYS = {"prompt_tokens", "completion_tokens", "total_tokens"} - - # 2. Remove auxiliary keys that have falsy values (None, empty list, etc.). 
- # We must iterate over a copy of keys to safely delete items from the dict. - for k in list(usage_data.keys()): - if k in CORE_KEYS: - continue - - # If the value is falsy (None, 0, empty list), remove the key. - # This retains non-core data (like modality counts) if it exists. - if not usage_data[k]: - del usage_data[k] - - return usage_data - - # endregion 2.5 Post-processing - - # region 2.6 Logging - # TODO: Move to a separate plugin that does not have any Open WebUI funcitonlity and is only imported by this plugin. - - def _is_flat_dict(self, data: Any) -> bool: - """ - Checks if a dictionary contains only non-dict/non-list values (is one level deep). - """ - if not isinstance(data, dict): - return False - return not any(isinstance(value, (dict, list)) for value in data.values()) - - def _truncate_long_strings( - self, data: Any, max_len: int, truncation_marker: str, truncation_enabled: bool - ) -> Any: - """ - Recursively traverses a data structure (dicts, lists) and truncates - long string values. Creates copies to avoid modifying original data. - - Args: - data: The data structure (dict, list, str, int, float, bool, None) to process. - max_len: The maximum allowed length for string values. - truncation_marker: The string to append to truncated values. - truncation_enabled: Whether truncation is enabled. - - Returns: - A potentially new data structure with long strings truncated. 
- """ - if not truncation_enabled or max_len <= len(truncation_marker): - # If truncation is disabled or max_len is too small, return original - # Make a copy only if it's a mutable type we might otherwise modify - if isinstance(data, (dict, list)): - return copy.deepcopy(data) # Ensure deep copy for nested structures - return data # Primitives are immutable - - if isinstance(data, str): - if len(data) > max_len: - return data[: max_len - len(truncation_marker)] + truncation_marker - return data # Return original string if not truncated - elif isinstance(data, dict): - # Process dictionary items, creating a new dict - return { - k: self._truncate_long_strings( - v, max_len, truncation_marker, truncation_enabled - ) - for k, v in data.items() - } - elif isinstance(data, list): - # Process list items, creating a new list - return [ - self._truncate_long_strings( - item, max_len, truncation_marker, truncation_enabled - ) - for item in data - ] - else: - # Return non-string, non-container types as is (they are immutable) - return data - - def plugin_stdout_format(self, record: "Record") -> str: - """ - Custom format function for the plugin's logs. - Serializes and truncates data passed under the 'payload' key in extra. 
- """ - - # Configuration Keys - LOG_OPTIONS_PREFIX = "_log_" - TRUNCATION_ENABLED_KEY = f"{LOG_OPTIONS_PREFIX}truncation_enabled" - MAX_LENGTH_KEY = f"{LOG_OPTIONS_PREFIX}max_length" - TRUNCATION_MARKER_KEY = f"{LOG_OPTIONS_PREFIX}truncation_marker" - DATA_KEY = "payload" - - original_extra = record["extra"] - # Extract the data intended for serialization using the chosen key - data_to_process = original_extra.get(DATA_KEY) - - serialized_data_json = "" - if data_to_process is not None: - try: - serializable_data = pydantic_core.to_jsonable_python( - data_to_process, serialize_unknown=True - ) - - # Determine truncation settings - truncation_enabled = original_extra.get(TRUNCATION_ENABLED_KEY, True) - max_length = original_extra.get(MAX_LENGTH_KEY, 256) - truncation_marker = original_extra.get(TRUNCATION_MARKER_KEY, "[...]") - - # If max_length was explicitly provided, force truncation enabled - if MAX_LENGTH_KEY in original_extra: - truncation_enabled = True - - # Truncate long strings - truncated_data = self._truncate_long_strings( - serializable_data, - max_length, - truncation_marker, - truncation_enabled, - ) - - # Serialize the (potentially truncated) data - if self._is_flat_dict(truncated_data) and not isinstance( - truncated_data, list - ): - json_string = json.dumps( - truncated_data, separators=(",", ":"), default=str - ) - # Add a simple prefix if it's compact - serialized_data_json = " - " + json_string - else: - json_string = json.dumps(truncated_data, indent=2, default=str) - # Prepend with newline for readability - serialized_data_json = "\n" + json_string - - except (TypeError, ValueError) as e: # Catch specific serialization errors - serialized_data_json = f" - {{Serialization Error: {e}}}" - except ( - Exception - ) as e: # Catch any other unexpected errors during processing - serialized_data_json = f" - {{Processing Error: {e}}}" - - # Add the final JSON string (or error message) back into the record - record["extra"]["_plugin_serialized_data"] 
= serialized_data_json - - # Base template - base_template = ( - "{time:YYYY-MM-DD HH:mm:ss.SSS} | " - "{level: <8} | " - "{name}:{function}:{line} - " - "{message}" - ) - - # Append the serialized data - base_template += "{extra[_plugin_serialized_data]}" - # Append the exception part - base_template += "\n{exception}" - # Return the format string template - return base_template.rstrip() - - @cache - def _add_log_handler(self, log_level: str): - """ - Adds or updates the loguru handler specifically for this plugin. - Includes logic for serializing and truncating extra data. - The handler is added only if the log_level has changed since the last call. - """ - - def plugin_filter(record: "Record"): - """Filter function to only allow logs from this plugin (based on module name).""" - return record["name"] == __name__ - - # Get the desired level name and number - desired_level_name = log_level - try: - # Use the public API to get level details - desired_level_info = log.level(desired_level_name) - desired_level_no = desired_level_info.no - except ValueError: - log.error( - f"Invalid LOG_LEVEL '{desired_level_name}' configured for plugin {__name__}. Cannot add/update handler." - ) - return # Stop processing if the level is invalid - - # Access the internal state of the log - handlers: dict[int, "Handler"] = log._core.handlers # type: ignore - handler_id_to_remove = None - found_correct_handler = False - - for handler_id, handler in handlers.items(): - existing_filter = handler._filter # Access internal attribute - - # Check if the filter matches our plugin_filter - # Comparing function objects directly can be fragile if they are recreated. - # Comparing by name and module is more robust for functions defined at module level. 
- is_our_filter = ( - existing_filter is not None # Make sure a filter is set - and hasattr(existing_filter, "__name__") - and existing_filter.__name__ == plugin_filter.__name__ - and hasattr(existing_filter, "__module__") - and existing_filter.__module__ == plugin_filter.__module__ - ) - - if is_our_filter: - existing_level_no = handler.levelno - log.trace( - f"Found existing handler {handler_id} for {__name__} with level number {existing_level_no}." - ) - - # Check if the level matches the desired level - if existing_level_no == desired_level_no: - log.debug( - f"Handler {handler_id} for {__name__} already exists with the correct level '{desired_level_name}'." - ) - found_correct_handler = True - break # Found the correct handler, no action needed - else: - # Found our handler, but the level is wrong. Mark for removal. - log.info( - f"Handler {handler_id} for {__name__} found, but log level differs " - f"(existing: {existing_level_no}, desired: {desired_level_no}). " - f"Removing it to update." - ) - handler_id_to_remove = handler_id - break # Found the handler to replace, stop searching - - # Remove the old handler if marked for removal - if handler_id_to_remove is not None: - try: - log.remove(handler_id_to_remove) - log.debug(f"Removed handler {handler_id_to_remove} for {__name__}.") - except ValueError: - # This might happen if the handler was somehow removed between the check and now - log.warning( - f"Could not remove handler {handler_id_to_remove} for {__name__}. It might have already been removed." - ) - # If removal failed but we intended to remove, we should still proceed to add - # unless found_correct_handler is somehow True (which it shouldn't be if handler_id_to_remove was set). 
- - # Add a new handler if no correct one was found OR if we just removed an incorrect one - if not found_correct_handler: - log.add( - sys.stdout, - level=desired_level_name, - format=self.plugin_stdout_format, - filter=plugin_filter, - ) - log.debug( - f"Added new handler to loguru for {__name__} with level {desired_level_name}." - ) - - # endregion 2.6 Logging - - # region 2.7 Utility helpers - - @staticmethod - def _get_toggleable_feature_status( - filter_id: str, - __metadata__: "Metadata", - ) -> tuple[bool, bool]: - """ - Checks the complete status of a toggleable filter (function). - - This function performs a series of checks to determine if a feature - is available for use and if the user has activated it. - - 1. Checks if the filter is installed. - 2. Checks if the filter's master toggle is active in the Functions dashboard. - 3. Checks if the filter is enabled for the current model (or is global). - 4. Checks if the user has toggled the feature ON for the current request. - - Args: - filter_id: The ID of the filter to check. - __metadata__: The metadata object for the current request. - - Returns: - A tuple (is_available: bool, is_toggled_on: bool). - - is_available: True if the filter is installed, active, and configured for the model. - - is_toggled_on: True if the user has the toggle ON in the UI for this request. - """ - # 1. Check if the filter is installed - f = Functions.get_function_by_id(filter_id) - if not f: - log.warning( - f"The '{filter_id}' filter is not installed. " - "Install it to use the corresponding front-end toggle." - ) - return (False, False) - - # 2. Check if the master toggle is active - if not f.is_active: - log.warning( - f"The '{filter_id}' filter is installed but is currently disabled in the " - "Functions dashboard (master toggle is off). Enable it to make it available." - ) - return (False, False) - - # 3. 
Check if the filter is enabled for the model or is global - model_info = __metadata__.get("model", {}).get("info", {}) - model_filter_ids = model_info.get("meta", {}).get("filterIds", []) - is_enabled_for_model = filter_id in model_filter_ids or f.is_global - - log.debug( - f"Checking model enablement for '{filter_id}': in_model_filters={filter_id in model_filter_ids}, " - f"is_global={f.is_global} -> is_enabled={is_enabled_for_model}" - ) - - if not is_enabled_for_model: - # This is a configuration issue, not a user-facing warning. Debug is appropriate. - model_id = __metadata__.get("model", {}).get("id", "Unknown") - log.debug(f"Filter '{filter_id}' is not enabled for model '{model_id}' and is not global.") - return (False, False) - - # 4. Check if the user has toggled the feature ON for this request - user_toggled_ids = __metadata__.get("filter_ids", []) - is_toggled_on = filter_id in user_toggled_ids - - if is_toggled_on: - log.info( - f"Feature '{filter_id}' is available and enabled by the front-end toggle for this request." - ) - else: - log.debug( - f"Feature '{filter_id}' is available but not enabled by the front-end toggle for this request." - ) - - return (True, is_toggled_on) - - @staticmethod - def _get_merged_valves( - default_valves: "Pipe.Valves", - user_valves: "Pipe.UserValves | None", - user_email: str, - ) -> "Pipe.Valves": - """ - Merges UserValves into a base Valves configuration. - - The general rule is that if a field in UserValves is not None, it overrides - the corresponding field in the default_valves. Otherwise, the default_valves - field value is used. - - Exceptions: - - If default_valves.USER_MUST_PROVIDE_AUTH_CONFIG is True, then GEMINI_API_KEY and - VERTEX_PROJECT in the merged result will be taken directly from - user_valves (even if they are None), ignoring the values in default_valves. - - Args: - default_valves: The base Valves object with default configurations. 
- user_valves: An optional UserValves object with user-specific overrides. - If None, a copy of default_valves is returned. - - Returns: - A new Valves object representing the merged configuration. - """ - if user_valves is None: - # If no user-specific valves are provided, return a copy of the default valves. - return default_valves.model_copy(deep=True) - - # Start with the values from the base `Valves` - merged_data = default_valves.model_dump() - - # Override with non-None values from `UserValves` - # Iterate over fields defined in the UserValves model - for field_name in Pipe.UserValves.model_fields: - # getattr is safe as field_name comes from model_fields of user_valves' type - user_value = getattr(user_valves, field_name) - if user_value is not None and user_value != "": - # Only update if the field is also part of the main Valves model - # (keys of merged_data are fields of default_valves) - if field_name in merged_data: - merged_data[field_name] = user_value - - user_whitelist = ( - default_valves.AUTH_WHITELIST.split(",") - if default_valves.AUTH_WHITELIST - else [] - ) - - # Apply special logic based on default_valves.USER_MUST_PROVIDE_AUTH_CONFIG - if ( - default_valves.USER_MUST_PROVIDE_AUTH_CONFIG - and user_email not in user_whitelist - ): - # If USER_MUST_PROVIDE_AUTH_CONFIG is True and user is not in the whitelist, - # then user must provide their own GEMINI_API_KEY - # User is disallowed from using Vertex AI in this case. - merged_data["GEMINI_API_KEY"] = user_valves.GEMINI_API_KEY - merged_data["VERTEX_PROJECT"] = None - merged_data["USE_VERTEX_AI"] = False - - # Create a new Valves instance with the merged data. - # Pydantic will validate the data against the Valves model definition during instantiation. 
- return Pipe.Valves(**merged_data) - - def _get_first_candidate( - self, candidates: list[types.Candidate] | None - ) -> types.Candidate | None: - """Selects the first candidate, logging a warning if multiple exist.""" - if not candidates: - # Logging warnings is handled downstream. - return None - if len(candidates) > 1: - log.warning("Multiple candidates found, defaulting to first candidate.") - return candidates[0] - - def _check_companion_filter_version(self, features: "Features | dict") -> None: - """ - Checks for the presence and version compatibility of the Gemini Manifold Companion filter. - Logs warnings if the filter is missing or outdated. - """ - companion_version = features.get("gemini_manifold_companion_version") - - if companion_version is None: - log.warning( - "Gemini Manifold Companion filter not detected. " - "While this pipe can function without it, you are missing out on key features like native Google Search, " - "Code Execution, and direct document uploads. Please install the companion filter or ensure it is active " - "for this model to unlock the full functionality." - ) - else: - # Comparing tuples of integers is a robust way to handle versions like '1.10.0' vs '1.2.0'. - try: - companion_v_tuple = tuple(map(int, companion_version.split("."))) - recommended_v_tuple = tuple( - map(int, RECOMMENDED_COMPANION_VERSION.split(".")) - ) - - if companion_v_tuple < recommended_v_tuple: - log.warning( - f"The installed Gemini Manifold Companion filter version ({companion_version}) is older than " - f"the recommended version ({RECOMMENDED_COMPANION_VERSION}). " - "Some features may not work as expected. Please update the filter." - ) - else: - log.debug( - f"Gemini Manifold Companion filter detected with version: {companion_version}" - ) - except (ValueError, TypeError): - # This handles cases where the version string is malformed (e.g., '1.a.0'). - log.error( - f"Could not parse companion version string: '{companion_version}'. 
Version check skipped." - ) - - # endregion 2.7 Utility helpers - - # endregion 2. Helper methods inside the Pipe class