From d1bc0060989d800df937fb32facde1ea3ff8628d Mon Sep 17 00:00:00 2001 From: fujie Date: Mon, 29 Dec 2025 03:41:53 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9E=20Gemini=20=E5=A4=9A?= =?UTF-8?q?=E6=A8=A1=E6=80=81=E8=BF=87=E6=BB=A4=E5=99=A8=EF=BC=8C=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E5=A4=9A=E7=A7=8D=E6=96=87=E4=BB=B6=E7=B1=BB=E5=9E=8B?= =?UTF-8?q?=E5=B9=B6=E6=8F=90=E4=BE=9B=E8=A7=86=E9=A2=91=E5=AD=97=E5=B9=95?= =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../web_gemini_multimodel.py | 912 ++++++++++++++++++ plugins/插件开发总结.md | 77 ++ 2 files changed, 989 insertions(+) create mode 100644 plugins/filters/web_gemini_multimodel_filter/web_gemini_multimodel.py create mode 100644 plugins/插件开发总结.md diff --git a/plugins/filters/web_gemini_multimodel_filter/web_gemini_multimodel.py b/plugins/filters/web_gemini_multimodel_filter/web_gemini_multimodel.py new file mode 100644 index 0000000..5862a9d --- /dev/null +++ b/plugins/filters/web_gemini_multimodel_filter/web_gemini_multimodel.py @@ -0,0 +1,912 @@ +""" +title: Gemini 多模态过滤器(含字幕增强) +author: Gemini Adapter +author_url: https://github.com/Fu-Jie +funding_url: https://github.com/Fu-Jie/awesome-openwebui +version: 0.3.2 +description: > + 一个强大的过滤器,为 OpenWebUI 中的任何模型提供多模态能力:PDF、Office、图片、音频、视频等。 + 默认智能路由至 Gemini 进行分析/直连;当检测到“视频+字幕”需求时,会自动启用字幕精修专家生成高质量 SRT 作为上下文。 + +功能特性: +- **多模态支持**: 处理 PDF, Word, Excel, PowerPoint, EPUB, MP3, MP4 和图片。 +- **智能路由**: + - **直连模式 (Direct Mode)**: 对于 Gemini 模型,文件直接传递(原生多模态)。 + - **分析器模式 (Analyzer Mode)**: 对于非 Gemini 模型(如 DeepSeek, Llama),文件由 Gemini 分析,结果注入为上下文。 +- **持久上下文**: 利用 OpenWebUI 的 Chat ID 跨多轮对话维护会话历史。 +- **数据库去重**: 自动记录已分析文件的哈希值,防止重复上传和分析,节省资源。 +- **智能追问**: 支持针对已上传文档的纯文本追问,自动调用 Gemini 进行上下文分析。 + +配置项 (Valves): +- `gemini_adapter_url`: Gemini Adapter 服务的 URL。 +- `target_model_keyword`: 用于识别 Gemini 模型的关键字(默认: "webgemini")。 +- `mode`: "auto" (推荐), "direct" (直连), 或 "analyzer" (分析器)。 +- `analyzer_base_model_id`: 用于文档分析的基础 Gemini 模型(默认: "gemini-3.0-pro")。 +""" + +import os +import asyncio +import requests +import base64 +import mimetypes +import time +from typing import List, Union, Generator, Iterator, Optional +from pydantic import BaseModel, Field +import sqlalchemy +from sqlalchemy import ( + create_engine, + Column, + String, + Text, + DateTime, + Integer, + Boolean, + JSON, +) +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker, Session +from datetime import datetime + +Base = declarative_base() + + +class GeminiAnalysisHistory(Base): + """记录已分析文件的历史,用于去重""" + + __tablename__ = "gemini_analysis_history" + + id = Column(Integer, primary_key=True, autoincrement=True) + chat_id = Column(String(255), nullable=False, index=True) + file_hash = Column(String(255), nullable=False, index=True) + file_id = Column(String(255), nullable=True) # 记录当时的 file_id 仅供参考 + filename = Column(String(255), nullable=True) + analyzed_at = Column(DateTime, default=datetime.utcnow) + + +class File(Base): + """OpenWebUI 现有的 file 表映射 (只读)""" + + __tablename__ = "file" + id = Column(String, primary_key=True) + hash = Column(String) + filename = Column(String) + + # 其他字段我们暂时不需要 + + +class Filter: + class Valves(BaseModel): + priority: int = Field( + default=8, description="Priority level for the filter operations." + ) + gemini_adapter_url: str = Field( + default="http://192.168.31.19:8197", + description="URL of the Gemini Adapter", + ) + openwebui_upload_path: str = Field( + default="/app/backend/data/uploads", + description="Path to OpenWebUI uploads directory (mapped in Docker)", + ) + supported_extensions: str = Field( + default=".pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx, .epub, .mp3, .wav, .mp4, .mov, .avi, .mkv, .webm", + description="Comma-separated list of supported file extensions", + ) + target_model_keyword: str = Field( + default="webgemini", + description="Keyword to identify Gemini models (e.g., 'webgemini')", + ) + mode: str = Field( + default="auto", + description="Operation mode: 'auto' (decide based on model), 'direct' (pass file URL), or 'analyzer' (analyze first)", + ) + analyzer_base_model_id: str = Field( + default="gemini-3.0-pro", + description="Base Model ID to use for document analysis (Brain)", + ) + analyzer_custom_model_id: str = Field( + default="gemini-analyzer", + description="Custom model ID for general document analysis (Gem)", + ) + subtitle_custom_model_id: str = Field( + default="subtitle-master", + description="Custom model ID for subtitle extraction / SRT polishing", + ) + subtitle_keywords: str = Field( + default="字幕,字幕精修,字幕提取,srt", + description="Comma-separated keywords to trigger subtitle flow when paired with video uploads", + ) + subtitle_video_extensions: str = Field( + default=".mp4, .mov, .avi, .mkv, .webm", + description="Video extensions that will trigger subtitle flow when keywords are present", + ) + upload_timeout_seconds: int = Field( + default=600, description="Timeout for adapter upload requests (seconds)" + ) + analyze_timeout_seconds: int = Field( + default=600, description="Timeout for adapter analysis requests (seconds)" + ) + max_retries: int = Field( + default=2, description="Max retry attempts for adapter calls" + ) + retry_backoff_seconds: float = Field( + default=60.0, description="Backoff seconds between retries" + ) + circuit_failure_threshold: int = Field( + default=3, + description="Number of consecutive failures before opening circuit", + ) + circuit_reset_seconds: int = Field( + default=60, description="Cooldown seconds before closing circuit after trip" + ) + + def __init__(self): + self.valves = self.Valves() + self._db_engine = None + self._SessionLocal = None + self._init_database() + self._failure_count = 0 + self._circuit_open_until = 0 + + def _circuit_open(self) -> bool: + now = time.time() + if now < self._circuit_open_until: + return True + return False + + def _record_success(self): + self._failure_count = 0 + self._circuit_open_until = 0 + + def _record_failure(self): + self._failure_count += 1 + if self._failure_count >= self.valves.circuit_failure_threshold: + self._circuit_open_until = time.time() + self.valves.circuit_reset_seconds + print( + f"🚧 Circuit open for adapter calls ({self.valves.circuit_reset_seconds}s) after {self._failure_count} failures" + ) + + def _init_database(self): + """初始化数据库连接和表""" + try: + database_url = os.getenv("DATABASE_URL") + if not database_url: + print( + "⚠️ DATABASE_URL not set. Deduplication will fall back to memory (unreliable)." + ) + return + + # Handle postgres protocol + if database_url.startswith("postgres://"): + database_url = database_url.replace("postgres://", "postgresql://", 1) + + self._db_engine = create_engine(database_url, pool_pre_ping=True) + self._SessionLocal = sessionmaker( + autocommit=False, autoflush=False, bind=self._db_engine + ) + + # Create our history table if it doesn't exist + Base.metadata.create_all(bind=self._db_engine) + print("✅ Gemini Filter: Database initialized and tables checked.") + except Exception as e: + print(f"❌ Gemini Filter: Database initialization failed: {e}") + self._db_engine = None + + def get_db(self): + if not self._SessionLocal: + return None + return self._SessionLocal() + + def get_file_hash(self, file_id: str) -> Optional[str]: + """从 OpenWebUI 的 file 表获取文件哈希""" + db = self.get_db() + if not db: + return None + try: + file_record = db.query(File).filter(File.id == file_id).first() + return file_record.hash if file_record else None + except Exception as e: + print(f"⚠️ Failed to query file hash: {e}") + return None + finally: + db.close() + + def is_file_analyzed(self, chat_id: str, file_hash: str) -> bool: + """检查文件是否已在当前会话中分析过""" + db = self.get_db() + if not db: + return False + try: + record = ( + db.query(GeminiAnalysisHistory) + .filter( + GeminiAnalysisHistory.chat_id == chat_id, + GeminiAnalysisHistory.file_hash == file_hash, + ) + .first() + ) + return record is not None + except Exception as e: + print(f"⚠️ Failed to check analysis history: {e}") + return False + finally: + db.close() + + def mark_file_analyzed( + self, chat_id: str, file_hash: str, file_id: str, filename: str + ): + """标记文件为已分析""" + db = self.get_db() + if not db: + return + try: + new_record = GeminiAnalysisHistory( + chat_id=chat_id, file_hash=file_hash, file_id=file_id, filename=filename + ) + db.add(new_record) + db.commit() + print( + f"💾 Marked file {filename} (hash: {file_hash[:8]}...) as analyzed in chat {chat_id}" + ) + except Exception as e: + print(f"⚠️ Failed to mark file as analyzed: {e}") + db.rollback() + finally: + db.close() + + def has_analyzed_files_in_chat(self, chat_id: str) -> bool: + """检查当前会话是否有任何已分析的文件(用于追问判断)""" + db = self.get_db() + if not db: + return False + try: + record = ( + db.query(GeminiAnalysisHistory) + .filter(GeminiAnalysisHistory.chat_id == chat_id) + .first() + ) + return record is not None + except Exception as e: + return False + finally: + db.close() + + def upload_to_adapter( + self, file_content: bytes, filename: str, content_type: str + ) -> str: + """ + Uploads file content to Gemini Adapter and returns the accessible URL. + """ + if self._circuit_open(): + print("🚫 Upload circuit open, skipping upload") + return None + + upload_url = f"{self.valves.gemini_adapter_url}/v1/files/upload" + timeout = self.valves.upload_timeout_seconds + retries = self.valves.max_retries + backoff = self.valves.retry_backoff_seconds + + print( + f"📤 Uploading {filename} to {upload_url} (timeout={timeout}s, retries={retries})..." + ) + + for attempt in range(retries + 1): + try: + files = {"file": (filename, file_content, content_type)} + response = requests.post(upload_url, files=files, timeout=timeout) + response.raise_for_status() + + result = response.json() + file_url = result.get("url") + print(f"✅ Upload success: {file_url}") + self._record_success() + return file_url + except Exception as e: + print(f"❌ Upload attempt {attempt+1} failed: {e}") + self._record_failure() + if attempt < retries: + time.sleep(backoff) + else: + print("🚫 Upload exhausted retries") + return None + + def analyze_document( + self, + file_url: Optional[str], + user_message: str, + openwebui_chat_id: Optional[str] = None, + custom_model_id: Optional[str] = None, + subtitle_mode: bool = False, + ) -> Optional[str]: + """ + Send file URL and user message to Gemini Adapter for analysis or subtitle extraction. + Uses OpenWebUI's chat_id directly, relying on Adapter's database for context persistence. + """ + print( + f"🧠 Analyzing/Chatting with Gemini. File: {file_url}, ChatID: {openwebui_chat_id}, SubtitleMode: {subtitle_mode}" + ) + + headers = { + "Content-Type": "application/json", + } + + chat_url = f"{self.valves.gemini_adapter_url}/v1/chat/completions" + timeout = self.valves.analyze_timeout_seconds + retries = self.valves.max_retries + backoff = self.valves.retry_backoff_seconds + + if self._circuit_open(): + print("🚫 Analyze circuit open, skipping analysis") + return None + + # Construct Prompt + # If file_url is present, it's likely the first turn or a new file. + # If file_url is None, it's a follow-up question. + if subtitle_mode: + base_prompt = ( + "你是资深的字幕精修专家,负责将上传的音视频内容转写并打磨为高质量的 SRT 字幕。" + "务必保留技术名词,删除口癖,保证双语混排空格规范。" + "时间轴需要连续、准确,文本建议单行不超过 20 个中文字符。" + ) + if file_url: + prompt = ( + f"{base_prompt}\n\n请聆听/阅读附件内容,并依据用户需求生成标准 SRT:\n" + f"用户问题:{user_message}" + ) + content_list = [ + {"type": "text", "text": prompt}, + {"type": "image_url", "image_url": {"url": file_url}}, + ] + else: + prompt = ( + f"{base_prompt}\n\n基于前文上下文,继续回答用户的新需求:{user_message}\n" + "若需要补全或修订已有字幕,请保持时间轴连续。" + ) + content_list = [{"type": "text", "text": prompt}] + else: + if file_url: + prompt = f"请阅读附件文档,并针对以下问题进行详细分析和回答:\n\n用户问题:{user_message}\n\n请提供详尽的分析结果,这将作为上下文提供给另一个 AI 模型。" + content_list = [ + {"type": "text", "text": prompt}, + {"type": "image_url", "image_url": {"url": file_url}}, + ] + else: + # Follow-up question (no file needed, context is in session) + prompt = f"基于之前的文档内容,回答用户的新问题:{user_message}\n\n请提供详尽的回答,这将作为上下文提供给另一个 AI 模型。" + content_list = [{"type": "text", "text": prompt}] + + # Construct request payload + payload = { + "model": self.valves.analyzer_base_model_id, + "custom_model_id": custom_model_id or self.valves.analyzer_custom_model_id, + "chat_id": openwebui_chat_id, # Pass OpenWebUI chat_id directly + "messages": [ + { + "role": "user", + "content": content_list, + } + ], + "stream": False, + } + + persona = custom_model_id or self.valves.analyzer_custom_model_id + print( + f"🧠 Analyzing document with {self.valves.analyzer_base_model_id} (Persona: {persona})..." + ) + for attempt in range(retries + 1): + try: + response = requests.post(chat_url, json=payload, timeout=timeout) + response.raise_for_status() + result = response.json() + analysis = result["choices"][0]["message"]["content"] + print(f"✅ Analysis complete ({len(analysis)} chars)") + self._record_success() + return analysis + except Exception as e: + print(f"❌ Analysis attempt {attempt+1} failed: {e}") + self._record_failure() + if attempt < retries: + time.sleep(backoff) + else: + print("🚫 Analysis exhausted retries") + return None + + def process_url(self, url: str, should_process_images: bool = True) -> str: + """ + Process a URL (Base64 or HTTP), upload to adapter if needed, and return new URL. + """ + # 1. Handle Base64 + if url.startswith("data:"): + try: + header, encoded = url.split(",", 1) + mime_type = header.split(";")[0].split(":")[1] + + # Check if it's an image and if we should process it + is_image = "image" in mime_type + if is_image and not should_process_images: + return url + + # Only process PDF, images, audio, or video (if allowed) + if ( + "pdf" not in mime_type + and not is_image + and "audio" not in mime_type + and "video" not in mime_type + ): + return url + + # Decode + file_content = base64.b64decode(encoded) + extension = mimetypes.guess_extension(mime_type) or ".bin" + filename = f"upload{extension}" + + # Upload + new_url = self.upload_to_adapter(file_content, filename, mime_type) + return new_url if new_url else url + + except Exception as e: + print(f"❌ Error processing Base64: {e}") + return url + return url + + async def emit_status( + self, __event_emitter__, description: str, done: bool = False + ): + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": description, + "done": done, + }, + } + ) + + async def inlet( + self, + body: dict, + __event_emitter__=None, + __user__: Optional[dict] = None, + __model__: Optional[dict] = None, + __metadata__: Optional[dict] = None, + ) -> dict: + print(f"🔍 Gemini PDF Filter: Inlet triggered") + + # Extract Chat ID from metadata + openwebui_chat_id = None + if __metadata__: + openwebui_chat_id = __metadata__.get("chat_id") + print(f"🆔 OpenWebUI Chat ID: {openwebui_chat_id}") + + # 1. Determine the actual model ID (handling custom models) + base_model_id = None + if __model__: + if "openai" in __model__: + base_model_id = __model__["openai"].get("id") + else: + base_model_id = __model__.get("info", {}).get("base_model_id") + + # Fallback to body['model'] if base_model_id not found or __model__ is missing + current_model = base_model_id if base_model_id else body.get("model", "") + + print( + f"🤖 Checking model: {current_model} (Target: {self.valves.target_model_keyword})" + ) + + # Check if model matches target keyword + is_target_model = ( + self.valves.target_model_keyword.lower() in current_model.lower() + ) + + # Determine effective mode + effective_mode = self.valves.mode + if effective_mode == "auto": + if is_target_model: + effective_mode = "direct" + else: + effective_mode = "analyzer" + + print(f"⚙️ Effective Mode: {effective_mode}") + + # If not target model AND effective mode is NOT analyzer, skip. + # This means if we are in 'direct' mode (forced or auto), we only run for target models. + if not is_target_model and effective_mode != "analyzer": + return body + + messages = body.get("messages", []) + if not messages: + return body + + last_message = messages[-1] + if last_message.get("role") != "user": + return body + + # Extract plain text from the last user message for intent detection + user_text_plain = last_message.get("content", "") + if isinstance(user_text_plain, list): + user_text_plain = " ".join( + [ + item.get("text", "") + for item in user_text_plain + if item.get("type") == "text" + ] + ) + + subtitle_keywords = [ + kw.strip() for kw in self.valves.subtitle_keywords.split(",") if kw.strip() + ] + is_subtitle_query = any(kw in user_text_plain for kw in subtitle_keywords) + video_exts = tuple( + ext.strip().lower() + for ext in self.valves.subtitle_video_extensions.split(",") + ) + + # 1. Process files list to bypass RAG + processed_any = False + if files := body.get("files"): + files_to_keep = [] + + # Parse supported extensions + supported_exts = tuple( + ext.strip().lower() + for ext in self.valves.supported_extensions.split(",") + ) + + for file_item in files: + # 尝试适配不同的 files 结构 + file_obj = file_item.get("file", file_item) + + # 获取文件名和 ID + filename = file_obj.get("filename") or file_obj.get("name") or "" + file_id = file_obj.get("id", "") + + print(f"📄 Checking file: {filename} (ID: {file_id})") + + # Database-backed deduplication + file_hash = None + if openwebui_chat_id and file_id: + file_hash = self.get_file_hash(file_id) + if file_hash: + if self.is_file_analyzed(openwebui_chat_id, file_hash): + print( + f"♻️ File {filename} (hash: {file_hash[:8]}...) already analyzed in chat {openwebui_chat_id}, skipping." + ) + continue + else: + print( + f"⚠️ Could not find hash for file {file_id}, skipping deduplication check." + ) + + # Check if extension is supported + # IMPORTANT: If it's an image, only process if is_target_model is True + is_image = any( + filename.lower().endswith(ext) + for ext in [".jpg", ".jpeg", ".png", ".webp", ".gif"] + ) + is_video = filename.lower().endswith(video_exts) + should_process = False + + if filename.lower().endswith(supported_exts): + if is_image and not is_target_model: + print(f"⚠️ Skipping image for non-Gemini model: {filename}") + should_process = False + else: + should_process = True + + # Subtitle flow: only if video + subtitle intent + use_subtitle_flow = should_process and is_video and is_subtitle_query + + if should_process: + print(f"📄 Detected supported file: {filename}") + await self.emit_status( + __event_emitter__, + f"🚀 发现新文件: {filename},准备处理...", + done=False, + ) + + # Construct local path in OpenWebUI container + file_path = os.path.join( + self.valves.openwebui_upload_path, f"{file_id}_{filename}" + ) + + upload_success = False + try: + if os.path.exists(file_path): + print(f"📖 Reading local file: {file_path}") + with open(file_path, "rb") as f: + file_content = f.read() + + # Determine mime type + mime_type, _ = mimetypes.guess_type(filename) + if not mime_type: + mime_type = "application/octet-stream" + if filename.lower().endswith(".pdf"): + mime_type = "application/pdf" + + # Upload to Adapter + # Upload is fast (local network), so we skip the status update for it to avoid flickering. + + # Use run_in_executor to avoid blocking the event loop with synchronous requests + loop = asyncio.get_running_loop() + adapter_url = await loop.run_in_executor( + None, + self.upload_to_adapter, + file_content, + filename, + mime_type, + ) + + if adapter_url: + # Mark file as processed in DB + if openwebui_chat_id and file_hash: + self.mark_file_analyzed( + openwebui_chat_id, file_hash, file_id, filename + ) + + # MODE SWITCH + if effective_mode == "analyzer": + # Analyzer Mode: Analyze first, then inject context + user_text = last_message.get("content", "") + if isinstance(user_text, list): + # Extract text from list content + user_text = " ".join( + [ + item["text"] + for item in user_text + if item["type"] == "text" + ] + ) + + await self.emit_status( + __event_emitter__, + ( + "🧠 正在使用字幕精修专家生成高质量字幕..." + if use_subtitle_flow + else "🧠 正在使用 Gemini 深度分析文档内容 (这可能需要几十秒)..." + ), + done=False, + ) + # Give a small yield to ensure status is sent + await asyncio.sleep(0.1) + + # Pass openwebui_chat_id to analyze_document + # Also run in executor to avoid blocking + custom_model = ( + self.valves.subtitle_custom_model_id + if use_subtitle_flow + else self.valves.analyzer_custom_model_id + ) + analysis_result = await loop.run_in_executor( + None, + self.analyze_document, + adapter_url, + user_text, + openwebui_chat_id, + custom_model, + use_subtitle_flow, + ) + + if analysis_result: + print( + f"✅ Injecting analysis result into context" + ) + # Inject as a system message or prepend to user message + # Prepending to user message is usually safer for context retention + context_title = ( + "【字幕精修结果(SRT 建议)】" + if use_subtitle_flow + else "【文档分析结果】" + ) + context_message = f"{context_title}\n{analysis_result}\n\n【用户问题】\n" + + content = last_message.get("content", "") + if isinstance(content, str): + last_message["content"] = ( + context_message + content + ) + elif isinstance(content, list): + # Insert at the beginning of the list + content.insert( + 0, + { + "type": "text", + "text": context_message, + }, + ) + last_message["content"] = content + + upload_success = True + processed_any = True + await self.emit_status( + __event_emitter__, + ( + "✅ 字幕生成完成,已注入上下文" + if use_subtitle_flow + else "✅ 文档分析完成,已注入上下文" + ), + done=True, + ) + else: + print("❌ Analysis failed, falling back to RAG") + await self.emit_status( + __event_emitter__, + f"❌ 分析失败,回退到 RAG 模式", + done=True, + ) + # upload_success remains False, so it falls back to RAG + else: + # Direct Mode: Pass URL to model + print(f"✅ Adding file to messages: {adapter_url}") + content = last_message.get("content", "") + + # Force Subtitle Gem if needed + if use_subtitle_flow: + if not body.get("custom_model_id"): + body["custom_model_id"] = ( + self.valves.subtitle_custom_model_id + ) + print( + f"🎬 Subtitle flow detected. Binding custom_model_id -> {body['custom_model_id']}" + ) + + # Ensure content is a list + if isinstance(content, str): + content = [{"type": "text", "text": content}] + + content.append( + { + "type": "image_url", + "image_url": {"url": adapter_url}, + } + ) + last_message["content"] = content + + upload_success = True + processed_any = True + await self.emit_status( + __event_emitter__, + f"🔗 文件已连接到 Gemini 模型 (Direct Mode)", + done=True, + ) + else: + print(f"❌ Failed to upload file to adapter") + else: + print(f"❌ Local file not found: {file_path}") + + except Exception as e: + print(f"❌ Error processing file {filename}: {e}") + await self.emit_status( + __event_emitter__, f"❌ 处理文件失败: {e}", done=True + ) + + # RAG Fallback: If upload failed, keep the file in the list so OpenWebUI handles it + # If uploaded successfully, add to keep list (bypass RAG) + # OR if we skipped it because it was already processed (in Analyzer mode), we also bypass RAG? + # Actually, if we skip analysis, we usually want to bypass RAG too if we rely on Gemini context. + # But if we skip, we 'continue'd loop, so we didn't reach here. + # Wait, if we continue'd, we didn't add to files_to_keep. + # So it will be removed from files list? + # Let's check logic: + # files_to_keep = [] + # for file in files: + # if processed: continue + # if success: files_to_keep.append(file) + # body['files'] = files_to_keep + + # If we skip, it's NOT added to files_to_keep. So it's removed from body['files']. + # This is CORRECT for Analyzer mode (we don't want RAG). + # This is ALSO correct for Direct mode (we don't want RAG). + + if upload_success: + # We processed it, so we remove it from RAG list (by NOT adding to files_to_keep? No wait) + # The logic below says: + # if upload_success: pass (don't add to files_to_keep) -> removed from RAG + # else: files_to_keep.append(file_item) -> kept for RAG + pass + else: + files_to_keep.append(file_item) + else: + # Not a supported file type or image for non-Gemini model, keep it for RAG + files_to_keep.append(file_item) + + # Update files list + # Always update if we have filtered anything, regardless of whether we processed new files + if len(files_to_keep) != len(files): + print( + f"🚫 Updating body['files']: {len(files)} -> {len(files_to_keep)}" + ) + body["files"] = files_to_keep + + # Handle Analyzer Mode follow-up questions (No new files processed) + # Only trigger if we are in analyzer mode AND we have processed files in this chat history + has_processed_files = openwebui_chat_id and self.has_analyzed_files_in_chat( + openwebui_chat_id + ) + + if effective_mode == "analyzer" and not processed_any and has_processed_files: + # Check if we have a chat_id (meaning context might exist) + if openwebui_chat_id: + print(f"🤔 Analyzer Mode follow-up detected (has context).") + + user_text = last_message.get("content", "") + if isinstance(user_text, list): + user_text = " ".join( + [item["text"] for item in user_text if item["type"] == "text"] + ) + + # Only analyze if there is text content + if user_text.strip(): + await self.emit_status( + __event_emitter__, + ( + "🧠 正在使用字幕精修专家处理追问..." + if is_subtitle_query + else "🧠 正在使用 Gemini 分析追问..." + ), + done=False, + ) + + loop = asyncio.get_running_loop() + followup_custom_model = ( + self.valves.subtitle_custom_model_id + if is_subtitle_query + else self.valves.analyzer_custom_model_id + ) + analysis_result = await loop.run_in_executor( + None, + self.analyze_document, + None, + user_text, + openwebui_chat_id, + followup_custom_model, + is_subtitle_query, + ) + + if analysis_result: + print(f"✅ Injecting follow-up analysis into context") + context_title = ( + "【字幕精修专家回答】" + if is_subtitle_query + else "【文档分析助手回答】" + ) + context_message = ( + f"{context_title}\n{analysis_result}\n\n【用户新问题】\n" + ) + + content = last_message.get("content", "") + if isinstance(content, str): + last_message["content"] = context_message + content + elif isinstance(content, list): + content.insert(0, {"type": "text", "text": context_message}) + last_message["content"] = content + + await self.emit_status( + __event_emitter__, + f"✅ 追问分析完成,已注入上下文", + done=True, + ) + else: + # If analysis fails or returns None (e.g. no session), we just let it pass. + # But wait, if analyze_document returns None, it might mean error. + # If it's a new chat with no files and no context, analyze_document might be confused? + # analyze_document handles file_url=None by using a specific prompt. + pass + + # 2. Process existing 'image_url' in content (Base64) + # Only process if it's the target model (Gemini) + if is_target_model: + content = last_message.get("content", "") + if isinstance(content, list): + for item in content: + if item.get("type") == "image_url": + url = item.get("image_url", {}).get("url", "") + # Pass should_process_images=True because we already checked is_target_model + new_url = self.process_url(url, should_process_images=True) + if new_url != url: + item["image_url"]["url"] = new_url + print("✅ Message content updated with adapter URLs") + + return body + + def outlet(self, body: dict, __user__: dict = None) -> dict: + return body diff --git a/plugins/插件开发总结.md b/plugins/插件开发总结.md new file mode 100644 index 0000000..a688915 --- /dev/null +++ b/plugins/插件开发总结.md @@ -0,0 +1,77 @@ +# Awesome OpenWebUI 插件开发总结 + +## 功能增强概述 + +这12个插件为 OpenWebUI 带来了全方位的功能增强,显著提升了用户体验和生产力: + +### 📊 可视化能力增强 +- **智能信息图**:将文本内容自动转换为专业的 AntV 可视化图表,支持多种模板(流程图、对比图、象限图等),并可导出 SVG/PNG/HTML +- **思维导图**:基于 Markmap 的交互式思维导图生成,帮助结构化知识和可视化思维 + +### 💾 数据处理能力 +- **Excel 导出**:一键将对话中的 Markdown 表格导出为符合中国规范的 Excel 文件,自动识别数据类型并应用合适的对齐和格式 +- **多模态文件处理**:支持 PDF、Office 文档、音视频等多种格式的智能分析,自动上传并调用 Gemini 进行内容理解 + +### 🧠 学习与分析增强 +- **闪记卡**:快速提炼文本核心要点为精美的记忆卡片,支持分类标签和关键点提取 +- **精读分析**:深度分析长文本,自动生成摘要、关键信息点和可执行的行动建议 + +### ⚡ 性能与上下文优化 +- **异步上下文压缩**:自动压缩对话历史并生成摘要,支持数据库持久化,有效管理超长对话 +- **上下文增强**:自动注入环境变量、优化模型功能适配、智能清洗输出内容(修复代码块、LaTeX 等) +- **多模型回答合并**:将多个 AI 模型的回答合并为统一上下文,提升 MoE(模型混合专家)场景的效果 + +### 🎬 专业场景支持 +- **字幕增强**:自动识别视频+字幕需求,调用专门的字幕精修专家生成高质量 SRT 字幕 +- **智能路由**:根据模型类型自动选择最佳处理方式(直连 Gemini 或通过分析器) +- **提示词优化**:MoE 场景下自动优化提示词,提取原始问题和各模型回答 + +### 🔧 开发者体验 +- **数据库去重**:自动记录已分析文件,避免重复处理,节省资源 +- **会话持久化**:基于 Chat ID 维护跨多轮对话的上下文 +- **智能追问**:支持针对已上传文档的纯文本追问,无需重复上传 + +--- + +## Actions (动作插件) + +1. **📊 智能信息图 (infographic/信息图.py)** - 基于 AntV Infographic 的智能信息图生成插件,支持多种专业模板与 SVG/PNG 下载 + +2. **🧠 智绘心图 (smart-mind-map/思维导图.py)** - 智能分析文本内容生成交互式思维导图,帮助用户结构化和可视化知识 + +3. **📊 导出为 Excel (export_to_excel/导出为Excel.py)** - 将对话历史中的 Markdown 表格导出为符合中国规范的 Excel 文件 + +4. **⚡ 闪记卡 (knowledge-card/闪记卡.py)** - 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类 + +5. **📖 精读 (summary/精读.py)** - 深度分析长篇文本,提炼详细摘要、关键信息点和可执行的行动建议 + +## Filters (过滤器插件) + +1. **🔄 异步上下文压缩 (async-context-compression/异步上下文压缩.py)** - 异步生成摘要并压缩对话历史,支持数据库持久化存储 + +2. **✨ 上下文增强过滤器 (context_enhancement_filter/context_enhancement_filter.py)** - 增强请求上下文和优化模型功能,包含环境变量管理、模型功能适配和内容清洗 + +3. **📊 合并回答 (multi_model_context_merger.py)** - 将多个匿名 AI 模型的回答合并并注入到当前请求的上下文中 + +4. **🎬 Gemini 多模态过滤器 (web_gemini_multimodel_filter/web_gemini_multimodel.py)** - 为任何模型提供多模态能力(PDF/Office/音视频),支持智能路由、字幕增强和上下文持久化 + +5. **🔮 Gemini Manifold Companion (gemini_manifold_companion/gemini_manifold_companion.py)** - Gemini Manifold 的配套过滤器,提供增强功能支持 + +## Pipelines (管道插件) + +1. **🎯 MoE 提示词优化器 (moe_prompt_refiner.py)** - 优化多模型汇总请求的提示词,提取原始用户查询和各模型回答 + +## Pipes (管道接口插件) + +1. **💎 Gemini Manifold (gemini_mainfold/gemini_manifold.py)** - Gemini Developer API 和 Vertex AI 的 Manifold 函数,使用 google-genai SDK 支持丰富功能 + +--- + +**统计信息:** +- Actions: 5个插件 +- Filters: 5个插件 +- Pipelines: 1个插件 +- Pipes: 1个插件 +- **总计: 12个插件** + +**项目地址:** [Fu-Jie/awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui)