feat: 新增 Gemini 多模态过滤器,支持多种文件类型并提供视频字幕增强功能

This commit is contained in:
fujie
2025-12-29 03:41:53 +08:00
parent 1930d415ee
commit d1bc006098
2 changed files with 989 additions and 0 deletions

View File

@@ -0,0 +1,912 @@
"""
title: Gemini 多模态过滤器(含字幕增强)
author: Gemini Adapter
author_url: https://github.com/Fu-Jie
funding_url: https://github.com/Fu-Jie/awesome-openwebui
version: 0.3.2
description: >
一个强大的过滤器,为 OpenWebUI 中的任何模型提供多模态能力PDF、Office、图片、音频、视频等。
默认智能路由至 Gemini 进行分析/直连;当检测到“视频+字幕”需求时,会自动启用字幕精修专家生成高质量 SRT 作为上下文。
功能特性:
- **多模态支持**: 处理 PDF, Word, Excel, PowerPoint, EPUB, MP3, MP4 和图片。
- **智能路由**:
- **直连模式 (Direct Mode)**: 对于 Gemini 模型,文件直接传递(原生多模态)。
- **分析器模式 (Analyzer Mode)**: 对于非 Gemini 模型(如 DeepSeek, Llama文件由 Gemini 分析,结果注入为上下文。
- **持久上下文**: 利用 OpenWebUI 的 Chat ID 跨多轮对话维护会话历史。
- **数据库去重**: 自动记录已分析文件的哈希值,防止重复上传和分析,节省资源。
- **智能追问**: 支持针对已上传文档的纯文本追问,自动调用 Gemini 进行上下文分析。
配置项 (Valves):
- `gemini_adapter_url`: Gemini Adapter 服务的 URL。
- `target_model_keyword`: 用于识别 Gemini 模型的关键字(默认: "webgemini")。
- `mode`: "auto" (推荐), "direct" (直连), 或 "analyzer" (分析器)。
- `analyzer_base_model_id`: 用于文档分析的基础 Gemini 模型(默认: "gemini-3.0-pro")。
"""
import os
import asyncio
import requests
import base64
import mimetypes
import time
from typing import List, Union, Generator, Iterator, Optional
from pydantic import BaseModel, Field
import sqlalchemy
from sqlalchemy import (
create_engine,
Column,
String,
Text,
DateTime,
Integer,
Boolean,
JSON,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, Session
from datetime import datetime
Base = declarative_base()
class GeminiAnalysisHistory(Base):
"""记录已分析文件的历史,用于去重"""
__tablename__ = "gemini_analysis_history"
id = Column(Integer, primary_key=True, autoincrement=True)
chat_id = Column(String(255), nullable=False, index=True)
file_hash = Column(String(255), nullable=False, index=True)
file_id = Column(String(255), nullable=True) # 记录当时的 file_id 仅供参考
filename = Column(String(255), nullable=True)
analyzed_at = Column(DateTime, default=datetime.utcnow)
class File(Base):
"""OpenWebUI 现有的 file 表映射 (只读)"""
__tablename__ = "file"
id = Column(String, primary_key=True)
hash = Column(String)
filename = Column(String)
# 其他字段我们暂时不需要
class Filter:
class Valves(BaseModel):
priority: int = Field(
default=8, description="Priority level for the filter operations."
)
gemini_adapter_url: str = Field(
default="http://192.168.31.19:8197",
description="URL of the Gemini Adapter",
)
openwebui_upload_path: str = Field(
default="/app/backend/data/uploads",
description="Path to OpenWebUI uploads directory (mapped in Docker)",
)
supported_extensions: str = Field(
default=".pdf, .doc, .docx, .xls, .xlsx, .ppt, .pptx, .epub, .mp3, .wav, .mp4, .mov, .avi, .mkv, .webm",
description="Comma-separated list of supported file extensions",
)
target_model_keyword: str = Field(
default="webgemini",
description="Keyword to identify Gemini models (e.g., 'webgemini')",
)
mode: str = Field(
default="auto",
description="Operation mode: 'auto' (decide based on model), 'direct' (pass file URL), or 'analyzer' (analyze first)",
)
analyzer_base_model_id: str = Field(
default="gemini-3.0-pro",
description="Base Model ID to use for document analysis (Brain)",
)
analyzer_custom_model_id: str = Field(
default="gemini-analyzer",
description="Custom model ID for general document analysis (Gem)",
)
subtitle_custom_model_id: str = Field(
default="subtitle-master",
description="Custom model ID for subtitle extraction / SRT polishing",
)
subtitle_keywords: str = Field(
default="字幕,字幕精修,字幕提取,srt",
description="Comma-separated keywords to trigger subtitle flow when paired with video uploads",
)
subtitle_video_extensions: str = Field(
default=".mp4, .mov, .avi, .mkv, .webm",
description="Video extensions that will trigger subtitle flow when keywords are present",
)
upload_timeout_seconds: int = Field(
default=600, description="Timeout for adapter upload requests (seconds)"
)
analyze_timeout_seconds: int = Field(
default=600, description="Timeout for adapter analysis requests (seconds)"
)
max_retries: int = Field(
default=2, description="Max retry attempts for adapter calls"
)
retry_backoff_seconds: float = Field(
default=60.0, description="Backoff seconds between retries"
)
circuit_failure_threshold: int = Field(
default=3,
description="Number of consecutive failures before opening circuit",
)
circuit_reset_seconds: int = Field(
default=60, description="Cooldown seconds before closing circuit after trip"
)
def __init__(self):
self.valves = self.Valves()
self._db_engine = None
self._SessionLocal = None
self._init_database()
self._failure_count = 0
self._circuit_open_until = 0
def _circuit_open(self) -> bool:
now = time.time()
if now < self._circuit_open_until:
return True
return False
def _record_success(self):
self._failure_count = 0
self._circuit_open_until = 0
def _record_failure(self):
self._failure_count += 1
if self._failure_count >= self.valves.circuit_failure_threshold:
self._circuit_open_until = time.time() + self.valves.circuit_reset_seconds
print(
f"🚧 Circuit open for adapter calls ({self.valves.circuit_reset_seconds}s) after {self._failure_count} failures"
)
def _init_database(self):
"""初始化数据库连接和表"""
try:
database_url = os.getenv("DATABASE_URL")
if not database_url:
print(
"⚠️ DATABASE_URL not set. Deduplication will fall back to memory (unreliable)."
)
return
# Handle postgres protocol
if database_url.startswith("postgres://"):
database_url = database_url.replace("postgres://", "postgresql://", 1)
self._db_engine = create_engine(database_url, pool_pre_ping=True)
self._SessionLocal = sessionmaker(
autocommit=False, autoflush=False, bind=self._db_engine
)
# Create our history table if it doesn't exist
Base.metadata.create_all(bind=self._db_engine)
print("✅ Gemini Filter: Database initialized and tables checked.")
except Exception as e:
print(f"❌ Gemini Filter: Database initialization failed: {e}")
self._db_engine = None
def get_db(self):
if not self._SessionLocal:
return None
return self._SessionLocal()
def get_file_hash(self, file_id: str) -> Optional[str]:
"""从 OpenWebUI 的 file 表获取文件哈希"""
db = self.get_db()
if not db:
return None
try:
file_record = db.query(File).filter(File.id == file_id).first()
return file_record.hash if file_record else None
except Exception as e:
print(f"⚠️ Failed to query file hash: {e}")
return None
finally:
db.close()
def is_file_analyzed(self, chat_id: str, file_hash: str) -> bool:
"""检查文件是否已在当前会话中分析过"""
db = self.get_db()
if not db:
return False
try:
record = (
db.query(GeminiAnalysisHistory)
.filter(
GeminiAnalysisHistory.chat_id == chat_id,
GeminiAnalysisHistory.file_hash == file_hash,
)
.first()
)
return record is not None
except Exception as e:
print(f"⚠️ Failed to check analysis history: {e}")
return False
finally:
db.close()
def mark_file_analyzed(
self, chat_id: str, file_hash: str, file_id: str, filename: str
):
"""标记文件为已分析"""
db = self.get_db()
if not db:
return
try:
new_record = GeminiAnalysisHistory(
chat_id=chat_id, file_hash=file_hash, file_id=file_id, filename=filename
)
db.add(new_record)
db.commit()
print(
f"💾 Marked file {filename} (hash: {file_hash[:8]}...) as analyzed in chat {chat_id}"
)
except Exception as e:
print(f"⚠️ Failed to mark file as analyzed: {e}")
db.rollback()
finally:
db.close()
def has_analyzed_files_in_chat(self, chat_id: str) -> bool:
"""检查当前会话是否有任何已分析的文件(用于追问判断)"""
db = self.get_db()
if not db:
return False
try:
record = (
db.query(GeminiAnalysisHistory)
.filter(GeminiAnalysisHistory.chat_id == chat_id)
.first()
)
return record is not None
except Exception as e:
return False
finally:
db.close()
def upload_to_adapter(
self, file_content: bytes, filename: str, content_type: str
) -> str:
"""
Uploads file content to Gemini Adapter and returns the accessible URL.
"""
if self._circuit_open():
print("🚫 Upload circuit open, skipping upload")
return None
upload_url = f"{self.valves.gemini_adapter_url}/v1/files/upload"
timeout = self.valves.upload_timeout_seconds
retries = self.valves.max_retries
backoff = self.valves.retry_backoff_seconds
print(
f"📤 Uploading {filename} to {upload_url} (timeout={timeout}s, retries={retries})..."
)
for attempt in range(retries + 1):
try:
files = {"file": (filename, file_content, content_type)}
response = requests.post(upload_url, files=files, timeout=timeout)
response.raise_for_status()
result = response.json()
file_url = result.get("url")
print(f"✅ Upload success: {file_url}")
self._record_success()
return file_url
except Exception as e:
print(f"❌ Upload attempt {attempt+1} failed: {e}")
self._record_failure()
if attempt < retries:
time.sleep(backoff)
else:
print("🚫 Upload exhausted retries")
return None
def analyze_document(
self,
file_url: Optional[str],
user_message: str,
openwebui_chat_id: Optional[str] = None,
custom_model_id: Optional[str] = None,
subtitle_mode: bool = False,
) -> Optional[str]:
"""
Send file URL and user message to Gemini Adapter for analysis or subtitle extraction.
Uses OpenWebUI's chat_id directly, relying on Adapter's database for context persistence.
"""
print(
f"🧠 Analyzing/Chatting with Gemini. File: {file_url}, ChatID: {openwebui_chat_id}, SubtitleMode: {subtitle_mode}"
)
headers = {
"Content-Type": "application/json",
}
chat_url = f"{self.valves.gemini_adapter_url}/v1/chat/completions"
timeout = self.valves.analyze_timeout_seconds
retries = self.valves.max_retries
backoff = self.valves.retry_backoff_seconds
if self._circuit_open():
print("🚫 Analyze circuit open, skipping analysis")
return None
# Construct Prompt
# If file_url is present, it's likely the first turn or a new file.
# If file_url is None, it's a follow-up question.
if subtitle_mode:
base_prompt = (
"你是资深的字幕精修专家,负责将上传的音视频内容转写并打磨为高质量的 SRT 字幕。"
"务必保留技术名词,删除口癖,保证双语混排空格规范。"
"时间轴需要连续、准确,文本建议单行不超过 20 个中文字符。"
)
if file_url:
prompt = (
f"{base_prompt}\n\n请聆听/阅读附件内容,并依据用户需求生成标准 SRT\n"
f"用户问题:{user_message}"
)
content_list = [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": file_url}},
]
else:
prompt = (
f"{base_prompt}\n\n基于前文上下文,继续回答用户的新需求:{user_message}\n"
"若需要补全或修订已有字幕,请保持时间轴连续。"
)
content_list = [{"type": "text", "text": prompt}]
else:
if file_url:
prompt = f"请阅读附件文档,并针对以下问题进行详细分析和回答:\n\n用户问题:{user_message}\n\n请提供详尽的分析结果,这将作为上下文提供给另一个 AI 模型。"
content_list = [
{"type": "text", "text": prompt},
{"type": "image_url", "image_url": {"url": file_url}},
]
else:
# Follow-up question (no file needed, context is in session)
prompt = f"基于之前的文档内容,回答用户的新问题:{user_message}\n\n请提供详尽的回答,这将作为上下文提供给另一个 AI 模型。"
content_list = [{"type": "text", "text": prompt}]
# Construct request payload
payload = {
"model": self.valves.analyzer_base_model_id,
"custom_model_id": custom_model_id or self.valves.analyzer_custom_model_id,
"chat_id": openwebui_chat_id, # Pass OpenWebUI chat_id directly
"messages": [
{
"role": "user",
"content": content_list,
}
],
"stream": False,
}
persona = custom_model_id or self.valves.analyzer_custom_model_id
print(
f"🧠 Analyzing document with {self.valves.analyzer_base_model_id} (Persona: {persona})..."
)
for attempt in range(retries + 1):
try:
response = requests.post(chat_url, json=payload, timeout=timeout)
response.raise_for_status()
result = response.json()
analysis = result["choices"][0]["message"]["content"]
print(f"✅ Analysis complete ({len(analysis)} chars)")
self._record_success()
return analysis
except Exception as e:
print(f"❌ Analysis attempt {attempt+1} failed: {e}")
self._record_failure()
if attempt < retries:
time.sleep(backoff)
else:
print("🚫 Analysis exhausted retries")
return None
def process_url(self, url: str, should_process_images: bool = True) -> str:
"""
Process a URL (Base64 or HTTP), upload to adapter if needed, and return new URL.
"""
# 1. Handle Base64
if url.startswith("data:"):
try:
header, encoded = url.split(",", 1)
mime_type = header.split(";")[0].split(":")[1]
# Check if it's an image and if we should process it
is_image = "image" in mime_type
if is_image and not should_process_images:
return url
# Only process PDF, images, audio, or video (if allowed)
if (
"pdf" not in mime_type
and not is_image
and "audio" not in mime_type
and "video" not in mime_type
):
return url
# Decode
file_content = base64.b64decode(encoded)
extension = mimetypes.guess_extension(mime_type) or ".bin"
filename = f"upload{extension}"
# Upload
new_url = self.upload_to_adapter(file_content, filename, mime_type)
return new_url if new_url else url
except Exception as e:
print(f"❌ Error processing Base64: {e}")
return url
return url
async def emit_status(
self, __event_emitter__, description: str, done: bool = False
):
if __event_emitter__:
await __event_emitter__(
{
"type": "status",
"data": {
"description": description,
"done": done,
},
}
)
async def inlet(
self,
body: dict,
__event_emitter__=None,
__user__: Optional[dict] = None,
__model__: Optional[dict] = None,
__metadata__: Optional[dict] = None,
) -> dict:
print(f"🔍 Gemini PDF Filter: Inlet triggered")
# Extract Chat ID from metadata
openwebui_chat_id = None
if __metadata__:
openwebui_chat_id = __metadata__.get("chat_id")
print(f"🆔 OpenWebUI Chat ID: {openwebui_chat_id}")
# 1. Determine the actual model ID (handling custom models)
base_model_id = None
if __model__:
if "openai" in __model__:
base_model_id = __model__["openai"].get("id")
else:
base_model_id = __model__.get("info", {}).get("base_model_id")
# Fallback to body['model'] if base_model_id not found or __model__ is missing
current_model = base_model_id if base_model_id else body.get("model", "")
print(
f"🤖 Checking model: {current_model} (Target: {self.valves.target_model_keyword})"
)
# Check if model matches target keyword
is_target_model = (
self.valves.target_model_keyword.lower() in current_model.lower()
)
# Determine effective mode
effective_mode = self.valves.mode
if effective_mode == "auto":
if is_target_model:
effective_mode = "direct"
else:
effective_mode = "analyzer"
print(f"⚙️ Effective Mode: {effective_mode}")
# If not target model AND effective mode is NOT analyzer, skip.
# This means if we are in 'direct' mode (forced or auto), we only run for target models.
if not is_target_model and effective_mode != "analyzer":
return body
messages = body.get("messages", [])
if not messages:
return body
last_message = messages[-1]
if last_message.get("role") != "user":
return body
# Extract plain text from the last user message for intent detection
user_text_plain = last_message.get("content", "")
if isinstance(user_text_plain, list):
user_text_plain = " ".join(
[
item.get("text", "")
for item in user_text_plain
if item.get("type") == "text"
]
)
subtitle_keywords = [
kw.strip() for kw in self.valves.subtitle_keywords.split(",") if kw.strip()
]
is_subtitle_query = any(kw in user_text_plain for kw in subtitle_keywords)
video_exts = tuple(
ext.strip().lower()
for ext in self.valves.subtitle_video_extensions.split(",")
)
# 1. Process files list to bypass RAG
processed_any = False
if files := body.get("files"):
files_to_keep = []
# Parse supported extensions
supported_exts = tuple(
ext.strip().lower()
for ext in self.valves.supported_extensions.split(",")
)
for file_item in files:
# 尝试适配不同的 files 结构
file_obj = file_item.get("file", file_item)
# 获取文件名和 ID
filename = file_obj.get("filename") or file_obj.get("name") or ""
file_id = file_obj.get("id", "")
print(f"📄 Checking file: {filename} (ID: {file_id})")
# Database-backed deduplication
file_hash = None
if openwebui_chat_id and file_id:
file_hash = self.get_file_hash(file_id)
if file_hash:
if self.is_file_analyzed(openwebui_chat_id, file_hash):
print(
f"♻️ File {filename} (hash: {file_hash[:8]}...) already analyzed in chat {openwebui_chat_id}, skipping."
)
continue
else:
print(
f"⚠️ Could not find hash for file {file_id}, skipping deduplication check."
)
# Check if extension is supported
# IMPORTANT: If it's an image, only process if is_target_model is True
is_image = any(
filename.lower().endswith(ext)
for ext in [".jpg", ".jpeg", ".png", ".webp", ".gif"]
)
is_video = filename.lower().endswith(video_exts)
should_process = False
if filename.lower().endswith(supported_exts):
if is_image and not is_target_model:
print(f"⚠️ Skipping image for non-Gemini model: {filename}")
should_process = False
else:
should_process = True
# Subtitle flow: only if video + subtitle intent
use_subtitle_flow = should_process and is_video and is_subtitle_query
if should_process:
print(f"📄 Detected supported file: {filename}")
await self.emit_status(
__event_emitter__,
f"🚀 发现新文件: {filename},准备处理...",
done=False,
)
# Construct local path in OpenWebUI container
file_path = os.path.join(
self.valves.openwebui_upload_path, f"{file_id}_{filename}"
)
upload_success = False
try:
if os.path.exists(file_path):
print(f"📖 Reading local file: {file_path}")
with open(file_path, "rb") as f:
file_content = f.read()
# Determine mime type
mime_type, _ = mimetypes.guess_type(filename)
if not mime_type:
mime_type = "application/octet-stream"
if filename.lower().endswith(".pdf"):
mime_type = "application/pdf"
# Upload to Adapter
# Upload is fast (local network), so we skip the status update for it to avoid flickering.
# Use run_in_executor to avoid blocking the event loop with synchronous requests
loop = asyncio.get_running_loop()
adapter_url = await loop.run_in_executor(
None,
self.upload_to_adapter,
file_content,
filename,
mime_type,
)
if adapter_url:
# Mark file as processed in DB
if openwebui_chat_id and file_hash:
self.mark_file_analyzed(
openwebui_chat_id, file_hash, file_id, filename
)
# MODE SWITCH
if effective_mode == "analyzer":
# Analyzer Mode: Analyze first, then inject context
user_text = last_message.get("content", "")
if isinstance(user_text, list):
# Extract text from list content
user_text = " ".join(
[
item["text"]
for item in user_text
if item["type"] == "text"
]
)
await self.emit_status(
__event_emitter__,
(
"🧠 正在使用字幕精修专家生成高质量字幕..."
if use_subtitle_flow
else "🧠 正在使用 Gemini 深度分析文档内容 (这可能需要几十秒)..."
),
done=False,
)
# Give a small yield to ensure status is sent
await asyncio.sleep(0.1)
# Pass openwebui_chat_id to analyze_document
# Also run in executor to avoid blocking
custom_model = (
self.valves.subtitle_custom_model_id
if use_subtitle_flow
else self.valves.analyzer_custom_model_id
)
analysis_result = await loop.run_in_executor(
None,
self.analyze_document,
adapter_url,
user_text,
openwebui_chat_id,
custom_model,
use_subtitle_flow,
)
if analysis_result:
print(
f"✅ Injecting analysis result into context"
)
# Inject as a system message or prepend to user message
# Prepending to user message is usually safer for context retention
context_title = (
"【字幕精修结果SRT 建议)】"
if use_subtitle_flow
else "【文档分析结果】"
)
context_message = f"{context_title}\n{analysis_result}\n\n【用户问题】\n"
content = last_message.get("content", "")
if isinstance(content, str):
last_message["content"] = (
context_message + content
)
elif isinstance(content, list):
# Insert at the beginning of the list
content.insert(
0,
{
"type": "text",
"text": context_message,
},
)
last_message["content"] = content
upload_success = True
processed_any = True
await self.emit_status(
__event_emitter__,
(
"✅ 字幕生成完成,已注入上下文"
if use_subtitle_flow
else "✅ 文档分析完成,已注入上下文"
),
done=True,
)
else:
print("❌ Analysis failed, falling back to RAG")
await self.emit_status(
__event_emitter__,
f"❌ 分析失败,回退到 RAG 模式",
done=True,
)
# upload_success remains False, so it falls back to RAG
else:
# Direct Mode: Pass URL to model
print(f"✅ Adding file to messages: {adapter_url}")
content = last_message.get("content", "")
# Force Subtitle Gem if needed
if use_subtitle_flow:
if not body.get("custom_model_id"):
body["custom_model_id"] = (
self.valves.subtitle_custom_model_id
)
print(
f"🎬 Subtitle flow detected. Binding custom_model_id -> {body['custom_model_id']}"
)
# Ensure content is a list
if isinstance(content, str):
content = [{"type": "text", "text": content}]
content.append(
{
"type": "image_url",
"image_url": {"url": adapter_url},
}
)
last_message["content"] = content
upload_success = True
processed_any = True
await self.emit_status(
__event_emitter__,
f"🔗 文件已连接到 Gemini 模型 (Direct Mode)",
done=True,
)
else:
print(f"❌ Failed to upload file to adapter")
else:
print(f"❌ Local file not found: {file_path}")
except Exception as e:
print(f"❌ Error processing file {filename}: {e}")
await self.emit_status(
__event_emitter__, f"❌ 处理文件失败: {e}", done=True
)
# RAG Fallback: If upload failed, keep the file in the list so OpenWebUI handles it
# If uploaded successfully, add to keep list (bypass RAG)
# OR if we skipped it because it was already processed (in Analyzer mode), we also bypass RAG?
# Actually, if we skip analysis, we usually want to bypass RAG too if we rely on Gemini context.
# But if we skip, we 'continue'd loop, so we didn't reach here.
# Wait, if we continue'd, we didn't add to files_to_keep.
# So it will be removed from files list?
# Let's check logic:
# files_to_keep = []
# for file in files:
# if processed: continue
# if success: files_to_keep.append(file)
# body['files'] = files_to_keep
# If we skip, it's NOT added to files_to_keep. So it's removed from body['files'].
# This is CORRECT for Analyzer mode (we don't want RAG).
# This is ALSO correct for Direct mode (we don't want RAG).
if upload_success:
# We processed it, so we remove it from RAG list (by NOT adding to files_to_keep? No wait)
# The logic below says:
# if upload_success: pass (don't add to files_to_keep) -> removed from RAG
# else: files_to_keep.append(file_item) -> kept for RAG
pass
else:
files_to_keep.append(file_item)
else:
# Not a supported file type or image for non-Gemini model, keep it for RAG
files_to_keep.append(file_item)
# Update files list
# Always update if we have filtered anything, regardless of whether we processed new files
if len(files_to_keep) != len(files):
print(
f"🚫 Updating body['files']: {len(files)} -> {len(files_to_keep)}"
)
body["files"] = files_to_keep
# Handle Analyzer Mode follow-up questions (No new files processed)
# Only trigger if we are in analyzer mode AND we have processed files in this chat history
has_processed_files = openwebui_chat_id and self.has_analyzed_files_in_chat(
openwebui_chat_id
)
if effective_mode == "analyzer" and not processed_any and has_processed_files:
# Check if we have a chat_id (meaning context might exist)
if openwebui_chat_id:
print(f"🤔 Analyzer Mode follow-up detected (has context).")
user_text = last_message.get("content", "")
if isinstance(user_text, list):
user_text = " ".join(
[item["text"] for item in user_text if item["type"] == "text"]
)
# Only analyze if there is text content
if user_text.strip():
await self.emit_status(
__event_emitter__,
(
"🧠 正在使用字幕精修专家处理追问..."
if is_subtitle_query
else "🧠 正在使用 Gemini 分析追问..."
),
done=False,
)
loop = asyncio.get_running_loop()
followup_custom_model = (
self.valves.subtitle_custom_model_id
if is_subtitle_query
else self.valves.analyzer_custom_model_id
)
analysis_result = await loop.run_in_executor(
None,
self.analyze_document,
None,
user_text,
openwebui_chat_id,
followup_custom_model,
is_subtitle_query,
)
if analysis_result:
print(f"✅ Injecting follow-up analysis into context")
context_title = (
"【字幕精修专家回答】"
if is_subtitle_query
else "【文档分析助手回答】"
)
context_message = (
f"{context_title}\n{analysis_result}\n\n【用户新问题】\n"
)
content = last_message.get("content", "")
if isinstance(content, str):
last_message["content"] = context_message + content
elif isinstance(content, list):
content.insert(0, {"type": "text", "text": context_message})
last_message["content"] = content
await self.emit_status(
__event_emitter__,
f"✅ 追问分析完成,已注入上下文",
done=True,
)
else:
# If analysis fails or returns None (e.g. no session), we just let it pass.
# But wait, if analyze_document returns None, it might mean error.
# If it's a new chat with no files and no context, analyze_document might be confused?
# analyze_document handles file_url=None by using a specific prompt.
pass
# 2. Process existing 'image_url' in content (Base64)
# Only process if it's the target model (Gemini)
if is_target_model:
content = last_message.get("content", "")
if isinstance(content, list):
for item in content:
if item.get("type") == "image_url":
url = item.get("image_url", {}).get("url", "")
# Pass should_process_images=True because we already checked is_target_model
new_url = self.process_url(url, should_process_images=True)
if new_url != url:
item["image_url"]["url"] = new_url
print("✅ Message content updated with adapter URLs")
return body
def outlet(self, body: dict, __user__: dict = None) -> dict:
return body

View File

@@ -0,0 +1,77 @@
# Awesome OpenWebUI 插件开发总结
## 功能增强概述
这12个插件为 OpenWebUI 带来了全方位的功能增强,显著提升了用户体验和生产力:
### 📊 可视化能力增强
- **智能信息图**:将文本内容自动转换为专业的 AntV 可视化图表,支持多种模板(流程图、对比图、象限图等),并可导出 SVG/PNG/HTML
- **思维导图**:基于 Markmap 的交互式思维导图生成,帮助结构化知识和可视化思维
### 💾 数据处理能力
- **Excel 导出**:一键将对话中的 Markdown 表格导出为符合中国规范的 Excel 文件,自动识别数据类型并应用合适的对齐和格式
- **多模态文件处理**:支持 PDF、Office 文档、音视频等多种格式的智能分析,自动上传并调用 Gemini 进行内容理解
### 🧠 学习与分析增强
- **闪记卡**:快速提炼文本核心要点为精美的记忆卡片,支持分类标签和关键点提取
- **精读分析**:深度分析长文本,自动生成摘要、关键信息点和可执行的行动建议
### ⚡ 性能与上下文优化
- **异步上下文压缩**:自动压缩对话历史并生成摘要,支持数据库持久化,有效管理超长对话
- **上下文增强**自动注入环境变量、优化模型功能适配、智能清洗输出内容修复代码块、LaTeX 等)
- **多模型回答合并**:将多个 AI 模型的回答合并为统一上下文,提升 MoE模型混合专家场景的效果
### 🎬 专业场景支持
- **字幕增强**:自动识别视频+字幕需求,调用专门的字幕精修专家生成高质量 SRT 字幕
- **智能路由**:根据模型类型自动选择最佳处理方式(直连 Gemini 或通过分析器)
- **提示词优化**MoE 场景下自动优化提示词,提取原始问题和各模型回答
### 🔧 开发者体验
- **数据库去重**:自动记录已分析文件,避免重复处理,节省资源
- **会话持久化**:基于 Chat ID 维护跨多轮对话的上下文
- **智能追问**:支持针对已上传文档的纯文本追问,无需重复上传
---
## Actions (动作插件)
1. **📊 智能信息图 (infographic/信息图.py)** - 基于 AntV Infographic 的智能信息图生成插件,支持多种专业模板与 SVG/PNG 下载
2. **🧠 智绘心图 (smart-mind-map/思维导图.py)** - 智能分析文本内容生成交互式思维导图,帮助用户结构化和可视化知识
3. **📊 导出为 Excel (export_to_excel/导出为Excel.py)** - 将对话历史中的 Markdown 表格导出为符合中国规范的 Excel 文件
4. **⚡ 闪记卡 (knowledge-card/闪记卡.py)** - 快速将文本提炼为精美的学习记忆卡片,支持核心要点提取与分类
5. **📖 精读 (summary/精读.py)** - 深度分析长篇文本,提炼详细摘要、关键信息点和可执行的行动建议
## Filters (过滤器插件)
1. **🔄 异步上下文压缩 (async-context-compression/异步上下文压缩.py)** - 异步生成摘要并压缩对话历史,支持数据库持久化存储
2. **✨ 上下文增强过滤器 (context_enhancement_filter/context_enhancement_filter.py)** - 增强请求上下文和优化模型功能,包含环境变量管理、模型功能适配和内容清洗
3. **📊 合并回答 (multi_model_context_merger.py)** - 将多个匿名 AI 模型的回答合并并注入到当前请求的上下文中
4. **🎬 Gemini 多模态过滤器 (web_gemini_multimodel_filter/web_gemini_multimodel.py)** - 为任何模型提供多模态能力(PDF/Office/音视频),支持智能路由、字幕增强和上下文持久化
5. **🔮 Gemini Manifold Companion (gemini_manifold_companion/gemini_manifold_companion.py)** - Gemini Manifold 的配套过滤器,提供增强功能支持
## Pipelines (管道插件)
1. **🎯 MoE 提示词优化器 (moe_prompt_refiner.py)** - 优化多模型汇总请求的提示词,提取原始用户查询和各模型回答
## Pipes (管道接口插件)
1. **💎 Gemini Manifold (gemini_mainfold/gemini_manifold.py)** - Gemini Developer API 和 Vertex AI 的 Manifold 函数,使用 google-genai SDK 支持丰富功能
---
**统计信息:**
- Actions: 5个插件
- Filters: 5个插件
- Pipelines: 1个插件
- Pipes: 1个插件
- **总计: 12个插件**
**项目地址:** [Fu-Jie/awesome-openwebui](https://github.com/Fu-Jie/awesome-openwebui)