diff --git a/docs/plugins/filters/async-context-compression.md b/docs/plugins/filters/async-context-compression.md index 9dcb78c..7c93faf 100644 --- a/docs/plugins/filters/async-context-compression.md +++ b/docs/plugins/filters/async-context-compression.md @@ -1,7 +1,7 @@ # Async Context Compression Filter -v1.1.0 +v1.1.2 Reduces token consumption in long conversations through intelligent summarization while maintaining conversational coherence. @@ -29,6 +29,9 @@ This is especially useful for: - :material-clock-fast: **Async Processing**: Non-blocking background compression - :material-memory: **Context Preservation**: Keeps important information - :material-currency-usd-off: **Cost Reduction**: Minimize token usage +- :material-console: **Frontend Debugging**: Debug logs in browser console +- :material-alert-circle-check: **Enhanced Error Reporting**: Clear error status notifications +- :material-check-all: **Open WebUI v0.7.x Compatibility**: Dynamic DB session handling --- diff --git a/docs/plugins/filters/async-context-compression.zh.md b/docs/plugins/filters/async-context-compression.zh.md index c5600d0..2e20996 100644 --- a/docs/plugins/filters/async-context-compression.zh.md +++ b/docs/plugins/filters/async-context-compression.zh.md @@ -1,7 +1,7 @@ # Async Context Compression(异步上下文压缩) Filter -v1.1.0 +v1.1.2 通过智能摘要减少长对话的 token 消耗,同时保持对话连贯。 @@ -29,6 +29,9 @@ Async Context Compression 过滤器通过以下方式帮助管理长对话的 to - :material-clock-fast: **异步处理**:后台非阻塞压缩 - :material-memory: **保留上下文**:尽量保留重要信息 - :material-currency-usd-off: **降低成本**:减少 token 使用 +- :material-console: **前端调试**:支持浏览器控制台日志 +- :material-alert-circle-check: **增强错误报告**:清晰的错误状态通知 +- :material-check-all: **Open WebUI v0.7.x 兼容性**:动态数据库会话处理 --- diff --git a/docs/plugins/filters/index.md b/docs/plugins/filters/index.md index 669cdbf..bf56029 100644 --- a/docs/plugins/filters/index.md +++ b/docs/plugins/filters/index.md @@ -22,7 +22,7 @@ Filters act as middleware in the message pipeline: Reduces token consumption in long conversations through intelligent summarization while maintaining coherence. - **Version:** 1.1.0 + **Version:** 1.1.2 [:octicons-arrow-right-24: Documentation](async-context-compression.md) diff --git a/plugins/filters/async-context-compression/README.md b/plugins/filters/async-context-compression/README.md index bf33fd3..88d0607 100644 --- a/plugins/filters/async-context-compression/README.md +++ b/plugins/filters/async-context-compression/README.md @@ -1,9 +1,15 @@ # Async Context Compression Filter -**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.1.1 | **License:** MIT +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.1.2 | **License:** MIT This filter reduces token consumption in long conversations through intelligent summarization and message compression while keeping conversations coherent. +## What's new in 1.1.2 + +- **Open WebUI v0.7.x Compatibility**: Resolved a critical database session binding error affecting Open WebUI v0.7.x users. The plugin now dynamically discovers the database engine and session context, ensuring compatibility across versions. +- **Enhanced Error Reporting**: Errors during background summary generation are now reported via both the status bar and browser console. +- **Robust Model Handling**: Improved handling of missing or invalid model IDs to prevent crashes. + ## What's new in 1.1.1 - **Frontend Debugging**: Added `show_debug_log` option to print debug info to the browser console (F12). diff --git a/plugins/filters/async-context-compression/README_CN.md b/plugins/filters/async-context-compression/README_CN.md index fa75cb5..58875ae 100644 --- a/plugins/filters/async-context-compression/README_CN.md +++ b/plugins/filters/async-context-compression/README_CN.md @@ -1,11 +1,17 @@ # 异步上下文压缩过滤器 -**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.1.1 | **许可证:** MIT +**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.1.2 | **许可证:** MIT > **重要提示**:为了确保所有过滤器的可维护性和易用性,每个过滤器都应附带清晰、完整的文档,以确保其功能、配置和使用方法得到充分说明。 本过滤器通过智能摘要和消息压缩技术,在保持对话连贯性的同时,显著降低长对话的 Token 消耗。 +## 1.1.2 版本更新 + +- **Open WebUI v0.7.x 兼容性**: 修复了影响 Open WebUI v0.7.x 用户的严重数据库会话绑定错误。插件现在动态发现数据库引擎和会话上下文,确保跨版本兼容性。 +- **增强错误报告**: 后台摘要生成过程中的错误现在会通过状态栏和浏览器控制台同时报告。 +- **健壮的模型处理**: 改进了对缺失或无效模型 ID 的处理,防止程序崩溃。 + ## 1.1.1 版本更新 - **前端调试**: 新增 `show_debug_log` 选项,支持在浏览器控制台 (F12) 打印调试信息。 diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 26c13db..09355db 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -5,7 +5,7 @@ author: Fu-Jie author_url: https://github.com/Fu-Jie funding_url: https://github.com/Fu-Jie/awesome-openwebui description: Reduces token consumption in long conversations while maintaining coherence through intelligent summarization and message compression. -version: 1.1.1 +version: 1.1.2 openwebui_id: b1655bc8-6de9-4cad-8cb5-a6f7829a02ce license: MIT @@ -249,6 +249,7 @@ import asyncio import json import hashlib import time +import contextlib # Open WebUI built-in imports from open_webui.utils.chat import generate_chat_completion @@ -257,9 +258,10 @@ from fastapi.requests import Request from open_webui.main import app as webui_app # Open WebUI internal database (re-use shared connection) -from open_webui.internal.db import engine as owui_engine -from open_webui.internal.db import Session as owui_Session -from open_webui.internal.db import Base as owui_Base +try: + from open_webui.internal import db as owui_db +except ModuleNotFoundError: # pragma: no cover - filter runs inside Open WebUI + owui_db = None # Try to import tiktoken try: @@ -269,14 +271,91 @@ except ImportError: # Database imports from sqlalchemy import Column, String, Text, DateTime, Integer, inspect +from sqlalchemy.orm import declarative_base, sessionmaker +from sqlalchemy.engine import Engine from datetime import datetime +def _discover_owui_engine(db_module: Any) -> Optional[Engine]: + """Discover the Open WebUI SQLAlchemy engine via provided db module helpers.""" + if db_module is None: + return None + + db_context = getattr(db_module, "get_db_context", None) or getattr( + db_module, "get_db", None + ) + if callable(db_context): + try: + with db_context() as session: + try: + return session.get_bind() + except AttributeError: + return getattr(session, "bind", None) or getattr( + session, "engine", None + ) + except Exception as exc: + print(f"[DB Discover] get_db_context failed: {exc}") + + for attr in ("engine", "ENGINE", "bind", "BIND"): + candidate = getattr(db_module, attr, None) + if candidate is not None: + return candidate + + return None + + +def _discover_owui_schema(db_module: Any) -> Optional[str]: + """Discover the Open WebUI database schema name if configured.""" + if db_module is None: + return None + + try: + base = getattr(db_module, "Base", None) + metadata = getattr(base, "metadata", None) if base is not None else None + candidate = getattr(metadata, "schema", None) if metadata is not None else None + if isinstance(candidate, str) and candidate.strip(): + return candidate.strip() + except Exception as exc: + print(f"[DB Discover] Base metadata schema lookup failed: {exc}") + + try: + metadata_obj = getattr(db_module, "metadata_obj", None) + candidate = ( + getattr(metadata_obj, "schema", None) if metadata_obj is not None else None + ) + if isinstance(candidate, str) and candidate.strip(): + return candidate.strip() + except Exception as exc: + print(f"[DB Discover] metadata_obj schema lookup failed: {exc}") + + try: + from open_webui import env as owui_env + + candidate = getattr(owui_env, "DATABASE_SCHEMA", None) + if isinstance(candidate, str) and candidate.strip(): + return candidate.strip() + except Exception as exc: + print(f"[DB Discover] env schema lookup failed: {exc}") + + return None + + +owui_engine = _discover_owui_engine(owui_db) +owui_schema = _discover_owui_schema(owui_db) +owui_Base = getattr(owui_db, "Base", None) if owui_db is not None else None +if owui_Base is None: + owui_Base = declarative_base() + + class ChatSummary(owui_Base): """Chat Summary Storage Table""" __tablename__ = "chat_summary" - __table_args__ = {"extend_existing": True} + __table_args__ = ( + {"extend_existing": True, "schema": owui_schema} + if owui_schema + else {"extend_existing": True} + ) id = Column(Integer, primary_key=True, autoincrement=True) chat_id = Column(String(255), unique=True, nullable=False, index=True) @@ -289,14 +368,66 @@ class ChatSummary(owui_Base): class Filter: def __init__(self): self.valves = self.Valves() + self._owui_db = owui_db self._db_engine = owui_engine - self._SessionLocal = owui_Session self.temp_state = {} # Used to pass temporary data between inlet and outlet + self._fallback_session_factory = ( + sessionmaker(bind=self._db_engine) if self._db_engine else None + ) self._init_database() + @contextlib.contextmanager + def _db_session(self): + """Yield a database session using Open WebUI helpers with graceful fallbacks.""" + db_module = self._owui_db + db_context = None + if db_module is not None: + db_context = getattr(db_module, "get_db_context", None) or getattr( + db_module, "get_db", None + ) + + if callable(db_context): + with db_context() as session: + yield session + return + + factory = None + if db_module is not None: + factory = getattr(db_module, "SessionLocal", None) or getattr( + db_module, "ScopedSession", None + ) + if callable(factory): + session = factory() + try: + yield session + finally: + close = getattr(session, "close", None) + if callable(close): + close() + return + + if self._fallback_session_factory is None: + raise RuntimeError( + "Open WebUI database session is unavailable. Ensure Open WebUI's database layer is initialized." + ) + + session = self._fallback_session_factory() + try: + yield session + finally: + try: + session.close() + except Exception as exc: # pragma: no cover - best-effort cleanup + print(f"[Database] ⚠️ Failed to close fallback session: {exc}") + def _init_database(self): """Initializes the database table using Open WebUI's shared connection.""" try: + if self._db_engine is None: + raise RuntimeError( + "Open WebUI database engine is unavailable. Ensure Open WebUI is configured with a valid DATABASE_URL." + ) + # Check if table exists using SQLAlchemy inspect inspector = inspect(self._db_engine) if not inspector.has_table("chat_summary"): @@ -366,7 +497,7 @@ class Filter: def _save_summary(self, chat_id: str, summary: str, compressed_count: int): """Saves the summary to the database.""" try: - with self._SessionLocal() as session: + with self._db_session() as session: # Find existing record existing = session.query(ChatSummary).filter_by(chat_id=chat_id).first() @@ -406,7 +537,7 @@ class Filter: def _load_summary_record(self, chat_id: str) -> Optional[ChatSummary]: """Loads the summary record object from the database.""" try: - with self._SessionLocal() as session: + with self._db_session() as session: record = session.query(ChatSummary).filter_by(chat_id=chat_id).first() if record: # Detach the object from the session so it can be used after session close @@ -487,6 +618,26 @@ class Filter: "max_context_tokens": self.valves.max_context_tokens, } + def _extract_chat_id(self, body: dict, metadata: Optional[dict]) -> str: + """Extract chat_id from body or metadata.""" + if isinstance(body, dict): + chat_id = body.get("chat_id") + if isinstance(chat_id, str) and chat_id.strip(): + return chat_id.strip() + + body_metadata = body.get("metadata", {}) + if isinstance(body_metadata, dict): + chat_id = body_metadata.get("chat_id") + if isinstance(chat_id, str) and chat_id.strip(): + return chat_id.strip() + + if isinstance(metadata, dict): + chat_id = metadata.get("chat_id") + if isinstance(chat_id, str) and chat_id.strip(): + return chat_id.strip() + + return "" + def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict: """Injects the summary into the first message (prepended to content).""" content = message.get("content", "") @@ -632,7 +783,15 @@ class Filter: Compression Strategy: Only responsible for injecting existing summaries, no Token calculation. """ messages = body.get("messages", []) - chat_id = __metadata__["chat_id"] + chat_id = self._extract_chat_id(body, __metadata__) + + if not chat_id: + await self._log( + "[Inlet] ❌ Missing chat_id in metadata, skipping compression", + type="error", + event_call=__event_call__, + ) + return body if self.valves.debug_mode or self.valves.show_debug_log: await self._log( @@ -747,7 +906,14 @@ class Filter: Executed after the LLM response is complete. Calculates Token count in the background and triggers summary generation (does not block current response, does not affect content output). """ - chat_id = __metadata__["chat_id"] + chat_id = self._extract_chat_id(body, __metadata__) + if not chat_id: + await self._log( + "[Outlet] ❌ Missing chat_id in metadata, skipping compression", + type="error", + event_call=__event_call__, + ) + return body model = body.get("model", "gpt-3.5-turbo") if self.valves.debug_mode or self.valves.show_debug_log: @@ -836,6 +1002,13 @@ class Filter: event_call=__event_call__, ) + def _clean_model_id(self, model_id: Optional[str]) -> Optional[str]: + """Cleans the model ID by removing whitespace and quotes.""" + if not model_id: + return None + cleaned = model_id.strip().strip('"').strip("'") + return cleaned if cleaned else None + async def _generate_summary_async( self, messages: list, @@ -892,9 +1065,17 @@ class Filter: # 3. Check Token limit and truncate (Max Context Truncation) # [Optimization] Use the summary model's (if any) threshold to decide how many middle messages can be processed # This allows using a long-window model (like gemini-flash) to compress history exceeding the current model's window - summary_model_id = self.valves.summary_model or body.get( - "model", "gpt-3.5-turbo" - ) + summary_model_id = self._clean_model_id( + self.valves.summary_model + ) or self._clean_model_id(body.get("model")) + + if not summary_model_id: + await self._log( + "[🤖 Async Summary Task] ⚠️ Summary model does not exist, skipping compression", + type="warning", + event_call=__event_call__, + ) + return thresholds = self._get_model_thresholds(summary_model_id) # Note: Using the summary model's max context limit here @@ -966,9 +1147,21 @@ class Filter: ) new_summary = await self._call_summary_llm( - None, conversation_text, body, user_data, __event_call__ + None, + conversation_text, + {**body, "model": summary_model_id}, + user_data, + __event_call__, ) + if not new_summary: + await self._log( + "[🤖 Async Summary Task] ⚠️ Summary generation returned empty result, skipping save", + type="warning", + event_call=__event_call__, + ) + return + # 6. Save new summary await self._log( "[Optimization] Saving summary in a background thread to avoid blocking the event loop.", @@ -1007,6 +1200,18 @@ class Filter: type="error", event_call=__event_call__, ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"Summary Error: {str(e)[:100]}...", + "done": True, + }, + } + ) + import traceback traceback.print_exc() @@ -1088,7 +1293,17 @@ This conversation may contain previous summaries (as system messages or text) an Based on the content above, generate the summary: """ # Determine the model to use - model = self.valves.summary_model or body.get("model", "") + model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( + body.get("model") + ) + + if not model: + await self._log( + "[🤖 LLM Call] ⚠️ Summary model does not exist, skipping summary generation", + type="warning", + event_call=__event_call__, + ) + return "" await self._log(f"[🤖 LLM Call] Model: {model}", event_call=__event_call__) @@ -1142,7 +1357,12 @@ Based on the content above, generate the summary: return summary except Exception as e: - error_message = f"Error occurred while calling LLM ({model}) to generate summary: {str(e)}" + error_msg = str(e) + # Handle specific error messages + if "Model not found" in error_msg: + error_message = f"Summary model '{model}' not found." + else: + error_message = f"Summary LLM Error ({model}): {error_msg}" if not self.valves.summary_model: error_message += ( "\n[Hint] You did not specify a summary_model, so the filter attempted to use the current conversation's model. " diff --git a/plugins/filters/async-context-compression/async_context_compression_cn.py b/plugins/filters/async-context-compression/async_context_compression_cn.py index 8d596ae..214c504 100644 --- a/plugins/filters/async-context-compression/async_context_compression_cn.py +++ b/plugins/filters/async-context-compression/async_context_compression_cn.py @@ -5,7 +5,7 @@ author: Fu-Jie author_url: https://github.com/Fu-Jie funding_url: https://github.com/Fu-Jie/awesome-openwebui description: 通过智能摘要和消息压缩,降低长对话的 token 消耗,同时保持对话连贯性。 -version: 1.1.1 +version: 1.1.2 openwebui_id: 5c0617cb-a9e4-4bd6-a440-d276534ebd18 license: MIT @@ -820,6 +820,13 @@ class Filter: event_call=__event_call__, ) + def _clean_model_id(self, model_id: Optional[str]) -> Optional[str]: + """Cleans the model ID by removing whitespace and quotes.""" + if not model_id: + return None + cleaned = model_id.strip().strip('"').strip("'") + return cleaned if cleaned else None + async def _generate_summary_async( self, messages: list, @@ -874,7 +881,17 @@ class Filter: # 3. 检查 Token 上限并截断 (Max Context Truncation) # [优化] 使用摘要模型(如果有)的阈值来决定能处理多少中间消息 # 这样可以用长窗口模型(如 gemini-flash)来压缩超过当前模型窗口的历史记录 - summary_model_id = self.valves.summary_model or body.get("model") + summary_model_id = self._clean_model_id( + self.valves.summary_model + ) or self._clean_model_id(body.get("model")) + + if not summary_model_id: + await self._log( + "[🤖 异步摘要任务] ⚠️ 摘要模型不存在,跳过压缩", + type="warning", + event_call=__event_call__, + ) + return thresholds = self._get_model_thresholds(summary_model_id) # 注意:这里使用的是摘要模型的最大上下文限制 @@ -946,9 +963,21 @@ class Filter: ) new_summary = await self._call_summary_llm( - None, conversation_text, body, user_data, __event_call__ + None, + conversation_text, + {**body, "model": summary_model_id}, + user_data, + __event_call__, ) + if not new_summary: + await self._log( + "[🤖 异步摘要任务] ⚠️ 摘要生成返回空结果,跳过保存", + type="warning", + event_call=__event_call__, + ) + return + # 6. 保存新摘要 await self._log( "[优化] 在后台线程中保存摘要以避免阻塞事件循环。", @@ -987,6 +1016,18 @@ class Filter: type="error", event_call=__event_call__, ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"摘要生成错误: {str(e)[:100]}...", + "done": True, + }, + } + ) + import traceback traceback.print_exc() @@ -1068,7 +1109,17 @@ class Filter: 请根据上述内容,生成摘要: """ # 确定使用的模型 - model = self.valves.summary_model or body.get("model", "") + model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( + body.get("model") + ) + + if not model: + await self._log( + "[🤖 LLM 调用] ⚠️ 摘要模型不存在,跳过摘要生成", + type="warning", + event_call=__event_call__, + ) + return "" await self._log(f"[🤖 LLM 调用] 模型: {model}", event_call=__event_call__) @@ -1122,7 +1173,12 @@ class Filter: return summary except Exception as e: - error_message = f"调用 LLM ({model}) 生成摘要时发生错误: {str(e)}" + error_msg = str(e) + # Handle specific error messages + if "Model not found" in error_msg: + error_message = f"摘要模型 '{model}' 不存在。" + else: + error_message = f"摘要 LLM 错误 ({model}): {error_msg}" if not self.valves.summary_model: error_message += ( "\n[提示] 您未指定 summary_model,因此过滤器尝试使用当前对话的模型。"