From f479f23b38c98a5e23a0d7ec3baa6374c691d604 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:19:33 +0000 Subject: [PATCH 1/9] Initial plan From d2f35ce396e3d793c1b9df9e9f8898b6a4ae3c01 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:24:56 +0000 Subject: [PATCH 2/9] fix: harden async compression compatibility Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async_context_compression.py | 74 ++++++++++++++++--- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 26c13db..3e09a29 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -257,9 +257,7 @@ from fastapi.requests import Request from open_webui.main import app as webui_app # Open WebUI internal database (re-use shared connection) -from open_webui.internal.db import engine as owui_engine -from open_webui.internal.db import Session as owui_Session -from open_webui.internal.db import Base as owui_Base +import open_webui.internal.db as owui_db # Try to import tiktoken try: @@ -272,6 +270,9 @@ from sqlalchemy import Column, String, Text, DateTime, Integer, inspect from datetime import datetime +owui_Base = owui_db.Base + + class ChatSummary(owui_Base): """Chat Summary Storage Table""" @@ -289,8 +290,14 @@ class ChatSummary(owui_Base): class Filter: def __init__(self): self.valves = self.Valves() - self._db_engine = owui_engine - self._SessionLocal = owui_Session + self._db_engine = owui_db.engine + self._SessionLocal = ( + getattr(owui_db, "ScopedSession", None) + or getattr(owui_db, "SessionLocal", None) + or getattr(owui_db, "Session", None) + ) + if self._SessionLocal is None: + raise RuntimeError("Open WebUI database session factory unavailable.") self.temp_state = {} # Used to pass temporary data between inlet and outlet self._init_database() @@ -632,7 +639,15 @@ class Filter: Compression Strategy: Only responsible for injecting existing summaries, no Token calculation. """ messages = body.get("messages", []) - chat_id = __metadata__["chat_id"] + chat_id = (__metadata__ or {}).get("chat_id") + + if not chat_id: + await self._log( + "[Inlet] ❌ Missing chat_id in metadata, skipping compression", + type="error", + event_call=__event_call__, + ) + return body if self.valves.debug_mode or self.valves.show_debug_log: await self._log( @@ -747,7 +762,14 @@ class Filter: Executed after the LLM response is complete. Calculates Token count in the background and triggers summary generation (does not block current response, does not affect content output). """ - chat_id = __metadata__["chat_id"] + chat_id = (__metadata__ or {}).get("chat_id") + if not chat_id: + await self._log( + "[Outlet] ❌ Missing chat_id in metadata, skipping compression", + type="error", + event_call=__event_call__, + ) + return body model = body.get("model", "gpt-3.5-turbo") if self.valves.debug_mode or self.valves.show_debug_log: @@ -892,10 +914,20 @@ class Filter: # 3. Check Token limit and truncate (Max Context Truncation) # [Optimization] Use the summary model's (if any) threshold to decide how many middle messages can be processed # This allows using a long-window model (like gemini-flash) to compress history exceeding the current model's window - summary_model_id = self.valves.summary_model or body.get( - "model", "gpt-3.5-turbo" + summary_model_id = ( + self.valves.summary_model + or body.get("model") + or "gpt-3.5-turbo" ) + if not summary_model_id: + await self._log( + "[🤖 Async Summary Task] ⚠️ Summary model is empty, skipping compression", + type="warning", + event_call=__event_call__, + ) + return + thresholds = self._get_model_thresholds(summary_model_id) # Note: Using the summary model's max context limit here max_context_tokens = thresholds.get( @@ -963,12 +995,24 @@ class Filter: "done": False, }, } - ) + ) new_summary = await self._call_summary_llm( - None, conversation_text, body, user_data, __event_call__ + None, + conversation_text, + {**body, "model": summary_model_id}, + user_data, + __event_call__, ) + if not new_summary: + await self._log( + "[🤖 Async Summary Task] ⚠️ Summary generation returned empty result, skipping save", + type="warning", + event_call=__event_call__, + ) + return + # 6. Save new summary await self._log( "[Optimization] Saving summary in a background thread to avoid blocking the event loop.", @@ -1090,6 +1134,14 @@ Based on the content above, generate the summary: # Determine the model to use model = self.valves.summary_model or body.get("model", "") + if not model: + await self._log( + "[🤖 LLM Call] ⚠️ Model ID is empty, skipping summary generation", + type="warning", + event_call=__event_call__, + ) + return "" + await self._log(f"[🤖 LLM Call] Model: {model}", event_call=__event_call__) # Build payload From 4b8515f68209c46ff4a8af9d3111332ca05ab69b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:25:33 +0000 Subject: [PATCH 3/9] fix: ensure empty summary model skips compression Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async-context-compression/async_context_compression.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 3e09a29..0ec2323 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -914,11 +914,7 @@ class Filter: # 3. Check Token limit and truncate (Max Context Truncation) # [Optimization] Use the summary model's (if any) threshold to decide how many middle messages can be processed # This allows using a long-window model (like gemini-flash) to compress history exceeding the current model's window - summary_model_id = ( - self.valves.summary_model - or body.get("model") - or "gpt-3.5-turbo" - ) + summary_model_id = self.valves.summary_model or body.get("model") if not summary_model_id: await self._log( From 9e98d55e11d98144b25bfa9fa27c2f705f73e3d6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:27:36 +0000 Subject: [PATCH 4/9] fix: make async compression db session discovery robust Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async_context_compression.py | 140 ++++++++++++++++-- 1 file changed, 127 insertions(+), 13 deletions(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 0ec2323..d757067 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -249,6 +249,7 @@ import asyncio import json import hashlib import time +import contextlib # Open WebUI built-in imports from open_webui.utils.chat import generate_chat_completion @@ -257,7 +258,10 @@ from fastapi.requests import Request from open_webui.main import app as webui_app # Open WebUI internal database (re-use shared connection) -import open_webui.internal.db as owui_db +try: + from open_webui.internal import db as owui_db +except Exception: # pragma: no cover - filter runs inside Open WebUI + owui_db = None # Try to import tiktoken try: @@ -267,17 +271,88 @@ except ImportError: # Database imports from sqlalchemy import Column, String, Text, DateTime, Integer, inspect +from sqlalchemy.orm import declarative_base, sessionmaker from datetime import datetime -owui_Base = owui_db.Base +def _discover_owui_engine(db_module) -> Any | None: + if db_module is None: + return None + + db_context = getattr(db_module, "get_db_context", None) or getattr( + db_module, "get_db", None + ) + if callable(db_context): + try: + with db_context() as session: + try: + return session.get_bind() + except Exception: + return getattr(session, "bind", None) or getattr( + session, "engine", None + ) + except Exception: + pass + + for attr in ("engine", "ENGINE", "bind", "BIND"): + candidate = getattr(db_module, attr, None) + if candidate is not None: + return candidate + + return None + + +def _discover_owui_schema(db_module) -> str | None: + if db_module is None: + return None + + try: + base = getattr(db_module, "Base", None) + metadata = getattr(base, "metadata", None) if base is not None else None + candidate = getattr(metadata, "schema", None) if metadata is not None else None + if isinstance(candidate, str) and candidate.strip(): + return candidate.strip() + except Exception: + pass + + try: + metadata_obj = getattr(db_module, "metadata_obj", None) + candidate = ( + getattr(metadata_obj, "schema", None) if metadata_obj is not None else None + ) + if isinstance(candidate, str) and candidate.strip(): + return candidate.strip() + except Exception: + pass + + try: + from open_webui import env as owui_env + + candidate = getattr(owui_env, "DATABASE_SCHEMA", None) + if isinstance(candidate, str) and candidate.strip(): + return candidate.strip() + except Exception: + pass + + return None + + +owui_engine = _discover_owui_engine(owui_db) +owui_schema = _discover_owui_schema(owui_db) +owui_Base = getattr(owui_db, "Base", None) if owui_db is not None else None +if owui_Base is None: + owui_Base = declarative_base() class ChatSummary(owui_Base): """Chat Summary Storage Table""" __tablename__ = "chat_summary" - __table_args__ = {"extend_existing": True} + __table_args__ = ( + {"extend_existing": True, "schema": owui_schema} + if owui_schema + else {"extend_existing": True} + ) id = Column(Integer, primary_key=True, autoincrement=True) chat_id = Column(String(255), unique=True, nullable=False, index=True) @@ -290,20 +365,59 @@ class ChatSummary(owui_Base): class Filter: def __init__(self): self.valves = self.Valves() - self._db_engine = owui_db.engine - self._SessionLocal = ( - getattr(owui_db, "ScopedSession", None) - or getattr(owui_db, "SessionLocal", None) - or getattr(owui_db, "Session", None) - ) - if self._SessionLocal is None: - raise RuntimeError("Open WebUI database session factory unavailable.") + self._owui_db = owui_db + self._db_engine = owui_engine self.temp_state = {} # Used to pass temporary data between inlet and outlet + self._fallback_session_factory = ( + sessionmaker(bind=self._db_engine) if self._db_engine else None + ) self._init_database() + @contextlib.contextmanager + def _db_session(self): + db_module = self._owui_db + db_context = None + if db_module is not None: + db_context = getattr(db_module, "get_db_context", None) or getattr( + db_module, "get_db", None + ) + + if callable(db_context): + with db_context() as session: + yield session + return + + factory = None + if db_module is not None: + factory = getattr(db_module, "SessionLocal", None) or getattr( + db_module, "ScopedSession", None + ) + if callable(factory): + session = factory() + try: + yield session + finally: + close = getattr(session, "close", None) + if callable(close): + close() + return + + if self._fallback_session_factory is None: + raise RuntimeError("Open WebUI database session is unavailable.") + + session = self._fallback_session_factory() + try: + yield session + finally: + with contextlib.suppress(Exception): + session.close() + def _init_database(self): """Initializes the database table using Open WebUI's shared connection.""" try: + if self._db_engine is None: + raise RuntimeError("Open WebUI database engine is unavailable.") + # Check if table exists using SQLAlchemy inspect inspector = inspect(self._db_engine) if not inspector.has_table("chat_summary"): @@ -373,7 +487,7 @@ class Filter: def _save_summary(self, chat_id: str, summary: str, compressed_count: int): """Saves the summary to the database.""" try: - with self._SessionLocal() as session: + with self._db_session() as session: # Find existing record existing = session.query(ChatSummary).filter_by(chat_id=chat_id).first() @@ -413,7 +527,7 @@ class Filter: def _load_summary_record(self, chat_id: str) -> Optional[ChatSummary]: """Loads the summary record object from the database.""" try: - with self._SessionLocal() as session: + with self._db_session() as session: record = session.query(ChatSummary).filter_by(chat_id=chat_id).first() if record: # Detach the object from the session so it can be used after session close From d3b4219a9ab723d5093db3526932457931bc5ec8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:28:52 +0000 Subject: [PATCH 5/9] chore: refine db session discovery messaging Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async_context_compression.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index d757067..ceaf97f 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -272,10 +272,11 @@ except ImportError: # Database imports from sqlalchemy import Column, String, Text, DateTime, Integer, inspect from sqlalchemy.orm import declarative_base, sessionmaker +from sqlalchemy.engine import Engine from datetime import datetime -def _discover_owui_engine(db_module) -> Any | None: +def _discover_owui_engine(db_module: Any) -> Optional[Engine]: if db_module is None: return None @@ -302,7 +303,7 @@ def _discover_owui_engine(db_module) -> Any | None: return None -def _discover_owui_schema(db_module) -> str | None: +def _discover_owui_schema(db_module: Any) -> Optional[str]: if db_module is None: return None @@ -403,20 +404,26 @@ class Filter: return if self._fallback_session_factory is None: - raise RuntimeError("Open WebUI database session is unavailable.") + raise RuntimeError( + "Open WebUI database session is unavailable. Ensure Open WebUI's database layer is initialized." + ) session = self._fallback_session_factory() try: yield session finally: - with contextlib.suppress(Exception): + try: session.close() + except Exception as exc: # pragma: no cover - best-effort cleanup + print(f"[Database] ⚠️ Failed to close fallback session: {exc}") def _init_database(self): """Initializes the database table using Open WebUI's shared connection.""" try: if self._db_engine is None: - raise RuntimeError("Open WebUI database engine is unavailable.") + raise RuntimeError( + "Open WebUI database engine is unavailable. Ensure Open WebUI is configured with a valid DATABASE_URL." + ) # Check if table exists using SQLAlchemy inspect inspector = inspect(self._db_engine) From 3649d7553900a5dfa7de39ca4c0c9a0bf77df031 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:30:02 +0000 Subject: [PATCH 6/9] chore: add discovery debug logs Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async_context_compression.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index ceaf97f..3297ee0 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -260,7 +260,7 @@ from open_webui.main import app as webui_app # Open WebUI internal database (re-use shared connection) try: from open_webui.internal import db as owui_db -except Exception: # pragma: no cover - filter runs inside Open WebUI +except ModuleNotFoundError: # pragma: no cover - filter runs inside Open WebUI owui_db = None # Try to import tiktoken @@ -292,8 +292,8 @@ def _discover_owui_engine(db_module: Any) -> Optional[Engine]: return getattr(session, "bind", None) or getattr( session, "engine", None ) - except Exception: - pass + except Exception as exc: + print(f"[DB Discover] get_db_context failed: {exc}") for attr in ("engine", "ENGINE", "bind", "BIND"): candidate = getattr(db_module, attr, None) @@ -313,8 +313,8 @@ def _discover_owui_schema(db_module: Any) -> Optional[str]: candidate = getattr(metadata, "schema", None) if metadata is not None else None if isinstance(candidate, str) and candidate.strip(): return candidate.strip() - except Exception: - pass + except Exception as exc: + print(f"[DB Discover] Base metadata schema lookup failed: {exc}") try: metadata_obj = getattr(db_module, "metadata_obj", None) @@ -323,8 +323,8 @@ def _discover_owui_schema(db_module: Any) -> Optional[str]: ) if isinstance(candidate, str) and candidate.strip(): return candidate.strip() - except Exception: - pass + except Exception as exc: + print(f"[DB Discover] metadata_obj schema lookup failed: {exc}") try: from open_webui import env as owui_env @@ -332,8 +332,8 @@ def _discover_owui_schema(db_module: Any) -> Optional[str]: candidate = getattr(owui_env, "DATABASE_SCHEMA", None) if isinstance(candidate, str) and candidate.strip(): return candidate.strip() - except Exception: - pass + except Exception as exc: + print(f"[DB Discover] env schema lookup failed: {exc}") return None From 7e44dde979d9e2b01e8c4d29da7329544db1cd75 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:31:10 +0000 Subject: [PATCH 7/9] chore: add discovery docstrings Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async-context-compression/async_context_compression.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 3297ee0..3d77db6 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -277,6 +277,7 @@ from datetime import datetime def _discover_owui_engine(db_module: Any) -> Optional[Engine]: + """Discover the Open WebUI SQLAlchemy engine via provided db module helpers.""" if db_module is None: return None @@ -288,7 +289,7 @@ def _discover_owui_engine(db_module: Any) -> Optional[Engine]: with db_context() as session: try: return session.get_bind() - except Exception: + except AttributeError: return getattr(session, "bind", None) or getattr( session, "engine", None ) @@ -304,6 +305,7 @@ def _discover_owui_engine(db_module: Any) -> Optional[Engine]: def _discover_owui_schema(db_module: Any) -> Optional[str]: + """Discover the Open WebUI database schema name if configured.""" if db_module is None: return None @@ -376,6 +378,7 @@ class Filter: @contextlib.contextmanager def _db_session(self): + """Yield a database session using Open WebUI helpers with graceful fallbacks.""" db_module = self._owui_db db_context = None if db_module is not None: From abbe3fb2486709c8583df6e0b9af03c725714bf5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 08:36:13 +0000 Subject: [PATCH 8/9] chore: centralize chat_id extraction helper Co-authored-by: Fu-Jie <33599649+Fu-Jie@users.noreply.github.com> --- .../async_context_compression.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 3d77db6..87e902e 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -618,6 +618,26 @@ class Filter: "max_context_tokens": self.valves.max_context_tokens, } + def _extract_chat_id(self, body: dict, metadata: Optional[dict]) -> str: + """Extract chat_id from body or metadata.""" + if isinstance(body, dict): + chat_id = body.get("chat_id") + if isinstance(chat_id, str) and chat_id.strip(): + return chat_id.strip() + + body_metadata = body.get("metadata", {}) + if isinstance(body_metadata, dict): + chat_id = body_metadata.get("chat_id") + if isinstance(chat_id, str) and chat_id.strip(): + return chat_id.strip() + + if isinstance(metadata, dict): + chat_id = metadata.get("chat_id") + if isinstance(chat_id, str) and chat_id.strip(): + return chat_id.strip() + + return "" + def _inject_summary_to_first_message(self, message: dict, summary: str) -> dict: """Injects the summary into the first message (prepended to content).""" content = message.get("content", "") @@ -763,7 +783,7 @@ class Filter: Compression Strategy: Only responsible for injecting existing summaries, no Token calculation. """ messages = body.get("messages", []) - chat_id = (__metadata__ or {}).get("chat_id") + chat_id = self._extract_chat_id(body, __metadata__) if not chat_id: await self._log( @@ -886,7 +906,7 @@ class Filter: Executed after the LLM response is complete. Calculates Token count in the background and triggers summary generation (does not block current response, does not affect content output). """ - chat_id = (__metadata__ or {}).get("chat_id") + chat_id = self._extract_chat_id(body, __metadata__) if not chat_id: await self._log( "[Outlet] ❌ Missing chat_id in metadata, skipping compression", From cbf2ff7f93f2b757e0868fba0be3a3007b2a1652 Mon Sep 17 00:00:00 2001 From: fujie Date: Sun, 11 Jan 2026 17:25:07 +0800 Subject: [PATCH 9/9] chore: release async-context-compression v1.1.2 - Enhanced error reporting via status bar and console - Robust model ID handling - Open WebUI v0.7.x compatibility (dynamic DB session) - Updated documentation and version bumps --- .../filters/async-context-compression.md | 5 +- .../filters/async-context-compression.zh.md | 5 +- docs/plugins/filters/index.md | 2 +- .../async-context-compression/README.md | 8 ++- .../async-context-compression/README_CN.md | 8 ++- .../async_context_compression.py | 42 ++++++++++-- .../async_context_compression_cn.py | 66 +++++++++++++++++-- 7 files changed, 119 insertions(+), 17 deletions(-) diff --git a/docs/plugins/filters/async-context-compression.md b/docs/plugins/filters/async-context-compression.md index 9dcb78c..7c93faf 100644 --- a/docs/plugins/filters/async-context-compression.md +++ b/docs/plugins/filters/async-context-compression.md @@ -1,7 +1,7 @@ # Async Context Compression Filter -v1.1.0 +v1.1.2 Reduces token consumption in long conversations through intelligent summarization while maintaining conversational coherence. @@ -29,6 +29,9 @@ This is especially useful for: - :material-clock-fast: **Async Processing**: Non-blocking background compression - :material-memory: **Context Preservation**: Keeps important information - :material-currency-usd-off: **Cost Reduction**: Minimize token usage +- :material-console: **Frontend Debugging**: Debug logs in browser console +- :material-alert-circle-check: **Enhanced Error Reporting**: Clear error status notifications +- :material-check-all: **Open WebUI v0.7.x Compatibility**: Dynamic DB session handling --- diff --git a/docs/plugins/filters/async-context-compression.zh.md b/docs/plugins/filters/async-context-compression.zh.md index c5600d0..2e20996 100644 --- a/docs/plugins/filters/async-context-compression.zh.md +++ b/docs/plugins/filters/async-context-compression.zh.md @@ -1,7 +1,7 @@ # Async Context Compression(异步上下文压缩) Filter -v1.1.0 +v1.1.2 通过智能摘要减少长对话的 token 消耗,同时保持对话连贯。 @@ -29,6 +29,9 @@ Async Context Compression 过滤器通过以下方式帮助管理长对话的 to - :material-clock-fast: **异步处理**:后台非阻塞压缩 - :material-memory: **保留上下文**:尽量保留重要信息 - :material-currency-usd-off: **降低成本**:减少 token 使用 +- :material-console: **前端调试**:支持浏览器控制台日志 +- :material-alert-circle-check: **增强错误报告**:清晰的错误状态通知 +- :material-check-all: **Open WebUI v0.7.x 兼容性**:动态数据库会话处理 --- diff --git a/docs/plugins/filters/index.md b/docs/plugins/filters/index.md index 669cdbf..bf56029 100644 --- a/docs/plugins/filters/index.md +++ b/docs/plugins/filters/index.md @@ -22,7 +22,7 @@ Filters act as middleware in the message pipeline: Reduces token consumption in long conversations through intelligent summarization while maintaining coherence. - **Version:** 1.1.0 + **Version:** 1.1.2 [:octicons-arrow-right-24: Documentation](async-context-compression.md) diff --git a/plugins/filters/async-context-compression/README.md b/plugins/filters/async-context-compression/README.md index bf33fd3..88d0607 100644 --- a/plugins/filters/async-context-compression/README.md +++ b/plugins/filters/async-context-compression/README.md @@ -1,9 +1,15 @@ # Async Context Compression Filter -**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.1.1 | **License:** MIT +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.1.2 | **License:** MIT This filter reduces token consumption in long conversations through intelligent summarization and message compression while keeping conversations coherent. +## What's new in 1.1.2 + +- **Open WebUI v0.7.x Compatibility**: Resolved a critical database session binding error affecting Open WebUI v0.7.x users. The plugin now dynamically discovers the database engine and session context, ensuring compatibility across versions. +- **Enhanced Error Reporting**: Errors during background summary generation are now reported via both the status bar and browser console. +- **Robust Model Handling**: Improved handling of missing or invalid model IDs to prevent crashes. + ## What's new in 1.1.1 - **Frontend Debugging**: Added `show_debug_log` option to print debug info to the browser console (F12). diff --git a/plugins/filters/async-context-compression/README_CN.md b/plugins/filters/async-context-compression/README_CN.md index fa75cb5..58875ae 100644 --- a/plugins/filters/async-context-compression/README_CN.md +++ b/plugins/filters/async-context-compression/README_CN.md @@ -1,11 +1,17 @@ # 异步上下文压缩过滤器 -**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.1.1 | **许可证:** MIT +**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.1.2 | **许可证:** MIT > **重要提示**:为了确保所有过滤器的可维护性和易用性,每个过滤器都应附带清晰、完整的文档,以确保其功能、配置和使用方法得到充分说明。 本过滤器通过智能摘要和消息压缩技术,在保持对话连贯性的同时,显著降低长对话的 Token 消耗。 +## 1.1.2 版本更新 + +- **Open WebUI v0.7.x 兼容性**: 修复了影响 Open WebUI v0.7.x 用户的严重数据库会话绑定错误。插件现在动态发现数据库引擎和会话上下文,确保跨版本兼容性。 +- **增强错误报告**: 后台摘要生成过程中的错误现在会通过状态栏和浏览器控制台同时报告。 +- **健壮的模型处理**: 改进了对缺失或无效模型 ID 的处理,防止程序崩溃。 + ## 1.1.1 版本更新 - **前端调试**: 新增 `show_debug_log` 选项,支持在浏览器控制台 (F12) 打印调试信息。 diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 87e902e..09355db 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -5,7 +5,7 @@ author: Fu-Jie author_url: https://github.com/Fu-Jie funding_url: https://github.com/Fu-Jie/awesome-openwebui description: Reduces token consumption in long conversations while maintaining coherence through intelligent summarization and message compression. -version: 1.1.1 +version: 1.1.2 openwebui_id: b1655bc8-6de9-4cad-8cb5-a6f7829a02ce license: MIT @@ -1002,6 +1002,13 @@ class Filter: event_call=__event_call__, ) + def _clean_model_id(self, model_id: Optional[str]) -> Optional[str]: + """Cleans the model ID by removing whitespace and quotes.""" + if not model_id: + return None + cleaned = model_id.strip().strip('"').strip("'") + return cleaned if cleaned else None + async def _generate_summary_async( self, messages: list, @@ -1058,11 +1065,13 @@ class Filter: # 3. Check Token limit and truncate (Max Context Truncation) # [Optimization] Use the summary model's (if any) threshold to decide how many middle messages can be processed # This allows using a long-window model (like gemini-flash) to compress history exceeding the current model's window - summary_model_id = self.valves.summary_model or body.get("model") + summary_model_id = self._clean_model_id( + self.valves.summary_model + ) or self._clean_model_id(body.get("model")) if not summary_model_id: await self._log( - "[🤖 Async Summary Task] ⚠️ Summary model is empty, skipping compression", + "[🤖 Async Summary Task] ⚠️ Summary model does not exist, skipping compression", type="warning", event_call=__event_call__, ) @@ -1135,7 +1144,7 @@ class Filter: "done": False, }, } - ) + ) new_summary = await self._call_summary_llm( None, @@ -1191,6 +1200,18 @@ class Filter: type="error", event_call=__event_call__, ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"Summary Error: {str(e)[:100]}...", + "done": True, + }, + } + ) + import traceback traceback.print_exc() @@ -1272,11 +1293,13 @@ This conversation may contain previous summaries (as system messages or text) an Based on the content above, generate the summary: """ # Determine the model to use - model = self.valves.summary_model or body.get("model", "") + model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( + body.get("model") + ) if not model: await self._log( - "[🤖 LLM Call] ⚠️ Model ID is empty, skipping summary generation", + "[🤖 LLM Call] ⚠️ Summary model does not exist, skipping summary generation", type="warning", event_call=__event_call__, ) @@ -1334,7 +1357,12 @@ Based on the content above, generate the summary: return summary except Exception as e: - error_message = f"Error occurred while calling LLM ({model}) to generate summary: {str(e)}" + error_msg = str(e) + # Handle specific error messages + if "Model not found" in error_msg: + error_message = f"Summary model '{model}' not found." + else: + error_message = f"Summary LLM Error ({model}): {error_msg}" if not self.valves.summary_model: error_message += ( "\n[Hint] You did not specify a summary_model, so the filter attempted to use the current conversation's model. " diff --git a/plugins/filters/async-context-compression/async_context_compression_cn.py b/plugins/filters/async-context-compression/async_context_compression_cn.py index 8d596ae..214c504 100644 --- a/plugins/filters/async-context-compression/async_context_compression_cn.py +++ b/plugins/filters/async-context-compression/async_context_compression_cn.py @@ -5,7 +5,7 @@ author: Fu-Jie author_url: https://github.com/Fu-Jie funding_url: https://github.com/Fu-Jie/awesome-openwebui description: 通过智能摘要和消息压缩,降低长对话的 token 消耗,同时保持对话连贯性。 -version: 1.1.1 +version: 1.1.2 openwebui_id: 5c0617cb-a9e4-4bd6-a440-d276534ebd18 license: MIT @@ -820,6 +820,13 @@ class Filter: event_call=__event_call__, ) + def _clean_model_id(self, model_id: Optional[str]) -> Optional[str]: + """Cleans the model ID by removing whitespace and quotes.""" + if not model_id: + return None + cleaned = model_id.strip().strip('"').strip("'") + return cleaned if cleaned else None + async def _generate_summary_async( self, messages: list, @@ -874,7 +881,17 @@ class Filter: # 3. 检查 Token 上限并截断 (Max Context Truncation) # [优化] 使用摘要模型(如果有)的阈值来决定能处理多少中间消息 # 这样可以用长窗口模型(如 gemini-flash)来压缩超过当前模型窗口的历史记录 - summary_model_id = self.valves.summary_model or body.get("model") + summary_model_id = self._clean_model_id( + self.valves.summary_model + ) or self._clean_model_id(body.get("model")) + + if not summary_model_id: + await self._log( + "[🤖 异步摘要任务] ⚠️ 摘要模型不存在,跳过压缩", + type="warning", + event_call=__event_call__, + ) + return thresholds = self._get_model_thresholds(summary_model_id) # 注意:这里使用的是摘要模型的最大上下文限制 @@ -946,9 +963,21 @@ class Filter: ) new_summary = await self._call_summary_llm( - None, conversation_text, body, user_data, __event_call__ + None, + conversation_text, + {**body, "model": summary_model_id}, + user_data, + __event_call__, ) + if not new_summary: + await self._log( + "[🤖 异步摘要任务] ⚠️ 摘要生成返回空结果,跳过保存", + type="warning", + event_call=__event_call__, + ) + return + # 6. 保存新摘要 await self._log( "[优化] 在后台线程中保存摘要以避免阻塞事件循环。", @@ -987,6 +1016,18 @@ class Filter: type="error", event_call=__event_call__, ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"摘要生成错误: {str(e)[:100]}...", + "done": True, + }, + } + ) + import traceback traceback.print_exc() @@ -1068,7 +1109,17 @@ class Filter: 请根据上述内容,生成摘要: """ # 确定使用的模型 - model = self.valves.summary_model or body.get("model", "") + model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( + body.get("model") + ) + + if not model: + await self._log( + "[🤖 LLM 调用] ⚠️ 摘要模型不存在,跳过摘要生成", + type="warning", + event_call=__event_call__, + ) + return "" await self._log(f"[🤖 LLM 调用] 模型: {model}", event_call=__event_call__) @@ -1122,7 +1173,12 @@ class Filter: return summary except Exception as e: - error_message = f"调用 LLM ({model}) 生成摘要时发生错误: {str(e)}" + error_msg = str(e) + # Handle specific error messages + if "Model not found" in error_msg: + error_message = f"摘要模型 '{model}' 不存在。" + else: + error_message = f"摘要 LLM 错误 ({model}): {error_msg}" if not self.valves.summary_model: error_message += ( "\n[提示] 您未指定 summary_model,因此过滤器尝试使用当前对话的模型。"