diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py
index 26c13db..3e09a29 100644
--- a/plugins/filters/async-context-compression/async_context_compression.py
+++ b/plugins/filters/async-context-compression/async_context_compression.py
@@ -257,9 +257,7 @@ from fastapi.requests import Request
 from open_webui.main import app as webui_app
 
 # Open WebUI internal database (re-use shared connection)
-from open_webui.internal.db import engine as owui_engine
-from open_webui.internal.db import Session as owui_Session
-from open_webui.internal.db import Base as owui_Base
+import open_webui.internal.db as owui_db
 
 # Try to import tiktoken
 try:
@@ -272,6 +270,9 @@ from sqlalchemy import Column, String, Text, DateTime, Integer, inspect
 
 from datetime import datetime
 
+owui_Base = owui_db.Base
+
+
 class ChatSummary(owui_Base):
     """Chat Summary Storage Table"""
 
@@ -289,8 +290,14 @@ class ChatSummary(owui_Base):
 class Filter:
     def __init__(self):
         self.valves = self.Valves()
-        self._db_engine = owui_engine
-        self._SessionLocal = owui_Session
+        self._db_engine = owui_db.engine
+        self._SessionLocal = (
+            getattr(owui_db, "ScopedSession", None)
+            or getattr(owui_db, "SessionLocal", None)
+            or getattr(owui_db, "Session", None)
+        )
+        if self._SessionLocal is None:
+            raise RuntimeError("Open WebUI database session factory unavailable.")
         self.temp_state = {}  # Used to pass temporary data between inlet and outlet
         self._init_database()
 
@@ -632,7 +639,15 @@
         Compression Strategy: Only responsible for injecting existing summaries, no Token calculation.
         """
         messages = body.get("messages", [])
-        chat_id = __metadata__["chat_id"]
+        chat_id = (__metadata__ or {}).get("chat_id")
+
+        if not chat_id:
+            await self._log(
+                "[Inlet] ❌ Missing chat_id in metadata, skipping compression",
+                type="error",
+                event_call=__event_call__,
+            )
+            return body
 
         if self.valves.debug_mode or self.valves.show_debug_log:
             await self._log(
@@ -747,7 +762,14 @@
         Executed after the LLM response is complete. Calculates Token count in the background and triggers summary generation (does not block current response, does not affect content output).
         """
-        chat_id = __metadata__["chat_id"]
+        chat_id = (__metadata__ or {}).get("chat_id")
+        if not chat_id:
+            await self._log(
+                "[Outlet] ❌ Missing chat_id in metadata, skipping compression",
+                type="error",
+                event_call=__event_call__,
+            )
+            return body
 
         model = body.get("model", "gpt-3.5-turbo")
         if self.valves.debug_mode or self.valves.show_debug_log:
             await self._log(
@@ -892,10 +914,20 @@
         # 3. Check Token limit and truncate (Max Context Truncation)
         # [Optimization] Use the summary model's (if any) threshold to decide how many middle messages can be processed
         # This allows using a long-window model (like gemini-flash) to compress history exceeding the current model's window
-        summary_model_id = self.valves.summary_model or body.get(
-            "model", "gpt-3.5-turbo"
+        summary_model_id = (
+            self.valves.summary_model
+            or body.get("model")
+            or "gpt-3.5-turbo"
         )
+
+        if not summary_model_id:
+            await self._log(
+                "[🤖 Async Summary Task] ⚠️ Summary model is empty, skipping compression",
+                type="warning",
+                event_call=__event_call__,
+            )
+            return
 
         thresholds = self._get_model_thresholds(summary_model_id)
         # Note: Using the summary model's max context limit here
         max_context_tokens = thresholds.get(
@@ -963,12 +995,24 @@
                         "done": False,
                     },
                 }
-                )
+            )
 
         new_summary = await self._call_summary_llm(
-            None, conversation_text, body, user_data, __event_call__
+            None,
+            conversation_text,
+            {**body, "model": summary_model_id},
+            user_data,
+            __event_call__,
         )
+
+        if not new_summary:
+            await self._log(
+                "[🤖 Async Summary Task] ⚠️ Summary generation returned empty result, skipping save",
+                type="warning",
+                event_call=__event_call__,
+            )
+            return
 
         # 6. Save new summary
         await self._log(
             "[Optimization] Saving summary in a background thread to avoid blocking the event loop.",
@@ -1090,6 +1134,14 @@ Based on the content above, generate the summary:
 
         # Determine the model to use
         model = self.valves.summary_model or body.get("model", "")
+        if not model:
+            await self._log(
+                "[🤖 LLM Call] ⚠️ Model ID is empty, skipping summary generation",
+                type="warning",
+                event_call=__event_call__,
+            )
+            return ""
+
         await self._log(f"[🤖 LLM Call] Model: {model}", event_call=__event_call__)
 
         # Build payload
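
Note: the getattr() chain in __init__ exists because different Open WebUI releases export the shared SQLAlchemy session factory from open_webui.internal.db under different names. A minimal sketch of the resolution pattern in isolation (the attribute names mirror the patch; which one is actually present depends on the installed Open WebUI version, and the usage at the end is an illustrative assumption, not code from this patch):

import open_webui.internal.db as owui_db

# Probe candidate attribute names in order; the first one that exists wins.
SessionLocal = (
    getattr(owui_db, "ScopedSession", None)
    or getattr(owui_db, "SessionLocal", None)
    or getattr(owui_db, "Session", None)
)
if SessionLocal is None:
    raise RuntimeError("Open WebUI database session factory unavailable.")

# Typical usage: one short-lived session per operation, always closed
# (this matters when the factory is a scoped_session shared across threads).
session = SessionLocal()
try:
    ...  # query or persist ChatSummary rows here
finally:
    session.close()

Failing fast with a RuntimeError at construction time, rather than letting a None factory surface later as an AttributeError inside inlet/outlet, keeps the incompatibility visible in the plugin load log instead of in mid-chat failures.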