diff --git a/docs/plugins/filters/async-context-compression.md b/docs/plugins/filters/async-context-compression.md index 9dcb78c..7c93faf 100644 --- a/docs/plugins/filters/async-context-compression.md +++ b/docs/plugins/filters/async-context-compression.md @@ -1,7 +1,7 @@ # Async Context Compression Filter -v1.1.0 +v1.1.2 Reduces token consumption in long conversations through intelligent summarization while maintaining conversational coherence. @@ -29,6 +29,9 @@ This is especially useful for: - :material-clock-fast: **Async Processing**: Non-blocking background compression - :material-memory: **Context Preservation**: Keeps important information - :material-currency-usd-off: **Cost Reduction**: Minimize token usage +- :material-console: **Frontend Debugging**: Debug logs in browser console +- :material-alert-circle-check: **Enhanced Error Reporting**: Clear error status notifications +- :material-check-all: **Open WebUI v0.7.x Compatibility**: Dynamic DB session handling --- diff --git a/docs/plugins/filters/async-context-compression.zh.md b/docs/plugins/filters/async-context-compression.zh.md index c5600d0..2e20996 100644 --- a/docs/plugins/filters/async-context-compression.zh.md +++ b/docs/plugins/filters/async-context-compression.zh.md @@ -1,7 +1,7 @@ # Async Context Compression(异步上下文压缩) Filter -v1.1.0 +v1.1.2 通过智能摘要减少长对话的 token 消耗,同时保持对话连贯。 @@ -29,6 +29,9 @@ Async Context Compression 过滤器通过以下方式帮助管理长对话的 to - :material-clock-fast: **异步处理**:后台非阻塞压缩 - :material-memory: **保留上下文**:尽量保留重要信息 - :material-currency-usd-off: **降低成本**:减少 token 使用 +- :material-console: **前端调试**:支持浏览器控制台日志 +- :material-alert-circle-check: **增强错误报告**:清晰的错误状态通知 +- :material-check-all: **Open WebUI v0.7.x 兼容性**:动态数据库会话处理 --- diff --git a/docs/plugins/filters/index.md b/docs/plugins/filters/index.md index 669cdbf..bf56029 100644 --- a/docs/plugins/filters/index.md +++ b/docs/plugins/filters/index.md @@ -22,7 +22,7 @@ Filters act as middleware in the message pipeline: Reduces token consumption in 
long conversations through intelligent summarization while maintaining coherence. - **Version:** 1.1.0 + **Version:** 1.1.2 [:octicons-arrow-right-24: Documentation](async-context-compression.md) diff --git a/plugins/filters/async-context-compression/README.md b/plugins/filters/async-context-compression/README.md index bf33fd3..88d0607 100644 --- a/plugins/filters/async-context-compression/README.md +++ b/plugins/filters/async-context-compression/README.md @@ -1,9 +1,15 @@ # Async Context Compression Filter -**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.1.1 | **License:** MIT +**Author:** [Fu-Jie](https://github.com/Fu-Jie) | **Version:** 1.1.2 | **License:** MIT This filter reduces token consumption in long conversations through intelligent summarization and message compression while keeping conversations coherent. +## What's new in 1.1.2 + +- **Open WebUI v0.7.x Compatibility**: Resolved a critical database session binding error affecting Open WebUI v0.7.x users. The plugin now dynamically discovers the database engine and session context, ensuring compatibility across versions. +- **Enhanced Error Reporting**: Errors during background summary generation are now reported via both the status bar and browser console. +- **Robust Model Handling**: Improved handling of missing or invalid model IDs to prevent crashes. + ## What's new in 1.1.1 - **Frontend Debugging**: Added `show_debug_log` option to print debug info to the browser console (F12). 
diff --git a/plugins/filters/async-context-compression/README_CN.md b/plugins/filters/async-context-compression/README_CN.md index fa75cb5..58875ae 100644 --- a/plugins/filters/async-context-compression/README_CN.md +++ b/plugins/filters/async-context-compression/README_CN.md @@ -1,11 +1,17 @@ # 异步上下文压缩过滤器 -**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.1.1 | **许可证:** MIT +**作者:** [Fu-Jie](https://github.com/Fu-Jie) | **版本:** 1.1.2 | **许可证:** MIT > **重要提示**:为了确保所有过滤器的可维护性和易用性,每个过滤器都应附带清晰、完整的文档,以确保其功能、配置和使用方法得到充分说明。 本过滤器通过智能摘要和消息压缩技术,在保持对话连贯性的同时,显著降低长对话的 Token 消耗。 +## 1.1.2 版本更新 + +- **Open WebUI v0.7.x 兼容性**: 修复了影响 Open WebUI v0.7.x 用户的严重数据库会话绑定错误。插件现在动态发现数据库引擎和会话上下文,确保跨版本兼容性。 +- **增强错误报告**: 后台摘要生成过程中的错误现在会通过状态栏和浏览器控制台同时报告。 +- **健壮的模型处理**: 改进了对缺失或无效模型 ID 的处理,防止程序崩溃。 + ## 1.1.1 版本更新 - **前端调试**: 新增 `show_debug_log` 选项,支持在浏览器控制台 (F12) 打印调试信息。 diff --git a/plugins/filters/async-context-compression/async_context_compression.py b/plugins/filters/async-context-compression/async_context_compression.py index 87e902e..09355db 100644 --- a/plugins/filters/async-context-compression/async_context_compression.py +++ b/plugins/filters/async-context-compression/async_context_compression.py @@ -5,7 +5,7 @@ author: Fu-Jie author_url: https://github.com/Fu-Jie funding_url: https://github.com/Fu-Jie/awesome-openwebui description: Reduces token consumption in long conversations while maintaining coherence through intelligent summarization and message compression. -version: 1.1.1 +version: 1.1.2 openwebui_id: b1655bc8-6de9-4cad-8cb5-a6f7829a02ce license: MIT @@ -1002,6 +1002,13 @@ class Filter: event_call=__event_call__, ) + def _clean_model_id(self, model_id: Optional[str]) -> Optional[str]: + """Return the model ID stripped of surrounding whitespace and quotes; None if it is not a string or nothing is left.""" + if not isinstance(model_id, str): + return None + cleaned = model_id.strip().strip("\"'").strip() + return cleaned if cleaned else None + async def _generate_summary_async( self, messages: list, @@ -1058,11 +1065,13 @@ class Filter: # 3. 
Check Token limit and truncate (Max Context Truncation) # [Optimization] Use the summary model's (if any) threshold to decide how many middle messages can be processed # This allows using a long-window model (like gemini-flash) to compress history exceeding the current model's window - summary_model_id = self.valves.summary_model or body.get("model") + summary_model_id = self._clean_model_id( + self.valves.summary_model + ) or self._clean_model_id(body.get("model")) if not summary_model_id: await self._log( - "[🤖 Async Summary Task] ⚠️ Summary model is empty, skipping compression", + "[🤖 Async Summary Task] ⚠️ Summary model does not exist, skipping compression", type="warning", event_call=__event_call__, ) @@ -1135,7 +1144,7 @@ class Filter: "done": False, }, } - ) + ) new_summary = await self._call_summary_llm( None, @@ -1191,6 +1200,18 @@ class Filter: type="error", event_call=__event_call__, ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"Summary Error: {str(e)[:100]}...", + "done": True, + }, + } + ) + import traceback traceback.print_exc() @@ -1272,11 +1293,13 @@ This conversation may contain previous summaries (as system messages or text) an Based on the content above, generate the summary: """ # Determine the model to use - model = self.valves.summary_model or body.get("model", "") + model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( + body.get("model") + ) if not model: await self._log( - "[🤖 LLM Call] ⚠️ Model ID is empty, skipping summary generation", + "[🤖 LLM Call] ⚠️ Summary model does not exist, skipping summary generation", type="warning", event_call=__event_call__, ) @@ -1334,7 +1357,12 @@ Based on the content above, generate the summary: return summary except Exception as e: - error_message = f"Error occurred while calling LLM ({model}) to generate summary: {str(e)}" + error_msg = str(e) + # Handle specific error messages + if "Model not found" in error_msg: 
+ error_message = f"Summary model '{model}' not found." + else: + error_message = f"Summary LLM Error ({model}): {error_msg}" if not self.valves.summary_model: error_message += ( "\n[Hint] You did not specify a summary_model, so the filter attempted to use the current conversation's model. " diff --git a/plugins/filters/async-context-compression/async_context_compression_cn.py b/plugins/filters/async-context-compression/async_context_compression_cn.py index 8d596ae..214c504 100644 --- a/plugins/filters/async-context-compression/async_context_compression_cn.py +++ b/plugins/filters/async-context-compression/async_context_compression_cn.py @@ -5,7 +5,7 @@ author: Fu-Jie author_url: https://github.com/Fu-Jie funding_url: https://github.com/Fu-Jie/awesome-openwebui description: 通过智能摘要和消息压缩,降低长对话的 token 消耗,同时保持对话连贯性。 -version: 1.1.1 +version: 1.1.2 openwebui_id: 5c0617cb-a9e4-4bd6-a440-d276534ebd18 license: MIT @@ -820,6 +820,13 @@ class Filter: event_call=__event_call__, ) + def _clean_model_id(self, model_id: Optional[str]) -> Optional[str]: + """Return the model ID stripped of surrounding whitespace and quotes; None if it is not a string or nothing is left.""" + if not isinstance(model_id, str): + return None + cleaned = model_id.strip().strip("\"'").strip() + return cleaned if cleaned else None + async def _generate_summary_async( self, messages: list, @@ -874,7 +881,17 @@ class Filter: # 3. 
检查 Token 上限并截断 (Max Context Truncation) # [优化] 使用摘要模型(如果有)的阈值来决定能处理多少中间消息 # 这样可以用长窗口模型(如 gemini-flash)来压缩超过当前模型窗口的历史记录 - summary_model_id = self.valves.summary_model or body.get("model") + summary_model_id = self._clean_model_id( + self.valves.summary_model + ) or self._clean_model_id(body.get("model")) + + if not summary_model_id: + await self._log( + "[🤖 异步摘要任务] ⚠️ 摘要模型不存在,跳过压缩", + type="warning", + event_call=__event_call__, + ) + return thresholds = self._get_model_thresholds(summary_model_id) # 注意:这里使用的是摘要模型的最大上下文限制 @@ -946,9 +963,21 @@ class Filter: ) new_summary = await self._call_summary_llm( - None, conversation_text, body, user_data, __event_call__ + None, + conversation_text, + {**body, "model": summary_model_id}, + user_data, + __event_call__, ) + if not new_summary: + await self._log( + "[🤖 异步摘要任务] ⚠️ 摘要生成返回空结果,跳过保存", + type="warning", + event_call=__event_call__, + ) + return + # 6. 保存新摘要 await self._log( "[优化] 在后台线程中保存摘要以避免阻塞事件循环。", @@ -987,6 +1016,18 @@ class Filter: type="error", event_call=__event_call__, ) + + if __event_emitter__: + await __event_emitter__( + { + "type": "status", + "data": { + "description": f"摘要生成错误: {str(e)[:100]}...", + "done": True, + }, + } + ) + import traceback traceback.print_exc() @@ -1068,7 +1109,17 @@ class Filter: 请根据上述内容,生成摘要: """ # 确定使用的模型 - model = self.valves.summary_model or body.get("model", "") + model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( + body.get("model") + ) + + if not model: + await self._log( + "[🤖 LLM 调用] ⚠️ 摘要模型不存在,跳过摘要生成", + type="warning", + event_call=__event_call__, + ) + return "" await self._log(f"[🤖 LLM 调用] 模型: {model}", event_call=__event_call__) @@ -1122,7 +1173,12 @@ class Filter: return summary except Exception as e: - error_message = f"调用 LLM ({model}) 生成摘要时发生错误: {str(e)}" + error_msg = str(e) + # Handle specific error messages + if "Model not found" in error_msg: + error_message = f"摘要模型 '{model}' 不存在。" + else: + error_message = f"摘要 LLM 错误 ({model}): 
{error_msg}" if not self.valves.summary_model: error_message += ( "\n[提示] 您未指定 summary_model,因此过滤器尝试使用当前对话的模型。"