feat(async-context-compression): upgrade summary prompt to Working Memory architecture

- Redefine summary task as 'Working Memory' generation for higher density
- Add explicit instructions to extract facts from raw JSON tool outputs
- Implement 'Incremental Integration' rule to prevent recursive summary degradation
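- Enforce strict Markdown state structure (Goal, Facts, Code, Recent Actions, Pending)
- Reformat long conditional expressions and call arguments in Filter (formatting only, no behavior change)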
This commit is contained in:
fujie
2026-03-11 12:38:06 +08:00
parent cd95b5ff69
commit 5fe66a5803

View File

@@ -1516,27 +1516,31 @@ class Filter:
"index": index, "index": index,
"role": message.get("role", "unknown"), "role": message.get("role", "unknown"),
"has_tool_calls": bool(isinstance(tool_calls, list) and tool_calls), "has_tool_calls": bool(isinstance(tool_calls, list) and tool_calls),
"tool_call_count": len(tool_calls) "tool_call_count": (
if isinstance(tool_calls, list) len(tool_calls) if isinstance(tool_calls, list) else 0
else 0, ),
"tool_call_id_lengths": [ "tool_call_id_lengths": (
len(str(tc.get("id", ""))) [
for tc in tool_calls[:3] len(str(tc.get("id", "")))
if isinstance(tc, dict) for tc in tool_calls[:3]
] if isinstance(tc, dict)
if isinstance(tool_calls, list) ]
else [], if isinstance(tool_calls, list)
else []
),
"has_tool_call_id": isinstance(message.get("tool_call_id"), str), "has_tool_call_id": isinstance(message.get("tool_call_id"), str),
"tool_call_id_length": len(str(message.get("tool_call_id", ""))) "tool_call_id_length": (
if isinstance(message.get("tool_call_id"), str) len(str(message.get("tool_call_id", "")))
else 0, if isinstance(message.get("tool_call_id"), str)
else 0
),
"content_type": type(content).__name__, "content_type": type(content).__name__,
"content_length": len(content) if isinstance(content, str) else 0, "content_length": len(content) if isinstance(content, str) else 0,
"has_tool_details_block": isinstance(content, str) "has_tool_details_block": isinstance(content, str)
and '<details type="tool_calls"' in content, and '<details type="tool_calls"' in content,
"metadata_keys": sorted(metadata.keys())[:8] "metadata_keys": (
if isinstance(metadata, dict) sorted(metadata.keys())[:8] if isinstance(metadata, dict) else []
else [], ),
} }
if isinstance(content, list): if isinstance(content, list):
@@ -1585,14 +1589,16 @@ class Filter:
return { return {
"body_keys": sorted(body.keys()), "body_keys": sorted(body.keys()),
"metadata_keys": sorted(metadata.keys()) if isinstance(metadata, dict) else [], "metadata_keys": (
sorted(metadata.keys()) if isinstance(metadata, dict) else []
),
"params_keys": sorted(params.keys()) if isinstance(params, dict) else [], "params_keys": sorted(params.keys()) if isinstance(params, dict) else [],
"metadata_function_calling": metadata.get("function_calling") "metadata_function_calling": (
if isinstance(metadata, dict) metadata.get("function_calling") if isinstance(metadata, dict) else None
else None, ),
"params_function_calling": params.get("function_calling") "params_function_calling": (
if isinstance(params, dict) params.get("function_calling") if isinstance(params, dict) else None
else None, ),
"message_count": len(messages) if isinstance(messages, list) else 0, "message_count": len(messages) if isinstance(messages, list) else 0,
"role_counts": role_counts, "role_counts": role_counts,
"assistant_tool_call_indices": assistant_tool_call_indices[:8], "assistant_tool_call_indices": assistant_tool_call_indices[:8],
@@ -1624,9 +1630,11 @@ class Filter:
"id": message.get("id", ""), "id": message.get("id", ""),
"parentId": message.get("parentId") or message.get("parent_id"), "parentId": message.get("parentId") or message.get("parent_id"),
"tool_call_id": message.get("tool_call_id", ""), "tool_call_id": message.get("tool_call_id", ""),
"tool_call_count": len(message.get("tool_calls", [])) "tool_call_count": (
if isinstance(message.get("tool_calls"), list) len(message.get("tool_calls", []))
else 0, if isinstance(message.get("tool_calls"), list)
else 0
),
"is_summary": self._is_summary_message(message), "is_summary": self._is_summary_message(message),
"content_length": len(content) if isinstance(content, str) else 0, "content_length": len(content) if isinstance(content, str) else 0,
} }
@@ -1647,9 +1655,11 @@ class Filter:
"id": message.get("id", ""), "id": message.get("id", ""),
"parentId": message.get("parentId") or message.get("parent_id"), "parentId": message.get("parentId") or message.get("parent_id"),
"tool_call_id": message.get("tool_call_id", ""), "tool_call_id": message.get("tool_call_id", ""),
"tool_call_count": len(message.get("tool_calls", [])) "tool_call_count": (
if isinstance(message.get("tool_calls"), list) len(message.get("tool_calls", []))
else 0, if isinstance(message.get("tool_calls"), list)
else 0
),
"is_summary": self._is_summary_message(message), "is_summary": self._is_summary_message(message),
"content_length": len(content) if isinstance(content, str) else 0, "content_length": len(content) if isinstance(content, str) else 0,
} }
@@ -1659,7 +1669,9 @@ class Filter:
"message_count": len(messages), "message_count": len(messages),
"summary_state": summary_state, "summary_state": summary_state,
"original_history_count": self._get_original_history_count(messages), "original_history_count": self._get_original_history_count(messages),
"target_compressed_count": self._calculate_target_compressed_count(messages), "target_compressed_count": self._calculate_target_compressed_count(
messages
),
"effective_keep_first": self._get_effective_keep_first(messages), "effective_keep_first": self._get_effective_keep_first(messages),
"head_sample": sample, "head_sample": sample,
"tail_sample": tail_sample, "tail_sample": tail_sample,
@@ -1681,20 +1693,25 @@ class Filter:
continue continue
# If it's an assistant message with the hidden 'output' field, unfold it # If it's an assistant message with the hidden 'output' field, unfold it
if msg.get("role") == "assistant" and isinstance(msg.get("output"), list) and msg.get("output"): if (
msg.get("role") == "assistant"
and isinstance(msg.get("output"), list)
and msg.get("output")
):
try: try:
from open_webui.utils.misc import convert_output_to_messages from open_webui.utils.misc import convert_output_to_messages
expanded = convert_output_to_messages(msg["output"], raw=True) expanded = convert_output_to_messages(msg["output"], raw=True)
if expanded: if expanded:
unfolded.extend(expanded) unfolded.extend(expanded)
continue continue
except ImportError: except ImportError:
pass # Fallback if for some reason the internal import fails pass # Fallback if for some reason the internal import fails
# Clean message (strip 'output' field just like inlet does) # Clean message (strip 'output' field just like inlet does)
clean_msg = {k: v for k, v in msg.items() if k != "output"} clean_msg = {k: v for k, v in msg.items() if k != "output"}
unfolded.append(clean_msg) unfolded.append(clean_msg)
return unfolded return unfolded
def _get_function_calling_mode(self, body: dict) -> str: def _get_function_calling_mode(self, body: dict) -> str:
@@ -1831,7 +1848,9 @@ class Filter:
) )
except ValueError as ve: except ValueError as ve:
if "broadcast" in str(ve).lower(): if "broadcast" in str(ve).lower():
logger.debug("Cannot broadcast to frontend without explicit room; suppressing further frontend logs in this session.") logger.debug(
"Cannot broadcast to frontend without explicit room; suppressing further frontend logs in this session."
)
self.valves.show_debug_log = False self.valves.show_debug_log = False
else: else:
logger.error(f"Failed to process log to frontend: ValueError: {ve}") logger.error(f"Failed to process log to frontend: ValueError: {ve}")
@@ -2545,10 +2564,22 @@ class Filter:
# In the outlet phase, the frontend payload often lacks the hidden 'output' field. # In the outlet phase, the frontend payload often lacks the hidden 'output' field.
# We try to load the full, raw history from the database first. # We try to load the full, raw history from the database first.
db_messages = self._load_full_chat_messages(chat_id) db_messages = self._load_full_chat_messages(chat_id)
messages_to_unfold = db_messages if (db_messages and len(db_messages) >= len(messages)) else messages messages_to_unfold = (
db_messages
if (db_messages and len(db_messages) >= len(messages))
else messages
)
summary_messages = self._unfold_messages(messages_to_unfold) summary_messages = self._unfold_messages(messages_to_unfold)
message_source = "outlet-db-unfolded" if db_messages and len(summary_messages) != len(messages) else "outlet-body-unfolded" if len(summary_messages) != len(messages) else "outlet-body" message_source = (
"outlet-db-unfolded"
if db_messages and len(summary_messages) != len(messages)
else (
"outlet-body-unfolded"
if len(summary_messages) != len(messages)
else "outlet-body"
)
)
if self.valves.show_debug_log and __event_call__: if self.valves.show_debug_log and __event_call__:
source_progress = self._build_summary_progress_snapshot(summary_messages) source_progress = self._build_summary_progress_snapshot(summary_messages)
@@ -3179,43 +3210,37 @@ class Filter:
event_call=__event_call__, event_call=__event_call__,
) )
# Build summary prompt (Optimized) # Build summary prompt (Optimized for State/Working Memory and Tool Calling)
summary_prompt = f""" summary_prompt = f"""
You are a professional conversation context compression expert. Your task is to create a high-fidelity summary of the following conversation content. You are an expert Context Compression Engine. Your goal is to create a high-fidelity, highly dense "Working Memory" from the provided conversation.
This conversation may contain previous summaries (as system messages or text) and subsequent conversation content. This conversation may contain previous Working Memories and raw native tool-calling sequences (JSON arguments and results).
### Core Objectives ### Rules of Engagement
1. **Comprehensive Summary**: Concisely summarize key information, user intent, and assistant responses from the conversation. 1. **Incremental Integration**: If the conversation begins with an existing Working Memory/Summary, you must PRESERVE its core facts and MERGE the new conversation events into it. Do not discard older facts.
2. **De-noising**: Remove greetings, repetitions, confirmations, and other non-essential information. 2. **Tool-Call Decompression**: Raw JSON/Text outputs from tools are noisy. Extract ONLY the definitive facts, actionable data, or root causes of errors. Ignore the structural payload.
3. **Key Retention**: 3. **Ruthless Denoising**: Completely eliminate greetings, apologies ("I'm sorry for the error"), acknowledgments ("Sure, I can do that"), and redundant confirmations.
* **Code snippets, commands, and technical parameters must be preserved verbatim. Do not modify or generalize them.** 4. **Verbatim Retention**: ANY code snippets, shell commands, file paths, specific parameters, and Message IDs (e.g., [ID: ...]) MUST be kept exactly as they appear to maintain traceability.
* User intent, core requirements, decisions, and action items must be clearly preserved. 5. **Logic Preservation**: Clearly link "what the user asked" -> "what the tool found" -> "how the system reacted".
4. **Coherence**: The generated summary should be a cohesive whole that can replace the original conversation as context.
5. **Detailed Record**: Since length is permitted, please preserve details, reasoning processes, and nuances of multi-turn interactions as much as possible, rather than just high-level generalizations.
### Output Requirements ### Output Constraints
* **Format**: Structured text, logically clear. * **Format**: Strictly follow the Markdown structure below.
* **Language**: Consistent with the conversation language (usually English). * **Length**: Maximum {self.valves.max_summary_tokens} Tokens.
* **Length**: Strictly control within {self.valves.max_summary_tokens} Tokens. * **Tone**: Robotic, objective, dense.
* **Strictly Forbidden**: Do not output "According to the conversation...", "The summary is as follows..." or similar filler. Output the summary content directly. * **Language**: Consistent with the conversation language.
* **Forbidden**: NO conversational openings/closings (e.g., "Here is the summary", "Hope this helps"). Output the data directly.
### Suggested Summary Structure ### Suggested Summary Structure
* **Current Goal/Topic**: A one-sentence summary of the problem currently being solved. * **Current Goal**: What is the user ultimately trying to achieve?
* **Key Information & Context**: * **Working Memory & Facts**: (Bullet points of established facts, parsed tool results, and constraints. Cite Message IDs if critical).
* Confirmed facts/parameters. * **Code & Artifacts**: (Only if applicable. Include exact code blocks).
* **Code/Technical Details** (Wrap in code blocks). * **Recent Actions**: (e.g., "Attempted to run script, failed with SyntaxError, applied fix").
* **Progress & Conclusions**: Completed steps and reached consensus. * **Pending/Next Steps**: What is waiting to be done.
* **Action Items/Next Steps**: Clear follow-up actions.
### Identity Traceability
The input dialogue contains message IDs (e.g., [ID: ...]) and optional names.
If a specific message contributes a critical decision, a unique code snippet, or a tool-calling result, please reference its ID or Name in your summary to maintain traceability.
--- ---
{new_conversation_text} {new_conversation_text}
--- ---
Based on the content above, generate the summary (including key message identities where relevant): Generate the Working Memory:
""" """
# Determine the model to use # Determine the model to use
model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id( model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id(