feat(async-context-compression): upgrade summary prompt to Working Memory architecture

- Redefine summary task as 'Working Memory' generation for higher density
- Add explicit instructions to extract facts from raw JSON tool outputs
- Implement 'Incremental Integration' rule to prevent recursive summary degradation
- Enforce strict Markdown state structure (Goal, Facts, Code, Recent Actions, Pending)
This commit is contained in:
fujie
2026-03-11 12:38:06 +08:00
parent cd95b5ff69
commit 5fe66a5803

View File

@@ -1516,27 +1516,31 @@ class Filter:
"index": index,
"role": message.get("role", "unknown"),
"has_tool_calls": bool(isinstance(tool_calls, list) and tool_calls),
"tool_call_count": len(tool_calls)
if isinstance(tool_calls, list)
else 0,
"tool_call_id_lengths": [
len(str(tc.get("id", "")))
for tc in tool_calls[:3]
if isinstance(tc, dict)
]
if isinstance(tool_calls, list)
else [],
"tool_call_count": (
len(tool_calls) if isinstance(tool_calls, list) else 0
),
"tool_call_id_lengths": (
[
len(str(tc.get("id", "")))
for tc in tool_calls[:3]
if isinstance(tc, dict)
]
if isinstance(tool_calls, list)
else []
),
"has_tool_call_id": isinstance(message.get("tool_call_id"), str),
"tool_call_id_length": len(str(message.get("tool_call_id", "")))
if isinstance(message.get("tool_call_id"), str)
else 0,
"tool_call_id_length": (
len(str(message.get("tool_call_id", "")))
if isinstance(message.get("tool_call_id"), str)
else 0
),
"content_type": type(content).__name__,
"content_length": len(content) if isinstance(content, str) else 0,
"has_tool_details_block": isinstance(content, str)
and '<details type="tool_calls"' in content,
"metadata_keys": sorted(metadata.keys())[:8]
if isinstance(metadata, dict)
else [],
"metadata_keys": (
sorted(metadata.keys())[:8] if isinstance(metadata, dict) else []
),
}
if isinstance(content, list):
@@ -1585,14 +1589,16 @@ class Filter:
return {
"body_keys": sorted(body.keys()),
"metadata_keys": sorted(metadata.keys()) if isinstance(metadata, dict) else [],
"metadata_keys": (
sorted(metadata.keys()) if isinstance(metadata, dict) else []
),
"params_keys": sorted(params.keys()) if isinstance(params, dict) else [],
"metadata_function_calling": metadata.get("function_calling")
if isinstance(metadata, dict)
else None,
"params_function_calling": params.get("function_calling")
if isinstance(params, dict)
else None,
"metadata_function_calling": (
metadata.get("function_calling") if isinstance(metadata, dict) else None
),
"params_function_calling": (
params.get("function_calling") if isinstance(params, dict) else None
),
"message_count": len(messages) if isinstance(messages, list) else 0,
"role_counts": role_counts,
"assistant_tool_call_indices": assistant_tool_call_indices[:8],
@@ -1624,9 +1630,11 @@ class Filter:
"id": message.get("id", ""),
"parentId": message.get("parentId") or message.get("parent_id"),
"tool_call_id": message.get("tool_call_id", ""),
"tool_call_count": len(message.get("tool_calls", []))
if isinstance(message.get("tool_calls"), list)
else 0,
"tool_call_count": (
len(message.get("tool_calls", []))
if isinstance(message.get("tool_calls"), list)
else 0
),
"is_summary": self._is_summary_message(message),
"content_length": len(content) if isinstance(content, str) else 0,
}
@@ -1647,9 +1655,11 @@ class Filter:
"id": message.get("id", ""),
"parentId": message.get("parentId") or message.get("parent_id"),
"tool_call_id": message.get("tool_call_id", ""),
"tool_call_count": len(message.get("tool_calls", []))
if isinstance(message.get("tool_calls"), list)
else 0,
"tool_call_count": (
len(message.get("tool_calls", []))
if isinstance(message.get("tool_calls"), list)
else 0
),
"is_summary": self._is_summary_message(message),
"content_length": len(content) if isinstance(content, str) else 0,
}
@@ -1659,7 +1669,9 @@ class Filter:
"message_count": len(messages),
"summary_state": summary_state,
"original_history_count": self._get_original_history_count(messages),
"target_compressed_count": self._calculate_target_compressed_count(messages),
"target_compressed_count": self._calculate_target_compressed_count(
messages
),
"effective_keep_first": self._get_effective_keep_first(messages),
"head_sample": sample,
"tail_sample": tail_sample,
@@ -1681,20 +1693,25 @@ class Filter:
continue
# If it's an assistant message with the hidden 'output' field, unfold it
if msg.get("role") == "assistant" and isinstance(msg.get("output"), list) and msg.get("output"):
if (
msg.get("role") == "assistant"
and isinstance(msg.get("output"), list)
and msg.get("output")
):
try:
from open_webui.utils.misc import convert_output_to_messages
expanded = convert_output_to_messages(msg["output"], raw=True)
if expanded:
unfolded.extend(expanded)
continue
except ImportError:
pass # Fallback if for some reason the internal import fails
pass # Fallback if for some reason the internal import fails
# Clean message (strip 'output' field just like inlet does)
clean_msg = {k: v for k, v in msg.items() if k != "output"}
unfolded.append(clean_msg)
return unfolded
def _get_function_calling_mode(self, body: dict) -> str:
@@ -1831,7 +1848,9 @@ class Filter:
)
except ValueError as ve:
if "broadcast" in str(ve).lower():
logger.debug("Cannot broadcast to frontend without explicit room; suppressing further frontend logs in this session.")
logger.debug(
"Cannot broadcast to frontend without explicit room; suppressing further frontend logs in this session."
)
self.valves.show_debug_log = False
else:
logger.error(f"Failed to process log to frontend: ValueError: {ve}")
@@ -2545,10 +2564,22 @@ class Filter:
# In the outlet phase, the frontend payload often lacks the hidden 'output' field.
# We try to load the full, raw history from the database first.
db_messages = self._load_full_chat_messages(chat_id)
messages_to_unfold = db_messages if (db_messages and len(db_messages) >= len(messages)) else messages
messages_to_unfold = (
db_messages
if (db_messages and len(db_messages) >= len(messages))
else messages
)
summary_messages = self._unfold_messages(messages_to_unfold)
message_source = "outlet-db-unfolded" if db_messages and len(summary_messages) != len(messages) else "outlet-body-unfolded" if len(summary_messages) != len(messages) else "outlet-body"
message_source = (
"outlet-db-unfolded"
if db_messages and len(summary_messages) != len(messages)
else (
"outlet-body-unfolded"
if len(summary_messages) != len(messages)
else "outlet-body"
)
)
if self.valves.show_debug_log and __event_call__:
source_progress = self._build_summary_progress_snapshot(summary_messages)
@@ -3179,43 +3210,37 @@ class Filter:
event_call=__event_call__,
)
# Build summary prompt (Optimized)
# Build summary prompt (Optimized for State/Working Memory and Tool Calling)
summary_prompt = f"""
You are a professional conversation context compression expert. Your task is to create a high-fidelity summary of the following conversation content.
This conversation may contain previous summaries (as system messages or text) and subsequent conversation content.
You are an expert Context Compression Engine. Your goal is to create a high-fidelity, highly dense "Working Memory" from the provided conversation.
This conversation may contain previous Working Memories and raw native tool-calling sequences (JSON arguments and results).
### Core Objectives
1. **Comprehensive Summary**: Concisely summarize key information, user intent, and assistant responses from the conversation.
2. **De-noising**: Remove greetings, repetitions, confirmations, and other non-essential information.
3. **Key Retention**:
* **Code snippets, commands, and technical parameters must be preserved verbatim. Do not modify or generalize them.**
* User intent, core requirements, decisions, and action items must be clearly preserved.
4. **Coherence**: The generated summary should be a cohesive whole that can replace the original conversation as context.
5. **Detailed Record**: Since length is permitted, please preserve details, reasoning processes, and nuances of multi-turn interactions as much as possible, rather than just high-level generalizations.
### Rules of Engagement
1. **Incremental Integration**: If the conversation begins with an existing Working Memory/Summary, you must PRESERVE its core facts and MERGE the new conversation events into it. Do not discard older facts.
2. **Tool-Call Decompression**: Raw JSON/Text outputs from tools are noisy. Extract ONLY the definitive facts, actionable data, or root causes of errors. Ignore the structural payload.
3. **Ruthless Denoising**: Completely eliminate greetings, apologies ("I'm sorry for the error"), acknowledgments ("Sure, I can do that"), and redundant confirmations.
4. **Verbatim Retention**: ANY code snippets, shell commands, file paths, specific parameters, and Message IDs (e.g., [ID: ...]) MUST be kept exactly as they appear to maintain traceability.
5. **Logic Preservation**: Clearly link "what the user asked" -> "what the tool found" -> "how the system reacted".
### Output Requirements
* **Format**: Structured text, logically clear.
* **Language**: Consistent with the conversation language (usually English).
* **Length**: Strictly control within {self.valves.max_summary_tokens} Tokens.
* **Strictly Forbidden**: Do not output "According to the conversation...", "The summary is as follows..." or similar filler. Output the summary content directly.
### Output Constraints
* **Format**: Strictly follow the Markdown structure below.
* **Length**: Maximum {self.valves.max_summary_tokens} Tokens.
* **Tone**: Robotic, objective, dense.
* **Language**: Consistent with the conversation language.
* **Forbidden**: NO conversational openings/closings (e.g., "Here is the summary", "Hope this helps"). Output the data directly.
### Suggested Summary Structure
* **Current Goal/Topic**: A one-sentence summary of the problem currently being solved.
* **Key Information & Context**:
* Confirmed facts/parameters.
* **Code/Technical Details** (Wrap in code blocks).
* **Progress & Conclusions**: Completed steps and reached consensus.
* **Action Items/Next Steps**: Clear follow-up actions.
### Identity Traceability
The input dialogue contains message IDs (e.g., [ID: ...]) and optional names.
If a specific message contributes a critical decision, a unique code snippet, or a tool-calling result, please reference its ID or Name in your summary to maintain traceability.
* **Current Goal**: What is the user ultimately trying to achieve?
* **Working Memory & Facts**: (Bullet points of established facts, parsed tool results, and constraints. Cite Message IDs if critical).
* **Code & Artifacts**: (Only if applicable. Include exact code blocks).
* **Recent Actions**: (e.g., "Attempted to run script, failed with SyntaxError, applied fix").
* **Pending/Next Steps**: What is waiting to be done.
---
{new_conversation_text}
---
Based on the content above, generate the summary (including key message identities where relevant):
Generate the Working Memory:
"""
# Determine the model to use
model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id(