feat(async-context-compression): upgrade summary prompt to Working Memory architecture
- Redefine the summary task as 'Working Memory' generation for higher density
- Add explicit instructions to extract facts from raw JSON tool outputs
- Implement an 'Incremental Integration' rule to prevent recursive summary degradation
- Enforce a strict Markdown state structure (Goal, Facts, Code, Pending)
This commit is contained in:
@@ -1516,27 +1516,31 @@ class Filter:
|
|||||||
"index": index,
|
"index": index,
|
||||||
"role": message.get("role", "unknown"),
|
"role": message.get("role", "unknown"),
|
||||||
"has_tool_calls": bool(isinstance(tool_calls, list) and tool_calls),
|
"has_tool_calls": bool(isinstance(tool_calls, list) and tool_calls),
|
||||||
"tool_call_count": len(tool_calls)
|
"tool_call_count": (
|
||||||
if isinstance(tool_calls, list)
|
len(tool_calls) if isinstance(tool_calls, list) else 0
|
||||||
else 0,
|
),
|
||||||
"tool_call_id_lengths": [
|
"tool_call_id_lengths": (
|
||||||
len(str(tc.get("id", "")))
|
[
|
||||||
for tc in tool_calls[:3]
|
len(str(tc.get("id", "")))
|
||||||
if isinstance(tc, dict)
|
for tc in tool_calls[:3]
|
||||||
]
|
if isinstance(tc, dict)
|
||||||
if isinstance(tool_calls, list)
|
]
|
||||||
else [],
|
if isinstance(tool_calls, list)
|
||||||
|
else []
|
||||||
|
),
|
||||||
"has_tool_call_id": isinstance(message.get("tool_call_id"), str),
|
"has_tool_call_id": isinstance(message.get("tool_call_id"), str),
|
||||||
"tool_call_id_length": len(str(message.get("tool_call_id", "")))
|
"tool_call_id_length": (
|
||||||
if isinstance(message.get("tool_call_id"), str)
|
len(str(message.get("tool_call_id", "")))
|
||||||
else 0,
|
if isinstance(message.get("tool_call_id"), str)
|
||||||
|
else 0
|
||||||
|
),
|
||||||
"content_type": type(content).__name__,
|
"content_type": type(content).__name__,
|
||||||
"content_length": len(content) if isinstance(content, str) else 0,
|
"content_length": len(content) if isinstance(content, str) else 0,
|
||||||
"has_tool_details_block": isinstance(content, str)
|
"has_tool_details_block": isinstance(content, str)
|
||||||
and '<details type="tool_calls"' in content,
|
and '<details type="tool_calls"' in content,
|
||||||
"metadata_keys": sorted(metadata.keys())[:8]
|
"metadata_keys": (
|
||||||
if isinstance(metadata, dict)
|
sorted(metadata.keys())[:8] if isinstance(metadata, dict) else []
|
||||||
else [],
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
@@ -1585,14 +1589,16 @@ class Filter:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"body_keys": sorted(body.keys()),
|
"body_keys": sorted(body.keys()),
|
||||||
"metadata_keys": sorted(metadata.keys()) if isinstance(metadata, dict) else [],
|
"metadata_keys": (
|
||||||
|
sorted(metadata.keys()) if isinstance(metadata, dict) else []
|
||||||
|
),
|
||||||
"params_keys": sorted(params.keys()) if isinstance(params, dict) else [],
|
"params_keys": sorted(params.keys()) if isinstance(params, dict) else [],
|
||||||
"metadata_function_calling": metadata.get("function_calling")
|
"metadata_function_calling": (
|
||||||
if isinstance(metadata, dict)
|
metadata.get("function_calling") if isinstance(metadata, dict) else None
|
||||||
else None,
|
),
|
||||||
"params_function_calling": params.get("function_calling")
|
"params_function_calling": (
|
||||||
if isinstance(params, dict)
|
params.get("function_calling") if isinstance(params, dict) else None
|
||||||
else None,
|
),
|
||||||
"message_count": len(messages) if isinstance(messages, list) else 0,
|
"message_count": len(messages) if isinstance(messages, list) else 0,
|
||||||
"role_counts": role_counts,
|
"role_counts": role_counts,
|
||||||
"assistant_tool_call_indices": assistant_tool_call_indices[:8],
|
"assistant_tool_call_indices": assistant_tool_call_indices[:8],
|
||||||
@@ -1624,9 +1630,11 @@ class Filter:
|
|||||||
"id": message.get("id", ""),
|
"id": message.get("id", ""),
|
||||||
"parentId": message.get("parentId") or message.get("parent_id"),
|
"parentId": message.get("parentId") or message.get("parent_id"),
|
||||||
"tool_call_id": message.get("tool_call_id", ""),
|
"tool_call_id": message.get("tool_call_id", ""),
|
||||||
"tool_call_count": len(message.get("tool_calls", []))
|
"tool_call_count": (
|
||||||
if isinstance(message.get("tool_calls"), list)
|
len(message.get("tool_calls", []))
|
||||||
else 0,
|
if isinstance(message.get("tool_calls"), list)
|
||||||
|
else 0
|
||||||
|
),
|
||||||
"is_summary": self._is_summary_message(message),
|
"is_summary": self._is_summary_message(message),
|
||||||
"content_length": len(content) if isinstance(content, str) else 0,
|
"content_length": len(content) if isinstance(content, str) else 0,
|
||||||
}
|
}
|
||||||
@@ -1647,9 +1655,11 @@ class Filter:
|
|||||||
"id": message.get("id", ""),
|
"id": message.get("id", ""),
|
||||||
"parentId": message.get("parentId") or message.get("parent_id"),
|
"parentId": message.get("parentId") or message.get("parent_id"),
|
||||||
"tool_call_id": message.get("tool_call_id", ""),
|
"tool_call_id": message.get("tool_call_id", ""),
|
||||||
"tool_call_count": len(message.get("tool_calls", []))
|
"tool_call_count": (
|
||||||
if isinstance(message.get("tool_calls"), list)
|
len(message.get("tool_calls", []))
|
||||||
else 0,
|
if isinstance(message.get("tool_calls"), list)
|
||||||
|
else 0
|
||||||
|
),
|
||||||
"is_summary": self._is_summary_message(message),
|
"is_summary": self._is_summary_message(message),
|
||||||
"content_length": len(content) if isinstance(content, str) else 0,
|
"content_length": len(content) if isinstance(content, str) else 0,
|
||||||
}
|
}
|
||||||
@@ -1659,7 +1669,9 @@ class Filter:
|
|||||||
"message_count": len(messages),
|
"message_count": len(messages),
|
||||||
"summary_state": summary_state,
|
"summary_state": summary_state,
|
||||||
"original_history_count": self._get_original_history_count(messages),
|
"original_history_count": self._get_original_history_count(messages),
|
||||||
"target_compressed_count": self._calculate_target_compressed_count(messages),
|
"target_compressed_count": self._calculate_target_compressed_count(
|
||||||
|
messages
|
||||||
|
),
|
||||||
"effective_keep_first": self._get_effective_keep_first(messages),
|
"effective_keep_first": self._get_effective_keep_first(messages),
|
||||||
"head_sample": sample,
|
"head_sample": sample,
|
||||||
"tail_sample": tail_sample,
|
"tail_sample": tail_sample,
|
||||||
@@ -1681,20 +1693,25 @@ class Filter:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# If it's an assistant message with the hidden 'output' field, unfold it
|
# If it's an assistant message with the hidden 'output' field, unfold it
|
||||||
if msg.get("role") == "assistant" and isinstance(msg.get("output"), list) and msg.get("output"):
|
if (
|
||||||
|
msg.get("role") == "assistant"
|
||||||
|
and isinstance(msg.get("output"), list)
|
||||||
|
and msg.get("output")
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
from open_webui.utils.misc import convert_output_to_messages
|
from open_webui.utils.misc import convert_output_to_messages
|
||||||
|
|
||||||
expanded = convert_output_to_messages(msg["output"], raw=True)
|
expanded = convert_output_to_messages(msg["output"], raw=True)
|
||||||
if expanded:
|
if expanded:
|
||||||
unfolded.extend(expanded)
|
unfolded.extend(expanded)
|
||||||
continue
|
continue
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass # Fallback if for some reason the internal import fails
|
pass # Fallback if for some reason the internal import fails
|
||||||
|
|
||||||
# Clean message (strip 'output' field just like inlet does)
|
# Clean message (strip 'output' field just like inlet does)
|
||||||
clean_msg = {k: v for k, v in msg.items() if k != "output"}
|
clean_msg = {k: v for k, v in msg.items() if k != "output"}
|
||||||
unfolded.append(clean_msg)
|
unfolded.append(clean_msg)
|
||||||
|
|
||||||
return unfolded
|
return unfolded
|
||||||
|
|
||||||
def _get_function_calling_mode(self, body: dict) -> str:
|
def _get_function_calling_mode(self, body: dict) -> str:
|
||||||
@@ -1831,7 +1848,9 @@ class Filter:
|
|||||||
)
|
)
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
if "broadcast" in str(ve).lower():
|
if "broadcast" in str(ve).lower():
|
||||||
logger.debug("Cannot broadcast to frontend without explicit room; suppressing further frontend logs in this session.")
|
logger.debug(
|
||||||
|
"Cannot broadcast to frontend without explicit room; suppressing further frontend logs in this session."
|
||||||
|
)
|
||||||
self.valves.show_debug_log = False
|
self.valves.show_debug_log = False
|
||||||
else:
|
else:
|
||||||
logger.error(f"Failed to process log to frontend: ValueError: {ve}")
|
logger.error(f"Failed to process log to frontend: ValueError: {ve}")
|
||||||
@@ -2545,10 +2564,22 @@ class Filter:
|
|||||||
# In the outlet phase, the frontend payload often lacks the hidden 'output' field.
|
# In the outlet phase, the frontend payload often lacks the hidden 'output' field.
|
||||||
# We try to load the full, raw history from the database first.
|
# We try to load the full, raw history from the database first.
|
||||||
db_messages = self._load_full_chat_messages(chat_id)
|
db_messages = self._load_full_chat_messages(chat_id)
|
||||||
messages_to_unfold = db_messages if (db_messages and len(db_messages) >= len(messages)) else messages
|
messages_to_unfold = (
|
||||||
|
db_messages
|
||||||
|
if (db_messages and len(db_messages) >= len(messages))
|
||||||
|
else messages
|
||||||
|
)
|
||||||
|
|
||||||
summary_messages = self._unfold_messages(messages_to_unfold)
|
summary_messages = self._unfold_messages(messages_to_unfold)
|
||||||
message_source = "outlet-db-unfolded" if db_messages and len(summary_messages) != len(messages) else "outlet-body-unfolded" if len(summary_messages) != len(messages) else "outlet-body"
|
message_source = (
|
||||||
|
"outlet-db-unfolded"
|
||||||
|
if db_messages and len(summary_messages) != len(messages)
|
||||||
|
else (
|
||||||
|
"outlet-body-unfolded"
|
||||||
|
if len(summary_messages) != len(messages)
|
||||||
|
else "outlet-body"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if self.valves.show_debug_log and __event_call__:
|
if self.valves.show_debug_log and __event_call__:
|
||||||
source_progress = self._build_summary_progress_snapshot(summary_messages)
|
source_progress = self._build_summary_progress_snapshot(summary_messages)
|
||||||
@@ -3179,43 +3210,37 @@ class Filter:
|
|||||||
event_call=__event_call__,
|
event_call=__event_call__,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build summary prompt (Optimized)
|
# Build summary prompt (Optimized for State/Working Memory and Tool Calling)
|
||||||
summary_prompt = f"""
|
summary_prompt = f"""
|
||||||
You are a professional conversation context compression expert. Your task is to create a high-fidelity summary of the following conversation content.
|
You are an expert Context Compression Engine. Your goal is to create a high-fidelity, highly dense "Working Memory" from the provided conversation.
|
||||||
This conversation may contain previous summaries (as system messages or text) and subsequent conversation content.
|
This conversation may contain previous Working Memories and raw native tool-calling sequences (JSON arguments and results).
|
||||||
|
|
||||||
### Core Objectives
|
### Rules of Engagement
|
||||||
1. **Comprehensive Summary**: Concisely summarize key information, user intent, and assistant responses from the conversation.
|
1. **Incremental Integration**: If the conversation begins with an existing Working Memory/Summary, you must PRESERVE its core facts and MERGE the new conversation events into it. Do not discard older facts.
|
||||||
2. **De-noising**: Remove greetings, repetitions, confirmations, and other non-essential information.
|
2. **Tool-Call Decompression**: Raw JSON/Text outputs from tools are noisy. Extract ONLY the definitive facts, actionable data, or root causes of errors. Ignore the structural payload.
|
||||||
3. **Key Retention**:
|
3. **Ruthless Denoising**: Completely eliminate greetings, apologies ("I'm sorry for the error"), acknowledgments ("Sure, I can do that"), and redundant confirmations.
|
||||||
* **Code snippets, commands, and technical parameters must be preserved verbatim. Do not modify or generalize them.**
|
4. **Verbatim Retention**: ANY code snippets, shell commands, file paths, specific parameters, and Message IDs (e.g., [ID: ...]) MUST be kept exactly as they appear to maintain traceability.
|
||||||
* User intent, core requirements, decisions, and action items must be clearly preserved.
|
5. **Logic Preservation**: Clearly link "what the user asked" -> "what the tool found" -> "how the system reacted".
|
||||||
4. **Coherence**: The generated summary should be a cohesive whole that can replace the original conversation as context.
|
|
||||||
5. **Detailed Record**: Since length is permitted, please preserve details, reasoning processes, and nuances of multi-turn interactions as much as possible, rather than just high-level generalizations.
|
|
||||||
|
|
||||||
### Output Requirements
|
### Output Constraints
|
||||||
* **Format**: Structured text, logically clear.
|
* **Format**: Strictly follow the Markdown structure below.
|
||||||
* **Language**: Consistent with the conversation language (usually English).
|
* **Length**: Maximum {self.valves.max_summary_tokens} Tokens.
|
||||||
* **Length**: Strictly control within {self.valves.max_summary_tokens} Tokens.
|
* **Tone**: Robotic, objective, dense.
|
||||||
* **Strictly Forbidden**: Do not output "According to the conversation...", "The summary is as follows..." or similar filler. Output the summary content directly.
|
* **Language**: Consistent with the conversation language.
|
||||||
|
* **Forbidden**: NO conversational openings/closings (e.g., "Here is the summary", "Hope this helps"). Output the data directly.
|
||||||
|
|
||||||
### Suggested Summary Structure
|
### Suggested Summary Structure
|
||||||
* **Current Goal/Topic**: A one-sentence summary of the problem currently being solved.
|
* **Current Goal**: What is the user ultimately trying to achieve?
|
||||||
* **Key Information & Context**:
|
* **Working Memory & Facts**: (Bullet points of established facts, parsed tool results, and constraints. Cite Message IDs if critical).
|
||||||
* Confirmed facts/parameters.
|
* **Code & Artifacts**: (Only if applicable. Include exact code blocks).
|
||||||
* **Code/Technical Details** (Wrap in code blocks).
|
* **Recent Actions**: (e.g., "Attempted to run script, failed with SyntaxError, applied fix").
|
||||||
* **Progress & Conclusions**: Completed steps and reached consensus.
|
* **Pending/Next Steps**: What is waiting to be done.
|
||||||
* **Action Items/Next Steps**: Clear follow-up actions.
|
|
||||||
|
|
||||||
### Identity Traceability
|
|
||||||
The input dialogue contains message IDs (e.g., [ID: ...]) and optional names.
|
|
||||||
If a specific message contributes a critical decision, a unique code snippet, or a tool-calling result, please reference its ID or Name in your summary to maintain traceability.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
{new_conversation_text}
|
{new_conversation_text}
|
||||||
---
|
---
|
||||||
|
|
||||||
Based on the content above, generate the summary (including key message identities where relevant):
|
Generate the Working Memory:
|
||||||
"""
|
"""
|
||||||
# Determine the model to use
|
# Determine the model to use
|
||||||
model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id(
|
model = self._clean_model_id(self.valves.summary_model) or self._clean_model_id(
|
||||||
|
|||||||
Reference in New Issue
Block a user