feat(copilot): Release v0.5.1 - Smarter BYOK, Tool Caching & Refined Docs

This commit is contained in:
fujie
2026-02-08 08:21:32 +08:00
parent de9948a5b0
commit 409d2f663f
9 changed files with 2625 additions and 1167 deletions

View File

@@ -0,0 +1,424 @@
#!/usr/bin/env python3
"""
Copilot SDK System Message Test Script

Tests whether system_message is properly applied during session.resume.

This script verifies the bug hypothesis:
- session.resume with system_message config may not reliably update the system prompt

Test scenarios:
1. Create a new session with a custom system message
2. Resume the same session with a DIFFERENT system message
3. Ask the model to describe its current system instructions

Requirements:
- github-copilot-sdk>=0.1.23
"""
import asyncio
import os
import sys
import time
from copilot import CopilotClient
from copilot.types import SessionConfig
from copilot.generated.session_events import SessionEventType
# Test system messages.
# Each prompt gives the assistant a distinct identity ("ALPHA" / "BETA") and a
# matching reply prefix. The tests below decide which system message is in
# effect by searching the model's reply for one of those identity names.
SYSTEM_MSG_A = """You are a helpful assistant named "ALPHA".
When asked about your name or identity, you MUST respond: "I am ALPHA, the first assistant."
Always start your responses with "[ALPHA]:" prefix.
"""
SYSTEM_MSG_B = """You are a helpful assistant named "BETA".
When asked about your name or identity, you MUST respond: "I am BETA, the second assistant."
Always start your responses with "[BETA]:" prefix.
"""
async def send_and_get_response(session, prompt: str, *, timeout: float = 120) -> str:
    """Send ``prompt`` to ``session`` and return the assistant's full reply.

    The reply is collected through an event subscription: streamed deltas are
    echoed to stdout as they arrive and accumulated; if no delta was received,
    the content of the final assistant message is used instead.

    Args:
        session: Session object exposing ``on(handler)`` (returning an
            unsubscribe callable) and an awaitable ``send(payload)``.
        prompt: User prompt to send.
        timeout: Maximum number of seconds to wait for the turn to finish.
            Defaults to 120, the value that was previously hard-coded.

    Returns:
        The concatenated response text (empty if nothing was received).

    Raises:
        asyncio.TimeoutError: If no completion event arrives within
            ``timeout`` seconds. The handler is unsubscribed in that case too.
    """
    # Collect pieces in a list and join once instead of repeated `str +=`.
    chunks = []
    response_complete = asyncio.Event()

    def event_handler(event):
        if event.type == SessionEventType.ASSISTANT_MESSAGE_DELTA:
            delta = getattr(event.data, "content", "") or ""
            print(delta, end="", flush=True)
            if delta:
                chunks.append(delta)
        elif event.type == SessionEventType.ASSISTANT_MESSAGE:
            # Final complete message: only used when nothing was streamed,
            # otherwise the text would be duplicated.
            content = getattr(event.data, "content", "") or ""
            if content and not chunks:
                chunks.append(content)
                print(content, end="", flush=True)
        elif event.type in (
            SessionEventType.SESSION_IDLE,
            SessionEventType.ASSISTANT_TURN_END,
        ):
            # Either event marks the end of the response.
            response_complete.set()

    # Subscribe before sending so no event can be missed.
    unsubscribe = session.on(event_handler)
    try:
        await session.send({"prompt": prompt, "mode": "immediate"})
        await asyncio.wait_for(response_complete.wait(), timeout=timeout)
        print()  # newline after completion
    finally:
        # Always detach the handler, even on timeout or send failure.
        unsubscribe()
    return "".join(chunks)
async def test_new_session_system_message(client: CopilotClient):
    """Test 1: create a fresh session whose system message is SYSTEM_MSG_A.

    Asks the model for its name, reports whether the reply mentions "ALPHA",
    and returns the created session so later tests can resume it.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 1: New Session with System Message A (ALPHA)")
    print(rule)

    # "replace" mode swaps out the default system prompt entirely.
    system_message = {"mode": "replace", "content": SYSTEM_MSG_A}
    config = SessionConfig(
        session_id="test-session-001",
        model="gpt-5-mini",
        streaming=True,
        system_message=system_message,
    )
    session = await client.create_session(config=config)
    print(f"✅ Created new session: {session.session_id}")

    print("\n📤 Asking: 'What is your name?'")
    print("📥 Response: ", end="")
    reply = await send_and_get_response(session, "What is your name?")

    verdict = (
        "✅ SUCCESS: Model correctly identified as ALPHA"
        if "ALPHA" in reply
        else "⚠️ WARNING: Model did NOT identify as ALPHA"
    )
    print(verdict)
    return session
async def test_resume_session_with_new_system_message(
    client: CopilotClient, session_id: str
):
    """Test 2: resume ``session_id`` with SYSTEM_MSG_B instead of SYSTEM_MSG_A.

    Returns:
        True if the reply mentions "BETA" (system message was replaced),
        False if it mentions only "ALPHA" (old system message persisted),
        None if neither name appears in the reply.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 2: Resume Session with System Message B (BETA)")
    print(rule)

    resume_config = {
        "model": "gpt-5-mini",
        "streaming": True,
        "system_message": {"mode": "replace", "content": SYSTEM_MSG_B},
    }
    print("📋 Resume config includes system_message with mode='replace'")
    print("📋 New system_message content: BETA identity")

    session = await client.resume_session(session_id, resume_config)
    print(f"✅ Resumed session: {session.session_id}")

    print("\n📤 Asking: 'What is your name now? Did your identity change?'")
    print("📥 Response: ", end="")
    reply = await send_and_get_response(
        session, "What is your name now? Did your identity change?"
    )

    # BETA is checked first: a reply naming both identities counts as updated.
    if "BETA" in reply:
        print("✅ SUCCESS: System message was updated to BETA")
        return True
    if "ALPHA" in reply:
        print("❌ BUG CONFIRMED: System message was NOT updated (still ALPHA)")
        return False
    print("⚠️ INCONCLUSIVE: Model response doesn't clearly indicate identity")
    return None
async def test_resume_without_system_message(client: CopilotClient, session_id: str):
    """Test 3: resume ``session_id`` with a config that omits system_message.

    Purely informational: prints which identity (if any) the model reports
    and returns nothing.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 3: Resume Session WITHOUT System Message")
    print(rule)

    # Deliberately no "system_message" key in this config.
    # NOTE(review): this resumes with "gpt-4o" while Tests 1 and 2 use
    # "gpt-5-mini" - confirm the model switch is intentional.
    resume_config = {"model": "gpt-4o", "streaming": True}
    session = await client.resume_session(session_id, resume_config)
    print(f"✅ Resumed session: {session.session_id}")

    print("\n📤 Asking: 'What is your name? Tell me your current identity.'")
    print("📥 Response: ", end="")
    reply = await send_and_get_response(
        session, "What is your name? Tell me your current identity."
    )

    if "ALPHA" in reply:
        outcome = (
            " Without system_message: Model still remembers ALPHA from original session"
        )
    elif "BETA" in reply:
        outcome = " Without system_message: Model remembers BETA from Test 2"
    else:
        outcome = " Model identity unclear"
    print(outcome)
async def main():
    """Run the native Copilot scenario: Tests 1-3 followed by a summary.

    The CLI path is taken from the COPILOT_CLI_PATH environment variable when
    set. Any exception is printed with its traceback rather than propagated,
    and the client is always stopped at the end.
    """
    rule = "=" * 60
    print(rule)
    print("🧪 Copilot SDK System Message Resume Test")
    print(rule)
    print(f"Time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Testing with SDK from: {CopilotClient.__module__}")

    # Only pass an explicit CLI path when the environment provides one.
    client_config = {"log_level": "info"}
    cli_path = os.environ.get("COPILOT_CLI_PATH")
    if cli_path:
        client_config["cli_path"] = cli_path
    client = CopilotClient(client_config)

    try:
        await client.start()
        print("✅ Client started successfully")

        # Test 1: new session carrying system message A.
        created = await test_new_session_system_message(client)
        session_id = created.session_id

        print("\n⏳ Waiting 2 seconds before resume test...")
        await asyncio.sleep(2)

        # Test 2: resume with system message B; result is True/False/None.
        bug_confirmed = await test_resume_session_with_new_system_message(
            client, session_id
        )

        # Test 3: resume again, this time without any system message.
        await test_resume_without_system_message(client, session_id)

        print("\n" + rule)
        print("📊 TEST SUMMARY (Native Copilot)")
        print(rule)
        if bug_confirmed is True:
            print("✅ NO BUG: session.resume correctly updates system_message")
        elif bug_confirmed is False:
            print(
                "❌ BUG CONFIRMED: session.resume does NOT apply system_message updates"
            )
            print(" The system message from create_session persists even when")
            print(" resume_session specifies a different system_message.")
            print("\n WORKAROUND: Inject system context into user prompt instead.")
        else:
            print("⚠️ INCONCLUSIVE: Could not determine if bug exists")
    except Exception as exc:
        # Top-level boundary of a diagnostic script: report and fall through
        # to cleanup instead of crashing.
        print(f"❌ Error: {exc}")
        import traceback

        traceback.print_exc()
    finally:
        await client.stop()
        print("\n✅ Client stopped")
# =============================================================================
# BYOK OpenAI Test
# =============================================================================
async def test_byok_new_session(
    client: CopilotClient, provider_config: dict, model: str = "gpt-4o"
):
    """BYOK Test 1: New session with BYOK provider and system message A.

    Args:
        client: Copilot client used to create the session.
        provider_config: BYOK provider settings passed to the session config.
            Only its ``type`` and ``base_url`` entries are printed.
        model: Model name to request. Defaults to "gpt-4o", the value that
            was previously hard-coded.

    Returns:
        The created session, so the caller can resume it by ``session_id``.
    """
    print("\n" + "=" * 60)
    print("BYOK TEST 1: New Session with BYOK Provider + System Message A (ALPHA)")
    print("=" * 60)
    print(
        f"📋 Provider: {provider_config.get('type')} @ {provider_config.get('base_url')}"
    )
    session_config = SessionConfig(
        session_id="byok-test-session-001",
        model=model,
        streaming=True,
        provider=provider_config,
        system_message={
            "mode": "replace",
            "content": SYSTEM_MSG_A,
        },
    )
    session = await client.create_session(config=session_config)
    print(f"✅ Created BYOK session: {session.session_id}")
    print("\n📤 Asking: 'What is your name?'")
    print("📥 Response: ", end="")
    response = await send_and_get_response(session, "What is your name?")
    if "ALPHA" in response:
        print("✅ SUCCESS: Model correctly identified as ALPHA")
    else:
        print("⚠️ WARNING: Model did NOT identify as ALPHA")
    return session


async def test_byok_resume_with_new_system_message(
    client: CopilotClient,
    session_id: str,
    provider_config: dict,
    model: str = "gpt-4o",
):
    """BYOK Test 2: Resume BYOK session with DIFFERENT system message B.

    Args:
        client: Copilot client used to resume the session.
        session_id: Identifier of the session created in BYOK Test 1.
        provider_config: BYOK provider settings included in the resume config.
        model: Model name to request. Defaults to "gpt-4o", the value that
            was previously hard-coded.

    Returns:
        True if the reply mentions "BETA" (system message was replaced),
        False if it mentions only "ALPHA" (old system message persisted),
        None if neither name appears in the reply.
    """
    print("\n" + "=" * 60)
    print("BYOK TEST 2: Resume BYOK Session with System Message B (BETA)")
    print("=" * 60)
    resume_config = {
        "model": model,
        "streaming": True,
        "provider": provider_config,
        "system_message": {
            "mode": "replace",
            "content": SYSTEM_MSG_B,
        },
    }
    print("📋 Resume config includes system_message with mode='replace'")
    print("📋 New system_message content: BETA identity")
    print(
        f"📋 Provider: {provider_config.get('type')} @ {provider_config.get('base_url')}"
    )
    session = await client.resume_session(session_id, resume_config)
    print(f"✅ Resumed BYOK session: {session.session_id}")
    print("\n📤 Asking: 'What is your name now? Did your identity change?'")
    print("📥 Response: ", end="")
    response = await send_and_get_response(
        session, "What is your name now? Did your identity change?"
    )
    # BETA is checked first: a reply naming both identities counts as updated.
    if "BETA" in response:
        print("✅ SUCCESS: System message was updated to BETA")
        return True
    if "ALPHA" in response:
        print("❌ BUG CONFIRMED: System message was NOT updated (still ALPHA)")
        return False
    print("⚠️ INCONCLUSIVE: Model response doesn't clearly indicate identity")
    return None


async def main_byok():
    """Run BYOK-specific tests.

    Configuration is read from the environment:
      - BYOK_API_KEY (falls back to OPENAI_API_KEY): required.
      - BYOK_BASE_URL: optional, defaults to "https://api.openai.com/v1".
      - BYOK_MODEL: optional, defaults to "gpt-4o"; used for both the
        create and the resume request.
      - COPILOT_CLI_PATH: optional explicit CLI path for the client.

    Returns early, before creating a client, when no API key is set. Any
    exception raised by the tests is printed with its traceback rather than
    propagated, and the client is always stopped at the end.
    """
    print("=" * 60)
    print("🧪 Copilot SDK BYOK System Message Resume Test")
    print("=" * 60)
    print(f"Time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    # Get BYOK configuration from environment
    byok_api_key = os.environ.get("BYOK_API_KEY") or os.environ.get("OPENAI_API_KEY")
    byok_base_url = os.environ.get("BYOK_BASE_URL", "https://api.openai.com/v1")
    byok_model = os.environ.get("BYOK_MODEL", "gpt-4o")
    if not byok_api_key:
        print(
            "❌ Error: Please set BYOK_API_KEY or OPENAI_API_KEY environment variable"
        )
        print(" export BYOK_API_KEY='your_api_key'")
        print(" export BYOK_BASE_URL='https://api.openai.com/v1' # optional")
        print(" export BYOK_MODEL='gpt-4o' # optional")
        return
    provider_config = {
        "type": "openai",
        "base_url": byok_base_url,
        "api_key": byok_api_key,
    }
    # The API key is deliberately left out of the printed summary.
    print(f"📋 BYOK Provider: openai @ {byok_base_url}")
    print(f"📋 BYOK Model: {byok_model}")
    # Create client
    cli_path = os.environ.get("COPILOT_CLI_PATH")
    client_config = {"log_level": "info"}
    if cli_path:
        client_config["cli_path"] = cli_path
    client = CopilotClient(client_config)
    try:
        await client.start()
        print("✅ Client started successfully")
        # BYOK Test 1: Create new session with BYOK provider.
        # Pass the configured model so BYOK_MODEL actually takes effect.
        session = await test_byok_new_session(
            client, provider_config, model=byok_model
        )
        session_id = session.session_id
        # Wait a bit before resuming
        print("\n⏳ Waiting 2 seconds before resume test...")
        await asyncio.sleep(2)
        # BYOK Test 2: Resume with different system message B (same model).
        bug_confirmed = await test_byok_resume_with_new_system_message(
            client, session_id, provider_config, model=byok_model
        )
        # Summary
        print("\n" + "=" * 60)
        print("📊 BYOK TEST SUMMARY")
        print("=" * 60)
        if bug_confirmed is False:
            print(
                "❌ BYOK BUG CONFIRMED: session.resume does NOT apply system_message updates"
            )
            print(" In BYOK mode, the system message from create_session persists")
            print(" even when resume_session specifies a different system_message.")
            print("\n WORKAROUND: Inject system context into user prompt instead.")
        elif bug_confirmed is True:
            print("✅ BYOK NO BUG: session.resume correctly updates system_message")
        else:
            print("⚠️ BYOK INCONCLUSIVE: Could not determine if bug exists")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and fall through
        # to cleanup instead of crashing.
        print(f"❌ Error: {e}")
        import traceback

        traceback.print_exc()
    finally:
        await client.stop()
        print("\n✅ Client stopped")
if __name__ == "__main__":
    import argparse

    # A single optional flag chooses between the native and BYOK scenarios.
    cli = argparse.ArgumentParser(description="Copilot SDK System Message Resume Test")
    cli.add_argument(
        "--byok",
        action="store_true",
        help="Run BYOK (Bring Your Own Key) test instead of native Copilot test",
    )
    use_byok = cli.parse_args().byok

    if not use_byok:
        print("Running native Copilot test mode...")
        print("(Use --byok flag for BYOK provider test)")
        asyncio.run(main())
    else:
        print("Running BYOK test mode...")
        asyncio.run(main_byok())