125 lines
4.3 KiB
Python
125 lines
4.3 KiB
Python
|
|
import asyncio
|
||
|
|
import os
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
from copilot import CopilotClient, define_tool
|
||
|
|
from copilot.types import SessionConfig
|
||
|
|
from pydantic import BaseModel, Field
|
||
|
|
|
||
|
|
|
||
|
|
# Define a simple tool for testing
|
||
|
|
class RandomNumberParams(BaseModel):
|
||
|
|
min: int = Field(description="Minimum value")
|
||
|
|
max: int = Field(description="Maximum value")
|
||
|
|
|
||
|
|
|
||
|
|
@define_tool(description="Generate a random integer within a range.")
|
||
|
|
async def generate_random_number(params: RandomNumberParams) -> str:
|
||
|
|
import random
|
||
|
|
|
||
|
|
return f"Result: {random.randint(params.min, params.max)}"
|
||
|
|
|
||
|
|
|
||
|
|
async def main():
|
||
|
|
print(f"Running tests with Python: {sys.executable}")
|
||
|
|
|
||
|
|
# 1. Setup Client
|
||
|
|
client = CopilotClient({"log_level": "error"})
|
||
|
|
await client.start()
|
||
|
|
|
||
|
|
try:
|
||
|
|
print("\n=== Test 1: Session Creation & Formatting Injection ===")
|
||
|
|
# Use gpt-4o or similar capable model
|
||
|
|
model_id = "gpt-5-mini"
|
||
|
|
|
||
|
|
system_message_config = {
|
||
|
|
"mode": "append",
|
||
|
|
"content": "You are a test assistant. Always start your response with 'TEST_PREFIX: '.",
|
||
|
|
}
|
||
|
|
|
||
|
|
session_config = SessionConfig(
|
||
|
|
model=model_id,
|
||
|
|
system_message=system_message_config,
|
||
|
|
tools=[generate_random_number],
|
||
|
|
)
|
||
|
|
|
||
|
|
session = await client.create_session(config=session_config)
|
||
|
|
session_id = session.session_id
|
||
|
|
print(f"Session Created: {session_id}")
|
||
|
|
|
||
|
|
# Test 1.1: Check system prompt effect
|
||
|
|
resp = await session.send_and_wait(
|
||
|
|
{"prompt": "Say hello.", "mode": "immediate"}
|
||
|
|
)
|
||
|
|
content = resp.data.content
|
||
|
|
print(f"Response 1: {content}")
|
||
|
|
|
||
|
|
if "TEST_PREFIX:" in content:
|
||
|
|
print("✅ System prompt injection active.")
|
||
|
|
else:
|
||
|
|
print("⚠️ System prompt injection NOT detected.")
|
||
|
|
|
||
|
|
print("\n=== Test 2: Tool Execution ===")
|
||
|
|
# Test Tool Usage
|
||
|
|
prompt_with_tool = (
|
||
|
|
"Generate a random number between 100 and 200 using the tool."
|
||
|
|
)
|
||
|
|
print(f"Sending: {prompt_with_tool}")
|
||
|
|
|
||
|
|
# We need to listen to events to verify tool execution,
|
||
|
|
# but send_and_wait handles it internally and returns the final answer.
|
||
|
|
# We check if the final answer mentions the result.
|
||
|
|
|
||
|
|
resp_tool = await session.send_and_wait(
|
||
|
|
{"prompt": prompt_with_tool, "mode": "immediate"}
|
||
|
|
)
|
||
|
|
tool_content = resp_tool.data.content
|
||
|
|
print(f"Response 2: {tool_content}")
|
||
|
|
|
||
|
|
if "Result:" in tool_content or any(char.isdigit() for char in tool_content):
|
||
|
|
print("✅ Tool likely executed (numbers found).")
|
||
|
|
else:
|
||
|
|
print("⚠️ Tool execution uncertain.")
|
||
|
|
|
||
|
|
print("\n=== Test 3: Context Retention (Memory) ===")
|
||
|
|
# Store a fact
|
||
|
|
await session.send_and_wait(
|
||
|
|
{"prompt": "My secret code is 'BLUE-42'. Remember it.", "mode": "immediate"}
|
||
|
|
)
|
||
|
|
print("Fact sent.")
|
||
|
|
|
||
|
|
# Retrieve it
|
||
|
|
resp_mem = await session.send_and_wait(
|
||
|
|
{"prompt": "What is my secret code?", "mode": "immediate"}
|
||
|
|
)
|
||
|
|
mem_content = resp_mem.data.content
|
||
|
|
print(f"Response 3: {mem_content}")
|
||
|
|
|
||
|
|
if "BLUE-42" in mem_content:
|
||
|
|
print("✅ Context retention successful.")
|
||
|
|
else:
|
||
|
|
print("⚠️ Context retention failed.")
|
||
|
|
|
||
|
|
# Cleanup
|
||
|
|
await session.destroy()
|
||
|
|
|
||
|
|
print("\n=== Test 4: Resume Session (Simulation) ===")
|
||
|
|
# Note: Actual resuming depends on backend persistence.
|
||
|
|
# The SDK's client.resume_session(id) tries to find it.
|
||
|
|
# Since we destroyed it above, we expect failure or new session logic in real app.
|
||
|
|
# But let's create a new one to persist, close client, and try to resume if process was same?
|
||
|
|
# Actually persistence usually requires the Copilot Agent/Extension host to keep state or file backed.
|
||
|
|
# The Python SDK defaults to file-based workspace in standard generic usage?
|
||
|
|
# Let's just skip complex resume testing for this simple script as it depends on environment (vscode-chat-session vs file).
|
||
|
|
print("Skipping complex resume test in script.")
|
||
|
|
|
||
|
|
except Exception as e:
|
||
|
|
print(f"Test Failed: {e}")
|
||
|
|
finally:
|
||
|
|
await client.stop()
|
||
|
|
print("\nTests Completed.")
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
asyncio.run(main())
|