# Smoke tests for the GitHub Copilot SDK Python client.
# Origin: Fu-Jie_openwebui-extensions/plugins/debug/github-copilot-sdk/test_capabilities.py
import asyncio
import os
import json
import sys
from copilot import CopilotClient, define_tool
from copilot.types import SessionConfig
from pydantic import BaseModel, Field
# Define a simple tool for testing
class RandomNumberParams(BaseModel):
    """Input schema for the random-number test tool."""

    # NOTE: the names deliberately mirror the tool's JSON schema keys;
    # they shadow the `min`/`max` builtins only inside this model, which
    # is acceptable for a wire-format definition.
    min: int = Field(description="Minimum value")
    max: int = Field(description="Maximum value")
@define_tool(description="Generate a random integer within a range.")
async def generate_random_number(params: RandomNumberParams) -> str:
    """Return a uniformly random integer in [params.min, params.max].

    The result is formatted as "Result: <n>" so the test harness can
    detect tool execution in the model's final answer.
    """
    # Local import: `random` is only needed by this tool.
    import random

    return f"Result: {random.randint(params.min, params.max)}"
async def main():
    """Smoke-test the Copilot SDK client.

    Exercises four capabilities against a live session:
      1. Session creation with a system-message injection.
      2. Tool execution (the random-number tool defined above).
      3. Context retention across turns within one session.
      4. Session resume (intentionally skipped — environment-dependent).

    Prints ✅/⚠️ markers per check; never raises (failures are reported
    and the client is always stopped).
    """
    print(f"Running tests with Python: {sys.executable}")

    # 1. Setup Client (quiet logging keeps the test output readable).
    client = CopilotClient({"log_level": "error"})
    await client.start()

    session = None  # track so cleanup runs even if a test raises
    try:
        print("\n=== Test 1: Session Creation & Formatting Injection ===")
        # Any tool-capable chat model works here.
        model_id = "gpt-5-mini"
        system_message_config = {
            "mode": "append",
            "content": "You are a test assistant. Always start your response with 'TEST_PREFIX: '.",
        }
        session_config = SessionConfig(
            model=model_id,
            system_message=system_message_config,
            tools=[generate_random_number],
        )
        session = await client.create_session(config=session_config)
        session_id = session.session_id
        print(f"Session Created: {session_id}")

        # Test 1.1: Check system prompt effect — the injected instruction
        # should force the response to begin with the marker prefix.
        resp = await session.send_and_wait(
            {"prompt": "Say hello.", "mode": "immediate"}
        )
        content = resp.data.content
        print(f"Response 1: {content}")
        if "TEST_PREFIX:" in content:
            print("✅ System prompt injection active.")
        else:
            print("⚠️ System prompt injection NOT detected.")

        print("\n=== Test 2: Tool Execution ===")
        prompt_with_tool = (
            "Generate a random number between 100 and 200 using the tool."
        )
        print(f"Sending: {prompt_with_tool}")
        # We need to listen to events to verify tool execution,
        # but send_and_wait handles it internally and returns the final answer.
        # We check if the final answer mentions the result.
        resp_tool = await session.send_and_wait(
            {"prompt": prompt_with_tool, "mode": "immediate"}
        )
        tool_content = resp_tool.data.content
        print(f"Response 2: {tool_content}")
        # Heuristic check: either the tool's literal "Result:" prefix
        # survived, or at least some digits made it into the answer.
        if "Result:" in tool_content or any(char.isdigit() for char in tool_content):
            print("✅ Tool likely executed (numbers found).")
        else:
            print("⚠️ Tool execution uncertain.")

        print("\n=== Test 3: Context Retention (Memory) ===")
        # Store a fact in the conversation...
        await session.send_and_wait(
            {"prompt": "My secret code is 'BLUE-42'. Remember it.", "mode": "immediate"}
        )
        print("Fact sent.")
        # ...then ask for it back in a later turn.
        resp_mem = await session.send_and_wait(
            {"prompt": "What is my secret code?", "mode": "immediate"}
        )
        mem_content = resp_mem.data.content
        print(f"Response 3: {mem_content}")
        if "BLUE-42" in mem_content:
            print("✅ Context retention successful.")
        else:
            print("⚠️ Context retention failed.")

        print("\n=== Test 4: Resume Session (Simulation) ===")
        # Note: Actual resuming depends on backend persistence
        # (vscode-chat-session vs file-backed workspace), so a reliable
        # resume test cannot be written in this environment-agnostic script.
        print("Skipping complex resume test in script.")
    except Exception as e:
        # Top-level boundary for a smoke-test script: report and fall
        # through to cleanup rather than crashing.
        print(f"Test Failed: {e}")
    finally:
        # Destroy the session even when a test raised mid-way.
        if session is not None:
            try:
                await session.destroy()
            except Exception:
                # Best-effort cleanup; the client.stop() below is what matters.
                pass
        await client.stop()
        print("\nTests Completed.")
if __name__ == "__main__":
    # asyncio.run creates and closes the event loop around main().
    asyncio.run(main())