# Smoke tests for the GitHub Copilot SDK Python client.
# Origin: Fu-Jie_openwebui-extensions/plugins/debug/github-copilot-sdk/test_capabilities.py
import asyncio
import os
import json
import sys
from copilot import CopilotClient, define_tool
from copilot.types import SessionConfig
from pydantic import BaseModel, Field
# Define a simple tool for testing
class RandomNumberParams(BaseModel):
    """Input schema for the random-number test tool."""

    # NOTE: the names deliberately mirror the tool's JSON schema keys;
    # they shadow the `min`/`max` builtins only inside this model, which
    # is acceptable for a wire-format definition.
    min: int = Field(description="Minimum value")
    max: int = Field(description="Maximum value")
@define_tool(description="Generate a random integer within a range.")
async def generate_random_number(params: RandomNumberParams) -> str:
    """Return a uniformly random integer in [params.min, params.max].

    The result is formatted as "Result: <n>" so the test harness can
    detect tool execution in the model's final answer.
    """
    # Local import: `random` is only needed by this tool.
    import random

    return f"Result: {random.randint(params.min, params.max)}"
async def main():
    """Smoke-test the Copilot SDK client.

    Exercises four capabilities against a live session:
      1. Session creation with a system-message injection.
      2. Tool execution (the random-number tool defined above).
      3. Context retention across turns within one session.
      4. Session resume (intentionally skipped — environment-dependent).

    Prints ✅/⚠️ markers per check; never raises (failures are reported
    and the client is always stopped).
    """
    print(f"Running tests with Python: {sys.executable}")

    # 1. Setup Client (quiet logging keeps the test output readable).
    client = CopilotClient({"log_level": "error"})
    await client.start()

    session = None  # track so cleanup runs even if a test raises
    try:
        print("\n=== Test 1: Session Creation & Formatting Injection ===")
        # Any tool-capable chat model works here.
        model_id = "gpt-5-mini"
        system_message_config = {
            "mode": "append",
            "content": "You are a test assistant. Always start your response with 'TEST_PREFIX: '.",
        }
        session_config = SessionConfig(
            model=model_id,
            system_message=system_message_config,
            tools=[generate_random_number],
        )
        session = await client.create_session(config=session_config)
        session_id = session.session_id
        print(f"Session Created: {session_id}")

        # Test 1.1: Check system prompt effect — the injected instruction
        # should force the response to begin with the marker prefix.
        resp = await session.send_and_wait(
            {"prompt": "Say hello.", "mode": "immediate"}
        )
        content = resp.data.content
        print(f"Response 1: {content}")
        if "TEST_PREFIX:" in content:
            print("✅ System prompt injection active.")
        else:
            print("⚠️ System prompt injection NOT detected.")

        print("\n=== Test 2: Tool Execution ===")
        prompt_with_tool = (
            "Generate a random number between 100 and 200 using the tool."
        )
        print(f"Sending: {prompt_with_tool}")
        # We need to listen to events to verify tool execution,
        # but send_and_wait handles it internally and returns the final answer.
        # We check if the final answer mentions the result.
        resp_tool = await session.send_and_wait(
            {"prompt": prompt_with_tool, "mode": "immediate"}
        )
        tool_content = resp_tool.data.content
        print(f"Response 2: {tool_content}")
        # Heuristic check: either the tool's literal "Result:" prefix
        # survived, or at least some digits made it into the answer.
        if "Result:" in tool_content or any(char.isdigit() for char in tool_content):
            print("✅ Tool likely executed (numbers found).")
        else:
            print("⚠️ Tool execution uncertain.")

        print("\n=== Test 3: Context Retention (Memory) ===")
        # Store a fact in the conversation...
        await session.send_and_wait(
            {"prompt": "My secret code is 'BLUE-42'. Remember it.", "mode": "immediate"}
        )
        print("Fact sent.")
        # ...then ask for it back in a later turn.
        resp_mem = await session.send_and_wait(
            {"prompt": "What is my secret code?", "mode": "immediate"}
        )
        mem_content = resp_mem.data.content
        print(f"Response 3: {mem_content}")
        if "BLUE-42" in mem_content:
            print("✅ Context retention successful.")
        else:
            print("⚠️ Context retention failed.")

        print("\n=== Test 4: Resume Session (Simulation) ===")
        # Note: Actual resuming depends on backend persistence
        # (vscode-chat-session vs file-backed workspace), so a reliable
        # resume test cannot be written in this environment-agnostic script.
        print("Skipping complex resume test in script.")
    except Exception as e:
        # Top-level boundary for a smoke-test script: report and fall
        # through to cleanup rather than crashing.
        print(f"Test Failed: {e}")
    finally:
        # Destroy the session even when a test raised mid-way.
        if session is not None:
            try:
                await session.destroy()
            except Exception:
                # Best-effort cleanup; the client.stop() below is what matters.
                pass
        await client.stop()
        print("\nTests Completed.")
if __name__ == "__main__":
    # asyncio.run creates and closes the event loop around main().
    asyncio.run(main())