fix(copilot-sdk): implement dict-based isolated cache and optimize session config

- Fix model list flapping bug by utilizing dictionary-based '_discovery_cache' keyed by config hash instead of wiping a global list. - Optimize performance by removing redundant disk IO 'config.json' syncing ('_sync_mcp_config' and '_sync_copilot_config'); SDK directly accepts params via 'SessionConfig'. - Remove unused imports and variables based on flake8 lint rules.
2026-03-07 22:13:58 +08:00
parent a777112417
commit f4f7b65792
1 changed files with 225 additions and 368 deletions
--- a/plugins/pipes/github-copilot-sdk/github_copilot_sdk.py
+++ b/plugins/pipes/github-copilot-sdk/github_copilot_sdk.py
@@ -504,16 +504,17 @@ class Pipe:
            description="BYOK Wire API override.",
        )
-    # ==================== Class-Level Caches ====================
+    _shared_clients: Dict[str, Any] = {}  # Map: token_hash -> CopilotClient
-    # These caches persist across requests since OpenWebUI may create
+    _shared_client_lock = asyncio.Lock()  # Lock for thread-safe client lifecycle
-    # new Pipe instances for each request.
+    _model_cache: List[dict] = []  # Model list cache (Memory only fallback)
    # =============================================================
    _model_cache: List[dict] = []  # Model list cache
    _standard_model_ids: set = set()  # Track standard model IDs
-    _last_byok_config_hash: str = ""  # Track BYOK config for cache invalidation
+    _last_byok_config_hash: str = ""  # Track BYOK config (Status only)
-    _last_model_cache_time: float = 0  # Timestamp of last model cache refresh
+    _last_model_cache_time: float = 0  # Timestamp
    _env_setup_done = False  # Track if env setup has been completed
    _last_update_check = 0  # Timestamp of last CLI update check
    _discovery_cache: Dict[str, Dict[str, Any]] = (
        {}
    )  # Map config_hash -> {"time": float, "models": list}
    def _is_version_at_least(self, target: str) -> bool:
        """Check if OpenWebUI version is at least the target version."""
@@ -3918,7 +3919,9 @@ class Pipe:
            return None
        return os.path.join(self._get_session_metadata_dir(chat_id), "plan.md")
-    def _persist_plan_text(self, chat_id: Optional[str], content: Optional[str]) -> None:
+    def _persist_plan_text(
        self, chat_id: Optional[str], content: Optional[str]
    ) -> None:
        """Persist plan text into the chat-specific session metadata directory."""
        plan_path = self._get_plan_file_path(chat_id)
        if not plan_path or not isinstance(content, str):
@@ -4908,7 +4911,6 @@ class Pipe:
                # Setup Python Virtual Environment to strictly protect system python
                if not os.path.exists(f"{venv_dir}/bin/activate"):
                    import subprocess
                    import sys
                    subprocess.run(
@@ -5423,51 +5425,191 @@ class Pipe:
                logger.warning(f"[Copilot] Failed to parse UserValves: {e}")
        return self.UserValves()
    def _format_model_item(self, m: Any, source: str = "copilot") -> Optional[dict]:
        """Standardize model item into OpenWebUI pipe format."""
        try:
            # 1. Resolve ID
            mid = m.get("id") if isinstance(m, dict) else getattr(m, "id", "")
            if not mid:
                return None
            # 2. Extract Multiplier (billing info)
            bill = (
                m.get("billing") if isinstance(m, dict) else getattr(m, "billing", {})
            )
            if hasattr(bill, "to_dict"):
                bill = bill.to_dict()
            mult = float(bill.get("multiplier", 1.0)) if isinstance(bill, dict) else 1.0
            # 3. Clean ID and build display name
            cid = self._clean_model_id(mid)
            # Format name based on source
            if source == "byok":
                display_name = f"-{cid}"
            else:
                display_name = f"-{cid} ({mult}x)" if mult > 0 else f"-🔥 {cid} (0x)"
            return {
                "id": f"{self.id}-{mid}" if source == "copilot" else mid,
                "name": display_name,
                "multiplier": mult,
                "raw_id": mid,
                "source": source,
                "provider": (
                    self._get_provider_name(m) if source == "copilot" else "BYOK"
                ),
            }
        except Exception as e:
            logger.debug(f"[Pipes] Format error for model {m}: {e}")
            return None
    async def pipes(self, __user__: Optional[dict] = None) -> List[dict]:
-        """Dynamically fetch and filter model list."""
+        """Model discovery: Fetches standard and BYOK models with config-isolated caching."""
        if self.valves.DEBUG:
            logger.info(f"[Pipes] Called with user context: {bool(__user__)}")
        uv = self._get_user_valves(__user__)
        token = uv.GH_TOKEN
        # Determine check interval (24 hours default)
        now = datetime.now().timestamp()
        needs_setup = not self.__class__._env_setup_done or (
            now - self.__class__._last_update_check > 86400
        )
        # 1. Environment Setup (Only if needed or not done)
        if needs_setup:
            self._setup_env(token=token)
            self.__class__._last_update_check = now
        else:
            # Still inject token for BYOK real-time updates
            if token:
                os.environ["GH_TOKEN"] = os.environ["GITHUB_TOKEN"] = token
        # Get user info for isolation
        user_data = (
            __user__[0] if isinstance(__user__, (list, tuple)) else (__user__ or {})
        )
        user_id = user_data.get("id") or user_data.get("user_id") or "default_user"
        token = uv.GH_TOKEN or self.valves.GH_TOKEN
-        # Multiplier filtering: User can constrain, but not exceed global limit
+        now = datetime.now().timestamp()
-        global_max = self.valves.MAX_MULTIPLIER
+        cache_ttl = self.valves.MODEL_CACHE_TTL
-        user_max = uv.MAX_MULTIPLIER
+
-        if user_max is not None:
+        # Fingerprint the context so different users/tokens DO NOT evict each other
-            eff_max = min(float(user_max), float(global_max))
+        current_config_str = f"{token}|{uv.BYOK_BASE_URL or self.valves.BYOK_BASE_URL}|{uv.BYOK_API_KEY or self.valves.BYOK_API_KEY}|{self.valves.BYOK_BEARER_TOKEN}"
        current_config_hash = hashlib.md5(current_config_str.encode()).hexdigest()
        # Dictionary-based Cache lookup (Solves the flapping bug)
        if hasattr(self.__class__, "_discovery_cache"):
            cached = self.__class__._discovery_cache.get(current_config_hash)
            if cached and cache_ttl > 0 and (now - cached["time"]) <= cache_ttl:
                self.__class__._model_cache = cached[
                    "models"
                ]  # Update global for pipeline capability fallbacks
                return self._apply_model_filters(cached["models"], uv)
        # 1. Core discovery logic (Always fresh)
        results = await asyncio.gather(
            self._fetch_standard_models(token, __user__),
            self._fetch_byok_models(uv),
            return_exceptions=True,
        )
        standard_results = results[0] if not isinstance(results[0], Exception) else []
        byok_results = results[1] if not isinstance(results[1], Exception) else []
        # Merge all discovered models
        all_models = standard_results + byok_results
        # Update local instance cache for validation purposes in _pipe_impl
        self.__class__._model_cache = all_models
        # Update Config-isolated dict cache
        if not hasattr(self.__class__, "_discovery_cache"):
            self.__class__._discovery_cache = {}
        if all_models:
            self.__class__._discovery_cache[current_config_hash] = {
                "time": now,
                "models": all_models,
            }
        else:
-            eff_max = float(global_max)
+            # If discovery completely failed, cache for a very short duration (10s) to prevent spam but allow quick recovery
            self.__class__._discovery_cache[current_config_hash] = {
                "time": now - cache_ttl + 10,
                "models": all_models,
            }
-        if self.valves.DEBUG:
+        # 2. Return results with real-time user-specific filtering
-            logger.info(
+        return self._apply_model_filters(all_models, uv)
-                f"[Pipes] Multiplier Filter: User={user_max}, Global={global_max}, Effective={eff_max}"
+
    async def _get_client(self, token: str) -> Any:
        """Get or create the persistent CopilotClient from the pool based on token."""
        if not token:
            raise ValueError("GitHub Token is required to initialize CopilotClient")
        # Use an MD5 hash of the token as the key for the client pool
        token_hash = hashlib.md5(token.encode()).hexdigest()
        async with self.__class__._shared_client_lock:
            # Check if client exists for this token and is healthy
            client = self.__class__._shared_clients.get(token_hash)
            if client:
                try:
                    state = client.get_state()
                    if state == "connected":
                        return client
                    if state == "error":
                        try:
                            await client.stop()
                        except:
                            pass
                        del self.__class__._shared_clients[token_hash]
                except Exception:
                    del self.__class__._shared_clients[token_hash]
            # Ensure environment discovery is done
            if not self.__class__._env_setup_done:
                self._setup_env(token=token)
            # Build configuration and start persistent client
            client_config = self._build_client_config(user_id=None, chat_id=None)
            client_config["github_token"] = token
            client_config["auto_start"] = True
            new_client = CopilotClient(client_config)
            await new_client.start()
            self.__class__._shared_clients[token_hash] = new_client
            return new_client
    async def _fetch_standard_models(self, token: str, __user__: dict) -> List[dict]:
        """Fetch models using the shared persistent client pool."""
        if not token:
            return []
        try:
            client = await self._get_client(token)
            raw = await client.list_models()
            models = []
            for m in raw if isinstance(raw, list) else []:
                formatted = self._format_model_item(m, source="copilot")
                if formatted:
                    models.append(formatted)
            models.sort(key=lambda x: (x.get("multiplier", 1.0), x.get("raw_id", "")))
            return models
        except Exception as e:
            logger.error(f"[Pipes] Standard fetch failed: {e}")
            return []
    def _apply_model_filters(
        self, models: List[dict], uv: "Pipe.UserValves"
    ) -> List[dict]:
        """Apply user-defined multiplier and keyword exclusions to the model list."""
        if not models:
            # Check if BYOK or GH_TOKEN is configured at all
            has_byok_config = (uv.BYOK_BASE_URL or self.valves.BYOK_BASE_URL) and (
                uv.BYOK_API_KEY
                or self.valves.BYOK_API_KEY
                or uv.BYOK_BEARER_TOKEN
                or self.valves.BYOK_BEARER_TOKEN
            )
            if not (uv.GH_TOKEN or self.valves.GH_TOKEN) and not has_byok_config:
                return [
                    {
                        "id": "no_credentials",
                        "name": "⚠️ No credentials configured. Please set GH_TOKEN or BYOK settings in Valves.",
                    }
                ]
            return [{"id": "warming_up", "name": "Waiting for model discovery..."}]
        # Resolve constraints
        global_max = getattr(self.valves, "MAX_MULTIPLIER", 1.0)
        user_max = getattr(uv, "MAX_MULTIPLIER", None)
        eff_max = (
            min(float(user_max), float(global_max))
            if user_max is not None
            else float(global_max)
        )
        # Keyword filtering: combine global and user keywords
        ex_kw = [
            k.strip().lower()
            for k in (self.valves.EXCLUDE_KEYWORDS + "," + uv.EXCLUDE_KEYWORDS).split(
@@ -5475,189 +5617,31 @@ class Pipe:
            )
            if k.strip()
        ]
        # --- NEW: CONFIG-AWARE CACHE INVALIDATION ---
        # Calculate current config fingerprint to detect changes
        current_config_str = f"{token}|{uv.BYOK_BASE_URL or self.valves.BYOK_BASE_URL}|{uv.BYOK_API_KEY or self.valves.BYOK_API_KEY}|{self.valves.BYOK_BEARER_TOKEN}"
        current_config_hash = hashlib.md5(current_config_str.encode()).hexdigest()
        # TTL-based cache expiry
        cache_ttl = self.valves.MODEL_CACHE_TTL
        if (
            self._model_cache
            and cache_ttl > 0
            and (now - self.__class__._last_model_cache_time) > cache_ttl
        ):
            if self.valves.DEBUG:
                logger.info(
                    f"[Pipes] Model cache expired (TTL={cache_ttl}s). Invalidating."
                )
            self.__class__._model_cache = []
        if (
            self._model_cache
            and self.__class__._last_byok_config_hash != current_config_hash
        ):
            if self.valves.DEBUG:
                logger.info(
                    f"[Pipes] Configuration change detected. Invalidating model cache."
                )
            self.__class__._model_cache = []
            self.__class__._last_byok_config_hash = current_config_hash
        if not self._model_cache:
            # Update the hash when we refresh the cache
            self.__class__._last_byok_config_hash = current_config_hash
            if self.valves.DEBUG:
                logger.info("[Pipes] Refreshing model cache...")
            try:
                # Use effective token for fetching.
                # If COPILOT_CLI_PATH is missing (e.g. env cleared after worker restart),
                # force a full re-discovery by resetting _env_setup_done first.
                if not os.environ.get("COPILOT_CLI_PATH"):
                    self.__class__._env_setup_done = False
                self._setup_env(token=token)
                # Fetch BYOK models if configured
                byok = []
                effective_base_url = uv.BYOK_BASE_URL or self.valves.BYOK_BASE_URL
                if effective_base_url and (
                    uv.BYOK_API_KEY
                    or self.valves.BYOK_API_KEY
                    or uv.BYOK_BEARER_TOKEN
                    or self.valves.BYOK_BEARER_TOKEN
                ):
                    byok = await self._fetch_byok_models(uv=uv)
                standard = []
                cli_path = os.environ.get("COPILOT_CLI_PATH", "")
                cli_ready = bool(cli_path and os.path.exists(cli_path))
                if token and cli_ready:
                    client_config = {
                        "cli_path": cli_path,
                        "cwd": self._get_workspace_dir(
                            user_id=user_id, chat_id="listing"
                        ),
                    }
                    c = CopilotClient(client_config)
                    try:
                        await c.start()
                        raw = await c.list_models()
                        for m in raw if isinstance(raw, list) else []:
                            try:
                                mid = (
                                    m.get("id")
                                    if isinstance(m, dict)
                                    else getattr(m, "id", "")
                                )
                                if not mid:
                                    continue
                                # Extract multiplier
                                bill = (
                                    m.get("billing")
                                    if isinstance(m, dict)
                                    else getattr(m, "billing", {})
                                )
                                if hasattr(bill, "to_dict"):
                                    bill = bill.to_dict()
                                mult = (
                                    float(bill.get("multiplier", 1))
                                    if isinstance(bill, dict)
                                    else 1.0
                                )
                                cid = self._clean_model_id(mid)
                                standard.append(
                                    {
                                        "id": f"{self.id}-{mid}",
                                        "name": (
                                            f"-{cid} ({mult}x)"
                                            if mult > 0
                                            else f"-🔥 {cid} (0x)"
                                        ),
                                        "multiplier": mult,
                                        "raw_id": mid,
                                        "source": "copilot",
                                        "provider": self._get_provider_name(m),
                                    }
                                )
                            except:
                                pass
                        standard.sort(key=lambda x: (x["multiplier"], x["raw_id"]))
                        self._standard_model_ids = {m["raw_id"] for m in standard}
                    except Exception as e:
                        logger.error(f"[Pipes] Error listing models: {e}")
                    finally:
                        await c.stop()
                elif token and self.valves.DEBUG:
                    logger.info(
                        "[Pipes] Copilot CLI not ready during listing. Skip standard model probe to avoid blocking startup."
                    )
                self._model_cache = standard + byok
                self.__class__._last_model_cache_time = now
                if not self._model_cache:
                    has_byok = bool(
                        (uv.BYOK_BASE_URL or self.valves.BYOK_BASE_URL)
                        and (
                            uv.BYOK_API_KEY
                            or self.valves.BYOK_API_KEY
                            or uv.BYOK_BEARER_TOKEN
                            or self.valves.BYOK_BEARER_TOKEN
                        )
                    )
                    if not token and not has_byok:
                        return [
                            {
                                "id": "no_token",
                                "name": "⚠️ No credentials configured. Please set GH_TOKEN or BYOK settings in Valves.",
                            }
                        ]
                    return [
                        {
                            "id": "warming_up",
                            "name": "Copilot CLI is preparing in background. Please retry in a moment.",
                        }
                    ]
            except Exception as e:
                return [{"id": "error", "name": f"Error: {e}"}]
        # Final pass filtering from cache (applied on every request)
        res = []
        # Use a small epsilon for float comparison to avoid precision issues (e.g. 0.33 vs 0.33000001)
        epsilon = 0.0001
-        for m in self._model_cache:
+        for m in models:
            # 1. Keyword filter
            mid = (m.get("raw_id") or m.get("id", "")).lower()
            mname = m.get("name", "").lower()
            # Filter by Keyword
            if any(kw in mid or kw in mname for kw in ex_kw):
                continue
-            # 2. Multiplier filter (only for standard Copilot models)
+            # Filter by Multiplier (Copilot source only)
            if m.get("source") == "copilot":
-                m_mult = float(m.get("multiplier", 0))
+                if float(m.get("multiplier", 1.0)) > (eff_max + epsilon):
                if m_mult > (eff_max + epsilon):
                    if self.valves.DEBUG:
                        logger.debug(
                            f"[Pipes] Filtered {m.get('id')} (Mult: {m_mult} > {eff_max})"
                        )
                    continue
            res.append(m)
-        return res if res else [{"id": "none", "name": "No models matched filters"}]
+        return (
-
+            res
-    async def _get_client(self):
+            if res
-        """Helper to get or create a CopilotClient instance."""
+            else [
-        client_config = {}
+                {"id": "none", "name": "No models matched your current Valve filters"}
-        if os.environ.get("COPILOT_CLI_PATH"):
+            ]
-            client_config["cli_path"] = os.environ["COPILOT_CLI_PATH"]
+        )
        client = CopilotClient(client_config)
        await client.start()
        return client
    def _setup_env(
        self,
@@ -5666,7 +5650,7 @@ class Pipe:
        token: str = None,
        enable_mcp: bool = True,
    ):
-        """Setup environment variables and resolve Copilot CLI path from SDK bundle."""
+        """Setup environment variables and resolve the deterministic Copilot CLI path."""
        # 1. Real-time Token Injection (Always updates on each call)
        effective_token = token or self.valves.GH_TOKEN
@@ -5674,42 +5658,30 @@ class Pipe:
            os.environ["GH_TOKEN"] = os.environ["GITHUB_TOKEN"] = effective_token
        if self._env_setup_done:
            if debug_enabled:
                self._sync_mcp_config(
                    __event_call__,
                    debug_enabled,
                    enable_mcp=enable_mcp,
                )
            return
-        os.environ["COPILOT_AUTO_UPDATE"] = "false"
+        # 2. Deterministic CLI Path Discovery
        # We prioritize the bundled CLI from the SDK to ensure version compatibility.
        cli_path = ""
        try:
            from copilot.client import _get_bundled_cli_path
-        # 2. CLI Path Discovery (priority: env var > PATH > SDK bundle)
+            cli_path = _get_bundled_cli_path() or ""
-        cli_path = os.environ.get("COPILOT_CLI_PATH", "")
+        except ImportError:
-        found = bool(cli_path and os.path.exists(cli_path))
+            pass
-        if not found:
+        # Fallback to environment or system PATH only if bundled path is invalid
-            sys_path = shutil.which("copilot")
+        if not cli_path or not os.path.exists(cli_path):
-            if sys_path:
+            cli_path = (
-                cli_path = sys_path
+                os.environ.get("COPILOT_CLI_PATH") or shutil.which("copilot") or ""
-                found = True
+            )
-        if not found:
+        cli_ready = bool(cli_path and os.path.exists(cli_path))
            try:
                from copilot.client import _get_bundled_cli_path
-                bundled_path = _get_bundled_cli_path()
+        # 3. Finalize Environment
                if bundled_path and os.path.exists(bundled_path):
                    cli_path = bundled_path
                    found = True
            except ImportError:
                pass
        # 3. Finalize
        cli_ready = found
        if cli_ready:
            os.environ["COPILOT_CLI_PATH"] = cli_path
-            # Add the CLI's parent directory to PATH so subprocesses can invoke `copilot` directly
+            # Add to PATH for subprocess visibility
            cli_bin_dir = os.path.dirname(cli_path)
            current_path = os.environ.get("PATH", "")
            if cli_bin_dir and cli_bin_dir not in current_path.split(os.pathsep):
@@ -5719,7 +5691,7 @@ class Pipe:
        self.__class__._last_update_check = datetime.now().timestamp()
        self._emit_debug_log_sync(
-            f"Environment setup complete. CLI ready={cli_ready}. Path: {cli_path}",
+            f"Deterministic Env Setup: CLI ready={cli_ready}, Path={cli_path}",
            __event_call__,
            debug_enabled=debug_enabled,
        )
@@ -5831,117 +5803,6 @@ class Pipe:
        return text_content, attachments
    def _sync_copilot_config(
        self, reasoning_effort: str, __event_call__=None, debug_enabled: bool = False
    ):
        """
        Dynamically update config.json if REASONING_EFFORT is set.
        This provides a fallback if API injection is ignored by the server.
        """
        if not reasoning_effort:
            return
        effort = reasoning_effort
        try:
            # Target dynamic config path
            config_path = os.path.join(self._get_copilot_config_dir(), "config.json")
            config_dir = os.path.dirname(config_path)
            # Only proceed if directory exists (avoid creating trash types of files if path is wrong)
            if not os.path.exists(config_dir):
                return
            data = {}
            # Read existing config
            if os.path.exists(config_path):
                try:
                    with open(config_path, "r") as f:
                        data = json.load(f)
                except Exception:
                    data = {}
            # Update if changed
            current_val = data.get("reasoning_effort")
            if current_val != effort:
                data["reasoning_effort"] = effort
                try:
                    with open(config_path, "w") as f:
                        json.dump(data, f, indent=4)
                    self._emit_debug_log_sync(
                        f"Dynamically updated config.json: reasoning_effort='{effort}'",
                        __event_call__,
                        debug_enabled=debug_enabled,
                    )
                except Exception as e:
                    self._emit_debug_log_sync(
                        f"Failed to write config.json: {e}",
                        __event_call__,
                        debug_enabled=debug_enabled,
                    )
        except Exception as e:
            self._emit_debug_log_sync(
                f"Config sync check failed: {e}",
                __event_call__,
                debug_enabled=debug_enabled,
            )
    def _sync_mcp_config(
        self,
        __event_call__=None,
        debug_enabled: bool = False,
        enable_mcp: bool = True,
    ):
        """Sync MCP configuration to dynamic config.json."""
        path = os.path.join(self._get_copilot_config_dir(), "config.json")
        # If disabled, we should ensure the config doesn't contain stale MCP info
        if not enable_mcp:
            if os.path.exists(path):
                try:
                    with open(path, "r") as f:
                        data = json.load(f)
                    if "mcp_servers" in data:
                        del data["mcp_servers"]
                        with open(path, "w") as f:
                            json.dump(data, f, indent=4)
                        self._emit_debug_log_sync(
                            "MCP disabled: Cleared MCP servers from config.json",
                            __event_call__,
                            debug_enabled,
                        )
                except:
                    pass
            return
        mcp = self._parse_mcp_servers(__event_call__, enable_mcp=enable_mcp)
        if not mcp:
            return
        try:
            path = os.path.join(self._get_copilot_config_dir(), "config.json")
            os.makedirs(os.path.dirname(path), exist_ok=True)
            data = {}
            if os.path.exists(path):
                try:
                    with open(path, "r") as f:
                        data = json.load(f)
                except:
                    pass
            if json.dumps(data.get("mcp_servers"), sort_keys=True) != json.dumps(
                mcp, sort_keys=True
            ):
                data["mcp_servers"] = mcp
                with open(path, "w") as f:
                    json.dump(data, f, indent=4)
                self._emit_debug_log_sync(
                    f"Synced {len(mcp)} MCP servers to config.json",
                    __event_call__,
                    debug_enabled,
                )
        except:
            pass
    # ==================== Internal Implementation ====================
    # _pipe_impl() contains the main request handling logic.
    # ================================================================
@@ -5993,6 +5854,7 @@ class Pipe:
        effective_debug = self.valves.DEBUG or user_valves.DEBUG
        effective_token = user_valves.GH_TOKEN or self.valves.GH_TOKEN
        token = effective_token  # For compatibility with _get_client(token)
        # Get Chat ID using improved helper
        chat_ctx = self._get_chat_context(
@@ -6332,26 +6194,21 @@ class Pipe:
        else:
            is_byok_model = not has_multiplier and byok_active
        # Mode Selection Info
        await self._emit_debug_log(
            f"Mode: {'BYOK' if is_byok_model else 'Standard'}, Reasoning: {is_reasoning}, Admin: {is_admin}",
            __event_call__,
            debug_enabled=effective_debug,
        )
        # Ensure we have the latest config (only for standard Copilot models)
        if not is_byok_model:
            self._sync_copilot_config(effective_reasoning_effort, __event_call__)
        # Shared state for delayed HTML embeds (Premium Experience)
        pending_embeds = []
-        # Initialize Client
+        # Use Shared Persistent Client Pool (Token-aware)
-        client = CopilotClient(
+        client = await self._get_client(token)
-            self._build_client_config(user_id=user_id, chat_id=chat_id)
+        should_stop_client = False  # Never stop the shared singleton pool!
        )
        should_stop_client = True
        try:
-            await client.start()
+            # Note: client is already started in _get_client
            # Initialize custom tools (Handles caching internally)
            custom_tools = await self._initialize_custom_tools(
@@ -7831,7 +7688,7 @@ class Pipe:
                # We do not destroy session here to allow persistence,
                # but we must stop the client.
                await client.stop()
-            except Exception as e:
+            except Exception:
                pass