# chat/chat/llm/client.py

from __future__ import annotations
from dataclasses import dataclass
from typing import Protocol, AsyncIterator, Sequence


@dataclass
class Message:
    """One chat message: who is speaking and what was said.

    Sequences of these are what ``LLMClient.generate`` and
    ``LLMClient.stream`` accept as their conversation input.
    """

    # Speaker of the message: "system" | "user" | "assistant".
    role: str
    # Text of the message.
    content: str


class LLMClient(Protocol):
    """Structural interface for chat-model backends.

    Being a ``typing.Protocol``, it is satisfied by any object that
    provides these methods with compatible signatures; implementations
    do not need to inherit from this class.
    """

    async def generate(
        self,
        messages: Sequence[Message],
        *,
        model: str,
        **params
    ) -> str:
        """Await a reply to ``messages`` from ``model`` and return it as a string.

        ``model`` is keyword-only. Any further keyword arguments are
        collected in ``**params``; their meaning is not specified here
        and is left to the implementation.
        """
        ...

    def stream(
        self,
        messages: Sequence[Message],
        *,
        model: str,
        **params
    ) -> AsyncIterator[str]:
        """Return an async iterator of string pieces for ``messages``.

        Declared with a plain ``def`` (not ``async def``) so that calling
        it yields the ``AsyncIterator`` directly, ready for ``async for``,
        rather than a coroutine that must first be awaited. Takes the same
        arguments as ``generate``.
        """
        ...

    async def embed(self, text: str, *, model: str) -> list[float]:
        """Return an embedding vector for ``text`` using ``model``.

        T112 (Phase 4.5): real-embedding seam. An implementation either
        calls its provider's ``/v1/embeddings`` endpoint or, when the
        provider doesn't expose embeddings (e.g. Featherless today),
        raises ``NotImplementedError`` so that ``generate_embedding`` can
        catch it and degrade to the zero-vector fallback.

        Because the Protocol is structural, this method only has to exist
        on implementations; existing callers that don't use it are
        unaffected.
        """
        ...