5f16bb575a
Adds async def embed(self, text: str, *, model: str) -> list[float] to the LLMClient Protocol so Phase 4.5 can wire a real-embedding swap without changing call sites. Protocol is structural — existing implementations that don't use it remain compatible; downstream implementations (FeatherlessClient, MockLLMClient) ship in T112.2 and T112.3.
23 lines
982 B
Python
23 lines
982 B
Python
from __future__ import annotations
|
|
from dataclasses import dataclass
|
|
from typing import Protocol, AsyncIterator, Sequence
|
|
|
|
|
|
@dataclass
class Message:
    """A single chat message passed to an ``LLMClient``.

    Plain mutable dataclass with two positional fields, in this order:
    ``role`` then ``content``.
    """

    # Speaker of the message: "system" | "user" | "assistant".
    # Typed as plain ``str``; the value is not validated here.
    role: str
    # Text body of the message.
    content: str
|
|
|
|
|
|
class LLMClient(Protocol):
    """Structural interface for chat-completion / embedding backends.

    Any object exposing methods with these signatures satisfies the
    Protocol; no inheritance from ``LLMClient`` is required.
    """

    async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str:
        """Await a complete response for ``messages`` and return it as a string.

        ``model`` is keyword-only. Extra keyword arguments are accepted via
        ``**params``; their meaning is left to the implementation.
        """
        ...

    def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]:
        """Return an async iterator of string chunks for ``messages``.

        Declared as a plain ``def`` (not ``async def``): calling it yields the
        ``AsyncIterator`` directly, which is what an async-generator
        implementation provides, so callers use ``async for`` without ``await``.
        """
        ...

    async def embed(self, text: str, *, model: str) -> list[float]:
        """Return an embedding vector for ``text``.

        T112 (Phase 4.5): real-embedding seam. Implementations either call a
        provider's ``/v1/embeddings`` endpoint or, when the provider doesn't
        expose embeddings (e.g. Featherless today), raise
        ``NotImplementedError`` so ``generate_embedding`` can catch it and
        degrade to the zero-vector fallback.

        The Protocol is structural, so this method only needs to exist on
        implementations; existing callers that don't use it are unaffected.
        """
        ...
|