feat: LLMClient protocol with Featherless and mock implementations

2026-04-26 11:35:57 -04:00
parent 67517926aa
commit e627356168
5 changed files with 80 additions and 0 deletions
@@ -0,0 +1,14 @@
 from __future__ import annotations
 from dataclasses import dataclass
 from typing import Protocol, AsyncIterator, Sequence
@dataclass
class Message:
    """One chat message: who is speaking and what was said."""

    # Speaker of the message: "system", "user", or "assistant".
    role: str
    # Text of the message.
    content: str
 class LLMClient(Protocol):
    """Structural interface for chat clients that answer a list of messages."""

    async def generate(
        self,
        messages: Sequence[Message],
        *,
        model: str,
        **params,
    ) -> str:
        """Return the complete reply to ``messages`` as a single string."""

    # Declared without ``async`` on purpose: calling an async-generator
    # implementation returns the AsyncIterator directly, with no await.
    def stream(
        self,
        messages: Sequence[Message],
        *,
        model: str,
        **params,
    ) -> AsyncIterator[str]:
        """Return an async iterator over string pieces of the reply."""
@@ -0,0 +1,29 @@
 from __future__ import annotations
 from typing import AsyncIterator, Sequence
 from openai import AsyncOpenAI
 from .client import Message
 class FeatherlessClient:
    """Chat client that talks to Featherless through the ``AsyncOpenAI`` SDK.

    Offers ``generate`` for a one-shot completion and ``stream`` for
    incremental text pieces. Both forward ``model`` and any extra keyword
    parameters to ``chat.completions.create``.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.featherless.ai/v1"):
        """Build the underlying ``AsyncOpenAI`` client.

        Args:
            api_key: Credential passed to ``AsyncOpenAI``.
            base_url: API root; defaults to the Featherless v1 endpoint.
        """
        self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)

    @staticmethod
    def _to_payload(messages: Sequence[Message]) -> list[dict[str, str]]:
        """Convert messages into the ``role``/``content`` dicts sent to the API."""
        return [{"role": m.role, "content": m.content} for m in messages]

    async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str:
        """Request a completion and return the first choice's text.

        Args:
            messages: Conversation to send, in order.
            model: Model identifier forwarded to the API.
            **params: Extra keyword arguments forwarded unchanged to
                ``chat.completions.create``.

        Returns:
            The first choice's message content, or ``""`` when that content
            is ``None`` or empty.
        """
        resp = await self._client.chat.completions.create(
            model=model,
            messages=self._to_payload(messages),
            **params,
        )
        return resp.choices[0].message.content or ""

    async def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]:
        """Request a streamed completion and yield its non-empty text deltas.

        Args:
            messages: Conversation to send, in order.
            model: Model identifier forwarded to the API.
            **params: Extra keyword arguments forwarded unchanged to
                ``chat.completions.create`` (``stream=True`` is always set
                here, so it must not be passed again).

        Yields:
            Each non-empty ``delta.content`` string of the first choice, in
            arrival order. Chunks without choices or without text are skipped.
        """
        stream = await self._client.chat.completions.create(
            model=model,
            messages=self._to_payload(messages),
            stream=True,
            **params,
        )
        async for chunk in stream:
            # A chunk may carry an empty ``choices`` list (for example a
            # trailing usage-only chunk); indexing it would raise IndexError
            # and abort the stream, so skip it instead.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                yield delta
@@ -0,0 +1,16 @@
 from __future__ import annotations
 from typing import AsyncIterator, Sequence
 from .client import Message
 class MockLLMClient:
    """In-memory client stand-in that replays canned responses in order.

    Each ``generate`` call, and each ``stream`` that is iterated, consumes
    the next response from the front of the queue. The ``messages``,
    ``model`` and extra parameters are accepted but ignored.
    """

    def __init__(self, canned: list[str]):
        """Store a private copy of the responses to replay.

        Args:
            canned: Responses to hand out, first to last.
        """
        # Copy so that consuming responses never mutates the caller's list.
        self._canned = list(canned)

    def _next_response(self) -> str:
        """Remove and return the next canned response.

        Raises:
            IndexError: If every canned response has already been consumed.
        """
        if not self._canned:
            # Same exception type a bare pop would raise, but with a message
            # that points at the mock rather than at list internals.
            raise IndexError("MockLLMClient has no canned responses left")
        return self._canned.pop(0)

    async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str:
        """Return the next canned response whole.

        Raises:
            IndexError: If no canned responses remain.
        """
        return self._next_response()

    async def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]:
        """Yield the next canned response one character at a time.

        The response is taken from the queue when iteration starts, not when
        this method is called.

        Raises:
            IndexError: On first iteration, if no canned responses remain.
        """
        for ch in self._next_response():
            yield ch
@@ -0,0 +1,21 @@
 import pytest
 from chat.llm.mock import MockLLMClient
 from chat.llm.client import Message
@pytest.mark.asyncio
async def test_mock_returns_canned_response():
    """generate() returns the single canned reply unchanged."""
    expected = "Hello, world."
    mock = MockLLMClient(canned=[expected])
    conversation = [Message(role="user", content="hi")]

    reply = await mock.generate(conversation, model="any")

    assert reply == expected
@pytest.mark.asyncio
async def test_mock_streams_tokens():
    """stream() yields pieces that concatenate back to the canned reply."""
    mock = MockLLMClient(canned=["abcd"])
    conversation = [Message(role="user", content="hi")]

    # Gather every streamed piece in arrival order.
    pieces = [piece async for piece in mock.stream(conversation, model="any")]

    assert "".join(pieces) == "abcd"