From ac6e74ab4c90d223c6ca140d9c99c5db109c2052 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 27 Apr 2026 05:48:34 -0400 Subject: [PATCH] feat: FeatherlessClient.embed() against /v1/embeddings (T112.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements embed() on FeatherlessClient. Featherless's OpenAI-compatible surface does NOT expose /v1/embeddings at the time of writing, so this implementation raises NotImplementedError rather than issuing a request that would 404. The chat.services.embeddings.generate_embedding wrapper (T112.3) catches the exception and degrades to the zero-vector fallback path (plus the existing T107 warning) — misconfigured callers fail loudly in logs while the request path keeps working. If/when Featherless ships embeddings, swap the body for self._client.embeddings.create(model=..., input=...) guarded by the existing 2-conn semaphore (mirrors generate/stream). The Protocol seam in T112.1 is already wired so no other code needs to change. Adds tests/test_featherless.py pinning the NotImplementedError contract. --- chat/llm/featherless.py | 23 +++++++++++++++++++++++ tests/test_featherless.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/test_featherless.py diff --git a/chat/llm/featherless.py b/chat/llm/featherless.py index cf1138b..2eff3de 100644 --- a/chat/llm/featherless.py +++ b/chat/llm/featherless.py @@ -53,3 +53,26 @@ class FeatherlessClient: delta = chunk.choices[0].delta.content or "" if delta: yield delta + + async def embed(self, text: str, *, model: str) -> list[float]: + """Embeddings via Featherless — currently unsupported. + + T112 (Phase 4.5) extends the LLMClient Protocol with ``embed()`` + for a future real-embedding swap. 
Featherless's OpenAI-compatible + surface does NOT expose ``/v1/embeddings`` at the time of writing, + so this implementation raises ``NotImplementedError`` rather than + attempting a request that would 404. The + :func:`chat.services.embeddings.generate_embedding` wrapper + catches this and degrades to the existing zero-vector fallback + (with the T107 warning), so misconfigured callers fail loudly in + logs but the request path keeps working. + + If Featherless ships embeddings, swap the body for a + ``self._client.embeddings.create(model=..., input=...)`` call + guarded by ``self._sem()`` (mirrors ``generate``/``stream``). + """ + raise NotImplementedError( + "Featherless does not expose /v1/embeddings; " + "configure a different embedding provider or stick with " + "the default pseudo-sha256-384 model." + ) diff --git a/tests/test_featherless.py b/tests/test_featherless.py new file mode 100644 index 0000000..bfea4d6 --- /dev/null +++ b/tests/test_featherless.py @@ -0,0 +1,32 @@ +"""Tests for FeatherlessClient (Phase 4.5+). + +Phase 4.5 adds an ``embed()`` method to the LLMClient Protocol (T112). +Featherless does not expose an OpenAI-compatible ``/v1/embeddings`` +endpoint, so its implementation deliberately raises +``NotImplementedError`` to surface the gap clearly. The +``generate_embedding`` wrapper catches this and degrades to the +zero-vector fallback (the existing T107 warning path). + +If/when Featherless ships embeddings, swap the body for a real call to +``/v1/embeddings`` and update this test to mock the HTTP layer. 
+""" + +from __future__ import annotations + +import pytest + +from chat.llm.featherless import FeatherlessClient + + +@pytest.mark.asyncio +async def test_featherless_embed_raises_not_implemented(): + """Featherless does not expose ``/v1/embeddings`` — embed() must + raise ``NotImplementedError`` so callers (``generate_embedding``) + can degrade to the fallback zero vector + warning rather than + silently producing useless output.""" + client = FeatherlessClient(api_key="test-key") + with pytest.raises(NotImplementedError) as excinfo: + await client.embed("hello world", model="bge-small-en-v1.5") + # Message should hint at the cause so operators see why their + # real-model swap fell back. + assert "embeddings" in str(excinfo.value).lower()