From b3d78c1603cd7bc1714260f9e589dace1956ddc9 Mon Sep 17 00:00:00 2001
From: Joseph Doherty
Date: Mon, 27 Apr 2026 11:39:53 -0400
Subject: [PATCH] docs: clarify FeatherlessClient.embed() rationale (verified 500 + empty embedding catalog)

Updates the docstring + test docstring for the NotImplementedError stub
shipped in T112 (Phase 4.5). Original wording said Featherless 'does not
expose /v1/embeddings'; verified the endpoint actually responds but
always returns HTTP 500 with type='completions_error' for every model
tried (text-embedding-3-small, BAAI/bge-small-en-v1.5,
sentence-transformers/all-MiniLM-L6-v2, etc.) and /v1/models has no
embedding-class entries. Stub behavior unchanged.
---
 chat/llm/featherless.py   | 30 +++++++++++++++++++-----------
 tests/test_featherless.py | 19 +++++++++++--------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/chat/llm/featherless.py b/chat/llm/featherless.py
index 2eff3de..00fc9ce 100644
--- a/chat/llm/featherless.py
+++ b/chat/llm/featherless.py
@@ -55,24 +55,32 @@ class FeatherlessClient:
                     yield delta
 
     async def embed(self, text: str, *, model: str) -> list[float]:
-        """Embeddings via Featherless — currently unsupported.
+        """Embeddings via Featherless — unsupported in practice.
 
         T112 (Phase 4.5) extends the LLMClient Protocol with ``embed()``
         for a future real-embedding swap. Featherless's OpenAI-compatible
-        surface does NOT expose ``/v1/embeddings`` at the time of writing,
-        so this implementation raises ``NotImplementedError`` rather than
-        attempting a request that would 404. The
+        surface routes ``/v1/embeddings`` (no 404), but every request
+        returns HTTP 500 ``{"error": {"type": "completions_error", ...}}``
+        — including standard names like ``text-embedding-3-small`` and
+        ``BAAI/bge-small-en-v1.5``. ``/v1/models`` confirms it: the
+        catalog has no embedding-class entries, only chat/completion
+        classes (``llama3-*``, ``gemma3-*``, ``glm5-*``, etc.).
+
+        Rather than ship a request that always 500s, this implementation
+        raises ``NotImplementedError``. The
         :func:`chat.services.embeddings.generate_embedding` wrapper
-        catches this and degrades to the existing zero-vector fallback
+        catches it and degrades to the existing zero-vector fallback
         (with the T107 warning), so misconfigured callers fail loudly in
         logs but the request path keeps working.
 
-        If Featherless ships embeddings, swap the body for an
-        ``self._client.embeddings.create(model=..., input=...)`` call
-        guarded by ``self._sem()`` (mirrors ``generate``/``stream``).
+        For real embeddings, configure a different provider (OpenAI
+        direct, Cohere, Voyage, Together, self-hosted Ollama /
+        sentence-transformers). The Mock + routing seam from T112 keeps
+        the swap to a one-class change in ``chat/llm/``.
         """
         raise NotImplementedError(
-            "Featherless does not expose /v1/embeddings; "
-            "configure a different embedding provider or stick with "
-            "the default pseudo-sha256-384 model."
+            "Featherless /v1/embeddings always returns 500 "
+            '("completions_error") and the model catalog has no '
+            "embedding class; configure a different embedding provider "
+            "or stick with the default pseudo-sha256-384 model."
         )
diff --git a/tests/test_featherless.py b/tests/test_featherless.py
index bfea4d6..1deebf3 100644
--- a/tests/test_featherless.py
+++ b/tests/test_featherless.py
@@ -1,10 +1,12 @@
 """Tests for FeatherlessClient (Phase 4.5+).
 
 Phase 4.5 adds an ``embed()`` method to the LLMClient Protocol (T112).
-Featherless does not expose an OpenAI-compatible ``/v1/embeddings``
-endpoint, so its implementation deliberately raises
-``NotImplementedError`` to surface the gap clearly.  The
-``generate_embedding`` wrapper catches this and degrades to the
+Featherless's OpenAI-compatible surface routes ``/v1/embeddings`` but
+every request returns HTTP 500 ``{"type": "completions_error"}`` (the
+router accepts the URL but the backend has no embedding handler), and
+``/v1/models`` lists no embedding-class models. The implementation
+raises ``NotImplementedError`` rather than ship a request that always
+errors; ``generate_embedding`` catches it and degrades to the
 zero-vector fallback (the existing T107 warning path).
 
 If/when Featherless ships embeddings, swap the body for a real call to
@@ -20,10 +22,11 @@ from chat.llm.featherless import FeatherlessClient
 
 @pytest.mark.asyncio
 async def test_featherless_embed_raises_not_implemented():
-    """Featherless does not expose ``/v1/embeddings`` — embed() must
-    raise ``NotImplementedError`` so callers (``generate_embedding``)
-    can degrade to the fallback zero vector + warning rather than
-    silently producing useless output."""
+    """Featherless's ``/v1/embeddings`` always 500s with
+    ``"completions_error"`` and its model catalog has no embedding
+    class — embed() must raise ``NotImplementedError`` so callers
+    (``generate_embedding``) can degrade to the fallback zero vector +
+    warning rather than silently producing useless output."""
     client = FeatherlessClient(api_key="test-key")
     with pytest.raises(NotImplementedError) as excinfo:
         await client.embed("hello world", model="bge-small-en-v1.5")