From b3d78c1603cd7bc1714260f9e589dace1956ddc9 Mon Sep 17 00:00:00 2001
From: Joseph Doherty <dohejw01@gmail.com>
Date: Mon, 27 Apr 2026 11:39:53 -0400
Subject: [PATCH] docs: clarify FeatherlessClient.embed() rationale (verified
 500 + empty embedding catalog)

Update the method docstring, the test docstrings, and the
NotImplementedError message for the embed() stub shipped in T112
(Phase 4.5). The original wording said Featherless "does not expose
/v1/embeddings". In fact the endpoint is routed, but it returned
HTTP 500 with type='completions_error' for every model tried
(text-embedding-3-small, BAAI/bge-small-en-v1.5,
sentence-transformers/all-MiniLM-L6-v2, etc.), and /v1/models has no
embedding-class entries.

Stub behavior is otherwise unchanged: embed() still raises
NotImplementedError; only the exception message text differs.
---
 chat/llm/featherless.py   | 30 +++++++++++++++++++-----------
 tests/test_featherless.py | 19 +++++++++++--------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/chat/llm/featherless.py b/chat/llm/featherless.py
index 2eff3de..00fc9ce 100644
--- a/chat/llm/featherless.py
+++ b/chat/llm/featherless.py
@@ -55,24 +55,32 @@ class FeatherlessClient:
                     yield delta
 
     async def embed(self, text: str, *, model: str) -> list[float]:
-        """Embeddings via Featherless — currently unsupported.
+        """Embeddings via Featherless — unsupported in practice.
 
         T112 (Phase 4.5) extends the LLMClient Protocol with ``embed()``
         for a future real-embedding swap. Featherless's OpenAI-compatible
-        surface does NOT expose ``/v1/embeddings`` at the time of writing,
-        so this implementation raises ``NotImplementedError`` rather than
-        attempting a request that would 404. The
+        surface routes ``/v1/embeddings`` (no 404), but every request
+        returns HTTP 500 ``{"error": {"type": "completions_error", ...}}``
+        — including standard names like ``text-embedding-3-small`` and
+        ``BAAI/bge-small-en-v1.5``. ``/v1/models`` confirms it: the
+        catalog has no embedding-class entries, only chat/completion
+        classes (``llama3-*``, ``gemma3-*``, ``glm5-*``, etc.).
+
+        Rather than ship a request that always 500s, this implementation
+        raises ``NotImplementedError``. The
         :func:`chat.services.embeddings.generate_embedding` wrapper
-        catches this and degrades to the existing zero-vector fallback
+        catches it and degrades to the existing zero-vector fallback
         (with the T107 warning), so misconfigured callers fail loudly in
         logs but the request path keeps working.
 
-        If Featherless ships embeddings, swap the body for an
-        ``self._client.embeddings.create(model=..., input=...)`` call
-        guarded by ``self._sem()`` (mirrors ``generate``/``stream``).
+        For real embeddings, configure a different provider (OpenAI
+        direct, Cohere, Voyage, Together, self-hosted Ollama /
+        sentence-transformers). The Mock + routing seam from T112 keeps
+        the swap to a one-class change in ``chat/llm/``.
         """
         raise NotImplementedError(
-            "Featherless does not expose /v1/embeddings; "
-            "configure a different embedding provider or stick with "
-            "the default pseudo-sha256-384 model."
+            "Featherless /v1/embeddings always returns 500 "
+            '("completions_error") and the model catalog has no '
+            "embedding class; configure a different embedding provider "
+            "or stick with the default pseudo-sha256-384 model."
         )
diff --git a/tests/test_featherless.py b/tests/test_featherless.py
index bfea4d6..1deebf3 100644
--- a/tests/test_featherless.py
+++ b/tests/test_featherless.py
@@ -1,10 +1,12 @@
 """Tests for FeatherlessClient (Phase 4.5+).
 
 Phase 4.5 adds an ``embed()`` method to the LLMClient Protocol (T112).
-Featherless does not expose an OpenAI-compatible ``/v1/embeddings``
-endpoint, so its implementation deliberately raises
-``NotImplementedError`` to surface the gap clearly. The
-``generate_embedding`` wrapper catches this and degrades to the
+Featherless's OpenAI-compatible surface routes ``/v1/embeddings``, but
+every request tried returned HTTP 500 with error type ``completions_error``
+(presumably the router accepts the URL but the backend has no embedding
+handler), and ``/v1/models`` lists no embedding-class models. The
+implementation raises ``NotImplementedError`` rather than ship a request
+that always errors; ``generate_embedding`` catches it and degrades to the
 zero-vector fallback (the existing T107 warning path).
 
 If/when Featherless ships embeddings, swap the body for a real call to
@@ -20,10 +22,11 @@ from chat.llm.featherless import FeatherlessClient
 
 @pytest.mark.asyncio
 async def test_featherless_embed_raises_not_implemented():
-    """Featherless does not expose ``/v1/embeddings`` — embed() must
-    raise ``NotImplementedError`` so callers (``generate_embedding``)
-    can degrade to the fallback zero vector + warning rather than
-    silently producing useless output."""
+    """Featherless's ``/v1/embeddings`` always 500s with
+    ``"completions_error"`` and its model catalog has no embedding
+    class — embed() must raise ``NotImplementedError`` so callers
+    (``generate_embedding``) can degrade to the fallback zero vector
+    + warning rather than silently producing useless output."""
     client = FeatherlessClient(api_key="test-key")
     with pytest.raises(NotImplementedError) as excinfo:
         await client.embed("hello world", model="bge-small-en-v1.5")