b3d78c1603
Updates the docstring + test docstring for the NotImplementedError stub shipped in T112 (Phase 4.5). Original wording said Featherless 'does not expose /v1/embeddings'; verified the endpoint actually responds but always returns HTTP 500 with type='completions_error' for every model tried (text-embedding-3-small, BAAI/bge-small-en-v1.5, sentence-transformers/all-MiniLM-L6-v2, etc.) and /v1/models has no embedding-class entries. Stub behavior unchanged.
36 lines
1.5 KiB
Python
36 lines
1.5 KiB
Python
"""Tests for FeatherlessClient (Phase 4.5+).
|
|
|
|
Phase 4.5 adds an ``embed()`` method to the LLMClient Protocol (T112).
Featherless's OpenAI-compatible surface routes ``/v1/embeddings`` but
every request returns HTTP 500 ``{"type": "completions_error"}`` (the
router accepts the URL but the backend has no embedding handler), and
``/v1/models`` lists no embedding-class models. The implementation
raises ``NotImplementedError`` rather than ship a request that always
errors; ``generate_embedding`` catches it and degrades to the
zero-vector fallback (the existing T107 warning path).
|
|
|
|
If/when Featherless ships embeddings, swap the body for a real call to
``/v1/embeddings`` and update this test to mock the HTTP layer.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from chat.llm.featherless import FeatherlessClient
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_featherless_embed_raises_not_implemented():
    """``embed()`` on the Featherless client must raise ``NotImplementedError``.

    Featherless's ``/v1/embeddings`` always answers HTTP 500 with
    ``"completions_error"`` and its model catalog contains no
    embedding-class entries. Raising lets callers such as
    ``generate_embedding`` fall back to the zero vector and emit a
    warning, instead of silently producing useless output.
    """
    featherless = FeatherlessClient(api_key="test-key")

    with pytest.raises(NotImplementedError) as raised:
        await featherless.embed("hello world", model="bge-small-en-v1.5")

    # The error text should name the cause, so operators can tell why
    # their real-model swap dropped back to the fallback path.
    error_text = str(raised.value).lower()
    assert "embeddings" in error_text
|