# chat/tests/test_featherless.py

"""Tests for FeatherlessClient (Phase 4.5+).

Phase 4.5 adds an ``embed()`` method to the LLMClient Protocol (T112).
Featherless's OpenAI-compatible surface routes ``/v1/embeddings`` but
every request returns HTTP 500 ``{"type": "completions_error"}`` (the
router accepts the URL but the backend has no embedding handler), and
``/v1/models`` lists no embedding-class models. The implementation
raises ``NotImplementedError`` rather than ship a request that always
errors; ``generate_embedding`` catches it and degrades to the
zero-vector fallback (the existing T107 warning path).

If/when Featherless ships embeddings, swap the body for a real call to
``/v1/embeddings`` and update this test to mock the HTTP layer.
"""

from __future__ import annotations

import pytest

from chat.llm.featherless import FeatherlessClient


@pytest.mark.asyncio
async def test_featherless_embed_raises_not_implemented():
    """Verify that ``FeatherlessClient.embed()`` refuses to run.

    The call is expected to raise ``NotImplementedError`` instead of
    returning a value, so that a caller (``generate_embedding``) can
    fall back to its zero-vector + warning path rather than silently
    consuming useless output. The error text must mention
    "embeddings" (case-insensitively) so the reason is visible.
    """
    featherless = FeatherlessClient(api_key="test-key")

    with pytest.raises(NotImplementedError) as raised:
        await featherless.embed("hello world", model="bge-small-en-v1.5")

    # Operators reading the fallback warning need a hint about what
    # went wrong, so the message has to name the missing capability.
    message = str(raised.value)
    assert "embeddings" in message.lower()