e0a28abbcd
When model != DEFAULT_EMBEDDING_MODEL, generate_embedding now calls client.embed(text, model=model) and wraps the returned vector in an EmbeddingResult tagged with the requested model. On any exception (NotImplementedError from providers without an embeddings endpoint, transient network errors, etc.), the existing T107 warning fires and the function falls back to the zero-vector sentinel; callers detect model == 'fallback' and skip indexing.

Adds:
- MockLLMClient accepts a canned_embeddings queue mirroring the existing canned pattern. embed() pops from the front; an empty queue raises IndexError so misconfigured tests fail loudly.
- Settings.embedding_model defaults to "pseudo-sha256-384" so existing zero-config installs keep Phase 4 behavior. The app lifespan now passes this through to EmbeddingWorker.model.

The public signature of generate_embedding is unchanged: (client, *, text, model=DEFAULT_EMBEDDING_MODEL, dim=..., timeout_s=...).
47 lines
1.5 KiB
Python
47 lines
1.5 KiB
Python
import pytest
|
|
from chat.llm.mock import MockLLMClient
|
|
from chat.llm.client import Message
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_mock_returns_canned_response():
    """generate() on a mock seeded with one canned string returns that string."""
    mock = MockLLMClient(canned=["Hello, world."])
    conversation = [Message(role="user", content="hi")]

    reply = await mock.generate(conversation, model="any")

    assert reply == "Hello, world."
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_mock_streams_tokens():
    """Concatenating everything stream() yields reproduces the canned text."""
    mock = MockLLMClient(canned=["abcd"])
    conversation = [Message(role="user", content="hi")]

    # Drain the async iterator; how the text is split into pieces is not
    # asserted here, only that the pieces join back to the canned string.
    pieces = [piece async for piece in mock.stream(conversation, model="any")]

    assert "".join(pieces) == "abcd"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_mock_llm_client_embed_pops_canned():
    """T112: successive ``embed()`` calls return the ``canned_embeddings``
    vectors in the order they were supplied (front of the queue first),
    mirroring the ``canned`` queue used for generate/stream."""
    first_vector = [0.1, 0.2, 0.3]
    second_vector = [0.4, 0.5, 0.6]
    mock = MockLLMClient(
        canned=[],
        canned_embeddings=[first_vector, second_vector],
    )

    # Two sequential calls: each one should consume the next queued vector.
    first_result = await mock.embed("first", model="bge-small-en-v1.5")
    second_result = await mock.embed("second", model="bge-small-en-v1.5")

    assert first_result == first_vector
    assert second_result == second_vector
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_mock_llm_client_embed_empty_queue_raises():
    """``embed`` on a mock built without any canned embeddings must raise
    IndexError, so a misconfigured test fails loudly instead of silently
    returning None or hanging."""
    unseeded_mock = MockLLMClient(canned=[])

    with pytest.raises(IndexError):
        await unseeded_mock.embed("text", model="any")
|