feat: classifier wrapper with retry, timeout, schema-default fallback

This commit is contained in:
Joseph Doherty
2026-04-26 11:38:48 -04:00
parent e627356168
commit c2aceffda1
3 changed files with 73 additions and 0 deletions
@@ -0,0 +1,8 @@
-- One row per recorded classifier failure.
-- NOTE(review): column meanings are inferred from their names; no code visible
-- alongside this migration writes to the table -- confirm against the writer.
CREATE TABLE classifier_failures (
    id            INTEGER PRIMARY KEY,
    kind          TEXT    NOT NULL,   -- presumably a failure category; confirm allowed values
    model         TEXT    NOT NULL,   -- presumably the model identifier used for the call
    raw_text      TEXT,               -- nullable; presumably the unparsed model output
    attempt_count INTEGER NOT NULL,   -- presumably attempts made before giving up
    -- datetime('now') is SQLite syntax (assumed dialect) and yields UTC text
    -- in 'YYYY-MM-DD HH:MM:SS' form.
    created_at    TEXT    NOT NULL DEFAULT (datetime('now'))
);
+41
View File
@@ -0,0 +1,41 @@
from __future__ import annotations
import json
import asyncio
from typing import TypeVar
from pydantic import BaseModel, ValidationError
from .client import LLMClient, Message
# Type variable for the pydantic model class passed to classify() as `schema`;
# it ties classify()'s return type (and `default`) to that model class.
T = TypeVar("T", bound=BaseModel)
# Lowercase phrases that classify() searches for in the lowercased first 80
# characters of a model reply to spot a prose refusal instead of JSON.
REFUSAL_PATTERNS = (
    "i can't",
    "i cannot",
    "i'm sorry, but",
    "as an ai",
)
async def classify(
    client: LLMClient,
    *,
    model: str,
    system: str,
    user: str,
    schema: type[T],
    default: T | None = None,
    timeout_s: float = 10.0,
    max_attempts: int = 2,
) -> T:
    """Ask the model for a JSON reply and validate it against ``schema``.

    Each attempt sends a system message (``system`` plus a JSON-only
    instruction) and a user message through ``client.generate`` with
    ``response_format={"type": "json_object"}``, bounded by ``timeout_s``.
    If an attempt times out, looks like a prose refusal, or fails schema
    validation, the system message is replaced with a stricter JSON-only
    instruction and the call is retried.

    Args:
        client: LLM client whose ``generate`` coroutine produces the reply text.
        model: Model name forwarded to ``client.generate``.
        system: Base system prompt; the JSON instruction is appended to it.
        user: Content of the user message.
        schema: Pydantic model class used to validate the reply.
        default: Value returned when every attempt fails. When ``None``,
            exhaustion raises instead.
        timeout_s: Per-attempt timeout in seconds.
        max_attempts: Total number of attempts (first try plus retries).
            Values below 1 skip the model call and go straight to the
            fallback handling.

    Returns:
        The validated ``schema`` instance, or ``default`` after all attempts
        have failed.

    Raises:
        RuntimeError: Every attempt failed and ``default`` is ``None``. The
            last attempt's exception is attached as ``__cause__``.

    Exceptions other than timeouts and ``ValueError`` subclasses raised by
    ``client.generate`` are not caught here and propagate to the caller.
    """
    msgs = [
        Message(role="system", content=system + "\n\nRespond with JSON only matching the schema."),
        Message(role="user", content=user),
    ]
    last_error: Exception | None = None
    for _ in range(max_attempts):
        try:
            text = await asyncio.wait_for(
                client.generate(msgs, model=model, response_format={"type": "json_object"}),
                timeout=timeout_s,
            )
            # Lowercase once rather than once per pattern.
            head = text.lower()[:80]
            # A refusal phrase only counts when the reply is not a JSON
            # object; an object may legitimately contain such words.
            if not text.strip().startswith("{") and any(p in head for p in REFUSAL_PATTERNS):
                raise ValueError("refusal-shaped response")
            return schema.model_validate_json(text)
        except (ValidationError, ValueError, json.JSONDecodeError, asyncio.TimeoutError) as exc:
            # Keep the cause so a final failure can be diagnosed.
            last_error = exc
            # Retry with a stricter instruction in place of the original one.
            msgs[0] = Message(role="system", content=system + "\n\nRespond with valid JSON ONLY. No prose.")
    if default is None:
        raise RuntimeError(
            f"classify failed for schema {schema.__name__} with no default"
        ) from last_error
    return default
+24
View File
@@ -0,0 +1,24 @@
import pytest
from pydantic import BaseModel
from chat.llm.mock import MockLLMClient
from chat.llm.classify import classify
# Minimal pydantic schema used as the `schema` argument to classify() in the
# tests below.
class Verdict(BaseModel):
    score: int  # integer field; the happy-path test expects 2 here
    reason: str  # string field; the fallback test checks this value
@pytest.mark.asyncio
async def test_classify_parses_valid_json():
    """A well-formed JSON reply is parsed into the requested schema."""
    llm = MockLLMClient(canned=['{"score": 2, "reason": "notable"}'])
    verdict = await classify(
        llm,
        model="m",
        system="x",
        user="y",
        schema=Verdict,
    )
    assert verdict.score == 2
@pytest.mark.asyncio
async def test_classify_falls_back_on_unparseable_after_retry():
    """When no reply parses as Verdict JSON, classify returns the default.

    Assumes MockLLMClient hands out the `canned` replies one per call --
    confirm against the mock's implementation.
    """
    fallback = Verdict(score=1, reason="fallback")
    llm = MockLLMClient(canned=["nope", "still nope"])
    outcome = await classify(
        llm,
        model="m",
        system="x",
        user="y",
        schema=Verdict,
        default=fallback,
    )
    assert outcome.reason == "fallback"