diff --git a/chat/llm/classify.py b/chat/llm/classify.py index bf46554..42697d7 100644 --- a/chat/llm/classify.py +++ b/chat/llm/classify.py @@ -1,11 +1,13 @@ from __future__ import annotations import json import asyncio +import logging from typing import TypeVar from pydantic import BaseModel, ValidationError from .client import LLMClient, Message T = TypeVar("T", bound=BaseModel) +_log = logging.getLogger(__name__) REFUSAL_PATTERNS = ("i can't", "i cannot", "i'm sorry, but", "as an ai") @@ -47,6 +49,8 @@ async def classify( # several seconds on tokens we'll never use. Classifier responses # are small JSON objects — 512 tokens is generous; usual completions # are 50-150. + last_text = None + last_error: BaseException | None = None for attempt in range(3): try: text = await asyncio.wait_for( @@ -58,16 +62,29 @@ async def classify( ), timeout=timeout_s, ) + last_text = text cleaned = _strip_json_fences(text) if any(p in cleaned.lower()[:80] for p in REFUSAL_PATTERNS) and not cleaned.lstrip().startswith("{"): raise ValueError("refusal-shaped response") return schema.model_validate_json(cleaned) - except (ValidationError, ValueError, json.JSONDecodeError, asyncio.TimeoutError): + except (ValidationError, ValueError, json.JSONDecodeError, asyncio.TimeoutError) as exc: + last_error = exc msgs[0] = Message( role="system", content=system + schema_block + "\n\nRespond with valid JSON ONLY. No prose, no markdown fences.", ) continue + # Log when we're falling back so flapping classifiers are + # diagnosable without taking down the request. + snippet = (last_text or "")[:200].replace("\n", " ") + _log.warning( + "classify(%s) exhausted 3 attempts; last_error=%s last_text=%r; " + "falling back to %s", + schema.__name__, + type(last_error).__name__ if last_error else "?", + snippet, + "default" if default is not None else "RuntimeError (no default)", + ) if default is None: raise RuntimeError(f"classify failed for schema {schema.__name__} with no default") return default diff --git a/chat/services/turn_parse.py b/chat/services/turn_parse.py index 9c1e778..971ddf0 100644 --- a/chat/services/turn_parse.py +++ b/chat/services/turn_parse.py @@ -107,13 +107,23 @@ async def parse_turn( without an LLM call (the classifier would error on empty input anyway, and the result is unambiguous). - Raises ``RuntimeError`` if the classifier fails twice — no default - is supplied, since the caller (T19's turn flow) is responsible for - surfacing the error to the user. + Falls back to a single dialogue-shaped segment containing the + whole prose if the classifier flaps after retries — the turn flow + can keep moving (the narrative will still fire on the prose) at + the cost of finer-grained segment classification. The original + code raised ``RuntimeError`` here, which 500'd the whole request + and was particularly painful in multi-bot scenes where every + user turn paid the classifier round-trip. """ if not prose.strip(): return ParsedTurn(segments=[]) + fallback = ParsedTurn( + segments=[TurnSegment(kind="dialogue", text=prose)], + intent="narrative", + landing_state_hint="", + ) + user_prompt = f"INPUT:\n{prose}" return await classify( client, @@ -121,5 +131,6 @@ async def parse_turn( system=_SYSTEM_PROMPT, user=user_prompt, schema=ParsedTurn, + default=fallback, timeout_s=timeout_s, ) diff --git a/tests/test_turn_parse.py b/tests/test_turn_parse.py index 026f9b1..1735f42 100644 --- a/tests/test_turn_parse.py +++ b/tests/test_turn_parse.py @@ -73,11 +73,25 @@ async def test_parse_turn_empty_prose_short_circuits_without_classifier_call(): @pytest.mark.asyncio -async def test_parse_turn_raises_when_classifier_fails_twice(): +async def test_parse_turn_falls_back_to_whole_prose_when_classifier_fails(): + """A flapping classifier (3 invalid responses) no longer 500s the + request. ``parse_turn`` returns the original prose as a single + ``dialogue`` segment so the turn flow can keep moving — the + narrative will still fire on the prose, just without finer-grained + segment classification. + + The old contract was ``RuntimeError`` (no default), but in + production that took down the whole turn endpoint with a 500 the + moment any classifier provider hiccuped — particularly painful in + multi-bot scenes where every user turn pays the parse_turn cost. + """ mock = MockLLMClient(canned=["nope", "still nope", "nope3"]) - with pytest.raises(RuntimeError): - await parse_turn( - mock, - model="m", - prose='*shrugs* "whatever"', - ) + result = await parse_turn( + mock, + model="m", + prose='*shrugs* "whatever"', + ) + assert len(result.segments) == 1 + assert result.segments[0].kind == "dialogue" + assert result.segments[0].text == '*shrugs* "whatever"' + assert result.intent == "narrative"