feat: event-lifecycle detection service (T52)

2026-04-26 20:09:13 -04:00
parent da1f67fb6a
commit 98250644ad
2 changed files with 175 additions and 0 deletions
@@ -0,0 +1,72 @@
+"""Event-lifecycle detection (T52).
+
+After each turn, classify whether any active events transitioned
+(started, completed, cancelled). Conservative bias — most turns
+return empty. T61 turn flow appends one event_started/completed/
+cancelled per transition via append_and_apply.
+"""
+
+from __future__ import annotations
+from pydantic import BaseModel, Field
+
+from chat.llm.classify import classify
+from chat.llm.client import LLMClient
+
+
+class EventTransition(BaseModel):  # one status change proposed for one event
+    event_id: str  # expected to match an event_id from active_events
+    new_status: str  # "active" | "completed" | "cancelled" (not enforced by the type)
+    reason: str = ""  # free-text explanation; empty when not supplied
+
+
+class EventLifecycleDecision(BaseModel):  # classifier result for one turn
+    transitions: list[EventTransition] = Field(default_factory=list)  # [] = no change
+
+
+_SYSTEM = (  # system prompt for the classifier; biases toward no transitions
+    "You decide whether any active events transitioned this turn. "
+    "STRONGLY default to empty transitions — most turns do NOT resolve "
+    "or start a known event. Output only transitions that the narrative "
+    "text clearly resolves or starts. Each transition MUST reference an "
+    "event_id from the active_events list. new_status is one of "
+    "'active' (planned -> active), 'completed', or 'cancelled'. "
+    "Output strict JSON matching the schema."
+)
+
+
+async def detect_event_transitions(
+    client: LLMClient,
+    *,
+    classifier_model: str,
+    narrative_text: str,
+    active_events: list[dict],   # [{event_id, kind, status, props}, ...]
+    timeout_s: float = 30.0,
+) -> EventLifecycleDecision:
+    """Classify event transitions for the latest turn. Empty active_events
+    short-circuits without an LLM call."""
+    if not active_events:
+        return EventLifecycleDecision()
+
+    user_lines = ["Active events:"]
+    for ev in active_events:
+        user_lines.append(
+            f"- event_id={ev['event_id']} kind={ev['kind']} "
+            f"status={ev['status']} props={ev.get('props', {})}"
+        )
+    user_lines.append("")
+    user_lines.append("Latest narrative:")
+    user_lines.append(narrative_text.strip())
+    user = "\n".join(user_lines)
+
+    return await classify(
+        client,
+        model=classifier_model,
+        system=_SYSTEM,
+        user=user,
+        schema=EventLifecycleDecision,
+        default=EventLifecycleDecision(),
+        timeout_s=timeout_s,
+    )
+
+
+__all__ = ["EventTransition", "EventLifecycleDecision", "detect_event_transitions"]
@@ -0,0 +1,103 @@
+"""Tests for the event-lifecycle detection service (T52).
+
+Per Phase 3, after each narrated turn we ask a classifier whether any
+active events transitioned (started, completed, cancelled). The bias is
+strongly toward an empty result — most turns do NOT resolve or start a
+known event, and the turn-flow caller (T61) only appends an
+event_started/completed/cancelled record when this service yields one.
+
+These tests cover:
+
+* The classifier returning a single transition is honored end-to-end.
+* An empty ``active_events`` list short-circuits before any LLM call,
+  so callers that hold no live events pay zero classifier cost.
+* Three rounds of malformed JSON exhaust ``classify``'s retries and we
+  fall back to the empty default — graceful degradation per §3.3.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from chat.llm.mock import MockLLMClient
+from chat.services.event_lifecycle import (
+    EventLifecycleDecision,
+    detect_event_transitions,
+)
+
+
+@pytest.mark.asyncio
+async def test_detects_one_transition_happy_path():
+    canned = json.dumps(
+        {
+            "transitions": [
+                {
+                    "event_id": "evt_1",
+                    "new_status": "completed",
+                    "reason": "they arrived at the park",
+                }
+            ]
+        }
+    )
+    mock = MockLLMClient(canned=[canned])
+    result = await detect_event_transitions(
+        mock,
+        classifier_model="x",
+        narrative_text="They walked through the park gate, finally there.",
+        active_events=[
+            {
+                "event_id": "evt_1",
+                "kind": "date_at_park",
+                "status": "active",
+                "props": {},
+            }
+        ],
+    )
+    assert isinstance(result, EventLifecycleDecision)
+    assert len(result.transitions) == 1
+    assert result.transitions[0].event_id == "evt_1"
+    assert result.transitions[0].new_status == "completed"
+    assert result.transitions[0].reason == "they arrived at the park"
+
+
+@pytest.mark.asyncio
+async def test_empty_active_events_short_circuits_without_classifier_call():
+    """No active events -> no classifier call.
+
+    The mock has an empty canned list; any ``generate`` call would raise
+    ``IndexError`` from ``list.pop(0)``. The test passing proves the
+    short-circuit holds.
+    """
+    mock = MockLLMClient(canned=[])
+    result = await detect_event_transitions(
+        mock,
+        classifier_model="x",
+        narrative_text="Just a quiet moment between them.",
+        active_events=[],
+    )
+    assert isinstance(result, EventLifecycleDecision)
+    assert result.transitions == []
+
+
+@pytest.mark.asyncio
+async def test_classifier_failure_returns_empty_default():
+    """``classify`` retries 3 times; after all fail it returns the empty
+    default so the turn flow keeps moving (§3.3 graceful degradation)."""
+    mock = MockLLMClient(canned=["bad", "bad", "bad"])
+    result = await detect_event_transitions(
+        mock,
+        classifier_model="x",
+        narrative_text="Some text the classifier will choke on.",
+        active_events=[
+            {
+                "event_id": "evt_1",
+                "kind": "date_at_park",
+                "status": "active",
+                "props": {},
+            }
+        ],
+    )
+    assert isinstance(result, EventLifecycleDecision)
+    assert result.transitions == []