# chat/chat/services/turn_parse.py

"""Turn input parser.

Service-layer function that splits a user's authored turn into typed
segments — ``dialogue``, ``action``, or ``ooc`` (out-of-character).

Per Requirements §6.1 a turn is mixed prose with three conventions:

- ``*action*`` (single asterisks around prose) → action segment.
- Quoted text, or bare prose between the conventions → dialogue.
- ``((double parens))`` → OOC, the author talking to the system rather
  than the bot. Downstream (T19) strips OOC from the prompt sent to the
  bot but keeps it in the transcript display.

A regex-based splitter would be brittle on edge cases (unclosed asterisks,
nested quotes, mixed punctuation), so v1 delegates the segmentation to
the classifier. The configurable ``Settings.ooc_marker`` is *not* read
here: the classifier figures OOC out from ``((`` ``))`` regardless of
config-time choice; marker-based stripping is a downstream concern.

T62 extends the parser with an ``intent`` field so the turn flow can
short-circuit time-skip phrases before the regular narrative path.
``intent`` defaults to ``"narrative"``; the classifier may set it to
``"skip_elision"`` when it detects prose like "skip to when we arrive",
or to ``"skip_jump"`` when it detects prose like "next morning" /
"a week later". ``landing_state_hint`` carries the residual descriptor for
elision skips (the "to when we ..." phrase). Existing callers that
don't read ``intent`` continue to work because the default keeps the
narrative path intact.
"""

from __future__ import annotations

from pydantic import BaseModel

from chat.llm.classify import classify
from chat.llm.client import LLMClient


class TurnSegment(BaseModel):
    """One classified piece of a turn.

    ``kind`` is kept as a plain ``str`` (not a ``Literal``) so an
    unexpected classifier output doesn't crash parsing — callers that
    care about specific values can check defensively.

    Attributes:
        kind: Segment type. The classifier is instructed to emit one of
            ``"dialogue"``, ``"action"``, or ``"ooc"``, but any string
            is accepted by this model.
        text: The segment's content. The classifier is instructed to
            return the inner text with marker characters (asterisks,
            surrounding quotes, double parens) stripped; this model
            does not enforce that.
    """

    # Expected values: "dialogue" | "action" | "ooc" (not validated here).
    kind: str
    # Segment content as returned by the classifier.
    text: str


class ParsedTurn(BaseModel):
    """A turn split into ordered, typed segments.

    ``intent`` distinguishes a regular narrative beat (the default) from
    a natural-language time-skip command (T62). ``landing_state_hint``
    captures the descriptor following "skip to when we ..." for elision
    skips so the downstream skip controller can pass it to the
    narration helper. Both fields are optional: ``intent`` defaults to
    ``"narrative"`` and ``landing_state_hint`` to the empty string, so
    older fixtures and tests that don't supply them keep working.

    Attributes:
        segments: The turn's segments. Required; may be an empty list.
        intent: Expected to be ``"narrative"``, ``"skip_elision"``, or
            ``"skip_jump"``. Typed as plain ``str``, so other values
            are not rejected by this model.
        landing_state_hint: Short landing-state descriptor for
            ``skip_elision`` turns; empty string otherwise.
    """

    # Required: one entry per classified piece of the turn.
    segments: list[TurnSegment]
    # Expected values: "narrative" | "skip_elision" | "skip_jump".
    intent: str = "narrative"
    # Only meaningful for "skip_elision"; empty for the other intents.
    landing_state_hint: str = ""


# System prompt passed to the classifier by ``parse_turn``. It spells out
# the three segment conventions (action / dialogue / OOC), the JSON shape
# the classifier must emit (matching the fields of ``ParsedTurn``), and
# the rules for choosing ``intent`` and filling ``landing_state_hint``.
# The text is sent to the model verbatim, so any wording change here is a
# behaviour change.
_SYSTEM_PROMPT = (
    "You are splitting a roleplay turn into typed segments. The input "
    "is mixed prose with three conventions:\n"
    "- *text in single asterisks* is an ACTION segment.\n"
    "- \"quoted text\" or bare prose between conventions is a DIALOGUE segment.\n"
    "- ((text in double parens)) is an OOC (out-of-character) segment — "
    "the author talking to the system, not the in-fiction bot.\n\n"
    "Output a JSON object with shape "
    '{"segments": [{"kind": "...", "text": "..."}, ...], '
    '"intent": "...", "landing_state_hint": "..."} '
    "where each segment ``kind`` is exactly one of: dialogue, action, "
    "ooc. Preserve the original substring text as ``text``: do not "
    "rewrite, translate, or normalize punctuation — strip only the "
    "marker characters (asterisks, surrounding quotes, double parens) "
    "so ``text`` is the inner content. Emit segments in the order they "
    "appear in the input.\n\n"
    "``intent`` is exactly one of: narrative, skip_elision, skip_jump. "
    "Default to ``narrative``. Use ``skip_elision`` when the prose is a "
    "directive to fast-forward an in-progress activity to a near-term "
    "landing state — e.g. 'skip to when we arrive', 'fast-forward to "
    "after dinner'. Use ``skip_jump`` when the prose denotes a longer "
    "fiction-time bridge — e.g. 'next morning', 'a week later', 'the "
    "following day'.\n"
    "``landing_state_hint`` is a short descriptor of the landing state "
    "for ``skip_elision`` (e.g. 'we arrive at the park'). Empty string "
    "for ``skip_jump`` and ``narrative``."
)


async def parse_turn(
    client: LLMClient,
    *,
    model: str,
    prose: str,
    timeout_s: float = 10.0,
) -> ParsedTurn:
    """Split an authored turn into ordered, typed segments.

    Blank input (empty or whitespace-only) yields an empty
    ``ParsedTurn`` straight away, with no LLM round trip. Any other
    input is forwarded to :func:`chat.llm.classify.classify` along with
    this module's system prompt and the ``ParsedTurn`` schema, and the
    classifier's result is returned unchanged.

    Args:
        client: LLM client handed through to ``classify``.
        model: Model identifier handed through to ``classify``.
        prose: The turn text as the user authored it.
        timeout_s: Timeout value forwarded to ``classify``.

    Returns:
        The classifier's result for non-blank prose; otherwise a
        ``ParsedTurn`` with no segments and default ``intent`` /
        ``landing_state_hint``.

    Raises:
        RuntimeError: Expected from ``classify`` when it fails twice
            (that behaviour lives in ``chat.llm.classify``, not in this
            module). Nothing is caught or defaulted here; the caller
            (T19's turn flow) is responsible for surfacing the error.
    """
    has_content = bool(prose.strip())
    if has_content:
        # The prose is sent verbatim (not stripped) under an INPUT: header.
        classifier_input = f"INPUT:\n{prose}"
        parsed = await classify(
            client,
            model=model,
            system=_SYSTEM_PROMPT,
            user=classifier_input,
            schema=ParsedTurn,
            timeout_s=timeout_s,
        )
        return parsed

    # Nothing to classify: the answer is unambiguous, so skip the LLM call.
    return ParsedTurn(segments=[])