Phase 1: v1 single-bot roleplay engine #1

Merged
dohertj2 merged 45 commits from phase-1 into main 2026-04-26 19:59:30 -04:00
3 changed files with 53 additions and 19 deletions
Showing only changes of commit 5c039c8e56 - Show all commits
+7 -2
View File
@@ -65,12 +65,17 @@ async def lifespan(app: FastAPI):
app.state.settings = settings
# Cap concurrent Featherless connections to the account's limit
# (free / lower paid tiers cap at 2). Shared across all
# FeatherlessClient instances in the process.
from chat.llm.featherless import FeatherlessClient
FeatherlessClient.configure_concurrency(settings.featherless_max_concurrent)
# Background worker for the async significance pass (T22). Each job
# constructs a fresh FeatherlessClient via the factory; tests can
# disable enqueue by toggling ``app.state.background_worker.enabled``.
def _factory():
from chat.llm.featherless import FeatherlessClient
return FeatherlessClient(
api_key=settings.featherless_api_key,
base_url=settings.featherless_base_url,
+4 -1
View File
@@ -24,7 +24,10 @@ class Settings(BaseModel):
narrative_budget_hard: int = 8000
narrative_budget_soft: int = 6000
classifier_budget_hard: int = 4000
classifier_timeout_s: float = 10.0
classifier_timeout_s: float = 30.0
# Featherless free tier and lower paid tiers cap concurrent connections.
# Set this to your account's max-concurrent-connections limit.
featherless_max_concurrent: int = 2
db_path: Path = DEFAULT_DB
data_dir: Path = REPO_ROOT / "data"
bind_host: str = "127.0.0.1"
+42 -16
View File
@@ -1,29 +1,55 @@
from __future__ import annotations
import asyncio
from typing import AsyncIterator, Sequence
from openai import AsyncOpenAI
from .client import Message
class FeatherlessClient:
    """Client for Featherless's OpenAI-compatible API.

    Featherless caps concurrent connections per account (2 on free / lower
    paid tiers). A class-level semaphore gates every ``generate`` and
    ``stream`` call so the orchestrator never exceeds the configured cap,
    regardless of how many ``FeatherlessClient`` instances are alive.

    Configure once at app startup via :meth:`configure_concurrency`. The
    default is 2.
    """

    # Shared by every instance; created lazily by ``_sem`` when
    # ``configure_concurrency`` has not been called.
    _semaphore: asyncio.Semaphore | None = None

    @classmethod
    def configure_concurrency(cls, max_concurrent: int) -> None:
        """Install a fresh semaphore allowing ``max_concurrent`` calls.

        Values below 1 are clamped to 1 so the client can always make
        progress. Replaces any previously installed semaphore.
        """
        cls._semaphore = asyncio.Semaphore(max(1, int(max_concurrent)))

    @classmethod
    def _sem(cls) -> asyncio.Semaphore:
        """Return the shared semaphore, creating the default (2) on first use."""
        if cls._semaphore is None:
            cls._semaphore = asyncio.Semaphore(2)
        return cls._semaphore

    def __init__(self, api_key: str, base_url: str = "https://api.featherless.ai/v1"):
        """Build the underlying ``AsyncOpenAI`` client for ``base_url``."""
        self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)

    async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str:
        """Run one non-streaming chat completion and return its text.

        Extra keyword arguments are forwarded unchanged to
        ``chat.completions.create``. Returns ``""`` when the first choice
        carries no content.
        """
        async with self._sem():
            resp = await self._client.chat.completions.create(
                model=model,
                messages=[{"role": m.role, "content": m.content} for m in messages],
                **params,
            )
            return resp.choices[0].message.content or ""

    async def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]:
        """Yield the non-empty text deltas of a streaming chat completion.

        The concurrency slot is held for the whole lifetime of the stream,
        not just the initial request, because the connection stays open
        until the last chunk has been consumed.
        """
        async with self._sem():
            stream = await self._client.chat.completions.create(
                model=model,
                messages=[{"role": m.role, "content": m.content} for m in messages],
                stream=True,
                **params,
            )
            async for chunk in stream:
                # OpenAI-compatible servers may emit chunks with an empty
                # ``choices`` list (e.g. a usage-only final chunk); skip
                # them instead of raising IndexError mid-stream.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta.content or ""
                if delta:
                    yield delta