Phase 1: v1 single-bot roleplay engine #1

Merged
dohertj2 merged 45 commits from phase-1 into main 2026-04-26 19:59:30 -04:00
3 changed files with 53 additions and 19 deletions
Showing only changes of commit 5c039c8e56 - Show all commits
+7 -2
View File
@@ -65,12 +65,17 @@ async def lifespan(app: FastAPI):
app.state.settings = settings app.state.settings = settings
# Cap concurrent Featherless connections to the account's limit
# (free / lower paid tiers cap at 2). Shared across all
# FeatherlessClient instances in the process.
from chat.llm.featherless import FeatherlessClient
FeatherlessClient.configure_concurrency(settings.featherless_max_concurrent)
# Background worker for the async significance pass (T22). Each job # Background worker for the async significance pass (T22). Each job
# constructs a fresh FeatherlessClient via the factory; tests can # constructs a fresh FeatherlessClient via the factory; tests can
# disable enqueue by toggling ``app.state.background_worker.enabled``. # disable enqueue by toggling ``app.state.background_worker.enabled``.
def _factory(): def _factory():
from chat.llm.featherless import FeatherlessClient
return FeatherlessClient( return FeatherlessClient(
api_key=settings.featherless_api_key, api_key=settings.featherless_api_key,
base_url=settings.featherless_base_url, base_url=settings.featherless_base_url,
+4 -1
View File
@@ -24,7 +24,10 @@ class Settings(BaseModel):
narrative_budget_hard: int = 8000 narrative_budget_hard: int = 8000
narrative_budget_soft: int = 6000 narrative_budget_soft: int = 6000
classifier_budget_hard: int = 4000 classifier_budget_hard: int = 4000
classifier_timeout_s: float = 10.0 classifier_timeout_s: float = 30.0
# Featherless free tier and lower paid tiers cap concurrent connections.
# Set this to your account's max-concurrent-connections limit.
featherless_max_concurrent: int = 2
db_path: Path = DEFAULT_DB db_path: Path = DEFAULT_DB
data_dir: Path = REPO_ROOT / "data" data_dir: Path = REPO_ROOT / "data"
bind_host: str = "127.0.0.1" bind_host: str = "127.0.0.1"
+26
View File
@@ -1,14 +1,39 @@
from __future__ import annotations from __future__ import annotations
import asyncio
from typing import AsyncIterator, Sequence from typing import AsyncIterator, Sequence
from openai import AsyncOpenAI from openai import AsyncOpenAI
from .client import Message from .client import Message
class FeatherlessClient: class FeatherlessClient:
"""Client for Featherless's OpenAI-compatible API.
Featherless caps concurrent connections per account (2 on free / lower
paid tiers). A class-level semaphore gates every ``generate`` and
``stream`` call so the orchestrator never exceeds the configured cap,
regardless of how many ``FeatherlessClient`` instances are alive.
Configure once at app startup via :meth:`configure_concurrency`. The
default is 2.
"""
_semaphore: asyncio.Semaphore | None = None
@classmethod
def configure_concurrency(cls, max_concurrent: int) -> None:
    """Install the class-wide semaphore used to cap concurrent calls.

    ``max_concurrent`` is coerced with ``int()`` and raised to 1 when it
    is smaller, so the stored semaphore always has at least one permit.
    Any semaphore stored previously on the class is replaced.
    """
    permits = int(max_concurrent)
    if permits < 1:
        # Never build a zero/negative-permit semaphore.
        permits = 1
    cls._semaphore = asyncio.Semaphore(permits)
@classmethod
def _sem(cls) -> asyncio.Semaphore:
    """Return the shared semaphore, creating a 2-permit one on first use.

    When no semaphore has been stored yet (``_semaphore`` is ``None``), a
    new ``asyncio.Semaphore(2)`` is created, saved on the class, and
    returned; later calls hand back that same object.
    """
    shared = cls._semaphore
    if shared is None:
        # Lazily fall back to the default cap of two permits.
        shared = asyncio.Semaphore(2)
        cls._semaphore = shared
    return shared
def __init__(self, api_key: str, base_url: str = "https://api.featherless.ai/v1"): def __init__(self, api_key: str, base_url: str = "https://api.featherless.ai/v1"):
self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str: async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str:
async with self._sem():
resp = await self._client.chat.completions.create( resp = await self._client.chat.completions.create(
model=model, model=model,
messages=[{"role": m.role, "content": m.content} for m in messages], messages=[{"role": m.role, "content": m.content} for m in messages],
@@ -17,6 +42,7 @@ class FeatherlessClient:
return resp.choices[0].message.content or "" return resp.choices[0].message.content or ""
async def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]: async def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]:
async with self._sem():
stream = await self._client.chat.completions.create( stream = await self._client.chat.completions.create(
model=model, model=model,
messages=[{"role": m.role, "content": m.content} for m in messages], messages=[{"role": m.role, "content": m.content} for m in messages],