2026-04-26 19:59:30 -04:00
3 changed files with 53 additions and 19 deletions
@@ -65,12 +65,17 @@ async def lifespan(app: FastAPI):
    app.state.settings = settings
    # Cap concurrent Featherless connections to the account's limit
    # (free / lower paid tiers cap at 2). Shared across all
    # FeatherlessClient instances in the process.
    from chat.llm.featherless import FeatherlessClient
    FeatherlessClient.configure_concurrency(settings.featherless_max_concurrent)
    # Background worker for the async significance pass (T22). Each job
    # constructs a fresh FeatherlessClient via the factory; tests can
    # disable enqueue by toggling ``app.state.background_worker.enabled``.
    def _factory():
        from chat.llm.featherless import FeatherlessClient
        return FeatherlessClient(
            api_key=settings.featherless_api_key,
            base_url=settings.featherless_base_url,
@@ -24,7 +24,10 @@ class Settings(BaseModel):
    narrative_budget_hard: int = 8000
    narrative_budget_soft: int = 6000
    classifier_budget_hard: int = 4000
-    classifier_timeout_s: float = 10.0
+    classifier_timeout_s: float = 30.0
    # Featherless free tier and lower paid tiers cap concurrent connections.
    # Set this to your account's max-concurrent-connections limit.
    featherless_max_concurrent: int = 2
    db_path: Path = DEFAULT_DB
    data_dir: Path = REPO_ROOT / "data"
    bind_host: str = "127.0.0.1"
@@ -1,14 +1,39 @@
 from __future__ import annotations
 import asyncio
 from typing import AsyncIterator, Sequence
 from openai import AsyncOpenAI
 from .client import Message
 class FeatherlessClient:
    """Client for Featherless's OpenAI-compatible API.
    Featherless caps concurrent connections per account (2 on free / lower
    paid tiers). A class-level semaphore gates every ``generate`` and
    ``stream`` call so the orchestrator never exceeds the configured cap,
    regardless of how many ``FeatherlessClient`` instances are alive.
    Configure once at app startup via :meth:`configure_concurrency`. The
    default is 2.
    """
    _semaphore: asyncio.Semaphore | None = None
    @classmethod
    def configure_concurrency(cls, max_concurrent: int) -> None:
        cls._semaphore = asyncio.Semaphore(max(1, int(max_concurrent)))
    @classmethod
    def _sem(cls) -> asyncio.Semaphore:
        if cls._semaphore is None:
            cls._semaphore = asyncio.Semaphore(2)
        return cls._semaphore
    def __init__(self, api_key: str, base_url: str = "https://api.featherless.ai/v1"):
        """Create the underlying ``AsyncOpenAI`` client.

        Args:
            api_key: Credential passed straight through to ``AsyncOpenAI``.
            base_url: API root passed to ``AsyncOpenAI``; defaults to the
                Featherless v1 URL.
        """
        openai_client = AsyncOpenAI(base_url=base_url, api_key=api_key)
        self._client = openai_client
    async def generate(self, messages: Sequence[Message], *, model: str, **params) -> str:
        async with self._sem():
            resp = await self._client.chat.completions.create(
                model=model,
                messages=[{"role": m.role, "content": m.content} for m in messages],
@@ -17,6 +42,7 @@ class FeatherlessClient:
            return resp.choices[0].message.content or ""
    async def stream(self, messages: Sequence[Message], *, model: str, **params) -> AsyncIterator[str]:
        async with self._sem():
            stream = await self._client.chat.completions.create(
                model=model,
                messages=[{"role": m.role, "content": m.content} for m in messages],