chat/scripts/start_mlx_server.sh

#!/usr/bin/env bash
# Start the local mlx-omni-server that serves the classifier + embedding
# models. The chat app's RoutedLLMClient routes everything except the
# narrative model to this server; with no MLX server running, classifier
# calls fail and embeddings degrade to the zero-vector fallback.
#
# Run in the foreground:
#   ./scripts/start_mlx_server.sh
# Run as a background daemon (logs to data/mlx-server.log):
#   ./scripts/start_mlx_server.sh --daemon
#
# Models are pulled from Hugging Face on first request; expect a delay
# the first time you exercise the classifier or embedding path.

# Strict mode: abort on command failure, on use of an unset variable, and
# on a failure in any stage of a pipeline.
set -euo pipefail

# Resolve the repository root (the parent of this script's directory).
# CDPATH is cleared for the cd: with CDPATH set, a relative `cd` can
# resolve against a CDPATH entry instead of the current directory and
# echoes its destination to stdout, which would corrupt the captured path.
REPO_ROOT="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)"
readonly REPO_ROOT

readonly VENV="${REPO_ROOT}/.mlx-venv"
readonly SERVER_BIN="${VENV}/bin/mlx-omni-server"
readonly LOG="${REPO_ROOT}/data/mlx-server.log"

# Bind address and port; override via the MLX_HOST / MLX_PORT env vars.
readonly PORT="${MLX_PORT:-10240}"
readonly HOST="${MLX_HOST:-127.0.0.1}"

# Refuse to continue without an executable server binary in the venv, and
# tell the user how to create it. %q shell-quotes the path so the printed
# command stays copy-paste safe when the repo path contains spaces.
if [[ ! -x "${SERVER_BIN}" ]]; then
  {
    printf 'error: mlx-omni-server not installed in %s\n' "${VENV}"
    printf 'create the venv with:\n'
    printf '  python3.12 -m venv %q && %q/bin/pip install mlx-omni-server\n' \
      "${VENV}" "${VENV}"
  } >&2
  exit 1
fi

if [[ "${1:-}" == "--daemon" ]]; then
  # Daemon mode: detach from the terminal and append stdout + stderr to
  # the log file, creating the log directory on first use.
  mkdir -p -- "$(dirname -- "${LOG}")"
  nohup "${SERVER_BIN}" --host "${HOST}" --port "${PORT}" \
    >>"${LOG}" 2>&1 &
  # NOTE: this only reports that the process was launched; nothing here
  # verifies that the server stayed up or is accepting connections.
  printf 'mlx-omni-server started in background (pid %s)\n' "$!"
  printf 'logs: %s\n' "${LOG}"
else
  # Foreground mode: replace this shell with the server process.
  exec "${SERVER_BIN}" --host "${HOST}" --port "${PORT}"
fi