From a9bbea8c341d734c6a2b9b145a4cee90c1990961 Mon Sep 17 00:00:00 2001
From: geoffsee <>
Date: Mon, 2 Jun 2025 13:51:05 -0400
Subject: [PATCH] hotfix: add default for local-inference

---
 README.md    | 36 ++++++++++++++++++++----------------
 package.json |  2 +-
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 4c50064..df5a648 100644
--- a/README.md
+++ b/README.md
@@ -15,10 +15,10 @@
 - [Installation](#installation)
 - [Deployment](#deployment)
 - [Local Inference](#local-inference)
+  - [mlx-omni-server (default)](#mlx-omni-server)
+  - [Adding models](#adding-models-for-local-inference-apple-silicon)
   - [Ollama](#ollama)
-  - [Adding models for local inference (ollama)](#adding-models-for-local-inference-ollama)
-  - [mlx-omni-server (Apple Silicon Only)](#mlx-omni-server-apple-silicon-only)
-  - [Adding models for local inference (Apple Silicon)](#adding-models-for-local-inference-apple-silicon)
+  - [Adding models](#adding-models-for-local-inference-ollama)
 - [Testing](#testing)
 - [Troubleshooting](#troubleshooting)
 - [History](#history)
@@ -53,20 +53,9 @@
 
 ## Local Inference
 > Local inference is achieved by overriding the `OPENAI_API_KEY` and `OPENAI_API_ENDPOINT` environment variables. See below.
 
-### Ollama
-~~~bash
-bun run openai:local ollama # Start ollama server
-bun run openai:local:enable # Configure connection
-bun run server:dev # Restart server
-~~~
-#### Adding models for local inference (ollama)
-~~~bash
-# See https://ollama.com/library for available models
-MODEL_TO_ADD=gemma3
-docker exec -it ollama ollama run ${MODEL_TO_ADD}
-~~~
-### mlx-omni-server (Apple Silicon Only)
+### mlx-omni-server
+The default backend. Requires Apple Silicon; use Ollama on other platforms.
 ~~~bash
 # (prereq) install mlx-omni-server
 brew tap seemueller-io/tap
@@ -92,6 +81,21 @@ curl http://localhost:10240/v1/chat/completions \
 }"
 ~~~
 
+### Ollama
+~~~bash
+bun run openai:local:ollama # Start ollama server
+bun run openai:local:enable # Configure connection
+bun run server:dev # Restart server
+~~~
+#### Adding models for local inference (ollama)
+
+~~~bash
+# See https://ollama.com/library for available models
+MODEL_TO_ADD=gemma3
+docker exec -it ollama ollama run ${MODEL_TO_ADD}
+~~~
+
+
 ## Testing
 Tests are located in `__tests__` directories next to the code they test. Testing is incomplete at this time.
 
diff --git a/package.json b/package.json
index 73df755..7e464ed 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,7 @@
     "tail:email-service": "wrangler tail -c workers/email/wrangler-email.toml",
     "tail:analytics-service": "wrangler tail -c workers/analytics/wrangler-analytics.toml",
     "tail:session-proxy": "wrangler tail -c workers/session-proxy/wrangler-session-proxy.toml --env production",
-    "openai:local": "./scripts/start_inference_server.sh",
+    "openai:local": "./scripts/start_inference_server.sh mlx-omni-server",
     "openai:local:mlx": "./scripts/start_inference_server.sh mlx-omni-server",
     "openai:local:ollama": "./scripts/start_inference_server.sh ollama",
     "openai:local:configure": "scripts/configure_local_inference.sh",
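
A note on the override the README blockquote describes: this patch does not include `scripts/configure_local_inference.sh`, so the exact values it sets are not visible here. As a rough sketch under stated assumptions, pointing the app at the default mlx-omni-server backend amounts to something like the following; the port is taken from the README's curl example, while the `/v1`-or-not shape of the endpoint and the key value are assumptions.

~~~bash
# Sketch only: approximate effect of the OPENAI_* override for local inference.
# Port 10240 comes from the README's curl example; the key is a dummy value,
# since a local OpenAI-compatible server typically does not validate it.
export OPENAI_API_ENDPOINT="http://localhost:10240"
export OPENAI_API_KEY="local-dev-placeholder"
~~~

In practice one would run `bun run openai:local:configure` (the `configure_local_inference.sh` script wired up in package.json) rather than exporting these by hand; the sketch is only meant to make the blockquote concrete.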