diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..42a1878
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,56 @@
+services:
+  # Main orchestration server - handles inference and embeddings
+  predict-otron-9000:
+    build:
+      context: .
+      dockerfile: crates/predict-otron-9000/Dockerfile
+    ports:
+      - "8080:8080"
+    environment:
+      - SERVER_PORT=8080
+      - RUST_LOG=${RUST_LOG:-info}
+      - HF_TOKEN=${HF_TOKEN}
+      - HF_HOME=/app/.hf-cache
+    volumes:
+      # Mount HF cache to persist downloaded models
+      - hf-cache:/app/.hf-cache
+      # Mount FastEmbed cache for embeddings
+      - fastembed-cache:/app/.fastembed_cache
+    networks:
+      - predict-otron-network
+    healthcheck:
+      # NOTE(review): assumes curl is installed in the image and that GET /
+      # returns a success status — confirm against the Dockerfile and routes
+      test: ["CMD", "curl", "-f", "http://localhost:8080"]
+      interval: 5s
+      # 5s (not 1s): a 1s curl timeout flaps while models are loading, which
+      # blocks leptos-chat's service_healthy gate from ever being satisfied
+      timeout: 5s
+      retries: 10
+      start_period: 10s
+
+  # Web frontend - Leptos WASM chat interface
+  leptos-chat:
+    build:
+      context: crates/leptos-chat
+      dockerfile: Dockerfile
+    ports:
+      - "8788:8788"
+    depends_on:
+      predict-otron-9000:
+        condition: service_healthy
+    networks:
+      - predict-otron-network
+    environment:
+      # Configure API endpoint for the frontend to connect to backend
+      - API_BASE_URL=http://predict-otron-9000:8080
+
+volumes:
+  # Persistent storage for Hugging Face model cache
+  hf-cache:
+    driver: local
+  # Persistent storage for FastEmbed model cache
+  fastembed-cache:
+    driver: local
+
+networks:
+  predict-otron-network:
+    driver: bridge