diff --git a/machine-learning/Dockerfile b/machine-learning/Dockerfile
index 8e07778fe9..126cfee291 100644
--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@@ -1,19 +1,28 @@
-FROM python:3.10
+# Build stage: install every Python dependency into a self-contained venv
+FROM python:3.10 AS builder
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=true
+
+RUN python -m venv /opt/venv
+RUN /opt/venv/bin/pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+RUN /opt/venv/bin/pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow gunicorn
+RUN /opt/venv/bin/pip install --no-deps sentence-transformers
+
+# Runtime stage: slim image that receives only the prebuilt venv from the builder
+FROM python:3.10-slim
+
+COPY --from=builder /opt/venv /opt/venv
 
 ENV TRANSFORMERS_CACHE=/cache \
     PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=true
+    PATH="/opt/venv/bin:$PATH"
 
 WORKDIR /usr/src/app
 
-RUN python -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-
-RUN pip install --pre torch  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-RUN pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow
-RUN pip install --no-deps sentence-transformers
-
 COPY . .
 
-CMD ["python", "src/main.py"]
+# gunicorn reads ./gunicorn.conf.py from the working directory by default
+CMD ["gunicorn", "src.main:server"]
diff --git a/machine-learning/gunicorn.conf.py b/machine-learning/gunicorn.conf.py
new file mode 100644
index 0000000000..0db0e8ee7d
--- /dev/null
+++ b/machine-learning/gunicorn.conf.py
@@ -0,0 +1,29 @@
+"""
+Gunicorn settings for the machine-learning server.
+Reference: https://docs.gunicorn.org/en/stable/settings.html
+"""
+import os
+
+
+# Listen address: each part falls back to its default when the
+# corresponding environment variable is unset or empty.
+listen_ip = os.environ.get("MACHINE_LEARNING_IP") or "0.0.0.0"
+port = os.environ.get("MACHINE_LEARNING_PORT") or "3003"
+bind = ["{}:{}".format(listen_ip, port)]
+
+# Load the Flask app / models etc. up front, before the server starts
+preload_app = True
+
+# Logging: access log goes to stdout ("-"); level comes from the env
+accesslog = "-"
+loglevel = os.environ.get("MACHINE_LEARNING_LOG_LEVEL") or "info"
+
+# Worker settings
+# ----------------------
+# Pick these with care, following
+# https://pythonspeed.com/articles/gunicorn-in-docker/
+# Poor choices make workers miss heartbeat checks, since requests are slow.
+workers = 2
+threads = 4
+worker_tmp_dir = "/dev/shm"
+timeout = 60