Feat/ml image optimisations (#1916)

* Use multi stage build to slim down ML image size
* Use gunicorn as WSGI server in ML image
* Configure gunicorn server for ML use case
* Use requirements.txt file to install python dependencies in ML image
* Make ML listen IP configurable
* Revert "Use requirements.txt file to install python dependencies in ML image"
  This reverts commit 32e706c7f3.
* Separate out pip installs in ML builder image
2024-12-28 22:51:59 +00:00 · 2023-03-03 22:45:20 +00:00 · 2023-03-03 22:45:20 +00:00 · d5d0624311
commit d5d0624311
parent 8708867c1c
2 changed files with 45 additions and 10 deletions
--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@@ -1,19 +1,25 @@
-FROM python:3.10
+FROM python:3.10 as builder
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=true
+
+RUN python -m venv /opt/venv
+RUN /opt/venv/bin/pip install --pre torch  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+RUN /opt/venv/bin/pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow gunicorn
+RUN /opt/venv/bin/pip install --no-deps sentence-transformers
+
+FROM python:3.10-slim
+
+COPY --from=builder /opt/venv /opt/venv

 ENV TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=true
+    PATH="/opt/venv/bin:$PATH"

 WORKDIR /usr/src/app

-RUN python -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
-
-RUN pip install --pre torch  -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-RUN pip install transformers tqdm numpy scikit-learn scipy nltk sentencepiece flask Pillow
-RUN pip install --no-deps sentence-transformers
-
 COPY . .

-CMD ["python", "src/main.py"]
+CMD ["gunicorn", "src.main:server"]
--- a/machine-learning/gunicorn.conf.py
+++ b/machine-learning/gunicorn.conf.py
@@ -0,0 +1,29 @@
+"""
+Gunicorn configuration options.
+https://docs.gunicorn.org/en/stable/settings.html
+"""
+import os
+
+
+# Bind address: MACHINE_LEARNING_PORT / MACHINE_LEARNING_IP override the defaults below
+port = os.getenv("MACHINE_LEARNING_PORT") or "3003"  # `or` also replaces an empty value; stays a string
+listen_ip = os.getenv("MACHINE_LEARNING_IP") or "0.0.0.0"  # default: listen on all interfaces
+bind = [f"{listen_ip}:{port}"]  # single "ip:port" entry
+
+# Load the Flask app / models etc. before the worker processes are forked
+preload_app = True
+
+# Logging settings - log to stdout and set log level
+accesslog = "-"  # "-" sends the access log to stdout
+loglevel = os.getenv("MACHINE_LEARNING_LOG_LEVEL") or "info"  # env override, defaults to "info"
+
+# Worker settings
+# ----------------------
+# It is important these are chosen carefully as per
+# https://pythonspeed.com/articles/gunicorn-in-docker/
+# Otherwise we get workers failing to respond to heartbeat checks,
+# especially as requests take a long time to complete.
+workers = 2  # number of worker processes
+threads = 4  # threads per worker process
+worker_tmp_dir = "/dev/shm"  # directory used for the worker heartbeat temp file
+timeout = 60  # seconds a worker may stay silent before it is killed and restarted