chore(ml): improved logging (#3918)

* fixed `minScore` not being set correctly * apply to init * don't send `enabled` * fix eslint warning * added logger * added logging * refinements * enable access log for info level * formatting * merged strings --------- Co-authored-by: Alex <alex.tran1502@gmail.com>
2025-04-21 07:26:25 +02:00 · 2023-08-30 04:22:01 -04:00 · 2023-08-30 04:22:01 -04:00 · 54b2779b79
commit 54b2779b79
parent df26e12db6
5 changed files with 92 additions and 11 deletions
--- a/machine-learning/app/config.py
+++ b/machine-learning/app/config.py
@ -1,7 +1,11 @@
+import logging
 import os
 from pathlib import Path

+import starlette
 from pydantic import BaseSettings
+from rich.console import Console
+from rich.logging import RichHandler

 from .schemas import ModelType

@ -23,6 +27,14 @@ class Settings(BaseSettings):
        case_sensitive = False


+class LogSettings(BaseSettings):
+    log_level: str = "info"
+    no_color: bool = False
+
+    class Config:
+        case_sensitive = False
+
+
 _clean_name = str.maketrans(":\\/", "___", ".")


@ -30,4 +42,26 @@ def get_cache_dir(model_name: str, model_type: ModelType) -> Path:
    return Path(settings.cache_folder) / model_type.value / model_name.translate(_clean_name)


+LOG_LEVELS: dict[str, int] = {
+    "critical": logging.ERROR,
+    "error": logging.ERROR,
+    "warning": logging.WARNING,
+    "warn": logging.WARNING,
+    "info": logging.INFO,
+    "log": logging.INFO,
+    "debug": logging.DEBUG,
+    "verbose": logging.DEBUG,
+}
+
 settings = Settings()
+log_settings = LogSettings()
+
+console = Console(color_system="standard", no_color=log_settings.no_color)
+logging.basicConfig(
+    format="%(message)s",
+    handlers=[
+        RichHandler(show_path=False, omit_repeated_times=False, console=console, tracebacks_suppress=[starlette])
+    ],
+)
+log = logging.getLogger("uvicorn")
+log.setLevel(LOG_LEVELS.get(log_settings.log_level.lower(), logging.INFO))
--- a/machine-learning/app/main.py
+++ b/machine-learning/app/main.py
@ -1,4 +1,5 @@
 import asyncio
+import logging
 import os
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any
@ -11,7 +12,7 @@ from starlette.formparsers import MultiPartParser

 from app.models.base import InferenceModel

-from .config import settings
+from .config import log, settings
 from .models.cache import ModelCache
 from .schemas import (
    MessageResponse,
@ -20,14 +21,20 @@ from .schemas import (
 )

 MultiPartParser.max_file_size = 2**24  # spools to disk if payload is 16 MiB or larger
-
 app = FastAPI()


 def init_state() -> None:
    app.state.model_cache = ModelCache(ttl=settings.model_ttl, revalidate=settings.model_ttl > 0)
+    log.info(
+        (
+            "Created in-memory cache with unloading "
+            f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
+        )
+    )
    # asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
    app.state.thread_pool = ThreadPoolExecutor(settings.request_threads)
+    log.info(f"Initialized request thread pool with {settings.request_threads} threads.")


@app.on_event("startup")
@ -77,4 +84,6 @@ if __name__ == "__main__":
        port=settings.port,
        reload=is_dev,
        workers=settings.workers,
+        log_config=None,
+        access_log=log.isEnabledFor(logging.INFO),
    )
--- a/machine-learning/app/models/base.py
+++ b/machine-learning/app/models/base.py
@ -1,6 +1,5 @@
 from __future__ import annotations

-import os
 import pickle
 from abc import ABC, abstractmethod
 from pathlib import Path
@ -11,7 +10,7 @@ from zipfile import BadZipFile
 import onnxruntime as ort
 from onnxruntime.capi.onnxruntime_pybind11_state import InvalidProtobuf  # type: ignore

-from ..config import get_cache_dir, settings
+from ..config import get_cache_dir, log, settings
 from ..schemas import ModelType


@ -37,22 +36,41 @@ class InferenceModel(ABC):
        self.provider_options = model_kwargs.pop(
            "provider_options", [{"arena_extend_strategy": "kSameAsRequested"}] * len(self.providers)
        )
+        log.debug(
+            (
+                f"Setting '{self.model_name}' execution providers to {self.providers}"
+                "in descending order of preference"
+            ),
+        )
+        log.debug(f"Setting execution provider options to {self.provider_options}")
        self.sess_options = PicklableSessionOptions()
        # avoid thread contention between models
        if inter_op_num_threads > 1:
            self.sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
+
+        log.debug(f"Setting execution_mode to {self.sess_options.execution_mode.name}")
+        log.debug(f"Setting inter_op_num_threads to {inter_op_num_threads}")
+        log.debug(f"Setting intra_op_num_threads to {intra_op_num_threads}")
        self.sess_options.inter_op_num_threads = inter_op_num_threads
        self.sess_options.intra_op_num_threads = intra_op_num_threads

        try:
            loader(**model_kwargs)
        except (OSError, InvalidProtobuf, BadZipFile):
+            log.warn(
+                (
+                    f"Failed to load {self.model_type.replace('_', ' ')} model '{self.model_name}'."
+                    "Clearing cache and retrying."
+                )
+            )
            self.clear_cache()
            loader(**model_kwargs)

    def download(self, **model_kwargs: Any) -> None:
        if not self.cached:
-            print(f"Downloading {self.model_type.value.replace('_', ' ')} model. This may take a while...")
+            log.info(
+                (f"Downloading {self.model_type.replace('_', ' ')} model '{self.model_name}'." "This may take a while.")
+            )
            self._download(**model_kwargs)

    def load(self, **model_kwargs: Any) -> None:
@ -62,7 +80,7 @@ class InferenceModel(ABC):

    def predict(self, inputs: Any, **model_kwargs: Any) -> Any:
        if not self._loaded:
-            print(f"Loading {self.model_type.value.replace('_', ' ')} model...")
+            log.info(f"Loading {self.model_type.replace('_', ' ')} model '{self.model_name}'")
            self.load()
        if model_kwargs:
            self.configure(**model_kwargs)
@ -109,13 +127,23 @@ class InferenceModel(ABC):

    def clear_cache(self) -> None:
        if not self.cache_dir.exists():
+            log.warn(
+                f"Attempted to clear cache for model '{self.model_name}' but cache directory does not exist.",
+            )
            return
        if not rmtree.avoids_symlink_attacks:
            raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform.")

        if self.cache_dir.is_dir():
+            log.info(f"Cleared cache directory for model '{self.model_name}'.")
            rmtree(self.cache_dir)
        else:
+            log.warn(
+                (
+                    f"Encountered file instead of directory at cache path "
+                    f"for '{self.model_name}'. Removing file and replacing with a directory."
+                ),
+            )
            self.cache_dir.unlink()
        self.cache_dir.mkdir(parents=True, exist_ok=True)

--- a/machine-learning/app/models/clip.py
+++ b/machine-learning/app/models/clip.py
@ -12,6 +12,7 @@ from clip_server.model.tokenization import Tokenizer
 from PIL import Image
 from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor

+from ..config import log
 from ..schemas import ModelType
 from .base import InferenceModel

@ -105,9 +106,11 @@ class CLIPEncoder(InferenceModel):
        if model_name in _MODELS:
            return model_name
        elif model_name in _ST_TO_JINA_MODEL_NAME:
-            print(
-                (f"Warning: Sentence-Transformer model names such as '{model_name}' are no longer supported."),
-                (f"Using '{_ST_TO_JINA_MODEL_NAME[model_name]}' instead as it is the best match for '{model_name}'."),
+            log.warn(
+                (
+                    f"Sentence-Transformer models like '{model_name}' are not supported."
+                    f"Using '{_ST_TO_JINA_MODEL_NAME[model_name]}' instead as it is the best match for '{model_name}'."
+                ),
            )
            return _ST_TO_JINA_MODEL_NAME[model_name]
        else:
--- a/machine-learning/app/models/image_classification.py
+++ b/machine-learning/app/models/image_classification.py
@ -8,6 +8,7 @@ from optimum.pipelines import pipeline
 from PIL import Image
 from transformers import AutoImageProcessor

+from ..config import log
 from ..schemas import ModelType
 from .base import InferenceModel

@ -35,19 +36,25 @@ class ImageClassifier(InferenceModel):
        )

    def _load(self, **model_kwargs: Any) -> None:
-        processor = AutoImageProcessor.from_pretrained(self.cache_dir)
+        processor = AutoImageProcessor.from_pretrained(self.cache_dir, cache_dir=self.cache_dir)
+        model_path = self.cache_dir / "model.onnx"
        model_kwargs |= {
            "cache_dir": self.cache_dir,
            "provider": self.providers[0],
            "provider_options": self.provider_options[0],
            "session_options": self.sess_options,
        }
-        model_path = self.cache_dir / "model.onnx"

        if model_path.exists():
            model = ORTModelForImageClassification.from_pretrained(self.cache_dir, **model_kwargs)
            self.model = pipeline(self.model_type.value, model, feature_extractor=processor)
        else:
+            log.info(
+                (
+                    f"ONNX model not found in cache directory for '{self.model_name}'."
+                    "Exporting optimized model for future use."
+                ),
+            )
            self.sess_options.optimized_model_filepath = model_path.as_posix()
            self.model = pipeline(
                self.model_type.value,