immich/machine-learning/app/models.py
Zeeshan Khan 34201be74c
feat(ml) backend takes image over HTTP (#2783)
* using pydantic BaseSetting

* ML API takes image file as input

* keeping image in memory

* reducing duplicate code

* using bytes instead of UploadFile & other small code improvements

* removed form-multipart, using HTTP body

* format code

---------

Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
2023-06-17 22:49:19 -05:00


from pathlib import Path
from typing import Any

import cv2 as cv
import numpy as np
import torch
from insightface.app import FaceAnalysis
from PIL import Image
from sentence_transformers import SentenceTransformer
from transformers import Pipeline, pipeline

from config import settings

# Run models on the GPU when CUDA is available; this also namespaces the model cache.
device = "cuda" if torch.cuda.is_available() else "cpu"


def get_model(model_name: str, model_type: str, **model_kwargs):
    """
    Instantiates the specified model.

    Args:
        model_name: Name of the model in the model hub used for the task.
        model_type: Model type or task, which determines which model zoo is used.
            `facial-recognition` uses InsightFace, while all other models use the HF Model Hub.

            Options:
                `image-classification`, `clip`, `facial-recognition`, `tokenizer`, `processor`

    Returns:
        model: The requested model.
    """
    cache_dir = _get_cache_dir(model_name, model_type)
    match model_type:
        case "facial-recognition":
            model = _load_facial_recognition(
                model_name, cache_dir=cache_dir, **model_kwargs
            )
        case "clip":
            model = SentenceTransformer(
                model_name, cache_folder=cache_dir, **model_kwargs
            )
        case _:
            # Everything else is loaded as a generic Hugging Face pipeline
            # for the given task.
            model = pipeline(
                model_type,
                model_name,
                model_kwargs={"cache_dir": cache_dir, **model_kwargs},
            )
    return model
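
# A minimal usage sketch (illustrative only, not part of the original module);
# the model names below are assumptions chosen for the example, not defaults
# pinned by this file:
#
#     classifier = get_model("microsoft/resnet-50", "image-classification")
#     clip_model = get_model("clip-ViT-B-32", "clip")
#     face_model = get_model("buffalo_l", "facial-recognition")
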
def run_classification(
    model: Pipeline, image: Image.Image, min_score: float | None = None
) -> list[str]:
    predictions: list[dict[str, Any]] = model(image)  # type: ignore
    # A prediction label may hold several comma-separated tags; a set
    # deduplicates tags shared across predictions.
    result = {
        tag
        for pred in predictions
        for tag in pred["label"].split(", ")
        if min_score is None or pred["score"] >= min_score
    }
    return list(result)
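
# Sketch of a call site (a hedged example; the pipeline name, file name, and
# threshold are assumptions):
#
#     pipe = get_model("microsoft/resnet-50", "image-classification")
#     tags = run_classification(pipe, Image.open("photo.jpg"), min_score=0.9)
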
def run_facial_recognition(
    model: FaceAnalysis, image: bytes
) -> list[dict[str, Any]]:
    # Decode the raw request body into a BGR image, as OpenCV expects.
    file_bytes = np.frombuffer(image, dtype=np.uint8)
    img = cv.imdecode(file_bytes, cv.IMREAD_COLOR)
    height, width, _ = img.shape
    results = []
    faces = model.get(img)
    for face in faces:
        x1, y1, x2, y2 = face.bbox
        results.append(
            {
                "imageWidth": width,
                "imageHeight": height,
                "boundingBox": {
                    "x1": round(x1),
                    "y1": round(y1),
                    "x2": round(x2),
                    "y2": round(y2),
                },
                "score": face.det_score.item(),
                "embedding": face.normed_embedding.tolist(),
            }
        )
    return results
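
# Illustrative call (a sketch; the model and file names are placeholders):
#
#     face_model = get_model("buffalo_l", "facial-recognition")
#     with open("photo.jpg", "rb") as f:
#         faces = run_facial_recognition(face_model, f.read())
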
def _load_facial_recognition(
    model_name: str,
    min_face_score: float | None = None,
    cache_dir: Path | str | None = None,
    **model_kwargs,
):
    if cache_dir is None:
        cache_dir = _get_cache_dir(model_name, "facial-recognition")
    if isinstance(cache_dir, Path):
        # FaceAnalysis expects its root directory as a string.
        cache_dir = cache_dir.as_posix()
    if min_face_score is None:
        min_face_score = settings.min_face_score
    model = FaceAnalysis(
        name=model_name,
        root=cache_dir,
        allowed_modules=["detection", "recognition"],
        **model_kwargs,
    )
    model.prepare(ctx_id=0, det_thresh=min_face_score, det_size=(640, 640))
    return model

def _get_cache_dir(model_name: str, model_type: str) -> Path:
    return Path(settings.cache_folder, device, model_type, model_name)
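
# For example (a sketch assuming settings.cache_folder == "/cache" on a CUDA
# machine), _get_cache_dir("clip-ViT-B-32", "clip") resolves to:
#
#     /cache/cuda/clip/clip-ViT-B-32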