1
0
Fork 0
mirror of https://github.com/immich-app/immich.git synced 2025-01-09 13:26:47 +01:00
immich/machine-learning/app/models/clip.py
Mert a2f5674bbb
refactor(ml): modularization and styling (#2835)
* basic refactor and styling

* removed batching

* module entrypoint

* removed unused imports

* model superclass, model cache now in app state

* fixed cache dir and enforced abstract method

---------

Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
2023-06-24 22:18:09 -05:00

37 lines
995 B
Python

from pathlib import Path
from PIL.Image import Image
from sentence_transformers import SentenceTransformer
from ..schemas import ModelType
from .base import InferenceModel
class CLIPSTEncoder(InferenceModel):
    """CLIP encoder backed by a ``SentenceTransformer`` model.

    The wrapped model is created in ``__init__`` and stored on ``self.model``;
    ``predict`` encodes a single image or text input with it.
    """

    _model_type = ModelType.CLIP

    def __init__(
        self,
        model_name: str,
        cache_dir: Path | None = None,
        **model_kwargs,
    ):
        """Initialise the base class, then load the SentenceTransformer model.

        Args:
            model_name: Passed to the base-class initialiser; the model is
                then loaded using ``self.model_name``.
            cache_dir: Passed to the base-class initialiser.
                NOTE(review): ``self.cache_dir`` is dereferenced
                unconditionally below, so the base class presumably replaces
                ``None`` with a default path -- confirm in ``InferenceModel``.
            **model_kwargs: Extra keyword arguments forwarded unchanged to
                ``SentenceTransformer``.
        """
        super().__init__(model_name, cache_dir)

        name = self.model_name
        # The cache location is handed over as a POSIX-style string.
        cache_folder = self.cache_dir.as_posix()
        self.model = SentenceTransformer(
            name,
            cache_folder=cache_folder,
            **model_kwargs,
        )

    def predict(self, image_or_text: Image | str) -> list[float]:
        """Encode *image_or_text* and return the result as a plain list.

        The input is passed to ``self.model.encode`` and the result is
        converted with ``.tolist()``.
        """
        embedding = self.model.encode(image_or_text)
        return embedding.tolist()
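# Stub subclasses: CLIPSTVisionEncoder and CLIPSTTextEncoder currently differ
# from CLIPSTEncoder only in their model type. They exist so that the two can
# behave differently in the future and load separate image and text CLIP models.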
class CLIPSTVisionEncoder(CLIPSTEncoder):
    """Same as ``CLIPSTEncoder`` except that its model type is ``ModelType.CLIP_VISION``."""

    _model_type = ModelType.CLIP_VISION
class CLIPSTTextEncoder(CLIPSTEncoder):
    """Same as ``CLIPSTEncoder`` except that its model type is ``ModelType.CLIP_TEXT``."""

    _model_type = ModelType.CLIP_TEXT