mirror of
https://github.com/immich-app/immich.git
synced 2025-01-09 13:26:47 +01:00
a2f5674bbb
* basic refactor and styling * removed batching * module entrypoint * removed unused imports * model superclass, model cache now in app state * fixed cache dir and enforced abstract method --------- Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
37 lines
995 B
Python
37 lines
995 B
Python
from pathlib import Path
|
|
|
|
from PIL.Image import Image
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
from ..schemas import ModelType
|
|
from .base import InferenceModel
|
|
|
|
|
|
class CLIPSTEncoder(InferenceModel):
    """CLIP encoder backed by a ``SentenceTransformer`` model.

    The underlying model is loaded when the instance is constructed and is
    kept on ``self.model``.
    """

    _model_type = ModelType.CLIP

    def __init__(self, model_name: str, cache_dir: Path | None = None, **model_kwargs):
        """Initialise the base model state and load the transformer.

        Args:
            model_name: Forwarded to the ``InferenceModel`` initialiser;
                ``self.model_name`` is then used as the model to load.
            cache_dir: Forwarded to the ``InferenceModel`` initialiser.
                NOTE(review): ``self.cache_dir`` is read immediately
                afterwards, so the base class presumably supplies a default
                when this is ``None`` -- confirm in ``base.py``.
            **model_kwargs: Extra keyword arguments passed through unchanged
                to ``SentenceTransformer``.
        """
        super().__init__(model_name, cache_dir)
        # The cache location is handed over as a POSIX-style string.
        cache_folder = self.cache_dir.as_posix()
        self.model = SentenceTransformer(
            self.model_name,
            cache_folder=cache_folder,
            **model_kwargs,
        )

    def predict(self, image_or_text: Image | str) -> list[float]:
        """Encode an image or a string and return the embedding as a list."""
        embedding = self.model.encode(image_or_text)
        return embedding.tolist()
|
|
|
|
|
|
# Stub subclasses: they let the vision and text encoders behave differently
# in the future and allow separate image and text CLIP models to be loaded.
|
|
class CLIPSTVisionEncoder(CLIPSTEncoder):
    """``CLIPSTEncoder`` variant registered under ``ModelType.CLIP_VISION``.

    Adds no behaviour of its own; only the model type differs from the parent.
    """

    _model_type = ModelType.CLIP_VISION
|
|
|
|
|
|
class CLIPSTTextEncoder(CLIPSTEncoder):
    """``CLIPSTEncoder`` variant registered under ``ModelType.CLIP_TEXT``.

    Adds no behaviour of its own; only the model type differs from the parent.
    """

    _model_type = ModelType.CLIP_TEXT
|