2023-06-25 05:18:09 +02:00
|
|
|
from pathlib import Path
|
2023-06-27 23:01:24 +02:00
|
|
|
from typing import Any
|
2023-06-25 05:18:09 +02:00
|
|
|
|
|
|
|
from PIL.Image import Image
|
|
|
|
from sentence_transformers import SentenceTransformer
|
|
|
|
|
|
|
|
from ..schemas import ModelType
|
|
|
|
from .base import InferenceModel
|
|
|
|
|
|
|
|
|
|
|
|
class CLIPSTEncoder(InferenceModel):
    """CLIP-type inference model backed by a ``SentenceTransformer``.

    ``load`` builds the underlying model and stores it on ``self.model``;
    ``predict`` forwards a single image or string to that model's
    ``encode`` method and returns the result as a plain list.
    """

    # Registers this encoder under the CLIP model type.
    _model_type = ModelType.CLIP

    def load(self, **model_kwargs: Any) -> None:
        """Create the ``SentenceTransformer`` and keep it on ``self.model``.

        Args:
            **model_kwargs: Extra keyword arguments passed through unchanged
                to the ``SentenceTransformer`` constructor.
        """
        # ``self.model_name`` and ``self.cache_dir`` are not defined in this
        # class -- presumably provided by ``InferenceModel``; confirm there.
        name = self.model_name
        cache_folder = self.cache_dir.as_posix()
        self.model = SentenceTransformer(
            name,
            cache_folder=cache_folder,
            **model_kwargs,
        )

    def predict(self, image_or_text: Image | str) -> list[float]:
        """Encode one image or one string and return the result as a list.

        Args:
            image_or_text: A PIL image or a text string to encode.

        Returns:
            The value of ``self.model.encode(...)`` converted with
            ``.tolist()``.
        """
        # NOTE(review): ``encode`` is assumed to return an array-like object
        # exposing ``.tolist()`` (e.g. a NumPy array) -- confirm.
        embedding = self.model.encode(image_or_text)
        return embedding.tolist()
|