Mirror of https://github.com/immich-app/immich.git (synced 2025-01-16 00:36:47 +01:00)
feat(server,ml): remove image tagging (#5903)
* remove image tagging
* updated lock
* fixed tests, improved logging
* be nice
* fixed tests
This commit is contained in:
parent 154292242f
commit 092a23fd7f
65 changed files with 984 additions and 2669 deletions
2 .github/workflows/test.yml (vendored)

@@ -209,7 +209,7 @@ jobs:
           poetry run black --check app export
       - name: Run mypy type checking
         run: |
-          poetry run mypy --install-types --non-interactive --strict app/ export/
+          poetry run mypy --install-types --non-interactive --strict app/
       - name: Run tests and coverage
         run: |
           poetry run pytest --cov app
59 cli/src/api/open-api/api.ts (generated)

@@ -373,12 +373,6 @@ export interface AllJobStatusResponseDto {
      * @memberof AllJobStatusResponseDto
      */
     'migration': JobStatusDto;
-    /**
-     *
-     * @type {JobStatusDto}
-     * @memberof AllJobStatusResponseDto
-     */
-    'objectTagging': JobStatusDto;
     /**
      *
      * @type {JobStatusDto}
@@ -1318,39 +1312,6 @@ export interface CheckExistingAssetsResponseDto {
      */
     'existingIds': Array<string>;
 }
-/**
- *
- * @export
- * @interface ClassificationConfig
- */
-export interface ClassificationConfig {
-    /**
-     *
-     * @type {boolean}
-     * @memberof ClassificationConfig
-     */
-    'enabled': boolean;
-    /**
-     *
-     * @type {number}
-     * @memberof ClassificationConfig
-     */
-    'minScore': number;
-    /**
-     *
-     * @type {string}
-     * @memberof ClassificationConfig
-     */
-    'modelName': string;
-    /**
-     *
-     * @type {ModelType}
-     * @memberof ClassificationConfig
-     */
-    'modelType'?: ModelType;
-}
-
-
 /**
  *
  * @export
@@ -2015,7 +1976,6 @@ export const JobName = {
     ThumbnailGeneration: 'thumbnailGeneration',
     MetadataExtraction: 'metadataExtraction',
     VideoConversion: 'videoConversion',
-    ObjectTagging: 'objectTagging',
     RecognizeFaces: 'recognizeFaces',
     SmartSearch: 'smartSearch',
     BackgroundTask: 'backgroundTask',
@@ -2358,7 +2318,6 @@ export interface MergePersonDto {
  */

 export const ModelType = {
-    ImageClassification: 'image-classification',
     FacialRecognition: 'facial-recognition',
     Clip: 'clip'
 } as const;
@@ -3139,12 +3098,6 @@ export interface ServerFeaturesDto {
      * @memberof ServerFeaturesDto
      */
     'sidecar': boolean;
-    /**
-     *
-     * @type {boolean}
-     * @memberof ServerFeaturesDto
-     */
-    'tagImage': boolean;
     /**
      *
      * @type {boolean}
@@ -3803,12 +3756,6 @@ export interface SystemConfigJobDto {
      * @memberof SystemConfigJobDto
      */
     'migration': JobSettingsDto;
-    /**
-     *
-     * @type {JobSettingsDto}
-     * @memberof SystemConfigJobDto
-     */
-    'objectTagging': JobSettingsDto;
     /**
      *
      * @type {JobSettingsDto}
@@ -3911,12 +3858,6 @@ export interface SystemConfigLoggingDto {
  * @interface SystemConfigMachineLearningDto
  */
 export interface SystemConfigMachineLearningDto {
-    /**
-     *
-     * @type {ClassificationConfig}
-     * @memberof SystemConfigMachineLearningDto
-     */
-    'classification': ClassificationConfig;
     /**
      *
      * @type {CLIPConfig}
@@ -56,10 +56,6 @@ Template changes will only apply to new assets. To retroactively apply the templ

 This is fixed by running the storage migration job.

-### Why is object detection not very good?
-
-The default image tagging model is relatively small. You can change this for a larger model like `google/vit-base-patch16-224` by setting the model name under Settings > Machine Learning Settings > Image Tagging. You can then re-run the Image Tagging job to get improved tags.
-
 ### Why are there so many thumbnail generation jobs?

 Immich generates three thumbnails for each asset (blurred, small, and large), as well as a thumbnail for each recognized face.
@@ -73,7 +73,7 @@ The Immich Microservices image uses the same `Dockerfile` as the Immich Server,
 - Thumbnail Generation
 - Metadata Extraction
 - Video Transcoding
-- Object Tagging
 - Smart Search
 - Facial Recognition
 - Storage Template Migration
 - Sidecar (see [XMP Sidecars](/docs/features/xmp-sidecars.md))
@@ -38,9 +38,6 @@ The default configuration looks like this:
   "metadataExtraction": {
     "concurrency": 5
   },
-  "objectTagging": {
-    "concurrency": 2
-  },
   "recognizeFaces": {
     "concurrency": 2
   },
@@ -73,11 +70,6 @@ The default configuration looks like this:
   "machineLearning": {
     "enabled": true,
     "url": "http://immich-machine-learning:3003",
-    "classification": {
-      "enabled": false,
-      "modelName": "microsoft/resnet-50",
-      "minScore": 0.9
-    },
     "clip": {
       "enabled": true,
       "modelName": "ViT-B-32__openai"
@@ -1,6 +1,5 @@
 # Immich Machine Learning

-- Image classification
 - CLIP embeddings
 - Facial recognition

@@ -59,3 +59,37 @@ def clip_preprocess_cfg() -> dict[str, Any]:
         "resize_mode": "shortest",
         "fill_color": 0,
     }
+
+
+@pytest.fixture(scope="session")
+def clip_tokenizer_cfg() -> dict[str, Any]:
+    return {
+        "add_prefix_space": False,
+        "added_tokens_decoder": {
+            "49406": {
+                "content": "<|startoftext|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+                "special": True,
+            },
+            "49407": {
+                "content": "<|endoftext|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+                "special": True,
+            },
+        },
+        "bos_token": "<|startoftext|>",
+        "clean_up_tokenization_spaces": True,
+        "do_lower_case": True,
+        "eos_token": "<|endoftext|>",
+        "errors": "replace",
+        "model_max_length": 77,
+        "pad_token": "<|endoftext|>",
+        "tokenizer_class": "CLIPTokenizer",
+        "unk_token": "<|endoftext|>",
+    }
@@ -6,7 +6,6 @@ from .base import InferenceModel
 from .clip import MCLIPEncoder, OpenCLIPEncoder
 from .constants import is_insightface, is_mclip, is_openclip
 from .facial_recognition import FaceRecognizer
-from .image_classification import ImageClassifier


 def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any) -> InferenceModel:
@@ -19,8 +18,6 @@ def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any)
         case ModelType.FACIAL_RECOGNITION:
             if is_insightface(model_name):
                 return FaceRecognizer(model_name, **model_kwargs)
-        case ModelType.IMAGE_CLASSIFICATION:
-            return ImageClassifier(model_name, **model_kwargs)
         case _:
             raise ValueError(f"Unknown model type {model_type}")
@@ -35,7 +35,7 @@ class InferenceModel(ABC):
         )
         log.debug(
             (
-                f"Setting '{self.model_name}' execution providers to {self.providers}"
+                f"Setting '{self.model_name}' execution providers to {self.providers} "
                 "in descending order of preference"
             ),
         )
@@ -55,7 +55,7 @@ class InferenceModel(ABC):
     def download(self) -> None:
         if not self.cached:
             log.info(
-                (f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'." "This may take a while.")
+                f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'. This may take a while."
             )
             self._download()

@@ -63,7 +63,7 @@ class InferenceModel(ABC):
         if self.loaded:
             return
         self.download()
-        log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}'")
+        log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory")
         self._load()
         self.loaded = True

@@ -119,11 +119,11 @@ class InferenceModel(ABC):
     def clear_cache(self) -> None:
         if not self.cache_dir.exists():
             log.warn(
-                f"Attempted to clear cache for model '{self.model_name}' but cache directory does not exist.",
+                f"Attempted to clear cache for model '{self.model_name}', but cache directory does not exist",
             )
             return
         if not rmtree.avoids_symlink_attacks:
-            raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform.")
+            raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform")

         if self.cache_dir.is_dir():
             log.info(f"Cleared cache directory for model '{self.model_name}'.")
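The hunks above only adjust log messages, but they sit inside InferenceModel's lazy download-then-load flow. For orientation, a minimal sketch of that pattern under stated assumptions (class name, paths, and the cache heuristic are hypothetical; only the standard library is used):

```python
from abc import ABC, abstractmethod
from pathlib import Path
from shutil import rmtree


class LazyModel(ABC):
    """Download on first use, load once, and clear the on-disk cache safely."""

    def __init__(self, model_name: str, cache_dir: str) -> None:
        self.model_name = model_name
        self.cache_dir = Path(cache_dir)
        self.loaded = False

    @property
    def cached(self) -> bool:
        # A model counts as cached once any of its files exist on disk.
        return self.cache_dir.is_dir() and any(self.cache_dir.iterdir())

    def download(self) -> None:
        if not self.cached:
            self._download()

    def load(self) -> None:
        # Idempotent: repeated calls after the first are no-ops.
        if self.loaded:
            return
        self.download()
        self._load()
        self.loaded = True

    def clear_cache(self) -> None:
        if not self.cache_dir.exists():
            return
        # shutil.rmtree can follow symlinks on some platforms; refuse there,
        # mirroring the avoids_symlink_attacks check in the diff above.
        if not rmtree.avoids_symlink_attacks:
            raise RuntimeError("rmtree is not safe on this platform")
        rmtree(self.cache_dir)

    @abstractmethod
    def _download(self) -> None: ...

    @abstractmethod
    def _load(self) -> None: ...
```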
@@ -8,11 +8,11 @@ from typing import Any, Literal

 import numpy as np
 import onnxruntime as ort
 from PIL import Image
-from transformers import AutoTokenizer
+from tokenizers import Encoding, Tokenizer

 from app.config import clean_name, log
 from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
-from app.schemas import ModelType, ndarray_f32, ndarray_i32, ndarray_i64
+from app.schemas import ModelType, ndarray_f32, ndarray_i32

 from .base import InferenceModel

@@ -40,6 +40,7 @@ class BaseCLIPEncoder(InferenceModel):
                 providers=self.providers,
                 provider_options=self.provider_options,
             )
+            log.debug(f"Loaded clip text model '{self.model_name}'")

         if self.mode == "vision" or self.mode is None:
             log.debug(f"Loading clip vision model '{self.model_name}'")
@@ -50,6 +51,7 @@ class BaseCLIPEncoder(InferenceModel):
                 providers=self.providers,
                 provider_options=self.provider_options,
             )
+            log.debug(f"Loaded clip vision model '{self.model_name}'")

     def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
         if isinstance(image_or_text, bytes):
@@ -99,6 +101,14 @@ class BaseCLIPEncoder(InferenceModel):
     def visual_path(self) -> Path:
         return self.visual_dir / "model.onnx"

+    @property
+    def tokenizer_file_path(self) -> Path:
+        return self.textual_dir / "tokenizer.json"
+
+    @property
+    def tokenizer_cfg_path(self) -> Path:
+        return self.textual_dir / "tokenizer_config.json"
+
     @property
     def preprocess_cfg_path(self) -> Path:
         return self.visual_dir / "preprocess_cfg.json"
@@ -107,6 +117,34 @@ class BaseCLIPEncoder(InferenceModel):
     def cached(self) -> bool:
         return self.textual_path.is_file() and self.visual_path.is_file()

+    @cached_property
+    def model_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading model config for CLIP model '{self.model_name}'")
+        model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
+        log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
+        return model_cfg
+
+    @cached_property
+    def tokenizer_file(self) -> dict[str, Any]:
+        log.debug(f"Loading tokenizer file for CLIP model '{self.model_name}'")
+        tokenizer_file: dict[str, Any] = json.load(self.tokenizer_file_path.open())
+        log.debug(f"Loaded tokenizer file for CLIP model '{self.model_name}'")
+        return tokenizer_file
+
+    @cached_property
+    def tokenizer_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading tokenizer config for CLIP model '{self.model_name}'")
+        tokenizer_cfg: dict[str, Any] = json.load(self.tokenizer_cfg_path.open())
+        log.debug(f"Loaded tokenizer config for CLIP model '{self.model_name}'")
+        return tokenizer_cfg
+
+    @cached_property
+    def preprocess_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading visual preprocessing config for CLIP model '{self.model_name}'")
+        preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
+        log.debug(f"Loaded visual preprocessing config for CLIP model '{self.model_name}'")
+        return preprocess_cfg
+

 class OpenCLIPEncoder(BaseCLIPEncoder):
     def __init__(
@@ -121,8 +159,8 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
     def _load(self) -> None:
         super()._load()

-        self.tokenizer = AutoTokenizer.from_pretrained(self.textual_dir)
-        self.sequence_length = self.model_cfg["text_cfg"]["context_length"]
+        context_length = self.model_cfg["text_cfg"]["context_length"]
+        pad_token = self.tokenizer_cfg["pad_token"]

         self.size = (
             self.preprocess_cfg["size"][0] if type(self.preprocess_cfg["size"]) == list else self.preprocess_cfg["size"]
@@ -131,16 +169,16 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
         self.mean = np.array(self.preprocess_cfg["mean"], dtype=np.float32)
         self.std = np.array(self.preprocess_cfg["std"], dtype=np.float32)

+        log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'")
+        self.tokenizer: Tokenizer = Tokenizer.from_file(self.tokenizer_file_path.as_posix())
+        pad_id = self.tokenizer.token_to_id(pad_token)
+        self.tokenizer.enable_padding(length=context_length, pad_token=pad_token, pad_id=pad_id)
+        self.tokenizer.enable_truncation(max_length=context_length)
+        log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
+
     def tokenize(self, text: str) -> dict[str, ndarray_i32]:
-        input_ids: ndarray_i64 = self.tokenizer(
-            text,
-            max_length=self.sequence_length,
-            return_tensors="np",
-            return_attention_mask=False,
-            padding="max_length",
-            truncation=True,
-        ).input_ids
-        return {"text": input_ids.astype(np.int32)}
+        tokens: Encoding = self.tokenizer.encode(text)
+        return {"text": np.array([tokens.ids], dtype=np.int32)}

     def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
         image = resize(image, self.size)
@@ -149,18 +187,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
         image_np = normalize(image_np, self.mean, self.std)
         return {"image": np.expand_dims(image_np.transpose(2, 0, 1), 0)}

-    @cached_property
-    def model_cfg(self) -> dict[str, Any]:
-        model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
-        return model_cfg
-
-    @cached_property
-    def preprocess_cfg(self) -> dict[str, Any]:
-        preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
-        return preprocess_cfg
-

 class MCLIPEncoder(OpenCLIPEncoder):
     def tokenize(self, text: str) -> dict[str, ndarray_i32]:
-        tokens: dict[str, ndarray_i64] = self.tokenizer(text, return_tensors="np")
-        return {k: v.astype(np.int32) for k, v in tokens.items()}
+        tokens: Encoding = self.tokenizer.encode(text)
+        return {
+            "input_ids": np.array([tokens.ids], dtype=np.int32),
+            "attention_mask": np.array([tokens.attention_mask], dtype=np.int32),
+        }
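These clip.py hunks replace transformers.AutoTokenizer with the lighter tokenizers package, configuring padding and truncation once up front instead of passing them on every call. A standalone sketch of the new flow; the tokenizer.json path and the context length of 77 are illustrative stand-ins for the values the encoder reads from the model's own config files:

```python
import numpy as np
from tokenizers import Tokenizer

# Load the serialized fast tokenizer directly, with no transformers dependency.
tokenizer = Tokenizer.from_file("tokenizer.json")  # hypothetical path

# Reproduce what AutoTokenizer did with padding="max_length" and truncation=True:
# pad every encoding to the model's context length and cut off anything longer.
pad_token = "<|endoftext|>"
pad_id = tokenizer.token_to_id(pad_token)
tokenizer.enable_padding(length=77, pad_token=pad_token, pad_id=pad_id)
tokenizer.enable_truncation(max_length=77)

encoding = tokenizer.encode("a photo of a dog")
# ONNX Runtime expects int32 inputs with a batch dimension.
text_input = {"text": np.array([encoding.ids], dtype=np.int32)}
```

For multilingual CLIP the model also consumes the attention mask, which the Encoding object exposes as encoding.attention_mask; hence the two-key dict returned by MCLIPEncoder.tokenize above.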
@@ -1,75 +0,0 @@
-from io import BytesIO
-from pathlib import Path
-from typing import Any
-
-from huggingface_hub import snapshot_download
-from optimum.onnxruntime import ORTModelForImageClassification
-from optimum.pipelines import pipeline
-from PIL import Image
-from transformers import AutoImageProcessor
-
-from ..config import log
-from ..schemas import ModelType
-from .base import InferenceModel
-
-
-class ImageClassifier(InferenceModel):
-    _model_type = ModelType.IMAGE_CLASSIFICATION
-
-    def __init__(
-        self,
-        model_name: str,
-        min_score: float = 0.9,
-        cache_dir: Path | str | None = None,
-        **model_kwargs: Any,
-    ) -> None:
-        self.min_score = model_kwargs.pop("minScore", min_score)
-        super().__init__(model_name, cache_dir, **model_kwargs)
-
-    def _download(self) -> None:
-        snapshot_download(
-            cache_dir=self.cache_dir,
-            repo_id=self.model_name,
-            allow_patterns=["*.bin", "*.json", "*.txt"],
-            local_dir=self.cache_dir,
-            local_dir_use_symlinks=True,
-        )
-
-    def _load(self) -> None:
-        processor = AutoImageProcessor.from_pretrained(self.cache_dir, cache_dir=self.cache_dir)
-        model_path = self.cache_dir / "model.onnx"
-        model_kwargs = {
-            "cache_dir": self.cache_dir,
-            "provider": self.providers[0],
-            "provider_options": self.provider_options[0],
-            "session_options": self.sess_options,
-        }
-
-        if model_path.exists():
-            model = ORTModelForImageClassification.from_pretrained(self.cache_dir, **model_kwargs)
-            self.model = pipeline(self.model_type.value, model, feature_extractor=processor)
-        else:
-            log.info(
-                (
-                    f"ONNX model not found in cache directory for '{self.model_name}'."
-                    "Exporting optimized model for future use."
-                ),
-            )
-            self.sess_options.optimized_model_filepath = model_path.as_posix()
-            self.model = pipeline(
-                self.model_type.value,
-                self.model_name,
-                model_kwargs=model_kwargs,
-                feature_extractor=processor,
-            )
-
-    def _predict(self, image: Image.Image | bytes) -> list[str]:
-        if isinstance(image, bytes):
-            image = Image.open(BytesIO(image))
-        predictions: list[dict[str, Any]] = self.model(image)
-        tags = [tag for pred in predictions for tag in pred["label"].split(", ") if pred["score"] >= self.min_score]
-
-        return tags
-
-    def configure(self, **model_kwargs: Any) -> None:
-        self.min_score = model_kwargs.pop("minScore", self.min_score)
@@ -25,7 +25,6 @@ class BoundingBox(TypedDict):


 class ModelType(StrEnum):
-    IMAGE_CLASSIFICATION = "image-classification"
     CLIP = "clip"
     FACIAL_RECOGNITION = "facial-recognition"

@@ -17,42 +17,9 @@ from .models.base import PicklableSessionOptions
 from .models.cache import ModelCache
 from .models.clip import OpenCLIPEncoder
 from .models.facial_recognition import FaceRecognizer
-from .models.image_classification import ImageClassifier
 from .schemas import ModelType


-class TestImageClassifier:
-    classifier_preds = [
-        {"label": "that's an image alright", "score": 0.8},
-        {"label": "well it ends with .jpg", "score": 0.1},
-        {"label": "idk, im just seeing bytes", "score": 0.05},
-        {"label": "not sure", "score": 0.04},
-        {"label": "probably a virus", "score": 0.01},
-    ]
-
-    def test_min_score(self, pil_image: Image.Image, mocker: MockerFixture) -> None:
-        mocker.patch.object(ImageClassifier, "load")
-        classifier = ImageClassifier("test_model_name", min_score=0.0)
-        assert classifier.min_score == 0.0
-
-        classifier.model = mock.Mock()
-        classifier.model.return_value = self.classifier_preds
-
-        all_labels = classifier.predict(pil_image)
-        classifier.min_score = 0.5
-        filtered_labels = classifier.predict(pil_image)
-
-        assert all_labels == [
-            "that's an image alright",
-            "well it ends with .jpg",
-            "idk",
-            "im just seeing bytes",
-            "not sure",
-            "probably a virus",
-        ]
-        assert filtered_labels == ["that's an image alright"]
-
-
 class TestCLIP:
     embedding = np.random.rand(512).astype(np.float32)
     cache_dir = Path("test_cache")
@@ -63,11 +30,13 @@ class TestCLIP:
         mocker: MockerFixture,
         clip_model_cfg: dict[str, Any],
         clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
+        clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
     ) -> None:
         mocker.patch.object(OpenCLIPEncoder, "download")
         mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
         mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
-        mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
+        mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
+        mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
         mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
         mocked.return_value.run.return_value = [[self.embedding]]

@@ -85,11 +54,13 @@ class TestCLIP:
         mocker: MockerFixture,
         clip_model_cfg: dict[str, Any],
         clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
+        clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
     ) -> None:
         mocker.patch.object(OpenCLIPEncoder, "download")
         mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
         mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
-        mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
+        mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
+        mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
         mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
         mocked.return_value.run.return_value = [[self.embedding]]

@@ -145,17 +116,15 @@ class TestFaceRecognition:
 class TestCache:
     async def test_caches(self, mock_get_model: mock.Mock) -> None:
         model_cache = ModelCache()
-        await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
-        await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
+        await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
+        await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
         assert len(model_cache.cache._cache) == 1
         mock_get_model.assert_called_once()

     async def test_kwargs_used(self, mock_get_model: mock.Mock) -> None:
         model_cache = ModelCache()
-        await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION, cache_dir="test_cache")
-        mock_get_model.assert_called_once_with(
-            ModelType.IMAGE_CLASSIFICATION, "test_model_name", cache_dir="test_cache"
-        )
+        await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION, cache_dir="test_cache")
+        mock_get_model.assert_called_once_with(ModelType.FACIAL_RECOGNITION, "test_model_name", cache_dir="test_cache")

     async def test_different_clip(self, mock_get_model: mock.Mock) -> None:
         model_cache = ModelCache()
@@ -172,14 +141,14 @@ class TestCache:
     @mock.patch("app.models.cache.OptimisticLock", autospec=True)
     async def test_model_ttl(self, mock_lock_cls: mock.Mock, mock_get_model: mock.Mock) -> None:
         model_cache = ModelCache(ttl=100)
-        await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
+        await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
         mock_lock_cls.return_value.__aenter__.return_value.cas.assert_called_with(mock.ANY, ttl=100)

     @mock.patch("app.models.cache.SimpleMemoryCache.expire")
     async def test_revalidate(self, mock_cache_expire: mock.Mock, mock_get_model: mock.Mock) -> None:
         model_cache = ModelCache(ttl=100, revalidate=True)
-        await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
-        await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
+        await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
+        await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
         mock_cache_expire.assert_called_once_with(mock.ANY, 100)


@@ -188,23 +157,6 @@ class TestCache:
     reason="More time-consuming since it deploys the app and loads models.",
 )
 class TestEndpoints:
-    def test_tagging_endpoint(
-        self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
-    ) -> None:
-        byte_image = BytesIO()
-        pil_image.save(byte_image, format="jpeg")
-        response = deployed_app.post(
-            "http://localhost:3003/predict",
-            data={
-                "modelName": "microsoft/resnet-50",
-                "modelType": "image-classification",
-                "options": json.dumps({"minScore": 0.0}),
-            },
-            files={"image": byte_image.getvalue()},
-        )
-        assert response.status_code == 200
-        assert response.json() == responses["image-classification"]
-
     def test_clip_image_endpoint(
         self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
     ) -> None:
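The updated tests swap in the mocked tokenizer config through mocker.patch.object on the encoder class. That works because cached_property lives on the class: substituting the descriptor with a plain value makes attribute lookup on any instance return that value directly, with no file I/O. A minimal sketch under stated assumptions (the Encoder class here is hypothetical; pytest-mock provides the mocker fixture):

```python
from functools import cached_property

from pytest_mock import MockerFixture


class Encoder:
    @cached_property
    def tokenizer_cfg(self) -> dict[str, str]:
        raise RuntimeError("would read tokenizer_config.json from disk")


def test_patched_cached_property(mocker: MockerFixture) -> None:
    # Replace the class-level descriptor with a plain dict; instances now
    # see the dict instead of triggering the real (disk-reading) property.
    mocker.patch.object(Encoder, "tokenizer_cfg", {"pad_token": "<|endoftext|>"})
    assert Encoder().tokenizer_cfg["pad_token"] == "<|endoftext|>"
```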
@@ -12,7 +12,6 @@ byte_image = BytesIO()

 @events.init_command_line_parser.add_listener
 def _(parser: ArgumentParser) -> None:
-    parser.add_argument("--tag-model", type=str, default="microsoft/resnet-50")
     parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
     parser.add_argument("--face-model", type=str, default="buffalo_l")
     parser.add_argument(
@@ -54,18 +53,6 @@ class InferenceLoadTest(HttpUser):
         self.data = byte_image.getvalue()


-class ClassificationFormDataLoadTest(InferenceLoadTest):
-    @task
-    def classify(self) -> None:
-        data = [
-            ("modelName", self.environment.parsed_options.clip_model),
-            ("modelType", "clip"),
-            ("options", json.dumps({"minScore": self.environment.parsed_options.tag_min_score})),
-        ]
-        files = {"image": self.data}
-        self.client.post("/predict", data=data, files=files)
-
-
 class CLIPTextFormDataLoadTest(InferenceLoadTest):
     @task
     def encode_text(self) -> None:
@@ -5,8 +5,7 @@
     "handlers": {
         "console": {
             "class": "app.config.CustomRichHandler",
-            "formatter": "rich",
-            "level": "INFO"
+            "formatter": "rich"
         }
     },
     "loggers": {
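Removing the handler's hard-coded "level": "INFO" means the console handler no longer filters records on its own; the level configured on the logger now decides what gets emitted. A sketch of the same behavior using the standard library's dictConfig (handler and logger names are illustrative, not the app's real config):

```python
import logging.config

logging.config.dictConfig({
    "version": 1,
    "handlers": {
        # No "level" key: the handler defers to the logger's threshold.
        "console": {"class": "logging.StreamHandler"},
    },
    "loggers": {
        "app": {"handlers": ["console"], "level": "DEBUG"},
    },
})

# With a handler-level INFO this debug record would have been dropped.
logging.getLogger("app").debug("now visible at the logger's configured level")
```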
2826 machine-learning/poetry.lock (generated)

File diff suppressed because it is too large.
@@ -7,12 +7,7 @@ readme = "README.md"
 packages = [{include = "app"}]

 [tool.poetry.dependencies]
-python = "~3.11"
-torch = [
-    {markers = "platform_machine == 'arm64' or platform_machine == 'aarch64'", version = "=2.1.0", source = "pypi"},
-    {markers = "platform_machine == 'amd64' or platform_machine == 'x86_64'", version = "=2.1.0", source = "pytorch-cpu"}
-]
-transformers = "^4.29.2"
+python = "=3.11.*"
 onnxruntime = "^1.15.0"
 insightface = "^0.7.3"
 opencv-python-headless = "^4.7.0.72"
@@ -21,14 +16,14 @@ fastapi = "^0.95.2"
 uvicorn = {extras = ["standard"], version = "^0.22.0"}
 pydantic = "^1.10.8"
 aiocache = "^0.12.1"
-optimum = "^1.9.1"
 rich = "^13.4.2"
 ftfy = "^6.1.1"
 setuptools = "^68.0.0"
 python-multipart = "^0.0.6"
 orjson = "^3.9.5"
-safetensors = "0.3.2"
 gunicorn = "^21.1.0"
+huggingface-hub = "^0.20.1"
+tokenizers = "^0.15.0"

 [tool.poetry.group.dev.dependencies]
 mypy = "^1.3.0"
@@ -41,11 +36,6 @@ pytest-cov = "^4.1.0"
 ruff = "^0.0.272"
 pytest-mock = "^3.11.1"

-[[tool.poetry.source]]
-name = "pytorch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
-priority = "explicit"
-
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
3 mobile/openapi/.openapi-generator/FILES (generated)

@@ -50,7 +50,6 @@ doc/CQMode.md
 doc/ChangePasswordDto.md
 doc/CheckExistingAssetsDto.md
 doc/CheckExistingAssetsResponseDto.md
-doc/ClassificationConfig.md
 doc/Colorspace.md
 doc/CreateAlbumDto.md
 doc/CreateLibraryDto.md
@@ -244,7 +243,6 @@ lib/model/bulk_ids_dto.dart
 lib/model/change_password_dto.dart
 lib/model/check_existing_assets_dto.dart
 lib/model/check_existing_assets_response_dto.dart
-lib/model/classification_config.dart
 lib/model/clip_config.dart
 lib/model/clip_mode.dart
 lib/model/colorspace.dart
@@ -408,7 +406,6 @@ test/bulk_ids_dto_test.dart
 test/change_password_dto_test.dart
 test/check_existing_assets_dto_test.dart
 test/check_existing_assets_response_dto_test.dart
-test/classification_config_test.dart
 test/clip_config_test.dart
 test/clip_mode_test.dart
 test/colorspace_test.dart
BIN mobile/openapi/README.md (generated): binary file not shown
BIN mobile/openapi/doc/AllJobStatusResponseDto.md (generated): binary file not shown
BIN mobile/openapi/doc/ClassificationConfig.md (generated): binary file not shown
BIN mobile/openapi/doc/ServerFeaturesDto.md (generated): binary file not shown
BIN mobile/openapi/doc/SystemConfigJobDto.md (generated): binary file not shown
BIN mobile/openapi/doc/SystemConfigMachineLearningDto.md (generated): binary file not shown
BIN mobile/openapi/lib/api.dart (generated): binary file not shown
BIN mobile/openapi/lib/api_client.dart (generated): binary file not shown
BIN mobile/openapi/lib/model/classification_config.dart (generated): binary file not shown
BIN mobile/openapi/lib/model/job_name.dart (generated): binary file not shown
BIN mobile/openapi/lib/model/model_type.dart (generated): binary file not shown
BIN mobile/openapi/lib/model/server_features_dto.dart (generated): binary file not shown
BIN mobile/openapi/lib/model/system_config_job_dto.dart (generated): binary file not shown
BIN mobile/openapi/test/classification_config_test.dart (generated): binary file not shown
BIN mobile/openapi/test/server_features_dto_test.dart (generated): binary file not shown
BIN mobile/openapi/test/system_config_job_dto_test.dart (generated): binary file not shown
@@ -6479,9 +6479,6 @@
         "migration": {
           "$ref": "#/components/schemas/JobStatusDto"
         },
-        "objectTagging": {
-          "$ref": "#/components/schemas/JobStatusDto"
-        },
         "recognizeFaces": {
           "$ref": "#/components/schemas/JobStatusDto"
         },
@@ -6508,7 +6505,6 @@
         "thumbnailGeneration",
         "metadataExtraction",
         "videoConversion",
-        "objectTagging",
         "smartSearch",
         "storageTemplateMigration",
         "migration",
@@ -7201,28 +7197,6 @@
         ],
         "type": "object"
       },
-      "ClassificationConfig": {
-        "properties": {
-          "enabled": {
-            "type": "boolean"
-          },
-          "minScore": {
-            "type": "integer"
-          },
-          "modelName": {
-            "type": "string"
-          },
-          "modelType": {
-            "$ref": "#/components/schemas/ModelType"
-          }
-        },
-        "required": [
-          "minScore",
-          "enabled",
-          "modelName"
-        ],
-        "type": "object"
-      },
       "Colorspace": {
         "enum": [
           "srgb",
@@ -7819,7 +7793,6 @@
         "thumbnailGeneration",
         "metadataExtraction",
         "videoConversion",
-        "objectTagging",
         "recognizeFaces",
         "smartSearch",
         "backgroundTask",
@@ -8090,7 +8063,6 @@
       },
       "ModelType": {
         "enum": [
-          "image-classification",
           "facial-recognition",
           "clip"
         ],
@@ -8674,9 +8646,6 @@
         "sidecar": {
           "type": "boolean"
         },
-        "tagImage": {
-          "type": "boolean"
-        },
         "trash": {
           "type": "boolean"
         }
@@ -8692,8 +8661,7 @@
         "oauthAutoLaunch",
         "passwordLogin",
         "sidecar",
-        "search",
-        "tagImage"
+        "search"
       ],
       "type": "object"
     },
@@ -9191,9 +9159,6 @@
         "migration": {
           "$ref": "#/components/schemas/JobSettingsDto"
         },
-        "objectTagging": {
-          "$ref": "#/components/schemas/JobSettingsDto"
-        },
         "recognizeFaces": {
           "$ref": "#/components/schemas/JobSettingsDto"
         },
@@ -9220,7 +9185,6 @@
         "thumbnailGeneration",
         "metadataExtraction",
         "videoConversion",
-        "objectTagging",
         "smartSearch",
         "storageTemplateMigration",
         "migration",
@@ -9275,9 +9239,6 @@
       },
       "SystemConfigMachineLearningDto": {
         "properties": {
-          "classification": {
-            "$ref": "#/components/schemas/ClassificationConfig"
-          },
           "clip": {
             "$ref": "#/components/schemas/CLIPConfig"
           },
@@ -9294,7 +9255,6 @@
       "required": [
         "enabled",
         "url",
-        "classification",
         "clip",
         "facialRecognition"
       ],
@@ -2,7 +2,6 @@ export enum QueueName {
   THUMBNAIL_GENERATION = 'thumbnailGeneration',
   METADATA_EXTRACTION = 'metadataExtraction',
   VIDEO_CONVERSION = 'videoConversion',
-  OBJECT_TAGGING = 'objectTagging',
   RECOGNIZE_FACES = 'recognizeFaces',
   SMART_SEARCH = 'smartSearch',
   BACKGROUND_TASK = 'backgroundTask',
@@ -55,10 +54,6 @@ export enum JobName {
   MIGRATE_ASSET = 'migrate-asset',
   MIGRATE_PERSON = 'migrate-person',

-  // object tagging
-  QUEUE_OBJECT_TAGGING = 'queue-object-tagging',
-  CLASSIFY_IMAGE = 'classify-image',
-
   // facial recognition
   PERSON_CLEANUP = 'person-cleanup',
   PERSON_DELETE = 'person-delete',
@@ -126,10 +121,6 @@ export const JOBS_TO_QUEUE: Record<JobName, QueueName> = {
   [JobName.MIGRATE_ASSET]: QueueName.MIGRATION,
   [JobName.MIGRATE_PERSON]: QueueName.MIGRATION,

-  // object tagging
-  [JobName.QUEUE_OBJECT_TAGGING]: QueueName.OBJECT_TAGGING,
-  [JobName.CLASSIFY_IMAGE]: QueueName.OBJECT_TAGGING,
-
   // facial recognition
   [JobName.QUEUE_RECOGNIZE_FACES]: QueueName.RECOGNIZE_FACES,
   [JobName.RECOGNIZE_FACES]: QueueName.RECOGNIZE_FACES,
@@ -59,9 +59,6 @@ export class AllJobStatusResponseDto implements Record<QueueName, JobStatusDto>
   @ApiProperty({ type: JobStatusDto })
   [QueueName.VIDEO_CONVERSION]!: JobStatusDto;

-  @ApiProperty({ type: JobStatusDto })
-  [QueueName.OBJECT_TAGGING]!: JobStatusDto;
-
   @ApiProperty({ type: JobStatusDto })
   [QueueName.SMART_SEARCH]!: JobStatusDto;

@@ -99,7 +99,6 @@ describe(JobService.name, () => {
       [QueueName.BACKGROUND_TASK]: expectedJobStatus,
       [QueueName.SMART_SEARCH]: expectedJobStatus,
       [QueueName.METADATA_EXTRACTION]: expectedJobStatus,
-      [QueueName.OBJECT_TAGGING]: expectedJobStatus,
       [QueueName.SEARCH]: expectedJobStatus,
       [QueueName.STORAGE_TEMPLATE_MIGRATION]: expectedJobStatus,
       [QueueName.MIGRATION]: expectedJobStatus,
@@ -157,17 +156,6 @@ describe(JobService.name, () => {
       expect(jobMock.queue).toHaveBeenCalledWith({ name: JobName.STORAGE_TEMPLATE_MIGRATION });
     });

-    it('should handle a start object tagging command', async () => {
-      jobMock.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
-      configMock.load.mockResolvedValue([
-        { key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
-      ]);
-
-      await sut.handleCommand(QueueName.OBJECT_TAGGING, { command: JobCommand.START, force: false });
-
-      expect(jobMock.queue).toHaveBeenCalledWith({ name: JobName.QUEUE_OBJECT_TAGGING, data: { force: false } });
-    });
-
     it('should handle a start clip encoding command', async () => {
       jobMock.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });

@@ -234,7 +222,6 @@ describe(JobService.name, () => {
       [QueueName.BACKGROUND_TASK]: { concurrency: 10 },
       [QueueName.SMART_SEARCH]: { concurrency: 10 },
       [QueueName.METADATA_EXTRACTION]: { concurrency: 10 },
-      [QueueName.OBJECT_TAGGING]: { concurrency: 10 },
       [QueueName.RECOGNIZE_FACES]: { concurrency: 10 },
       [QueueName.SEARCH]: { concurrency: 10 },
       [QueueName.SIDECAR]: { concurrency: 10 },
@@ -249,7 +236,6 @@ describe(JobService.name, () => {
       expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.BACKGROUND_TASK, 10);
       expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.SMART_SEARCH, 10);
       expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.METADATA_EXTRACTION, 10);
-      expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.OBJECT_TAGGING, 10);
       expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.RECOGNIZE_FACES, 10);
       expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.SIDECAR, 10);
       expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.LIBRARY, 10);
@@ -292,7 +278,6 @@ describe(JobService.name, () => {
         item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-1' } },
         jobs: [
           JobName.GENERATE_WEBP_THUMBNAIL,
-          JobName.CLASSIFY_IMAGE,
           JobName.ENCODE_CLIP,
           JobName.RECOGNIZE_FACES,
           JobName.GENERATE_THUMBHASH_THUMBNAIL,
@@ -302,7 +287,6 @@ describe(JobService.name, () => {
         item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-1', source: 'upload' } },
         jobs: [
           JobName.GENERATE_WEBP_THUMBNAIL,
-          JobName.CLASSIFY_IMAGE,
           JobName.ENCODE_CLIP,
           JobName.RECOGNIZE_FACES,
           JobName.GENERATE_THUMBHASH_THUMBNAIL,
@@ -312,7 +296,6 @@ describe(JobService.name, () => {
       {
         item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-live-image', source: 'upload' } },
         jobs: [
-          JobName.CLASSIFY_IMAGE,
           JobName.GENERATE_WEBP_THUMBNAIL,
           JobName.RECOGNIZE_FACES,
           JobName.GENERATE_THUMBHASH_THUMBNAIL,
@@ -320,10 +303,6 @@ describe(JobService.name, () => {
           JobName.VIDEO_CONVERSION,
         ],
       },
-      {
-        item: { name: JobName.CLASSIFY_IMAGE, data: { id: 'asset-1' } },
-        jobs: [],
-      },
       {
         item: { name: JobName.ENCODE_CLIP, data: { id: 'asset-1' } },
         jobs: [],
@@ -371,11 +350,6 @@ describe(JobService.name, () => {
         feature: FeatureFlag.CLIP_ENCODE,
         configKey: SystemConfigKey.MACHINE_LEARNING_CLIP_ENABLED,
       },
-      {
-        queue: QueueName.OBJECT_TAGGING,
-        feature: FeatureFlag.TAG_IMAGE,
-        configKey: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED,
-      },
       {
         queue: QueueName.RECOGNIZE_FACES,
         feature: FeatureFlag.FACIAL_RECOGNITION,
@@ -94,10 +94,6 @@ export class JobService {
       case QueueName.MIGRATION:
         return this.jobRepository.queue({ name: JobName.QUEUE_MIGRATION });

-      case QueueName.OBJECT_TAGGING:
-        await this.configCore.requireFeature(FeatureFlag.TAG_IMAGE);
-        return this.jobRepository.queue({ name: JobName.QUEUE_OBJECT_TAGGING, data: { force } });
-
       case QueueName.SMART_SEARCH:
         await this.configCore.requireFeature(FeatureFlag.CLIP_ENCODE);
         return this.jobRepository.queue({ name: JobName.QUEUE_ENCODE_CLIP, data: { force } });
@@ -209,7 +205,6 @@ export class JobService {
       case JobName.GENERATE_JPEG_THUMBNAIL: {
         await this.jobRepository.queue({ name: JobName.GENERATE_WEBP_THUMBNAIL, data: item.data });
         await this.jobRepository.queue({ name: JobName.GENERATE_THUMBHASH_THUMBNAIL, data: item.data });
-        await this.jobRepository.queue({ name: JobName.CLASSIFY_IMAGE, data: item.data });
         await this.jobRepository.queue({ name: JobName.ENCODE_CLIP, data: item.data });
         await this.jobRepository.queue({ name: JobName.RECOGNIZE_FACES, data: item.data });

@@ -62,10 +62,6 @@ export type JobItem =
   | { name: JobName.SIDECAR_SYNC; data: IEntityJob }
   | { name: JobName.SIDECAR_WRITE; data: ISidecarWriteJob }

-  // Object Tagging
-  | { name: JobName.QUEUE_OBJECT_TAGGING; data: IBaseJob }
-  | { name: JobName.CLASSIFY_IMAGE; data: IEntityJob }
-
   // Recognize Faces
   | { name: JobName.QUEUE_RECOGNIZE_FACES; data: IBaseJob }
   | { name: JobName.RECOGNIZE_FACES; data: IEntityJob }
@@ -1,4 +1,4 @@
-import { ClassificationConfig, CLIPConfig, RecognitionConfig } from '../smart-info/dto';
+import { CLIPConfig, RecognitionConfig } from '../smart-info/dto';

 export const IMachineLearningRepository = 'IMachineLearningRepository';

@@ -26,7 +26,6 @@ export interface DetectFaceResult {
 }

 export enum ModelType {
-  IMAGE_CLASSIFICATION = 'image-classification',
   FACIAL_RECOGNITION = 'facial-recognition',
   CLIP = 'clip',
 }
@@ -37,7 +36,6 @@ export enum CLIPMode {
 }

 export interface IMachineLearningRepository {
-  classifyImage(url: string, input: VisionModelInput, config: ClassificationConfig): Promise<string[]>;
   encodeImage(url: string, input: VisionModelInput, config: CLIPConfig): Promise<number[]>;
   encodeText(url: string, input: TextModelInput, config: CLIPConfig): Promise<number[]>;
   detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]>;
@@ -100,5 +100,4 @@ export class ServerFeaturesDto implements FeatureFlags {
   passwordLogin!: boolean;
   sidecar!: boolean;
   search!: boolean;
-  tagImage!: boolean;
 }
@@ -171,7 +171,6 @@ describe(ServerInfoService.name, () => {
         passwordLogin: true,
         search: true,
         sidecar: true,
-        tagImage: false,
         configFile: false,
         trash: true,
       });
@@ -18,15 +18,6 @@ export class ModelConfig {
   modelType?: ModelType;
 }

-export class ClassificationConfig extends ModelConfig {
-  @IsNumber()
-  @Min(0)
-  @Max(1)
-  @Type(() => Number)
-  @ApiProperty({ type: 'integer' })
-  minScore!: number;
-}
-
 export class CLIPConfig extends ModelConfig {
   @IsEnum(CLIPMode)
   @Optional()
@@ -47,107 +47,6 @@ describe(SmartInfoService.name, () => {
     expect(sut).toBeDefined();
   });

-  describe('handleQueueObjectTagging', () => {
-    beforeEach(async () => {
-      configMock.load.mockResolvedValue([
-        { key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
-      ]);
-    });
-
-    it('should do nothing if machine learning is disabled', async () => {
-      configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
-
-      await sut.handleQueueObjectTagging({});
-
-      expect(assetMock.getAll).not.toHaveBeenCalled();
-      expect(assetMock.getWithout).not.toHaveBeenCalled();
-    });
-
-    it('should queue the assets without tags', async () => {
-      configMock.load.mockResolvedValue([
-        { key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
-      ]);
-      assetMock.getWithout.mockResolvedValue({
-        items: [assetStub.image],
-        hasNextPage: false,
-      });
-
-      await sut.handleQueueObjectTagging({ force: false });
-
-      expect(jobMock.queue.mock.calls).toEqual([[{ name: JobName.CLASSIFY_IMAGE, data: { id: assetStub.image.id } }]]);
-      expect(assetMock.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.OBJECT_TAGS);
-    });
-
-    it('should queue all the assets', async () => {
-      configMock.load.mockResolvedValue([
-        { key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
-      ]);
-      assetMock.getAll.mockResolvedValue({
-        items: [assetStub.image],
-        hasNextPage: false,
-      });
-
-      await sut.handleQueueObjectTagging({ force: true });
-
-      expect(jobMock.queue.mock.calls).toEqual([[{ name: JobName.CLASSIFY_IMAGE, data: { id: assetStub.image.id } }]]);
-      expect(assetMock.getAll).toHaveBeenCalled();
-    });
-  });
-
-  describe('handleClassifyImage', () => {
-    it('should do nothing if machine learning is disabled', async () => {
-      configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
-
-      await sut.handleClassifyImage({ id: '123' });
-
-      expect(machineMock.classifyImage).not.toHaveBeenCalled();
-      expect(assetMock.getByIds).not.toHaveBeenCalled();
-    });
-
-    it('should skip assets without a resize path', async () => {
-      const asset = { resizePath: '' } as AssetEntity;
-      assetMock.getByIds.mockResolvedValue([asset]);
-
-      await sut.handleClassifyImage({ id: asset.id });
-
-      expect(smartMock.upsert).not.toHaveBeenCalled();
-      expect(machineMock.classifyImage).not.toHaveBeenCalled();
-    });
-
-    it('should save the returned tags', async () => {
-      configMock.load.mockResolvedValue([
-        { key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
-      ]);
-      machineMock.classifyImage.mockResolvedValue(['tag1', 'tag2', 'tag3']);
-
-      await sut.handleClassifyImage({ id: asset.id });
-
-      expect(machineMock.classifyImage).toHaveBeenCalledWith(
-        'http://immich-machine-learning:3003',
-        {
-          imagePath: 'path/to/resize.ext',
-        },
-        { enabled: true, minScore: 0.9, modelName: 'microsoft/resnet-50' },
-      );
-      expect(smartMock.upsert).toHaveBeenCalledWith({
-        assetId: 'asset-1',
-        tags: ['tag1', 'tag2', 'tag3'],
-      });
-    });
-
-    it('should always overwrite old tags', async () => {
-      configMock.load.mockResolvedValue([
-        { key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
-      ]);
-      machineMock.classifyImage.mockResolvedValue([]);
-
-      await sut.handleClassifyImage({ id: asset.id });
-
-      expect(machineMock.classifyImage).toHaveBeenCalled();
-      expect(smartMock.upsert).toHaveBeenCalled();
-    });
-  });
-
   describe('handleQueueEncodeClip', () => {
     it('should do nothing if machine learning is disabled', async () => {
       configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
@@ -46,48 +46,6 @@ export class SmartInfoService {
     await this.jobRepository.resume(QueueName.SMART_SEARCH);
   }

-  async handleQueueObjectTagging({ force }: IBaseJob) {
-    const { machineLearning } = await this.configCore.getConfig();
-    if (!machineLearning.enabled || !machineLearning.classification.enabled) {
-      return true;
-    }
-
-    const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
-      return force
-        ? this.assetRepository.getAll(pagination)
-        : this.assetRepository.getWithout(pagination, WithoutProperty.OBJECT_TAGS);
-    });
-
-    for await (const assets of assetPagination) {
-      for (const asset of assets) {
-        await this.jobRepository.queue({ name: JobName.CLASSIFY_IMAGE, data: { id: asset.id } });
-      }
-    }
-
-    return true;
-  }
-
-  async handleClassifyImage({ id }: IEntityJob) {
-    const { machineLearning } = await this.configCore.getConfig();
-    if (!machineLearning.enabled || !machineLearning.classification.enabled) {
-      return true;
-    }
-
-    const [asset] = await this.assetRepository.getByIds([id]);
-    if (!asset.resizePath) {
-      return false;
-    }
-
-    const tags = await this.machineLearning.classifyImage(
-      machineLearning.url,
-      { imagePath: asset.resizePath },
-      machineLearning.classification,
-    );
-    await this.repository.upsert({ assetId: asset.id, tags });
-
-    return true;
-  }
-
   async handleQueueEncodeClip({ force }: IBaseJob) {
     const { machineLearning } = await this.configCore.getConfig();
     if (!machineLearning.enabled || !machineLearning.clip.enabled) {
@@ -29,12 +29,6 @@ export class SystemConfigJobDto implements Record<QueueName, JobSettingsDto> {
   @Type(() => JobSettingsDto)
   [QueueName.VIDEO_CONVERSION]!: JobSettingsDto;

-  @ApiProperty({ type: JobSettingsDto })
-  @ValidateNested()
-  @IsObject()
-  @Type(() => JobSettingsDto)
-  [QueueName.OBJECT_TAGGING]!: JobSettingsDto;
-
   @ApiProperty({ type: JobSettingsDto })
   @ValidateNested()
   @IsObject()
@@ -1,4 +1,4 @@
-import { ClassificationConfig, CLIPConfig, RecognitionConfig } from '@app/domain';
+import { CLIPConfig, RecognitionConfig } from '@app/domain';
 import { Type } from 'class-transformer';
 import { IsBoolean, IsObject, IsUrl, ValidateIf, ValidateNested } from 'class-validator';

@@ -10,11 +10,6 @@ export class SystemConfigMachineLearningDto {
   @ValidateIf((dto) => dto.enabled)
   url!: string;

-  @Type(() => ClassificationConfig)
-  @ValidateNested()
-  @IsObject()
-  classification!: ClassificationConfig;
-
   @Type(() => CLIPConfig)
   @ValidateNested()
   @IsObject()
@@ -49,7 +49,6 @@ export const defaults = Object.freeze<SystemConfig>({
   [QueueName.BACKGROUND_TASK]: { concurrency: 5 },
   [QueueName.SMART_SEARCH]: { concurrency: 2 },
   [QueueName.METADATA_EXTRACTION]: { concurrency: 5 },
-  [QueueName.OBJECT_TAGGING]: { concurrency: 2 },
   [QueueName.RECOGNIZE_FACES]: { concurrency: 2 },
   [QueueName.SEARCH]: { concurrency: 5 },
   [QueueName.SIDECAR]: { concurrency: 5 },
@@ -66,11 +65,6 @@ export const defaults = Object.freeze<SystemConfig>({
   machineLearning: {
     enabled: process.env.IMMICH_MACHINE_LEARNING_ENABLED !== 'false',
     url: process.env.IMMICH_MACHINE_LEARNING_URL || 'http://immich-machine-learning:3003',
-    classification: {
-      enabled: false,
-      modelName: 'microsoft/resnet-50',
-      minScore: 0.9,
-    },
     clip: {
       enabled: true,
       modelName: 'ViT-B-32__openai',
@@ -137,7 +131,6 @@ export const defaults = Object.freeze<SystemConfig>({
 export enum FeatureFlag {
   CLIP_ENCODE = 'clipEncode',
   FACIAL_RECOGNITION = 'facialRecognition',
-  TAG_IMAGE = 'tagImage',
   MAP = 'map',
   REVERSE_GEOCODING = 'reverseGeocoding',
   SIDECAR = 'sidecar',
@@ -182,8 +175,6 @@ export class SystemConfigCore {
         throw new BadRequestException('Clip encoding is not enabled');
       case FeatureFlag.FACIAL_RECOGNITION:
         throw new BadRequestException('Facial recognition is not enabled');
-      case FeatureFlag.TAG_IMAGE:
-        throw new BadRequestException('Image tagging is not enabled');
       case FeatureFlag.SIDECAR:
         throw new BadRequestException('Sidecar is not enabled');
       case FeatureFlag.SEARCH:
@@ -212,7 +203,6 @@ export class SystemConfigCore {
     return {
       [FeatureFlag.CLIP_ENCODE]: mlEnabled && config.machineLearning.clip.enabled,
       [FeatureFlag.FACIAL_RECOGNITION]: mlEnabled && config.machineLearning.facialRecognition.enabled,
-      [FeatureFlag.TAG_IMAGE]: mlEnabled && config.machineLearning.classification.enabled,
       [FeatureFlag.MAP]: config.map.enabled,
       [FeatureFlag.REVERSE_GEOCODING]: config.reverseGeocoding.enabled,
       [FeatureFlag.SIDECAR]: true,
@@ -245,10 +235,7 @@ export class SystemConfigCore {
       _.set(config, key, value);
     }

-    const errors = await validate(plainToInstance(SystemConfigDto, config), {
-      forbidNonWhitelisted: true,
-      forbidUnknownValues: true,
-    });
+    const errors = await validate(plainToInstance(SystemConfigDto, config));
     if (errors.length > 0) {
       this.logger.error('Validation error', errors);
       if (configFilePath) {
@@ -334,13 +321,13 @@ export class SystemConfigCore {
     }

     if (!_.isEmpty(file)) {
-      throw new Error(`Unknown keys found: ${JSON.stringify(file)}`);
+      this.logger.warn(`Unknown keys found: ${JSON.stringify(file, null, 2)}`);
     }

     this.configCache = overrides;
   } catch (error: Error | any) {
-    this.logger.error(`Unable to load configuration file: ${filepath} due to ${error}`, error?.stack);
-    throw new Error('Invalid configuration file');
+    this.logger.error(`Unable to load configuration file: ${filepath}`);
+    throw error;
   }
 }
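The config-loading hunks above flip unknown-key handling from fatal to a warning: recognized keys are consumed from the parsed file and whatever remains is logged rather than raised. A short sketch of that consume-and-warn pattern, written in Python for brevity and with illustrative key names (the real implementation is the TypeScript shown above):

```python
import json
import logging

log = logging.getLogger("config")

# Dotted keys the loader knows how to apply; anything else is a stranger.
KNOWN_KEYS = {"machineLearning.enabled", "machineLearning.url"}


def load_overrides(raw: str) -> dict[str, object]:
    file = json.loads(raw)
    overrides: dict[str, object] = {}
    for key in KNOWN_KEYS:
        section, _, field = key.partition(".")
        if isinstance(file.get(section), dict) and field in file[section]:
            # Consume recognized values so only unknown keys remain behind.
            overrides[key] = file[section].pop(field)
            if not file[section]:
                file.pop(section)
    if file:
        # Previously this raised; now unknown keys only produce a warning.
        log.warning(f"Unknown keys found: {json.dumps(file, indent=2)}")
    return overrides
```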
@ -11,6 +11,7 @@ import {
|
|||
TranscodePolicy,
|
||||
VideoCodec,
|
||||
} from '@app/infra/entities';
|
||||
import { ImmichLogger } from '@app/infra/logger';
|
||||
import { BadRequestException } from '@nestjs/common';
|
||||
import { newCommunicationRepositoryMock, newSystemConfigRepositoryMock } from '@test';
|
||||
import { QueueName } from '../job';
|
||||
|
@ -29,7 +30,6 @@ const updatedConfig = Object.freeze<SystemConfig>({
|
|||
[QueueName.BACKGROUND_TASK]: { concurrency: 5 },
|
||||
[QueueName.SMART_SEARCH]: { concurrency: 2 },
|
||||
[QueueName.METADATA_EXTRACTION]: { concurrency: 5 },
|
||||
[QueueName.OBJECT_TAGGING]: { concurrency: 2 },
|
||||
[QueueName.RECOGNIZE_FACES]: { concurrency: 2 },
|
||||
[QueueName.SEARCH]: { concurrency: 5 },
|
||||
[QueueName.SIDECAR]: { concurrency: 5 },
|
||||
|
@ -65,11 +65,6 @@ const updatedConfig = Object.freeze<SystemConfig>({
|
|||
machineLearning: {
|
||||
enabled: true,
|
||||
url: 'http://immich-machine-learning:3003',
|
||||
classification: {
|
||||
enabled: false,
|
||||
modelName: 'microsoft/resnet-50',
|
||||
minScore: 0.9,
|
||||
},
|
||||
clip: {
|
||||
enabled: true,
|
||||
modelName: 'ViT-B-32__openai',
|
||||
|
@ -169,6 +164,16 @@ describe(SystemConfigService.name, () => {
|
|||
});
|
||||
|
||||
describe('getConfig', () => {
|
||||
let warnLog: jest.SpyInstance;
|
||||
|
||||
beforeEach(() => {
|
||||
warnLog = jest.spyOn(ImmichLogger.prototype, 'warn');
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
warnLog.mockRestore();
|
||||
});
|
||||
|
||||
it('should return the default config', async () => {
|
||||
configMock.load.mockResolvedValue([]);
|
||||
|
||||
|
@ -217,9 +222,9 @@ describe(SystemConfigService.name, () => {
|
|||
{ should: 'validate numbers', config: { ffmpeg: { crf: 'not-a-number' } } },
|
||||
{ should: 'validate booleans', config: { oauth: { enabled: 'invalid' } } },
|
||||
{ should: 'validate enums', config: { ffmpeg: { transcode: 'unknown' } } },
|
||||
{ should: 'validate top level unknown options', config: { unknownOption: true } },
|
||||
{ should: 'validate nested unknown options', config: { ffmpeg: { unknownOption: true } } },
|
||||
{ should: 'validate required oauth fields', config: { oauth: { enabled: true } } },
|
||||
{ should: 'warn for top level unknown options', warn: true, config: { unknownOption: true } },
|
||||
{ should: 'warn for nested unknown options', warn: true, config: { ffmpeg: { unknownOption: true } } },
|
||||
];
|
||||
|
||||
for (const test of tests) {
|
||||
|
@@ -227,7 +232,12 @@ describe(SystemConfigService.name, () => {
         process.env.IMMICH_CONFIG_FILE = 'immich-config.json';
         configMock.readFile.mockResolvedValue(JSON.stringify(test.config));

-        await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
+        if (test.warn) {
+          await sut.getConfig();
+          expect(warnLog).toHaveBeenCalled();
+        } else {
+          await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
+        }
       });
     }
   });
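Condensed, the spec's table-driven pattern now distinguishes two outcomes per case: invalid values still reject, while unknown keys only produce a warning. A self-contained sketch, assuming configMock, sut, and warnLog are wired up exactly as in the hunks above:

const cases: Array<{ should: string; warn?: boolean; config: object }> = [
  { should: 'validate booleans', config: { oauth: { enabled: 'invalid' } } },
  { should: 'warn for unknown options', warn: true, config: { unknownOption: true } },
];

for (const test of cases) {
  it(`should ${test.should}`, async () => {
    process.env.IMMICH_CONFIG_FILE = 'immich-config.json';
    configMock.readFile.mockResolvedValue(JSON.stringify(test.config));

    if (test.warn) {
      await sut.getConfig(); // resolves despite the unknown key
      expect(warnLog).toHaveBeenCalled(); // the prototype spy observed the warning
    } else {
      await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
    }
  });
}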
@@ -35,7 +35,6 @@ export enum SystemConfigKey {
  JOB_THUMBNAIL_GENERATION_CONCURRENCY = 'job.thumbnailGeneration.concurrency',
  JOB_METADATA_EXTRACTION_CONCURRENCY = 'job.metadataExtraction.concurrency',
  JOB_VIDEO_CONVERSION_CONCURRENCY = 'job.videoConversion.concurrency',
-  JOB_OBJECT_TAGGING_CONCURRENCY = 'job.objectTagging.concurrency',
  JOB_RECOGNIZE_FACES_CONCURRENCY = 'job.recognizeFaces.concurrency',
  JOB_CLIP_ENCODING_CONCURRENCY = 'job.smartSearch.concurrency',
  JOB_BACKGROUND_TASK_CONCURRENCY = 'job.backgroundTask.concurrency',
|
@ -54,10 +53,6 @@ export enum SystemConfigKey {
|
|||
MACHINE_LEARNING_ENABLED = 'machineLearning.enabled',
|
||||
MACHINE_LEARNING_URL = 'machineLearning.url',
|
||||
|
||||
MACHINE_LEARNING_CLASSIFICATION_ENABLED = 'machineLearning.classification.enabled',
|
||||
MACHINE_LEARNING_CLASSIFICATION_MODEL_NAME = 'machineLearning.classification.modelName',
|
||||
MACHINE_LEARNING_CLASSIFICATION_MIN_SCORE = 'machineLearning.classification.minScore',
|
||||
|
||||
MACHINE_LEARNING_CLIP_ENABLED = 'machineLearning.clip.enabled',
|
||||
MACHINE_LEARNING_CLIP_MODEL_NAME = 'machineLearning.clip.modelName',
|
||||
|
||||
|
@ -184,11 +179,6 @@ export interface SystemConfig {
|
|||
machineLearning: {
|
||||
enabled: boolean;
|
||||
url: string;
|
||||
classification: {
|
||||
enabled: boolean;
|
||||
modelName: string;
|
||||
minScore: number;
|
||||
};
|
||||
clip: {
|
||||
enabled: boolean;
|
||||
modelName: string;
|
||||
|
|
|
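The enum and the interface stay in sync because each SystemConfigKey value is a dot-path into the nested SystemConfig shape, so removing a feature means deleting matching entries in both places. A sketch of how such dot-path overrides fold back into the nested object, assuming lodash-style set semantics (which is exactly what the key format above enables):

import _ from 'lodash';

const overrides: Record<string, unknown> = {
  'machineLearning.clip.enabled': true,
  'job.smartSearch.concurrency': 2,
};

const config: Record<string, unknown> = {};
for (const [key, value] of Object.entries(overrides)) {
  _.set(config, key, value); // 'a.b.c' creates { a: { b: { c: value } } }
}
// config -> { machineLearning: { clip: { enabled: true } }, job: { smartSearch: { concurrency: 2 } } }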
@@ -1,5 +1,4 @@
 import {
-  ClassificationConfig,
   CLIPConfig,
   CLIPMode,
   DetectFaceResult,
@@ -27,10 +26,6 @@ export class MachineLearningRepository implements IMachineLearningRepository {
     return res.json();
   }

-  classifyImage(url: string, input: VisionModelInput, config: ClassificationConfig): Promise<string[]> {
-    return this.post<string[]>(url, input, { ...config, modelType: ModelType.IMAGE_CLASSIFICATION });
-  }
-
   detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]> {
     return this.post<DetectFaceResult[]>(url, input, { ...config, modelType: ModelType.FACIAL_RECOGNITION });
   }
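After the removal, the repository keeps only the CLIP and facial-recognition calls, each funnelling through one POST helper that stamps the payload with a modelType so the machine-learning server picks the right pipeline. A rough, self-contained sketch of that shape; the /predict path, the JSON body, and the local type stubs are assumptions for illustration, not the real implementation (which has to ship image bytes to the server):

type VisionModelInput = { imagePath: string };
type DetectFaceResult = { boundingBox: unknown; embedding: number[]; score: number };
type RecognitionConfig = { enabled: boolean; modelName: string; minScore: number };

const ModelTypeSketch = {
  FacialRecognition: 'facial-recognition',
  Clip: 'clip',
} as const;

class MachineLearningRepositorySketch {
  private async post<T>(url: string, input: VisionModelInput, config: unknown): Promise<T> {
    const res = await fetch(`${url}/predict`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ...input, config }),
    });
    return (await res.json()) as T;
  }

  detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]> {
    return this.post<DetectFaceResult[]>(url, input, { ...config, modelType: ModelTypeSketch.FacialRecognition });
  }
}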
@@ -42,8 +42,6 @@ export class AppService {
     [JobName.CLEAN_OLD_AUDIT_LOGS]: () => this.auditService.handleCleanup(),
     [JobName.USER_DELETE_CHECK]: () => this.userService.handleUserDeleteCheck(),
     [JobName.USER_DELETION]: (data) => this.userService.handleUserDelete(data),
-    [JobName.QUEUE_OBJECT_TAGGING]: (data) => this.smartInfoService.handleQueueObjectTagging(data),
-    [JobName.CLASSIFY_IMAGE]: (data) => this.smartInfoService.handleClassifyImage(data),
     [JobName.QUEUE_ENCODE_CLIP]: (data) => this.smartInfoService.handleQueueEncodeClip(data),
     [JobName.ENCODE_CLIP]: (data) => this.smartInfoService.handleEncodeClip(data),
     [JobName.STORAGE_TEMPLATE_MIGRATION]: () => this.storageTemplateService.handleMigration(),
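AppService dispatches with a plain lookup table from JobName to handler, which is why retiring a feature is mostly deletion: drop the enum members and their two entries here. A hypothetical, self-contained sketch of the pattern (the enum values and handler bodies are stand-ins, not immich's):

enum JobNameSketch {
  QueueEncodeClip = 'queue-clip-encode',
  EncodeClip = 'clip-encode',
}

type JobHandler = (data?: unknown) => Promise<void>;

const handlers: Record<JobNameSketch, JobHandler> = {
  [JobNameSketch.QueueEncodeClip]: async (_data) => {
    // enqueue one EncodeClip job per asset (stand-in body)
  },
  [JobNameSketch.EncodeClip]: async (_data) => {
    // call the ML server and persist the embedding (stand-in body)
  },
};

async function dispatch(name: JobNameSketch, data?: unknown): Promise<void> {
  await handlers[name](data);
}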
@@ -83,7 +83,6 @@ describe(`${ServerInfoController.name} (e2e)`, () => {
         passwordLogin: true,
         search: true,
         sidecar: true,
-        tagImage: false,
         trash: true,
       });
     });
@@ -2,7 +2,6 @@ import { IMachineLearningRepository } from '@app/domain';

 export const newMachineLearningRepositoryMock = (): jest.Mocked<IMachineLearningRepository> => {
   return {
-    classifyImage: jest.fn(),
     encodeImage: jest.fn(),
     encodeText: jest.fn(),
     detectFaces: jest.fn(),
@@ -135,7 +135,6 @@ class ImmichApi {
     [JobName.ThumbnailGeneration]: 'Generate Thumbnails',
     [JobName.MetadataExtraction]: 'Extract Metadata',
     [JobName.Sidecar]: 'Sidecar Metadata',
-    [JobName.ObjectTagging]: 'Tag Objects',
     [JobName.SmartSearch]: 'Smart Search',
     [JobName.RecognizeFaces]: 'Recognize Faces',
     [JobName.VideoConversion]: 'Transcode Videos',
59
web/src/api/open-api/api.ts
generated
@@ -373,12 +373,6 @@ export interface AllJobStatusResponseDto {
    * @memberof AllJobStatusResponseDto
    */
   'migration': JobStatusDto;
-  /**
-   *
-   * @type {JobStatusDto}
-   * @memberof AllJobStatusResponseDto
-   */
-  'objectTagging': JobStatusDto;
   /**
    *
    * @type {JobStatusDto}
@@ -1318,39 +1312,6 @@ export interface CheckExistingAssetsResponseDto {
    */
   'existingIds': Array<string>;
 }
-/**
- *
- * @export
- * @interface ClassificationConfig
- */
-export interface ClassificationConfig {
-  /**
-   *
-   * @type {boolean}
-   * @memberof ClassificationConfig
-   */
-  'enabled': boolean;
-  /**
-   *
-   * @type {number}
-   * @memberof ClassificationConfig
-   */
-  'minScore': number;
-  /**
-   *
-   * @type {string}
-   * @memberof ClassificationConfig
-   */
-  'modelName': string;
-  /**
-   *
-   * @type {ModelType}
-   * @memberof ClassificationConfig
-   */
-  'modelType'?: ModelType;
-}
-
-
 /**
  *
  * @export
@@ -2015,7 +1976,6 @@ export const JobName = {
     ThumbnailGeneration: 'thumbnailGeneration',
     MetadataExtraction: 'metadataExtraction',
     VideoConversion: 'videoConversion',
-    ObjectTagging: 'objectTagging',
     RecognizeFaces: 'recognizeFaces',
     SmartSearch: 'smartSearch',
     BackgroundTask: 'backgroundTask',
@@ -2358,7 +2318,6 @@ export interface MergePersonDto {
  */

 export const ModelType = {
-    ImageClassification: 'image-classification',
     FacialRecognition: 'facial-recognition',
     Clip: 'clip'
 } as const;
|
@ -3139,12 +3098,6 @@ export interface ServerFeaturesDto {
|
|||
* @memberof ServerFeaturesDto
|
||||
*/
|
||||
'sidecar': boolean;
|
||||
/**
|
||||
*
|
||||
* @type {boolean}
|
||||
* @memberof ServerFeaturesDto
|
||||
*/
|
||||
'tagImage': boolean;
|
||||
/**
|
||||
*
|
||||
* @type {boolean}
|
||||
|
@ -3803,12 +3756,6 @@ export interface SystemConfigJobDto {
|
|||
* @memberof SystemConfigJobDto
|
||||
*/
|
||||
'migration': JobSettingsDto;
|
||||
/**
|
||||
*
|
||||
* @type {JobSettingsDto}
|
||||
* @memberof SystemConfigJobDto
|
||||
*/
|
||||
'objectTagging': JobSettingsDto;
|
||||
/**
|
||||
*
|
||||
* @type {JobSettingsDto}
|
||||
|
@ -3911,12 +3858,6 @@ export interface SystemConfigLoggingDto {
|
|||
* @interface SystemConfigMachineLearningDto
|
||||
*/
|
||||
export interface SystemConfigMachineLearningDto {
|
||||
/**
|
||||
*
|
||||
* @type {ClassificationConfig}
|
||||
* @memberof SystemConfigMachineLearningDto
|
||||
*/
|
||||
'classification': ClassificationConfig;
|
||||
/**
|
||||
*
|
||||
* @type {CLIPConfig}
|
||||
|
|
|
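Because these api.ts files are generated from the server's OpenAPI spec, the removals surface in clients at compile time rather than at runtime. An illustrative (not real) consumer snippet:

import { ServerFeaturesDto } from './api';

// Still fine: these flags survive the regeneration.
const hasSidecar = (features: ServerFeaturesDto): boolean => features.sidecar;

// const canTag = (features: ServerFeaturesDto) => features.tagImage;
// ^ no longer compiles after this commit:
//   ts(2339): Property 'tagImage' does not exist on type 'ServerFeaturesDto'.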
@@ -15,7 +15,6 @@
   mdiImageSearch,
   mdiLibraryShelves,
   mdiTable,
-  mdiTagMultiple,
   mdiVideo,
 } from '@mdi/js';
 import ConfirmDialogue from '../../shared-components/confirm-dialogue.svelte';
@@ -78,13 +77,6 @@
       missingText: 'DISCOVER',
       disabled: !$featureFlags.sidecar,
     },
-    [JobName.ObjectTagging]: {
-      icon: mdiTagMultiple,
-      title: api.getJobName(JobName.ObjectTagging),
-      subtitle:
-        'Run machine learning on assets to tag objects\nNote that some assets may not have any objects detected',
-      disabled: !$featureFlags.tagImage,
-    },
     [JobName.SmartSearch]: {
       icon: mdiImageSearch,
       title: api.getJobName(JobName.SmartSearch),
@@ -22,7 +22,6 @@
   JobName.MetadataExtraction,
   JobName.Library,
   JobName.Sidecar,
-  JobName.ObjectTagging,
   JobName.SmartSearch,
   JobName.RecognizeFaces,
   JobName.VideoConversion,
@@ -89,46 +89,6 @@
     />
   </div>

-  <SettingAccordion title="Image Tagging" subtitle="Tag and classify images with object labels">
-    <div class="ml-4 mt-4 flex flex-col gap-4">
-      <SettingSwitch
-        title="ENABLED"
-        subtitle="If disabled, images will not be tagged. This affects the Things section in the Explore page as well as 'm:' searches."
-        bind:checked={machineLearningConfig.classification.enabled}
-        disabled={disabled || !machineLearningConfig.enabled}
-      />
-
-      <hr />
-
-      <SettingInputField
-        inputType={SettingInputFieldType.TEXT}
-        label="IMAGE CLASSIFICATION MODEL"
-        bind:value={machineLearningConfig.classification.modelName}
-        required={true}
-        disabled={disabled || !machineLearningConfig.enabled || !machineLearningConfig.classification.enabled}
-        isEdited={machineLearningConfig.classification.modelName !== savedConfig.classification.modelName}
-      >
-        <p slot="desc" class="immich-form-label pb-2 text-sm">
-          The name of an image classification model listed <a
-            href="https://huggingface.co/models?pipeline_tag=image-classification&sort=trending"><u>here</u></a
-          >. It must be tagged with the 'Image Classification' task and must support ONNX conversion.
-        </p>
-      </SettingInputField>
-
-      <SettingInputField
-        inputType={SettingInputFieldType.NUMBER}
-        label="IMAGE CLASSIFICATION THRESHOLD"
-        desc="Minimum confidence score to add a particular object tag. Lower values will add more tags to images, but may result in more false positives. Will not have any effect until the Tag Objects job is re-run."
-        bind:value={machineLearningConfig.classification.minScore}
-        step="0.1"
-        min="0"
-        max="1"
-        disabled={disabled || !machineLearningConfig.enabled || !machineLearningConfig.classification.enabled}
-        isEdited={machineLearningConfig.classification.minScore !== savedConfig.classification.minScore}
-      />
-    </div>
-  </SettingAccordion>
-
   <SettingAccordion title="Smart Search" subtitle="Search for images semantically using CLIP embeddings">
     <div class="ml-4 mt-4 flex flex-col gap-4">
       <SettingSwitch
@@ -8,7 +8,6 @@ export const featureFlags = writable<FeatureFlags>({
   clipEncode: true,
   facialRecognition: true,
   sidecar: true,
-  tagImage: true,
   map: true,
   reverseGeocoding: true,
   search: true,