
feat(server,ml): remove image tagging ()

* remove image tagging

* updated lock

* fixed tests, improved logging

* be nice

* fixed tests
Authored by Mert on 2023-12-20 20:47:56 -05:00, committed by GitHub
parent 154292242f
commit 092a23fd7f
65 changed files with 984 additions and 2669 deletions

@@ -209,7 +209,7 @@ jobs:
poetry run black --check app export
- name: Run mypy type checking
run: |
- poetry run mypy --install-types --non-interactive --strict app/ export/
+ poetry run mypy --install-types --non-interactive --strict app/
- name: Run tests and coverage
run: |
poetry run pytest --cov app


@@ -373,12 +373,6 @@ export interface AllJobStatusResponseDto {
* @memberof AllJobStatusResponseDto
*/
'migration': JobStatusDto;
/**
*
* @type {JobStatusDto}
* @memberof AllJobStatusResponseDto
*/
'objectTagging': JobStatusDto;
/**
*
* @type {JobStatusDto}
@@ -1318,39 +1312,6 @@ export interface CheckExistingAssetsResponseDto {
*/
'existingIds': Array<string>;
}
/**
*
* @export
* @interface ClassificationConfig
*/
export interface ClassificationConfig {
/**
*
* @type {boolean}
* @memberof ClassificationConfig
*/
'enabled': boolean;
/**
*
* @type {number}
* @memberof ClassificationConfig
*/
'minScore': number;
/**
*
* @type {string}
* @memberof ClassificationConfig
*/
'modelName': string;
/**
*
* @type {ModelType}
* @memberof ClassificationConfig
*/
'modelType'?: ModelType;
}
/**
*
* @export
@@ -2015,7 +1976,6 @@ export const JobName = {
ThumbnailGeneration: 'thumbnailGeneration',
MetadataExtraction: 'metadataExtraction',
VideoConversion: 'videoConversion',
ObjectTagging: 'objectTagging',
RecognizeFaces: 'recognizeFaces',
SmartSearch: 'smartSearch',
BackgroundTask: 'backgroundTask',
@@ -2358,7 +2318,6 @@ export interface MergePersonDto {
*/
export const ModelType = {
ImageClassification: 'image-classification',
FacialRecognition: 'facial-recognition',
Clip: 'clip'
} as const;
@@ -3139,12 +3098,6 @@ export interface ServerFeaturesDto {
* @memberof ServerFeaturesDto
*/
'sidecar': boolean;
/**
*
* @type {boolean}
* @memberof ServerFeaturesDto
*/
'tagImage': boolean;
/**
*
* @type {boolean}
@@ -3803,12 +3756,6 @@ export interface SystemConfigJobDto {
* @memberof SystemConfigJobDto
*/
'migration': JobSettingsDto;
/**
*
* @type {JobSettingsDto}
* @memberof SystemConfigJobDto
*/
'objectTagging': JobSettingsDto;
/**
*
* @type {JobSettingsDto}
@@ -3911,12 +3858,6 @@ export interface SystemConfigLoggingDto {
* @interface SystemConfigMachineLearningDto
*/
export interface SystemConfigMachineLearningDto {
/**
*
* @type {ClassificationConfig}
* @memberof SystemConfigMachineLearningDto
*/
'classification': ClassificationConfig;
/**
*
* @type {CLIPConfig}


@@ -56,10 +56,6 @@ Template changes will only apply to new assets. To retroactively apply the templ
This is fixed by running the storage migration job.
- ### Why is object detection not very good?
- The default image tagging model is relatively small. You can change this for a larger model like `google/vit-base-patch16-224` by setting the model name under Settings > Machine Learning Settings > Image Tagging. You can then re-run the Image Tagging job to get improved tags.
### Why are there so many thumbnail generation jobs?
Immich generates three thumbnails for each asset (blurred, small, and large), as well as a thumbnail for each recognized face.


@@ -73,7 +73,7 @@ The Immich Microservices image uses the same `Dockerfile` as the Immich Server,
- Thumbnail Generation
- Metadata Extraction
- Video Transcoding
- - Object Tagging
+ - Smart Search
- Facial Recognition
- Storage Template Migration
- Sidecar (see [XMP Sidecars](/docs/features/xmp-sidecars.md))


@@ -38,9 +38,6 @@ The default configuration looks like this:
"metadataExtraction": {
"concurrency": 5
},
- "objectTagging": {
- "concurrency": 2
- },
"recognizeFaces": {
"concurrency": 2
},
@@ -73,11 +70,6 @@ The default configuration looks like this:
"machineLearning": {
"enabled": true,
"url": "http://immich-machine-learning:3003",
- "classification": {
- "enabled": false,
- "modelName": "microsoft/resnet-50",
- "minScore": 0.9
- },
"clip": {
"enabled": true,
"modelName": "ViT-B-32__openai"


@@ -1,6 +1,5 @@
# Immich Machine Learning
- - Image classification
- CLIP embeddings
- Facial recognition


@@ -59,3 +59,37 @@ def clip_preprocess_cfg() -> dict[str, Any]:
"resize_mode": "shortest",
"fill_color": 0,
}
@pytest.fixture(scope="session")
def clip_tokenizer_cfg() -> dict[str, Any]:
return {
"add_prefix_space": False,
"added_tokens_decoder": {
"49406": {
"content": "<|startoftext|>",
"lstrip": False,
"normalized": True,
"rstrip": False,
"single_word": False,
"special": True,
},
"49407": {
"content": "<|endoftext|>",
"lstrip": False,
"normalized": True,
"rstrip": False,
"single_word": False,
"special": True,
},
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": True,
"do_lower_case": True,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 77,
"pad_token": "<|endoftext|>",
"tokenizer_class": "CLIPTokenizer",
"unk_token": "<|endoftext|>",
}


@@ -6,7 +6,6 @@ from .base import InferenceModel
from .clip import MCLIPEncoder, OpenCLIPEncoder
from .constants import is_insightface, is_mclip, is_openclip
from .facial_recognition import FaceRecognizer
- from .image_classification import ImageClassifier
def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any) -> InferenceModel:
@@ -19,8 +18,6 @@ def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any)
case ModelType.FACIAL_RECOGNITION:
if is_insightface(model_name):
return FaceRecognizer(model_name, **model_kwargs)
- case ModelType.IMAGE_CLASSIFICATION:
- return ImageClassifier(model_name, **model_kwargs)
case _:
raise ValueError(f"Unknown model type {model_type}")


@@ -35,7 +35,7 @@ class InferenceModel(ABC):
)
log.debug(
(
- f"Setting '{self.model_name}' execution providers to {self.providers}"
+ f"Setting '{self.model_name}' execution providers to {self.providers} "
"in descending order of preference"
),
)
@@ -55,7 +55,7 @@ class InferenceModel(ABC):
def download(self) -> None:
if not self.cached:
log.info(
- (f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'." "This may take a while.")
+ f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'. This may take a while."
)
self._download()
@@ -63,7 +63,7 @@ class InferenceModel(ABC):
if self.loaded:
return
self.download()
- log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}'")
+ log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory")
self._load()
self.loaded = True
@@ -119,11 +119,11 @@ class InferenceModel(ABC):
def clear_cache(self) -> None:
if not self.cache_dir.exists():
log.warn(
- f"Attempted to clear cache for model '{self.model_name}' but cache directory does not exist.",
+ f"Attempted to clear cache for model '{self.model_name}', but cache directory does not exist",
)
return
if not rmtree.avoids_symlink_attacks:
- raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform.")
+ raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform")
if self.cache_dir.is_dir():
log.info(f"Cleared cache directory for model '{self.model_name}'.")


@@ -8,11 +8,11 @@ from typing import Any, Literal
import numpy as np
import onnxruntime as ort
from PIL import Image
- from transformers import AutoTokenizer
+ from tokenizers import Encoding, Tokenizer
from app.config import clean_name, log
from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
- from app.schemas import ModelType, ndarray_f32, ndarray_i32, ndarray_i64
+ from app.schemas import ModelType, ndarray_f32, ndarray_i32
from .base import InferenceModel
@@ -40,6 +40,7 @@ class BaseCLIPEncoder(InferenceModel):
providers=self.providers,
provider_options=self.provider_options,
)
+ log.debug(f"Loaded clip text model '{self.model_name}'")
if self.mode == "vision" or self.mode is None:
log.debug(f"Loading clip vision model '{self.model_name}'")
@@ -50,6 +51,7 @@ class BaseCLIPEncoder(InferenceModel):
providers=self.providers,
provider_options=self.provider_options,
)
+ log.debug(f"Loaded clip vision model '{self.model_name}'")
def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
if isinstance(image_or_text, bytes):
@@ -99,6 +101,14 @@
def visual_path(self) -> Path:
return self.visual_dir / "model.onnx"
+ @property
+ def tokenizer_file_path(self) -> Path:
+ return self.textual_dir / "tokenizer.json"
+ @property
+ def tokenizer_cfg_path(self) -> Path:
+ return self.textual_dir / "tokenizer_config.json"
@property
def preprocess_cfg_path(self) -> Path:
return self.visual_dir / "preprocess_cfg.json"
@@ -107,6 +117,34 @@
def cached(self) -> bool:
return self.textual_path.is_file() and self.visual_path.is_file()
@cached_property
def model_cfg(self) -> dict[str, Any]:
log.debug(f"Loading model config for CLIP model '{self.model_name}'")
model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
return model_cfg
@cached_property
def tokenizer_file(self) -> dict[str, Any]:
log.debug(f"Loading tokenizer file for CLIP model '{self.model_name}'")
tokenizer_file: dict[str, Any] = json.load(self.tokenizer_file_path.open())
log.debug(f"Loaded tokenizer file for CLIP model '{self.model_name}'")
return tokenizer_file
@cached_property
def tokenizer_cfg(self) -> dict[str, Any]:
log.debug(f"Loading tokenizer config for CLIP model '{self.model_name}'")
tokenizer_cfg: dict[str, Any] = json.load(self.tokenizer_cfg_path.open())
log.debug(f"Loaded tokenizer config for CLIP model '{self.model_name}'")
return tokenizer_cfg
@cached_property
def preprocess_cfg(self) -> dict[str, Any]:
log.debug(f"Loading visual preprocessing config for CLIP model '{self.model_name}'")
preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
log.debug(f"Loaded visual preprocessing config for CLIP model '{self.model_name}'")
return preprocess_cfg
class OpenCLIPEncoder(BaseCLIPEncoder):
def __init__(
@@ -121,8 +159,8 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
def _load(self) -> None:
super()._load()
- self.tokenizer = AutoTokenizer.from_pretrained(self.textual_dir)
- self.sequence_length = self.model_cfg["text_cfg"]["context_length"]
+ context_length = self.model_cfg["text_cfg"]["context_length"]
+ pad_token = self.tokenizer_cfg["pad_token"]
self.size = (
self.preprocess_cfg["size"][0] if type(self.preprocess_cfg["size"]) == list else self.preprocess_cfg["size"]
@@ -131,16 +169,16 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
self.mean = np.array(self.preprocess_cfg["mean"], dtype=np.float32)
self.std = np.array(self.preprocess_cfg["std"], dtype=np.float32)
+ log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'")
+ self.tokenizer: Tokenizer = Tokenizer.from_file(self.tokenizer_file_path.as_posix())
+ pad_id = self.tokenizer.token_to_id(pad_token)
+ self.tokenizer.enable_padding(length=context_length, pad_token=pad_token, pad_id=pad_id)
+ self.tokenizer.enable_truncation(max_length=context_length)
+ log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
- input_ids: ndarray_i64 = self.tokenizer(
- text,
- max_length=self.sequence_length,
- return_tensors="np",
- return_attention_mask=False,
- padding="max_length",
- truncation=True,
- ).input_ids
- return {"text": input_ids.astype(np.int32)}
+ tokens: Encoding = self.tokenizer.encode(text)
+ return {"text": np.array([tokens.ids], dtype=np.int32)}
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
image = resize(image, self.size)
@@ -149,18 +187,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
image_np = normalize(image_np, self.mean, self.std)
return {"image": np.expand_dims(image_np.transpose(2, 0, 1), 0)}
- @cached_property
- def model_cfg(self) -> dict[str, Any]:
- model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
- return model_cfg
- @cached_property
- def preprocess_cfg(self) -> dict[str, Any]:
- preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
- return preprocess_cfg
class MCLIPEncoder(OpenCLIPEncoder):
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
- tokens: dict[str, ndarray_i64] = self.tokenizer(text, return_tensors="np")
- return {k: v.astype(np.int32) for k, v in tokens.items()}
+ tokens: Encoding = self.tokenizer.encode(text)
+ return {
+ "input_ids": np.array([tokens.ids], dtype=np.int32),
+ "attention_mask": np.array([tokens.attention_mask], dtype=np.int32),
+ }
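The tokenizer rework above swaps `transformers.AutoTokenizer` for the much lighter `tokenizers` package: the encoder loads the exported tokenizer.json, enables padding and truncation to the model's context length, and feeds int32 token ids to the ONNX text model. A minimal standalone sketch of that flow, assuming a local tokenizer.json and CLIP's usual 77-token context length (the real values come from tokenizer_config.json and the open_clip model config at runtime):

```python
import numpy as np
from tokenizers import Tokenizer

# Illustrative path and values; the encoder reads these from the downloaded model files.
tokenizer = Tokenizer.from_file("textual/tokenizer.json")
context_length = 77
pad_token = "<|endoftext|>"

tokenizer.enable_padding(length=context_length, pad_token=pad_token, pad_id=tokenizer.token_to_id(pad_token))
tokenizer.enable_truncation(max_length=context_length)

encoding = tokenizer.encode("a photo of a dog on a beach")
text_input = {"text": np.array([encoding.ids], dtype=np.int32)}  # shape (1, 77), input to the ONNX text model
```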


@@ -1,75 +0,0 @@
from io import BytesIO
from pathlib import Path
from typing import Any
from huggingface_hub import snapshot_download
from optimum.onnxruntime import ORTModelForImageClassification
from optimum.pipelines import pipeline
from PIL import Image
from transformers import AutoImageProcessor
from ..config import log
from ..schemas import ModelType
from .base import InferenceModel
class ImageClassifier(InferenceModel):
_model_type = ModelType.IMAGE_CLASSIFICATION
def __init__(
self,
model_name: str,
min_score: float = 0.9,
cache_dir: Path | str | None = None,
**model_kwargs: Any,
) -> None:
self.min_score = model_kwargs.pop("minScore", min_score)
super().__init__(model_name, cache_dir, **model_kwargs)
def _download(self) -> None:
snapshot_download(
cache_dir=self.cache_dir,
repo_id=self.model_name,
allow_patterns=["*.bin", "*.json", "*.txt"],
local_dir=self.cache_dir,
local_dir_use_symlinks=True,
)
def _load(self) -> None:
processor = AutoImageProcessor.from_pretrained(self.cache_dir, cache_dir=self.cache_dir)
model_path = self.cache_dir / "model.onnx"
model_kwargs = {
"cache_dir": self.cache_dir,
"provider": self.providers[0],
"provider_options": self.provider_options[0],
"session_options": self.sess_options,
}
if model_path.exists():
model = ORTModelForImageClassification.from_pretrained(self.cache_dir, **model_kwargs)
self.model = pipeline(self.model_type.value, model, feature_extractor=processor)
else:
log.info(
(
f"ONNX model not found in cache directory for '{self.model_name}'."
"Exporting optimized model for future use."
),
)
self.sess_options.optimized_model_filepath = model_path.as_posix()
self.model = pipeline(
self.model_type.value,
self.model_name,
model_kwargs=model_kwargs,
feature_extractor=processor,
)
def _predict(self, image: Image.Image | bytes) -> list[str]:
if isinstance(image, bytes):
image = Image.open(BytesIO(image))
predictions: list[dict[str, Any]] = self.model(image)
tags = [tag for pred in predictions for tag in pred["label"].split(", ") if pred["score"] >= self.min_score]
return tags
def configure(self, **model_kwargs: Any) -> None:
self.min_score = model_kwargs.pop("minScore", self.min_score)


@@ -25,7 +25,6 @@ class BoundingBox(TypedDict):
class ModelType(StrEnum):
- IMAGE_CLASSIFICATION = "image-classification"
CLIP = "clip"
FACIAL_RECOGNITION = "facial-recognition"


@@ -17,42 +17,9 @@ from .models.base import PicklableSessionOptions
from .models.cache import ModelCache
from .models.clip import OpenCLIPEncoder
from .models.facial_recognition import FaceRecognizer
- from .models.image_classification import ImageClassifier
from .schemas import ModelType
class TestImageClassifier:
classifier_preds = [
{"label": "that's an image alright", "score": 0.8},
{"label": "well it ends with .jpg", "score": 0.1},
{"label": "idk, im just seeing bytes", "score": 0.05},
{"label": "not sure", "score": 0.04},
{"label": "probably a virus", "score": 0.01},
]
def test_min_score(self, pil_image: Image.Image, mocker: MockerFixture) -> None:
mocker.patch.object(ImageClassifier, "load")
classifier = ImageClassifier("test_model_name", min_score=0.0)
assert classifier.min_score == 0.0
classifier.model = mock.Mock()
classifier.model.return_value = self.classifier_preds
all_labels = classifier.predict(pil_image)
classifier.min_score = 0.5
filtered_labels = classifier.predict(pil_image)
assert all_labels == [
"that's an image alright",
"well it ends with .jpg",
"idk",
"im just seeing bytes",
"not sure",
"probably a virus",
]
assert filtered_labels == ["that's an image alright"]
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@@ -63,11 +30,13 @@ class TestCLIP:
mocker: MockerFixture,
clip_model_cfg: dict[str, Any],
clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
+ clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
mocker.patch.object(OpenCLIPEncoder, "download")
mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
- mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
+ mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
+ mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
mocked.return_value.run.return_value = [[self.embedding]]
@@ -85,11 +54,13 @@ class TestCLIP:
mocker: MockerFixture,
clip_model_cfg: dict[str, Any],
clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
+ clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
mocker.patch.object(OpenCLIPEncoder, "download")
mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
- mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
+ mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
+ mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
mocked.return_value.run.return_value = [[self.embedding]]
@@ -145,17 +116,15 @@ class TestFaceRecognition:
class TestCache:
async def test_caches(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
- await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
- await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
+ await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
+ await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
assert len(model_cache.cache._cache) == 1
mock_get_model.assert_called_once()
async def test_kwargs_used(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
- await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION, cache_dir="test_cache")
- mock_get_model.assert_called_once_with(
- ModelType.IMAGE_CLASSIFICATION, "test_model_name", cache_dir="test_cache"
- )
+ await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION, cache_dir="test_cache")
+ mock_get_model.assert_called_once_with(ModelType.FACIAL_RECOGNITION, "test_model_name", cache_dir="test_cache")
async def test_different_clip(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
@@ -172,14 +141,14 @@ class TestCache:
@mock.patch("app.models.cache.OptimisticLock", autospec=True)
async def test_model_ttl(self, mock_lock_cls: mock.Mock, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache(ttl=100)
- await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
+ await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
mock_lock_cls.return_value.__aenter__.return_value.cas.assert_called_with(mock.ANY, ttl=100)
@mock.patch("app.models.cache.SimpleMemoryCache.expire")
async def test_revalidate(self, mock_cache_expire: mock.Mock, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache(ttl=100, revalidate=True)
- await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
- await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
+ await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
+ await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
mock_cache_expire.assert_called_once_with(mock.ANY, 100)
@@ -188,23 +157,6 @@
reason="More time-consuming since it deploys the app and loads models.",
)
class TestEndpoints:
def test_tagging_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:
byte_image = BytesIO()
pil_image.save(byte_image, format="jpeg")
response = deployed_app.post(
"http://localhost:3003/predict",
data={
"modelName": "microsoft/resnet-50",
"modelType": "image-classification",
"options": json.dumps({"minScore": 0.0}),
},
files={"image": byte_image.getvalue()},
)
assert response.status_code == 200
assert response.json() == responses["image-classification"]
def test_clip_image_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:


@@ -12,7 +12,6 @@ byte_image = BytesIO()
@events.init_command_line_parser.add_listener
def _(parser: ArgumentParser) -> None:
- parser.add_argument("--tag-model", type=str, default="microsoft/resnet-50")
parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
parser.add_argument("--face-model", type=str, default="buffalo_l")
parser.add_argument(
@@ -54,18 +53,6 @@ class InferenceLoadTest(HttpUser):
self.data = byte_image.getvalue()
class ClassificationFormDataLoadTest(InferenceLoadTest):
@task
def classify(self) -> None:
data = [
("modelName", self.environment.parsed_options.clip_model),
("modelType", "clip"),
("options", json.dumps({"minScore": self.environment.parsed_options.tag_min_score})),
]
files = {"image": self.data}
self.client.post("/predict", data=data, files=files)
class CLIPTextFormDataLoadTest(InferenceLoadTest):
@task
def encode_text(self) -> None:


@@ -5,8 +5,7 @@
"handlers": {
"console": {
"class": "app.config.CustomRichHandler",
- "formatter": "rich",
- "level": "INFO"
+ "formatter": "rich"
}
},
"loggers": {

File diff suppressed because it is too large.


@@ -7,12 +7,7 @@ readme = "README.md"
packages = [{include = "app"}]
[tool.poetry.dependencies]
- python = "~3.11"
+ python = "=3.11.*"
- torch = [
- {markers = "platform_machine == 'arm64' or platform_machine == 'aarch64'", version = "=2.1.0", source = "pypi"},
- {markers = "platform_machine == 'amd64' or platform_machine == 'x86_64'", version = "=2.1.0", source = "pytorch-cpu"}
- ]
- transformers = "^4.29.2"
onnxruntime = "^1.15.0"
insightface = "^0.7.3"
opencv-python-headless = "^4.7.0.72"
@@ -21,14 +16,14 @@ fastapi = "^0.95.2"
uvicorn = {extras = ["standard"], version = "^0.22.0"}
pydantic = "^1.10.8"
aiocache = "^0.12.1"
- optimum = "^1.9.1"
rich = "^13.4.2"
ftfy = "^6.1.1"
setuptools = "^68.0.0"
python-multipart = "^0.0.6"
orjson = "^3.9.5"
- safetensors = "0.3.2"
gunicorn = "^21.1.0"
+ huggingface-hub = "^0.20.1"
+ tokenizers = "^0.15.0"
[tool.poetry.group.dev.dependencies]
mypy = "^1.3.0"
@@ -41,11 +36,6 @@ pytest-cov = "^4.1.0"
ruff = "^0.0.272"
pytest-mock = "^3.11.1"
- [[tool.poetry.source]]
- name = "pytorch-cpu"
- url = "https://download.pytorch.org/whl/cpu"
- priority = "explicit"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
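With torch, transformers, and optimum removed, fetching and running a model needs only huggingface-hub, tokenizers, and onnxruntime. A rough sketch of what loading could look like under that dependency set; the repository id and the textual/ and visual/ layout are assumptions mirroring the cache paths used in clip.py above, not a documented API:

```python
import onnxruntime as ort
from huggingface_hub import snapshot_download

# Assumed Hugging Face repo id and file layout (textual/model.onnx, visual/model.onnx).
model_dir = snapshot_download("immich-app/ViT-B-32__openai", allow_patterns=["textual/*", "visual/*"])

textual = ort.InferenceSession(f"{model_dir}/textual/model.onnx", providers=["CPUExecutionProvider"])
visual = ort.InferenceSession(f"{model_dir}/visual/model.onnx", providers=["CPUExecutionProvider"])
print([inp.name for inp in textual.get_inputs()], [inp.name for inp in visual.get_inputs()])
```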


@@ -50,7 +50,6 @@ doc/CQMode.md
doc/ChangePasswordDto.md
doc/CheckExistingAssetsDto.md
doc/CheckExistingAssetsResponseDto.md
- doc/ClassificationConfig.md
doc/Colorspace.md
doc/CreateAlbumDto.md
doc/CreateLibraryDto.md
@@ -244,7 +243,6 @@ lib/model/bulk_ids_dto.dart
lib/model/change_password_dto.dart
lib/model/check_existing_assets_dto.dart
lib/model/check_existing_assets_response_dto.dart
- lib/model/classification_config.dart
lib/model/clip_config.dart
lib/model/clip_mode.dart
lib/model/colorspace.dart
@@ -408,7 +406,6 @@ test/bulk_ids_dto_test.dart
test/change_password_dto_test.dart
test/check_existing_assets_dto_test.dart
test/check_existing_assets_response_dto_test.dart
- test/classification_config_test.dart
test/clip_config_test.dart
test/clip_mode_test.dart
test/colorspace_test.dart

mobile/openapi/README.md and other generated mobile/openapi files changed; binary file diffs not shown.


@@ -6479,9 +6479,6 @@
"migration": {
"$ref": "#/components/schemas/JobStatusDto"
},
- "objectTagging": {
- "$ref": "#/components/schemas/JobStatusDto"
- },
"recognizeFaces": {
"$ref": "#/components/schemas/JobStatusDto"
},
@@ -6508,7 +6505,6 @@
"thumbnailGeneration",
"metadataExtraction",
"videoConversion",
- "objectTagging",
"smartSearch",
"storageTemplateMigration",
"migration",
@@ -7201,28 +7197,6 @@
],
"type": "object"
},
"ClassificationConfig": {
"properties": {
"enabled": {
"type": "boolean"
},
"minScore": {
"type": "integer"
},
"modelName": {
"type": "string"
},
"modelType": {
"$ref": "#/components/schemas/ModelType"
}
},
"required": [
"minScore",
"enabled",
"modelName"
],
"type": "object"
},
"Colorspace": { "Colorspace": {
"enum": [ "enum": [
"srgb", "srgb",
@ -7819,7 +7793,6 @@
"thumbnailGeneration", "thumbnailGeneration",
"metadataExtraction", "metadataExtraction",
"videoConversion", "videoConversion",
"objectTagging",
"recognizeFaces", "recognizeFaces",
"smartSearch", "smartSearch",
"backgroundTask", "backgroundTask",
@ -8090,7 +8063,6 @@
}, },
"ModelType": { "ModelType": {
"enum": [ "enum": [
"image-classification",
"facial-recognition", "facial-recognition",
"clip" "clip"
], ],
@ -8674,9 +8646,6 @@
"sidecar": { "sidecar": {
"type": "boolean" "type": "boolean"
}, },
"tagImage": {
"type": "boolean"
},
"trash": { "trash": {
"type": "boolean" "type": "boolean"
} }
@ -8692,8 +8661,7 @@
"oauthAutoLaunch", "oauthAutoLaunch",
"passwordLogin", "passwordLogin",
"sidecar", "sidecar",
"search", "search"
"tagImage"
], ],
"type": "object" "type": "object"
}, },
@ -9191,9 +9159,6 @@
"migration": { "migration": {
"$ref": "#/components/schemas/JobSettingsDto" "$ref": "#/components/schemas/JobSettingsDto"
}, },
"objectTagging": {
"$ref": "#/components/schemas/JobSettingsDto"
},
"recognizeFaces": { "recognizeFaces": {
"$ref": "#/components/schemas/JobSettingsDto" "$ref": "#/components/schemas/JobSettingsDto"
}, },
@ -9220,7 +9185,6 @@
"thumbnailGeneration", "thumbnailGeneration",
"metadataExtraction", "metadataExtraction",
"videoConversion", "videoConversion",
"objectTagging",
"smartSearch", "smartSearch",
"storageTemplateMigration", "storageTemplateMigration",
"migration", "migration",
@ -9275,9 +9239,6 @@
}, },
"SystemConfigMachineLearningDto": { "SystemConfigMachineLearningDto": {
"properties": { "properties": {
"classification": {
"$ref": "#/components/schemas/ClassificationConfig"
},
"clip": { "clip": {
"$ref": "#/components/schemas/CLIPConfig" "$ref": "#/components/schemas/CLIPConfig"
}, },
@ -9294,7 +9255,6 @@
"required": [ "required": [
"enabled", "enabled",
"url", "url",
"classification",
"clip", "clip",
"facialRecognition" "facialRecognition"
], ],


@@ -2,7 +2,6 @@ export enum QueueName {
THUMBNAIL_GENERATION = 'thumbnailGeneration',
METADATA_EXTRACTION = 'metadataExtraction',
VIDEO_CONVERSION = 'videoConversion',
- OBJECT_TAGGING = 'objectTagging',
RECOGNIZE_FACES = 'recognizeFaces',
SMART_SEARCH = 'smartSearch',
BACKGROUND_TASK = 'backgroundTask',
@@ -55,10 +54,6 @@ export enum JobName {
MIGRATE_ASSET = 'migrate-asset',
MIGRATE_PERSON = 'migrate-person',
- // object tagging
- QUEUE_OBJECT_TAGGING = 'queue-object-tagging',
- CLASSIFY_IMAGE = 'classify-image',
// facial recognition
PERSON_CLEANUP = 'person-cleanup',
PERSON_DELETE = 'person-delete',
@@ -126,10 +121,6 @@ export const JOBS_TO_QUEUE: Record<JobName, QueueName> = {
[JobName.MIGRATE_ASSET]: QueueName.MIGRATION,
[JobName.MIGRATE_PERSON]: QueueName.MIGRATION,
- // object tagging
- [JobName.QUEUE_OBJECT_TAGGING]: QueueName.OBJECT_TAGGING,
- [JobName.CLASSIFY_IMAGE]: QueueName.OBJECT_TAGGING,
// facial recognition
[JobName.QUEUE_RECOGNIZE_FACES]: QueueName.RECOGNIZE_FACES,
[JobName.RECOGNIZE_FACES]: QueueName.RECOGNIZE_FACES,


@@ -59,9 +59,6 @@ export class AllJobStatusResponseDto implements Record<QueueName, JobStatusDto>
@ApiProperty({ type: JobStatusDto })
[QueueName.VIDEO_CONVERSION]!: JobStatusDto;
- @ApiProperty({ type: JobStatusDto })
- [QueueName.OBJECT_TAGGING]!: JobStatusDto;
@ApiProperty({ type: JobStatusDto })
[QueueName.SMART_SEARCH]!: JobStatusDto;


@@ -99,7 +99,6 @@ describe(JobService.name, () => {
[QueueName.BACKGROUND_TASK]: expectedJobStatus,
[QueueName.SMART_SEARCH]: expectedJobStatus,
[QueueName.METADATA_EXTRACTION]: expectedJobStatus,
- [QueueName.OBJECT_TAGGING]: expectedJobStatus,
[QueueName.SEARCH]: expectedJobStatus,
[QueueName.STORAGE_TEMPLATE_MIGRATION]: expectedJobStatus,
[QueueName.MIGRATION]: expectedJobStatus,
@@ -157,17 +156,6 @@ describe(JobService.name, () => {
expect(jobMock.queue).toHaveBeenCalledWith({ name: JobName.STORAGE_TEMPLATE_MIGRATION });
});
it('should handle a start object tagging command', async () => {
jobMock.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
await sut.handleCommand(QueueName.OBJECT_TAGGING, { command: JobCommand.START, force: false });
expect(jobMock.queue).toHaveBeenCalledWith({ name: JobName.QUEUE_OBJECT_TAGGING, data: { force: false } });
});
it('should handle a start clip encoding command', async () => {
jobMock.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
@@ -234,7 +222,6 @@ describe(JobService.name, () => {
[QueueName.BACKGROUND_TASK]: { concurrency: 10 },
[QueueName.SMART_SEARCH]: { concurrency: 10 },
[QueueName.METADATA_EXTRACTION]: { concurrency: 10 },
- [QueueName.OBJECT_TAGGING]: { concurrency: 10 },
[QueueName.RECOGNIZE_FACES]: { concurrency: 10 },
[QueueName.SEARCH]: { concurrency: 10 },
[QueueName.SIDECAR]: { concurrency: 10 },
@@ -249,7 +236,6 @@ describe(JobService.name, () => {
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.BACKGROUND_TASK, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.SMART_SEARCH, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.METADATA_EXTRACTION, 10);
- expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.OBJECT_TAGGING, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.RECOGNIZE_FACES, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.SIDECAR, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.LIBRARY, 10);
@@ -292,7 +278,6 @@ describe(JobService.name, () => {
item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-1' } },
jobs: [
JobName.GENERATE_WEBP_THUMBNAIL,
- JobName.CLASSIFY_IMAGE,
JobName.ENCODE_CLIP,
JobName.RECOGNIZE_FACES,
JobName.GENERATE_THUMBHASH_THUMBNAIL,
@@ -302,7 +287,6 @@ describe(JobService.name, () => {
item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-1', source: 'upload' } },
jobs: [
JobName.GENERATE_WEBP_THUMBNAIL,
- JobName.CLASSIFY_IMAGE,
JobName.ENCODE_CLIP,
JobName.RECOGNIZE_FACES,
JobName.GENERATE_THUMBHASH_THUMBNAIL,
@@ -312,7 +296,6 @@ describe(JobService.name, () => {
{
item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-live-image', source: 'upload' } },
jobs: [
- JobName.CLASSIFY_IMAGE,
JobName.GENERATE_WEBP_THUMBNAIL,
JobName.RECOGNIZE_FACES,
JobName.GENERATE_THUMBHASH_THUMBNAIL,
@@ -320,10 +303,6 @@ describe(JobService.name, () => {
JobName.VIDEO_CONVERSION,
],
},
{
item: { name: JobName.CLASSIFY_IMAGE, data: { id: 'asset-1' } },
jobs: [],
},
{
item: { name: JobName.ENCODE_CLIP, data: { id: 'asset-1' } },
jobs: [],
@@ -371,11 +350,6 @@ describe(JobService.name, () => {
feature: FeatureFlag.CLIP_ENCODE,
configKey: SystemConfigKey.MACHINE_LEARNING_CLIP_ENABLED,
},
{
queue: QueueName.OBJECT_TAGGING,
feature: FeatureFlag.TAG_IMAGE,
configKey: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED,
},
{
queue: QueueName.RECOGNIZE_FACES,
feature: FeatureFlag.FACIAL_RECOGNITION,


@@ -94,10 +94,6 @@ export class JobService {
case QueueName.MIGRATION:
return this.jobRepository.queue({ name: JobName.QUEUE_MIGRATION });
- case QueueName.OBJECT_TAGGING:
- await this.configCore.requireFeature(FeatureFlag.TAG_IMAGE);
- return this.jobRepository.queue({ name: JobName.QUEUE_OBJECT_TAGGING, data: { force } });
case QueueName.SMART_SEARCH:
await this.configCore.requireFeature(FeatureFlag.CLIP_ENCODE);
return this.jobRepository.queue({ name: JobName.QUEUE_ENCODE_CLIP, data: { force } });
@@ -209,7 +205,6 @@ export class JobService {
case JobName.GENERATE_JPEG_THUMBNAIL: {
await this.jobRepository.queue({ name: JobName.GENERATE_WEBP_THUMBNAIL, data: item.data });
await this.jobRepository.queue({ name: JobName.GENERATE_THUMBHASH_THUMBNAIL, data: item.data });
- await this.jobRepository.queue({ name: JobName.CLASSIFY_IMAGE, data: item.data });
await this.jobRepository.queue({ name: JobName.ENCODE_CLIP, data: item.data });
await this.jobRepository.queue({ name: JobName.RECOGNIZE_FACES, data: item.data });


@@ -62,10 +62,6 @@ export type JobItem =
| { name: JobName.SIDECAR_SYNC; data: IEntityJob }
| { name: JobName.SIDECAR_WRITE; data: ISidecarWriteJob }
- // Object Tagging
- | { name: JobName.QUEUE_OBJECT_TAGGING; data: IBaseJob }
- | { name: JobName.CLASSIFY_IMAGE; data: IEntityJob }
// Recognize Faces
| { name: JobName.QUEUE_RECOGNIZE_FACES; data: IBaseJob }
| { name: JobName.RECOGNIZE_FACES; data: IEntityJob }


@@ -1,4 +1,4 @@
- import { ClassificationConfig, CLIPConfig, RecognitionConfig } from '../smart-info/dto';
+ import { CLIPConfig, RecognitionConfig } from '../smart-info/dto';
export const IMachineLearningRepository = 'IMachineLearningRepository';
@@ -26,7 +26,6 @@ export interface DetectFaceResult {
}
export enum ModelType {
- IMAGE_CLASSIFICATION = 'image-classification',
FACIAL_RECOGNITION = 'facial-recognition',
CLIP = 'clip',
}
@@ -37,7 +36,6 @@ export enum CLIPMode {
}
export interface IMachineLearningRepository {
- classifyImage(url: string, input: VisionModelInput, config: ClassificationConfig): Promise<string[]>;
encodeImage(url: string, input: VisionModelInput, config: CLIPConfig): Promise<number[]>;
encodeText(url: string, input: TextModelInput, config: CLIPConfig): Promise<number[]>;
detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]>;


@@ -100,5 +100,4 @@ export class ServerFeaturesDto implements FeatureFlags {
passwordLogin!: boolean;
sidecar!: boolean;
search!: boolean;
- tagImage!: boolean;
}


@@ -171,7 +171,6 @@ describe(ServerInfoService.name, () => {
passwordLogin: true,
search: true,
sidecar: true,
- tagImage: false,
configFile: false,
trash: true,
});


@@ -18,15 +18,6 @@ export class ModelConfig {
modelType?: ModelType;
}
export class ClassificationConfig extends ModelConfig {
@IsNumber()
@Min(0)
@Max(1)
@Type(() => Number)
@ApiProperty({ type: 'integer' })
minScore!: number;
}
export class CLIPConfig extends ModelConfig {
@IsEnum(CLIPMode)
@Optional()


@@ -47,107 +47,6 @@ describe(SmartInfoService.name, () => {
expect(sut).toBeDefined();
});
describe('handleQueueObjectTagging', () => {
beforeEach(async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
});
it('should do nothing if machine learning is disabled', async () => {
configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
await sut.handleQueueObjectTagging({});
expect(assetMock.getAll).not.toHaveBeenCalled();
expect(assetMock.getWithout).not.toHaveBeenCalled();
});
it('should queue the assets without tags', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
assetMock.getWithout.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
await sut.handleQueueObjectTagging({ force: false });
expect(jobMock.queue.mock.calls).toEqual([[{ name: JobName.CLASSIFY_IMAGE, data: { id: assetStub.image.id } }]]);
expect(assetMock.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.OBJECT_TAGS);
});
it('should queue all the assets', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
assetMock.getAll.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
await sut.handleQueueObjectTagging({ force: true });
expect(jobMock.queue.mock.calls).toEqual([[{ name: JobName.CLASSIFY_IMAGE, data: { id: assetStub.image.id } }]]);
expect(assetMock.getAll).toHaveBeenCalled();
});
});
describe('handleClassifyImage', () => {
it('should do nothing if machine learning is disabled', async () => {
configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
await sut.handleClassifyImage({ id: '123' });
expect(machineMock.classifyImage).not.toHaveBeenCalled();
expect(assetMock.getByIds).not.toHaveBeenCalled();
});
it('should skip assets without a resize path', async () => {
const asset = { resizePath: '' } as AssetEntity;
assetMock.getByIds.mockResolvedValue([asset]);
await sut.handleClassifyImage({ id: asset.id });
expect(smartMock.upsert).not.toHaveBeenCalled();
expect(machineMock.classifyImage).not.toHaveBeenCalled();
});
it('should save the returned tags', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
machineMock.classifyImage.mockResolvedValue(['tag1', 'tag2', 'tag3']);
await sut.handleClassifyImage({ id: asset.id });
expect(machineMock.classifyImage).toHaveBeenCalledWith(
'http://immich-machine-learning:3003',
{
imagePath: 'path/to/resize.ext',
},
{ enabled: true, minScore: 0.9, modelName: 'microsoft/resnet-50' },
);
expect(smartMock.upsert).toHaveBeenCalledWith({
assetId: 'asset-1',
tags: ['tag1', 'tag2', 'tag3'],
});
});
it('should always overwrite old tags', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
machineMock.classifyImage.mockResolvedValue([]);
await sut.handleClassifyImage({ id: asset.id });
expect(machineMock.classifyImage).toHaveBeenCalled();
expect(smartMock.upsert).toHaveBeenCalled();
});
});
describe('handleQueueEncodeClip', () => {
it('should do nothing if machine learning is disabled', async () => {
configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);


@@ -46,48 +46,6 @@ export class SmartInfoService {
await this.jobRepository.resume(QueueName.SMART_SEARCH);
}
async handleQueueObjectTagging({ force }: IBaseJob) {
const { machineLearning } = await this.configCore.getConfig();
if (!machineLearning.enabled || !machineLearning.classification.enabled) {
return true;
}
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
return force
? this.assetRepository.getAll(pagination)
: this.assetRepository.getWithout(pagination, WithoutProperty.OBJECT_TAGS);
});
for await (const assets of assetPagination) {
for (const asset of assets) {
await this.jobRepository.queue({ name: JobName.CLASSIFY_IMAGE, data: { id: asset.id } });
}
}
return true;
}
async handleClassifyImage({ id }: IEntityJob) {
const { machineLearning } = await this.configCore.getConfig();
if (!machineLearning.enabled || !machineLearning.classification.enabled) {
return true;
}
const [asset] = await this.assetRepository.getByIds([id]);
if (!asset.resizePath) {
return false;
}
const tags = await this.machineLearning.classifyImage(
machineLearning.url,
{ imagePath: asset.resizePath },
machineLearning.classification,
);
await this.repository.upsert({ assetId: asset.id, tags });
return true;
}
async handleQueueEncodeClip({ force }: IBaseJob) {
const { machineLearning } = await this.configCore.getConfig();
if (!machineLearning.enabled || !machineLearning.clip.enabled) {


@@ -29,12 +29,6 @@ export class SystemConfigJobDto implements Record<QueueName, JobSettingsDto> {
@Type(() => JobSettingsDto)
[QueueName.VIDEO_CONVERSION]!: JobSettingsDto;
- @ApiProperty({ type: JobSettingsDto })
- @ValidateNested()
- @IsObject()
- @Type(() => JobSettingsDto)
- [QueueName.OBJECT_TAGGING]!: JobSettingsDto;
@ApiProperty({ type: JobSettingsDto })
@ValidateNested()
@IsObject()


@@ -1,4 +1,4 @@
- import { ClassificationConfig, CLIPConfig, RecognitionConfig } from '@app/domain';
+ import { CLIPConfig, RecognitionConfig } from '@app/domain';
import { Type } from 'class-transformer';
import { IsBoolean, IsObject, IsUrl, ValidateIf, ValidateNested } from 'class-validator';
@@ -10,11 +10,6 @@ export class SystemConfigMachineLearningDto {
@ValidateIf((dto) => dto.enabled)
url!: string;
- @Type(() => ClassificationConfig)
- @ValidateNested()
- @IsObject()
- classification!: ClassificationConfig;
@Type(() => CLIPConfig)
@ValidateNested()
@IsObject()


@@ -49,7 +49,6 @@ export const defaults = Object.freeze<SystemConfig>({
     [QueueName.BACKGROUND_TASK]: { concurrency: 5 },
     [QueueName.SMART_SEARCH]: { concurrency: 2 },
     [QueueName.METADATA_EXTRACTION]: { concurrency: 5 },
-    [QueueName.OBJECT_TAGGING]: { concurrency: 2 },
     [QueueName.RECOGNIZE_FACES]: { concurrency: 2 },
     [QueueName.SEARCH]: { concurrency: 5 },
     [QueueName.SIDECAR]: { concurrency: 5 },
@@ -66,11 +65,6 @@ export const defaults = Object.freeze<SystemConfig>({
   machineLearning: {
     enabled: process.env.IMMICH_MACHINE_LEARNING_ENABLED !== 'false',
     url: process.env.IMMICH_MACHINE_LEARNING_URL || 'http://immich-machine-learning:3003',
-    classification: {
-      enabled: false,
-      modelName: 'microsoft/resnet-50',
-      minScore: 0.9,
-    },
     clip: {
       enabled: true,
       modelName: 'ViT-B-32__openai',
@@ -137,7 +131,6 @@ export const defaults = Object.freeze<SystemConfig>({
 export enum FeatureFlag {
   CLIP_ENCODE = 'clipEncode',
   FACIAL_RECOGNITION = 'facialRecognition',
-  TAG_IMAGE = 'tagImage',
   MAP = 'map',
   REVERSE_GEOCODING = 'reverseGeocoding',
   SIDECAR = 'sidecar',
@@ -182,8 +175,6 @@ export class SystemConfigCore {
         throw new BadRequestException('Clip encoding is not enabled');
       case FeatureFlag.FACIAL_RECOGNITION:
         throw new BadRequestException('Facial recognition is not enabled');
-      case FeatureFlag.TAG_IMAGE:
-        throw new BadRequestException('Image tagging is not enabled');
       case FeatureFlag.SIDECAR:
         throw new BadRequestException('Sidecar is not enabled');
       case FeatureFlag.SEARCH:
@@ -212,7 +203,6 @@ export class SystemConfigCore {
     return {
       [FeatureFlag.CLIP_ENCODE]: mlEnabled && config.machineLearning.clip.enabled,
       [FeatureFlag.FACIAL_RECOGNITION]: mlEnabled && config.machineLearning.facialRecognition.enabled,
-      [FeatureFlag.TAG_IMAGE]: mlEnabled && config.machineLearning.classification.enabled,
       [FeatureFlag.MAP]: config.map.enabled,
       [FeatureFlag.REVERSE_GEOCODING]: config.reverseGeocoding.enabled,
       [FeatureFlag.SIDECAR]: true,
@@ -245,10 +235,7 @@ export class SystemConfigCore {
       _.set(config, key, value);
     }
-    const errors = await validate(plainToInstance(SystemConfigDto, config), {
-      forbidNonWhitelisted: true,
-      forbidUnknownValues: true,
-    });
+    const errors = await validate(plainToInstance(SystemConfigDto, config));
     if (errors.length > 0) {
       this.logger.error('Validation error', errors);
       if (configFilePath) {
@@ -334,13 +321,13 @@ export class SystemConfigCore {
       }
       if (!_.isEmpty(file)) {
-        throw new Error(`Unknown keys found: ${JSON.stringify(file)}`);
+        this.logger.warn(`Unknown keys found: ${JSON.stringify(file, null, 2)}`);
       }
       this.configCache = overrides;
     } catch (error: Error | any) {
-      this.logger.error(`Unable to load configuration file: ${filepath} due to ${error}`, error?.stack);
-      throw new Error('Invalid configuration file');
+      this.logger.error(`Unable to load configuration file: ${filepath}`);
+      throw error;
     }
   }
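Net effect of the two loader hunks above: options that fail DTO validation still abort config loading, but keys the server does not recognize are only logged and the underlying error is re-thrown unchanged. A minimal, self-contained sketch of that prune-then-warn pattern, assuming lodash is available; the key list, the flatten helper, and the applyFileOverrides name are illustrative stand-ins, not the actual SystemConfigCore implementation:

import _ from 'lodash';

// The dot-notation keys the server recognizes (small subset shown for illustration).
const knownKeys = new Set(['machineLearning.enabled', 'machineLearning.clip.enabled', 'ffmpeg.crf']);

// Flatten a nested object into dot-notation keys, e.g. { a: { b: 1 } } -> { 'a.b': 1 }.
function flatten(obj: Record<string, any>, prefix = ''): Record<string, unknown> {
  return Object.entries(obj).reduce((acc, [key, value]) => {
    const path = prefix ? `${prefix}.${key}` : key;
    return _.isPlainObject(value) ? { ...acc, ...flatten(value, path) } : { ...acc, [path]: value };
  }, {} as Record<string, unknown>);
}

function applyFileOverrides(raw: string, config: Record<string, unknown>) {
  const overrides = flatten(JSON.parse(raw));
  const unknown: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(overrides)) {
    if (knownKeys.has(key)) {
      _.set(config, key, value); // recognized option: apply it
    } else {
      unknown[key] = value; // unrecognized option: remember it
    }
  }
  if (!_.isEmpty(unknown)) {
    // Previously this path threw; unknown keys are now only reported.
    console.warn(`Unknown keys found: ${JSON.stringify(unknown, null, 2)}`);
  }
  return config;
}

The updated spec below exercises exactly this split: malformed values still reject, unknown keys only warn.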

View file

@@ -11,6 +11,7 @@ import {
   TranscodePolicy,
   VideoCodec,
 } from '@app/infra/entities';
+import { ImmichLogger } from '@app/infra/logger';
 import { BadRequestException } from '@nestjs/common';
 import { newCommunicationRepositoryMock, newSystemConfigRepositoryMock } from '@test';
 import { QueueName } from '../job';
@@ -29,7 +30,6 @@ const updatedConfig = Object.freeze<SystemConfig>({
     [QueueName.BACKGROUND_TASK]: { concurrency: 5 },
     [QueueName.SMART_SEARCH]: { concurrency: 2 },
     [QueueName.METADATA_EXTRACTION]: { concurrency: 5 },
-    [QueueName.OBJECT_TAGGING]: { concurrency: 2 },
     [QueueName.RECOGNIZE_FACES]: { concurrency: 2 },
     [QueueName.SEARCH]: { concurrency: 5 },
     [QueueName.SIDECAR]: { concurrency: 5 },
@@ -65,11 +65,6 @@ const updatedConfig = Object.freeze<SystemConfig>({
   machineLearning: {
     enabled: true,
     url: 'http://immich-machine-learning:3003',
-    classification: {
-      enabled: false,
-      modelName: 'microsoft/resnet-50',
-      minScore: 0.9,
-    },
     clip: {
       enabled: true,
       modelName: 'ViT-B-32__openai',
@@ -169,6 +164,16 @@ describe(SystemConfigService.name, () => {
   });
   describe('getConfig', () => {
+    let warnLog: jest.SpyInstance;
+    beforeEach(() => {
+      warnLog = jest.spyOn(ImmichLogger.prototype, 'warn');
+    });
+    afterEach(() => {
+      warnLog.mockRestore();
+    });
     it('should return the default config', async () => {
       configMock.load.mockResolvedValue([]);
@@ -217,9 +222,9 @@ describe(SystemConfigService.name, () => {
       { should: 'validate numbers', config: { ffmpeg: { crf: 'not-a-number' } } },
       { should: 'validate booleans', config: { oauth: { enabled: 'invalid' } } },
       { should: 'validate enums', config: { ffmpeg: { transcode: 'unknown' } } },
-      { should: 'validate top level unknown options', config: { unknownOption: true } },
-      { should: 'validate nested unknown options', config: { ffmpeg: { unknownOption: true } } },
       { should: 'validate required oauth fields', config: { oauth: { enabled: true } } },
+      { should: 'warn for top level unknown options', warn: true, config: { unknownOption: true } },
+      { should: 'warn for nested unknown options', warn: true, config: { ffmpeg: { unknownOption: true } } },
     ];
     for (const test of tests) {
@@ -227,7 +232,12 @@ describe(SystemConfigService.name, () => {
         process.env.IMMICH_CONFIG_FILE = 'immich-config.json';
         configMock.readFile.mockResolvedValue(JSON.stringify(test.config));
-        await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
+        if (test.warn) {
+          await sut.getConfig();
+          expect(warnLog).toHaveBeenCalled();
+        } else {
+          await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
+        }
       });
     }
   });
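The spec spies on a prototype method of the shared logger, so instances created inside the service under test are still observed. A stripped-down jest sketch of that technique, independent of the Immich code (the Logger class and loadConfig helper are stand-ins for ImmichLogger and the real config loader):

// logger-spy.spec.ts, illustrative only
class Logger {
  warn(message: string) {
    // a real implementation would write somewhere
  }
}

const loadConfig = (raw: string) => {
  const parsed = JSON.parse(raw) as Record<string, unknown>;
  if ('unknownOption' in parsed) {
    new Logger().warn('Unknown keys found');
  }
  return parsed;
};

describe('prototype spies', () => {
  let warnLog: jest.SpyInstance;

  beforeEach(() => {
    // Spying on the prototype records calls from instances constructed later.
    warnLog = jest.spyOn(Logger.prototype, 'warn');
  });

  afterEach(() => {
    warnLog.mockRestore();
  });

  it('warns for unknown options', () => {
    loadConfig(JSON.stringify({ unknownOption: true }));
    expect(warnLog).toHaveBeenCalled();
  });
});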

View file

@@ -35,7 +35,6 @@ export enum SystemConfigKey {
   JOB_THUMBNAIL_GENERATION_CONCURRENCY = 'job.thumbnailGeneration.concurrency',
   JOB_METADATA_EXTRACTION_CONCURRENCY = 'job.metadataExtraction.concurrency',
   JOB_VIDEO_CONVERSION_CONCURRENCY = 'job.videoConversion.concurrency',
-  JOB_OBJECT_TAGGING_CONCURRENCY = 'job.objectTagging.concurrency',
   JOB_RECOGNIZE_FACES_CONCURRENCY = 'job.recognizeFaces.concurrency',
   JOB_CLIP_ENCODING_CONCURRENCY = 'job.smartSearch.concurrency',
   JOB_BACKGROUND_TASK_CONCURRENCY = 'job.backgroundTask.concurrency',
@@ -54,10 +53,6 @@ export enum SystemConfigKey {
   MACHINE_LEARNING_ENABLED = 'machineLearning.enabled',
   MACHINE_LEARNING_URL = 'machineLearning.url',
-  MACHINE_LEARNING_CLASSIFICATION_ENABLED = 'machineLearning.classification.enabled',
-  MACHINE_LEARNING_CLASSIFICATION_MODEL_NAME = 'machineLearning.classification.modelName',
-  MACHINE_LEARNING_CLASSIFICATION_MIN_SCORE = 'machineLearning.classification.minScore',
   MACHINE_LEARNING_CLIP_ENABLED = 'machineLearning.clip.enabled',
   MACHINE_LEARNING_CLIP_MODEL_NAME = 'machineLearning.clip.modelName',
@@ -184,11 +179,6 @@ export interface SystemConfig {
   machineLearning: {
     enabled: boolean;
     url: string;
-    classification: {
-      enabled: boolean;
-      modelName: string;
-      minScore: number;
-    };
     clip: {
       enabled: boolean;
       modelName: string;
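For operators who keep an external config file (IMMICH_CONFIG_FILE), these key removals mean that machineLearning.classification and job.objectTagging entries no longer map to any SystemConfigKey; with the loader change earlier in the commit they now only trigger a warning. A before/after sketch of the affected part of such a file, written as TypeScript literals with values taken from the old defaults; everything else in a real file is unaffected:

// Before: entries that no longer correspond to any SystemConfigKey.
const before = {
  job: {
    metadataExtraction: { concurrency: 5 },
    objectTagging: { concurrency: 2 },
  },
  machineLearning: {
    classification: { enabled: false, modelName: 'microsoft/resnet-50', minScore: 0.9 },
    clip: { enabled: true, modelName: 'ViT-B-32__openai' },
  },
};

// After: only keys the server still recognizes.
const after = {
  job: {
    metadataExtraction: { concurrency: 5 },
  },
  machineLearning: {
    clip: { enabled: true, modelName: 'ViT-B-32__openai' },
  },
};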

View file

@@ -1,5 +1,4 @@
 import {
-  ClassificationConfig,
   CLIPConfig,
   CLIPMode,
   DetectFaceResult,
@@ -27,10 +26,6 @@ export class MachineLearningRepository implements IMachineLearningRepository {
     return res.json();
   }
-  classifyImage(url: string, input: VisionModelInput, config: ClassificationConfig): Promise<string[]> {
-    return this.post<string[]>(url, input, { ...config, modelType: ModelType.IMAGE_CLASSIFICATION });
-  }
   detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]> {
     return this.post<DetectFaceResult[]>(url, input, { ...config, modelType: ModelType.FACIAL_RECOGNITION });
   }
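After this removal the repository exposes CLIP encoding and face detection only, which the test mock later in the diff confirms (encodeImage, encodeText, detectFaces). A simplified sketch of the remaining contract; the stand-in types below are illustrative approximations, not the exact @app/domain definitions:

// Stand-in types; the real ones live in @app/domain.
type VisionModelInput = { imagePath: string };
type TextModelInput = { text: string };
type CLIPConfig = { enabled: boolean; modelName: string };
type RecognitionConfig = { enabled: boolean; modelName: string; minScore: number };
type DetectFaceResult = { score: number; embedding: number[] };

// Each method follows the same shape as detectFaces above: POST to the
// machine-learning server with a modelType discriminator merged into the model config.
interface MachineLearningRepositorySketch {
  encodeImage(url: string, input: VisionModelInput, config: CLIPConfig): Promise<number[]>;
  encodeText(url: string, input: TextModelInput, config: CLIPConfig): Promise<number[]>;
  detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]>;
}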

View file

@@ -42,8 +42,6 @@ export class AppService {
       [JobName.CLEAN_OLD_AUDIT_LOGS]: () => this.auditService.handleCleanup(),
       [JobName.USER_DELETE_CHECK]: () => this.userService.handleUserDeleteCheck(),
       [JobName.USER_DELETION]: (data) => this.userService.handleUserDelete(data),
-      [JobName.QUEUE_OBJECT_TAGGING]: (data) => this.smartInfoService.handleQueueObjectTagging(data),
-      [JobName.CLASSIFY_IMAGE]: (data) => this.smartInfoService.handleClassifyImage(data),
       [JobName.QUEUE_ENCODE_CLIP]: (data) => this.smartInfoService.handleQueueEncodeClip(data),
       [JobName.ENCODE_CLIP]: (data) => this.smartInfoService.handleEncodeClip(data),
       [JobName.STORAGE_TEMPLATE_MIGRATION]: () => this.storageTemplateService.handleMigration(),
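The worker wires every queue job to a handler through a single typed record, so dropping the two tagging entries is only safe once JobName itself loses those members; TypeScript then keeps the map exhaustive. A generic sketch of that registry pattern, with an illustrative enum and handler names rather than the real ones:

enum JobName {
  EncodeClip = 'encodeClip',
  RecognizeFaces = 'recognizeFaces',
}

type JobHandler = (data?: unknown) => Promise<void> | void;

// Record<JobName, JobHandler> fails to compile if any enum member lacks a handler,
// or if a handler still references a removed member.
const handlers: Record<JobName, JobHandler> = {
  [JobName.EncodeClip]: async (data) => {
    console.log('encode clip', data);
  },
  [JobName.RecognizeFaces]: async (data) => {
    console.log('recognize faces', data);
  },
};

export const run = (name: JobName, data?: unknown) => handlers[name](data);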

View file

@@ -83,7 +83,6 @@ describe(`${ServerInfoController.name} (e2e)`, () => {
       passwordLogin: true,
       search: true,
       sidecar: true,
-      tagImage: false,
       trash: true,
     });
   });

View file

@@ -2,7 +2,6 @@ import { IMachineLearningRepository } from '@app/domain';
 export const newMachineLearningRepositoryMock = (): jest.Mocked<IMachineLearningRepository> => {
   return {
-    classifyImage: jest.fn(),
     encodeImage: jest.fn(),
     encodeText: jest.fn(),
     detectFaces: jest.fn(),

View file

@@ -135,7 +135,6 @@ class ImmichApi {
       [JobName.ThumbnailGeneration]: 'Generate Thumbnails',
       [JobName.MetadataExtraction]: 'Extract Metadata',
       [JobName.Sidecar]: 'Sidecar Metadata',
-      [JobName.ObjectTagging]: 'Tag Objects',
       [JobName.SmartSearch]: 'Smart Search',
       [JobName.RecognizeFaces]: 'Recognize Faces',
       [JobName.VideoConversion]: 'Transcode Videos',

View file

@@ -373,12 +373,6 @@ export interface AllJobStatusResponseDto {
      * @memberof AllJobStatusResponseDto
      */
     'migration': JobStatusDto;
-    /**
-     *
-     * @type {JobStatusDto}
-     * @memberof AllJobStatusResponseDto
-     */
-    'objectTagging': JobStatusDto;
     /**
      *
      * @type {JobStatusDto}
@@ -1318,39 +1312,6 @@ export interface CheckExistingAssetsResponseDto {
      */
     'existingIds': Array<string>;
 }
-/**
- *
- * @export
- * @interface ClassificationConfig
- */
-export interface ClassificationConfig {
-    /**
-     *
-     * @type {boolean}
-     * @memberof ClassificationConfig
-     */
-    'enabled': boolean;
-    /**
-     *
-     * @type {number}
-     * @memberof ClassificationConfig
-     */
-    'minScore': number;
-    /**
-     *
-     * @type {string}
-     * @memberof ClassificationConfig
-     */
-    'modelName': string;
-    /**
-     *
-     * @type {ModelType}
-     * @memberof ClassificationConfig
-     */
-    'modelType'?: ModelType;
-}
 /**
  *
  * @export
@@ -2015,7 +1976,6 @@ export const JobName = {
     ThumbnailGeneration: 'thumbnailGeneration',
     MetadataExtraction: 'metadataExtraction',
     VideoConversion: 'videoConversion',
-    ObjectTagging: 'objectTagging',
     RecognizeFaces: 'recognizeFaces',
     SmartSearch: 'smartSearch',
     BackgroundTask: 'backgroundTask',
@@ -2358,7 +2318,6 @@ export interface MergePersonDto {
  */
 export const ModelType = {
-    ImageClassification: 'image-classification',
     FacialRecognition: 'facial-recognition',
     Clip: 'clip'
 } as const;
@@ -3139,12 +3098,6 @@ export interface ServerFeaturesDto {
      * @memberof ServerFeaturesDto
      */
     'sidecar': boolean;
-    /**
-     *
-     * @type {boolean}
-     * @memberof ServerFeaturesDto
-     */
-    'tagImage': boolean;
     /**
      *
      * @type {boolean}
@@ -3803,12 +3756,6 @@ export interface SystemConfigJobDto {
      * @memberof SystemConfigJobDto
      */
     'migration': JobSettingsDto;
-    /**
-     *
-     * @type {JobSettingsDto}
-     * @memberof SystemConfigJobDto
-     */
-    'objectTagging': JobSettingsDto;
     /**
      *
      * @type {JobSettingsDto}
@@ -3911,12 +3858,6 @@ export interface SystemConfigLoggingDto {
  * @interface SystemConfigMachineLearningDto
  */
 export interface SystemConfigMachineLearningDto {
-    /**
-     *
-     * @type {ClassificationConfig}
-     * @memberof SystemConfigMachineLearningDto
-     */
-    'classification': ClassificationConfig;
     /**
      *
      * @type {CLIPConfig}
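This mirrors the cli client earlier in the commit: the generated web client drops the same members, so any frontend code indexing JobName.ObjectTagging, ModelType.ImageClassification, or the tagImage flag has to be cleaned up along with it. A hedged sketch of consuming the trimmed JobName constant; the relative import path is abbreviated and the loop is purely illustrative:

import { JobName } from './open-api';

// 'objectTagging' no longer appears, so per-queue UI code needs no special case for it.
for (const name of Object.values(JobName)) {
  console.log(`queue: ${name}`);
}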

View file

@@ -15,7 +15,6 @@
     mdiImageSearch,
     mdiLibraryShelves,
     mdiTable,
-    mdiTagMultiple,
     mdiVideo,
   } from '@mdi/js';
   import ConfirmDialogue from '../../shared-components/confirm-dialogue.svelte';
@@ -78,13 +77,6 @@
       missingText: 'DISCOVER',
       disabled: !$featureFlags.sidecar,
     },
-    [JobName.ObjectTagging]: {
-      icon: mdiTagMultiple,
-      title: api.getJobName(JobName.ObjectTagging),
-      subtitle:
-        'Run machine learning on assets to tag objects\nNote that some assets may not have any objects detected',
-      disabled: !$featureFlags.tagImage,
-    },
     [JobName.SmartSearch]: {
       icon: mdiImageSearch,
       title: api.getJobName(JobName.SmartSearch),

View file

@@ -22,7 +22,6 @@
     JobName.MetadataExtraction,
     JobName.Library,
     JobName.Sidecar,
-    JobName.ObjectTagging,
     JobName.SmartSearch,
     JobName.RecognizeFaces,
     JobName.VideoConversion,

View file

@@ -89,46 +89,6 @@
       />
     </div>
-    <SettingAccordion title="Image Tagging" subtitle="Tag and classify images with object labels">
-      <div class="ml-4 mt-4 flex flex-col gap-4">
-        <SettingSwitch
-          title="ENABLED"
-          subtitle="If disabled, images will not be tagged. This affects the Things section in the Explore page as well as 'm:' searches."
-          bind:checked={machineLearningConfig.classification.enabled}
-          disabled={disabled || !machineLearningConfig.enabled}
-        />
-        <hr />
-        <SettingInputField
-          inputType={SettingInputFieldType.TEXT}
-          label="IMAGE CLASSIFICATION MODEL"
-          bind:value={machineLearningConfig.classification.modelName}
-          required={true}
-          disabled={disabled || !machineLearningConfig.enabled || !machineLearningConfig.classification.enabled}
-          isEdited={machineLearningConfig.classification.modelName !== savedConfig.classification.modelName}
-        >
-          <p slot="desc" class="immich-form-label pb-2 text-sm">
-            The name of an image classification model listed <a
-              href="https://huggingface.co/models?pipeline_tag=image-classification&sort=trending"><u>here</u></a
-            >. It must be tagged with the 'Image Classification' task and must support ONNX conversion.
-          </p>
-        </SettingInputField>
-        <SettingInputField
-          inputType={SettingInputFieldType.NUMBER}
-          label="IMAGE CLASSIFICATION THRESHOLD"
-          desc="Minimum confidence score to add a particular object tag. Lower values will add more tags to images, but may result in more false positives. Will not have any effect until the Tag Objects job is re-run."
-          bind:value={machineLearningConfig.classification.minScore}
-          step="0.1"
-          min="0"
-          max="1"
-          disabled={disabled || !machineLearningConfig.enabled || !machineLearningConfig.classification.enabled}
-          isEdited={machineLearningConfig.classification.minScore !== savedConfig.classification.minScore}
-        />
-      </div>
-    </SettingAccordion>
     <SettingAccordion title="Smart Search" subtitle="Search for images semantically using CLIP embeddings">
       <div class="ml-4 mt-4 flex flex-col gap-4">
         <SettingSwitch

View file

@@ -8,7 +8,6 @@ export const featureFlags = writable<FeatureFlags>({
   clipEncode: true,
   facialRecognition: true,
   sidecar: true,
-  tagImage: true,
   map: true,
   reverseGeocoding: true,
   search: true,
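With tagImage dropped from the store defaults, the client-side flags again match the trimmed ServerFeaturesDto. A self-contained sketch of this writable-store pattern using only the flags visible in the hunk above (the real FeatureFlags type carries more fields, so treat this as a partial illustration):

import { writable } from 'svelte/store';

// Partial view of the feature flags after this commit; the real type has more members.
type FeatureFlagsSketch = {
  clipEncode: boolean;
  facialRecognition: boolean;
  sidecar: boolean;
  map: boolean;
  reverseGeocoding: boolean;
  search: boolean;
};

export const featureFlags = writable<FeatureFlagsSketch>({
  clipEncode: true,
  facialRecognition: true,
  sidecar: true,
  map: true,
  reverseGeocoding: true,
  search: true,
});

// Components gate UI the same way the jobs panel does, e.g. disabled={!$featureFlags.clipEncode}.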