1
0
Fork 0
mirror of https://github.com/immich-app/immich.git synced 2025-01-16 00:36:47 +01:00

feat(server,ml): remove image tagging (#5903)

* remove image tagging

* updated lock

* fixed tests, improved logging

* be nice

* fixed tests
This commit is contained in:
Mert 2023-12-20 20:47:56 -05:00 committed by GitHub
parent 154292242f
commit 092a23fd7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
65 changed files with 984 additions and 2669 deletions

View file

@ -209,7 +209,7 @@ jobs:
poetry run black --check app export
- name: Run mypy type checking
run: |
poetry run mypy --install-types --non-interactive --strict app/ export/
poetry run mypy --install-types --non-interactive --strict app/
- name: Run tests and coverage
run: |
poetry run pytest --cov app

View file

@ -373,12 +373,6 @@ export interface AllJobStatusResponseDto {
* @memberof AllJobStatusResponseDto
*/
'migration': JobStatusDto;
/**
*
* @type {JobStatusDto}
* @memberof AllJobStatusResponseDto
*/
'objectTagging': JobStatusDto;
/**
*
* @type {JobStatusDto}
@ -1318,39 +1312,6 @@ export interface CheckExistingAssetsResponseDto {
*/
'existingIds': Array<string>;
}
/**
*
* @export
* @interface ClassificationConfig
*/
export interface ClassificationConfig {
/**
*
* @type {boolean}
* @memberof ClassificationConfig
*/
'enabled': boolean;
/**
*
* @type {number}
* @memberof ClassificationConfig
*/
'minScore': number;
/**
*
* @type {string}
* @memberof ClassificationConfig
*/
'modelName': string;
/**
*
* @type {ModelType}
* @memberof ClassificationConfig
*/
'modelType'?: ModelType;
}
/**
*
* @export
@ -2015,7 +1976,6 @@ export const JobName = {
ThumbnailGeneration: 'thumbnailGeneration',
MetadataExtraction: 'metadataExtraction',
VideoConversion: 'videoConversion',
ObjectTagging: 'objectTagging',
RecognizeFaces: 'recognizeFaces',
SmartSearch: 'smartSearch',
BackgroundTask: 'backgroundTask',
@ -2358,7 +2318,6 @@ export interface MergePersonDto {
*/
export const ModelType = {
ImageClassification: 'image-classification',
FacialRecognition: 'facial-recognition',
Clip: 'clip'
} as const;
@ -3139,12 +3098,6 @@ export interface ServerFeaturesDto {
* @memberof ServerFeaturesDto
*/
'sidecar': boolean;
/**
*
* @type {boolean}
* @memberof ServerFeaturesDto
*/
'tagImage': boolean;
/**
*
* @type {boolean}
@ -3803,12 +3756,6 @@ export interface SystemConfigJobDto {
* @memberof SystemConfigJobDto
*/
'migration': JobSettingsDto;
/**
*
* @type {JobSettingsDto}
* @memberof SystemConfigJobDto
*/
'objectTagging': JobSettingsDto;
/**
*
* @type {JobSettingsDto}
@ -3911,12 +3858,6 @@ export interface SystemConfigLoggingDto {
* @interface SystemConfigMachineLearningDto
*/
export interface SystemConfigMachineLearningDto {
/**
*
* @type {ClassificationConfig}
* @memberof SystemConfigMachineLearningDto
*/
'classification': ClassificationConfig;
/**
*
* @type {CLIPConfig}

View file

@ -56,10 +56,6 @@ Template changes will only apply to new assets. To retroactively apply the templ
This is fixed by running the storage migration job.
### Why is object detection not very good?
The default image tagging model is relatively small. You can change this for a larger model like `google/vit-base-patch16-224` by setting the model name under Settings > Machine Learning Settings > Image Tagging. You can then re-run the Image Tagging job to get improved tags.
### Why are there so many thumbnail generation jobs?
Immich generates three thumbnails for each asset (blurred, small, and large), as well as a thumbnail for each recognized face.

View file

@ -73,7 +73,7 @@ The Immich Microservices image uses the same `Dockerfile` as the Immich Server,
- Thumbnail Generation
- Metadata Extraction
- Video Transcoding
- Object Tagging
- Smart Search
- Facial Recognition
- Storage Template Migration
- Sidecar (see [XMP Sidecars](/docs/features/xmp-sidecars.md))

View file

@ -38,9 +38,6 @@ The default configuration looks like this:
"metadataExtraction": {
"concurrency": 5
},
"objectTagging": {
"concurrency": 2
},
"recognizeFaces": {
"concurrency": 2
},
@ -73,11 +70,6 @@ The default configuration looks like this:
"machineLearning": {
"enabled": true,
"url": "http://immich-machine-learning:3003",
"classification": {
"enabled": false,
"modelName": "microsoft/resnet-50",
"minScore": 0.9
},
"clip": {
"enabled": true,
"modelName": "ViT-B-32__openai"

View file

@ -1,6 +1,5 @@
# Immich Machine Learning
- Image classification
- CLIP embeddings
- Facial recognition

View file

@ -59,3 +59,37 @@ def clip_preprocess_cfg() -> dict[str, Any]:
"resize_mode": "shortest",
"fill_color": 0,
}
@pytest.fixture(scope="session")
def clip_tokenizer_cfg() -> dict[str, Any]:
return {
"add_prefix_space": False,
"added_tokens_decoder": {
"49406": {
"content": "<|startoftext|>",
"lstrip": False,
"normalized": True,
"rstrip": False,
"single_word": False,
"special": True,
},
"49407": {
"content": "<|endoftext|>",
"lstrip": False,
"normalized": True,
"rstrip": False,
"single_word": False,
"special": True,
},
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": True,
"do_lower_case": True,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 77,
"pad_token": "<|endoftext|>",
"tokenizer_class": "CLIPTokenizer",
"unk_token": "<|endoftext|>",
}

View file

@ -6,7 +6,6 @@ from .base import InferenceModel
from .clip import MCLIPEncoder, OpenCLIPEncoder
from .constants import is_insightface, is_mclip, is_openclip
from .facial_recognition import FaceRecognizer
from .image_classification import ImageClassifier
def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any) -> InferenceModel:
@ -19,8 +18,6 @@ def from_model_type(model_type: ModelType, model_name: str, **model_kwargs: Any)
case ModelType.FACIAL_RECOGNITION:
if is_insightface(model_name):
return FaceRecognizer(model_name, **model_kwargs)
case ModelType.IMAGE_CLASSIFICATION:
return ImageClassifier(model_name, **model_kwargs)
case _:
raise ValueError(f"Unknown model type {model_type}")

View file

@ -35,7 +35,7 @@ class InferenceModel(ABC):
)
log.debug(
(
f"Setting '{self.model_name}' execution providers to {self.providers}"
f"Setting '{self.model_name}' execution providers to {self.providers} "
"in descending order of preference"
),
)
@ -55,7 +55,7 @@ class InferenceModel(ABC):
def download(self) -> None:
if not self.cached:
log.info(
(f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'." "This may take a while.")
f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'. This may take a while."
)
self._download()
@ -63,7 +63,7 @@ class InferenceModel(ABC):
if self.loaded:
return
self.download()
log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}'")
log.info(f"Loading {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory")
self._load()
self.loaded = True
@ -119,11 +119,11 @@ class InferenceModel(ABC):
def clear_cache(self) -> None:
if not self.cache_dir.exists():
log.warn(
f"Attempted to clear cache for model '{self.model_name}' but cache directory does not exist.",
f"Attempted to clear cache for model '{self.model_name}', but cache directory does not exist",
)
return
if not rmtree.avoids_symlink_attacks:
raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform.")
raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform")
if self.cache_dir.is_dir():
log.info(f"Cleared cache directory for model '{self.model_name}'.")

View file

@ -8,11 +8,11 @@ from typing import Any, Literal
import numpy as np
import onnxruntime as ort
from PIL import Image
from transformers import AutoTokenizer
from tokenizers import Encoding, Tokenizer
from app.config import clean_name, log
from app.models.transforms import crop, get_pil_resampling, normalize, resize, to_numpy
from app.schemas import ModelType, ndarray_f32, ndarray_i32, ndarray_i64
from app.schemas import ModelType, ndarray_f32, ndarray_i32
from .base import InferenceModel
@ -40,6 +40,7 @@ class BaseCLIPEncoder(InferenceModel):
providers=self.providers,
provider_options=self.provider_options,
)
log.debug(f"Loaded clip text model '{self.model_name}'")
if self.mode == "vision" or self.mode is None:
log.debug(f"Loading clip vision model '{self.model_name}'")
@ -50,6 +51,7 @@ class BaseCLIPEncoder(InferenceModel):
providers=self.providers,
provider_options=self.provider_options,
)
log.debug(f"Loaded clip vision model '{self.model_name}'")
def _predict(self, image_or_text: Image.Image | str) -> ndarray_f32:
if isinstance(image_or_text, bytes):
@ -99,6 +101,14 @@ class BaseCLIPEncoder(InferenceModel):
def visual_path(self) -> Path:
return self.visual_dir / "model.onnx"
@property
def tokenizer_file_path(self) -> Path:
return self.textual_dir / "tokenizer.json"
@property
def tokenizer_cfg_path(self) -> Path:
return self.textual_dir / "tokenizer_config.json"
@property
def preprocess_cfg_path(self) -> Path:
return self.visual_dir / "preprocess_cfg.json"
@ -107,6 +117,34 @@ class BaseCLIPEncoder(InferenceModel):
def cached(self) -> bool:
return self.textual_path.is_file() and self.visual_path.is_file()
@cached_property
def model_cfg(self) -> dict[str, Any]:
log.debug(f"Loading model config for CLIP model '{self.model_name}'")
model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
return model_cfg
@cached_property
def tokenizer_file(self) -> dict[str, Any]:
log.debug(f"Loading tokenizer file for CLIP model '{self.model_name}'")
tokenizer_file: dict[str, Any] = json.load(self.tokenizer_file_path.open())
log.debug(f"Loaded tokenizer file for CLIP model '{self.model_name}'")
return tokenizer_file
@cached_property
def tokenizer_cfg(self) -> dict[str, Any]:
log.debug(f"Loading tokenizer config for CLIP model '{self.model_name}'")
tokenizer_cfg: dict[str, Any] = json.load(self.tokenizer_cfg_path.open())
log.debug(f"Loaded tokenizer config for CLIP model '{self.model_name}'")
return tokenizer_cfg
@cached_property
def preprocess_cfg(self) -> dict[str, Any]:
log.debug(f"Loading visual preprocessing config for CLIP model '{self.model_name}'")
preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
log.debug(f"Loaded visual preprocessing config for CLIP model '{self.model_name}'")
return preprocess_cfg
class OpenCLIPEncoder(BaseCLIPEncoder):
def __init__(
@ -121,8 +159,8 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
def _load(self) -> None:
super()._load()
self.tokenizer = AutoTokenizer.from_pretrained(self.textual_dir)
self.sequence_length = self.model_cfg["text_cfg"]["context_length"]
context_length = self.model_cfg["text_cfg"]["context_length"]
pad_token = self.tokenizer_cfg["pad_token"]
self.size = (
self.preprocess_cfg["size"][0] if type(self.preprocess_cfg["size"]) == list else self.preprocess_cfg["size"]
@ -131,16 +169,16 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
self.mean = np.array(self.preprocess_cfg["mean"], dtype=np.float32)
self.std = np.array(self.preprocess_cfg["std"], dtype=np.float32)
log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'")
self.tokenizer: Tokenizer = Tokenizer.from_file(self.tokenizer_file_path.as_posix())
pad_id = self.tokenizer.token_to_id(pad_token)
self.tokenizer.enable_padding(length=context_length, pad_token=pad_token, pad_id=pad_id)
self.tokenizer.enable_truncation(max_length=context_length)
log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
input_ids: ndarray_i64 = self.tokenizer(
text,
max_length=self.sequence_length,
return_tensors="np",
return_attention_mask=False,
padding="max_length",
truncation=True,
).input_ids
return {"text": input_ids.astype(np.int32)}
tokens: Encoding = self.tokenizer.encode(text)
return {"text": np.array([tokens.ids], dtype=np.int32)}
def transform(self, image: Image.Image) -> dict[str, ndarray_f32]:
image = resize(image, self.size)
@ -149,18 +187,11 @@ class OpenCLIPEncoder(BaseCLIPEncoder):
image_np = normalize(image_np, self.mean, self.std)
return {"image": np.expand_dims(image_np.transpose(2, 0, 1), 0)}
@cached_property
def model_cfg(self) -> dict[str, Any]:
model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
return model_cfg
@cached_property
def preprocess_cfg(self) -> dict[str, Any]:
preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
return preprocess_cfg
class MCLIPEncoder(OpenCLIPEncoder):
def tokenize(self, text: str) -> dict[str, ndarray_i32]:
tokens: dict[str, ndarray_i64] = self.tokenizer(text, return_tensors="np")
return {k: v.astype(np.int32) for k, v in tokens.items()}
tokens: Encoding = self.tokenizer.encode(text)
return {
"input_ids": np.array([tokens.ids], dtype=np.int32),
"attention_mask": np.array([tokens.attention_mask], dtype=np.int32),
}

View file

@ -1,75 +0,0 @@
from io import BytesIO
from pathlib import Path
from typing import Any
from huggingface_hub import snapshot_download
from optimum.onnxruntime import ORTModelForImageClassification
from optimum.pipelines import pipeline
from PIL import Image
from transformers import AutoImageProcessor
from ..config import log
from ..schemas import ModelType
from .base import InferenceModel
class ImageClassifier(InferenceModel):
_model_type = ModelType.IMAGE_CLASSIFICATION
def __init__(
self,
model_name: str,
min_score: float = 0.9,
cache_dir: Path | str | None = None,
**model_kwargs: Any,
) -> None:
self.min_score = model_kwargs.pop("minScore", min_score)
super().__init__(model_name, cache_dir, **model_kwargs)
def _download(self) -> None:
snapshot_download(
cache_dir=self.cache_dir,
repo_id=self.model_name,
allow_patterns=["*.bin", "*.json", "*.txt"],
local_dir=self.cache_dir,
local_dir_use_symlinks=True,
)
def _load(self) -> None:
processor = AutoImageProcessor.from_pretrained(self.cache_dir, cache_dir=self.cache_dir)
model_path = self.cache_dir / "model.onnx"
model_kwargs = {
"cache_dir": self.cache_dir,
"provider": self.providers[0],
"provider_options": self.provider_options[0],
"session_options": self.sess_options,
}
if model_path.exists():
model = ORTModelForImageClassification.from_pretrained(self.cache_dir, **model_kwargs)
self.model = pipeline(self.model_type.value, model, feature_extractor=processor)
else:
log.info(
(
f"ONNX model not found in cache directory for '{self.model_name}'."
"Exporting optimized model for future use."
),
)
self.sess_options.optimized_model_filepath = model_path.as_posix()
self.model = pipeline(
self.model_type.value,
self.model_name,
model_kwargs=model_kwargs,
feature_extractor=processor,
)
def _predict(self, image: Image.Image | bytes) -> list[str]:
if isinstance(image, bytes):
image = Image.open(BytesIO(image))
predictions: list[dict[str, Any]] = self.model(image)
tags = [tag for pred in predictions for tag in pred["label"].split(", ") if pred["score"] >= self.min_score]
return tags
def configure(self, **model_kwargs: Any) -> None:
self.min_score = model_kwargs.pop("minScore", self.min_score)

View file

@ -25,7 +25,6 @@ class BoundingBox(TypedDict):
class ModelType(StrEnum):
IMAGE_CLASSIFICATION = "image-classification"
CLIP = "clip"
FACIAL_RECOGNITION = "facial-recognition"

View file

@ -17,42 +17,9 @@ from .models.base import PicklableSessionOptions
from .models.cache import ModelCache
from .models.clip import OpenCLIPEncoder
from .models.facial_recognition import FaceRecognizer
from .models.image_classification import ImageClassifier
from .schemas import ModelType
class TestImageClassifier:
classifier_preds = [
{"label": "that's an image alright", "score": 0.8},
{"label": "well it ends with .jpg", "score": 0.1},
{"label": "idk, im just seeing bytes", "score": 0.05},
{"label": "not sure", "score": 0.04},
{"label": "probably a virus", "score": 0.01},
]
def test_min_score(self, pil_image: Image.Image, mocker: MockerFixture) -> None:
mocker.patch.object(ImageClassifier, "load")
classifier = ImageClassifier("test_model_name", min_score=0.0)
assert classifier.min_score == 0.0
classifier.model = mock.Mock()
classifier.model.return_value = self.classifier_preds
all_labels = classifier.predict(pil_image)
classifier.min_score = 0.5
filtered_labels = classifier.predict(pil_image)
assert all_labels == [
"that's an image alright",
"well it ends with .jpg",
"idk",
"im just seeing bytes",
"not sure",
"probably a virus",
]
assert filtered_labels == ["that's an image alright"]
class TestCLIP:
embedding = np.random.rand(512).astype(np.float32)
cache_dir = Path("test_cache")
@ -63,11 +30,13 @@ class TestCLIP:
mocker: MockerFixture,
clip_model_cfg: dict[str, Any],
clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
mocker.patch.object(OpenCLIPEncoder, "download")
mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
mocked.return_value.run.return_value = [[self.embedding]]
@ -85,11 +54,13 @@ class TestCLIP:
mocker: MockerFixture,
clip_model_cfg: dict[str, Any],
clip_preprocess_cfg: Callable[[Path], dict[str, Any]],
clip_tokenizer_cfg: Callable[[Path], dict[str, Any]],
) -> None:
mocker.patch.object(OpenCLIPEncoder, "download")
mocker.patch.object(OpenCLIPEncoder, "model_cfg", clip_model_cfg)
mocker.patch.object(OpenCLIPEncoder, "preprocess_cfg", clip_preprocess_cfg)
mocker.patch("app.models.clip.AutoTokenizer.from_pretrained", autospec=True)
mocker.patch.object(OpenCLIPEncoder, "tokenizer_cfg", clip_tokenizer_cfg)
mocker.patch("app.models.clip.Tokenizer.from_file", autospec=True)
mocked = mocker.patch("app.models.clip.ort.InferenceSession", autospec=True)
mocked.return_value.run.return_value = [[self.embedding]]
@ -145,17 +116,15 @@ class TestFaceRecognition:
class TestCache:
async def test_caches(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
assert len(model_cache.cache._cache) == 1
mock_get_model.assert_called_once()
async def test_kwargs_used(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION, cache_dir="test_cache")
mock_get_model.assert_called_once_with(
ModelType.IMAGE_CLASSIFICATION, "test_model_name", cache_dir="test_cache"
)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION, cache_dir="test_cache")
mock_get_model.assert_called_once_with(ModelType.FACIAL_RECOGNITION, "test_model_name", cache_dir="test_cache")
async def test_different_clip(self, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache()
@ -172,14 +141,14 @@ class TestCache:
@mock.patch("app.models.cache.OptimisticLock", autospec=True)
async def test_model_ttl(self, mock_lock_cls: mock.Mock, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache(ttl=100)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
mock_lock_cls.return_value.__aenter__.return_value.cas.assert_called_with(mock.ANY, ttl=100)
@mock.patch("app.models.cache.SimpleMemoryCache.expire")
async def test_revalidate(self, mock_cache_expire: mock.Mock, mock_get_model: mock.Mock) -> None:
model_cache = ModelCache(ttl=100, revalidate=True)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.IMAGE_CLASSIFICATION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
await model_cache.get("test_model_name", ModelType.FACIAL_RECOGNITION)
mock_cache_expire.assert_called_once_with(mock.ANY, 100)
@ -188,23 +157,6 @@ class TestCache:
reason="More time-consuming since it deploys the app and loads models.",
)
class TestEndpoints:
def test_tagging_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:
byte_image = BytesIO()
pil_image.save(byte_image, format="jpeg")
response = deployed_app.post(
"http://localhost:3003/predict",
data={
"modelName": "microsoft/resnet-50",
"modelType": "image-classification",
"options": json.dumps({"minScore": 0.0}),
},
files={"image": byte_image.getvalue()},
)
assert response.status_code == 200
assert response.json() == responses["image-classification"]
def test_clip_image_endpoint(
self, pil_image: Image.Image, responses: dict[str, Any], deployed_app: TestClient
) -> None:

View file

@ -12,7 +12,6 @@ byte_image = BytesIO()
@events.init_command_line_parser.add_listener
def _(parser: ArgumentParser) -> None:
parser.add_argument("--tag-model", type=str, default="microsoft/resnet-50")
parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
parser.add_argument("--face-model", type=str, default="buffalo_l")
parser.add_argument(
@ -54,18 +53,6 @@ class InferenceLoadTest(HttpUser):
self.data = byte_image.getvalue()
class ClassificationFormDataLoadTest(InferenceLoadTest):
@task
def classify(self) -> None:
data = [
("modelName", self.environment.parsed_options.clip_model),
("modelType", "clip"),
("options", json.dumps({"minScore": self.environment.parsed_options.tag_min_score})),
]
files = {"image": self.data}
self.client.post("/predict", data=data, files=files)
class CLIPTextFormDataLoadTest(InferenceLoadTest):
@task
def encode_text(self) -> None:

View file

@ -5,8 +5,7 @@
"handlers": {
"console": {
"class": "app.config.CustomRichHandler",
"formatter": "rich",
"level": "INFO"
"formatter": "rich"
}
},
"loggers": {

File diff suppressed because it is too large Load diff

View file

@ -7,12 +7,7 @@ readme = "README.md"
packages = [{include = "app"}]
[tool.poetry.dependencies]
python = "~3.11"
torch = [
{markers = "platform_machine == 'arm64' or platform_machine == 'aarch64'", version = "=2.1.0", source = "pypi"},
{markers = "platform_machine == 'amd64' or platform_machine == 'x86_64'", version = "=2.1.0", source = "pytorch-cpu"}
]
transformers = "^4.29.2"
python = "=3.11.*"
onnxruntime = "^1.15.0"
insightface = "^0.7.3"
opencv-python-headless = "^4.7.0.72"
@ -21,14 +16,14 @@ fastapi = "^0.95.2"
uvicorn = {extras = ["standard"], version = "^0.22.0"}
pydantic = "^1.10.8"
aiocache = "^0.12.1"
optimum = "^1.9.1"
rich = "^13.4.2"
ftfy = "^6.1.1"
setuptools = "^68.0.0"
python-multipart = "^0.0.6"
orjson = "^3.9.5"
safetensors = "0.3.2"
gunicorn = "^21.1.0"
huggingface-hub = "^0.20.1"
tokenizers = "^0.15.0"
[tool.poetry.group.dev.dependencies]
mypy = "^1.3.0"
@ -41,11 +36,6 @@ pytest-cov = "^4.1.0"
ruff = "^0.0.272"
pytest-mock = "^3.11.1"
[[tool.poetry.source]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

View file

@ -50,7 +50,6 @@ doc/CQMode.md
doc/ChangePasswordDto.md
doc/CheckExistingAssetsDto.md
doc/CheckExistingAssetsResponseDto.md
doc/ClassificationConfig.md
doc/Colorspace.md
doc/CreateAlbumDto.md
doc/CreateLibraryDto.md
@ -244,7 +243,6 @@ lib/model/bulk_ids_dto.dart
lib/model/change_password_dto.dart
lib/model/check_existing_assets_dto.dart
lib/model/check_existing_assets_response_dto.dart
lib/model/classification_config.dart
lib/model/clip_config.dart
lib/model/clip_mode.dart
lib/model/colorspace.dart
@ -408,7 +406,6 @@ test/bulk_ids_dto_test.dart
test/change_password_dto_test.dart
test/check_existing_assets_dto_test.dart
test/check_existing_assets_response_dto_test.dart
test/classification_config_test.dart
test/clip_config_test.dart
test/clip_mode_test.dart
test/colorspace_test.dart

BIN
mobile/openapi/README.md generated

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -6479,9 +6479,6 @@
"migration": {
"$ref": "#/components/schemas/JobStatusDto"
},
"objectTagging": {
"$ref": "#/components/schemas/JobStatusDto"
},
"recognizeFaces": {
"$ref": "#/components/schemas/JobStatusDto"
},
@ -6508,7 +6505,6 @@
"thumbnailGeneration",
"metadataExtraction",
"videoConversion",
"objectTagging",
"smartSearch",
"storageTemplateMigration",
"migration",
@ -7201,28 +7197,6 @@
],
"type": "object"
},
"ClassificationConfig": {
"properties": {
"enabled": {
"type": "boolean"
},
"minScore": {
"type": "integer"
},
"modelName": {
"type": "string"
},
"modelType": {
"$ref": "#/components/schemas/ModelType"
}
},
"required": [
"minScore",
"enabled",
"modelName"
],
"type": "object"
},
"Colorspace": {
"enum": [
"srgb",
@ -7819,7 +7793,6 @@
"thumbnailGeneration",
"metadataExtraction",
"videoConversion",
"objectTagging",
"recognizeFaces",
"smartSearch",
"backgroundTask",
@ -8090,7 +8063,6 @@
},
"ModelType": {
"enum": [
"image-classification",
"facial-recognition",
"clip"
],
@ -8674,9 +8646,6 @@
"sidecar": {
"type": "boolean"
},
"tagImage": {
"type": "boolean"
},
"trash": {
"type": "boolean"
}
@ -8692,8 +8661,7 @@
"oauthAutoLaunch",
"passwordLogin",
"sidecar",
"search",
"tagImage"
"search"
],
"type": "object"
},
@ -9191,9 +9159,6 @@
"migration": {
"$ref": "#/components/schemas/JobSettingsDto"
},
"objectTagging": {
"$ref": "#/components/schemas/JobSettingsDto"
},
"recognizeFaces": {
"$ref": "#/components/schemas/JobSettingsDto"
},
@ -9220,7 +9185,6 @@
"thumbnailGeneration",
"metadataExtraction",
"videoConversion",
"objectTagging",
"smartSearch",
"storageTemplateMigration",
"migration",
@ -9275,9 +9239,6 @@
},
"SystemConfigMachineLearningDto": {
"properties": {
"classification": {
"$ref": "#/components/schemas/ClassificationConfig"
},
"clip": {
"$ref": "#/components/schemas/CLIPConfig"
},
@ -9294,7 +9255,6 @@
"required": [
"enabled",
"url",
"classification",
"clip",
"facialRecognition"
],

View file

@ -2,7 +2,6 @@ export enum QueueName {
THUMBNAIL_GENERATION = 'thumbnailGeneration',
METADATA_EXTRACTION = 'metadataExtraction',
VIDEO_CONVERSION = 'videoConversion',
OBJECT_TAGGING = 'objectTagging',
RECOGNIZE_FACES = 'recognizeFaces',
SMART_SEARCH = 'smartSearch',
BACKGROUND_TASK = 'backgroundTask',
@ -55,10 +54,6 @@ export enum JobName {
MIGRATE_ASSET = 'migrate-asset',
MIGRATE_PERSON = 'migrate-person',
// object tagging
QUEUE_OBJECT_TAGGING = 'queue-object-tagging',
CLASSIFY_IMAGE = 'classify-image',
// facial recognition
PERSON_CLEANUP = 'person-cleanup',
PERSON_DELETE = 'person-delete',
@ -126,10 +121,6 @@ export const JOBS_TO_QUEUE: Record<JobName, QueueName> = {
[JobName.MIGRATE_ASSET]: QueueName.MIGRATION,
[JobName.MIGRATE_PERSON]: QueueName.MIGRATION,
// object tagging
[JobName.QUEUE_OBJECT_TAGGING]: QueueName.OBJECT_TAGGING,
[JobName.CLASSIFY_IMAGE]: QueueName.OBJECT_TAGGING,
// facial recognition
[JobName.QUEUE_RECOGNIZE_FACES]: QueueName.RECOGNIZE_FACES,
[JobName.RECOGNIZE_FACES]: QueueName.RECOGNIZE_FACES,

View file

@ -59,9 +59,6 @@ export class AllJobStatusResponseDto implements Record<QueueName, JobStatusDto>
@ApiProperty({ type: JobStatusDto })
[QueueName.VIDEO_CONVERSION]!: JobStatusDto;
@ApiProperty({ type: JobStatusDto })
[QueueName.OBJECT_TAGGING]!: JobStatusDto;
@ApiProperty({ type: JobStatusDto })
[QueueName.SMART_SEARCH]!: JobStatusDto;

View file

@ -99,7 +99,6 @@ describe(JobService.name, () => {
[QueueName.BACKGROUND_TASK]: expectedJobStatus,
[QueueName.SMART_SEARCH]: expectedJobStatus,
[QueueName.METADATA_EXTRACTION]: expectedJobStatus,
[QueueName.OBJECT_TAGGING]: expectedJobStatus,
[QueueName.SEARCH]: expectedJobStatus,
[QueueName.STORAGE_TEMPLATE_MIGRATION]: expectedJobStatus,
[QueueName.MIGRATION]: expectedJobStatus,
@ -157,17 +156,6 @@ describe(JobService.name, () => {
expect(jobMock.queue).toHaveBeenCalledWith({ name: JobName.STORAGE_TEMPLATE_MIGRATION });
});
it('should handle a start object tagging command', async () => {
jobMock.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
await sut.handleCommand(QueueName.OBJECT_TAGGING, { command: JobCommand.START, force: false });
expect(jobMock.queue).toHaveBeenCalledWith({ name: JobName.QUEUE_OBJECT_TAGGING, data: { force: false } });
});
it('should handle a start clip encoding command', async () => {
jobMock.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
@ -234,7 +222,6 @@ describe(JobService.name, () => {
[QueueName.BACKGROUND_TASK]: { concurrency: 10 },
[QueueName.SMART_SEARCH]: { concurrency: 10 },
[QueueName.METADATA_EXTRACTION]: { concurrency: 10 },
[QueueName.OBJECT_TAGGING]: { concurrency: 10 },
[QueueName.RECOGNIZE_FACES]: { concurrency: 10 },
[QueueName.SEARCH]: { concurrency: 10 },
[QueueName.SIDECAR]: { concurrency: 10 },
@ -249,7 +236,6 @@ describe(JobService.name, () => {
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.BACKGROUND_TASK, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.SMART_SEARCH, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.METADATA_EXTRACTION, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.OBJECT_TAGGING, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.RECOGNIZE_FACES, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.SIDECAR, 10);
expect(jobMock.setConcurrency).toHaveBeenCalledWith(QueueName.LIBRARY, 10);
@ -292,7 +278,6 @@ describe(JobService.name, () => {
item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-1' } },
jobs: [
JobName.GENERATE_WEBP_THUMBNAIL,
JobName.CLASSIFY_IMAGE,
JobName.ENCODE_CLIP,
JobName.RECOGNIZE_FACES,
JobName.GENERATE_THUMBHASH_THUMBNAIL,
@ -302,7 +287,6 @@ describe(JobService.name, () => {
item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-1', source: 'upload' } },
jobs: [
JobName.GENERATE_WEBP_THUMBNAIL,
JobName.CLASSIFY_IMAGE,
JobName.ENCODE_CLIP,
JobName.RECOGNIZE_FACES,
JobName.GENERATE_THUMBHASH_THUMBNAIL,
@ -312,7 +296,6 @@ describe(JobService.name, () => {
{
item: { name: JobName.GENERATE_JPEG_THUMBNAIL, data: { id: 'asset-live-image', source: 'upload' } },
jobs: [
JobName.CLASSIFY_IMAGE,
JobName.GENERATE_WEBP_THUMBNAIL,
JobName.RECOGNIZE_FACES,
JobName.GENERATE_THUMBHASH_THUMBNAIL,
@ -320,10 +303,6 @@ describe(JobService.name, () => {
JobName.VIDEO_CONVERSION,
],
},
{
item: { name: JobName.CLASSIFY_IMAGE, data: { id: 'asset-1' } },
jobs: [],
},
{
item: { name: JobName.ENCODE_CLIP, data: { id: 'asset-1' } },
jobs: [],
@ -371,11 +350,6 @@ describe(JobService.name, () => {
feature: FeatureFlag.CLIP_ENCODE,
configKey: SystemConfigKey.MACHINE_LEARNING_CLIP_ENABLED,
},
{
queue: QueueName.OBJECT_TAGGING,
feature: FeatureFlag.TAG_IMAGE,
configKey: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED,
},
{
queue: QueueName.RECOGNIZE_FACES,
feature: FeatureFlag.FACIAL_RECOGNITION,

View file

@ -94,10 +94,6 @@ export class JobService {
case QueueName.MIGRATION:
return this.jobRepository.queue({ name: JobName.QUEUE_MIGRATION });
case QueueName.OBJECT_TAGGING:
await this.configCore.requireFeature(FeatureFlag.TAG_IMAGE);
return this.jobRepository.queue({ name: JobName.QUEUE_OBJECT_TAGGING, data: { force } });
case QueueName.SMART_SEARCH:
await this.configCore.requireFeature(FeatureFlag.CLIP_ENCODE);
return this.jobRepository.queue({ name: JobName.QUEUE_ENCODE_CLIP, data: { force } });
@ -209,7 +205,6 @@ export class JobService {
case JobName.GENERATE_JPEG_THUMBNAIL: {
await this.jobRepository.queue({ name: JobName.GENERATE_WEBP_THUMBNAIL, data: item.data });
await this.jobRepository.queue({ name: JobName.GENERATE_THUMBHASH_THUMBNAIL, data: item.data });
await this.jobRepository.queue({ name: JobName.CLASSIFY_IMAGE, data: item.data });
await this.jobRepository.queue({ name: JobName.ENCODE_CLIP, data: item.data });
await this.jobRepository.queue({ name: JobName.RECOGNIZE_FACES, data: item.data });

View file

@ -62,10 +62,6 @@ export type JobItem =
| { name: JobName.SIDECAR_SYNC; data: IEntityJob }
| { name: JobName.SIDECAR_WRITE; data: ISidecarWriteJob }
// Object Tagging
| { name: JobName.QUEUE_OBJECT_TAGGING; data: IBaseJob }
| { name: JobName.CLASSIFY_IMAGE; data: IEntityJob }
// Recognize Faces
| { name: JobName.QUEUE_RECOGNIZE_FACES; data: IBaseJob }
| { name: JobName.RECOGNIZE_FACES; data: IEntityJob }

View file

@ -1,4 +1,4 @@
import { ClassificationConfig, CLIPConfig, RecognitionConfig } from '../smart-info/dto';
import { CLIPConfig, RecognitionConfig } from '../smart-info/dto';
export const IMachineLearningRepository = 'IMachineLearningRepository';
@ -26,7 +26,6 @@ export interface DetectFaceResult {
}
/**
 * Kinds of model a machine-learning request can target.
 *
 * The string values are what goes over the wire: the machine-learning
 * repository adds one of them as `modelType` to the config it posts.
 */
export enum ModelType {
// Sent with image classification (tagging) requests.
IMAGE_CLASSIFICATION = 'image-classification',
// Sent with face detection requests.
FACIAL_RECOGNITION = 'facial-recognition',
CLIP = 'clip',
}
@ -37,7 +36,6 @@ export enum CLIPMode {
}
export interface IMachineLearningRepository {
classifyImage(url: string, input: VisionModelInput, config: ClassificationConfig): Promise<string[]>;
encodeImage(url: string, input: VisionModelInput, config: CLIPConfig): Promise<number[]>;
encodeText(url: string, input: TextModelInput, config: CLIPConfig): Promise<number[]>;
detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]>;

View file

@ -100,5 +100,4 @@ export class ServerFeaturesDto implements FeatureFlags {
passwordLogin!: boolean;
sidecar!: boolean;
search!: boolean;
tagImage!: boolean;
}

View file

@ -171,7 +171,6 @@ describe(ServerInfoService.name, () => {
passwordLogin: true,
search: true,
sidecar: true,
tagImage: false,
configFile: false,
trash: true,
});

View file

@ -18,15 +18,6 @@ export class ModelConfig {
modelType?: ModelType;
}
/**
 * Settings for the image classification (object tagging) model.
 *
 * Extends {@link ModelConfig} with the confidence threshold used when tagging.
 */
export class ClassificationConfig extends ModelConfig {
  /**
   * Minimum confidence score, validated to lie between 0 and 1 inclusive.
   * Incoming values are coerced with `Number` before validation.
   */
  @IsNumber()
  @Min(0)
  @Max(1)
  @Type(() => Number)
  // The score is a fraction (the default config uses 0.9), so it must be
  // published as a floating-point number; `integer` misdescribes it to
  // generated API clients.
  @ApiProperty({ type: 'number', format: 'float' })
  minScore!: number;
}
export class CLIPConfig extends ModelConfig {
@IsEnum(CLIPMode)
@Optional()

View file

@ -47,107 +47,6 @@ describe(SmartInfoService.name, () => {
expect(sut).toBeDefined();
});
// Covers the queueing step of object tagging: which assets are looked up and
// which CLASSIFY_IMAGE jobs are queued for them.
describe('handleQueueObjectTagging', () => {
// Group default: classification is switched on for every test below.
beforeEach(async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
});
it('should do nothing if machine learning is disabled', async () => {
// Replace the group default with a config where machine learning as a whole is off.
configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
await sut.handleQueueObjectTagging({});
// Neither asset query may run when the feature is off.
expect(assetMock.getAll).not.toHaveBeenCalled();
expect(assetMock.getWithout).not.toHaveBeenCalled();
});
it('should queue the assets without tags', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
// A single page with one asset, so exactly one job is expected.
assetMock.getWithout.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
await sut.handleQueueObjectTagging({ force: false });
expect(jobMock.queue.mock.calls).toEqual([[{ name: JobName.CLASSIFY_IMAGE, data: { id: assetStub.image.id } }]]);
// Without `force`, only assets lacking object tags are requested, starting at the first page.
expect(assetMock.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.OBJECT_TAGS);
});
it('should queue all the assets', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
assetMock.getAll.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
// With `force`, every asset is fetched regardless of existing tags.
await sut.handleQueueObjectTagging({ force: true });
expect(jobMock.queue.mock.calls).toEqual([[{ name: JobName.CLASSIFY_IMAGE, data: { id: assetStub.image.id } }]]);
expect(assetMock.getAll).toHaveBeenCalled();
});
});
// Covers classification of a single asset: the feature gate, the resize-path
// precondition, and persisting the tags returned by the machine-learning mock.
describe('handleClassifyImage', () => {
it('should do nothing if machine learning is disabled', async () => {
configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);
await sut.handleClassifyImage({ id: '123' });
// The handler must bail out before touching the ML service or the asset repository.
expect(machineMock.classifyImage).not.toHaveBeenCalled();
expect(assetMock.getByIds).not.toHaveBeenCalled();
});
it('should skip assets without a resize path', async () => {
// Local stub with an empty resize path; it shadows any `asset` declared outside this block.
const asset = { resizePath: '' } as AssetEntity;
assetMock.getByIds.mockResolvedValue([asset]);
await sut.handleClassifyImage({ id: asset.id });
expect(smartMock.upsert).not.toHaveBeenCalled();
expect(machineMock.classifyImage).not.toHaveBeenCalled();
});
it('should save the returned tags', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
machineMock.classifyImage.mockResolvedValue(['tag1', 'tag2', 'tag3']);
// NOTE(review): `asset` and the `getByIds` result come from setup outside this block —
// presumably id 'asset-1' with resize path 'path/to/resize.ext', matching the
// expectations below; confirm against the enclosing suite.
await sut.handleClassifyImage({ id: asset.id });
// Expected call: service URL, the asset's resize path, and the classification config with it enabled.
expect(machineMock.classifyImage).toHaveBeenCalledWith(
'http://immich-machine-learning:3003',
{
imagePath: 'path/to/resize.ext',
},
{ enabled: true, minScore: 0.9, modelName: 'microsoft/resnet-50' },
);
expect(smartMock.upsert).toHaveBeenCalledWith({
assetId: 'asset-1',
tags: ['tag1', 'tag2', 'tag3'],
});
});
it('should always overwrite old tags', async () => {
configMock.load.mockResolvedValue([
{ key: SystemConfigKey.MACHINE_LEARNING_CLASSIFICATION_ENABLED, value: true },
]);
// An empty result must still be written.
machineMock.classifyImage.mockResolvedValue([]);
await sut.handleClassifyImage({ id: asset.id });
expect(machineMock.classifyImage).toHaveBeenCalled();
expect(smartMock.upsert).toHaveBeenCalled();
});
});
describe('handleQueueEncodeClip', () => {
it('should do nothing if machine learning is disabled', async () => {
configMock.load.mockResolvedValue([{ key: SystemConfigKey.MACHINE_LEARNING_ENABLED, value: false }]);

View file

@ -46,48 +46,6 @@ export class SmartInfoService {
await this.jobRepository.resume(QueueName.SMART_SEARCH);
}
/**
 * Queues one `CLASSIFY_IMAGE` job per asset that should be tagged.
 *
 * Does nothing when machine learning, or classification specifically, is
 * disabled in the system config.
 *
 * @param force - When truthy, every asset is queued; otherwise only assets
 *   reported as lacking object tags.
 * @returns Always `true`.
 */
async handleQueueObjectTagging({ force }: IBaseJob) {
  const { machineLearning: mlConfig } = await this.configCore.getConfig();
  const taggingEnabled = mlConfig.enabled && mlConfig.classification.enabled;
  if (!taggingEnabled) {
    return true;
  }

  // Page through the assets so the whole library is never loaded at once.
  const pages = usePagination(JOBS_ASSET_PAGINATION_SIZE, (page) => {
    if (force) {
      return this.assetRepository.getAll(page);
    }
    return this.assetRepository.getWithout(page, WithoutProperty.OBJECT_TAGS);
  });

  for await (const batch of pages) {
    for (const { id } of batch) {
      await this.jobRepository.queue({ name: JobName.CLASSIFY_IMAGE, data: { id } });
    }
  }

  return true;
}
/**
 * Classifies one asset's resized image and stores the resulting tags.
 *
 * @param id - ID of the asset to classify.
 * @returns `true` when machine learning or classification is disabled
 *   (nothing to do) or when the tags were saved; `false` when no asset is
 *   found for `id` or the asset has no resize path.
 */
async handleClassifyImage({ id }: IEntityJob) {
  const { machineLearning } = await this.configCore.getConfig();
  if (!machineLearning.enabled || !machineLearning.classification.enabled) {
    return true;
  }

  const [asset] = await this.assetRepository.getByIds([id]);
  // If the lookup returns no match, destructuring leaves `asset` undefined.
  // Handle that like a missing resize path instead of throwing a TypeError
  // on the property access.
  if (!asset?.resizePath) {
    return false;
  }

  const tags = await this.machineLearning.classifyImage(
    machineLearning.url,
    { imagePath: asset.resizePath },
    machineLearning.classification,
  );

  // Written unconditionally, so an empty result replaces previously stored tags.
  await this.repository.upsert({ assetId: asset.id, tags });

  return true;
}
async handleQueueEncodeClip({ force }: IBaseJob) {
const { machineLearning } = await this.configCore.getConfig();
if (!machineLearning.enabled || !machineLearning.clip.enabled) {

View file

@ -29,12 +29,6 @@ export class SystemConfigJobDto implements Record<QueueName, JobSettingsDto> {
@Type(() => JobSettingsDto)
[QueueName.VIDEO_CONVERSION]!: JobSettingsDto;
@ApiProperty({ type: JobSettingsDto })
@ValidateNested()
@IsObject()
@Type(() => JobSettingsDto)
[QueueName.OBJECT_TAGGING]!: JobSettingsDto;
@ApiProperty({ type: JobSettingsDto })
@ValidateNested()
@IsObject()

View file

@ -1,4 +1,4 @@
import { ClassificationConfig, CLIPConfig, RecognitionConfig } from '@app/domain';
import { CLIPConfig, RecognitionConfig } from '@app/domain';
import { Type } from 'class-transformer';
import { IsBoolean, IsObject, IsUrl, ValidateIf, ValidateNested } from 'class-validator';
@ -10,11 +10,6 @@ export class SystemConfigMachineLearningDto {
@ValidateIf((dto) => dto.enabled)
url!: string;
@Type(() => ClassificationConfig)
@ValidateNested()
@IsObject()
classification!: ClassificationConfig;
@Type(() => CLIPConfig)
@ValidateNested()
@IsObject()

View file

@ -49,7 +49,6 @@ export const defaults = Object.freeze<SystemConfig>({
[QueueName.BACKGROUND_TASK]: { concurrency: 5 },
[QueueName.SMART_SEARCH]: { concurrency: 2 },
[QueueName.METADATA_EXTRACTION]: { concurrency: 5 },
[QueueName.OBJECT_TAGGING]: { concurrency: 2 },
[QueueName.RECOGNIZE_FACES]: { concurrency: 2 },
[QueueName.SEARCH]: { concurrency: 5 },
[QueueName.SIDECAR]: { concurrency: 5 },
@ -66,11 +65,6 @@ export const defaults = Object.freeze<SystemConfig>({
machineLearning: {
enabled: process.env.IMMICH_MACHINE_LEARNING_ENABLED !== 'false',
url: process.env.IMMICH_MACHINE_LEARNING_URL || 'http://immich-machine-learning:3003',
classification: {
enabled: false,
modelName: 'microsoft/resnet-50',
minScore: 0.9,
},
clip: {
enabled: true,
modelName: 'ViT-B-32__openai',
@ -137,7 +131,6 @@ export const defaults = Object.freeze<SystemConfig>({
export enum FeatureFlag {
CLIP_ENCODE = 'clipEncode',
FACIAL_RECOGNITION = 'facialRecognition',
TAG_IMAGE = 'tagImage',
MAP = 'map',
REVERSE_GEOCODING = 'reverseGeocoding',
SIDECAR = 'sidecar',
@ -182,8 +175,6 @@ export class SystemConfigCore {
throw new BadRequestException('Clip encoding is not enabled');
case FeatureFlag.FACIAL_RECOGNITION:
throw new BadRequestException('Facial recognition is not enabled');
case FeatureFlag.TAG_IMAGE:
throw new BadRequestException('Image tagging is not enabled');
case FeatureFlag.SIDECAR:
throw new BadRequestException('Sidecar is not enabled');
case FeatureFlag.SEARCH:
@ -212,7 +203,6 @@ export class SystemConfigCore {
return {
[FeatureFlag.CLIP_ENCODE]: mlEnabled && config.machineLearning.clip.enabled,
[FeatureFlag.FACIAL_RECOGNITION]: mlEnabled && config.machineLearning.facialRecognition.enabled,
[FeatureFlag.TAG_IMAGE]: mlEnabled && config.machineLearning.classification.enabled,
[FeatureFlag.MAP]: config.map.enabled,
[FeatureFlag.REVERSE_GEOCODING]: config.reverseGeocoding.enabled,
[FeatureFlag.SIDECAR]: true,
@ -245,10 +235,7 @@ export class SystemConfigCore {
_.set(config, key, value);
}
const errors = await validate(plainToInstance(SystemConfigDto, config), {
forbidNonWhitelisted: true,
forbidUnknownValues: true,
});
const errors = await validate(plainToInstance(SystemConfigDto, config));
if (errors.length > 0) {
this.logger.error('Validation error', errors);
if (configFilePath) {
@ -334,13 +321,13 @@ export class SystemConfigCore {
}
if (!_.isEmpty(file)) {
throw new Error(`Unknown keys found: ${JSON.stringify(file)}`);
this.logger.warn(`Unknown keys found: ${JSON.stringify(file, null, 2)}`);
}
this.configCache = overrides;
} catch (error: Error | any) {
this.logger.error(`Unable to load configuration file: ${filepath} due to ${error}`, error?.stack);
throw new Error('Invalid configuration file');
this.logger.error(`Unable to load configuration file: ${filepath}`);
throw error;
}
}

View file

@ -11,6 +11,7 @@ import {
TranscodePolicy,
VideoCodec,
} from '@app/infra/entities';
import { ImmichLogger } from '@app/infra/logger';
import { BadRequestException } from '@nestjs/common';
import { newCommunicationRepositoryMock, newSystemConfigRepositoryMock } from '@test';
import { QueueName } from '../job';
@ -29,7 +30,6 @@ const updatedConfig = Object.freeze<SystemConfig>({
[QueueName.BACKGROUND_TASK]: { concurrency: 5 },
[QueueName.SMART_SEARCH]: { concurrency: 2 },
[QueueName.METADATA_EXTRACTION]: { concurrency: 5 },
[QueueName.OBJECT_TAGGING]: { concurrency: 2 },
[QueueName.RECOGNIZE_FACES]: { concurrency: 2 },
[QueueName.SEARCH]: { concurrency: 5 },
[QueueName.SIDECAR]: { concurrency: 5 },
@ -65,11 +65,6 @@ const updatedConfig = Object.freeze<SystemConfig>({
machineLearning: {
enabled: true,
url: 'http://immich-machine-learning:3003',
classification: {
enabled: false,
modelName: 'microsoft/resnet-50',
minScore: 0.9,
},
clip: {
enabled: true,
modelName: 'ViT-B-32__openai',
@ -169,6 +164,16 @@ describe(SystemConfigService.name, () => {
});
describe('getConfig', () => {
let warnLog: jest.SpyInstance;
beforeEach(() => {
warnLog = jest.spyOn(ImmichLogger.prototype, 'warn');
});
afterEach(() => {
warnLog.mockRestore();
});
it('should return the default config', async () => {
configMock.load.mockResolvedValue([]);
@ -217,9 +222,9 @@ describe(SystemConfigService.name, () => {
{ should: 'validate numbers', config: { ffmpeg: { crf: 'not-a-number' } } },
{ should: 'validate booleans', config: { oauth: { enabled: 'invalid' } } },
{ should: 'validate enums', config: { ffmpeg: { transcode: 'unknown' } } },
{ should: 'validate top level unknown options', config: { unknownOption: true } },
{ should: 'validate nested unknown options', config: { ffmpeg: { unknownOption: true } } },
{ should: 'validate required oauth fields', config: { oauth: { enabled: true } } },
{ should: 'warn for top level unknown options', warn: true, config: { unknownOption: true } },
{ should: 'warn for nested unknown options', warn: true, config: { ffmpeg: { unknownOption: true } } },
];
for (const test of tests) {
@ -227,7 +232,12 @@ describe(SystemConfigService.name, () => {
process.env.IMMICH_CONFIG_FILE = 'immich-config.json';
configMock.readFile.mockResolvedValue(JSON.stringify(test.config));
await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
if (test.warn) {
await sut.getConfig();
expect(warnLog).toHaveBeenCalled();
} else {
await expect(sut.getConfig()).rejects.toBeInstanceOf(Error);
}
});
}
});

View file

@ -35,7 +35,6 @@ export enum SystemConfigKey {
JOB_THUMBNAIL_GENERATION_CONCURRENCY = 'job.thumbnailGeneration.concurrency',
JOB_METADATA_EXTRACTION_CONCURRENCY = 'job.metadataExtraction.concurrency',
JOB_VIDEO_CONVERSION_CONCURRENCY = 'job.videoConversion.concurrency',
JOB_OBJECT_TAGGING_CONCURRENCY = 'job.objectTagging.concurrency',
JOB_RECOGNIZE_FACES_CONCURRENCY = 'job.recognizeFaces.concurrency',
JOB_CLIP_ENCODING_CONCURRENCY = 'job.smartSearch.concurrency',
JOB_BACKGROUND_TASK_CONCURRENCY = 'job.backgroundTask.concurrency',
@ -54,10 +53,6 @@ export enum SystemConfigKey {
MACHINE_LEARNING_ENABLED = 'machineLearning.enabled',
MACHINE_LEARNING_URL = 'machineLearning.url',
MACHINE_LEARNING_CLASSIFICATION_ENABLED = 'machineLearning.classification.enabled',
MACHINE_LEARNING_CLASSIFICATION_MODEL_NAME = 'machineLearning.classification.modelName',
MACHINE_LEARNING_CLASSIFICATION_MIN_SCORE = 'machineLearning.classification.minScore',
MACHINE_LEARNING_CLIP_ENABLED = 'machineLearning.clip.enabled',
MACHINE_LEARNING_CLIP_MODEL_NAME = 'machineLearning.clip.modelName',
@ -184,11 +179,6 @@ export interface SystemConfig {
machineLearning: {
enabled: boolean;
url: string;
classification: {
enabled: boolean;
modelName: string;
minScore: number;
};
clip: {
enabled: boolean;
modelName: string;

View file

@ -1,5 +1,4 @@
import {
ClassificationConfig,
CLIPConfig,
CLIPMode,
DetectFaceResult,
@ -27,10 +26,6 @@ export class MachineLearningRepository implements IMachineLearningRepository {
return res.json();
}
/**
 * Sends an image classification request through `this.post`.
 *
 * @param url - Address of the machine-learning service.
 * @param input - Vision model input to submit.
 * @param config - Classification settings; forwarded with `modelType` set to
 *   `ModelType.IMAGE_CLASSIFICATION`.
 * @returns The string array produced by the request.
 */
classifyImage(url: string, input: VisionModelInput, config: ClassificationConfig): Promise<string[]> {
  const request = { ...config, modelType: ModelType.IMAGE_CLASSIFICATION };
  return this.post<string[]>(url, input, request);
}
/**
 * Sends a face detection request through `this.post`.
 *
 * @param url - Address of the machine-learning service.
 * @param input - Vision model input to submit.
 * @param config - Recognition settings; forwarded with `modelType` set to
 *   `ModelType.FACIAL_RECOGNITION`.
 * @returns The detection results produced by the request.
 */
detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]> {
  const request = { ...config, modelType: ModelType.FACIAL_RECOGNITION };
  return this.post<DetectFaceResult[]>(url, input, request);
}

View file

@ -42,8 +42,6 @@ export class AppService {
[JobName.CLEAN_OLD_AUDIT_LOGS]: () => this.auditService.handleCleanup(),
[JobName.USER_DELETE_CHECK]: () => this.userService.handleUserDeleteCheck(),
[JobName.USER_DELETION]: (data) => this.userService.handleUserDelete(data),
[JobName.QUEUE_OBJECT_TAGGING]: (data) => this.smartInfoService.handleQueueObjectTagging(data),
[JobName.CLASSIFY_IMAGE]: (data) => this.smartInfoService.handleClassifyImage(data),
[JobName.QUEUE_ENCODE_CLIP]: (data) => this.smartInfoService.handleQueueEncodeClip(data),
[JobName.ENCODE_CLIP]: (data) => this.smartInfoService.handleEncodeClip(data),
[JobName.STORAGE_TEMPLATE_MIGRATION]: () => this.storageTemplateService.handleMigration(),

View file

@ -83,7 +83,6 @@ describe(`${ServerInfoController.name} (e2e)`, () => {
passwordLogin: true,
search: true,
sidecar: true,
tagImage: false,
trash: true,
});
});

View file

@ -2,7 +2,6 @@ import { IMachineLearningRepository } from '@app/domain';
export const newMachineLearningRepositoryMock = (): jest.Mocked<IMachineLearningRepository> => {
return {
classifyImage: jest.fn(),
encodeImage: jest.fn(),
encodeText: jest.fn(),
detectFaces: jest.fn(),

View file

@ -135,7 +135,6 @@ class ImmichApi {
[JobName.ThumbnailGeneration]: 'Generate Thumbnails',
[JobName.MetadataExtraction]: 'Extract Metadata',
[JobName.Sidecar]: 'Sidecar Metadata',
[JobName.ObjectTagging]: 'Tag Objects',
[JobName.SmartSearch]: 'Smart Search',
[JobName.RecognizeFaces]: 'Recognize Faces',
[JobName.VideoConversion]: 'Transcode Videos',

View file

@ -373,12 +373,6 @@ export interface AllJobStatusResponseDto {
* @memberof AllJobStatusResponseDto
*/
'migration': JobStatusDto;
/**
*
* @type {JobStatusDto}
* @memberof AllJobStatusResponseDto
*/
'objectTagging': JobStatusDto;
/**
*
* @type {JobStatusDto}
@ -1318,39 +1312,6 @@ export interface CheckExistingAssetsResponseDto {
*/
'existingIds': Array<string>;
}
/**
 * Settings for the image classification (tagging) model, exposed as
 * `SystemConfigMachineLearningDto.classification`.
 * @export
 * @interface ClassificationConfig
 */
export interface ClassificationConfig {
/**
 * Whether image tagging is enabled; when disabled, images are not tagged.
 * @type {boolean}
 * @memberof ClassificationConfig
 */
'enabled': boolean;
/**
 * Minimum confidence score required to add a particular object tag.
 * @type {number}
 * @memberof ClassificationConfig
 */
'minScore': number;
/**
 * Name of the image classification model to use.
 * @type {string}
 * @memberof ClassificationConfig
 */
'modelName': string;
/**
 * Optional model type discriminator.
 * @type {ModelType}
 * @memberof ClassificationConfig
 */
'modelType'?: ModelType;
}
/**
*
* @export
@ -2015,7 +1976,6 @@ export const JobName = {
ThumbnailGeneration: 'thumbnailGeneration',
MetadataExtraction: 'metadataExtraction',
VideoConversion: 'videoConversion',
ObjectTagging: 'objectTagging',
RecognizeFaces: 'recognizeFaces',
SmartSearch: 'smartSearch',
BackgroundTask: 'backgroundTask',
@ -2358,7 +2318,6 @@ export interface MergePersonDto {
*/
export const ModelType = {
ImageClassification: 'image-classification',
FacialRecognition: 'facial-recognition',
Clip: 'clip'
} as const;
@ -3139,12 +3098,6 @@ export interface ServerFeaturesDto {
* @memberof ServerFeaturesDto
*/
'sidecar': boolean;
/**
*
* @type {boolean}
* @memberof ServerFeaturesDto
*/
'tagImage': boolean;
/**
*
* @type {boolean}
@ -3803,12 +3756,6 @@ export interface SystemConfigJobDto {
* @memberof SystemConfigJobDto
*/
'migration': JobSettingsDto;
/**
*
* @type {JobSettingsDto}
* @memberof SystemConfigJobDto
*/
'objectTagging': JobSettingsDto;
/**
*
* @type {JobSettingsDto}
@ -3911,12 +3858,6 @@ export interface SystemConfigLoggingDto {
* @interface SystemConfigMachineLearningDto
*/
export interface SystemConfigMachineLearningDto {
/**
*
* @type {ClassificationConfig}
* @memberof SystemConfigMachineLearningDto
*/
'classification': ClassificationConfig;
/**
*
* @type {CLIPConfig}

View file

@ -15,7 +15,6 @@
mdiImageSearch,
mdiLibraryShelves,
mdiTable,
mdiTagMultiple,
mdiVideo,
} from '@mdi/js';
import ConfirmDialogue from '../../shared-components/confirm-dialogue.svelte';
@ -78,13 +77,6 @@
missingText: 'DISCOVER',
disabled: !$featureFlags.sidecar,
},
[JobName.ObjectTagging]: {
icon: mdiTagMultiple,
title: api.getJobName(JobName.ObjectTagging),
subtitle:
'Run machine learning on assets to tag objects\nNote that some assets may not have any objects detected',
disabled: !$featureFlags.tagImage,
},
[JobName.SmartSearch]: {
icon: mdiImageSearch,
title: api.getJobName(JobName.SmartSearch),

View file

@ -22,7 +22,6 @@
JobName.MetadataExtraction,
JobName.Library,
JobName.Sidecar,
JobName.ObjectTagging,
JobName.SmartSearch,
JobName.RecognizeFaces,
JobName.VideoConversion,

View file

@ -89,46 +89,6 @@
/>
</div>
<SettingAccordion title="Image Tagging" subtitle="Tag and classify images with object labels">
<div class="ml-4 mt-4 flex flex-col gap-4">
<SettingSwitch
title="ENABLED"
subtitle="If disabled, images will not be tagged. This affects the Things section in the Explore page as well as 'm:' searches."
bind:checked={machineLearningConfig.classification.enabled}
disabled={disabled || !machineLearningConfig.enabled}
/>
<hr />
<SettingInputField
inputType={SettingInputFieldType.TEXT}
label="IMAGE CLASSIFICATION MODEL"
bind:value={machineLearningConfig.classification.modelName}
required={true}
disabled={disabled || !machineLearningConfig.enabled || !machineLearningConfig.classification.enabled}
isEdited={machineLearningConfig.classification.modelName !== savedConfig.classification.modelName}
>
<p slot="desc" class="immich-form-label pb-2 text-sm">
The name of an image classification model listed <a
href="https://huggingface.co/models?pipeline_tag=image-classification&sort=trending"><u>here</u></a
>. It must be tagged with the 'Image Classification' task and must support ONNX conversion.
</p>
</SettingInputField>
<SettingInputField
inputType={SettingInputFieldType.NUMBER}
label="IMAGE CLASSIFICATION THRESHOLD"
desc="Minimum confidence score to add a particular object tag. Lower values will add more tags to images, but may result in more false positives. Will not have any effect until the Tag Objects job is re-run."
bind:value={machineLearningConfig.classification.minScore}
step="0.1"
min="0"
max="1"
disabled={disabled || !machineLearningConfig.enabled || !machineLearningConfig.classification.enabled}
isEdited={machineLearningConfig.classification.minScore !== savedConfig.classification.minScore}
/>
</div>
</SettingAccordion>
<SettingAccordion title="Smart Search" subtitle="Search for images semantically using CLIP embeddings">
<div class="ml-4 mt-4 flex flex-col gap-4">
<SettingSwitch

View file

@ -8,7 +8,6 @@ export const featureFlags = writable<FeatureFlags>({
clipEncode: true,
facialRecognition: true,
sidecar: true,
tagImage: true,
map: true,
reverseGeocoding: true,
search: true,