# immich/machine-learning/src/main.py

import os
from flask import Flask, request
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from PIL import Image

# Runtime configuration, read once from environment variables at import time.

# Debug mode is on only when NODE_ENV is exactly 'development'.
is_dev = os.getenv('NODE_ENV') == 'development'
# Environment values are always strings while the fallback is an int, so
# normalise to int to give `server_port` one predictable type.
server_port = int(os.getenv('MACHINE_LEARNING_PORT', 3003))
server_host = os.getenv('MACHINE_LEARNING_HOST', '0.0.0.0')

# Model identifiers passed to `_get_model` by the route handlers; each one can
# be overridden through its environment variable.
classification_model = os.getenv('MACHINE_LEARNING_CLASSIFICATION_MODEL', 'microsoft/resnet-50')
object_model = os.getenv('MACHINE_LEARNING_OBJECT_MODEL', 'hustvl/yolos-tiny')
clip_image_model = os.getenv('MACHINE_LEARNING_CLIP_IMAGE_MODEL', 'clip-ViT-B-32')
clip_text_model = os.getenv('MACHINE_LEARNING_CLIP_TEXT_MODEL', 'clip-ViT-B-32')

# Loaded models, keyed by "<model>|<task>" so each combination is built once.
_model_cache = {}


def _get_model(model, task=None):
    """Return the model for ``model``/``task``, loading it on first request.

    A truthy ``task`` builds the model with ``pipeline(model=..., task=...)``;
    otherwise ``SentenceTransformer(model)`` is used. Every loaded instance is
    stored in the module-level ``_model_cache`` and reused by later calls with
    the same arguments.
    """
    cache_key = '|'.join((model, str(task)))
    if cache_key in _model_cache:
        return _model_cache[cache_key]

    loaded = pipeline(model=model, task=task) if task else SentenceTransformer(model)
    _model_cache[cache_key] = loaded
    return loaded

# Flask application object; the route handlers in this module register on it.
server = Flask(__name__)

@server.route("/ping")
def ping() -> str:
    """Respond to ``/ping`` with the literal string ``pong``."""
    return "pong"

@server.route("/object-detection/detect-object", methods=['POST'])
def object_detection():
    """Run the object-detection model on the thumbnail named in the request.

    Reads ``thumbnailPath`` from the JSON body, hands it together with the
    cached ``object_model`` pipeline to ``run_engine`` and returns that result
    with HTTP status 200.
    """
    detector = _get_model(object_model, 'object-detection')
    thumbnail_path = request.json['thumbnailPath']
    detected = run_engine(detector, thumbnail_path)
    return detected, 200

@server.route("/image-classifier/tag-image", methods=['POST'])
def image_classification():
    """Run the image-classification model on the thumbnail named in the request.

    Reads ``thumbnailPath`` from the JSON body, hands it together with the
    cached ``classification_model`` pipeline to ``run_engine`` and returns
    that result with HTTP status 200.
    """
    classifier = _get_model(classification_model, 'image-classification')
    thumbnail_path = request.json['thumbnailPath']
    tags = run_engine(classifier, thumbnail_path)
    return tags, 200

@server.route("/sentence-transformer/encode-image", methods=['POST'])
def clip_encode_image():
    """Encode the thumbnail named in the request with the CLIP image model.

    Reads ``thumbnailPath`` from the JSON body, opens that file with Pillow and
    passes the image to the cached ``clip_image_model``. The encoding is
    converted with ``.tolist()`` and returned with HTTP status 200.
    """
    model = _get_model(clip_image_model)
    asset_path = request.json['thumbnailPath']
    # Open the image in a context manager so its file handle is released even
    # when encoding raises, instead of waiting for garbage collection.
    with Image.open(asset_path) as image:
        embedding = model.encode(image).tolist()
    return embedding, 200

@server.route("/sentence-transformer/encode-text", methods=['POST'])
def clip_encode_text():
    """Encode the text supplied in the request with the CLIP text model.

    Reads ``text`` from the JSON body, encodes it with the cached
    ``clip_text_model`` and returns the encoding, converted with ``.tolist()``,
    with HTTP status 200.
    """
    encoder = _get_model(clip_text_model)
    query = request.json['text']
    embedding = encoder.encode(query)
    return embedding.tolist(), 200

def run_engine(engine, path, min_score=0.9):
    """Collect the distinct tags that ``engine`` predicts for ``path``.

    Args:
        engine: Callable invoked as ``engine(path)``. It must return an
            iterable of mappings that each carry a ``'label'`` string and a
            numeric ``'score'``.
        path: Value forwarded unchanged to ``engine``.
        min_score: Exclusive lower bound; only predictions whose score is
            strictly greater are kept. Defaults to 0.9.

    Returns:
        A list of unique tags in first-seen order. A label containing
        ``', '`` is split into one tag per part.
    """
    tags = []
    for prediction in engine(path):
        # Only split the label once the prediction has passed the threshold.
        if prediction['score'] > min_score:
            tags.extend(prediction['label'].split(', '))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (de-duplicating through set() is not).
    return list(dict.fromkeys(tags))


# Start the Flask server only when this file is executed as a script; debug
# mode, bind address and port come from the module-level configuration.
if __name__ == "__main__":
    server.run(debug=is_dev, host=server_host, port=server_port)
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00			`import os`
			`from flask import Flask, request`
			`from transformers import pipeline`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`from sentence_transformers import SentenceTransformer, util`
			`from PIL import Image`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
			`is_dev = os.getenv('NODE_ENV') == 'development'`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`server_port = os.getenv('MACHINE_LEARNING_PORT', 3003)`
			`server_host = os.getenv('MACHINE_LEARNING_HOST', '0.0.0.0')`

			`classification_model = os.getenv('MACHINE_LEARNING_CLASSIFICATION_MODEL', 'microsoft/resnet-50')`
			`object_model = os.getenv('MACHINE_LEARNING_OBJECT_MODEL', 'hustvl/yolos-tiny')`
			`clip_image_model = os.getenv('MACHINE_LEARNING_CLIP_IMAGE_MODEL', 'clip-ViT-B-32')`
			`clip_text_model = os.getenv('MACHINE_LEARNING_CLIP_TEXT_MODEL', 'clip-ViT-B-32')`

			`_model_cache = {}`
			`def _get_model(model, task=None):`
			`global _model_cache`
			`key = '\|'.join([model, str(task)])`
			`if key not in _model_cache:`
			`if task:`
			`_model_cache[key] = pipeline(model=model, task=task)`
			`else:`
			`_model_cache[key] = SentenceTransformer(model)`
			`return _model_cache[key]`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`server = Flask(__name__)`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
			`@server.route("/ping")`
			`def ping():`
			`return "pong"`

			`@server.route("/object-detection/detect-object", methods=['POST'])`
			`def object_detection():`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`model = _get_model(object_model, 'object-detection')`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00			`assetPath = request.json['thumbnailPath']`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`return run_engine(model, assetPath), 200`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
			`@server.route("/image-classifier/tag-image", methods=['POST'])`
			`def image_classification():`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`model = _get_model(classification_model, 'image-classification')`
			`assetPath = request.json['thumbnailPath']`
			`return run_engine(model, assetPath), 200`

			`@server.route("/sentence-transformer/encode-image", methods=['POST'])`
			`def clip_encode_image():`
			`model = _get_model(clip_image_model)`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00			`assetPath = request.json['thumbnailPath']`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`return model.encode(Image.open(assetPath)).tolist(), 200`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`@server.route("/sentence-transformer/encode-text", methods=['POST'])`
			`def clip_encode_text():`
			`model = _get_model(clip_text_model)`
			`text = request.json['text']`
			`return model.encode(text).tolist(), 200`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
			`def run_engine(engine, path):`
			`result = []`
			`predictions = engine(path)`

			`for index, pred in enumerate(predictions):`
			`tags = pred['label'].split(', ')`
fix: machine learning only take results with > 90% confidence (#1875) 2023-02-26 05:02:35 +01:00			`if (pred['score'] > 0.9):`
			`result = [*result, *tags]`
feat(machine-learning)!: move machine learning to Python based image (#1774) BREAKING CHANGES * Users have to update the docker-compose file, machine-learning portion. * Temporary dropping machine-learning support for Arm64 and Armv7 2023-02-18 16:13:37 +01:00
			`if (len(result) > 1):`
			`result = list(set(result))`

			`return result`


			`if __name__ == "__main__":`
feat(server): CLIP search integration (#1939) 2023-03-18 14:44:42 +01:00			`server.run(debug=is_dev, host=server_host, port=server_port)`