fix(server): Fix delay with multiple ml servers (#16284)

* Prospective fix for ensuring that known active ML servers are used to reduce search delay. * Added some logging and renamed backoff const. * Fix lint issues. * Update to use env vars for timeouts and updated documentation and strings. * Fix docs. * Make counter logic clearer. * Minor readability improvements. * Extract skipUrl logic per feedback, and change log to verbose. * Make code harder to read.
2025-04-21 15:36:26 +02:00 · 2025-02-28 03:14:09 +11:00 · 2025-02-28 03:14:09 +11:00 · a808b8610e
commit a808b8610e
parent c70c9067b0
5 changed files with 82 additions and 1 deletions
--- a/docs/docs/administration/system-settings.md
+++ b/docs/docs/administration/system-settings.md
@ -98,6 +98,14 @@ The default Immich log level is `Log` (commonly known as `Info`). The Immich adm
 Through this setting, you can manage all the settings related to machine learning in Immich, from the setting of remote machine learning to the model and its parameters
 You can choose to disable a certain type of machine learning, for example smart search or facial recognition.

+### URL
+
+The built-in machine learning server (`http://immich-machine-learning:3003`) will be configured by default, but you can change this or add additional servers.
+
+Hosting the `immich-machine-learning` container on a machine with a more powerful GPU can be helpful for processing a large number of photos (such as during batch import) or for faster search.
+
+If more than one URL is provided, each server will be attempted one-at-a-time until one responds successfully, in order from first to last. Servers that don't respond will be temporarily ignored until they come back online.
+
 ### Smart Search

 The [smart search](/docs/features/searching) settings allow you to change the [CLIP model](https://openai.com/research/clip). Larger models will typically provide [more accurate search results](https://github.com/immich-app/immich/discussions/11862) but consume more processing power and RAM. When [changing the CLIP model](/docs/FAQ#can-i-use-a-custom-clip-model) it is mandatory to re-run the Smart Search job on all images to fully apply the change.
--- a/docs/docs/install/environment-variables.md
+++ b/docs/docs/install/environment-variables.md
@ -168,6 +168,8 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_ANN_TUNING_LEVEL`                         | ARM-NN GPU tuning level (1: rapid, 2: normal, 3: exhaustive)                                        |               `2`               | machine learning |
 | `MACHINE_LEARNING_DEVICE_IDS`<sup>\*4</sup>                 | Device IDs to use in multi-GPU environments                                                         |               `0`               | machine learning |
 | `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION`       | Set the maximum number of faces that will be processed at once by the facial recognition model      |  None (`1` if using OpenVINO)   | machine learning |
+| `MACHINE_LEARNING_PING_TIMEOUT`                             | How long (ms) to wait for a PING response when checking if an ML server is available                |             `2000`              | server           |
+| `MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME`                | How long (ms) to ignore ML servers that are offline before trying again                             |             `30000`             | server           |

 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

--- a/i18n/en.json
+++ b/i18n/en.json
@ -131,7 +131,7 @@
    "machine_learning_smart_search_description": "Search for images semantically using CLIP embeddings",
    "machine_learning_smart_search_enabled": "Enable smart search",
    "machine_learning_smart_search_enabled_description": "If disabled, images will not be encoded for smart search.",
-    "machine_learning_url_description": "The URL of the machine learning server. If more than one URL is provided, each server will be attempted one-at-a-time until one responds successfully, in order from first to last.",
+    "machine_learning_url_description": "The URL of the machine learning server. If more than one URL is provided, each server will be attempted one-at-a-time until one responds successfully, in order from first to last. Servers that don't respond will be temporarily ignored until they come back online.",
    "manage_concurrency": "Manage Concurrency",
    "manage_log_settings": "Manage log settings",
    "map_dark_style": "Dark style",
--- a/server/src/constants.ts
+++ b/server/src/constants.ts
@ -38,6 +38,11 @@ export const ONE_HOUR = Duration.fromObject({ hours: 1 });

 export const APP_MEDIA_LOCATION = process.env.IMMICH_MEDIA_LOCATION || './upload';

+export const MACHINE_LEARNING_PING_TIMEOUT = Number(process.env.MACHINE_LEARNING_PING_TIMEOUT || 2000); // milliseconds
+export const MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME = Number(
+  process.env.MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME || 30_000,
+); // milliseconds
+
 export const citiesFile = 'cities500.txt';

 export const MOBILE_REDIRECT = 'app.immich:///oauth-callback';
--- a/server/src/repositories/machine-learning.repository.ts
+++ b/server/src/repositories/machine-learning.repository.ts
@ -1,5 +1,6 @@
 import { Injectable } from '@nestjs/common';
 import { readFile } from 'node:fs/promises';
+import { MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME, MACHINE_LEARNING_PING_TIMEOUT } from 'src/constants';
 import { CLIPConfig } from 'src/dtos/model-config.dto';
 import { LoggingRepository } from 'src/repositories/logging.repository';

@ -55,16 +56,80 @@ export type MachineLearningRequest = ClipVisualRequest | ClipTextualRequest | Fa

@Injectable()
 export class MachineLearningRepository {
+  // Last known availability of each ML server, keyed by URL. Note that deleted
+  // URLs are not removed from this map (i.e. they're leaked). Cleaning them up is
+  // low priority since there should be very few over a typical server uptime cycle.
+  private urlAvailability: {
+    [url: string]:
+      | {
+          active: boolean; // outcome of the most recent ping or predict attempt
+          lastChecked: number; // Date.now() timestamp of when that outcome was recorded
+        }
+      | undefined; // no entry yet: the URL has never been checked
+  };
+
  constructor(private logger: LoggingRepository) {
    this.logger.setContext(MachineLearningRepository.name);
+    this.urlAvailability = {}; // start empty; entries are written by setUrlAvailability
+  }
+
+  private setUrlAvailability(url: string, active: boolean) { // record the latest availability result for `url`
+    const current = this.urlAvailability[url];
+    if (current?.active !== active) { // log only on a state change, or the first time this URL is recorded
+      this.logger.verbose(`Setting ${url} ML server to ${active ? 'active' : 'inactive'}.`);
+    }
+    this.urlAvailability[url] = {
+      active,
+      lastChecked: Date.now(), // refreshed on every call, even when the state is unchanged
+    };
+  }
+
+  private async checkAvailability(url: string) { // ping `url`, record the outcome, and resolve to it
+    let active = false;
+    try {
+      const response = await fetch(new URL('/ping', url), {
+        signal: AbortSignal.timeout(MACHINE_LEARNING_PING_TIMEOUT), // abort the request once the ping timeout elapses
+      });
+      active = response.ok;
+    } catch {} // deliberate best-effort: any error (including the timeout abort) leaves active = false
+    this.setUrlAvailability(url, active);
+    return active;
+  }
+
+  private async shouldSkipUrl(url: string) {
+    const availability = this.urlAvailability[url];
+    if (availability === undefined) {
+      // If this is a new endpoint, then check inline and skip if it fails
+      if (!(await this.checkAvailability(url))) {
+        return true;
+      }
+      return false;
+    }
+    if (!availability.active && Date.now() - availability.lastChecked < MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME) {
+      // If this endpoint is known to be inactive and was last checked less
+      // than the backoff time ago, skip it, but start a fresh availability
+      // check without waiting for the result. This avoids delays on every
+      // search whilst allowing higher priority ML servers to recover over time.
+      void this.checkAvailability(url);
+      return true;
+    }
+    return false; // active, or inactive with the backoff time elapsed: let the caller try this URL
  }

  private async predict<T>(urls: string[], payload: ModelPayload, config: MachineLearningRequest): Promise<T> {
    const formData = await this.getFormData(payload, config);
+    let urlCounter = 0;
    for (const url of urls) {
+      urlCounter++;
+      const isLast = urlCounter >= urls.length;
+      if (!isLast && (await this.shouldSkipUrl(url))) {
+        continue;
+      }
+
      try {
        const response = await fetch(new URL('/predict', url), { method: 'POST', body: formData });
        if (response.ok) {
+          this.setUrlAvailability(url, true);
          return response.json();
        }

@ -76,6 +141,7 @@ export class MachineLearningRepository {
          `Machine learning request to "${url}" failed: ${error instanceof Error ? error.message : error}`,
        );
      }
+      this.setUrlAvailability(url, false);
    }

    throw new Error(`Machine learning request '${JSON.stringify(config)}' failed for all URLs`);