From 151ba9f1d94d0c5115aa0744d1607c08575937d1 Mon Sep 17 00:00:00 2001 From: Daniel Dietzler <36593685+danieldietzler@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:07:32 +0200 Subject: [PATCH] refactor(server): telemetry env variables (#13705) refactor(server)!: telemetry env variables Co-authored-by: Mert <101130780+mertalev@users.noreply.github.com> --- docs/docs/features/monitoring.md | 4 +- docs/docs/install/environment-variables.md | 13 ++---- server/src/enum.ts | 8 ++++ server/src/interfaces/config.interface.ts | 8 +--- .../repositories/config.repository.spec.ts | 45 ++++++++----------- server/src/repositories/config.repository.ts | 43 +++++++++--------- .../src/repositories/telemetry.repository.ts | 15 ++++--- server/src/workers/api.ts | 2 +- server/src/workers/microservices.ts | 2 +- .../repositories/config.repository.mock.ts | 6 +-- 10 files changed, 67 insertions(+), 79 deletions(-) diff --git a/docs/docs/features/monitoring.md b/docs/docs/features/monitoring.md index 9de3feb7f6..184394abd0 100644 --- a/docs/docs/features/monitoring.md +++ b/docs/docs/features/monitoring.md @@ -25,10 +25,10 @@ The metrics in immich are grouped into API (endpoint calls and response times), ### Configuration -Immich will not expose an endpoint for metrics by default. To enable this endpoint, you can add the `IMMICH_METRICS=true` environmental variable to your `.env` file. Note that only the server and microservices containers currently use this variable. +Immich will not expose an endpoint for metrics by default. To enable this endpoint, you can add the `IMMICH_TELEMETRY_INCLUDE=all` environmental variable to your `.env` file. Note that only the server container currently uses this variable. :::tip -`IMMICH_METRICS` enables all metrics, but there are also [environmental variables](/docs/install/environment-variables.md#prometheus) to toggle specific metric groups. 
If you'd like to only expose certain kinds of metrics, you can set only those environmental variables to `true`. Explicitly setting the environmental variable for a metric group overrides `IMMICH_METRICS` for that group. For example, setting `IMMICH_METRICS=true` and `IMMICH_API_METRICS=false` will enable all metrics except API metrics. +`IMMICH_TELEMETRY_INCLUDE=all` enables all metrics. For a more granular configuration, you can enumerate the telemetry metrics that should be included as a comma-separated list (e.g. `IMMICH_TELEMETRY_INCLUDE=repo,api`). Alternatively, you can also exclude specific metrics with `IMMICH_TELEMETRY_EXCLUDE`. For more information, refer to the [environment section](/docs/install/environment-variables.md#prometheus). ::: The next step is to configure a new or existing Prometheus instance to scrape this endpoint. The following steps assume that you do not have an existing Prometheus instance, but the steps will be similar either way. diff --git a/docs/docs/install/environment-variables.md b/docs/docs/install/environment-variables.md index e86199dc74..1f34b5c6d0 100644 --- a/docs/docs/install/environment-variables.md +++ b/docs/docs/install/environment-variables.md @@ -183,15 +183,10 @@ Other machine learning parameters can be tuned from the admin UI. 
## Prometheus -| Variable | Description | Default | Containers | Workers | -| :----------------------------- | :-------------------------------------------------------------------------------------------- | :-----: | :--------- | :----------------- | -| `IMMICH_METRICS`<sup>\*1</sup> | Toggle all metrics (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_API_METRICS` | Toggle metrics for endpoints and response times (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_HOST_METRICS` | Toggle metrics for CPU and memory utilization for host and process (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_IO_METRICS` | Toggle metrics for database queries, image processing, etc. (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_JOB_METRICS` | Toggle metrics for jobs and queues (one of [`true`, `false`]) | | server | api, microservices | - -\*1: Overridden for a metric group when its corresponding environmental variable is set. +| Variable | Description | Default | Containers | Workers | +| :------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :-----: | :--------- | :----------------- | +| `IMMICH_TELEMETRY_INCLUDE` | Collect these telemetries. List of `host`, `api`, `io`, `repo`, `job`. Note: You can also specify `all` to enable all | | server | api, microservices | +| `IMMICH_TELEMETRY_EXCLUDE` | Do not collect these telemetries. 
List of `host`, `api`, `io`, `repo`, `job` | | server | api, microservices | ## Docker Secrets diff --git a/server/src/enum.ts b/server/src/enum.ts index 902d6635e7..1212f41ab0 100644 --- a/server/src/enum.ts +++ b/server/src/enum.ts @@ -363,3 +363,11 @@ export enum ImmichWorker { API = 'api', MICROSERVICES = 'microservices', } + +export enum ImmichTelemetry { + HOST = 'host', + API = 'api', + IO = 'io', + REPO = 'repo', + JOB = 'job', +} diff --git a/server/src/interfaces/config.interface.ts b/server/src/interfaces/config.interface.ts index 4391909df7..e201241e82 100644 --- a/server/src/interfaces/config.interface.ts +++ b/server/src/interfaces/config.interface.ts @@ -2,7 +2,7 @@ import { RegisterQueueOptions } from '@nestjs/bullmq'; import { QueueOptions } from 'bullmq'; import { RedisOptions } from 'ioredis'; import { OpenTelemetryModuleOptions } from 'nestjs-otel/lib/interfaces'; -import { ImmichEnvironment, ImmichWorker, LogLevel } from 'src/enum'; +import { ImmichEnvironment, ImmichTelemetry, ImmichWorker, LogLevel } from 'src/enum'; import { VectorExtension } from 'src/interfaces/database.interface'; export const IConfigRepository = 'IConfigRepository'; @@ -77,11 +77,7 @@ export interface EnvData { telemetry: { apiPort: number; microservicesPort: number; - enabled: boolean; - apiMetrics: boolean; - hostMetrics: boolean; - repoMetrics: boolean; - jobMetrics: boolean; + metrics: Set<ImmichTelemetry>; }; storage: { diff --git a/server/src/repositories/config.repository.spec.ts b/server/src/repositories/config.repository.spec.ts index 84da211182..516ceaaf82 100644 --- a/server/src/repositories/config.repository.spec.ts +++ b/server/src/repositories/config.repository.spec.ts @@ -1,3 +1,4 @@ +import { ImmichTelemetry } from 'src/enum'; import { clearEnvCache, ConfigRepository } from 'src/repositories/config.repository'; const getEnv = () => { @@ -12,11 +13,8 @@ const resetEnv = () => { 'IMMICH_TRUSTED_PROXIES', 'IMMICH_API_METRICS_PORT', 
'IMMICH_MICROSERVICES_METRICS_PORT', - 'IMMICH_METRICS', - 'IMMICH_API_METRICS', - 'IMMICH_HOST_METRICS', - 'IMMICH_IO_METRICS', - 'IMMICH_JOB_METRICS', + 'IMMICH_TELEMETRY_INCLUDE', + 'IMMICH_TELEMETRY_EXCLUDE', 'DB_URL', 'DB_HOSTNAME', @@ -210,11 +208,7 @@ describe('getEnv', () => { expect(telemetry).toEqual({ apiPort: 8081, microservicesPort: 8082, - enabled: false, - apiMetrics: false, - hostMetrics: false, - jobMetrics: false, - repoMetrics: false, + metrics: new Set([]), }); }); @@ -225,32 +219,29 @@ describe('getEnv', () => { expect(telemetry).toMatchObject({ apiPort: 2001, microservicesPort: 2002, + metrics: expect.any(Set), }); }); it('should run with telemetry enabled', () => { - process.env.IMMICH_METRICS = 'true'; + process.env.IMMICH_TELEMETRY_INCLUDE = 'all'; const { telemetry } = getEnv(); - expect(telemetry).toMatchObject({ - enabled: true, - apiMetrics: true, - hostMetrics: true, - jobMetrics: true, - repoMetrics: true, - }); + expect(telemetry.metrics).toEqual(new Set(Object.values(ImmichTelemetry))); }); it('should run with telemetry enabled and jobs disabled', () => { - process.env.IMMICH_METRICS = 'true'; - process.env.IMMICH_JOB_METRICS = 'false'; + process.env.IMMICH_TELEMETRY_INCLUDE = 'all'; + process.env.IMMICH_TELEMETRY_EXCLUDE = 'job'; const { telemetry } = getEnv(); - expect(telemetry).toMatchObject({ - enabled: true, - apiMetrics: true, - hostMetrics: true, - jobMetrics: false, - repoMetrics: true, - }); + expect(telemetry.metrics).toEqual( + new Set([ImmichTelemetry.API, ImmichTelemetry.HOST, ImmichTelemetry.IO, ImmichTelemetry.REPO]), + ); + }); + + it('should run with specific telemetry metrics', () => { + process.env.IMMICH_TELEMETRY_INCLUDE = 'io, host, api'; + const { telemetry } = getEnv(); + expect(telemetry.metrics).toEqual(new Set([ImmichTelemetry.API, ImmichTelemetry.HOST, ImmichTelemetry.IO])); }); }); }); diff --git a/server/src/repositories/config.repository.ts b/server/src/repositories/config.repository.ts index 
fabccd7846..0abee0f603 100644 --- a/server/src/repositories/config.repository.ts +++ b/server/src/repositories/config.repository.ts @@ -2,7 +2,7 @@ import { Injectable } from '@nestjs/common'; import { join } from 'node:path'; import { citiesFile, excludePaths } from 'src/constants'; import { Telemetry } from 'src/decorators'; -import { ImmichEnvironment, ImmichWorker, LogLevel } from 'src/enum'; +import { ImmichEnvironment, ImmichTelemetry, ImmichWorker, LogLevel } from 'src/enum'; import { EnvData, IConfigRepository } from 'src/interfaces/config.interface'; import { DatabaseExtension } from 'src/interfaces/database.interface'; import { QueueName } from 'src/interfaces/job.interface'; @@ -25,18 +25,17 @@ const stagingKeys = { }; const WORKER_TYPES = new Set(Object.values(ImmichWorker)); +const TELEMETRY_TYPES = new Set(Object.values(ImmichTelemetry)); -const asSet = (value: string | undefined, defaults: ImmichWorker[]) => { +const asSet = <T>(value: string | undefined, defaults: T[]) => { const values = (value || '').replaceAll(/\s/g, '').split(',').filter(Boolean); - return new Set(values.length === 0 ? defaults : (values as ImmichWorker[])); + return new Set(values.length === 0 ? defaults : (values as T[])); }; -const parseBoolean = (value: string | undefined, defaultValue: boolean) => (value ? 
value === 'true' : defaultValue); - const getEnv = (): EnvData => { - const included = asSet(process.env.IMMICH_WORKERS_INCLUDE, [ImmichWorker.API, ImmichWorker.MICROSERVICES]); - const excluded = asSet(process.env.IMMICH_WORKERS_EXCLUDE, []); - const workers = [...setDifference(included, excluded)]; + const includedWorkers = asSet(process.env.IMMICH_WORKERS_INCLUDE, [ImmichWorker.API, ImmichWorker.MICROSERVICES]); + const excludedWorkers = asSet(process.env.IMMICH_WORKERS_EXCLUDE, []); + const workers = [...setDifference(includedWorkers, excludedWorkers)]; for (const worker of workers) { if (!WORKER_TYPES.has(worker)) { throw new Error(`Invalid worker(s) found: ${workers.join(',')}`); @@ -69,12 +68,18 @@ const getEnv = (): EnvData => { } } - const globalEnabled = parseBoolean(process.env.IMMICH_METRICS, false); - const hostMetrics = parseBoolean(process.env.IMMICH_HOST_METRICS, globalEnabled); - const apiMetrics = parseBoolean(process.env.IMMICH_API_METRICS, globalEnabled); - const repoMetrics = parseBoolean(process.env.IMMICH_IO_METRICS, globalEnabled); - const jobMetrics = parseBoolean(process.env.IMMICH_JOB_METRICS, globalEnabled); - const telemetryEnabled = globalEnabled || hostMetrics || apiMetrics || repoMetrics || jobMetrics; + const includedTelemetries = + process.env.IMMICH_TELEMETRY_INCLUDE === 'all' + ? 
new Set(Object.values(ImmichTelemetry)) + : asSet<ImmichTelemetry>(process.env.IMMICH_TELEMETRY_INCLUDE, []); + + const excludedTelemetries = asSet<ImmichTelemetry>(process.env.IMMICH_TELEMETRY_EXCLUDE, []); + const telemetries = setDifference(includedTelemetries, excludedTelemetries); + for (const telemetry of telemetries) { + if (!TELEMETRY_TYPES.has(telemetry)) { + throw new Error(`Invalid telemetry found: ${telemetry}`); + } + } return { host: process.env.IMMICH_HOST, @@ -136,9 +141,9 @@ const getEnv = (): EnvData => { otel: { metrics: { - hostMetrics, + hostMetrics: telemetries.has(ImmichTelemetry.HOST), apiMetrics: { - enable: apiMetrics, + enable: telemetries.has(ImmichTelemetry.API), ignoreRoutes: excludePaths, }, }, @@ -168,11 +173,7 @@ const getEnv = (): EnvData => { telemetry: { apiPort: Number(process.env.IMMICH_API_METRICS_PORT || '') || 8081, microservicesPort: Number(process.env.IMMICH_MICROSERVICES_METRICS_PORT || '') || 8082, - enabled: telemetryEnabled, - hostMetrics, - apiMetrics, - repoMetrics, - jobMetrics, + metrics: telemetries, }, workers, diff --git a/server/src/repositories/telemetry.repository.ts b/server/src/repositories/telemetry.repository.ts index f450c162dc..2510460967 100644 --- a/server/src/repositories/telemetry.repository.ts +++ b/server/src/repositories/telemetry.repository.ts @@ -14,7 +14,7 @@ import { snakeCase, startCase } from 'lodash'; import { MetricService } from 'nestjs-otel'; import { copyMetadataFromFunctionToFunction } from 'nestjs-otel/lib/opentelemetry.utils'; import { serverVersion } from 'src/constants'; -import { MetadataKey } from 'src/enum'; +import { ImmichTelemetry, MetadataKey } from 'src/enum'; import { IConfigRepository } from 'src/interfaces/config.interface'; import { ILoggerRepository } from 'src/interfaces/logger.interface'; import { IMetricGroupRepository, ITelemetryRepository, MetricGroupOptions } from 'src/interfaces/telemetry.interface'; @@ -99,17 +99,18 @@ export class TelemetryRepository 
implements ITelemetryRepository { @Inject(ILoggerRepository) private logger: ILoggerRepository, ) { const { telemetry } = this.configRepository.getEnv(); - const { apiMetrics, hostMetrics, jobMetrics, repoMetrics } = telemetry; + const { metrics } = telemetry; - this.api = new MetricGroupRepository(metricService).configure({ enabled: apiMetrics }); - this.host = new MetricGroupRepository(metricService).configure({ enabled: hostMetrics }); - this.jobs = new MetricGroupRepository(metricService).configure({ enabled: jobMetrics }); - this.repo = new MetricGroupRepository(metricService).configure({ enabled: repoMetrics }); + this.api = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.API) }); + this.host = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.HOST) }); + this.jobs = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.JOB) }); + this.repo = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.REPO) }); } setup({ repositories }: { repositories: ClassConstructor<unknown>[] }) { const { telemetry } = this.configRepository.getEnv(); - if (!telemetry.enabled || !telemetry.repoMetrics) { + const { metrics } = telemetry; + if (!metrics.has(ImmichTelemetry.REPO)) { return; } diff --git a/server/src/workers/api.ts b/server/src/workers/api.ts index 6451f1b792..bc8eb22b20 100644 --- a/server/src/workers/api.ts +++ b/server/src/workers/api.ts @@ -20,7 +20,7 @@ async function bootstrap() { process.title = 'immich-api'; const { telemetry, network } = new ConfigRepository().getEnv(); - if (telemetry.enabled) { + if (telemetry.metrics.size > 0) { bootstrapTelemetry(telemetry.apiPort); } diff --git a/server/src/workers/microservices.ts b/server/src/workers/microservices.ts index df4abb01da..bd1e65d6cc 100644 --- a/server/src/workers/microservices.ts +++ b/server/src/workers/microservices.ts @@ -11,7 +11,7 @@ import { 
isStartUpError } from 'src/services/storage.service'; export async function bootstrap() { const { telemetry } = new ConfigRepository().getEnv(); - if (telemetry.enabled) { + if (telemetry.metrics.size > 0) { bootstrapTelemetry(telemetry.microservicesPort); } diff --git a/server/test/repositories/config.repository.mock.ts b/server/test/repositories/config.repository.mock.ts index bb3cfcebb9..462e9f8327 100644 --- a/server/test/repositories/config.repository.mock.ts +++ b/server/test/repositories/config.repository.mock.ts @@ -73,11 +73,7 @@ const envData: EnvData = { telemetry: { apiPort: 8081, microservicesPort: 8082, - enabled: false, - hostMetrics: false, - apiMetrics: false, - jobMetrics: false, - repoMetrics: false, + metrics: new Set(), }, workers: [ImmichWorker.API, ImmichWorker.MICROSERVICES],