From 4f38a283b44746a5616a3e323a7c7e09f7ab8cef Mon Sep 17 00:00:00 2001 From: Mert <101130780+mertalev@users.noreply.github.com> Date: Sun, 17 Dec 2023 11:55:35 -0500 Subject: [PATCH] fix(server): stricter dim size check for pgvecto.rs migration (#5767) * stricter dim size check * remove unused import * added null check --- .../migrations/1700713871511-UsePgVectors.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/server/src/infra/migrations/1700713871511-UsePgVectors.ts b/server/src/infra/migrations/1700713871511-UsePgVectors.ts index 62a5a885f7..96882a0cf4 100644 --- a/server/src/infra/migrations/1700713871511-UsePgVectors.ts +++ b/server/src/infra/migrations/1700713871511-UsePgVectors.ts @@ -1,3 +1,4 @@ +import { getCLIPModelInfo } from '@app/domain/smart-info/smart-info.constant'; import { MigrationInterface, QueryRunner } from 'typeorm'; export class UsePgVectors1700713871511 implements MigrationInterface { @@ -8,13 +9,11 @@ export class UsePgVectors1700713871511 implements MigrationInterface { SELECT CARDINALITY(embedding::real[]) as dimsize FROM asset_faces LIMIT 1`); - const clipDimQuery = await queryRunner.query(` - SELECT CARDINALITY("clipEmbedding"::real[]) as dimsize - FROM smart_info - LIMIT 1`); - const faceDimSize = faceDimQuery?.[0]?.['dimsize'] ?? 512; - const clipDimSize = clipDimQuery?.[0]?.['dimsize'] ?? 512; + + const clipModelNameQuery = await queryRunner.query(`SELECT value FROM system_config WHERE key = 'machineLearning.clip.modelName'`); + const clipModelName: string = clipModelNameQuery?.[0]?.['value'] ?? 'ViT-B-32__openai'; + const clipDimSize = getCLIPModelInfo(clipModelName.replace(/"/g, '')).dimSize; await queryRunner.query('CREATE EXTENSION IF NOT EXISTS vectors'); @@ -32,7 +31,9 @@ export class UsePgVectors1700713871511 implements MigrationInterface { INSERT INTO smart_search("assetId", embedding) SELECT si."assetId", si."clipEmbedding" FROM smart_info si - WHERE "clipEmbedding" IS NOT NULL`); + WHERE "clipEmbedding" IS NOT NULL + AND CARDINALITY("clipEmbedding"::real[]) = ${clipDimSize} + AND array_position(si."clipEmbedding", NULL) IS NULL`); await queryRunner.query(`ALTER TABLE smart_info DROP COLUMN IF EXISTS "clipEmbedding"`); }