From 7fc1954e2a524fa0f2cdffd40ec067888f484233 Mon Sep 17 00:00:00 2001 From: Sushain Cherivirala Date: Mon, 15 Jan 2024 12:40:28 -0800 Subject: [PATCH] fix(server): add filename search (#6394) Fixes https://github.com/immich-app/immich/issues/5982. There are basically three options: 1. Search `originalFileName` by dropping a file extension from the query (if present). Lower fidelity but very easy - just a standard index & equality. 2. Search `originalPath` by adding an index on `reverse(originalPath)` and using `starts_with(reverse(originalPath), reverse(query) + "/")`. A weird index & query but high fidelity. 3. Add a new generated column called `originalFileNameWithExtension` or something. More storage, kinda jank. TBH, I think (1) is good enough and easy to make better in the future. For example, if I search "DSC_4242.jpg", I don't really think it matters if "DSC_4242.mov" also shows up. edit: There's a fourth approach that we discussed a bit in Discord and decided we could switch to it in the future: using a GIN index. The minor issue is that Postgres doesn't tokenize paths in a useful way (they're a single token and it won't match against partial components). We can solve that by tokenizing it ourselves. For example: ``` immich=# with vecs as (select to_tsvector('simple', array_to_string(string_to_array('upload/library/sushain/2015/2015-08-09/IMG_275.JPG', '/'), ' ')) as vec) select * from vecs where vec @@ phraseto_tsquery('simple', array_to_string(string_to_array('library/sushain', '/'), ' ')); vec ------------------------------------------------------------------------------- '-08':6 '-09':7 '2015':4,5 'img_275.jpg':8 'library':2 'sushain':3 'upload':1 (1 row) ``` The query is also tokenized with the 'split-by-slash-join-with-space' strategy. This strategy results in `IMG_275.JPG`, `2015`, `sushain` and `library/sushain` matching. But, `08` and `IMG_275` do not match. 
The former is because the token is `-08` and the latter because the `img_275.jpg` token is matched against exactly. --- server/e2e/api/specs/search.e2e-spec.ts | 77 +++++++++++++++++++ server/src/infra/entities/asset.entity.ts | 1 + .../1705306747072-AddOriginalFileNameIndex.ts | 13 ++++ .../infra/repositories/asset.repository.ts | 13 +++- server/src/infra/sql/asset.repository.sql | 13 ++-- 5 files changed, 108 insertions(+), 9 deletions(-) create mode 100644 server/src/infra/migrations/1705306747072-AddOriginalFileNameIndex.ts diff --git a/server/e2e/api/specs/search.e2e-spec.ts b/server/e2e/api/specs/search.e2e-spec.ts index 5d72411838..3b73b29524 100644 --- a/server/e2e/api/specs/search.e2e-spec.ts +++ b/server/e2e/api/specs/search.e2e-spec.ts @@ -212,4 +212,81 @@ describe(`${SearchController.name}`, () => { }); }); }); + + describe('GET /search (file name)', () => { + beforeEach(async () => { + const assetId = (await assetRepository.create(generateAsset(loginResponse.userId, libraries))).id; + await assetRepository.upsertExif({ assetId, ...searchStub.exif }); + + const assetWithMetadata = await assetRepository.getById(assetId, { exifInfo: true }); + if (!assetWithMetadata) { + throw new Error('Asset not found'); + } + asset1 = mapAsset(assetWithMetadata); + }); + + it('should return assets when searching by file name', async () => { + if (asset1?.originalFileName.length === 0) { + throw new Error('Asset 1 does not have an original file name'); + } + + const { status, body } = await request(server) + .get('/search') + .set('Authorization', `Bearer ${accessToken}`) + .query({ q: asset1.originalFileName }); + + expect(status).toBe(200); + expect(body).toMatchObject({ + albums: { + total: 0, + count: 0, + items: [], + facets: [], + }, + assets: { + total: 1, + count: 1, + items: [ + { + id: asset1.id, + originalFileName: asset1.originalFileName, + }, + ], + facets: [], + }, + }); + }); + + it('should return assets when searching by file name with extension', async 
() => { + if (asset1?.originalFileName.length === 0) { + throw new Error('Asset 1 does not have an original file name'); + } + + const { status, body } = await request(server) + .get('/search') + .set('Authorization', `Bearer ${accessToken}`) + .query({ q: asset1.originalFileName + '.jpg' }); + + expect(status).toBe(200); + expect(body).toMatchObject({ + albums: { + total: 0, + count: 0, + items: [], + facets: [], + }, + assets: { + total: 1, + count: 1, + items: [ + { + id: asset1.id, + originalFileName: asset1.originalFileName, + }, + ], + facets: [], + }, + }); + }); + }); }); diff --git a/server/src/infra/entities/asset.entity.ts b/server/src/infra/entities/asset.entity.ts index 07de006858..ea1ed123f1 100644 --- a/server/src/infra/entities/asset.entity.ts +++ b/server/src/infra/entities/asset.entity.ts @@ -127,6 +127,7 @@ export class AssetEntity { livePhotoVideoId!: string | null; @Column({ type: 'varchar' }) + @Index() originalFileName!: string; @Column({ type: 'varchar', nullable: true }) diff --git a/server/src/infra/migrations/1705306747072-AddOriginalFileNameIndex.ts b/server/src/infra/migrations/1705306747072-AddOriginalFileNameIndex.ts new file mode 100644 index 0000000000..b465d42943 --- /dev/null +++ b/server/src/infra/migrations/1705306747072-AddOriginalFileNameIndex.ts @@ -0,0 +1,13 @@ +import { MigrationInterface, QueryRunner } from 'typeorm'; + +export class AddOriginalFileNameIndex1705306747072 implements MigrationInterface { + name = 'AddOriginalFileNameIndex1705306747072'; + + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(`CREATE INDEX "IDX_4d66e76dada1ca180f67a205dc" ON "assets" ("originalFileName") `); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP INDEX "public"."IDX_4d66e76dada1ca180f67a205dc"`); + } +} diff --git a/server/src/infra/repositories/asset.repository.ts b/server/src/infra/repositories/asset.repository.ts index 6e18048716..edff22e4a7 100644 --- 
a/server/src/infra/repositories/asset.repository.ts +++ b/server/src/infra/repositories/asset.repository.ts @@ -24,7 +24,8 @@ import { Injectable } from '@nestjs/common'; import { InjectRepository } from '@nestjs/typeorm'; import _ from 'lodash'; import { DateTime } from 'luxon'; -import { And, FindOptionsRelations, FindOptionsWhere, In, IsNull, LessThan, Not, Repository } from 'typeorm'; +import path from 'path'; +import { And, Brackets, FindOptionsRelations, FindOptionsWhere, In, IsNull, LessThan, Not, Repository } from 'typeorm'; import { AssetEntity, AssetJobStatusEntity, AssetType, ExifEntity, SmartInfoEntity } from '../entities'; import { DummyValue, GenerateSql } from '../infra.util'; import { Chunked, ChunkedArray, OptionalBetween, paginate } from '../infra.utils'; @@ -820,9 +821,13 @@ export class AssetRepository implements IAssetRepository { .innerJoin('exif', 'e', 'asset."id" = e."assetId"') .leftJoin('smart_info', 'si', 'si."assetId" = asset."id"') .andWhere( - `(e."exifTextSearchableColumn" || COALESCE(si."smartInfoTextSearchableColumn", to_tsvector('english', ''))) - @@ PLAINTO_TSQUERY('english', :query)`, - { query }, + new Brackets((qb) => { + qb.where( + `(e."exifTextSearchableColumn" || COALESCE(si."smartInfoTextSearchableColumn", to_tsvector('english', ''))) + @@ PLAINTO_TSQUERY('english', :query)`, + { query }, + ).orWhere('asset."originalFileName" = :path', { path: path.parse(query).name }); + }), ) .addOrderBy('asset.fileCreatedAt', 'DESC') .limit(numResults) diff --git a/server/src/infra/sql/asset.repository.sql b/server/src/infra/sql/asset.repository.sql index f36ec07645..16e13208ea 100644 --- a/server/src/infra/sql/asset.repository.sql +++ b/server/src/infra/sql/asset.repository.sql @@ -765,11 +765,14 @@ WHERE AND "asset"."ownerId" IN ($1) AND "asset"."isArchived" = $2 AND ( - e."exifTextSearchableColumn" || COALESCE( - si."smartInfoTextSearchableColumn", - to_tsvector('english', '') - ) - ) @@ PLAINTO_TSQUERY('english', $3) + ( + 
e."exifTextSearchableColumn" || COALESCE( + si."smartInfoTextSearchableColumn", + to_tsvector('english', '') + ) + ) @@ PLAINTO_TSQUERY('english', $3) + OR asset."originalFileName" = $4 + ) ) AND ("asset"."deletedAt" IS NULL) ORDER BY