1
0
Fork 0
mirror of https://github.com/immich-app/immich.git synced 2025-01-10 05:46:46 +01:00
immich/server/src/services/search.service.ts

276 lines
10 KiB
TypeScript
Raw Normal View History

import { BadRequestException, Inject, Injectable } from '@nestjs/common';
import { SystemConfigCore } from 'src/cores/system-config.core';
import { AssetResponseDto, mapAsset } from 'src/dtos/asset-response.dto';
import { AuthDto } from 'src/dtos/auth.dto';
import { PersonResponseDto } from 'src/dtos/person.dto';
import {
MetadataSearchDto,
PlacesResponseDto,
SearchPeopleDto,
SearchPlacesDto,
SearchResponseDto,
SearchSuggestionRequestDto,
SearchSuggestionType,
SmartSearchDto,
mapPlaces,
} from 'src/dtos/search.dto';
2024-03-20 22:02:51 +01:00
import { AssetOrder } from 'src/entities/album.entity';
import { AssetEntity } from 'src/entities/asset.entity';
feat(server): near-duplicate detection (#8228) * duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
2024-05-16 19:08:37 +02:00
import { IAssetRepository, WithoutProperty } from 'src/interfaces/asset.interface';
import { ICryptoRepository } from 'src/interfaces/crypto.interface';
import {
IBaseJob,
IEntityJob,
IJobRepository,
JOBS_ASSET_PAGINATION_SIZE,
JobName,
JobStatus,
} from 'src/interfaces/job.interface';
import { ILoggerRepository } from 'src/interfaces/logger.interface';
import { IMachineLearningRepository } from 'src/interfaces/machine-learning.interface';
import { IMetadataRepository } from 'src/interfaces/metadata.interface';
import { IPartnerRepository } from 'src/interfaces/partner.interface';
import { IPersonRepository } from 'src/interfaces/person.interface';
feat(server): near-duplicate detection (#8228) * duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
2024-05-16 19:08:37 +02:00
import { AssetDuplicateResult, ISearchRepository, SearchExploreItem } from 'src/interfaces/search.interface';
import { ISystemMetadataRepository } from 'src/interfaces/system-metadata.interface';
feat(server): near-duplicate detection (#8228) * duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
2024-05-16 19:08:37 +02:00
import { isDuplicateDetectionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
import { usePagination } from 'src/utils/pagination';
@Injectable()
export class SearchService {
private configCore: SystemConfigCore;
constructor(
@Inject(ISystemMetadataRepository) systemMetadataRepository: ISystemMetadataRepository,
@Inject(IMachineLearningRepository) private machineLearning: IMachineLearningRepository,
@Inject(IPersonRepository) private personRepository: IPersonRepository,
@Inject(ISearchRepository) private searchRepository: ISearchRepository,
2023-12-08 17:15:46 +01:00
@Inject(IAssetRepository) private assetRepository: IAssetRepository,
@Inject(IPartnerRepository) private partnerRepository: IPartnerRepository,
@Inject(IMetadataRepository) private metadataRepository: IMetadataRepository,
@Inject(ILoggerRepository) private logger: ILoggerRepository,
feat(server): near-duplicate detection (#8228) * duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
2024-05-16 19:08:37 +02:00
@Inject(ICryptoRepository) private cryptoRepository: ICryptoRepository,
@Inject(IJobRepository) private jobRepository: IJobRepository,
) {
this.logger.setContext(SearchService.name);
this.configCore = SystemConfigCore.create(systemMetadataRepository, logger);
}
async searchPerson(auth: AuthDto, dto: SearchPeopleDto): Promise<PersonResponseDto[]> {
return this.personRepository.getByName(auth.user.id, dto.name, { withHidden: dto.withHidden });
}
async searchPlaces(dto: SearchPlacesDto): Promise<PlacesResponseDto[]> {
const places = await this.searchRepository.searchPlaces(dto.name);
return places.map((place) => mapPlaces(place));
}
async getExploreData(auth: AuthDto): Promise<SearchExploreItem<AssetResponseDto>[]> {
2023-12-08 17:15:46 +01:00
const options = { maxFields: 12, minAssetsPerField: 5 };
const results = await Promise.all([
this.assetRepository.getAssetIdByCity(auth.user.id, options),
this.assetRepository.getAssetIdByTag(auth.user.id, options),
2023-12-08 17:15:46 +01:00
]);
const assetIds = new Set<string>(results.flatMap((field) => field.items.map((item) => item.data)));
const assets = await this.assetRepository.getByIdsWithAllRelations([...assetIds]);
2023-12-08 17:15:46 +01:00
const assetMap = new Map<string, AssetResponseDto>(assets.map((asset) => [asset.id, mapAsset(asset)]));
return results.map(({ fieldName, items }) => ({
fieldName,
2023-12-08 17:15:46 +01:00
items: items.map(({ value, data }) => ({ value, data: assetMap.get(data) as AssetResponseDto })),
}));
}
async searchMetadata(auth: AuthDto, dto: MetadataSearchDto): Promise<SearchResponseDto> {
let checksum: Buffer | undefined;
const userIds = await this.getUserIdsToSearch(auth);
if (dto.checksum) {
const encoding = dto.checksum.length === 28 ? 'base64' : 'hex';
checksum = Buffer.from(dto.checksum, encoding);
}
dto.previewPath ??= dto.resizePath;
dto.thumbnailPath ??= dto.webpPath;
const page = dto.page ?? 1;
const size = dto.size || 250;
const enumToOrder = { [AssetOrder.ASC]: 'ASC', [AssetOrder.DESC]: 'DESC' } as const;
const { hasNextPage, items } = await this.searchRepository.searchMetadata(
{ page, size },
{
...dto,
checksum,
userIds,
orderDirection: dto.order ? enumToOrder[dto.order] : 'DESC',
},
);
return this.mapResponse(items, hasNextPage ? (page + 1).toString() : null);
}
async searchSmart(auth: AuthDto, dto: SmartSearchDto): Promise<SearchResponseDto> {
const { machineLearning } = await this.configCore.getConfig();
if (!isSmartSearchEnabled(machineLearning)) {
throw new BadRequestException('Smart search is not enabled');
}
const userIds = await this.getUserIdsToSearch(auth);
const embedding = await this.machineLearning.encodeText(
machineLearning.url,
{ text: dto.query },
machineLearning.clip,
);
const page = dto.page ?? 1;
const size = dto.size || 100;
const { hasNextPage, items } = await this.searchRepository.searchSmart(
{ page, size },
{ ...dto, userIds, embedding },
);
return this.mapResponse(items, hasNextPage ? (page + 1).toString() : null);
}
async getAssetsByCity(auth: AuthDto): Promise<AssetResponseDto[]> {
const userIds = await this.getUserIdsToSearch(auth);
const assets = await this.searchRepository.getAssetsByCity(userIds);
return assets.map((asset) => mapAsset(asset));
}
getSearchSuggestions(auth: AuthDto, dto: SearchSuggestionRequestDto): Promise<string[]> {
switch (dto.type) {
case SearchSuggestionType.COUNTRY: {
return this.metadataRepository.getCountries(auth.user.id);
}
case SearchSuggestionType.STATE: {
return this.metadataRepository.getStates(auth.user.id, dto.country);
}
case SearchSuggestionType.CITY: {
return this.metadataRepository.getCities(auth.user.id, dto.country, dto.state);
}
case SearchSuggestionType.CAMERA_MAKE: {
return this.metadataRepository.getCameraMakes(auth.user.id, dto.model);
}
case SearchSuggestionType.CAMERA_MODEL: {
return this.metadataRepository.getCameraModels(auth.user.id, dto.make);
}
}
}
feat(server): near-duplicate detection (#8228) * duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
2024-05-16 19:08:37 +02:00
async handleQueueSearchDuplicates({ force }: IBaseJob): Promise<JobStatus> {
const { machineLearning } = await this.configCore.getConfig();
if (!isDuplicateDetectionEnabled(machineLearning)) {
return JobStatus.SKIPPED;
}
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
return force
? this.assetRepository.getAll(pagination, { isVisible: true })
: this.assetRepository.getWithout(pagination, WithoutProperty.DUPLICATE);
});
for await (const assets of assetPagination) {
await this.jobRepository.queueAll(
assets.map((asset) => ({ name: JobName.DUPLICATE_DETECTION, data: { id: asset.id } })),
);
}
return JobStatus.SUCCESS;
}
async handleSearchDuplicates({ id }: IEntityJob): Promise<JobStatus> {
const { machineLearning } = await this.configCore.getConfig();
if (!isDuplicateDetectionEnabled(machineLearning)) {
return JobStatus.SKIPPED;
}
const asset = await this.assetRepository.getById(id, { smartSearch: true });
if (!asset) {
this.logger.error(`Asset ${id} not found`);
return JobStatus.FAILED;
}
if (!asset.isVisible) {
this.logger.debug(`Asset ${id} is not visible, skipping`);
return JobStatus.SKIPPED;
}
if (!asset.previewPath) {
this.logger.warn(`Asset ${id} is missing preview image`);
return JobStatus.FAILED;
}
if (!asset.smartSearch?.embedding) {
this.logger.debug(`Asset ${id} is missing embedding`);
return JobStatus.FAILED;
}
const duplicateAssets = await this.searchRepository.searchDuplicates({
assetId: asset.id,
embedding: asset.smartSearch.embedding,
maxDistance: machineLearning.duplicateDetection.maxDistance,
userIds: [asset.ownerId],
});
let assetIds = [asset.id];
if (duplicateAssets.length > 0) {
this.logger.debug(
`Found ${duplicateAssets.length} duplicate${duplicateAssets.length === 1 ? '' : 's'} for asset ${asset.id}`,
);
assetIds = await this.updateDuplicates(asset, duplicateAssets);
} else if (asset.duplicateId) {
this.logger.debug(`No duplicates found for asset ${asset.id}, removing duplicateId`);
await this.assetRepository.update({ id: asset.id, duplicateId: null });
}
const duplicatesDetectedAt = new Date();
await this.assetRepository.upsertJobStatus(...assetIds.map((assetId) => ({ assetId, duplicatesDetectedAt })));
return JobStatus.SUCCESS;
}
private async updateDuplicates(asset: AssetEntity, duplicateAssets: AssetDuplicateResult[]): Promise<string[]> {
const duplicateIds = [
...new Set(
duplicateAssets
.filter((asset): asset is AssetDuplicateResult & { duplicateId: string } => !!asset.duplicateId)
.map((duplicate) => duplicate.duplicateId),
),
];
const targetDuplicateId = asset.duplicateId ?? duplicateIds.shift() ?? this.cryptoRepository.randomUUID();
const assetIdsToUpdate = duplicateAssets
.filter((asset) => asset.duplicateId !== targetDuplicateId)
.map((duplicate) => duplicate.assetId);
assetIdsToUpdate.push(asset.id);
await this.assetRepository.updateDuplicates({ targetDuplicateId, assetIds: assetIdsToUpdate, duplicateIds });
return assetIdsToUpdate;
}
private async getUserIdsToSearch(auth: AuthDto): Promise<string[]> {
const userIds: string[] = [auth.user.id];
const partners = await this.partnerRepository.getAll(auth.user.id);
const partnersIds = partners
.filter((partner) => partner.sharedBy && partner.inTimeline)
.map((partner) => partner.sharedById);
userIds.push(...partnersIds);
return userIds;
}
private mapResponse(assets: AssetEntity[], nextPage: string | null): SearchResponseDto {
return {
albums: { total: 0, count: 0, items: [], facets: [] },
assets: {
total: assets.length,
count: assets.length,
items: assets.map((asset) => mapAsset(asset)),
facets: [],
nextPage,
},
};
}
}