mirror of
https://github.com/immich-app/immich.git
synced 2025-01-01 08:31:59 +00:00
refactor(cli): crawl service (#8190)
This commit is contained in:
parent
a56cf35d8c
commit
db744f500b
4 changed files with 147 additions and 157 deletions
|
@ -12,11 +12,10 @@ import cliProgress from 'cli-progress';
|
|||
import { chunk, zip } from 'lodash-es';
|
||||
import { createHash } from 'node:crypto';
|
||||
import fs, { createReadStream } from 'node:fs';
|
||||
import { access, constants, stat, unlink } from 'node:fs/promises';
|
||||
import { access, constants, lstat, stat, unlink } from 'node:fs/promises';
|
||||
import os from 'node:os';
|
||||
import path, { basename } from 'node:path';
|
||||
import { CrawlService } from 'src/services/crawl.service';
|
||||
import { BaseOptions, authenticate } from 'src/utils';
|
||||
import { BaseOptions, authenticate, crawl } from 'src/utils';
|
||||
|
||||
const zipDefined = zip as <T, U>(a: T[], b: U[]) => [T, U][];
|
||||
|
||||
|
@ -115,7 +114,7 @@ class Asset {
|
|||
return unlink(this.path);
|
||||
}
|
||||
|
||||
public async hash(): Promise<string> {
|
||||
async hash(): Promise<string> {
|
||||
const sha1 = (filePath: string) => {
|
||||
const hash = createHash('sha1');
|
||||
return new Promise<string>((resolve, reject) => {
|
||||
|
@ -134,40 +133,60 @@ class Asset {
|
|||
}
|
||||
}
|
||||
|
||||
class UploadOptionsDto {
|
||||
recursive? = false;
|
||||
exclusionPatterns?: string[] = [];
|
||||
dryRun? = false;
|
||||
skipHash? = false;
|
||||
delete? = false;
|
||||
album? = false;
|
||||
albumName? = '';
|
||||
includeHidden? = false;
|
||||
concurrency? = 4;
|
||||
interface UploadOptionsDto {
|
||||
recursive?: boolean;
|
||||
exclusionPatterns?: string[];
|
||||
dryRun?: boolean;
|
||||
skipHash?: boolean;
|
||||
delete?: boolean;
|
||||
album?: boolean;
|
||||
albumName?: string;
|
||||
includeHidden?: boolean;
|
||||
concurrency: number;
|
||||
}
|
||||
|
||||
export const upload = (paths: string[], baseOptions: BaseOptions, uploadOptions: UploadOptionsDto) =>
|
||||
new UploadCommand().run(paths, baseOptions, uploadOptions);
|
||||
export const upload = async (paths: string[], baseOptions: BaseOptions, uploadOptions: UploadOptionsDto) => {
|
||||
await authenticate(baseOptions);
|
||||
|
||||
console.log('Crawling for assets...');
|
||||
|
||||
const inputFiles: string[] = [];
|
||||
for (const pathArgument of paths) {
|
||||
const fileStat = await lstat(pathArgument);
|
||||
if (fileStat.isFile()) {
|
||||
inputFiles.push(pathArgument);
|
||||
}
|
||||
}
|
||||
|
||||
const { image, video } = await getSupportedMediaTypes();
|
||||
const files = await crawl({
|
||||
pathsToCrawl: paths,
|
||||
recursive: uploadOptions.recursive,
|
||||
exclusionPatterns: uploadOptions.exclusionPatterns,
|
||||
includeHidden: uploadOptions.includeHidden,
|
||||
extensions: [...image, ...video],
|
||||
});
|
||||
|
||||
files.push(...inputFiles);
|
||||
|
||||
if (files.length === 0) {
|
||||
console.log('No assets found, exiting');
|
||||
return;
|
||||
}
|
||||
|
||||
return new UploadCommand().run(files, uploadOptions);
|
||||
};
|
||||
|
||||
// TODO refactor this
|
||||
class UploadCommand {
|
||||
public async run(paths: string[], baseOptions: BaseOptions, options: UploadOptionsDto): Promise<void> {
|
||||
await authenticate(baseOptions);
|
||||
|
||||
console.log('Crawling for assets...');
|
||||
const files = await this.getFiles(paths, options);
|
||||
|
||||
if (files.length === 0) {
|
||||
console.log('No assets found, exiting');
|
||||
return;
|
||||
}
|
||||
|
||||
async run(files: string[], options: UploadOptionsDto): Promise<void> {
|
||||
const { concurrency, dryRun } = options;
|
||||
const assetsToCheck = files.map((path) => new Asset(path));
|
||||
|
||||
const { newAssets, duplicateAssets } = await this.checkAssets(assetsToCheck, options.concurrency ?? 4);
|
||||
const { newAssets, duplicateAssets } = await this.checkAssets(assetsToCheck, concurrency);
|
||||
|
||||
const totalSizeUploaded = await this.upload(newAssets, options);
|
||||
const messageStart = options.dryRun ? 'Would have' : 'Successfully';
|
||||
const messageStart = dryRun ? 'Would have' : 'Successfully';
|
||||
if (newAssets.length === 0) {
|
||||
console.log('All assets were already uploaded, nothing to do.');
|
||||
} else {
|
||||
|
@ -189,7 +208,7 @@ class UploadCommand {
|
|||
return;
|
||||
}
|
||||
|
||||
if (options.dryRun) {
|
||||
if (dryRun) {
|
||||
console.log(`Would now have deleted assets, but skipped due to dry run`);
|
||||
return;
|
||||
}
|
||||
|
@ -199,7 +218,7 @@ class UploadCommand {
|
|||
await this.deleteAssets(newAssets, options);
|
||||
}
|
||||
|
||||
public async checkAssets(
|
||||
async checkAssets(
|
||||
assetsToCheck: Asset[],
|
||||
concurrency: number,
|
||||
): Promise<{ newAssets: Asset[]; duplicateAssets: Asset[]; rejectedAssets: Asset[] }> {
|
||||
|
@ -237,7 +256,7 @@ class UploadCommand {
|
|||
return { newAssets, duplicateAssets, rejectedAssets };
|
||||
}
|
||||
|
||||
public async upload(assetsToUpload: Asset[], options: UploadOptionsDto): Promise<number> {
|
||||
async upload(assetsToUpload: Asset[], { dryRun, concurrency }: UploadOptionsDto): Promise<number> {
|
||||
let totalSize = 0;
|
||||
|
||||
// Compute total size first
|
||||
|
@ -245,7 +264,7 @@ class UploadCommand {
|
|||
totalSize += asset.fileSize ?? 0;
|
||||
}
|
||||
|
||||
if (options.dryRun) {
|
||||
if (dryRun) {
|
||||
return totalSize;
|
||||
}
|
||||
|
||||
|
@ -260,7 +279,7 @@ class UploadCommand {
|
|||
|
||||
let totalSizeUploaded = 0;
|
||||
try {
|
||||
for (const assets of chunk(assetsToUpload, options.concurrency)) {
|
||||
for (const assets of chunk(assetsToUpload, concurrency)) {
|
||||
const ids = await this.uploadAssets(assets);
|
||||
for (const [asset, id] of zipDefined(assets, ids)) {
|
||||
asset.id = id;
|
||||
|
@ -279,42 +298,21 @@ class UploadCommand {
|
|||
return totalSizeUploaded;
|
||||
}
|
||||
|
||||
public async getFiles(paths: string[], options: UploadOptionsDto): Promise<string[]> {
|
||||
const inputFiles: string[] = [];
|
||||
for (const pathArgument of paths) {
|
||||
const fileStat = await fs.promises.lstat(pathArgument);
|
||||
if (fileStat.isFile()) {
|
||||
inputFiles.push(pathArgument);
|
||||
}
|
||||
}
|
||||
|
||||
const files: string[] = await this.crawl(paths, options);
|
||||
files.push(...inputFiles);
|
||||
return files;
|
||||
}
|
||||
|
||||
public async getAlbums(): Promise<Map<string, string>> {
|
||||
const existingAlbums = await getAllAlbums({});
|
||||
|
||||
const albumMapping = new Map<string, string>();
|
||||
for (const album of existingAlbums) {
|
||||
albumMapping.set(album.albumName, album.id);
|
||||
}
|
||||
|
||||
return albumMapping;
|
||||
}
|
||||
|
||||
public async updateAlbums(
|
||||
async updateAlbums(
|
||||
assets: Asset[],
|
||||
options: UploadOptionsDto,
|
||||
): Promise<{ createdAlbumCount: number; updatedAssetCount: number }> {
|
||||
const { dryRun, concurrency } = options;
|
||||
|
||||
if (options.albumName) {
|
||||
for (const asset of assets) {
|
||||
asset.albumName = options.albumName;
|
||||
}
|
||||
}
|
||||
|
||||
const existingAlbums = await this.getAlbums();
|
||||
const albums = await getAllAlbums({});
|
||||
const existingAlbums = new Map(albums.map((album) => [album.albumName, album.id]));
|
||||
|
||||
const assetsToUpdate = assets.filter(
|
||||
(asset): asset is Asset & { albumName: string; id: string } => !!(asset.albumName && asset.id),
|
||||
);
|
||||
|
@ -328,7 +326,7 @@ class UploadCommand {
|
|||
|
||||
const newAlbums = [...newAlbumsSet];
|
||||
|
||||
if (options.dryRun) {
|
||||
if (dryRun) {
|
||||
return { createdAlbumCount: newAlbums.length, updatedAssetCount: assetsToUpdate.length };
|
||||
}
|
||||
|
||||
|
@ -341,7 +339,7 @@ class UploadCommand {
|
|||
albumCreationProgress.start(newAlbums.length, 0);
|
||||
|
||||
try {
|
||||
for (const albumNames of chunk(newAlbums, options.concurrency)) {
|
||||
for (const albumNames of chunk(newAlbums, concurrency)) {
|
||||
const newAlbumIds = await Promise.all(
|
||||
albumNames.map((albumName: string) => createAlbum({ createAlbumDto: { albumName } }).then((r) => r.id)),
|
||||
);
|
||||
|
@ -377,7 +375,7 @@ class UploadCommand {
|
|||
|
||||
try {
|
||||
for (const [albumId, assets] of albumToAssets.entries()) {
|
||||
for (const assetBatch of chunk(assets, Math.min(1000 * (options.concurrency ?? 4), 65_000))) {
|
||||
for (const assetBatch of chunk(assets, Math.min(1000 * concurrency, 65_000))) {
|
||||
await addAssetsToAlbum({ id: albumId, bulkIdsDto: { ids: assetBatch } });
|
||||
albumUpdateProgress.increment(assetBatch.length);
|
||||
}
|
||||
|
@ -389,7 +387,7 @@ class UploadCommand {
|
|||
return { createdAlbumCount: newAlbums.length, updatedAssetCount: assetsToUpdate.length };
|
||||
}
|
||||
|
||||
public async deleteAssets(assets: Asset[], options: UploadOptionsDto): Promise<void> {
|
||||
async deleteAssets(assets: Asset[], options: UploadOptionsDto): Promise<void> {
|
||||
const deletionProgress = new cliProgress.SingleBar(
|
||||
{
|
||||
format: 'Deleting local assets | {bar} | {percentage}% | ETA: {eta}s | {value}/{total} assets',
|
||||
|
@ -444,18 +442,6 @@ class UploadCommand {
|
|||
return results.map((response) => response.id);
|
||||
}
|
||||
|
||||
private async crawl(paths: string[], options: UploadOptionsDto): Promise<string[]> {
|
||||
const formatResponse = await getSupportedMediaTypes();
|
||||
const crawlService = new CrawlService(formatResponse.image, formatResponse.video);
|
||||
|
||||
return crawlService.crawl({
|
||||
pathsToCrawl: paths,
|
||||
recursive: options.recursive,
|
||||
exclusionPatterns: options.exclusionPatterns,
|
||||
includeHidden: options.includeHidden,
|
||||
});
|
||||
}
|
||||
|
||||
private async uploadAsset(data: FormData): Promise<{ id: string }> {
|
||||
const { baseUrl, headers } = defaults;
|
||||
|
||||
|
|
|
@ -1,70 +0,0 @@
|
|||
import { glob } from 'glob';
|
||||
import * as fs from 'node:fs';
|
||||
|
||||
export class CrawlOptions {
|
||||
pathsToCrawl!: string[];
|
||||
recursive? = false;
|
||||
includeHidden? = false;
|
||||
exclusionPatterns?: string[];
|
||||
}
|
||||
|
||||
export class CrawlService {
|
||||
private readonly extensions!: string[];
|
||||
|
||||
constructor(image: string[], video: string[]) {
|
||||
this.extensions = [...image, ...video].map((extension) => extension.replace('.', ''));
|
||||
}
|
||||
|
||||
async crawl(options: CrawlOptions): Promise<string[]> {
|
||||
const { recursive, pathsToCrawl, exclusionPatterns, includeHidden } = options;
|
||||
|
||||
if (!pathsToCrawl) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const patterns: string[] = [];
|
||||
const crawledFiles: string[] = [];
|
||||
|
||||
for await (const currentPath of pathsToCrawl) {
|
||||
try {
|
||||
const stats = await fs.promises.stat(currentPath);
|
||||
if (stats.isFile() || stats.isSymbolicLink()) {
|
||||
crawledFiles.push(currentPath);
|
||||
} else {
|
||||
patterns.push(currentPath);
|
||||
}
|
||||
} catch (error: any) {
|
||||
if (error.code === 'ENOENT') {
|
||||
patterns.push(currentPath);
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let searchPattern: string;
|
||||
if (patterns.length === 1) {
|
||||
searchPattern = patterns[0];
|
||||
} else if (patterns.length === 0) {
|
||||
return crawledFiles;
|
||||
} else {
|
||||
searchPattern = '{' + patterns.join(',') + '}';
|
||||
}
|
||||
|
||||
if (recursive) {
|
||||
searchPattern = searchPattern + '/**/';
|
||||
}
|
||||
|
||||
searchPattern = `${searchPattern}/*.{${this.extensions.join(',')}}`;
|
||||
|
||||
const globbedFiles = await glob(searchPattern, {
|
||||
absolute: true,
|
||||
nocase: true,
|
||||
nodir: true,
|
||||
dot: includeHidden,
|
||||
ignore: exclusionPatterns,
|
||||
});
|
||||
|
||||
return [...crawledFiles, ...globbedFiles].sort();
|
||||
}
|
||||
}
|
|
@ -1,14 +1,31 @@
|
|||
import mockfs from 'mock-fs';
|
||||
import { CrawlOptions, CrawlService } from './crawl.service';
|
||||
import { CrawlOptions, crawl } from 'src/utils';
|
||||
|
||||
interface Test {
|
||||
test: string;
|
||||
options: CrawlOptions;
|
||||
options: Omit<CrawlOptions, 'extensions'>;
|
||||
files: Record<string, boolean>;
|
||||
}
|
||||
|
||||
const cwd = process.cwd();
|
||||
|
||||
const extensions = [
|
||||
'.jpg',
|
||||
'.jpeg',
|
||||
'.png',
|
||||
'.heif',
|
||||
'.heic',
|
||||
'.tif',
|
||||
'.nef',
|
||||
'.webp',
|
||||
'.tiff',
|
||||
'.dng',
|
||||
'.gif',
|
||||
'.mov',
|
||||
'.mp4',
|
||||
'.webm',
|
||||
];
|
||||
|
||||
const tests: Test[] = [
|
||||
{
|
||||
test: 'should return empty when crawling an empty path list',
|
||||
|
@ -251,12 +268,7 @@ const tests: Test[] = [
|
|||
},
|
||||
];
|
||||
|
||||
describe(CrawlService.name, () => {
|
||||
const sut = new CrawlService(
|
||||
['.jpg', '.jpeg', '.png', '.heif', '.heic', '.tif', '.nef', '.webp', '.tiff', '.dng', '.gif'],
|
||||
['.mov', '.mp4', '.webm'],
|
||||
);
|
||||
|
||||
describe('crawl', () => {
|
||||
afterEach(() => {
|
||||
mockfs.restore();
|
||||
});
|
||||
|
@ -266,7 +278,7 @@ describe(CrawlService.name, () => {
|
|||
it(test, async () => {
|
||||
mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
|
||||
|
||||
const actual = await sut.crawl(options);
|
||||
const actual = await crawl({ ...options, extensions });
|
||||
const expected = Object.entries(files)
|
||||
.filter((entry) => entry[1])
|
||||
.map(([file]) => file);
|
|
@ -1,5 +1,6 @@
|
|||
import { defaults, getMyUserInfo, isHttpError } from '@immich/sdk';
|
||||
import { readFile, writeFile } from 'node:fs/promises';
|
||||
import { glob } from 'glob';
|
||||
import { readFile, stat, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import yaml from 'yaml';
|
||||
|
||||
|
@ -87,3 +88,64 @@ export const withError = async <T>(promise: Promise<T>): Promise<[Error, undefin
|
|||
return [error, undefined];
|
||||
}
|
||||
};
|
||||
|
||||
export interface CrawlOptions {
|
||||
pathsToCrawl: string[];
|
||||
recursive?: boolean;
|
||||
includeHidden?: boolean;
|
||||
exclusionPatterns?: string[];
|
||||
extensions: string[];
|
||||
}
|
||||
export const crawl = async (options: CrawlOptions): Promise<string[]> => {
|
||||
const { extensions: extensionsWithPeriod, recursive, pathsToCrawl, exclusionPatterns, includeHidden } = options;
|
||||
const extensions = extensionsWithPeriod.map((extension) => extension.replace('.', ''));
|
||||
|
||||
if (!pathsToCrawl) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const patterns: string[] = [];
|
||||
const crawledFiles: string[] = [];
|
||||
|
||||
for await (const currentPath of pathsToCrawl) {
|
||||
try {
|
||||
const stats = await stat(currentPath);
|
||||
if (stats.isFile() || stats.isSymbolicLink()) {
|
||||
crawledFiles.push(currentPath);
|
||||
} else {
|
||||
patterns.push(currentPath);
|
||||
}
|
||||
} catch (error: any) {
|
||||
if (error.code === 'ENOENT') {
|
||||
patterns.push(currentPath);
|
||||
} else {
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let searchPattern: string;
|
||||
if (patterns.length === 1) {
|
||||
searchPattern = patterns[0];
|
||||
} else if (patterns.length === 0) {
|
||||
return crawledFiles;
|
||||
} else {
|
||||
searchPattern = '{' + patterns.join(',') + '}';
|
||||
}
|
||||
|
||||
if (recursive) {
|
||||
searchPattern = searchPattern + '/**/';
|
||||
}
|
||||
|
||||
searchPattern = `${searchPattern}/*.{${extensions.join(',')}}`;
|
||||
|
||||
const globbedFiles = await glob(searchPattern, {
|
||||
absolute: true,
|
||||
nocase: true,
|
||||
nodir: true,
|
||||
dot: includeHidden,
|
||||
ignore: exclusionPatterns,
|
||||
});
|
||||
|
||||
return [...crawledFiles, ...globbedFiles].sort();
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue