From 9033e7f1790a3d9c2dac83be356c669d80cfd2b6 Mon Sep 17 00:00:00 2001 From: Jason Rasmussen Date: Mon, 9 Oct 2023 08:25:26 -0400 Subject: [PATCH] refactor(server): filesystem crawl (#4395) Co-authored-by: Jonathan Jogenfors --- .../repositories/filesystem.provider.spec.ts | 392 +++++++++--------- .../infra/repositories/filesystem.provider.ts | 25 +- 2 files changed, 205 insertions(+), 212 deletions(-) diff --git a/server/src/infra/repositories/filesystem.provider.spec.ts b/server/src/infra/repositories/filesystem.provider.spec.ts index 35c12e39af..d8f6077bf3 100644 --- a/server/src/infra/repositories/filesystem.provider.spec.ts +++ b/server/src/infra/repositories/filesystem.provider.spec.ts @@ -2,208 +2,204 @@ import { CrawlOptionsDto } from '@app/domain'; import mockfs from 'mock-fs'; import { FilesystemProvider } from './filesystem.provider'; +interface Test { + test: string; + options: CrawlOptionsDto; + files: Record<string, boolean>; +} + +const cwd = process.cwd(); + +const tests: Test[] = [ + { + test: 'should return empty when crawling an empty path list', + options: { + pathsToCrawl: [], + }, + files: {}, + }, + { + test: 'should crawl a single path', + options: { + pathsToCrawl: ['/photos/'], + }, + files: { + '/photos/image.jpg': true, + }, + }, + { + test: 'should exclude by file extension', + options: { + pathsToCrawl: ['/photos/'], + exclusionPatterns: ['**/*.tif'], + }, + files: { + '/photos/image.jpg': true, + '/photos/image.tif': false, + }, + }, + { + test: 'should exclude by file extension without case sensitivity', + options: { + pathsToCrawl: ['/photos/'], + exclusionPatterns: ['**/*.TIF'], + }, + files: { + '/photos/image.jpg': true, + '/photos/image.tif': false, + }, + }, + { + test: 'should exclude by folder', + options: { + pathsToCrawl: ['/photos/'], + exclusionPatterns: ['**/raw/**'], + }, + files: { + '/photos/image.jpg': true, + '/photos/raw/image.jpg': false, + '/photos/raw2/image.jpg': true, + '/photos/folder/raw/image.jpg': false, +
'/photos/crawl/image.jpg': true, + }, + }, + { + test: 'should crawl multiple paths', + options: { + pathsToCrawl: ['/photos/', '/images/', '/albums/'], + }, + files: { + '/photos/image1.jpg': true, + '/images/image2.jpg': true, + '/albums/image3.jpg': true, + }, + }, + { + test: 'should support globbing paths', + options: { + pathsToCrawl: ['/photos*'], + }, + files: { + '/photos1/image1.jpg': true, + '/photos2/image2.jpg': true, + '/images/image3.jpg': false, + }, + }, + { + test: 'should crawl a single path without trailing slash', + options: { + pathsToCrawl: ['/photos'], + }, + files: { + '/photos/image.jpg': true, + }, + }, + { + test: 'should crawl a single path', + options: { + pathsToCrawl: ['/photos/'], + }, + files: { + '/photos/image.jpg': true, + '/photos/subfolder/image1.jpg': true, + '/photos/subfolder/image2.jpg': true, + '/image1.jpg': false, + }, + }, + { + test: 'should filter file extensions', + options: { + pathsToCrawl: ['/photos/'], + }, + files: { + '/photos/image.jpg': true, + '/photos/image.txt': false, + '/photos/1': false, + }, + }, + { + test: 'should include photo and video extensions', + options: { + pathsToCrawl: ['/photos/', '/videos/'], + }, + files: { + '/photos/image.jpg': true, + '/photos/image.jpeg': true, + '/photos/image.heic': true, + '/photos/image.heif': true, + '/photos/image.png': true, + '/photos/image.gif': true, + '/photos/image.tif': true, + '/photos/image.tiff': true, + '/photos/image.webp': true, + '/photos/image.dng': true, + '/photos/image.nef': true, + '/videos/video.mp4': true, + '/videos/video.mov': true, + '/videos/video.webm': true, + }, + }, + { + test: 'should check file extensions without case sensitivity', + options: { + pathsToCrawl: ['/photos/'], + }, + files: { + '/photos/image.jpg': true, + '/photos/image.Jpg': true, + '/photos/image.jpG': true, + '/photos/image.JPG': true, + '/photos/image.jpEg': true, + '/photos/image.TIFF': true, + '/photos/image.tif': true, + '/photos/image.dng': true, + 
'/photos/image.NEF': true, + }, + }, + { + test: 'should normalize the path', + options: { + pathsToCrawl: ['/photos/1/../2'], + }, + files: { + '/photos/1/image.jpg': false, + '/photos/2/image.jpg': true, + }, + }, + { + test: 'should return absolute paths', + options: { + pathsToCrawl: ['photos'], + }, + files: { + [`${cwd}/photos/1.jpg`]: true, + [`${cwd}/photos/2.jpg`]: true, + [`/photos/3.jpg`]: false, + }, + }, +]; + describe(FilesystemProvider.name, () => { - const sut: FilesystemProvider = new FilesystemProvider(); + const sut = new FilesystemProvider(); + + console.log(process.cwd()); + + afterEach(() => { + mockfs.restore(); + }); describe('crawl', () => { - it('should return empty wnen crawling an empty path list', async () => { - const options = new CrawlOptionsDto(); - options.pathsToCrawl = []; - const paths: string[] = await sut.crawl(options); - expect(paths).toHaveLength(0); - }); + for (const { test, options, files } of tests) { + it(test, async () => { + mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, '']))); - it('should crawl a single path', async () => { - mockfs({ - '/photos/image.jpg': '', + const actual = await sut.crawl(options); + const expected = Object.entries(files) + .filter((entry) => entry[1]) + .map(([file]) => file); + + expect(actual.sort()).toEqual(expected.sort()); }); - - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image.jpg'].sort()); - }); - - it('should exclude by file extension', async () => { - mockfs({ - '/photos/image.jpg': '', - '/photos/image.tif': '', - }); - - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - options.exclusionPatterns = ['**/*.tif']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image.jpg'].sort()); - }); - - it('should exclude by file extension without case sensitivity', async () 
=> { - mockfs({ - '/photos/image.jpg': '', - '/photos/image.tif': '', - }); - - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - options.exclusionPatterns = ['**/*.TIF']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image.jpg'].sort()); - }); - - it('should exclude by folder', async () => { - mockfs({ - '/photos/image.jpg': '', - '/photos/raw/image.jpg': '', - '/photos/raw2/image.jpg': '', - '/photos/folder/raw/image.jpg': '', - '/photos/crawl/image.jpg': '', - }); - - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - options.exclusionPatterns = ['**/raw/**']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image.jpg', '/photos/raw2/image.jpg', '/photos/crawl/image.jpg'].sort()); - }); - - it('should crawl multiple paths', async () => { - mockfs({ - '/photos/image1.jpg': '', - '/images/image2.jpg': '', - '/albums/image3.jpg': '', - }); - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/', '/images/', '/albums/']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image1.jpg', '/images/image2.jpg', '/albums/image3.jpg'].sort()); - }); - - it('should support globbing paths', async () => { - mockfs({ - '/photos1/image1.jpg': '', - '/photos2/image2.jpg': '', - '/images/image3.jpg': '', - }); - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos*']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos1/image1.jpg', '/photos2/image2.jpg'].sort()); - }); - - it('should crawl a single path without trailing slash', async () => { - mockfs({ - '/photos/image.jpg': '', - }); - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image.jpg'].sort()); - }); - - // TODO: test for hidden 
paths (not yet implemented) - - it('should crawl a single path', async () => { - mockfs({ - '/photos/image.jpg': '', - '/photos/subfolder/image1.jpg': '', - '/photos/subfolder/image2.jpg': '', - '/image1.jpg': '', - }); - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual( - ['/photos/image.jpg', '/photos/subfolder/image1.jpg', '/photos/subfolder/image2.jpg'].sort(), - ); - }); - - it('should filter file extensions', async () => { - mockfs({ - '/photos/image.jpg': '', - '/photos/image.txt': '', - '/photos/1': '', - }); - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual(['/photos/image.jpg'].sort()); - }); - - it('should include photo and video extensions', async () => { - mockfs({ - '/photos/image.jpg': '', - '/photos/image.jpeg': '', - '/photos/image.heic': '', - '/photos/image.heif': '', - '/photos/image.png': '', - '/photos/image.gif': '', - '/photos/image.tif': '', - '/photos/image.tiff': '', - '/photos/image.webp': '', - '/photos/image.dng': '', - '/photos/image.nef': '', - '/videos/video.mp4': '', - '/videos/video.mov': '', - '/videos/video.webm': '', - }); - - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/', '/videos/']; - const paths: string[] = await sut.crawl(options); - - expect(paths.sort()).toEqual( - [ - '/photos/image.jpg', - '/photos/image.jpeg', - '/photos/image.heic', - '/photos/image.heif', - '/photos/image.png', - '/photos/image.gif', - '/photos/image.tif', - '/photos/image.tiff', - '/photos/image.webp', - '/photos/image.dng', - '/photos/image.nef', - '/videos/video.mp4', - '/videos/video.mov', - '/videos/video.webm', - ].sort(), - ); - }); - - it('should check file extensions without case sensitivity', async () => { - mockfs({ - '/photos/image.jpg': '', - '/photos/image.Jpg': '', - 
'/photos/image.jpG': '', - '/photos/image.JPG': '', - '/photos/image.jpEg': '', - '/photos/image.TIFF': '', - '/photos/image.tif': '', - '/photos/image.dng': '', - '/photos/image.NEF': '', - }); - - const options = new CrawlOptionsDto(); - options.pathsToCrawl = ['/photos/']; - const paths: string[] = await sut.crawl(options); - expect(paths.sort()).toEqual( - [ - '/photos/image.jpg', - '/photos/image.Jpg', - '/photos/image.jpG', - '/photos/image.JPG', - '/photos/image.jpEg', - '/photos/image.TIFF', - '/photos/image.tif', - '/photos/image.dng', - '/photos/image.NEF', - ].sort(), - ); - }); - - afterEach(() => { - mockfs.restore(); - }); + } }); }); diff --git a/server/src/infra/repositories/filesystem.provider.ts b/server/src/infra/repositories/filesystem.provider.ts index d734286044..25ad0288dd 100644 --- a/server/src/infra/repositories/filesystem.provider.ts +++ b/server/src/infra/repositories/filesystem.provider.ts @@ -111,24 +111,21 @@ export class FilesystemProvider implements IStorageRepository { }; } - async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> { - const pathsToCrawl = crawlOptions.pathsToCrawl; - - let paths: string; + crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> { + const { pathsToCrawl, exclusionPatterns } = crawlOptions; if (!pathsToCrawl) { - // No paths to crawl, return empty list - return []; - } else if (pathsToCrawl.length === 1) { - paths = pathsToCrawl[0]; - } else { - paths = '{' + pathsToCrawl.join(',') + '}'; + return Promise.resolve([]); } - paths = paths + '/**/*{' + mimeTypes.getSupportedFileExtensions().join(',') + '}'; + const base = pathsToCrawl.length === 1 ?
pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`; + const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`; - return (await glob(paths, { nocase: true, nodir: true, ignore: crawlOptions.exclusionPatterns })).map((assetPath) => - path.normalize(assetPath), - ); + return glob(`${base}/**/${extensions}`, { + absolute: true, + nocase: true, + nodir: true, + ignore: exclusionPatterns, + }); } readdir = readdir;