1
0
Fork 0
mirror of https://github.com/immich-app/immich.git synced 2025-01-19 18:26:46 +01:00

refactor(server): filesystem crawl (#4395)

Co-authored-by: Jonathan Jogenfors <jonathan@jogenfors.se>
This commit is contained in:
Jason Rasmussen 2023-10-09 08:25:26 -04:00 committed by GitHub
parent 9070a361bc
commit 9033e7f179
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 205 additions and 212 deletions

View file

@ -2,208 +2,204 @@ import { CrawlOptionsDto } from '@app/domain';
import mockfs from 'mock-fs';
import { FilesystemProvider } from './filesystem.provider';
interface Test {
test: string;
options: CrawlOptionsDto;
files: Record<string, boolean>;
}
const cwd = process.cwd();
const tests: Test[] = [
{
test: 'should return empty when crawling an empty path list',
options: {
pathsToCrawl: [],
},
files: {},
},
{
test: 'should crawl a single path',
options: {
pathsToCrawl: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
},
},
{
test: 'should exclude by file extension',
options: {
pathsToCrawl: ['/photos/'],
exclusionPatterns: ['**/*.tif'],
},
files: {
'/photos/image.jpg': true,
'/photos/image.tif': false,
},
},
{
test: 'should exclude by file extension without case sensitivity',
options: {
pathsToCrawl: ['/photos/'],
exclusionPatterns: ['**/*.TIF'],
},
files: {
'/photos/image.jpg': true,
'/photos/image.tif': false,
},
},
{
test: 'should exclude by folder',
options: {
pathsToCrawl: ['/photos/'],
exclusionPatterns: ['**/raw/**'],
},
files: {
'/photos/image.jpg': true,
'/photos/raw/image.jpg': false,
'/photos/raw2/image.jpg': true,
'/photos/folder/raw/image.jpg': false,
'/photos/crawl/image.jpg': true,
},
},
{
test: 'should crawl multiple paths',
options: {
pathsToCrawl: ['/photos/', '/images/', '/albums/'],
},
files: {
'/photos/image1.jpg': true,
'/images/image2.jpg': true,
'/albums/image3.jpg': true,
},
},
{
test: 'should support globbing paths',
options: {
pathsToCrawl: ['/photos*'],
},
files: {
'/photos1/image1.jpg': true,
'/photos2/image2.jpg': true,
'/images/image3.jpg': false,
},
},
{
test: 'should crawl a single path without trailing slash',
options: {
pathsToCrawl: ['/photos'],
},
files: {
'/photos/image.jpg': true,
},
},
{
test: 'should crawl a single path',
options: {
pathsToCrawl: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
'/photos/subfolder/image1.jpg': true,
'/photos/subfolder/image2.jpg': true,
'/image1.jpg': false,
},
},
{
test: 'should filter file extensions',
options: {
pathsToCrawl: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
'/photos/image.txt': false,
'/photos/1': false,
},
},
{
test: 'should include photo and video extensions',
options: {
pathsToCrawl: ['/photos/', '/videos/'],
},
files: {
'/photos/image.jpg': true,
'/photos/image.jpeg': true,
'/photos/image.heic': true,
'/photos/image.heif': true,
'/photos/image.png': true,
'/photos/image.gif': true,
'/photos/image.tif': true,
'/photos/image.tiff': true,
'/photos/image.webp': true,
'/photos/image.dng': true,
'/photos/image.nef': true,
'/videos/video.mp4': true,
'/videos/video.mov': true,
'/videos/video.webm': true,
},
},
{
test: 'should check file extensions without case sensitivity',
options: {
pathsToCrawl: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
'/photos/image.Jpg': true,
'/photos/image.jpG': true,
'/photos/image.JPG': true,
'/photos/image.jpEg': true,
'/photos/image.TIFF': true,
'/photos/image.tif': true,
'/photos/image.dng': true,
'/photos/image.NEF': true,
},
},
{
test: 'should normalize the path',
options: {
pathsToCrawl: ['/photos/1/../2'],
},
files: {
'/photos/1/image.jpg': false,
'/photos/2/image.jpg': true,
},
},
{
test: 'should return absolute paths',
options: {
pathsToCrawl: ['photos'],
},
files: {
[`${cwd}/photos/1.jpg`]: true,
[`${cwd}/photos/2.jpg`]: true,
[`/photos/3.jpg`]: false,
},
},
];
describe(FilesystemProvider.name, () => {
const sut: FilesystemProvider = new FilesystemProvider();
const sut = new FilesystemProvider();
describe('crawl', () => {
it('should return empty wnen crawling an empty path list', async () => {
const options = new CrawlOptionsDto();
options.pathsToCrawl = [];
const paths: string[] = await sut.crawl(options);
expect(paths).toHaveLength(0);
});
it('should crawl a single path', async () => {
mockfs({
'/photos/image.jpg': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
});
it('should exclude by file extension', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/image.tif': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
options.exclusionPatterns = ['**/*.tif'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
});
it('should exclude by file extension without case sensitivity', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/image.tif': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
options.exclusionPatterns = ['**/*.TIF'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
});
it('should exclude by folder', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/raw/image.jpg': '',
'/photos/raw2/image.jpg': '',
'/photos/folder/raw/image.jpg': '',
'/photos/crawl/image.jpg': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
options.exclusionPatterns = ['**/raw/**'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image.jpg', '/photos/raw2/image.jpg', '/photos/crawl/image.jpg'].sort());
});
it('should crawl multiple paths', async () => {
mockfs({
'/photos/image1.jpg': '',
'/images/image2.jpg': '',
'/albums/image3.jpg': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/', '/images/', '/albums/'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image1.jpg', '/images/image2.jpg', '/albums/image3.jpg'].sort());
});
it('should support globbing paths', async () => {
mockfs({
'/photos1/image1.jpg': '',
'/photos2/image2.jpg': '',
'/images/image3.jpg': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos*'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos1/image1.jpg', '/photos2/image2.jpg'].sort());
});
it('should crawl a single path without trailing slash', async () => {
mockfs({
'/photos/image.jpg': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
});
// TODO: test for hidden paths (not yet implemented)
it('should crawl a single path', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/subfolder/image1.jpg': '',
'/photos/subfolder/image2.jpg': '',
'/image1.jpg': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(
['/photos/image.jpg', '/photos/subfolder/image1.jpg', '/photos/subfolder/image2.jpg'].sort(),
);
});
it('should filter file extensions', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/image.txt': '',
'/photos/1': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
});
it('should include photo and video extensions', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/image.jpeg': '',
'/photos/image.heic': '',
'/photos/image.heif': '',
'/photos/image.png': '',
'/photos/image.gif': '',
'/photos/image.tif': '',
'/photos/image.tiff': '',
'/photos/image.webp': '',
'/photos/image.dng': '',
'/photos/image.nef': '',
'/videos/video.mp4': '',
'/videos/video.mov': '',
'/videos/video.webm': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/', '/videos/'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(
[
'/photos/image.jpg',
'/photos/image.jpeg',
'/photos/image.heic',
'/photos/image.heif',
'/photos/image.png',
'/photos/image.gif',
'/photos/image.tif',
'/photos/image.tiff',
'/photos/image.webp',
'/photos/image.dng',
'/photos/image.nef',
'/videos/video.mp4',
'/videos/video.mov',
'/videos/video.webm',
].sort(),
);
});
it('should check file extensions without case sensitivity', async () => {
mockfs({
'/photos/image.jpg': '',
'/photos/image.Jpg': '',
'/photos/image.jpG': '',
'/photos/image.JPG': '',
'/photos/image.jpEg': '',
'/photos/image.TIFF': '',
'/photos/image.tif': '',
'/photos/image.dng': '',
'/photos/image.NEF': '',
});
const options = new CrawlOptionsDto();
options.pathsToCrawl = ['/photos/'];
const paths: string[] = await sut.crawl(options);
expect(paths.sort()).toEqual(
[
'/photos/image.jpg',
'/photos/image.Jpg',
'/photos/image.jpG',
'/photos/image.JPG',
'/photos/image.jpEg',
'/photos/image.TIFF',
'/photos/image.tif',
'/photos/image.dng',
'/photos/image.NEF',
].sort(),
);
});
console.log(process.cwd());
afterEach(() => {
mockfs.restore();
});
describe('crawl', () => {
for (const { test, options, files } of tests) {
it(test, async () => {
mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
const actual = await sut.crawl(options);
const expected = Object.entries(files)
.filter((entry) => entry[1])
.map(([file]) => file);
expect(actual.sort()).toEqual(expected.sort());
});
}
});
});

View file

@ -111,24 +111,21 @@ export class FilesystemProvider implements IStorageRepository {
};
}
async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
const pathsToCrawl = crawlOptions.pathsToCrawl;
let paths: string;
crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
const { pathsToCrawl, exclusionPatterns } = crawlOptions;
if (!pathsToCrawl) {
// No paths to crawl, return empty list
return [];
} else if (pathsToCrawl.length === 1) {
paths = pathsToCrawl[0];
} else {
paths = '{' + pathsToCrawl.join(',') + '}';
return Promise.resolve([]);
}
paths = paths + '/**/*{' + mimeTypes.getSupportedFileExtensions().join(',') + '}';
const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;
const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;
return (await glob(paths, { nocase: true, nodir: true, ignore: crawlOptions.exclusionPatterns })).map((assetPath) =>
path.normalize(assetPath),
);
return glob(`${base}/**/${extensions}`, {
absolute: true,
nocase: true,
nodir: true,
ignore: exclusionPatterns,
});
}
readdir = readdir;