Skip to content

Commit

Permalink
Support unrelated check in clone worker
Browse files Browse the repository at this point in the history
  • Loading branch information
SimplyBoo6 committed Apr 30, 2024
1 parent e2c79bb commit f066e22
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 20 deletions.
15 changes: 14 additions & 1 deletion server/src/cache/clone-map-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@ for (let i = 0; i < blockCount; i++) {
}
const hashPhashMap = new Map<string, string>();

const hashUnrelatedMap = new Map<string, string[]>();

for (const image of job.data) {
if (image.unrelated) {
hashUnrelatedMap.set(image.hash, image.unrelated);
}
hashPhashMap.set(image.hash, image.phash);
const chunks = createChunks(image.phash);
for (let i = 0; i < blockCount; i++) {
Expand Down Expand Up @@ -79,7 +84,15 @@ parentPort.on('message', (imageA: MediaPhash) => {
continue;
}
const difference = hammingDistance(imageA.phash, potentialPhash);
if (difference <= job.threshold) {
let related = false;
const matchRelated = hashUnrelatedMap.get(potentialMatch);
if (imageA.unrelated && imageA.unrelated.includes(potentialMatch)) {
related = true;
}
if (matchRelated && matchRelated.includes(imageA.hash)) {
related = true;
}
if (difference <= job.threshold && !related) {
clones.push({
hash: potentialMatch,
difference,
Expand Down
53 changes: 34 additions & 19 deletions server/src/cache/clone-map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export interface MediaPhash {
hash: string;
phash: string;
clones?: string[];
unrelated?: string[];
}

export interface Job {
Expand All @@ -36,6 +37,20 @@ interface WorkerWrapper {
idle: boolean;
}

/**
 * Reports whether two string arrays contain the same elements, ignoring order.
 *
 * Duplicates are significant: `['a', 'a', 'b']` and `['a', 'b', 'b']` are not
 * equal. Neither input is modified; the comparison works on sorted copies.
 *
 * @param a - First array to compare.
 * @param b - Second array to compare.
 * @returns `true` when both arrays hold the same strings with the same
 *   multiplicities, otherwise `false`.
 */
function stringArrayEqual(a: readonly string[], b: readonly string[]): boolean {
  // Cheap rejection first: no need to sort when the sizes already differ.
  if (a.length !== b.length) {
    return false;
  }
  // Array.prototype.sort sorts in place, so sort copies to avoid reordering
  // arrays the caller still uses (e.g. a list that is about to be persisted).
  const sortedA = [...a].sort();
  const sortedB = [...b].sort();
  return sortedA.every((value, index) => value === sortedB[index]);
}

export function generateImageCloneMap(
database: Database,
updateStatus: (current: number, max: number) => void,
Expand All @@ -45,14 +60,15 @@ export function generateImageCloneMap(
// Now find all phashed
const imagesRaw = await database.subsetFields(
{ type: { equalsAll: ['still'] }, phashed: true, duplicateOf: { exists: false } },
{ phash: 1, hash: 1, clones: 1 },
{ phash: 1, hash: 1, unrelated: 1 },
);

const images: MediaPhash[] = imagesRaw.map((image) => {
return {
hash: image.hash,
phash: image.phash!,
clones: [],
unrelated: image.unrelated,
};
});

Expand Down Expand Up @@ -88,10 +104,6 @@ export function generateImageCloneMap(
}

const workerRun = (image: MediaPhash): Promise<void> => {
if (image.clones !== undefined) {
return Promise.resolve();
}

const workerWrapper = workers.find((it) => it.idle);
if (!workerWrapper) {
throw new Error('No available worker');
Expand All @@ -106,20 +118,23 @@ export function generateImageCloneMap(
if (data.err) {
reject(data.err);
} else if (data.clones) {
// data.clones is always set unless there is an error.
// That means this is a clear operation too.
database
.saveMedia(data.hash, {
clones: data.clones.map((c) => c.hash),
cloneDate: Math.floor(Date.now() / 1000),
})
.then(() => {
if (data.clones && data.clones.length > 0) {
console.log(`${data.hash} has ${data.clones.length} possible clones`);
}
resolve();
})
.catch((err) => reject(err));
const clones = data.clones.map((c) => c.hash);
if (!stringArrayEqual(clones, image.clones ?? [])) {
// data.clones is always set unless there is an error.
// That means this is a clear operation too.
database
.saveMedia(data.hash, {
clones,
cloneDate: Math.floor(Date.now() / 1000),
})
.then(() => {
if (data.clones && data.clones.length > 0) {
console.log(`${data.hash} has ${data.clones.length} possible clones`);
}
resolve();
})
.catch((err) => reject(err));
}
}
});

Expand Down

0 comments on commit f066e22

Please sign in to comment.