From b347ac76c19d861c31a94654580009a13617a903 Mon Sep 17 00:00:00 2001 From: aeltheos <aeltheos@crans.org> Date: Fri, 12 Nov 2021 21:40:00 +0100 Subject: [PATCH 1/5] finished find duplicate function --- .../management/commands/duplicate.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 photologue_custom/management/commands/duplicate.py diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py new file mode 100644 index 0000000..ff546aa --- /dev/null +++ b/photologue_custom/management/commands/duplicate.py @@ -0,0 +1,43 @@ +from django.core.management.base import BaseCommand + +import argparse +import hashlib + + +class Command(BaseCommand): + help = 'List all duplicate for chosen galleries' + + def add_arguments(self, parser): + pass + + def handle(self, *args, **options): + pass + + +def find_duplicate(gallery): + # Dict of all already checked photos + non_duplicate = {} + # Dict of all found duplicate {original.slug:[duplicates]} + duplicate = {} + + for photo in gallery.photos.all(): + h0 = hashlib.sha256(photo.image.read()).digest() + if photo not in non_duplicate: + # Photo is not a duplicate + non_duplicate[h0] = photo + elif len(photo.slug) > len(non_duplicate[h0.slug()]): + # Photo is a duplicate and photo slug is longer + if non_duplicate[h0].slug in duplicate: + duplicate[h0][1] += [photo] + else: + duplicate[h0] = [non_duplicate[h0], [photo]] + else: + # Photo is a duplicate and photo slug is shorter + if non_duplicate[h0].slug in duplicate: + duplicate[h0][0] = photo + duplicate[h0][1] += [non_duplicate[h0]] + else: + duplicate[h0] = [photo, [non_duplicate[h0]]] + non_duplicate[h0] += [photo] + # Return values because hash aren't need anymore + return duplicate.values() -- GitLab From 02426942d0a53d36a3f13bf662dc6aa4d4d8ee51 Mon Sep 17 00:00:00 2001 From: aeltheos <aeltheos@crans.org> Date: Fri, 12 Nov 2021 21:40:29 +0100 Subject: [PATCH 2/5] Added argument to argparse --- photologue_custom/management/commands/duplicate.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py index ff546aa..e597a52 100644 --- a/photologue_custom/management/commands/duplicate.py +++ b/photologue_custom/management/commands/duplicate.py @@ -8,7 +8,11 @@ class Command(BaseCommand): help = 'List all duplicate for chosen galleries' def add_arguments(self, parser): - pass + parser.add_arguments( + '--slugs', nargs='+', help='Try to find duplicate in the selected galleries', default=[]) + parser.add_arguments('-a', '--all', action='store_true', + help='Try to find duplicate in all galleries') + parser.add_arguments('-d', '--delete', action='store_true') def handle(self, *args, **options): pass -- GitLab From dbb71d088a2273ca99d32344955032cf314d7f5c Mon Sep 17 00:00:00 2001 From: aeltheos <aeltheos@crans.org> Date: Sun, 14 Nov 2021 00:17:02 +0100 Subject: [PATCH 3/5] Added logic to find galleries based on user input Cleaned up comment in find duplicate --- .../management/commands/duplicate.py | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py index e597a52..5b08f5c 100644 --- a/photologue_custom/management/commands/duplicate.py +++ b/photologue_custom/management/commands/duplicate.py @@ -1,5 +1,5 @@ -from django.core.management.base import BaseCommand - +from django.core.management.base import BaseCommand, CommandError +from photologue_custom.models import Gallery import argparse import hashlib @@ -8,20 +8,34 @@ class Command(BaseCommand): help = 'List all duplicate for chosen galleries' def add_arguments(self, parser): - parser.add_arguments( + parser.add_argument( '--slugs', nargs='+', help='Try to find duplicate in the selected galleries', default=[]) - parser.add_arguments('-a', '--all', action='store_true', - help='Try to find duplicate in all galleries') - parser.add_arguments('-d', '--delete', action='store_true') + parser.add_argument('-a', '--all', action='store_true', + help='Try to find duplicate in all galleries, overide any slugs given') + parser.add_argument('-d', '--delete', action='store_true') def handle(self, *args, **options): - pass + # Collect all required galleries + if options['all']: + galleries = Gallery.objects.all() + else: + for slug in options['slugs']: + for gallery in Gallery.objects.all(): + if gallery.slug == slug: + galleries += [gallery] + break + else: + raise CommandError( + 'Slug {} does not correspond to a gallery in the database.'.format(slug)) + print('error') + # + def find_duplicate(gallery): # Dict of all already checked photos non_duplicate = {} - # Dict of all found duplicate {original.slug:[duplicates]} + # Dict of all found duplicate {h0 : (original:[duplicates])} duplicate = {} for photo in gallery.photos.all(): @@ -34,14 +48,14 @@ def find_duplicate(gallery): if non_duplicate[h0].slug in duplicate: duplicate[h0][1] += [photo] else: - duplicate[h0] = [non_duplicate[h0], [photo]] + duplicate[h0] = (non_duplicate[h0], [photo]) else: # Photo is a duplicate and photo slug is shorter if non_duplicate[h0].slug in duplicate: duplicate[h0][0] = photo duplicate[h0][1] += [non_duplicate[h0]] else: - duplicate[h0] = [photo, [non_duplicate[h0]]] + duplicate[h0] = (photo, [non_duplicate[h0]]) non_duplicate[h0] += [photo] # Return values because hash aren't need anymore return duplicate.values() -- GitLab From ff50845a130e604b419940556d1cc26d78f4b07a Mon Sep 17 00:00:00 2001 From: aeltheos <aeltheos@crans.org> Date: Sun, 14 Nov 2021 00:45:44 +0100 Subject: [PATCH 4/5] Added output of found duplicate Fixed find_duplicate --- .../management/commands/duplicate.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py index 5b08f5c..4719375 100644 --- a/photologue_custom/management/commands/duplicate.py +++ b/photologue_custom/management/commands/duplicate.py @@ -1,6 +1,6 @@ from django.core.management.base import BaseCommand, CommandError from photologue_custom.models import Gallery -import argparse + import hashlib @@ -28,8 +28,15 @@ class Command(BaseCommand): raise CommandError( 'Slug {} does not correspond to a gallery in the database.'.format(slug)) print('error') - # - + # Find duplicates in all galleries + for gallery in galleries: + duplicate = find_duplicate(gallery) + self.stdout.write('Gallery {} :'.format(gallery.slug)) + print(duplicate) + for (original, copies) in duplicate: + for copy in copies: + self.stdout.write('{} is duplicate of {}'.format( + copy.slug, original.slug)) def find_duplicate(gallery): @@ -40,22 +47,16 @@ def find_duplicate(gallery): for photo in gallery.photos.all(): h0 = hashlib.sha256(photo.image.read()).digest() - if photo not in non_duplicate: + if h0 not in non_duplicate: # Photo is not a duplicate non_duplicate[h0] = photo - elif len(photo.slug) > len(non_duplicate[h0.slug()]): - # Photo is a duplicate and photo slug is longer - if non_duplicate[h0].slug in duplicate: + elif h0 in duplicate: + if len(photo.slug) > len(duplicate[h0][0].slug): duplicate[h0][1] += [photo] else: - duplicate[h0] = (non_duplicate[h0], [photo]) - else: - # Photo is a duplicate and photo slug is shorter - if non_duplicate[h0].slug in duplicate: + duplicate[h0][1] += [duplicate[h0][0]] duplicate[h0][0] = photo - duplicate[h0][1] += [non_duplicate[h0]] - else: - duplicate[h0] = (photo, [non_duplicate[h0]]) - non_duplicate[h0] += [photo] - # Return values because hash aren't need anymore + else: + duplicate[h0] = [non_duplicate[h0], [photo]] + # Return only value because hash aren't usefull return duplicate.values() -- GitLab From 2df1bc4d50fc6051c7b61cd903d24cd7b0a42b1d Mon Sep 17 00:00:00 2001 From: aeltheos <aeltheos@crans.org> Date: Sun, 14 Nov 2021 01:06:03 +0100 Subject: [PATCH 5/5] Added deletion of duplicate if --delete is specified --- .../management/commands/duplicate.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py index 4719375..c9fd70a 100644 --- a/photologue_custom/management/commands/duplicate.py +++ b/photologue_custom/management/commands/duplicate.py @@ -30,13 +30,22 @@ class Command(BaseCommand): print('error') # Find duplicates in all galleries for gallery in galleries: - duplicate = find_duplicate(gallery) + duplicates = find_duplicate(gallery) self.stdout.write('Gallery {} :'.format(gallery.slug)) - print(duplicate) - for (original, copies) in duplicate: + for (original, copies) in duplicates: + self.stdout.write( + ' {} has following duplicate(s) :'.format(original.slug)) for copy in copies: - self.stdout.write('{} is duplicate of {}'.format( - copy.slug, original.slug)) + self.stdout.write(' {}'.format(copy.slug)) + # Delete them if --delete + if options['delete']: + self.stdout.write( + ' Deleting duplicate in {} :'.format(gallery.slug)) + for (_original, copies) in duplicates: + for copy in copies: + self.stdout.write( + ' Deleting {}...'.format(copy.slug)) + copy.delete() def find_duplicate(gallery): -- GitLab