Skip to content
Snippets Groups Projects

merge duplicate deletion command into master

Merged aeltheos requested to merge duplicate into master
1 file
+ 71
0
Compare changes
  • Side-by-side
  • Inline
from django.core.management.base import BaseCommand, CommandError
from photologue_custom.models import Gallery
import hashlib
class Command(BaseCommand):
    """List, and optionally delete, duplicate photos inside galleries.

    Galleries are selected either with ``--all`` or with one or more
    ``--slugs``. For each selected gallery the duplicates reported by
    ``find_duplicate`` are written to stdout; with ``--delete`` every
    reported copy is then deleted (the photo reported as original is kept).
    """

    help = 'List all duplicate for chosen galleries'

    def add_arguments(self, parser):
        """Register the ``--slugs``, ``-a/--all`` and ``-d/--delete`` options."""
        parser.add_argument(
            '--slugs', nargs='+', default=[],
            help='Try to find duplicate in the selected galleries')
        parser.add_argument(
            '-a', '--all', action='store_true',
            help='Try to find duplicate in all galleries, overide any slugs given')
        parser.add_argument('-d', '--delete', action='store_true')

    def handle(self, *args, **options):
        """Report duplicates for the selected galleries and delete on request.

        Raises:
            CommandError: if one of the given slugs matches no gallery.
        """
        # Collect all required galleries; --all takes precedence over --slugs.
        if options['all']:
            galleries = Gallery.objects.all()
        else:
            galleries = self._galleries_for_slugs(options['slugs'])

        for gallery in galleries:
            # Materialise once: the result is walked twice below (listing,
            # then deleting), which must work whatever iterable is returned.
            duplicates = list(find_duplicate(gallery))

            self.stdout.write('Gallery {} :'.format(gallery.slug))
            for original, copies in duplicates:
                self.stdout.write(
                    ' {} has following duplicate(s) :'.format(original.slug))
                for copy in copies:
                    self.stdout.write(' {}'.format(copy.slug))

            # Delete them if --delete
            if options['delete']:
                self.stdout.write(
                    ' Deleting duplicate in {} :'.format(gallery.slug))
                for _original, copies in duplicates:
                    for copy in copies:
                        self.stdout.write(
                            ' Deleting {}...'.format(copy.slug))
                        copy.delete()

    def _galleries_for_slugs(self, slugs):
        """Return the galleries matching ``slugs``, in the order given.

        All galleries are read once and indexed by slug; if several galleries
        share a slug, the first one yielded by ``Gallery.objects.all()`` is
        used.

        Raises:
            CommandError: on the first slug that matches no gallery.
        """
        by_slug = {}
        for gallery in Gallery.objects.all():
            by_slug.setdefault(gallery.slug, gallery)

        selected = []
        for slug in slugs:
            if slug not in by_slug:
                raise CommandError(
                    'Slug {} does not correspond to a gallery in the database.'.format(slug))
            selected.append(by_slug[slug])
        return selected
def _image_digest(image):
    """Return the SHA-256 digest of ``image``'s content, closing it afterwards.

    NOTE(review): assumes ``image`` follows the Django ``File`` API
    (``open(mode)``, ``read()``, ``close()``) -- confirm for the photo model.
    The file is closed explicitly because the owning photo stays referenced
    by the caller, which would otherwise keep one open handle per photo.
    """
    image.open('rb')
    try:
        return hashlib.sha256(image.read()).digest()
    finally:
        image.close()


def find_duplicate(gallery):
    """Group the photos of ``gallery`` that have byte-identical image content.

    Photos are compared by the SHA-256 digest of their image file. Within a
    group of identical photos, the one with the shortest slug is reported as
    the original; when slugs have the same length, the photo seen first in
    ``gallery.photos.all()`` is kept as the original.

    Args:
        gallery: object exposing ``photos.all()``; each photo must provide
            ``slug`` and ``image``.

    Returns:
        A list of ``[original, copies]`` pairs, one per group that contains
        at least one duplicate, where ``copies`` is a list of photos.
    """
    # digest -> photo currently considered the original for that content
    originals = {}
    # digest -> photos considered copies (only digests seen more than once)
    copies = {}

    for photo in gallery.photos.all():
        digest = _image_digest(photo.image)

        if digest not in originals:
            # First time this content is seen: not a duplicate (yet).
            originals[digest] = photo
            continue

        kept = originals[digest]
        bucket = copies.setdefault(digest, [])
        if len(photo.slug) < len(kept.slug):
            # The newcomer has the shorter slug: it becomes the original and
            # the previously kept photo is demoted to a copy.
            originals[digest] = photo
            bucket.append(kept)
        else:
            bucket.append(photo)

    # Return only the pairs because the hashes aren't useful to callers.
    return [[originals[digest], dups] for digest, dups in copies.items()]
Loading