From b347ac76c19d861c31a94654580009a13617a903 Mon Sep 17 00:00:00 2001
From: aeltheos <aeltheos@crans.org>
Date: Fri, 12 Nov 2021 21:40:00 +0100
Subject: [PATCH 1/5] finished find duplicate function

---
 .../management/commands/duplicate.py          | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 photologue_custom/management/commands/duplicate.py

diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py
new file mode 100644
index 0000000..ff546aa
--- /dev/null
+++ b/photologue_custom/management/commands/duplicate.py
@@ -0,0 +1,43 @@
+from django.core.management.base import BaseCommand
+
+import argparse
+import hashlib
+
+
+class Command(BaseCommand):
+    help = 'List all duplicate for chosen galleries'
+
+    def add_arguments(self, parser):
+        pass
+
+    def handle(self, *args, **options):
+        pass
+
+
+def find_duplicate(gallery):
+    # Dict of all already checked photos
+    non_duplicate = {}
+    # Dict of all found duplicate {original.slug:[duplicates]}
+    duplicate = {}
+
+    for photo in gallery.photos.all():
+        h0 = hashlib.sha256(photo.image.read()).digest()
+        if photo not in non_duplicate:
+            # Photo is not a duplicate
+            non_duplicate[h0] = photo
+        elif len(photo.slug) > len(non_duplicate[h0.slug()]):
+            # Photo is a duplicate and photo slug is longer
+            if non_duplicate[h0].slug in duplicate:
+                duplicate[h0][1] += [photo]
+            else:
+                duplicate[h0] = [non_duplicate[h0], [photo]]
+        else:
+            # Photo is a duplicate and photo slug is shorter
+            if non_duplicate[h0].slug in duplicate:
+                duplicate[h0][0] = photo
+                duplicate[h0][1] += [non_duplicate[h0]]
+            else:
+                duplicate[h0] = [photo, [non_duplicate[h0]]]
+        non_duplicate[h0] += [photo]
+    # Return values because hash aren't need anymore
+    return duplicate.values()
-- 
GitLab


From 02426942d0a53d36a3f13bf662dc6aa4d4d8ee51 Mon Sep 17 00:00:00 2001
From: aeltheos <aeltheos@crans.org>
Date: Fri, 12 Nov 2021 21:40:29 +0100
Subject: [PATCH 2/5] Added argument to argparse

---
 photologue_custom/management/commands/duplicate.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py
index ff546aa..e597a52 100644
--- a/photologue_custom/management/commands/duplicate.py
+++ b/photologue_custom/management/commands/duplicate.py
@@ -8,7 +8,11 @@ class Command(BaseCommand):
     help = 'List all duplicate for chosen galleries'
 
     def add_arguments(self, parser):
-        pass
+        parser.add_arguments(
+            '--slugs', nargs='+', help='Try to find duplicate in the selected galleries', default=[])
+        parser.add_arguments('-a', '--all', action='store_true',
+                             help='Try to find duplicate in all galleries')
+        parser.add_arguments('-d', '--delete', action='store_true')
 
     def handle(self, *args, **options):
         pass
-- 
GitLab


From dbb71d088a2273ca99d32344955032cf314d7f5c Mon Sep 17 00:00:00 2001
From: aeltheos <aeltheos@crans.org>
Date: Sun, 14 Nov 2021 00:17:02 +0100
Subject: [PATCH 3/5] Added logic to find galleries based on user input;
 cleaned up comment in find_duplicate

---
 .../management/commands/duplicate.py          | 34 +++++++++++++------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py
index e597a52..5b08f5c 100644
--- a/photologue_custom/management/commands/duplicate.py
+++ b/photologue_custom/management/commands/duplicate.py
@@ -1,5 +1,5 @@
-from django.core.management.base import BaseCommand
-
+from django.core.management.base import BaseCommand, CommandError
+from photologue_custom.models import Gallery
 import argparse
 import hashlib
 
@@ -8,20 +8,34 @@ class Command(BaseCommand):
     help = 'List all duplicate for chosen galleries'
 
     def add_arguments(self, parser):
-        parser.add_arguments(
+        parser.add_argument(
             '--slugs', nargs='+', help='Try to find duplicate in the selected galleries', default=[])
-        parser.add_arguments('-a', '--all', action='store_true',
-                             help='Try to find duplicate in all galleries')
-        parser.add_arguments('-d', '--delete', action='store_true')
+        parser.add_argument('-a', '--all', action='store_true',
+                            help='Try to find duplicate in all galleries, overrides any slugs given')
+        parser.add_argument('-d', '--delete', action='store_true', help='Delete the duplicates found')
 
     def handle(self, *args, **options):
-        pass
+        galleries = []  # Galleries to scan; initialised so the `+=` below never hits an unbound name
+        if options['all']:
+            galleries = Gallery.objects.all()
+        else:
+            for slug in options['slugs']:
+                for gallery in Gallery.objects.all():
+                    if gallery.slug == slug:
+                        galleries += [gallery]
+                        break
+                else:
+                    raise CommandError(
+                        'Slug {} does not correspond to a gallery in the database.'.format(slug))
+                    print('error')
+        #
+
 
 
 def find_duplicate(gallery):
     # Dict of all already checked photos
     non_duplicate = {}
-    # Dict of all found duplicate {original.slug:[duplicates]}
+    # Dict of all found duplicate {h0 : (original:[duplicates])}
     duplicate = {}
 
     for photo in gallery.photos.all():
@@ -34,14 +48,14 @@ def find_duplicate(gallery):
             if non_duplicate[h0].slug in duplicate:
                 duplicate[h0][1] += [photo]
             else:
-                duplicate[h0] = [non_duplicate[h0], [photo]]
+                duplicate[h0] = (non_duplicate[h0], [photo])
         else:
             # Photo is a duplicate and photo slug is shorter
             if non_duplicate[h0].slug in duplicate:
                 duplicate[h0][0] = photo
                 duplicate[h0][1] += [non_duplicate[h0]]
             else:
-                duplicate[h0] = [photo, [non_duplicate[h0]]]
+                duplicate[h0] = (photo, [non_duplicate[h0]])
         non_duplicate[h0] += [photo]
     # Return values because hash aren't need anymore
     return duplicate.values()
-- 
GitLab


From ff50845a130e604b419940556d1cc26d78f4b07a Mon Sep 17 00:00:00 2001
From: aeltheos <aeltheos@crans.org>
Date: Sun, 14 Nov 2021 00:45:44 +0100
Subject: [PATCH 4/5] Added output of found duplicates; fixed find_duplicate

---
 .../management/commands/duplicate.py          | 33 ++++++++++---------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py
index 5b08f5c..4719375 100644
--- a/photologue_custom/management/commands/duplicate.py
+++ b/photologue_custom/management/commands/duplicate.py
@@ -1,6 +1,6 @@
 from django.core.management.base import BaseCommand, CommandError
 from photologue_custom.models import Gallery
-import argparse
+
 import hashlib
 
 
@@ -28,8 +28,15 @@ class Command(BaseCommand):
                     raise CommandError(
                         'Slug {} does not correspond to a gallery in the database.'.format(slug))
                     print('error')
-        #
-
+        # Find duplicates in all galleries
+        for gallery in galleries:
+            duplicate = find_duplicate(gallery)
+            self.stdout.write('Gallery {} :'.format(gallery.slug))
+            print(duplicate)
+            for (original, copies) in duplicate:
+                for copy in copies:
+                    self.stdout.write('{} is duplicate of {}'.format(
+                        copy.slug, original.slug))
 
 
 def find_duplicate(gallery):
@@ -40,22 +47,16 @@ def find_duplicate(gallery):
 
     for photo in gallery.photos.all():
         h0 = hashlib.sha256(photo.image.read()).digest()
-        if photo not in non_duplicate:
+        if h0 not in non_duplicate:
             # Photo is not a duplicate
             non_duplicate[h0] = photo
-        elif len(photo.slug) > len(non_duplicate[h0.slug()]):
-            # Photo is a duplicate and photo slug is longer
-            if non_duplicate[h0].slug in duplicate:
+        elif h0 in duplicate:
+            if len(photo.slug) > len(duplicate[h0][0].slug):
                 duplicate[h0][1] += [photo]
             else:
-                duplicate[h0] = (non_duplicate[h0], [photo])
-        else:
-            # Photo is a duplicate and photo slug is shorter
-            if non_duplicate[h0].slug in duplicate:
+                duplicate[h0][1] += [duplicate[h0][0]]
                 duplicate[h0][0] = photo
-                duplicate[h0][1] += [non_duplicate[h0]]
-            else:
-                duplicate[h0] = (photo, [non_duplicate[h0]])
-        non_duplicate[h0] += [photo]
-    # Return values because hash aren't need anymore
+        else:
+            duplicate[h0] = [non_duplicate[h0], [photo]]
+    # Return only the values because the hashes are no longer useful
     return duplicate.values()
-- 
GitLab


From 2df1bc4d50fc6051c7b61cd903d24cd7b0a42b1d Mon Sep 17 00:00:00 2001
From: aeltheos <aeltheos@crans.org>
Date: Sun, 14 Nov 2021 01:06:03 +0100
Subject: [PATCH 5/5] Added deletion of duplicate if --delete is specified

---
 .../management/commands/duplicate.py          | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/photologue_custom/management/commands/duplicate.py b/photologue_custom/management/commands/duplicate.py
index 4719375..c9fd70a 100644
--- a/photologue_custom/management/commands/duplicate.py
+++ b/photologue_custom/management/commands/duplicate.py
@@ -30,13 +30,22 @@ class Command(BaseCommand):
                     print('error')
         # Find duplicates in all galleries
         for gallery in galleries:
-            duplicate = find_duplicate(gallery)
+            duplicates = find_duplicate(gallery)
             self.stdout.write('Gallery {} :'.format(gallery.slug))
-            print(duplicate)
-            for (original, copies) in duplicate:
+            for (original, copies) in duplicates:
+                self.stdout.write(
+                    '  {} has following duplicate(s) :'.format(original.slug))
                 for copy in copies:
-                    self.stdout.write('{} is duplicate of {}'.format(
-                        copy.slug, original.slug))
+                    self.stdout.write('    {}'.format(copy.slug))
+            # Delete them if --delete
+            if options['delete']:
+                self.stdout.write(
+                    '  Deleting duplicate in {} :'.format(gallery.slug))
+                for (_original, copies) in duplicates:
+                    for copy in copies:
+                        self.stdout.write(
+                            '    Deleting {}...'.format(copy.slug))
+                        copy.delete()
 
 
 def find_duplicate(gallery):
-- 
GitLab