# NOTE: web-capture artifact (GitLab page header "Skip to content" /
# "Commits on Source (17)") — not part of the original source files.
#!/usr/bin/env python3
import json
import time
from collections import defaultdict
from django.core.management.base import BaseCommand
from django.apps import apps
from django.db import transaction
from polymorphic.models import PolymorphicModel
def timed(method):
    """
    Decorator measuring the wall-clock time spent in a method call.

    The wrapped callable is expected to be a *method*: ``args[0]`` is the
    instance, which must expose ``print_success`` (as ImportCommand does).
    The elapsed time is reported through that instance after the call.
    """
    # Local import keeps this fix self-contained in the decorator.
    from functools import wraps

    @wraps(method)  # preserve __name__/__doc__ of the decorated method
    def _timed(*args, **kw):
        start = time.time()
        result = method(*args, **kw)
        elapsed = time.time() - start
        # args[0] is `self` of the decorated method.
        args[0].print_success(f"{method.__name__} executed ({elapsed:.2f}s)")
        return result
    return _timed
class ImportCommand(BaseCommand):
    """
    Base class for the NK15 import management commands.

    Provides the CLI options shared by every importer (NK15 database
    credentials, chunk size, idbde-mapping save/load) plus small console
    helpers for styled output and progress display.
    """

    def __init__(self, *args, **kwargs):
        # Bug fix: forward the arguments unpacked; the original passed the
        # `args` tuple and `kwargs` dict themselves as two positional args.
        super().__init__(*args, **kwargs)
        # Maps NK15 `idbde` -> new Note pk; filled or loaded by subclasses.
        self.MAP_IDBDE = dict()

    def print_success(self, to_print):
        """Write a success-styled line to the command's stdout."""
        return self.stdout.write(self.style.SUCCESS(to_print))

    def print_error(self, to_print):
        """Write an error-styled line to the command's stdout."""
        return self.stdout.write(self.style.ERROR(to_print))

    def update_line(self, n, total, content):
        """Refresh an in-place progress line of the form `(n/total) content`."""
        n = str(n)
        total = str(total)
        # Bug fix: str.rjust returns a new string; the original discarded it,
        # so the counter was never padded.
        n = n.rjust(len(total))
        print(f"\r ({n}/{total}) {content:10.10}", end="")

    def create_parser(self, prog_name, subcommand, **kwargs):
        """Extend the default parser with the options common to all importers."""
        parser = super().create_parser(prog_name, subcommand, **kwargs)
        parser.add_argument('--nk15db', action='store', default='nk15', help='NK15 database name')
        parser.add_argument('--nk15user', action='store', default='nk15_user', help='NK15 database owner')
        parser.add_argument('-s', '--save', action='store', help="save mapping of idbde")
        parser.add_argument('-m', '--map', action='store', help="import mapping of idbde")
        parser.add_argument('-c', '--chunk', type=int, default=100, help="chunk size for bulk_create")
        return parser

    def save_map(self, filename):
        """Dump MAP_IDBDE to `filename` as pretty-printed JSON."""
        with open(filename, 'w') as fp:
            json.dump(self.MAP_IDBDE, fp, sort_keys=True, indent=2)

    def load_map(self, filename):
        """Load MAP_IDBDE from JSON, coercing keys and values back to int."""
        with open(filename, 'r') as fp:
            self.MAP_IDBDE = json.load(fp, object_hook=lambda d: {int(k): int(v) for k, v in d.items()})
class BulkCreateManager(object):
    """
    This helper class keeps track of ORM objects to be created for multiple
    model classes, and automatically creates those objects with `bulk_create`
    when the number of objects accumulated for a given model class exceeds
    `chunk_size`.
    Upon completion of the loop that's `add()`ing objects, the developer must
    call `done()` to ensure the final set of objects is created for all models.
    """

    def __init__(self, chunk_size=100):
        # One pending-object list per model label (e.g. "note.Note").
        self._create_queues = defaultdict(list)
        self.chunk_size = chunk_size

    def _commit(self, model_class):
        """Flush the pending queue of `model_class` to the database."""
        model_key = model_class._meta.label
        # Check for multi-table inheritance: it happens
        # if model_class is a grand-child of PolymorphicModel.
        if model_class.__base__.__base__ is PolymorphicModel:
            # Persist the parent-table rows first, then save each child row
            # with save_base(raw=True) so the multi-table link rows are
            # written without re-running model save logic.
            self._commit(model_class.__base__)
            with transaction.atomic():
                for obj in self._create_queues[model_key]:
                    obj.save_base(raw=True)
        else:
            # Plain (non-polymorphic-grandchild) model: one bulk_create.
            model_class.objects.bulk_create(self._create_queues[model_key])
        # Reset this model's queue in both cases.
        self._create_queues[model_key] = []

    def add(self, *args):
        """
        Add an object to the queue to be created, and call bulk_create if we
        have enough objs.
        """
        for obj in args:
            model_class = type(obj)
            model_key = model_class._meta.label
            self._create_queues[model_key].append(obj)
            if len(self._create_queues[model_key]) >= self.chunk_size:
                self._commit(model_class)

    def done(self):
        """
        Always call this upon completion to make sure the final partial chunk
        is saved.
        """
        for model_name, objs in self._create_queues.items():
            if len(objs) > 0:
                self._commit(apps.get_model(model_name))
#!/usr/bin/env python3
import psycopg2 as pg
import psycopg2.extras as pge
import datetime
import json
from django.utils.timezone import make_aware, now
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.db import transaction
from note.models import Note, NoteUser, NoteClub
from note.models import Alias
from member.models import Club, Profile
from ._import_utils import ImportCommand, BulkCreateManager, timed
# Default membership year applied to imported clubs: duration in days and
# fixed start/end bounds.
M_DURATION = 396
M_START = datetime.date(2019, 8, 31)
M_END = datetime.date(2020, 9, 30)
# Fixed mapping of the special NK15 accounts (idbde <= 0) to the special
# Note pks created by the fixtures.
MAP_IDBDE = {
    -4: 2,  # Carte Bancaire (bank card)
    -3: 4,  # Virement (bank transfer)
    -2: 1,  # Especes (cash)
    -1: 3,  # Chèque (check)
    0: 5,  # BDE
}
# some Aliases have been created in the fixtures: seed the set of already
# taken normalized names from the database.
# NOTE(review): these module-level queries run at import time and therefore
# require a migrated, reachable database.
ALIAS_SET = {a[0] for a in Alias.objects.all().values_list("normalized_name")}
note_user_type = ContentType.objects.get(app_label="note", model="noteuser")
note_club_type = ContentType.objects.get(app_label="note", model="noteclub")
class Command(ImportCommand):
    """
    Import command for People base data (Comptes, and Aliases)
    """

    def add_arguments(self, parser):
        parser.add_argument('-a', '--alias', action='store_true', help="import alias")

    def import_special_account(self, cur):
        """Update the balance of the special accounts (idbde <= 0)."""
        cur.execute("SELECT idbde, solde from comptes where idbde <=0")
        for row in cur:
            note = Note.objects.get(pk=MAP_IDBDE[row["idbde"]])
            # Bug fix: the Note field is `balance` (see note_dict in
            # import_account); the original wrote to a nonexistent `amount`
            # attribute, which save() silently ignored.
            note.balance = row["solde"]
            note.save()

    @timed
    @transaction.atomic
    def import_account(self, cur, chunk_size):
        """
        Import every account of the nk15 in a batch fashion.
        Every Model has to be manually created, and no magic `.save()`
        function is being called.
        """
        cur.execute("SELECT * FROM comptes WHERE idbde > 0 ORDER BY idbde;")
        pk_club = 3
        pk_user = 1
        pk_profile = 1
        pk_note = 7  # pk 6 is Kfet!
        n = cur.rowcount
        bulk_mgr = BulkCreateManager(chunk_size=chunk_size)
        for idx, row in enumerate(cur):
            pseudo = row["pseudo"]
            pseudo_norm = Alias.normalize(pseudo)
            self.update_line(idx, n, pseudo)
            # clean pseudo (normalized pseudo must be unique)
            if pseudo_norm in ALIAS_SET:
                # Collision: disambiguate with the NK15 id.
                pseudo = pseudo + str(row["idbde"])
            else:
                ALIAS_SET.add(pseudo_norm)
            # Base Note data shared by user and club accounts.
            note_dict = {
                "pk": pk_note,
                "balance": row['solde'],
                "last_negative": None,
                "is_active": True,
                "display_image": "",
                "created_at": now()
            }
            if row["last_negatif"] is not None:
                note_dict["last_negative"] = make_aware(row["last_negatif"])
            if row["type"] == "personne":
                # sanitize password
                if row["passwd"] != "*|*" and not row["deleted"]:
                    passwd_nk15 = "$".join(["custom_nk15", "1", row["passwd"]])
                else:
                    passwd_nk15 = ''
                obj_dict = {
                    "pk": pk_user,
                    "username": row["pseudo"],
                    "password": passwd_nk15,
                    # Bug fix: in NK15, `nom` is the family name and `prenom`
                    # the given name (see the guest import); the original
                    # mapping was swapped.
                    "first_name": row["prenom"],
                    "last_name": row["nom"],
                    "email": row["mail"],
                    "is_active": True,  # temporary
                }
                profile_dict = {
                    "pk": pk_profile,
                    "user_id": pk_user,
                    "phone_number": row['tel'],
                    "address": row['adresse'],
                    "paid": row['normalien'],
                    "registration_valid": True,
                    "email_confirmed": True,
                }
                note_dict["polymorphic_ctype"] = note_user_type
                note_user_dict = {
                    "pk": pk_note,
                    "user_id": pk_user,
                }
                alias_dict = {
                    "pk": pk_note,
                    "name": pseudo,
                    "normalized_name": Alias.normalize(pseudo),
                    "note_id": pk_note,
                }
                bulk_mgr.add(User(**obj_dict),
                             Profile(**profile_dict),
                             Note(**note_dict),
                             NoteUser(**note_user_dict),
                             Alias(**alias_dict),)
                pk_user += 1
                pk_profile += 1
            else:  # club
                obj_dict = {
                    "pk": pk_club,
                    "name": row["pseudo"],
                    "email": row["mail"],
                    "membership_duration": M_DURATION,
                    "membership_start": M_START,
                    "membership_end": M_END,
                    "membership_fee_paid": 0,
                    "membership_fee_unpaid": 0,
                }
                note_club_dict = {
                    "pk": pk_note,
                    "club_id": pk_club,
                }
                alias_dict = {
                    "pk": pk_note,
                    "name": pseudo,
                    "normalized_name": Alias.normalize(pseudo),
                    "note_id": pk_note
                }
                note_dict["polymorphic_ctype"] = note_club_type
                bulk_mgr.add(Club(**obj_dict),
                             Note(**note_dict),
                             NoteClub(**note_club_dict),
                             Alias(**alias_dict))
                pk_club += 1
            # row import completed: remember the idbde -> Note pk mapping.
            MAP_IDBDE[row["idbde"]] = pk_note
            pk_note += 1
        bulk_mgr.done()

    @timed
    def import_alias(self, cur, chunk_size):
        """
        Import Alias from nk15
        We rely on validation of the models, but it is slow.
        """
        cur.execute("SELECT * FROM aliases ORDER by id")
        n = cur.rowcount
        bulk_mgr = BulkCreateManager(chunk_size=chunk_size)
        # Continue numbering after the aliases created by import_account.
        pk_alias = Alias.objects.order_by('-id').first().id + 1
        for idx, row in enumerate(cur):
            alias_name = row["alias"]
            # Truncate to the 255-char field limit, marking the cut.
            alias_name = (alias_name[:252] + '...') if len(alias_name) > 255 else alias_name
            alias_norm = Alias.normalize(alias_name)
            self.update_line(idx, n, alias_norm)
            # clean pseudo (normalized pseudo must be unique)
            if alias_norm in ALIAS_SET:
                continue
            else:
                ALIAS_SET.add(alias_norm)
            obj_dict = {
                "pk": pk_alias,
                "note_id": MAP_IDBDE[row["idbde"]],
                "name": alias_name,
                "normalized_name": alias_norm,
            }
            pk_alias += 1
            bulk_mgr.add(Alias(**obj_dict))
        bulk_mgr.done()

    def handle(self, *args, **kwargs):
        """Entry point: import accounts, then optionally aliases and the map."""
        # default args, provided by ImportCommand.
        nk15db, nk15user = kwargs['nk15db'], kwargs['nk15user']
        # connecting to nk15 database
        conn = pg.connect(database=nk15db, user=nk15user)
        cur = conn.cursor(cursor_factory=pge.DictCursor)
        self.import_special_account(cur)
        self.import_account(cur, kwargs["chunk"])
        # Alias Management
        if kwargs["alias"]:
            self.import_alias(cur, kwargs["chunk"])
        # Save the module-level idbde mapping to disk for the other importers.
        if kwargs["save"]:
            filename = kwargs["save"]
            with open(filename, 'w') as fp:
                json.dump(MAP_IDBDE, fp, sort_keys=True, indent=2)
#!/usr/bin/env python3
import psycopg2 as pg
import psycopg2.extras as pge
import datetime
import copy
from django.utils.timezone import make_aware
from django.db import transaction
from activity.models import ActivityType, Activity, Guest, Entry
from member.models import Club
from note.models import Note
from ._import_utils import ImportCommand, BulkCreateManager, timed
# Maps NK15 activity ids -> new Activity pks; filled by import_activities and
# read by the guest/entry importers below.
MAP_ACTIVITY = dict()
class Command(ImportCommand):
    """
    Import command for Activities Base Data (Comptes, and Aliases)
    """

    @timed
    @transaction.atomic
    def import_activities(self, cur, chunk):
        # Import NK15 `activites` rows as Activity objects, filling
        # MAP_ACTIVITY (NK15 id -> new pk) along the way.
        cur.execute("SELECT * FROM activites ORDER by id")
        n = cur.rowcount
        bulk_mgr = BulkCreateManager(chunk_size=chunk)
        activity_type_id = ActivityType.objects.get(name="Pot").pk  # Need to be fixed manually
        kfet = Club.objects.get(name="Kfet")
        pk_activity = 1
        for idx, row in enumerate(cur):
            self.update_line(idx, n, row["titre"])
            note = self.MAP_IDBDE[row["responsable"]]
            if note == 6244:
                # "Licorne magique" must not use its club account to propose
                # activities: substitute the owner's user id.
                # NOTE(review): indentation was lost in this capture; placing
                # `note = note.user_id` inside this branch is the only reading
                # that does not crash on the common (int) path — confirm.
                note = Note.objects.get(pk=self.MAP_IDBDE[6524])
                note = note.user_id
            organizer = Club.objects.filter(name=row["signature"])
            if organizer.exists():
                # Try to find the club that organizes the activity.
                # If not found, assume it's Kfet (fix manually)
                organizer = organizer.get()
            else:
                organizer = kfet
            obj_dict = {
                "pk": pk_activity,
                "name": row["titre"],
                "description": row["description"],
                "activity_type_id": activity_type_id,  # By default Pot
                "creater_id": note,
                "organizer_id": organizer.pk,
                "attendees_club_id": kfet.pk,  # Maybe fix manually
                "date_start": make_aware(row["debut"]),
                "date_end": make_aware(row["fin"]),
                "valid": row["validepar"] is not None,
                "open": row["ouvert"],  # Should always be False
            }
            # WARNING: Fields lieu, liste, listeimprimee are missing
            MAP_ACTIVITY[row["id"]] = pk_activity
            pk_activity += 1
            bulk_mgr.add(Activity(**obj_dict))
        bulk_mgr.done()

    @timed
    @transaction.atomic
    def import_guest(self, cur, chunk):
        # Import NK15 `invites` rows as Guest objects, keeping NK15 ids as pks.
        bulk_mgr = BulkCreateManager(chunk_size=chunk)
        cur.execute("SELECT * FROM invites ORDER by id")
        n = cur.rowcount
        for idx, row in enumerate(cur):
            self.update_line(idx, n, f"{row['nom']} {row['prenom']}")
            obj_dict = {
                "pk": row["id"],
                "activity_id": MAP_ACTIVITY[row["activite"]],
                "last_name": row["nom"],
                "first_name": row["prenom"],
                "inviter_id": self.MAP_IDBDE[row["responsable"]],
            }
            bulk_mgr.add(Guest(**obj_dict))
        bulk_mgr.done()

    @timed
    @transaction.atomic
    def import_activities_entries(self, cur, chunk):
        # Import NK15 `entree_activites` rows as Entry objects.
        bulk_mgr = BulkCreateManager(chunk_size=chunk)
        cur.execute("SELECT * FROM entree_activites ORDER by id")
        n = cur.rowcount
        for idx, row in enumerate(cur):
            self.update_line(idx, n, f"{row['idbde']} {row['responsable']}")
            obj_dict = {
                "activity_id": MAP_ACTIVITY[row["activite"]],
                "time": make_aware(row["heure_entree"]),
                # For a guest entry, the note is the inviter's; otherwise the
                # entrant's own note.
                "note_id": self.MAP_IDBDE[row["responsable"] if row['est_invite'] else row["idbde"]],
                # NOTE(review): guest ids are mapped through MAP_IDBDE here —
                # confirm guest ids really share the idbde key space.
                "guest_id": self.MAP_IDBDE[row["idbde"]] if row['est_invite'] else None,
            }
            bulk_mgr.add(Entry(**obj_dict))
        bulk_mgr.done()

    def handle(self, *args, **kwargs):
        # Entry point: activities first (fills MAP_ACTIVITY), then guests,
        # then entries, which depend on the previous steps.
        # default args, provided by ImportCommand.
        nk15db, nk15user = kwargs['nk15db'], kwargs['nk15user']
        # connecting to nk15 database
        conn = pg.connect(database=nk15db, user=nk15user)
        cur = conn.cursor(cursor_factory=pge.DictCursor)
        if kwargs["map"]:
            self.load_map(kwargs["map"])
        self.import_activities(cur, kwargs["chunk"])
        self.import_guest(cur, kwargs["chunk"])
        self.import_activities_entries(cur, kwargs["chunk"])
# NOTE: web-capture artifact ("This diff is collapsed.") — a file's content
# was omitted from this capture at this point.
#!/usr/bin/env python3
import re
import psycopg2 as pg
import psycopg2.extras as pge
import pytz
import datetime
import copy
from django.utils.timezone import make_aware
from django.db import transaction
from note.models import (TemplateCategory,
TransactionTemplate,
Transaction,
RecurrentTransaction,
SpecialTransaction
)
from note.models import Note
from activity.models import Guest, GuestTransaction
from member.models import Membership, MembershipTransaction
from ._import_utils import ImportCommand, BulkCreateManager, timed
# Pks of the BDE and Kfet clubs, as created by the fixtures.
BDE_PK = 1
KFET_PK = 2
# Special-note pk per payment method.
# NOTE(review): not referenced in this chunk — the literal pks 1-4 used in
# import_transaction mirror these values; confirm it is kept intentionally.
NOTE_SPECIAL_CODE = {
    "espèce": 1,
    "carte": 2,
    "chèque": 3,
    "virement": 4,
}
def get_date_end(date_start):
    """
    Return the end of the membership year containing `date_start`.

    Membership years end on September 30th: a membership starting in
    September or later ends on September 30th of the *following* year, one
    starting January-August ends on September 30th of the same year.

    Bug fix: the original compared a `datetime.date` to the int 8 and then
    assigned attributes on an immutable date, both of which raise at runtime;
    the intent (month > 8 -> next year, always Sept 30) is reconstructed here.
    """
    year = date_start.year + 1 if date_start.month > 8 else date_start.year
    return datetime.date(year, 9, 30)
class Command(ImportCommand):
    """
    Import command for NK15 buttons (transaction templates) and transactions.
    """

    def add_arguments(self, parser):
        parser.add_argument('-b', '--buttons', action='store_true', help="import buttons")
        # Bug fix: parse the start id as an int; it is interpolated into SQL
        # and compared to t.id.
        parser.add_argument('-t', '--transactions', action='store', type=int, default=0,
                            help="start id for transaction import")

    @timed
    @transaction.atomic
    def import_buttons(self, cur, chunk_size):
        """
        Import NK15 `boutons` as TransactionTemplate objects.

        Returns `(buttons, categories)`: template name -> NK15 button id, and
        category name -> TemplateCategory pk, both consumed by
        import_transaction.
        """
        categories = dict()
        buttons = dict()
        bulk_mgr = BulkCreateManager(chunk_size=chunk_size)
        cur.execute("SELECT * FROM boutons;")
        n = cur.rowcount
        pk_category = 1
        for idx, row in enumerate(cur):
            self.update_line(idx, n, row["label"])
            if row["categorie"] not in categories:
                # Bug fix: record the pk actually given to the created
                # TemplateCategory; the original incremented the counter
                # before storing, mapping every category one pk too high.
                categories[row["categorie"]] = pk_category
                bulk_mgr.add(TemplateCategory(pk=pk_category, name=row["categorie"]))
                pk_category += 1
            obj_dict = {
                "pk": row["id"],
                "name": row["label"],
                "amount": row["montant"],
                "destination_id": self.MAP_IDBDE[row["destinataire"]],
                "category_id": categories[row["categorie"]],
                "display": row["affiche"],
                "description": row["description"],
            }
            # Duplicate labels: disambiguate with the destination id.
            # Bug fix: the template field is "name"; the original read and
            # wrote a nonexistent "label" key (KeyError at runtime).
            if row["label"] in buttons:
                obj_dict["name"] = f"{obj_dict['name']}_{obj_dict['destination_id']}"
            bulk_mgr.add(TransactionTemplate(**obj_dict))
            buttons[obj_dict["name"]] = row["id"]
        bulk_mgr.done()
        return buttons, categories

    @timed
    @transaction.atomic
    def import_transaction(self, cur, chunk_size, idmin, buttons, categories):
        """
        Import NK15 `transactions` (left-joined with `adhesions`) with id >=
        `idmin`, creating the matching child transaction model per row type.
        """
        bulk_mgr = BulkCreateManager(chunk_size=chunk_size)
        cur.execute(
            f"SELECT t.date AS transac_date, t.type, t.emetteur,\
            t.destinataire,t.quantite, t.montant, t.description,\
            t.valide, t.cantinvalidate, t.categorie, \
            a.idbde, a.annee, a.wei, a.date AS adh_date, a.section\
            FROM transactions AS t \
            LEFT JOIN adhesions AS a ON t.id = a.idtransaction \
            WHERE t.id >= {idmin} \
            ORDER BY t.id;")
        n = cur.rowcount
        pk_membership = 1
        pk_transaction = 1
        for idx, row in enumerate(cur):
            self.update_line(idx, n, row["description"])
            try:
                date = make_aware(row["transac_date"])
            except (pytz.NonExistentTimeError, pytz.AmbiguousTimeError):
                # DST gap/fold: shift by one hour to obtain a valid local time.
                date = make_aware(row["transac_date"] + datetime.timedelta(hours=1))
            # standard transaction object
            obj_dict = {
                "pk": pk_transaction,
                "destination_id": self.MAP_IDBDE[row["destinataire"]],
                "source_id": self.MAP_IDBDE[row["emetteur"]],
                "created_at": date,
                "amount": row["montant"],
                "quantity": row["quantite"],
                "reason": row["description"],
                "valid": row["valide"],
            }
            # for child transaction Models
            child_dict = {"pk": obj_dict["pk"]}
            # Bug fix: reset on every row; previously an unmatched type (e.g.
            # an invalid "adhésion") reused the previous row's child model.
            child_transaction = None
            ttype = row["type"]
            if ttype == "don" or ttype == "transfert":
                # Plain transfer/donation: no child model.
                child_transaction = None
            elif ttype == "bouton":
                child_transaction = RecurrentTransaction
                child_dict["category_id"] = categories.get(row["categorie"], categories["Autre"])
                child_dict["template_id"] = buttons[row["description"]]
            elif ttype == "crédit" or ttype == "retrait":
                child_transaction = SpecialTransaction
                # Some transaction uses BDE (idbde=0) as source or destination,
                # lets fix that.
                field_id = "source_id" if ttype == "crédit" else "destination_id"
                if "espèce" in row["description"]:
                    obj_dict[field_id] = 1
                elif "carte" in row["description"]:
                    obj_dict[field_id] = 2
                elif "cheques" in row["description"]:
                    obj_dict[field_id] = 3
                elif "virement" in row["description"]:
                    obj_dict[field_id] = 4
                # humans and clubs have always the biggest id
                actor_pk = max(row["destinataire"], row["emetteur"])
                actor = Note.objects.get(id=self.MAP_IDBDE[actor_pk])
                # custom fields of SpecialTransaction
                if actor.__class__.__name__ == "NoteUser":
                    child_dict["first_name"] = actor.user.first_name
                    child_dict["last_name"] = actor.user.last_name
                else:
                    child_dict["first_name"] = actor.club.name
                    child_dict["last_name"] = actor.club.name
            elif ttype == "adhésion" and row["valide"]:
                child_transaction = MembershipTransaction
                # Kfet membership fee is capped at 500 (cents); the remainder
                # goes to the BDE membership.
                montant = row["montant"]
                obj_dict["amount"] = min(500, montant)
                child_dict["membership_id"] = pk_membership
                kfet_dict = {
                    "pk": pk_membership,
                    # Bug fix: use *_id keys — the values are primary keys,
                    # not model instances (consistent with the rest of the file).
                    "user_id": self.MAP_IDBDE[row["idbde"]],
                    "club_id": KFET_PK,
                    # Bug fix: the SQL aliases a.date AS adh_date; there is no
                    # "date" key in the row.
                    "date_start": row["adh_date"].date(),  # Only date, not time
                    "date_end": get_date_end(row["adh_date"].date()),
                    "fee": min(500, montant)
                }
                pk_membership += 1
                pk_transaction += 1
                # BDE Membership
                obj_dict2 = obj_dict.copy()
                obj_dict2["pk"] = pk_transaction
                obj_dict2["amount"] = max(montant - 500, 0)
                # Bug fix: link the BDE MembershipTransaction to the BDE
                # membership; the original left membership_id unset.
                child_dict2 = {"pk": pk_transaction, "membership_id": pk_membership}
                bde_dict = {
                    "pk": pk_membership,
                    "user_id": self.MAP_IDBDE[row["idbde"]],
                    "club_id": BDE_PK,
                    "date_start": row["adh_date"].date(),  # Only date, not time
                    "date_end": get_date_end(row["adh_date"].date()),
                    "fee": max(montant - 500, 0),
                }
                pk_membership += 1
                # BDE membership Transaction is inserted before the Kfet membershipTransaction
                bulk_mgr.add(
                    Transaction(**obj_dict2),
                    child_transaction(**child_dict2),
                    Membership(**bde_dict),
                    Membership(**kfet_dict),
                )
            elif ttype == "invitation":
                child_transaction = GuestTransaction
                m = re.search(r"Invitation (.*?)(?:\s\()(.*?)\s(.*?)\)", row["description"])
                if m:
                    # Bug fix: m.group(i) returns group i; m.groups(i) returns
                    # the whole tuple with i as a default.
                    first_name, last_name = m.group(1), m.group(2)
                    # Bug fix: the manager is Guest.objects, not Guest.object.
                    guest = Guest.objects.filter(first_name__iexact=first_name,
                                                 last_name__iexact=last_name).first()
                    if guest is None:
                        raise ValueError(f"Guest not found: {first_name} {last_name}")
                    child_dict["guest_id"] = guest.pk
                else:
                    # Bug fix: the original raised a plain string (TypeError)
                    # and referenced never-bound variables in its message.
                    raise ValueError(f"Guest not found in description: {row['description']}")
            # Bug fix: "don"/"transfert" rows set child_transaction to None;
            # the original unconditionally called None(**child_dict).
            if child_transaction is None:
                bulk_mgr.add(Transaction(**obj_dict))
            else:
                bulk_mgr.add(Transaction(**obj_dict),
                             child_transaction(**child_dict))
            pk_transaction += 1
        # Bug fix: flush the final partial chunks; the original never called done().
        bulk_mgr.done()

    def handle(self, *args, **kwargs):
        """Entry point: import buttons, then transactions that reference them."""
        # default args, provided by ImportCommand.
        nk15db, nk15user = kwargs['nk15db'], kwargs['nk15user']
        # connecting to nk15 database
        conn = pg.connect(database=nk15db, user=nk15user)
        cur = conn.cursor(cursor_factory=pge.DictCursor)
        if kwargs["map"]:
            # Bug fix: the method defined on ImportCommand is load_map, not load.
            self.load_map(kwargs["map"])
        # Bug fix: keep the returned mappings and pass all required arguments;
        # the original discarded them and called import_transaction with two
        # args instead of five (TypeError).
        buttons, categories = self.import_buttons(cur, kwargs["chunk"])
        self.import_transaction(cur, kwargs["chunk"], kwargs["transactions"],
                                buttons, categories)
#!/bin/sh
# Reset the local note_db database, then re-apply migrations and fixtures.
# Fixes: portable shebang (/usr/bin/sh does not exist on many systems) and
# POSIX `.` instead of the bash-only `source`.
sudo -u postgres sh -c "dropdb note_db && psql -c 'CREATE DATABASE note_db OWNER note;'"
echo 'reset db'
. env/bin/activate
./manage.py migrate
./manage.py loaddata initial