Commit d8aa9d8a authored by Daniel Stan

continue parsing

parent e39883a9
......@@ -3,6 +3,7 @@
import bs4
import decimal
import re
def parse_decimal(txt):
"""Parse a decimal seen as an amount in €"""
......@@ -12,26 +13,59 @@ def parse_decimal(txt):
# Load the sample page. The context manager closes the file handle as soon
# as its content has been read instead of leaving it to the garbage collector.
# NOTE(review): no parser name is passed to BeautifulSoup, so bs4 chooses one
# on its own -- consider naming it explicitly.
with open('1.example', 'r') as page:
    doc = bs4.BeautifulSoup(page.read())
# Replace expenses with payback, for second form
r = doc.find(attrs={'id': 'expenses_accordion'})
depenses = r.findAll(attrs={'class': 'panel panel-default'})
# Parse one given expense
def parse_depense(dep):
    """Turn one expense panel into a dictionary.

    ``dep`` is the parsed element of a single expense panel. The returned
    dictionary has the keys:

    * ``title``: text of the element with class ``col-xs-6``;
    * ``value``: ``parse_decimal`` applied to the text of the first element
      with class ``col-xs-3``;
    * ``id``: integer found after the last ``=`` in the ``href`` of the last
      link of the panel;
    * ``participants`` and ``buyer``: the result of ``parse_participants``
      applied to the second line of the ``panel-body`` text.
    """
    body_text = dep.find(attrs={'class': 'panel-body'}).text
    # The second line of the panel body is handed to parse_participants.
    buyer, parts = parse_participants(body_text.split('\n')[1].strip())
    return {
        'title': dep.find(attrs={'class': 'col-xs-6'}).text,
        'value': parse_decimal(dep.findAll(attrs={'class': 'col-xs-3'})[0].text),
        'id': int(dep.findAll('a')[-1].attrs['href'].split('=')[-1]),
        # The raw-text 'participants' entry that used to precede this one was
        # always overwritten by the parsed value, so it has been removed.
        'participants': parts,
        'buyer': buyer,
    }
def parse_participants(txt):
    """Parse the participants sentence of an expense.

    ``txt`` must look like
    ``Payé par <buyer> ; participants : <name>[ (<n> parts)], ... .``
    where names contain no space.

    Returns a tuple ``(buyer, shares)``: ``buyer`` is the name following
    ``Payé par`` and ``shares`` is a dictionary whose keys are participant
    names and whose values are the integral number of shares they take in
    the bill (1 when no ``(<n> parts)`` suffix is present).

    Raises ``ValueError`` when the sentence, or one of its comma-separated
    participant entries, does not have the expected format.
    """
    # Raw strings keep the regex escapes (\. and \() out of Python's own
    # string-escape processing.
    fmatch = re.match(r'^Payé par ([^ ]*) ; participants :(.*)\.$', txt)
    if fmatch is None:
        raise ValueError('unrecognised participants line: %r' % (txt,))
    buyer = fmatch.group(1)
    res = {}
    for part in fmatch.group(2).split(','):
        part = part.strip()
        pmatch = re.match(r'^([^ ]*)(?: \(([0-9]+) parts\))?$', part)
        if pmatch is None:
            # Fail the same way as for a malformed sentence rather than with
            # an AttributeError on None.
            raise ValueError('unrecognised participant entry: %r' % (part,))
        # No explicit share count means a single share.
        res[pmatch.group(1)] = int(pmatch.group(2) or 1)
    return (buyer, res)
#print(parse_depense(depenses[0]))
def parse_depenses(doc):
    """Yield the result of ``parse_depense`` for every expense panel.

    Panels are the elements with class ``panel panel-default`` located
    inside the element whose id is ``expenses_accordion``.
    """
    container = doc.find(attrs={'id': 'expenses_accordion'})
    for panel in container.findAll(attrs={'class': 'panel panel-default'}):
        yield parse_depense(panel)
def parse_paybacks(doc):
    """Yield the result of ``parse_payback`` for every payback panel.

    Panels are the elements with class ``panel panel-default`` located
    inside the element whose id is ``paybacks_accordion``.
    """
    container = doc.find(attrs={'id': 'paybacks_accordion'})
    for panel in container.findAll(attrs={'class': 'panel panel-default'}):
        yield parse_payback(panel)
# Manual check: print what parse_participants returns for a sample sentence.
print(parse_participants('Payé par PEB ; participants : 20-100, b2moo (2 parts), Chopopope (2 parts), PEB.'))
def parse_payback(pb):
    """Turn one payback panel into a dictionary shaped like an expense.

    The returned dictionary has the keys:

    * ``title``: first line of the stripped ``panel-body`` text;
    * ``value``: ``parse_decimal`` applied to the text of the third
      ``col-xs-4`` element;
    * ``id``: integer ``value`` attribute of the element named
      ``expense_id``;
    * ``participants``: a one-entry dictionary mapping the text of the second
      ``col-xs-4`` element to a single share;
    * ``buyer``: stripped text of the first ``col-xs-4`` element.
    """
    first_line = pb.find(attrs={'class': 'panel-body'}).text.strip().split('\n')[0]
    cells = pb.findAll(attrs={'class': 'col-xs-4'})
    value_cell = cells[2]
    participant_cell = cells[1]
    buyer_cell = cells[0]

    result = {'title': first_line}
    result['value'] = parse_decimal(value_cell.text)
    result['id'] = int(pb.find(attrs={'name': 'expense_id'}).attrs['value'])
    # NOTE(review): unlike the buyer, the participant name is not stripped --
    # confirm whether that is intended.
    result['participants'] = {participant_cell.text: 1}
    result['buyer'] = buyer_cell.text.strip()
    return result
# Debug output: print only the first parsed payback, if there is one.
try:
    first_payback = next(parse_paybacks(doc))
except StopIteration:
    # No payback on the page: nothing to print.
    pass
else:
    print(first_payback)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment