first.py 2.22 KB
Newer Older
Daniel Stan's avatar
Daniel Stan committed
1
#!/usr/bin/env python3
2 3 4 5
# -*- coding: utf-8 -*-

import bs4
import decimal
Daniel Stan's avatar
Daniel Stan committed
6
import re
7 8

def parse_decimal(txt):
    """Parse an amount in euros, as displayed on the page, into a Decimal.

    Whitespace anywhere in the string and the ``€`` sign are dropped, and
    the decimal comma is turned into a decimal point before conversion.

    Args:
        txt: Amount text such as ``'1 234,56 €'``.

    Returns:
        The amount as a ``decimal.Decimal``.

    Raises:
        decimal.InvalidOperation: If what remains is not a valid number.
    """
    # str.split() without argument splits on every Unicode whitespace
    # character, so non-breaking spaces used as thousands separators are
    # removed along with plain spaces.
    compact = ''.join(txt.split())
    return decimal.Decimal(compact.replace(',', '.').replace('€', ''))


# Load the saved page; the context manager closes the file handle as soon as
# its content has been read instead of leaving it open.
with open('1.example', 'r') as page:
    doc = bs4.BeautifulSoup(page.read())


# Parse a single expense
Daniel Stan's avatar
Daniel Stan committed
18
def parse_depense(dep):
    """Build a dictionary describing one expense panel.

    Args:
        dep: Element for a single expense; it is queried with ``find`` and
            ``find_all`` (presumably a bs4 ``Tag`` -- confirm against caller).

    Returns:
        A dict with the keys ``'title'``, ``'value'``, ``'id'``,
        ``'participants'`` and ``'buyer'``.

    Raises:
        ValueError: Propagated from ``parse_participants`` when the
            participants sentence is not recognised.
    """
    # The second line of the panel body text is the sentence handed to
    # parse_participants.
    body_lines = dep.find(attrs={'class': 'panel-body'}).text.split('\n')
    buyer, parts = parse_participants(body_lines[1].strip())

    title = dep.find(attrs={'class': 'col-xs-6'}).text
    # The amount is read from the first element carrying the col-xs-3 class.
    value = parse_decimal(dep.find_all(attrs={'class': 'col-xs-3'})[0].text)
    # The id is whatever follows the last '=' in the href of the last link.
    last_href = dep.find_all('a')[-1].attrs['href']
    expense_id = int(last_href.split('=')[-1])

    return {
        'title': title,
        'value': value,
        'id': expense_id,
        'participants': parts,
        'buyer': buyer,
    }
Daniel Stan's avatar
Daniel Stan committed
28

Daniel Stan's avatar
Daniel Stan committed
29 30 31 32
def parse_participants(txt):
    """Parse a "Payé par ... ; participants : ..." sentence.

    Args:
        txt: The sentence, ending with a full stop, e.g.
            ``'Payé par Alice ; participants : Alice, Bob (2 parts).'``.

    Returns:
        A ``(buyer, shares)`` tuple where ``buyer`` is the name of the payer
        and ``shares`` maps each participant name to the integral number of
        shares they take in the bill (1 when no count is given).

    Raises:
        ValueError: If the sentence, or one of its participant entries, does
            not have the expected form.
    """
    fmatch = re.match(r'^Payé par ([^ ]*) ; participants :(.*)\.$', txt)
    if fmatch is None:
        raise ValueError('unrecognised participants line: {!r}'.format(txt))
    buyer = fmatch.group(1)
    res = {}
    for part in fmatch.group(2).split(','):
        part = part.strip()
        pmatch = re.match(r'^([^ ]*)(?: \(([0-9]+) parts\))?$', part)
        if pmatch is None:
            # Report a malformed entry the same way as a malformed sentence
            # rather than failing on ``None.group``.
            raise ValueError(
                'unrecognised participant entry: {!r}'.format(part))
        # No "(N parts)" suffix means a single share.
        res[pmatch.group(1)] = int(pmatch.group(2) or 1)
    return (buyer, res)
Daniel Stan's avatar
Daniel Stan committed
44
   
Daniel Stan's avatar
Daniel Stan committed
45 46 47 48 49 50 51 52 53 54 55
def parse_depenses(doc):
    """Yield one parsed expense per panel of the expenses accordion.

    Args:
        doc: Parsed document; it is searched for the element whose id is
            ``expenses_accordion``.

    Yields:
        The dict returned by ``parse_depense`` for each descendant whose
        class attribute is ``panel panel-default``.
    """
    accordion = doc.find(attrs={'id': 'expenses_accordion'})
    # find_all is the current name of the legacy findAll alias.
    for panel in accordion.find_all(attrs={'class': 'panel panel-default'}):
        yield parse_depense(panel)

def parse_paybacks(doc):
    """Yield one parsed payback per panel of the paybacks accordion.

    Args:
        doc: Parsed document; it is searched for the element whose id is
            ``paybacks_accordion``.

    Yields:
        The dict returned by ``parse_payback`` for each descendant whose
        class attribute is ``panel panel-default``.
    """
    accordion = doc.find(attrs={'id': 'paybacks_accordion'})
    # find_all is the current name of the legacy findAll alias.
    for panel in accordion.find_all(attrs={'class': 'panel panel-default'}):
        yield parse_payback(panel)
Daniel Stan's avatar
Daniel Stan committed
56 57


Daniel Stan's avatar
Daniel Stan committed
58 59 60 61 62 63 64 65 66 67
def parse_payback(pb):
    """Build a dictionary describing one payback panel.

    The result has the same keys as the one built by ``parse_depense``, with
    a single participant holding one share.

    Args:
        pb: Element for a single payback; it is queried with ``find`` and
            ``find_all`` (presumably a bs4 ``Tag`` -- confirm against caller).

    Returns:
        A dict with the keys ``'title'``, ``'value'``, ``'id'``,
        ``'participants'`` and ``'buyer'``.
    """
    # The title is the first line of the stripped panel body text.
    title = pb.find(attrs={'class': 'panel-body'}).text.strip().split('\n')[0]
    # The three col-xs-4 cells are read as: buyer, participant, amount.
    entries = pb.find_all(attrs={'class': 'col-xs-4'})
    return {
        'title': title,
        'value': parse_decimal(entries[2].text),
        'id': int(pb.find(attrs={'name': 'expense_id'}).attrs['value']),
        # NOTE(review): unlike the buyer below, this name is not stripped --
        # confirm that surrounding whitespace cannot occur here.
        'participants': {entries[1].text: 1},
        'buyer': entries[0].text.strip(),
    }
68

Daniel Stan's avatar
Daniel Stan committed
69 70 71
# Debug output: show only the first parsed payback, if there is one.
first_payback = next(parse_paybacks(doc), None)
if first_payback is not None:
    print(first_payback)