legifrance.py 3.57 KB
Newer Older
Jean-Benoist Leger's avatar
Jean-Benoist Leger committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27

# Copyright (c) 2016, Jean-Benoist Leger <jb@leger.tf>
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
#     Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
# 
#     Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in
#     the documentation and/or other materials provided with the
#     distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Jean-Benoist Leger's avatar
Jean-Benoist Leger committed
28 29 30 31 32
import requests
import re
import time
import configobj

Jean-Benoist Leger's avatar
Jean-Benoist Leger committed
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
def get_articles_from_page(link):
    """Fetch a Legifrance page and collect the article links it contains.

    The page is scanned for anchors of the form
    ``<a href="affichCodeArticle.do...idArticle..." title="En savoir plus
    sur l'article NAME">``.

    Args:
        link: URL of the page to download.

    Returns:
        A dict mapping each article name (taken from the anchor title) to
        an absolute ``https://www.legifrance.gouv.fr/`` URL. The URL has
        ``&amp;`` unescaped and the ``;jsessionid=...`` and
        ``&dateTexte=...`` parts removed. If the same name appears several
        times, the last occurrence in the page wins. The dict is empty when
        no anchor matches.
    """
    response = requests.get(link)
    # Work on the decoded text: matching a str pattern against the raw
    # bytes of ``response.content`` raises TypeError on Python 3.
    html = response.text

    anchor = re.compile(
        r'<a href="(affichCodeArticle\.do[^"]*idArticle[^"]*)"'
        r' title="En savoir plus sur l\'article ([^"]+)"'
    )

    articles = {}
    # One left-to-right pass over the page; matches cannot overlap because
    # neither captured group may contain a double quote.
    for match in anchor.finditer(html):
        href, name = match.groups()
        url = 'https://www.legifrance.gouv.fr/' + href
        url = url.replace('&amp;', '&')
        # Drop the session id that sits between the path and the query.
        url = re.sub(r';jsessionid=[^\?]*\?', '?', url)
        # Drop the date parameter from the query string.
        url = re.sub(r'&dateTexte=[^&]*', '', url)
        articles[name] = url

    return articles

Jean-Benoist Leger's avatar
Jean-Benoist Leger committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
def get_code(codename, codeids):
    """Download the article index of one code from Legifrance.

    The code's table-of-contents page is fetched, every section link
    (``affichCode.do...idSectionTA...``) found in it is visited, and the
    articles listed on those section pages are merged into one mapping.

    Args:
        codename: Key identifying the code in ``codeids``.
        codeids: Mapping from code name to the Legifrance ``cidTexte``
            identifier of that code.

    Returns:
        A dict mapping article names to article URLs, or ``None`` when
        ``codename`` is not a key of ``codeids``.
    """
    if codename not in codeids:
        return None

    response = requests.get(
        'https://www.legifrance.gouv.fr/affichCode.do?cidTexte='
        + codeids[codename]
    )
    # Decoded text so the str pattern below also works on Python 3.
    html = response.text

    # A set de-duplicates sections that are linked more than once.
    section_links = set()
    for href in re.findall(r'href="(affichCode\.do[^"]*idSectionTA[^"]*)"', html):
        section_links.add(
            'https://www.legifrance.gouv.fr/' + href.replace('&amp;', '&')
        )

    articles = {}
    for link in section_links:
        articles.update(get_articles_from_page(link))

    return articles
Jean-Benoist Leger's avatar
Jean-Benoist Leger committed
79 80 81 82 83 84 85 86 87 88 89 90

class codes:
    """In-memory cache of article links, one entry per code.

    Each cached entry is a dict with two keys: ``'timestamp'`` (the time
    the entry was fetched, as returned by ``time.time()``) and
    ``'articles'`` (the mapping returned by ``get_code``).
    """

    def __init__(self, conffile):
        """Create an empty cache and load the configuration.

        Args:
            conffile: Passed to ``configobj.ConfigObj``. The resulting
                configuration is read for the keys ``'expire'`` and
                ``'codeids'``.
        """
        self.codes = {}
        self.conf = configobj.ConfigObj(conffile)

    def get(self, codename, codearticle):
        """Return the link of an article, fetching the code if needed.

        A cached code older than ``int(self.conf['expire'])`` seconds is
        discarded and fetched again through ``get_code``.

        Args:
            codename: Name of the code to look in.
            codearticle: Name of the article within that code.

        Returns:
            The stored link for the article, or ``None`` when the code
            could not be obtained or does not contain the article.
        """
        now = time.time()

        entry = self.codes.get(codename)
        if entry is not None:
            if now - entry['timestamp'] > int(self.conf['expire']):
                # Stale: forget it before trying to fetch a fresh copy.
                del self.codes[codename]
                entry = None

        if entry is None:
            articles = get_code(codename, self.conf['codeids'])
            if articles is not None:
                entry = {'timestamp': now, 'articles': articles}
                self.codes[codename] = entry

        if entry is None:
            return None
        return entry['articles'].get(codearticle)

    def force_code_reload(self, codename):
        """Mark a cached code as expired so the next ``get`` refetches it.

        Args:
            codename: Name of the code to invalidate.

        Returns:
            ``True`` if the code was cached (its timestamp is reset to 0),
            ``False`` otherwise.
        """
        if codename in self.codes:
            self.codes[codename]['timestamp'] = 0
            return True
        return False