Commit 26c69bec authored by Pauline Pommeret
Browse files

Adding Sequence class + file_tools + test main

parent 3fb7e4f3
#!/usr/bin/env python2.7
# XXX
"""
Helpers for loading FASTA sequences and MD data files.
"""
import re

from Bio import SeqIO
from Bio.Alphabet import IUPAC
def load_fasta(sequence_alphabet, path):
    """
    Read the record stored in a FASTA file.

    Arguments:
     * sequence_alphabet : alphabet object forwarded to the parser
     * path : path to the FASTA file

    Returns the object produced by ``SeqIO.read`` for that file.
    """
    record = SeqIO.read(path, format="fasta", alphabet=sequence_alphabet)
    return record
def load_md_data(path):
    """
    Parse a whitespace-separated MD data file.

    The first line of the file is a header; every following non-blank line
    is one row of numeric values.

    Header handling:
     * each column is split on '/' and the part before the slash, with its
       digits removed, is appended to the positions string;
     * the part after the slash of the last column is appended as well, so
       the final element is not lost;
     * the literal text 'frame' is then removed from the result.

    Row handling:
     * every field is converted to float;
     * the row becomes a list starting with ["frame", <first field as text>]
       followed by one [position, value] pair per remaining column, where
       position is the number found before the slash in the matching header
       column.

    Arguments:
     * path : path to the MD data file

    Returns a tuple (positions, output) where positions is a string and
    output is the list of rows described above.

    Raises IndexError if the header is empty, if its last column holds no
    '/', or if a row has more fields than the header; raises ValueError if
    a field is not a valid float.
    """
    digits = re.compile(r'[0-9]')
    letters = re.compile(r'[A-Za-z]')

    output = []
    with open(path, 'r') as handle:
        # Fetch the sequence positions from the first line
        header = next(handle, '').split()
        positions = "".join(
            digits.sub('', column.split('/')[0]) for column in header
        )
        # The last column also carries the final element, after its slash
        positions += digits.sub('', header[-1].split('/')[1])
        # Drop the 'frame' column label: it is not part of the sequence
        positions = positions.replace('frame', '')

        # Build a plain list-of-lists representation of the whole file
        for raw_line in handle:
            fields = raw_line.split()
            if not fields:
                # Blank (e.g. trailing) lines carry no data
                continue
            values = [float(field) for field in fields]
            # The frame identifier is kept as the text read from the file
            row = [["frame", fields[0]]]
            row.extend(
                [float(letters.sub('', header[index].split('/')[0])),
                 values[index]]
                for index in range(1, len(values))
            )
            output.append(row)
    return (positions, output)
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""
A file for running tests.
"""
import argparse
def build_parser():
    """
    Create the command-line parser of the test script.

    Declares one option for the alphabet, one for the FASTA file and one
    per MD data file (roll, rise, slide, shift, twist, tilt). Every option
    takes a string value and defaults to None when absent.

    Returns the configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(description="ToDo")
    # Adding arguments to the parser
    parser.add_argument(
        "-a", "--alphabet", type=str, action="store",
        help="Alphabet (DNA, RNA, prot): Unambiguous IUPAC alphabet that will be used.")
    parser.add_argument(
        "-f", "--fasta", type=str, action="store",
        help="Path to the FASTA file to load.")
    parser.add_argument(
        "-r", "--roll", type=str, action="store",
        help="Path to the Roll MD file.")
    parser.add_argument(
        "-R", "--rise", type=str, action="store",
        help="Path to the Rise MD file.")
    parser.add_argument(
        "-s", "--slide", type=str, action="store",
        help="Path to the Slide MD file.")
    parser.add_argument(
        "-S", "--shift", type=str, action="store",
        help="Path to the Shift MD file.")
    parser.add_argument(
        "-t", "--twist", type=str, action="store",
        help="Path to the Twist MD file.")
    parser.add_argument(
        "-T", "--tilt", type=str, action="store",
        help="Path to the Tilt MD file.")
    return parser


if __name__ == "__main__":
    # Build the parser, then parse the command line
    args = build_parser().parse_args()
    if not args.fasta:
        raise EnvironmentError("You have to give a FASTA file.")
    # TODO: build the Sequence object from the parsed arguments
    # (alphabet, FASTA path and the MD file paths).
#!/usr/bin/env python2.7
# XXX
"""
Sequence class built from a FASTA file and MD data files.
"""
import lib.file_tools as file_tools
class Sequence(object):
    """
    A sequence read from a FASTA file, together with its MD data.

    Attributes set by the constructor:
     * alphabet : the alphabet given by the caller
     * sequence, name, an, description : see load_fasta
     * md : see load_md
    """

    def __init__(self, alphabet, fasta, md_parameters):
        """
        Load the FASTA file, then the MD data files.

        Arguments:
         * alphabet : alphabet forwarded to the FASTA loader (mandatory)
         * fasta : path to the FASTA file (mandatory)
         * md_parameters : mapping of helicoidal parameter name to the path
           of its MD data file; None or an empty mapping loads nothing

        Raises EnvironmentError when alphabet or fasta is missing.
        """
        if not alphabet:
            raise EnvironmentError("Alphabet is mandatory.")
        if not fasta:
            # Error used when not enough information was given
            raise EnvironmentError("FASTA file is mandatory.")
        self.alphabet = alphabet
        self.load_fasta(fasta)
        self.load_md(md_parameters)

    def load_fasta(self, fasta):
        """
        Loads a sequence from a fasta file.

        Populates following attributes:
         * sequence
         * name
         * accession number (self.an)
         * description
        """
        parsed_sequence = file_tools.load_fasta(self.alphabet, fasta)
        self.sequence = parsed_sequence.seq
        self.name = parsed_sequence.name
        # The accession number is the fourth '|'-separated field of the id
        # (presumably NCBI-style "gi|...|ref|ACCESSION|" ids -- confirm).
        # Ids with fewer fields are kept whole instead of raising IndexError.
        id_fields = parsed_sequence.id.split("|")
        if len(id_fields) > 3:
            self.an = id_fields[3]
        else:
            self.an = parsed_sequence.id
        self.description = parsed_sequence.description

    def load_md(self, md_parameters):
        """
        Load every MD data file listed in md_parameters.

        Populates self.md, a dict mapping each helicoidal parameter name to
        the value returned by file_tools.load_md_data for its path. A None
        or empty md_parameters leaves self.md empty.
        """
        self.md = {}
        # items() rather than iteritems() so this runs on Python 2 and 3
        for helicoidal_parameter, path in (md_parameters or {}).items():
            self.md[helicoidal_parameter] = file_tools.load_md_data(path)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment