Commit b370ea69 authored by Pauline Pommeret's avatar Pauline Pommeret
Browse files

Comments, docstring and pylint

parent 7400015a
#!/usr/bin/env python2.7
# XXX
"""
Docstring
This module contains functions required to load Xylok data from files.
Requires:
- :py:mod:`re`
- :py:mod:`Bio.SeqIO`
- :py:mod:`Bio.Alphabet.IUPAC`
"""
import re
......@@ -10,26 +15,47 @@ from Bio.Alphabet import IUPAC
def load_fasta(sequence_alphabet, path):
    """
    Loads a FASTA file, using the requested alphabet, and returns a record
    of it.

    Parameters:
        - ``sequence_alphabet`` : str, kind of sequence stored in the file.
          Only ``'dna'`` (case-insensitive) is handled for now; it is mapped
          to ``IUPAC.unambiguous_dna``.
        - ``path`` : path to FASTA file

    Uses:
        - :py:mod:`Bio.SeqIO`
        - :py:mod:`Bio.Alphabet`

    Returns:
        - an object of type :py:class:`Bio.SeqRecord.SeqRecord`

    Raises:
        - :py:exc:`NotImplementedError` when ``sequence_alphabet`` is
          anything other than ``'dna'``
    """
    if sequence_alphabet.lower() != 'dna':
        # Maybe someday, RNA analysis will be enabled. Until then, fail
        # loudly and tell the caller which value was rejected.
        raise NotImplementedError(
            "Unsupported sequence alphabet %r: only 'dna' is handled"
            % (sequence_alphabet,)
        )

    # Keep the caller's argument untouched and bind the Biopython alphabet
    # object to its own name.
    alphabet = IUPAC.unambiguous_dna
    return SeqIO.read(path, format="fasta", alphabet=alphabet)
def load_md_data(path):
# XXX
"""
Loads a post-processed MD file into a list of dictionaries (each frame has
its dictionary).
MD file must look like:
frame C16/C17 C17/G18 G18/A19 ...
0 -1.0 9.0 -1.8
1 -4.4 -7.6 -0.1
2 -6.4 1.0 -6.0
3 -3.6 -1.1 -2.5
Parameters:
- ``path`` : str, path to the file
Returns:
- a list of dictionaries (each dictionary is a MD frame)
[ ... {"frame": frame#, ..., position_i: value_i, ...} ...]
"""
positions = []
......@@ -44,15 +70,16 @@ def load_md_data(path):
header = header.replace('\n', '').split()
# Working on 'frame' (header[0]) or string like 'C17/G18'
for c in header:
for char in header:
# Uses the string at the left side of the '/'
positions.append(re.sub(r'[A-Z]', '', c.split('/')[0]))
positions.append(re.sub(r'[A-Z]', '', char.split('/')[0]))
# Retrieves the last element (never on the left side of the '/')
positions.append(re.sub(r'[A-Z]', '', header[-1].split('/')[1]))
# Getting rid of 'frame' (first word of the file)
# And getting rid of the last position.
# http://stackoverflow.com/questions/5893163/underscore-in-python
_ = positions.pop(0)
_ = positions.pop(-1)
......@@ -69,12 +96,12 @@ def load_md_data(path):
# values to float for further use
line = [int(line[0])] + [float(x) for x in line[1:]]
# Creating a list looking like
# [..., ['position_i', 'helicoidal_parameter_value_i'], ...]
# Creating a dictionary looking like
# {..., 'position_i': 'helicoidal_parameter_value_i', ...}
# There is a shift between positions (contains frame number) and
# line (doesn't)
newline = { int(positions[x]): line[x+1] for x in xrange(len(positions))}
# Inserting the list ["frame", frame number] at first position
newline = {int(positions[x]): line[x+1] for x in xrange(len(positions))}
# Inserting the "frame": frame number at first position
# (for developper convenience)
newline["frame"] = line[0]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment