Commit 7b95a56e authored by Pauline Pommeret's avatar Pauline Pommeret

Docstrings + comments

parent f6af6231
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
# XXX
# XXX REMOVE FRENCH SHIT.
""" """
Docstring Main module of the Xylok project
Author: Pauline Pommeret <pommeret@crans.org>, Jonas Senizergues <senizergues@crans.org>
Defines:
- :py:class:`Sequence`
Requires:
- :py:mod:`lib.file_tools`
- :py:mod:`lib.fft_tools`
- :py:mod:`lib.correlation`
- :py:mod:`lib.trx`
""" """
import lib.file_tools as file_tools import lib.file_tools as file_tools
...@@ -11,15 +20,13 @@ import lib.correlation as correlation_lib ...@@ -11,15 +20,13 @@ import lib.correlation as correlation_lib
import lib.trx as trx_lib import lib.trx as trx_lib
class Sequence(object): class Sequence(object):
# XXX
""" """
Sequence class Class that handles a sequence.
""" """
def __init__(self, fasta, md_parameters, label="", alphabet="dna", trx_scale_path=trx_lib.SCALE_FILE, sliding=72, centering=72, alpha=0.05, graph=None): def __init__(self, fasta, md_parameters, label="", alphabet="dna", trx_scale_path=trx_lib.SCALE_FILE, sliding=72, centering=72, alpha=0.05, graph=None):
# XXX
""" """
Initializes Sequence class
""" """
if not alphabet: if not alphabet:
# EnvironmentError: error used when not enough information was given # EnvironmentError: error used when not enough information was given
...@@ -49,24 +56,47 @@ class Sequence(object): ...@@ -49,24 +56,47 @@ class Sequence(object):
# used in the TRX part of the program # used in the TRX part of the program
self.trx_scale_path = trx_scale_path self.trx_scale_path = trx_scale_path
# # self.accession stores the accession number of the sequence (retrieved
# from the fasta file)
self.accession = "" self.accession = ""
# self.sequence stores the sequence (retrieved from the fasta file)
self.sequence = "" self.sequence = ""
# self.name stores the name of the sequence (retrieved from the fasta
# file)
self.name = "" self.name = ""
# self.description stores the description of the sequence (retrieved from
# the fasta file)
self.description = "" self.description = ""
# self.mdd stores the meaningful values from the processing of the MD
# data stored in the .dat files
self.mdd = {} self.mdd = {}
# self.correlation stores the answers to the Spearman and Pearson tests
# processed on every pair of helicoidal parameters, data come from the
# .dat files
self.correlation = {} self.correlation = {}
# self.trx stores the meaningful values from the processing of
# self.sequence using self.trx_scale_path
self.trx = {} self.trx = {}
# Fetch # Populates some of the attributes
self.load_fasta(fasta) self.load_fasta(fasta)
self.load_md(md_parameters) self.load_md(md_parameters)
self.load_trx() self.load_trx()
def get(self, name, default): def get(self, name, default):
""" """
Try to return self[name], if it fails, returns Tries to return self[name], if it fails, returns default value "".
default
Parameters:
- ``self``
- ``name``
- ``default``
Returns:
- a string
May raise:
- AttributeError
""" """
try: try:
return getattr(self, name) return getattr(self, name)
...@@ -75,9 +105,19 @@ class Sequence(object): ...@@ -75,9 +105,19 @@ class Sequence(object):
def __getitem__(self, name): def __getitem__(self, name):
""" """
When doing self["a"], will try to fetch self.a. Tries to fetch self.a when self["a"] is called in the code. If it fails
If it fails, will raise a KeyError, because it's what it raises a KeyError (because that's what dict["a"] is supposed to
dict["a"] is supposed to return if "a" is not a key. return if "a" is not a key).
Parameters:
- ``self``
- ``name``
Returns:
- self["a"]
May raise:
- KeyError
""" """
try: try:
return getattr(self, name) return getattr(self, name)
...@@ -86,13 +126,21 @@ class Sequence(object): ...@@ -86,13 +126,21 @@ class Sequence(object):
def load_fasta(self, fasta): def load_fasta(self, fasta):
""" """
Loads a sequence from a fasta file. Loads a sequence from a fasta file and populates:
- ``self.sequence``
- ``self.name``
- ``self.accession``
- ``self.description``
Parameters:
- ``self``
- ``fasta`` : str, path to the considered fasta file
Populates following attributes: Uses:
* sequence - :py:meth:`lib.file_tools:load_fasta`
* name
* accession number (self.accession) Returns:
* description - nothing
""" """
parsed_sequence = file_tools.load_fasta(self.alphabet, fasta) parsed_sequence = file_tools.load_fasta(self.alphabet, fasta)
self.sequence = str(parsed_sequence.seq) self.sequence = str(parsed_sequence.seq)
...@@ -103,26 +151,53 @@ class Sequence(object): ...@@ -103,26 +151,53 @@ class Sequence(object):
def load_md(self, md_parameters): def load_md(self, md_parameters):
# XXX # XXX
""" """
Loads a Yasara MD file from a MD Yasara data file Loads MD files and populates:
- ``self.mdd``
- ``self.correlation``
Parameters:
- ``self``
- ``md_parameters`` : dictionary {..., "helicoidal param": path, ...}
Uses:
- :py:meth:`lib.file_tools.load_md_data`
- :py:meth:`lib.fft_tools.fft`
- :py:meth:`lib.fft_tools.sliding_fft`
- :py:meth:`lib.fft_tools.get_noticeable_data`
- :py:meth:`lib.correlation.compute_correlations`
Returns:
- nothing
May raise:
- NotImplementedError
""" """
_md_params = {} _md_params = {}
# -*- Populating self.mdd -*- #
# Iterates on every helicoidal parameter file
for (helicoidal_parameter, path) in md_parameters.iteritems(): for (helicoidal_parameter, path) in md_parameters.iteritems():
# Loads the file
_md_params[helicoidal_parameter] = file_tools.load_md_data(path) _md_params[helicoidal_parameter] = file_tools.load_md_data(path)
# Stocke les fft dans un objet temporaire. # Stores Fast Fourier Transform results in temp objects
# _md_ffts est une liste de 3-tuples de la forme # _md_ffts is a list of 3-tuples: (frame_number, freqs, transform)
# (frame_number, freqs, transform)
_md_ffts = [fft_lib.fft(frame) for frame in _md_params[helicoidal_parameter]] _md_ffts = [fft_lib.fft(frame) for frame in _md_params[helicoidal_parameter]]
# Stores Fast Fourier Transform results of the sequence's center
_md_ffts_c = [fft_lib.fft(frame, centering=self.centering) for frame in _md_params[helicoidal_parameter]] _md_ffts_c = [fft_lib.fft(frame, centering=self.centering) for frame in _md_params[helicoidal_parameter]]
# Stores Fast Fourier Transform results of the "sliding" sequence
_md_ffts_s = [fft_lib.sliding_fft(frame, window=self.sliding) for frame in _md_params[helicoidal_parameter]] _md_ffts_s = [fft_lib.sliding_fft(frame, window=self.sliding) for frame in _md_params[helicoidal_parameter]]
# Toto # Retrieves meaningful values for each type of results
# Type of results: complete, center and sliding
good_values = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts] good_values = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts]
good_values_c = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts_c] good_values_c = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts_c]
good_values_s = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts_s] good_values_s = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts_s]
# _md_ffts[i][0] : frame number # Populates self.mdd with those results
# NB: _md_ffts[i][0] stores the frame number
self.mdd[helicoidal_parameter] = { self.mdd[helicoidal_parameter] = {
_md_ffts[i][0] : { _md_ffts[i][0] : {
'complete_peak_freq' : good_values[i][0], 'complete_peak_freq' : good_values[i][0],
...@@ -141,10 +216,15 @@ class Sequence(object): ...@@ -141,10 +216,15 @@ class Sequence(object):
raise NotImplementedError("Plotting is not implemented yet") raise NotImplementedError("Plotting is not implemented yet")
# fft_lib.plot(_md_ffts, _md_ffts_c, _md_ffts_s, self.graph, helicoidal_parameter) # fft_lib.plot(_md_ffts, _md_ffts_c, _md_ffts_s, self.graph, helicoidal_parameter)
# For now, we got all md data, except correlation data. This is why we have kept _md_params
# We have a double loop to run amongst possible helicoidal_parms values # -*- Populating self.correlation -*- #
# We get lexicographically-ordered lists
# In order to compute every frame correlation between 2 parameters,
# _md_params must be kept in python's memory
# To compute the correlation of each pair of parameters exactly once, a
# double loop is required. It runs over the helicoidal parameter names,
# which are sorted lexicographically.
helicoidal_parameters = [] + _md_params.keys() helicoidal_parameters = [] + _md_params.keys()
helicoidal_parameters.sort() helicoidal_parameters.sort()
helicoidal_parameters2 = [] + helicoidal_parameters helicoidal_parameters2 = [] + helicoidal_parameters
...@@ -154,24 +234,44 @@ class Sequence(object): ...@@ -154,24 +234,44 @@ class Sequence(object):
for param2 in helicoidal_parameters2: for param2 in helicoidal_parameters2:
self.correlation[param + "/" + param2] = correlation_lib.compute_correlations(_md_params[param], _md_params[param2], param, param2, alpha=self.alpha, centering=self.centering) self.correlation[param + "/" + param2] = correlation_lib.compute_correlations(_md_params[param], _md_params[param2], param, param2, alpha=self.alpha, centering=self.centering)
# End.
def load_trx(self): def load_trx(self):
# XXX
"""
Loads a Yasara MD file from a MD Yasara data file
""" """
Populates:
- ``self.trx``
Parameters:
- ``self``
Uses:
- :py:meth:`lib.trx.match`
- :py:meth:`lib.trx.parse_trx_scale`
- :py:meth:`lib.fft_tools.fft`
- :py:meth:`lib.fft_tools.sliding_fft`
- :py:meth:`lib.fft_tools.get_noticeable_data`
Returns:
- nothing
"""
# Translates self.sequence in TRX values
trx_dict = trx_lib.match(self.sequence, trx_lib.parse_trx_scale(self.trx_scale_path)) trx_dict = trx_lib.match(self.sequence, trx_lib.parse_trx_scale(self.trx_scale_path))
fft = fft_lib.fft(trx_dict) # Stores the Fast Fourier Transform in temp objects
fft_c = fft_lib.fft(trx_dict, centering=self.centering) _fft = fft_lib.fft(trx_dict)
fft_s = fft_lib.sliding_fft(trx_dict, window=self.sliding)
# Stores the Fast Fourier Transform of sequence's center
_fft_c = fft_lib.fft(trx_dict, centering=self.centering)
# Stores the Fast Fourier Transform of the "sliding" sequence
_fft_s = fft_lib.sliding_fft(trx_dict, window=self.sliding)
good_values = fft_lib.get_noticeable_data(fft) # Retrieves meaningful values for each type of results
good_values_c = fft_lib.get_noticeable_data(fft_c) # Type of results: complete, center and sliding
good_values_s = fft_lib.get_noticeable_data(fft_s) good_values = fft_lib.get_noticeable_data(_fft)
good_values_c = fft_lib.get_noticeable_data(_fft_c)
good_values_s = fft_lib.get_noticeable_data(_fft_s)
# Populates self.trx with those results
self.trx = { self.trx = {
'complete_peak_freq' : good_values[0], 'complete_peak_freq' : good_values[0],
'complete_peak' : good_values[1], 'complete_peak' : good_values[1],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment