Commit 7b95a56e authored by Pauline Pommeret

Docstrings + comments

parent f6af6231
#!/usr/bin/env python2.7
# XXX
# XXX Translate the remaining French comments into English.
"""
Docstring
Main module of the Xylok project
Author: Pauline Pommeret <pommeret@crans.org>, Jonas Senizergues <senizergues@crans.org>
Defines:
- :py:class:`Sequence`
Requires:
- :py:mod:`lib.file_tools`
- :py:mod:`lib.fft_tools`
- :py:mod:`lib.correlation`
- :py:mod:`trx`
"""
import lib.file_tools as file_tools
......@@ -11,15 +20,13 @@ import lib.correlation as correlation_lib
import lib.trx as trx_lib
class Sequence(object):
# XXX
"""
Sequence class
Class that handles a sequence.
"""
def __init__(self, fasta, md_parameters, label="", alphabet="dna", trx_scale_path=trx_lib.SCALE_FILE, sliding=72, centering=72, alpha=0.05, graph=None):
# XXX
"""
Initializes Sequence class
"""
if not alphabet:
# EnvironmentError: error used when not enough information was given
......@@ -49,24 +56,47 @@ class Sequence(object):
# used in the TRX part of the program
self.trx_scale_path = trx_scale_path
#
# self.accession stores the accession number of the sequence (retrieved
# from the fasta file)
self.accession = ""
# self.sequence stores the sequence (retrieved from the fasta file)
self.sequence = ""
# self.name stores the name of the sequence (retrieved from the fasta
# file)
self.name = ""
# self.description stores the description of the sequence (retrieved from
# the fasta file)
self.description = ""
# self.mdd stores the meaningful values from the processing of the MD
# data stored in the .dat files
self.mdd = {}
# self.correlation stores the answers to the Spearman and Pearson tests
# processed on every pair of helicoidal parameters, data come from the
# .dat files
self.correlation = {}
# self.trx stores the meaningful values from the processing of
# self.sequence using self.trx_scale_path
self.trx = {}
# Fetch
# Populates some of the attributes
self.load_fasta(fasta)
self.load_md(md_parameters)
self.load_trx()
def get(self, name, default):
"""
Try to return self[name], if it fails, returns
default
Tries to return self[name]; if it fails, returns ``default``.
Parameters:
- ``self``
- ``name``
- ``default``
Returns:
- a string
May raise:
- AttributeError
"""
try:
return getattr(self, name)
......@@ -75,9 +105,19 @@ class Sequence(object):
def __getitem__(self, name):
"""
When doing self["a"], will try to fetch self.a.
If it fails, will raise a KeyError, because it's what
dict["a"] is supposed to return if "a" is not a key.
Tries to fetch self.a when self["a"] is called in the code. If it fails,
it raises a KeyError (because that's what dict["a"] is supposed to
raise if "a" is not a key).
Parameters:
- ``self``
- ``name``
Returns:
- self["a"]
May raise:
- KeyError
"""
try:
return getattr(self, name)
......@@ -86,13 +126,21 @@ class Sequence(object):
def load_fasta(self, fasta):
"""
Loads a sequence from a fasta file.
Loads a sequence from a fasta file and populates:
- ``self.sequence``
- ``self.name``
- ``self.accession``
- ``self.description``
Parameters:
- ``self``
- ``fasta`` : str, path to the considered fasta file
Populates following attributes:
* sequence
* name
* accession number (self.accession)
* description
Uses:
- :py:meth:`lib.file_tools:load_fasta`
Returns:
- nothing
"""
parsed_sequence = file_tools.load_fasta(self.alphabet, fasta)
self.sequence = str(parsed_sequence.seq)
......@@ -103,26 +151,53 @@ class Sequence(object):
def load_md(self, md_parameters):
# XXX
"""
Loads a Yasara MD file from a MD Yasara data file
Loads MD files and populates:
- ``self.mdd``
- ``self.correlation``
Parameters:
- ``self``
- ``md_parameters`` : dictionary {..., "helicoidal param": path, ...}
Uses:
- :py:meth:`lib.file_tools:load_md_data`
- :py:meth:`lib.fft_tools:fft`
- :py:meth:`lib.fft_tools:get_noticeable_data`
- :py:meth:`lib.correlation:compute_correlations`
Returns:
- nothing
May raise:
- NotImplementedError
"""
_md_params = {}
# -*- Populating self.mdd -*- #
# Iterates on every helicoidal parameter file
for (helicoidal_parameter, path) in md_parameters.iteritems():
# Loads the file
_md_params[helicoidal_parameter] = file_tools.load_md_data(path)
# Stocke les fft dans un objet temporaire.
# _md_ffts est une liste de 3-tuples de la forme
# (frame_number, freqs, transform)
# Stores Fast Fourier Transform results in temp objects
# _md_ffts is a list of 3-tuples: (frame_number, freqs, transform)
_md_ffts = [fft_lib.fft(frame) for frame in _md_params[helicoidal_parameter]]
# Stores Fast Fourier Transform results of the sequence's center
_md_ffts_c = [fft_lib.fft(frame, centering=self.centering) for frame in _md_params[helicoidal_parameter]]
# Stores Fast Fourier Transform results of the "sliding" sequence
_md_ffts_s = [fft_lib.sliding_fft(frame, window=self.sliding) for frame in _md_params[helicoidal_parameter]]
# Toto
# Retrieves meaningful values for each type of results
# Type of results: complete, center and sliding
good_values = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts]
good_values_c = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts_c]
good_values_s = [fft_lib.get_noticeable_data(fft) for fft in _md_ffts_s]
# _md_ffts[i][0] : frame number
# Populates self.mdd with those results
# NB: _md_ffts[i][0] stores the frame number
self.mdd[helicoidal_parameter] = {
_md_ffts[i][0] : {
'complete_peak_freq' : good_values[i][0],
......@@ -141,10 +216,15 @@ class Sequence(object):
raise NotImplementedError("Plotting is not implemented yet")
# fft_lib.plot(_md_ffts, _md_ffts_c, _md_ffts_s, self.graph, helicoidal_parameter)
# For now, we got all md data, except correlation data. This is why we have kept _md_params
# We have a double loop to run amongst possible helicoidal_parms values
# We get lexicographically-ordered lists
# -*- Populating self.correlation -*- #
# In order to compute every frame correlation between 2 parameters,
# _md_params must be kept in python's memory
# To compute each pairwise correlation exactly once, a double loop is
# required. It runs over the helicoidal parameter names, sorted
# lexicographically.
helicoidal_parameters = [] + _md_params.keys()
helicoidal_parameters.sort()
helicoidal_parameters2 = [] + helicoidal_parameters
......@@ -154,24 +234,44 @@ class Sequence(object):
for param2 in helicoidal_parameters2:
self.correlation[param + "/" + param2] = correlation_lib.compute_correlations(_md_params[param], _md_params[param2], param, param2, alpha=self.alpha, centering=self.centering)
# End.
def load_trx(self):
# XXX
"""
Loads a Yasara MD file from a MD Yasara data file
"""
Populates:
- ``self.trx``
Parameters:
- ``self``
Uses:
- :py:meth:`lib.trx:match`
- :py:meth:`lib.trx:parse_trx_scale`
- :py:meth:`lib.fft_tools:fft`
- :py:meth:`lib.fft_tools:sliding_fft`
- :py:meth:`lib.fft_tools:get_noticeable_data`
Returns:
- nothing
"""
# Translates self.sequence in TRX values
trx_dict = trx_lib.match(self.sequence, trx_lib.parse_trx_scale(self.trx_scale_path))
fft = fft_lib.fft(trx_dict)
fft_c = fft_lib.fft(trx_dict, centering=self.centering)
fft_s = fft_lib.sliding_fft(trx_dict, window=self.sliding)
# Stores the Fast Fourier Transform in temp objects
_fft = fft_lib.fft(trx_dict)
# Stores the Fast Fourier Transform of sequence's center
_fft_c = fft_lib.fft(trx_dict, centering=self.centering)
# Stores the Fast Fourier Transform of the "sliding" sequence
_fft_s = fft_lib.sliding_fft(trx_dict, window=self.sliding)
good_values = fft_lib.get_noticeable_data(fft)
good_values_c = fft_lib.get_noticeable_data(fft_c)
good_values_s = fft_lib.get_noticeable_data(fft_s)
# Retrieves meaningful values for each type of results
# Type of results: complete, center and sliding
good_values = fft_lib.get_noticeable_data(_fft)
good_values_c = fft_lib.get_noticeable_data(_fft_c)
good_values_s = fft_lib.get_noticeable_data(_fft_s)
# Populates self.trx with those results
self.trx = {
'complete_peak_freq' : good_values[0],
'complete_peak' : good_values[1],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment