Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Senizergues-Pommeret
ITPP
Commits
7b95a56e
Commit
7b95a56e
authored
Dec 02, 2014
by
Pauline Pommeret
Browse files
Docstrings + comments
parent
f6af6231
Changes
1
Hide whitespace changes
Inline
Side-by-side
sequence.py
View file @
7b95a56e
#!/usr/bin/env python2.7
# XXX
# XXX Translate the remaining French comments to English.
"""
Docstring
Main module of the Xylok project
Author: Pauline Pommeret <pommeret@crans.org>, Jonas Senizergues <senizergues@crans.org>
Defines:
- :py:class:`Sequence`
Requires:
- :py:mod:`lib.file_tools`
- :py:mod:`lib.fft_tools`
- :py:mod:`lib.correlation`
- :py:mod:`trx`
"""
import
lib.file_tools
as
file_tools
...
...
@@ -11,15 +20,13 @@ import lib.correlation as correlation_lib
import
lib.trx
as
trx_lib
class
Sequence
(
object
):
# XXX
"""
S
equence
class
Class that handles a s
equence
.
"""
def
__init__
(
self
,
fasta
,
md_parameters
,
label
=
""
,
alphabet
=
"dna"
,
trx_scale_path
=
trx_lib
.
SCALE_FILE
,
sliding
=
72
,
centering
=
72
,
alpha
=
0.05
,
graph
=
None
):
# XXX
"""
Initializes Sequence class
"""
if
not
alphabet
:
# EnvironmentError: error used when not enough information was given
...
...
@@ -49,24 +56,47 @@ class Sequence(object):
# used in the TRX part of the program
self
.
trx_scale_path
=
trx_scale_path
#
# self.accession stores the accession number of the sequence (retrieved
# from the fasta file)
self
.
accession
=
""
# self.sequence stores the sequence (retrieved from the fasta file)
self
.
sequence
=
""
# self.name stores the name of the sequence (retrieved from the fasta
# file)
self
.
name
=
""
# self.description stores the description of the sequence (retrieved from the
# fasta file)
self
.
description
=
""
# self.mdd stores the meaningful values from the processing of the MD
# data stored in the .dat files
self
.
mdd
=
{}
# self.correlation stores the answers to the Spearman and Pearson tests
# processed on every pair of helicoidal parameters, data come from the
# .dat files
self
.
correlation
=
{}
# self.trx stores the meaningful values from the processing of
# self.sequence using self.trx_scale_path
self
.
trx
=
{}
#
Fetch
#
Populates some of the attributes
self
.
load_fasta
(
fasta
)
self
.
load_md
(
md_parameters
)
self
.
load_trx
()
def
get
(
self
,
name
,
default
):
"""
Try to return self[name], if it fails, returns
default
Tries to return self[name]; if that fails, returns ``default``.
Parameters:
- ``self``
- ``name``
- ``default``
Returns:
- a string
May raise:
- AttributeError
"""
try
:
return
getattr
(
self
,
name
)
...
...
@@ -75,9 +105,19 @@ class Sequence(object):
def
__getitem__
(
self
,
name
):
"""
When doing self["a"], will try to fetch self.a.
If it fails, will raise a KeyError, because it's what
dict["a"] is supposed to return if "a" is not a key.
Tries to fetch self.a when self["a"] is called in the code. If it fails
it raises a KeyError (because that's what dict["a"] is supposed to
raise if "a" is not a key).
Parameters:
- ``self``
- ``name``
Returns:
- self["a"]
May raise:
- KeyError
"""
try
:
return
getattr
(
self
,
name
)
...
...
@@ -86,13 +126,21 @@ class Sequence(object):
def
load_fasta
(
self
,
fasta
):
"""
Loads a sequence from a fasta file.
Loads a sequence from a fasta file and populates:
- ``self.sequence``
- ``self.name``
- ``self.accession``
- ``self.description``
Parameters:
- ``self``
- ``fasta`` : str, path to the considered fasta file
Populates following attribut
es:
* sequence
* name
* accession number (self.accession)
* description
Us
es:
- :py:func:`lib.file_tools.load_fasta`
Returns:
- nothing
"""
parsed_sequence
=
file_tools
.
load_fasta
(
self
.
alphabet
,
fasta
)
self
.
sequence
=
str
(
parsed_sequence
.
seq
)
...
...
@@ -103,26 +151,53 @@ class Sequence(object):
def
load_md
(
self
,
md_parameters
):
# XXX
"""
Loads a Yasara MD file from a MD Yasara data file
Loads MD files and populates:
- ``self.mdd``
- ``self.correlation``
Parameters:
- ``self``
- ``md_parameters`` : dictionary {..., "helicoidal param": path, ...}
Uses:
- :py:func:`lib.file_tools.load_md_data`
- :py:func:`lib.fft_tools.fft`
- :py:func:`lib.fft_tools.get_noticeable_data`
- :py:func:`lib.correlation.compute_correlations`
Returns:
- nothing
May raise:
- NotImplementedError
"""
_md_params
=
{}
# -*- Populating self.mdd -*- #
# Iterates on every helicoidal parameter file
for
(
helicoidal_parameter
,
path
)
in
md_parameters
.
iteritems
():
# Loads the file
_md_params
[
helicoidal_parameter
]
=
file_tools
.
load_md_data
(
path
)
# Stocke les fft dans un objet temporaire.
# _md_ffts est une liste de 3-tuples de la forme
# (frame_number, freqs, transform)
# Stores Fast Fourier Transform results in temp objects
# _md_ffts is a list of 3-tuples: (frame_number, freqs, transform)
_md_ffts
=
[
fft_lib
.
fft
(
frame
)
for
frame
in
_md_params
[
helicoidal_parameter
]]
# Stores Fast Fourier Transform results of the sequence's center
_md_ffts_c
=
[
fft_lib
.
fft
(
frame
,
centering
=
self
.
centering
)
for
frame
in
_md_params
[
helicoidal_parameter
]]
# Stores Fast Fourier Transform results of the "sliding" sequence
_md_ffts_s
=
[
fft_lib
.
sliding_fft
(
frame
,
window
=
self
.
sliding
)
for
frame
in
_md_params
[
helicoidal_parameter
]]
# Toto
# Retrieves meaningful values for each type of results
# Type of results: complete, center and sliding
good_values
=
[
fft_lib
.
get_noticeable_data
(
fft
)
for
fft
in
_md_ffts
]
good_values_c
=
[
fft_lib
.
get_noticeable_data
(
fft
)
for
fft
in
_md_ffts_c
]
good_values_s
=
[
fft_lib
.
get_noticeable_data
(
fft
)
for
fft
in
_md_ffts_s
]
# _md_ffts[i][0] : frame number
# Populates self.mdd with those results
# NB: _md_ffts[i][0] stores the frame number
self
.
mdd
[
helicoidal_parameter
]
=
{
_md_ffts
[
i
][
0
]
:
{
'complete_peak_freq'
:
good_values
[
i
][
0
],
...
...
@@ -141,10 +216,15 @@ class Sequence(object):
raise
NotImplementedError
(
"Plotting is not implemented yet"
)
# fft_lib.plot(_md_ffts, _md_ffts_c, _md_ffts_s, self.graph, helicoidal_parameter)
# For now, we got all md data, except correlation data. This is why we have kept _md_params
# We have a double loop to run amongst possible helicoidal_parms values
# We get lexicographically-ordered lists
# -*- Populating self.correlation -*- #
# In order to compute every frame correlation between 2 parameters,
# _md_params must be kept in python's memory
# To compute each pairwise correlation exactly once, a double loop is
# required. It runs over the possible helicoidal parameter names, which
# are sorted lexicographically.
helicoidal_parameters
=
[]
+
_md_params
.
keys
()
helicoidal_parameters
.
sort
()
helicoidal_parameters2
=
[]
+
helicoidal_parameters
...
...
@@ -154,24 +234,44 @@ class Sequence(object):
for
param2
in
helicoidal_parameters2
:
self
.
correlation
[
param
+
"/"
+
param2
]
=
correlation_lib
.
compute_correlations
(
_md_params
[
param
],
_md_params
[
param2
],
param
,
param2
,
alpha
=
self
.
alpha
,
centering
=
self
.
centering
)
# End.
def
load_trx
(
self
):
# XXX
"""
Loads a Yasara MD file from a MD Yasara data file
"""
Populates:
- ``self.trx``
Parameters:
- ``self``
Uses:
- :py:func:`lib.trx.match`
- :py:func:`lib.trx.parse_trx_scale`
- :py:func:`lib.fft_tools.fft`
- :py:func:`lib.fft_tools.sliding_fft`
- :py:func:`lib.fft_tools.get_noticeable_data`
Returns:
- nothing
"""
# Translates self.sequence in TRX values
trx_dict
=
trx_lib
.
match
(
self
.
sequence
,
trx_lib
.
parse_trx_scale
(
self
.
trx_scale_path
))
fft
=
fft_lib
.
fft
(
trx_dict
)
fft_c
=
fft_lib
.
fft
(
trx_dict
,
centering
=
self
.
centering
)
fft_s
=
fft_lib
.
sliding_fft
(
trx_dict
,
window
=
self
.
sliding
)
# Stores the Fast Fourier Transform in temp objects
_fft
=
fft_lib
.
fft
(
trx_dict
)
# Stores the Fast Fourier Transform of sequence's center
_fft_c
=
fft_lib
.
fft
(
trx_dict
,
centering
=
self
.
centering
)
# Stores the Fast Fourier Transform of the "sliding" sequence
_fft_s
=
fft_lib
.
sliding_fft
(
trx_dict
,
window
=
self
.
sliding
)
good_values
=
fft_lib
.
get_noticeable_data
(
fft
)
good_values_c
=
fft_lib
.
get_noticeable_data
(
fft_c
)
good_values_s
=
fft_lib
.
get_noticeable_data
(
fft_s
)
# Retrieves meaningful values for each type of results
# Type of results: complete, center and sliding
good_values
=
fft_lib
.
get_noticeable_data
(
_fft
)
good_values_c
=
fft_lib
.
get_noticeable_data
(
_fft_c
)
good_values_s
=
fft_lib
.
get_noticeable_data
(
_fft_s
)
# Populates self.trx with those results
self
.
trx
=
{
'complete_peak_freq'
:
good_values
[
0
],
'complete_peak'
:
good_values
[
1
],
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment