Commit 08f7fe9d authored by Pauline Pommeret's avatar Pauline Pommeret

Sphinxisation of docstrings

parent 63748a6f
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
""" """
Definition of the specific errors of Xylok. This file holds the definitions of Xylok's specific errors.
All of them inherit :py:class:`XylokError`. All of them inherit :py:class:`XylokError`.
""" """
......
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
""" """
This module contains functions required to load Xylok date from files. This file contains the functions required to load Xylok data from files.
Requires: .. note::
- :py:mod:`re` MD file must look like:
- :py:mod:`Bio.SeqIO` frame C16/C17 C17/G18 G18/A19 ...
- :py:mod:`Bio.Alphabet.IUPAC` 0 -1.0 9.0 -1.8
1 -4.4 -7.6 -0.1
2 -6.4 1.0 -6.0
3 -3.6 -1.1 -2.5
.. seealso::
Standard libraries:
* :py:mod:`re`
* :py:mod:`Bio.SeqIO`
* :py:mod:`Bio.Alphabet.IUPAC`
""" """
import re import re
...@@ -14,17 +23,15 @@ from Bio.Alphabet import IUPAC ...@@ -14,17 +23,15 @@ from Bio.Alphabet import IUPAC
def load_fasta(sequence_alphabet, path): def load_fasta(sequence_alphabet, path):
""" """
Loads a fasta file from a file, using its alphabet and returns a record of Loads a fasta sequence from a file, using its alphabet and returns a record
it. with all the information in the file.
Parameters: Parameters:
- ``sequence_alphabet`` : str (dna, rna, prot) - ``sequence_alphabet`` : str (dna, rna, prot)
- ``path`` : path to FASTA file - ``path`` : path to FASTA file
Uses: Uses:
- :py:mod:`Bio.SeqIO` - :py:mod:`Bio.SeqIO`
- :py:mod:`Bio.Alphabet` - :py:mod:`Bio.Alphabet`
Returns: Returns:
- an object of type :py:class:`Bio.SeqRecord.SeqRecord` - an object of type :py:class:`Bio.SeqRecord.SeqRecord`
""" """
...@@ -42,6 +49,7 @@ def load_md_data(path): ...@@ -42,6 +49,7 @@ def load_md_data(path):
Loads a post-processed MD file into a list of dictionaries (each frame has Loads a post-processed MD file into a list of dictionaries (each frame has
its dictionary). its dictionary).
.. note::
MD file must look like: MD file must look like:
frame C16/C17 C17/G18 G18/A19 ... frame C16/C17 C17/G18 G18/A19 ...
0 -1.0 9.0 -1.8 0 -1.0 9.0 -1.8
...@@ -51,7 +59,6 @@ def load_md_data(path): ...@@ -51,7 +59,6 @@ def load_md_data(path):
Parameters: Parameters:
- ``path`` : str, path to the file - ``path`` : str, path to the file
Returns: Returns:
- a list of dictionaries (each dictionary is a MD frame) - a list of dictionaries (each dictionary is a MD frame)
[ ... {"frame": frame#, ..., position_i: value_i, ...} ...] [ ... {"frame": frame#, ..., position_i: value_i, ...} ...]
......
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
"""
This file handles the learning part of Xylok. For now, only supervised
learning using the decision tree algorithm.
The decision tree computed is an averaged decision tree: relevant values of the
Fast Fourier Transform are averaged to appear as a single *mean frame* in order
not to unbalance the algorithm.
.. note::
It may deal with incomplete data such as no *Rise* or no *Tilt*
information, thanks to preprocessing. (However, the decision might not
be relevant, but this is a user issue.)
.. seealso::
Standard libraries:
* :py:mod:`sklearn`
* :py:mod:`sklearn.preprocessing`
* :py:mod:`sklearn.tree`
Xylok:
* :py:mod:`lib.database`
"""
import sklearn import sklearn
import sklearn.preprocessing import sklearn.preprocessing
...@@ -6,19 +28,35 @@ import sklearn.tree ...@@ -6,19 +28,35 @@ import sklearn.tree
import lib.database import lib.database
LCURSOR = lib.database.LearningPGCursor() # Cursor on PostgreSQL database that allows reading/writing data in it
LCURSOR = lib.database.XylokPGCursor()
def generate_learner(): def generate_learner():
""" """
Returns an averaged DecisionTree Returns an averaged DecisionTree
Parameters:
- None
Uses:
- :py:mod:`sklearn`
- :py:mod:`sklearn.preprocessing`
- :py:mod:`sklearn.tree`
Returns:
- decision tree and a list of all possible labels (str)
""" """
# Creates an imputer that decides what the missing values are in data
imp = sklearn.preprocessing.Imputer(missing_values=0.0, strategy="mean", verbose=0, copy=False, axis=1) imp = sklearn.preprocessing.Imputer(missing_values=0.0, strategy="mean", verbose=0, copy=False, axis=1)
# Retrieves data, a list of numbers (translation of labels) and a list of
# all the possible labels (str) from the PostgreSQL database
datas, answers, possible_answers = LCURSOR.fetch_averaged_sequence_data() datas, answers, possible_answers = LCURSOR.fetch_averaged_sequence_data()
# The imputer does its job
imp.fit(datas) imp.fit(datas)
datas = imp.transform(datas) datas = imp.transform(datas)
# Creates the decision tree
dtree = sklearn.tree.DecisionTreeClassifier() dtree = sklearn.tree.DecisionTreeClassifier()
dtree.fit(datas, answers) dtree.fit(datas, answers)
......
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
""" """
This program populates a pre-existing PostgreSQL database with the results.
.. warning::
This program requires a PostgreSQL database in order to work properly!
Please run :py:data:`sql.create_db.sh` with your PAM username as an
argument or create a PostgreSQL database using the database schema stored in
:py:data:`sql.create_db.sql`.
This program populates a pre-existing PostgreSQL database with the results.
A shell
Authors: Pauline Pommeret, Jonas Senizergues Authors: Pauline Pommeret, Jonas Senizergues
...@@ -27,7 +31,7 @@ import lib.trx as trx_lib ...@@ -27,7 +31,7 @@ import lib.trx as trx_lib
import lib.database as database import lib.database as database
if __name__ == "__main__": if __name__ == "__main__":
DBCURSOR = database.PGCursor() DBCURSOR = database.XylokPGCursor()
# -*- Parsing command-line arguments using argparse -*- # # -*- Parsing command-line arguments using argparse -*- #
......
...@@ -104,7 +104,17 @@ class Sequence(object): ...@@ -104,7 +104,17 @@ class Sequence(object):
def do_analysis(self): def do_analysis(self):
""" """
Compute complex data from parameters. Runs function and triggers the actual job.
*(Those functions are called in a specific function so that they are
called only if the data isn't already in the database)*
Parameters:
- ``self``
Uses:
- :py:meth:`load_md`
- :py:meth:`load_trx`
Returns:
- nothing
""" """
self.load_md() self.load_md()
self.load_trx() self.load_trx()
...@@ -117,10 +127,8 @@ class Sequence(object): ...@@ -117,10 +127,8 @@ class Sequence(object):
- ``self`` - ``self``
- ``name`` - ``name``
- ``default`` - ``default``
Returns: Returns:
- a string - a string
May raise: May raise:
- AttributeError - AttributeError
""" """
...@@ -138,10 +146,8 @@ class Sequence(object): ...@@ -138,10 +146,8 @@ class Sequence(object):
Parameters: Parameters:
- ``self`` - ``self``
- ``name`` - ``name``
Returns: Returns:
- self["a"] - self["a"]
May raise: May raise:
- KeyError - KeyError
""" """
...@@ -161,10 +167,8 @@ class Sequence(object): ...@@ -161,10 +167,8 @@ class Sequence(object):
Parameters: Parameters:
- ``self`` - ``self``
- ``fasta`` : str, path to the considered fasta file - ``fasta`` : str, path to the considered fasta file
Uses: Uses:
- :py:meth:`lib.file_tools:load_fasta` - :py:meth:`lib.file_tools:load_fasta`
Returns: Returns:
- nothing - nothing
""" """
...@@ -183,16 +187,13 @@ class Sequence(object): ...@@ -183,16 +187,13 @@ class Sequence(object):
Parameters: Parameters:
- ``self`` - ``self``
Uses: Uses:
- :py:meth:`lib.file_tools:load_md_data` - :py:meth:`lib.file_tools:load_md_data`
- :py:meth:`lib.fft_tools:fft` - :py:meth:`lib.fft_tools:fft`
- :py:meth:`lib.fft_tools:get_noticeable_data` - :py:meth:`lib.fft_tools:get_noticeable_data`
- :py:meth:`lib.correlation:compute_correlations` - :py:meth:`lib.correlation:compute_correlations`
Returns: Returns:
- nothing - nothing
May raise: May raise:
- NotImplementedError - NotImplementedError
""" """
...@@ -267,14 +268,12 @@ class Sequence(object): ...@@ -267,14 +268,12 @@ class Sequence(object):
Parameters: Parameters:
- ``self`` - ``self``
Uses: Uses:
- :py:meth:`lib.trx:match` - :py:meth:`lib.trx:match`
- :py:meth:`lib.trx:parse_trx_scale` - :py:meth:`lib.trx:parse_trx_scale`
- :py:meth:`lib.fft_tools:fft` - :py:meth:`lib.fft_tools:fft`
- :py:meth:`lib.fft_tools:sliding_fft` - :py:meth:`lib.fft_tools:sliding_fft`
- :py:meth:`lib.fft_tools:get_noticeable_data` - :py:meth:`lib.fft_tools:get_noticeable_data`
Returns: Returns:
- nothing - nothing
""" """
......
#!/usr/bin/env python2.7 #!/usr/bin/env python2.7
# XXX Bleh # XXX Bleh
"""
Generates the decision tree associated if the
if already in the database: offer to discard it and replace it
otherwise: generate the DT
Generates the decision tree associated if the
test sequence
.. warning::
Requires a populated PostgreSQL database in order to run properly.
.. seealso:: modules :py:mod:`lib.learning` :py:mod:`lib.database`
:py:mod:`lib.trx` :py:mod:`sequence`
"""
import argparse import argparse
from argparse import RawDescriptionHelpFormatter from argparse import RawDescriptionHelpFormatter
...@@ -13,9 +31,7 @@ import sequence ...@@ -13,9 +31,7 @@ import sequence
if __name__ == "__main__": if __name__ == "__main__":
DT, PA = lib.learning.generate_learner() DBCURSOR = lib.database.XylokPGCursor()
LC = lib.database.LearningPGCursor()
DBCURSOR = lib.database.PGCursor()
# -*- Parsing command-line arguments using argparse -*- # # -*- Parsing command-line arguments using argparse -*- #
...@@ -91,11 +107,8 @@ A .dat file must me organized this way: ...@@ -91,11 +107,8 @@ A .dat file must me organized this way:
seq = sequence.Sequence(fasta_file, md_parameters, alphabet=ARGS.alphabet, trx_scale_path=ARGS.trx_scale_file, sliding=ARGS.sliding, centering=ARGS.centering, alpha=ARGS.alpha, graph=ARGS.graph) seq = sequence.Sequence(fasta_file, md_parameters, alphabet=ARGS.alphabet, trx_scale_path=ARGS.trx_scale_file, sliding=ARGS.sliding, centering=ARGS.centering, alpha=ARGS.alpha, graph=ARGS.graph)
seq_id = DBCURSOR.check_sequence(seq) seq_id = DBCURSOR.check_sequence(seq)
if seq_id: if seq_id:
if raw_input("This sequence is already in database. Would you like to recompute its data? [y/n]") != "y": print "This sequence is already in database (id=%s)." % (seq_id,)
data = LC.fetch_averaged_specific_sequence(seq_id)
print PA[DT.predict(data)[0]]
sys.exit(0) sys.exit(0)
DBCURSOR.remove_seq(seq_id)
# We have to build the DT only if we are certain that the sequence is # We have to build the DT only if we are certain that the sequence is
# not in the db, as it could alter the result of the DT. # not in the db, as it could alter the result of the DT.
...@@ -111,5 +124,5 @@ A .dat file must me organized this way: ...@@ -111,5 +124,5 @@ A .dat file must me organized this way:
# Done, moving on to next directory # Done, moving on to next directory
print "Data stored in %r added to database under id %s" % (cur_dir, seq_id) print "Data stored in %r added to database under id %s" % (cur_dir, seq_id)
data = LC.fetch_averaged_specific_sequence(seq_id) data = DBCURSOR.fetch_averaged_specific_sequence(seq_id)
print PA[DT.predict(data)[0]] print PA[DT.predict(data)[0]]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment