Commit 50d8f82a authored by Pauline Pommeret
Browse files

Let's EXTERMINATE bugs

parent 76bfe08f
......@@ -5,24 +5,51 @@ Un fichier pour faire des tests
"""
import os
import sys
import argparse
import sequence
from lib.XylokExceptions import NoFastaProvided
import lib.trx as trx_lib
import lib.database as database
if __name__ == "__main__":
dbcursor = database.PGCursor()
# Use argparse to parse arguments. We first create a parser.
parser = argparse.ArgumentParser(description="ToDo")
# Adding arguments to the parser
parser.add_argument("-a", "--alphabet", type=str, help="Alphabet (DNA, RNA, prot): Unambiguous IUPAC alphabet that will be used.", action="store")
parser.add_argument("-A", "--alpha", type=float, help="Alpha parameter for statistic analysis", action="store")
parser.add_argument("-c", "--centering", type=int, help="Centering parameter", action="store")
parser.add_argument("-g", "--graph", type=int, help="Plot graphs................. There, it's done.", action="store")
parser.add_argument("-s", "--sliding", type=int, help="Sl______iding parameter", action="store")
parser.add_argument("-t", "--trx-scale-file", type=str, help="Path to trx scale file", action="store")
parser.add_argument("-a", "--alphabet", type=str, default="dna", help="Alphabet (DNA, RNA, prot): Unambiguous IUPAC alphabet that will be used.", action="store")
parser.add_argument("-A", "--alpha", type=float, default=0.05, help="Alpha parameter for statistic analysis", action="store")
parser.add_argument("-c", "--centering", type=int, default=72, help="Centering parameter", action="store")
parser.add_argument("-g", "--graph", type=int, default=None, help="Plot graphs................. There, it's done.", action="store")
parser.add_argument("-s", "--sliding", type=int, default=72, help="Sl______iding parameter", action="store")
parser.add_argument("-t", "--trx-scale-file", type=str, default=trx_lib.scale_file, help="Path to trx scale file", action="store")
parser.add_argument("datadir", type=str, help="Path to the data directory.", action="store")
# Then, parse it
args = parser.parse_args()
if not args.datadir:
raise EnvironmentError("You have to give a data directory.")
for directory in os.listdir(args.datadir):
cur_dir = os.path.join(args.datadir, directory)
md_parameters = {}
label = ""
fasta_file = None
for filepath in os.listdir(cur_dir):
if filepath == "sequence.fasta":
fasta_file = os.path.join(cur_dir, filepath)
if filepath == "label":
label = open(os.path.join(cur_dir, filepath)).readlines()[0].strip("\n")
if ".dat" in filepath:
print filepath.replace(".dat", "")
md_parameters[filepath.replace(".dat", '')] = os.path.join(cur_dir, filepath)
if fasta_file is None:
raise NoFastaProvided("There is no fasta file in %r" % (cur_dir,))
print label
print fasta_file
print md_parameters
seq = sequence.Sequence(fasta_file, md_parameters, label, alphabet=args.alphabet, trx_scale_path=args.trx_scale_file, sliding=args.sliding, centering=args.centering, alpha=args.alpha, graph=args.graph)
dbcursor.store_sequence(seq)
......@@ -23,12 +23,24 @@ class PGCursor(object):
main_seq_query = """INSERT INTO
sequences
(an, name, description, sequence, group, alphabet, alpha, sliding, centering, trx_scale_path)
(an, name, description, sequence, label, alphabet, alpha, sliding, centering, trx_scale_path)
VALUES
(%(an)s, %(name)s, %(description)s, %(sequence)s, %(group)s, %(alphabet)s, %(alpha)s, %(sliding)s, %(centering)s, %(trx_scale_path)s)
(%(an)s, %(name)s, %(description)s, %(sequence)s, %(label)s, %(alphabet)s, %(alpha)s, %(sliding)s, %(centering)s, %(trx_scale_path)s)
RETURNING
id;"""
self._cur.execute(main_seq_query, seq.seq_dict)
dic_to_sql = {
'an': seq.an,
'name': seq.name,
'description': seq.description,
'sequence': seq.sequence,
'label': seq.label,
'alphabet': seq.alphabet,
'alpha': seq.alpha,
'sliding': seq.sliding,
'centering': seq.centering,
'trx_scale_path': seq.trx_scale_path,
}
self._cur.execute(main_seq_query, seq)
seq_id = self._cur.fetchone()[0]
for helicoidal_parameter in seq.md:
......@@ -47,11 +59,11 @@ class PGCursor(object):
(%(seq_id)s, %(complete_peak_freq)s, %(complete_peak)s, %(complete_size)s, %(center_peak_freq)s, %(center_peak)s, %(center_size)s, %(sliding_peak_freq)s, %(sliding_peak)s, %(sliding_size)s);"""
dic_to_sql = dict(seq.trx)
dic_to_sql.update({'seq_id': seq_id})
self._cur.execute(trx_query, seq.trx)
self._cur.execute(trx_query, dic_to_sql)
for (correl_types, bunch_of_data) in seq.correlation:
for (correl_types, bunch_of_data) in seq.correlation.iteritems():
type_a, type_b = correl_types.split("/")
for (frame_num, data) in bunch_of_data:
for (frame_num, data) in bunch_of_data.iteritems():
corr_query = """INSERT INTO
correlations
(seq_id, frame_num, type_a, type_b, spearman_complete, spearman_center, pearson_complete, pearson_center)
......
......@@ -15,7 +15,7 @@ def split_frame(frame):
x = []
y = []
for (key, value) in frame:
for (key, value) in frame.iteritems():
# Isinstance is designed to check if key is of type int
# So that we get only (key, value) that are (position, value) and no
# ("frame", n)
......@@ -89,7 +89,8 @@ def get_noticeable_data(fft_data):
# We do a reverse copy of transform because we want
# to fetch the greater peak_index.
reverse_transform = list(transform)
# XXX - 2: is there to avoid noise
reverse_transform = list(transform)[2:]
reverse_transform.reverse()
# We then compute the peak_index and the peak
......@@ -141,7 +142,7 @@ def sliding_fft(frame, window=72):
frame_number, (_, y) = split_frame(frame)
if len(y) <= window:
raise ShiftOutOfRange("Choosen shift is too long for query sequence.")
raise ShiftOutOfRange("Choosen shift (%r) is too long for query sequence (which len is %r)." % (window, len(y)))
for i in xrange(0, len(y) - window):
modified_x.append(i)
......
......@@ -5,6 +5,7 @@ Docstring
"""
import os
import re
# This dictionary is there to translate TRX scale
nucleotide_map = {"Y" : "[CT]", "R" : "[AG]"}
......
......@@ -16,7 +16,7 @@ class Sequence(object):
Sequence class
"""
def __init__(self, fasta, md_parameters, group="", alphabet="dna", trx_scale_path=trx_lib.scale_file, sliding=72, centering=72, alpha=0.05, graph=None):
def __init__(self, fasta, md_parameters, label="", alphabet="dna", trx_scale_path=trx_lib.scale_file, sliding=72, centering=72, alpha=0.05, graph=None):
# XXX
"""
......@@ -31,7 +31,7 @@ class Sequence(object):
self.sliding = sliding
self.alpha = alpha
self.centering = centering
self.group = group
self.label = label
self.graph = graph
self.trx_scale_path = trx_scale_path
......@@ -81,10 +81,10 @@ class Sequence(object):
* description
"""
parsed_sequence = file_tools.load_fasta(self.alphabet, fasta)
self.sequence = parsed_sequence.seq
self.name = parsed_sequence.name
self.an = parsed_sequence.id.split("|")[3]
self.description = parsed_sequence.description
self.sequence = str(parsed_sequence.seq)
self.name = str(parsed_sequence.name)
self.an = str(parsed_sequence.id.split("|")[3])
self.description = str(parsed_sequence.description)
def load_md(self, md_parameters):
# XXX
......@@ -135,7 +135,7 @@ class Sequence(object):
helicoidal_parameters2 = [] + helicoidal_parameters
for param in helicoidal_parameters:
helicoidal_parameters2.pop(param)
helicoidal_parameters2.remove(param)
for param2 in helicoidal_parameters2:
self.correlation[param + "/" + param2] = correlation_lib.compute_correlations(_md_params[param], _md_params[param2], param, param2, alpha=self.alpha, centering=self.centering)
......@@ -147,7 +147,7 @@ class Sequence(object):
Loads a Yasara MD file from a MD Yasara data file
"""
trx_dict = trx_lib.match(self.sequence, trx_lib.parse_trx_scale(self.trx_scale_path), self.centering)
trx_dict = trx_lib.match(self.sequence, trx_lib.parse_trx_scale(self.trx_scale_path))
fft = fft_lib.fft(trx_dict)
fft_c = fft_lib.fft(trx_dict, centering=self.centering)
......
CREATE TABLE "sequences" (
id SERIAL,
"an" CHARACTER VARYING(20),
"name" CHARACTER VARYING(20) NOT NULL,
"name" CHARACTER VARYING(300) NOT NULL,
"description" TEXT NOT NULL,
"sequence" TEXT NOT NULL,
"group" CHARACTER VARYING(50),
"label" CHARACTER VARYING(50),
"alphabet" CHARACTER VARYING(5) NOT NULL,
"alpha" REAL NOT NULL,
"sliding" SMALLINT NOT NULL,
......@@ -15,7 +15,7 @@ CREATE TABLE "sequences" (
CREATE TABLE "md_rise" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"complete_peak_freq" REAL NOT NULL,
"complete_peak" REAL NOT NULL,
"complete_size" REAL NOT NULL,
......@@ -29,7 +29,7 @@ CREATE TABLE "md_rise" (
CREATE TABLE "md_roll" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"complete_peak_freq" REAL NOT NULL,
"complete_peak" REAL NOT NULL,
"complete_size" REAL NOT NULL,
......@@ -43,7 +43,7 @@ CREATE TABLE "md_roll" (
CREATE TABLE "md_shift" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"complete_peak_freq" REAL NOT NULL,
"complete_peak" REAL NOT NULL,
"complete_size" REAL NOT NULL,
......@@ -57,7 +57,7 @@ CREATE TABLE "md_shift" (
CREATE TABLE "md_slide" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"complete_peak_freq" REAL NOT NULL,
"complete_peak" REAL NOT NULL,
"complete_size" REAL NOT NULL,
......@@ -71,7 +71,7 @@ CREATE TABLE "md_slide" (
CREATE TABLE "md_tilt" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"complete_peak_freq" REAL NOT NULL,
"complete_peak" REAL NOT NULL,
"complete_size" REAL NOT NULL,
......@@ -85,7 +85,7 @@ CREATE TABLE "md_tilt" (
CREATE TABLE "md_twist" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"complete_peak_freq" REAL NOT NULL,
"complete_peak" REAL NOT NULL,
"complete_size" REAL NOT NULL,
......@@ -112,7 +112,7 @@ CREATE TABLE "trx" (
CREATE TABLE "correlations" (
"seq_id" INTEGER NOT NULL REFERENCES "sequences",
"frame_num" SMALLINT NOT NULL,
"frame_num" INTEGER NOT NULL,
"type_a" CHARACTER VARYING(6) NOT NULL,
"type_b" CHARACTER VARYING(6) NOT NULL,
"spearman_complete" BOOLEAN NOT NULL,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment