Commit 1d53ef25 authored by Pauline Pommeret's avatar Pauline Pommeret

Truckloads of comments + printing info to user

parent e5c28bcb
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
Un fichier pour faire des tests
This program populates a pre-existing database with the results.
import os
......@@ -10,13 +9,16 @@ from argparse import RawDescriptionHelpFormatter
import sequence
from lib.XylokExceptions import NoFastaProvided
from lib.XylokExceptions import NoLabelProvided
import lib.trx as trx_lib
import lib.database as database
if __name__ == "__main__":
DBCURSOR = database.PGCursor()
# Use argparse to parse arguments. We first create a parser.
# -*- Parsing command-line arguments using argparse -*- #
# TEXT is a string that holds the description of the program
TEXT = """This program creates a postgresql database that holds the following data:
- sequence information (name, accession number, sequence alphabet, description, etc)
- analysis parameters (Student table's alpha, number of bp in center, smooth window, etc)
......@@ -26,16 +28,16 @@ if __name__ == "__main__":
The data directory must be organized this way:
\\-group (mandatory)
\\-sequence.fasta (mandatory)
\\-group (mandatory)
\\-sequence.fasta (mandatory)
A .dat file must be organized this way:
frame C16/C17 C17/G18 G18/A19 ...
......@@ -47,10 +49,10 @@ A .dat file must me organized this way:
# Creates the parser. TEXT is used as the program description and
# RawDescriptionHelpFormatter displays it as written instead of re-wrapping
# its lines in the help message.
PARSER = argparse.ArgumentParser(description=TEXT, formatter_class=RawDescriptionHelpFormatter)
# Fills the parser with the program arguments. Every option below stores a
# single typed value and falls back to its default when it is not given.
# -a / --alphabet: sequence alphabet as a string (default "dna")
PARSER.add_argument("-a", "--alphabet", type=str, default="dna", help="[str] sequences alphabet (dna, rna, prot), currently only dna is implemented (default: 'dna')", action="store")
# -A / --alpha: alpha parameter as a float (default 0.05)
PARSER.add_argument("-A", "--alpha", type=float, default=0.05, help="[float] alpha parameter of the Student table that is to be used in the statistical analysis (default: 0.05)", action="store")
# -c / --centering: number of bp forming the center, as an int (default 72)
PARSER.add_argument("-c", "--centering", type=int, default=72, help="[int] number of bp that are to be considered as the center of the sequence (default: 72)", action="store")
......@@ -59,28 +61,52 @@ A .dat file must me organized this way:
# -g / --graph: number of graphs as an int (default None); the help text
# states that plotting is not implemented yet
PARSER.add_argument("-g", "--graph", type=int, default=None, help="[int] number of graphs that are to be plotted, currently not implemented (default: None)", action="store")
# datadir: the only positional argument, path to the data directory. Being
# positional, argparse itself reports a usage error when it is missing.
PARSER.add_argument("datadir", type=str, help="[str] path to the data directory", action="store")
# Parses the arguments (no explicit list is passed, so argparse reads the
# command line) and keeps the resulting namespace in ARGS
ARGS = PARSER.parse_args()
# datadir is mandatory: if none is given, nothing can be done. argparse
# already rejects a missing positional argument, so this check only triggers
# when an empty string is passed.
if not ARGS.datadir:
    raise EnvironmentError("You have to give a data directory.")

# Iterates on every directory in datadir (each directory holds the data for
# one sequence)
for directory in os.listdir(ARGS.datadir):
    # Creates the path to the current directory (which is handy)
    cur_dir = os.path.join(ARGS.datadir, directory)

    # Initializes empty parameters
    md_parameters = {}
    label = ""
    fasta_file = None

    # Iterates on every file in current directory
    for filepath in os.listdir(cur_dir):
        if filepath == "sequence.fasta":
            fasta_file = os.path.join(cur_dir, filepath)
        if filepath == "label":
            # The label is the first line of the file without its trailing
            # newline. The file is closed as soon as that line is read; an
            # empty file yields "" and is reported as a missing label below.
            with open(os.path.join(cur_dir, filepath)) as label_file:
                label = label_file.readline().strip("\n")
        # Substring match: any file whose name contains ".dat" is recorded,
        # keyed by its name with ".dat" removed.
        if ".dat" in filepath:
            print(filepath.replace(".dat", ""))
            md_parameters[filepath.replace(".dat", '')] = os.path.join(cur_dir, filepath)

    # Fasta file is mandatory (a sequence is required)
    if fasta_file is None:
        raise NoFastaProvided("There is no fasta file in %r" % (cur_dir,))

    print(label)
    print(fasta_file)
    print(md_parameters)

    # Label file is mandatory (label is mandatory for supervised learning).
    # label starts as "" (never None), so emptiness is what has to be tested.
    if not label:
        raise NoLabelProvided("There is no label file in %r" % (cur_dir,))

    # Tells the users that something is happening (some users do like that)
    print("Processing %r" % (cur_dir,))

    # Creates a Sequence object
    seq = sequence.Sequence(
        fasta_file,
        md_parameters,
        label,
        alphabet=ARGS.alphabet,
        trx_scale_path=ARGS.trx_scale_file,
        sliding=ARGS.sliding,
        centering=ARGS.centering,
        alpha=ARGS.alpha,
        graph=ARGS.graph,
    )

    # Tells the users that something is happening (some users do like that)
    print("Adding data stored in %r in the database" % (cur_dir,))

    # Stores the sequence "seq" in the database
    # NOTE(review): no storing statement is visible at this point of the
    # file; confirm where "seq" is actually written to the database.

    # Done, moving on to next directory
    print("Data stored in %r added to database" % (cur_dir,))

# Exiting for loop, end
print("Database successfully populated. Congratulations. You may proceed")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment