Commit 414f61a6 authored by Pauline Pommeret's avatar Pauline Pommeret

Docstrings + comments

parent 51beec6e
#!/usr/bin/env python2.7
# XXX
"""
Docstring
This module defines the functions needed to process a sequence into a
dictionary for further use.
"""
import os
import re
from lib.XylokExceptions import NoMatch
from lib.XylokExceptions import ShiftOutOfRange
# Translation table for the ambiguous nucleotide codes found in the TRX scale:
# each code is replaced by a bracketed set of unambiguous nucleotides
# ("Y" -> "[CT]", "R" -> "[AG]").
NUCLEOTIDE_MAP = {"Y" : "[CT]", "R" : "[AG]"}
# Default TRX scale file, located relative to the directory of this module
SCALE_FILE = os.path.join(os.path.dirname(__file__), '../data/trx_scale')
def translate_trx_scale(pattern, n_map):
    """
    Translates a TRX scale pattern character by character.

    It uses a nucleotide map: every character of ``pattern`` that is a key of
    ``n_map`` is replaced by the associated value, every other character is
    kept as is.

    Example: "YCGR" becomes "[CT]CG[AG]" with the map
    {"Y": "[CT]", "R": "[AG]"}

    Parameters:
        - ``pattern`` : string (e.g. "YCGR")
        - ``n_map`` : dictionary mapping one character to its replacement
          string

    Returns:
        - a string
    """
    # NB: D.get(k[,d]) -> D[k] if k in D, else d. Using the character itself
    # as the default leaves unmapped characters untouched.
    return "".join([n_map.get(i, i) for i in pattern])
def parse_trx_scale(path_to_scale):
"""
Parses a TRX scale file and creates a dictionary with the scale. It uses
:py:func:`translate_trx_scale` in order to use an unambiguous DNA
alphabet later.
Parses a TRX scale file and creates a dictionary with the scale.
Returns a TRX dictionary that looks like this:
trx = {'AA' : 5.0, ..., '[CT]CG[AG]': 59.0, ...}
"""
NB: TRX scale file must look like :py:data:`data.trx_scale`
Parameters:
- ``path_to_scale`` : string, path to the TRX scale to be used
Uses:
- :py:func:`translate_trx_scale`
Returns:
- a TRX dictionary that looks like {..., '[CT]CG[AG]': 59.0, ...}
"""
# Initializes a new empty dictionary scale
trx = {}
with open(path_to_scale, "r") as handle:
......@@ -42,7 +56,7 @@ def parse_trx_scale(path_to_scale):
# Processing every data line
while line:
# Not taking into account empty lines and lines starting with
# # (comments)
# "#" (comments)
if line.strip() != "" and (line.startswith("#") == False):
# line.split()[0]: the nucleotide string (2 or 4 letters) that
# needs translation, using
......@@ -58,26 +72,41 @@ def parse_trx_scale(path_to_scale):
def pair_score(string, trx):
"""
Tries to match ``string`` (a 2 or 4 letter long string) with the patterns
in ``trx`` (TRX scale dictionary).
Tries to match ``string`` to the pattern stored in the TRX scale
dictionary and returns the value associated to ``string`` in the scale.
Returns the value associated to ``string`` or raises a NoMatch Exception
Parameters:
- ``string`` : 2 or 4 letters long string
- ``trx`` : TRX scale dictionary
Uses:
- :py:mod:`re`
Returns:
- a float
May raise:
- NoMatch
"""
# Initializes a new empty list for all the patterns that match the query
# string
potential = []
if len(string) == 4:
# Iteration on the keys of the TRX dictionary
# Iteration on all keys of the TRX dictionary
for pattern in trx.keys():
# Test whether the 2 central nucleotides are in the pattern
if re.search(string[1:3], pattern):
# Only one pattern 2 letters long is possible and it must be kept
# in case the tetranucleotides don't match the whole string
# Only one pattern 2 letters long is possible and it must be
# kept in case the tetranucleotides don't match the whole
# string
if len(pattern) == 2:
potential.append(pattern)
# Only one 4 letters long pattern is possible.
elif re.search(string[0], pattern[1:3]) and re.search(string[-1], pattern[7:9]):
potential.append(pattern)
elif len(string) == 2:
# Iteration on the keys of the TRX dictionary which have a length equal 2
# Iteration on the keys of the TRX dictionary which have a length == 2
for pattern in [i for i in trx.keys() if len(i) == 2]:
if re.search(string, pattern):
potential.append(pattern)
......@@ -94,11 +123,21 @@ def pair_score(string, trx):
def match(sequence, trx, center=None):
"""
Translates a raw ``sequence`` into a list of values of ``trx``.
Translates a raw ``sequence`` into a dictionary whose structure is:
{..., position: trx_value, ...}
NB: position i studies phosphate between nucleotide i and nucleotide i+1.
Position i studies phosphate between nlt i and i+1
Parameters:
- ``sequence`` : SeqRecord
- ``trx`` : TRX scale dictionary
- ``center`` : int that gives the number of bp that are considered as
the sequence center (default: None)
Uses:
- :py:meth:`pair_score`
72 pb center
Returns:
- a dictionary
"""
sequence_trx = {}
# Use of str(sequence) instead of sequence.tostring() because the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment