Commit 13e29f53 authored by Pauline Pommeret
Browse files

Comments, docstring, making pylint happy

parent b370ea69
......@@ -6,11 +6,15 @@ Docstring
import os
import re
import numpy
from lib.XylokExceptions import NoMatch
from lib.XylokExceptions import ShiftOutOfRange
# This dictionary translates the TRX scale letters (Y, R) into regex character classes
nucleotide_map = {"Y" : "[CT]", "R" : "[AG]"}
NUCLEOTIDE_MAP = {"Y" : "[CT]", "R" : "[AG]"}
scale_file = os.path.join(os.path.dirname(__file__), '../data/trx_scale')
SCALE_FILE = os.path.join(os.path.dirname(__file__), '../data/trx_scale')
def translate_trx_scale(pattern, n_map):
"""
......@@ -29,10 +33,10 @@ def parse_trx_scale(path_to_scale):
alphabet later.
Returns a TRX dictionary that looks like this:
TRX = {'AA' : 5.0, ..., '[CT]CG[AG]': 59.0, ...}
trx = {'AA' : 5.0, ..., '[CT]CG[AG]': 59.0, ...}
"""
TRX = {}
trx = {}
with open(path_to_scale, "r") as handle:
line = handle.readline()
......@@ -46,24 +50,24 @@ def parse_trx_scale(path_to_scale):
# :py:module:lib/trx_tools:translate_trx_scale
# line.split()[1]: the associated value that needs to be
# cast into float
TRX[translate_trx_scale(line.split()[0], nucleotide_map)] = float(line.split()[1])
trx[translate_trx_scale(line.split()[0], NUCLEOTIDE_MAP)] = float(line.split()[1])
line = handle.readline()
# At this point, we have a TRX dictionary that looks like this:
# TRX = {'AA' : 5.0, ..., '[CT]CG[AG]': 59.0, ...}
return TRX
# trx = {'AA' : 5.0, ..., '[CT]CG[AG]': 59.0, ...}
return trx
def pair_score(string, TRX):
def pair_score(string, trx):
"""
Tries to match ``string`` (a 2 or 4 letter long string) with the patterns
in ``TRX`` (TRX scale dictionary).
in ``trx`` (TRX scale dictionary).
Returns the value associated to ``string`` or raises a NoMatch Exception
"""
potential = []
if len(string) == 4:
# Iteration on the keys of the TRX dictionary
for pattern in TRX.keys():
for pattern in trx.keys():
# Test whether the 2 central nucleotides are in the pattern
if re.search(string[1:3], pattern):
# Only one pattern 2 letters long is possible and it must be kept
......@@ -75,7 +79,7 @@ def pair_score(string, TRX):
potential.append(pattern)
elif len(string) == 2:
# Iteration on the keys of the TRX dictionary whose length is equal to 2
for pattern in [i for i in TRX.keys() if len(i)==2]:
for pattern in [i for i in trx.keys() if len(i) == 2]:
if re.search(string, pattern):
potential.append(pattern)
......@@ -86,18 +90,18 @@ def pair_score(string, TRX):
else:
# There might be up to 2 patterns in `potential`: a 2 letter long and a
# 4 letter long. The selected pattern is the longest one (specificity)
return TRX[max(potential)]
return trx[max(potential)]
def match(sequence, TRX, center=None):
def match(sequence, trx, center=None):
"""
Translates a raw ``sequence`` into a list of values of ``TRX``.
Translates a raw ``sequence`` into a list of values of ``trx``.
Position i studies phosphate between nlt i and i+1
72 pb center
"""
sequence_TRX = {}
sequence_trx = {}
# Use str(sequence) instead of sequence.tostring() because the
# documentation recommends it nowadays (25/11/2014)
sequence = str(sequence)
......@@ -111,17 +115,17 @@ def match(sequence, TRX, center=None):
for position in xrange(offset, offset+length):
if position == 0:
sequence_TRX[position] = pair_score(sequence[position:position+2], TRX)
sequence_trx[position] = pair_score(sequence[position:position+2], trx)
elif position == len(sequence)-2:
sequence_TRX[position] = pair_score(sequence[position:position+2], TRX)
sequence_trx[position] = pair_score(sequence[position:position+2], trx)
else:
sequence_TRX[position] = pair_score(sequence[position-1:position+3], TRX)
return sequence_TRX
sequence_trx[position] = pair_score(sequence[position-1:position+3], trx)
return sequence_trx
def sliding_pb(sequence_TRX, shift=72):
def sliding_pb(sequence_trx, shift=72):
"""
Uses ``sequence_TRX`` (dictionary where keys are position in sequence and
Uses ``sequence_trx`` (dictionary where keys are position in sequence and
values are TRX values associated with the nucleotides) and computes the
mean value on ``shift`` (int) sliding bp.
......@@ -129,12 +133,12 @@ def sliding_pb(sequence_TRX, shift=72):
Returns a dictionary.
"""
# Test if sequence_TRX and shift have compatible length
if len(sequence_TRX) > shift:
sliding_TRX = {}
for position in xrange(0, len(sequence)-shift):
sliding_TRX[position] = numpy.round(numpy.mean([sequence_TRX[i] for i in xrange(position, position+shift)]), decimals=2)
return sliding_TRX
# Test if sequence_trx and shift have compatible length
if len(sequence_trx) > shift:
sliding_trx = {}
for position in xrange(0, len(sequence_trx)-shift):
sliding_trx[position] = numpy.round(numpy.mean([sequence_trx[i] for i in xrange(position, position+shift)]), decimals=2)
return sliding_trx
else:
raise ShiftOutOfRange("Choosen shift is too long for query sequence.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment