Commit 92fd9acc authored by Pauline Pommeret's avatar Pauline Pommeret
Browse files

[correlation] Correlation handling in sequence.

parent bffc42e3
#! /usr/bin/env python
# XXX French
"""
"""
import math
import json
import pandas
import lib.fft
STUDENT_FILE = "student.json"
def compute_correlations(_md_param1, _md_param2, helicoidal_parameter_1, helicoidal_parameter_2, alpha=0.05, centering=72):
"""
"""
results = {}
for (frame1, frame2) in zip(_md_param1, _md_param2):
dataframe = build_dataframe(frame1, frame2, helicoidal_parameter_1, helicoidal_parameter_2)
results[frame1['frame']] = stats_test(dataframe, helicoidal_parameter_1, helicoidal_parameter_2, alpha, centering)
return results
def fetch_student(alpha, ddl):
"""Fetch data in student database
"""
with open(STUDENT_FILE, 'r') as student_file:
student_dict = json.load(student_file)
if alpha not in student_dict:
raise KeyError("alpha = %r is not in the Student table" % (alpha,))
if ddl not in student_dict[alpha]:
# Using usual rounding rules
ddl = int(round(float(ddl)/10))
if ddl > 140:
# Then ddl = \infty
ddl = 150
return student_dict[alpha][ddl]
def build_dataframe(helicoidal_parameter_1_data, helicoidal_parameter_2_data, helicoidal_parameter_1, helicoidal_parameter_2):
"""Build a pandas DataFrame object from raw data
"""
dataframe = {
helicoidal_parameter_1: lib.fft.split_frame(helicoidal_parameter_1_data)[1][1],
helicoidal_parameter_2: lib.fft.split_frame(helicoidal_parameter_2_data)[1][1],
}
return pandas.DataFrame(dataframe)
def stats_test(dataframe, helicoidal_parameter_1, helicoidal_parameter_2, alpha=0.05, centering=72):
"""
"""
result = {
"spearman": {
"complete": False,
"center": False,
},
"pearson": {
"complete": False,
"center": False,
},
}
seq_length = len(dataframe[helicoidal_parameter_1])
for test_type in result:
for chan in result[test_type]:
if chan == "complete":
length = seq_length
offset = 0
else:
length = centering
offset = int((seq_length - length)/2)
# Compute coefficient
coeff = dataframe[helicoidal_parameter_1][offset:offset+length].corr(dataframe[helicoidal_parameter_2][offset:offset+length], method=test_type)
# pandas donne le coefficient de on calcule la variable
# de decision pour le test associe
test = coeff * math.sqrt(length - 2) / (1 - coeff**2)
t_alpha = fetch_student(alpha, length - 2)
if -t_alpha < test < t_alpha:
result[test_type][chan] = False
else:
result[test_type][chan] = True
return result
......@@ -7,6 +7,8 @@ Docstring
import lib.file_tools as file_tools
import lib.fft as fft_lib
import lib.correlation as correlation_lib
import trx
class Sequence(object):
# XXX
......@@ -14,7 +16,7 @@ class Sequence(object):
Sequence class
"""
def __init__(self, alphabet, fasta, md_parameters, sliding=72, centering=72, graph=None):
def __init__(self, alphabet, fasta, md_parameters, trx_scale_path=trx.scale_file, sliding=72, centering=72, alpha=0.05, graph=None):
# XXX
"""
......@@ -27,10 +29,24 @@ class Sequence(object):
self.alphabet = alphabet
self.sliding = sliding
self.alpha = alpha
self.centering = centering
self.graph = graph
self.trx_scale_path = trx_scale_path
# Default values for attrs
self.an = ""
self.sequence = ""
self.name = ""
self.description = ""
self.md = {}
self.correlation = {}
self.trx = {}
# Fetch
self.load_fasta(fasta)
self.load_md(md_parameters)
self.load_trx()
def load_fasta(self, fasta):
"""
......@@ -54,7 +70,6 @@ class Sequence(object):
Loads a Yasara MD file from a MD Yasara data file
"""
self.md = {}
_md_params = {}
for (helicoidal_parameter, path) in md_parameters.iteritems():
_md_params[helicoidal_parameter] = file_tools.load_md_data(path)
......@@ -97,3 +112,49 @@ class Sequence(object):
# For now, we got all md data, except correlation data. This is why we have kept _md_params
# We have a double loop to run amongst possible helicoidal_parms values
# We get lexicographically-ordered lists
helicoidal_parameters = [] + _md_params.keys()
helicoidal_parameters.sort()
helicoidal_parameters2 = [] + helicoidal_parameters
for param in helicoidal_parameters:
helicoidal_parameters2.pop(param)
for param2 in helicoidal_parameters2:
self.correlation[param + "/" + param2] = correlation_lib.compute_correlations(_md_params[param], _md_params[param2], param, param2, alpha=self.alpha, centering=self.centering)
# End.
def load_trx(self):
# XXX
"""
Loads a Yasara MD file from a MD Yasara data file
"""
trx_dict = trx.match(self.sequence, trx.tools.parse_trx_scale(self.trx_scale_path), self.centering)
fft = fft_lib.fft(trx_dict)
fft_c = fft_lib.fft(trx_dict, centering=self.centering)
fft_s = fft_lib.sliding_fft(trx_dict, window=self.sliding)
good_values = fft_lib.get_noticeable_data(fft)
good_values_c = fft_lib.get_noticeable_data(fft_c)
good_values_s = fft_lib.get_noticeable_data(fft_s)
self.trx = {
'complete' : {
'peak_freq' : good_values[i][0],
'peak' : good_values[i][1],
'size' : good_values[i][2],
},
'center' : {
'peak_freq' : good_values_c[i][0],
'peak' : good_values_c[i][1],
'size' : good_values_c[i][2],
},
'sliding' : {
'peak_freq' : good_values_s[i][0],
'peak' : good_values_s[i][1],
'size' : good_values_s[i][2],
},
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment