Commit 64bda2d3 authored by Pauline Pommeret's avatar Pauline Pommeret
Browse files

Comments + applying pylint comments

parent f3ab86f8
#! /usr/bin/env python
# XXX French
"""
This module contains every function required to compute the correlations in the
Xylok project.
Requires:
- :py:mod:`os`
- :py:mod:`math`
- :py:mod:`cPickle`
- :py:mod:`pandas`
- :py:mod:`lib.fft_tools`
- :py:data:`data.student.cPickle`
"""
import os
......@@ -14,6 +23,29 @@ STUDENT_FILE = os.path.join(os.path.dirname(__file__), "../data/student.cpickle"
def compute_correlations(_md_param1, _md_param2, helicoidal_parameter_1, helicoidal_parameter_2, alpha=0.05, centering=72):
"""
Computes result of Spearman and Pearson tests for every frame of 2
helicoidal parameters.
Parameters:
- ``_md_param1`` : dictionary of the data of first helicoidal
parameter
- ``_md_param2`` : dictionary of the data of second helicoidal
parameter
- ``helicoidal_parameter_1`` : first helicoidal parameter, str ('roll',
'twist', 'slide', etc)
- ``helicoidal_parameter_2`` : second helicoidal parameter, str ('roll',
'twist', 'slide', etc)
- ``alpha=0.05`` : alpha parameter of the Student table
(default 0.05)
- ``centering=72`` : number of bp considered as the sequence
center (default 72bp)
Uses:
- :py:meth:`build_dataframe`
- :py:meth:`stats_test`
Returns:
- dictionary of the computed correlations results (True/False) for every
pair of helicoidal parameters
"""
results = {}
......@@ -25,27 +57,56 @@ def compute_correlations(_md_param1, _md_param2, helicoidal_parameter_1, helicoi
return results
def fetch_student(alpha, ddl):
    """
    Fetches the Student value required from the Student table stored in a
    cPickle.

    Parameters:
        - ``alpha`` : alpha value in bilateral Student table (float)
        - ``ddl``   : degrees of freedom in Student table (int)

    Uses:
        - :py:data:`STUDENT_FILE`

    Returns:
        - Student value for ``alpha`` and ``ddl`` (float)

    Raises:
        - :py:exc:`KeyError` if ``alpha`` is not in the Student table, or if
          the (possibly rounded) ``ddl`` is still not a key of the table
    """
    # A pickle is a byte stream: open it in binary mode so that loading does
    # not depend on the platform's text-mode translation.
    # NOTE(review): unpickling can execute arbitrary code; STUDENT_FILE must
    # stay a trusted file shipped with the project.
    with open(STUDENT_FILE, 'rb') as student_file:
        # Loading student table from a cPickle
        student_dict = cPickle.load(student_file)

    if alpha not in student_dict:
        # If the user gives a value not stored in the Student table, the user
        # deserves a KeyError
        raise KeyError("alpha = %r is not in the Student table" % (alpha,))

    if ddl not in student_dict[alpha]:
        # Not all ddl values are stored in the Student table, so, if the ddl
        # given by the user is not in the table, a new ddl value is computed
        # using usual rounding rules: round to the nearest multiple of ten
        # (presumably the step of the sparse part of the table -- confirm
        # against the pickled data). The division by ten must be undone,
        # otherwise e.g. ddl = 74 would be looked up as 7 instead of 70.
        ddl = int(round(float(ddl) / 10)) * 10
        if ddl > 140:
            # Then $ddl = \infty$, which is stored under the key 150
            ddl = 150

    return student_dict[alpha][ddl]
def build_dataframe(helicoidal_parameter_1_data, helicoidal_parameter_2_data, helicoidal_parameter_1, helicoidal_parameter_2):
"""Build a pandas DataFrame object from raw data
"""
Builds a :py:class:`pandas.core.frame.DataFrame` object from raw data
Parameters:
- ``helicoidal_parameter_1_data`` : dictionary with a frame of 1st param
- ``helicoidal_parameter_2_data`` : dictionary with a frame of 2nd param
- ``helicoidal_parameter_1`` : 1st parameter name (str)
- ``helicoidal_parameter_2`` : 2nd parameter name (str)
Uses:
- :py:meth:`lib.fft_tools.split_frame`
Returns:
- an object of type :py:class:`pandas.core.frame.DataFrame`
"""
dataframe = {
......@@ -57,6 +118,26 @@ def build_dataframe(helicoidal_parameter_1_data, helicoidal_parameter_2_data, he
def stats_test(dataframe, helicoidal_parameter_1, helicoidal_parameter_2, alpha=0.05, centering=72):
"""
Carries out Spearman and Pearson tests on the data in dataframe, on both
complete sequence and center sequence.
Test answers are booleans.
Parameters:
- ``dataframe`` : :py:class:`pandas.core.frame.DataFrame`
with the data of a frame of 2 parameters
- ``helicoidal_parameter_1`` : name of 1st helicoidal parameter (str)
- ``helicoidal_parameter_2`` : name of 2nd helicoidal parameter (str)
- ``alpha`` : param for Student table (default 0.05)
- ``centering`` : number of bp considered as center
(default 72bp)
Uses:
- The :py:meth:`corr` of module :py:mod:`pandas` to compute values of
test statistics
- :py:meth:`math.sqrt`
Returns:
- a dictionary where keys are "testName_sequenceLength" and values are
booleans
"""
result = {
......@@ -68,7 +149,12 @@ def stats_test(dataframe, helicoidal_parameter_1, helicoidal_parameter_2, alpha=
seq_length = len(dataframe[helicoidal_parameter_1])
for test_type in ["spearman", "pearson"]:
# Since the test type is an argument of corr, iterating on test_type
# makes the code DRYer
for chan in ["complete", "center"]:
# Since the process is identical for the complete sequence and for
# the center of the sequence, iterating makes the code DRYer
# (Sorry for the pun, couldn't find a better name)
if chan == "complete":
length = seq_length
offset = 0
......@@ -76,16 +162,19 @@ def stats_test(dataframe, helicoidal_parameter_1, helicoidal_parameter_2, alpha=
length = centering
offset = int((seq_length - length)/2)
# Compute coefficient
# Computes coefficient
coeff = dataframe[helicoidal_parameter_1][offset:offset+length].corr(dataframe[helicoidal_parameter_2][offset:offset+length], method=test_type)
# pandas donne le coefficient de on calcule la variable
# de decision pour le test associe
# :py:mod:`pandas` gives us the value of the coefficient but it
# doesn't give us the statistic
test = coeff * math.sqrt(length - 2) / (1 - coeff**2)
# :py:mod:`pandas` doesn't give a straight answer to the test.
# It needs to be done "by hand"
t_alpha = fetch_student(alpha, length - 2)
if -t_alpha < test < t_alpha:
result["%s_%s" % (test_type, chan)] = False
else:
result["%s_%s" % (test_type, chan)] = True
return result
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment