From d21773a5a404d344b887b50aa8510c55693353e3 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 10 Jan 2019 17:53:43 -0500 Subject: [PATCH 1/4] Option to return NaNs in compute_correlation --- brainiak/fcma/util.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/brainiak/fcma/util.py b/brainiak/fcma/util.py index b513e4fcc..5c22e06b0 100644 --- a/brainiak/fcma/util.py +++ b/brainiak/fcma/util.py @@ -52,7 +52,7 @@ def _normalize_for_correlation(data, axis): return data -def compute_correlation(matrix1, matrix2): +def compute_correlation(matrix1, matrix2, return_nans=False): """compute correlation between two sets of variables Correlate the rows of matrix1 with the rows of matrix2. @@ -83,6 +83,9 @@ def compute_correlation(matrix1, matrix2): {\\sqrt{\\sum\\limits_{j=1}^n x_j^2-n\\bar{x}}} \\frac{(y_i-\\bar{y})}{\\sqrt{\\sum\\limits_{j=1}^n y_j^2-n\\bar{y}}}) + By default (return_nans=False), returns zeros for vectors with NaNs. + If return_nans=True, convert zeros to NaNs (np.nan) in output. + Parameters ---------- matrix1: 2D array in shape [r1, c] @@ -91,6 +94,9 @@ def compute_correlation(matrix1, matrix2): matrix2: 2D array in shape [r2, c] MUST be continuous and row-major + return_nans: bool, default:False + If False, return zeros for NaNs; if True, return NaNs + Returns ------- corr_data: 2D array in shape [r1, r2] @@ -115,4 +121,9 @@ def compute_correlation(matrix1, matrix2): 0.0, corr_data, r2) + + # optionally convert zeros back to NaNs + if return_nans: + corr_data[corr_data == 0] = np.nan + return corr_data From 1d1fc0ca63d1203b662bb3e9bd34b2c40954e699 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 10 Jan 2019 19:53:35 -0500 Subject: [PATCH 2/4] Moved NaN handling into _normalize_for_correlation --- brainiak/fcma/util.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/brainiak/fcma/util.py b/brainiak/fcma/util.py index 5c22e06b0..df246fd5f 100644 --- a/brainiak/fcma/util.py +++ b/brainiak/fcma/util.py @@ -25,7 +25,7 @@ import math -def _normalize_for_correlation(data, axis): +def _normalize_for_correlation(data, axis, return_nans=False): """normalize the data before computing correlation The data will be z-scored and divided by sqrt(n) @@ -38,6 +38,9 @@ def _normalize_for_correlation(data, axis): axis: int specify which dimension of the data should be normalized + return_nans: bool, default:False + If False, return zeros for NaNs; if True, return NaNs + Returns ------- data: 2D array @@ -46,8 +49,9 @@ def _normalize_for_correlation(data, axis): shape = data.shape data = zscore(data, axis=axis, ddof=0) # if zscore fails (standard deviation is zero), - # set all values to be zero - data = np.nan_to_num(data) + # optionally set all values to be zero + if not return_nans: + data = np.nan_to_num(data) data = data / math.sqrt(shape[axis]) return data @@ -109,8 +113,10 @@ def compute_correlation(matrix1, matrix2, return_nans=False): if d1 != d2: raise ValueError('Dimension discrepancy') # preprocess two components - matrix1 = _normalize_for_correlation(matrix1, 1) - matrix2 = _normalize_for_correlation(matrix2, 1) + matrix1 = _normalize_for_correlation(matrix1, 1, + return_nans=return_nans) + matrix2 = _normalize_for_correlation(matrix2, 1, + return_nans=return_nans) corr_data = np.empty((r1, r2), dtype=np.float32, order='C') # blas routine is column-major blas.compute_single_matrix_multiplication('T', 'N', @@ -121,9 +127,4 @@ def compute_correlation(matrix1, matrix2, return_nans=False): 0.0, corr_data, r2) - - # optionally convert zeros back to NaNs - if return_nans: - corr_data[corr_data == 0] = np.nan - return corr_data From 2db09e7cb80b816606c919c742ad3a337a0f02fc Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Mon, 14 Jan 2019 15:44:49 -0500 Subject: [PATCH 3/4] Added NaN handling test in fcma/test_util.py --- tests/fcma/test_util.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/fcma/test_util.py b/tests/fcma/test_util.py index 6eb7782f2..a917bc256 100644 --- a/tests/fcma/test_util.py +++ b/tests/fcma/test_util.py @@ -39,5 +39,21 @@ def test_correlation_computation(): "correlation results between two sets") +def test_correlation_nans(): + row1 = 5 + col = 10 + row2 = 6 + mat1 = prng.rand(row1, col).astype(np.float32) + mat2 = prng.rand(row2, col).astype(np.float32) + mat1[0, 0] = np.nan + corr = compute_correlation(mat1, mat2, return_nans=False) + assert np.all(corr == 0, axis=1)[0] == True + assert np.sum(corr == 0) == row2 + corr = compute_correlation(mat1, mat2, return_nans=True) + assert np.all(np.isnan(corr), axis=1)[0] == True + assert np.sum(np.isnan(corr)) == row2 + + if __name__ == '__main__': test_correlation_computation() + test_correlatioon_nans() From 1e5bc3cb045ec59ba367a123ed1771eb9085af1c Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Mon, 14 Jan 2019 16:05:47 -0500 Subject: [PATCH 4/4] Fixed typo and assertions in fcma/test_util.py --- tests/fcma/test_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/fcma/test_util.py b/tests/fcma/test_util.py index a917bc256..6508d49b1 100644 --- a/tests/fcma/test_util.py +++ b/tests/fcma/test_util.py @@ -47,13 +47,13 @@ def test_correlation_nans(): mat2 = prng.rand(row2, col).astype(np.float32) mat1[0, 0] = np.nan corr = compute_correlation(mat1, mat2, return_nans=False) - assert np.all(corr == 0, axis=1)[0] == True + assert np.all(corr == 0, axis=1)[0] assert np.sum(corr == 0) == row2 corr = compute_correlation(mat1, mat2, return_nans=True) - assert np.all(np.isnan(corr), axis=1)[0] == True + assert np.all(np.isnan(corr), axis=1)[0] assert np.sum(np.isnan(corr)) == row2 if __name__ == '__main__': test_correlation_computation() - test_correlatioon_nans() + test_correlation_nans()