From d21773a5a404d344b887b50aa8510c55693353e3 Mon Sep 17 00:00:00 2001
From: Sam Nastase <sam.nastase@gmail.com>
Date: Thu, 10 Jan 2019 17:53:43 -0500
Subject: [PATCH 1/4] Option to return NaNs in compute_correlation

---
 brainiak/fcma/util.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/brainiak/fcma/util.py b/brainiak/fcma/util.py
index b513e4fcc..5c22e06b0 100644
--- a/brainiak/fcma/util.py
+++ b/brainiak/fcma/util.py
@@ -52,7 +52,7 @@ def _normalize_for_correlation(data, axis):
     return data
 
 
-def compute_correlation(matrix1, matrix2):
+def compute_correlation(matrix1, matrix2, return_nans=False):
     """compute correlation between two sets of variables
 
     Correlate the rows of matrix1 with the rows of matrix2.
@@ -83,6 +83,9 @@ def compute_correlation(matrix1, matrix2):
         {\\sqrt{\\sum\\limits_{j=1}^n x_j^2-n\\bar{x}}}
         \\frac{(y_i-\\bar{y})}{\\sqrt{\\sum\\limits_{j=1}^n y_j^2-n\\bar{y}}})
 
+    By default (return_nans=False), returns zeros for vectors with NaNs.
+    If return_nans=True, returns NaNs (np.nan) for those vectors instead.
+
     Parameters
     ----------
     matrix1: 2D array in shape [r1, c]
@@ -91,6 +94,9 @@ def compute_correlation(matrix1, matrix2):
     matrix2: 2D array in shape [r2, c]
         MUST be continuous and row-major
 
+    return_nans: bool, default:False
+        If False, return zeros for NaNs; if True, return NaNs
+
     Returns
     -------
     corr_data: 2D array in shape [r1, r2]
@@ -115,4 +121,9 @@ def compute_correlation(matrix1, matrix2):
                                               0.0,
                                               corr_data,
                                               r2)
+
+    # optionally convert zeros back to NaNs
+    if return_nans:
+        corr_data[corr_data == 0] = np.nan
+
     return corr_data

From 1d1fc0ca63d1203b662bb3e9bd34b2c40954e699 Mon Sep 17 00:00:00 2001
From: Sam Nastase <sam.nastase@gmail.com>
Date: Thu, 10 Jan 2019 19:53:35 -0500
Subject: [PATCH 2/4] Moved NaN handling into _normalize_for_correlation

---
 brainiak/fcma/util.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/brainiak/fcma/util.py b/brainiak/fcma/util.py
index 5c22e06b0..df246fd5f 100644
--- a/brainiak/fcma/util.py
+++ b/brainiak/fcma/util.py
@@ -25,7 +25,7 @@
 import math
 
 
-def _normalize_for_correlation(data, axis):
+def _normalize_for_correlation(data, axis, return_nans=False):
     """normalize the data before computing correlation
 
     The data will be z-scored and divided by sqrt(n)
@@ -38,6 +38,9 @@ def _normalize_for_correlation(data, axis):
     axis: int
         specify which dimension of the data should be normalized
 
+    return_nans: bool, default:False
+        If False, return zeros for NaNs; if True, return NaNs
+
     Returns
     -------
     data: 2D array
@@ -46,8 +49,9 @@ def _normalize_for_correlation(data, axis):
     shape = data.shape
     data = zscore(data, axis=axis, ddof=0)
     # if zscore fails (standard deviation is zero),
-    # set all values to be zero
-    data = np.nan_to_num(data)
+    # optionally set all values to be zero
+    if not return_nans:
+        data = np.nan_to_num(data)
     data = data / math.sqrt(shape[axis])
     return data
 
@@ -109,8 +113,10 @@ def compute_correlation(matrix1, matrix2, return_nans=False):
     if d1 != d2:
         raise ValueError('Dimension discrepancy')
     # preprocess two components
-    matrix1 = _normalize_for_correlation(matrix1, 1)
-    matrix2 = _normalize_for_correlation(matrix2, 1)
+    matrix1 = _normalize_for_correlation(matrix1, 1,
+                                         return_nans=return_nans)
+    matrix2 = _normalize_for_correlation(matrix2, 1,
+                                         return_nans=return_nans)
     corr_data = np.empty((r1, r2), dtype=np.float32, order='C')
     # blas routine is column-major
     blas.compute_single_matrix_multiplication('T', 'N',
@@ -121,9 +127,4 @@ def compute_correlation(matrix1, matrix2, return_nans=False):
                                               0.0,
                                               corr_data,
                                               r2)
-
-    # optionally convert zeros back to NaNs
-    if return_nans:
-        corr_data[corr_data == 0] = np.nan
-
     return corr_data

From 2db09e7cb80b816606c919c742ad3a337a0f02fc Mon Sep 17 00:00:00 2001
From: Sam Nastase <sam.nastase@gmail.com>
Date: Mon, 14 Jan 2019 15:44:49 -0500
Subject: [PATCH 3/4] Added NaN handling test in fcma/test_util.py

---
 tests/fcma/test_util.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/fcma/test_util.py b/tests/fcma/test_util.py
index 6eb7782f2..a917bc256 100644
--- a/tests/fcma/test_util.py
+++ b/tests/fcma/test_util.py
@@ -39,5 +39,21 @@ def test_correlation_computation():
         "correlation results between two sets")
 
 
+def test_correlation_nans():
+    row1 = 5
+    col = 10
+    row2 = 6
+    mat1 = prng.rand(row1, col).astype(np.float32)
+    mat2 = prng.rand(row2, col).astype(np.float32)
+    mat1[0, 0] = np.nan
+    corr = compute_correlation(mat1, mat2, return_nans=False)
+    assert np.all(corr == 0, axis=1)[0] == True
+    assert np.sum(corr == 0) == row2
+    corr = compute_correlation(mat1, mat2, return_nans=True)
+    assert np.all(np.isnan(corr), axis=1)[0] == True
+    assert np.sum(np.isnan(corr)) == row2
+
+
 if __name__ == '__main__':
     test_correlation_computation()
+    test_correlatioon_nans()

From 1e5bc3cb045ec59ba367a123ed1771eb9085af1c Mon Sep 17 00:00:00 2001
From: Sam Nastase <sam.nastase@gmail.com>
Date: Mon, 14 Jan 2019 16:05:47 -0500
Subject: [PATCH 4/4] Fixed typo and assertions in fcma/test_util.py

---
 tests/fcma/test_util.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/fcma/test_util.py b/tests/fcma/test_util.py
index a917bc256..6508d49b1 100644
--- a/tests/fcma/test_util.py
+++ b/tests/fcma/test_util.py
@@ -47,13 +47,13 @@ def test_correlation_nans():
     mat2 = prng.rand(row2, col).astype(np.float32)
     mat1[0, 0] = np.nan
     corr = compute_correlation(mat1, mat2, return_nans=False)
-    assert np.all(corr == 0, axis=1)[0] == True
+    assert np.all(corr == 0, axis=1)[0]
     assert np.sum(corr == 0) == row2
     corr = compute_correlation(mat1, mat2, return_nans=True)
-    assert np.all(np.isnan(corr), axis=1)[0] == True
+    assert np.all(np.isnan(corr), axis=1)[0]
     assert np.sum(np.isnan(corr)) == row2
 
 
 if __name__ == '__main__':
     test_correlation_computation()
-    test_correlatioon_nans()
+    test_correlation_nans()