Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions brainiak/fcma/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
]


def _normalize_for_correlation(data, axis):
def _normalize_for_correlation(data, axis, return_nans=False):
"""normalize the data before computing correlation

The data will be z-scored and divided by sqrt(n)
Expand All @@ -42,6 +42,9 @@ def _normalize_for_correlation(data, axis):
axis: int
specify which dimension of the data should be normalized

return_nans: bool, default:False
If False, return zeros for NaNs; if True, return NaNs

Returns
-------
data: 2D array
Expand All @@ -50,13 +53,14 @@ def _normalize_for_correlation(data, axis):
shape = data.shape
data = zscore(data, axis=axis, ddof=0)
# if zscore fails (standard deviation is zero),
# set all values to be zero
data = np.nan_to_num(data)
# optionally set all values to be zero
if not return_nans:
data = np.nan_to_num(data)
data = data / math.sqrt(shape[axis])
return data


def compute_correlation(matrix1, matrix2):
def compute_correlation(matrix1, matrix2, return_nans=False):
"""compute correlation between two sets of variables

Correlate the rows of matrix1 with the rows of matrix2.
Expand Down Expand Up @@ -87,6 +91,9 @@ def compute_correlation(matrix1, matrix2):
{\\sqrt{\\sum\\limits_{j=1}^n x_j^2-n\\bar{x}}}
\\frac{(y_i-\\bar{y})}{\\sqrt{\\sum\\limits_{j=1}^n y_j^2-n\\bar{y}}})

By default (return_nans=False), returns zeros for vectors with NaNs.
If return_nans=True, convert zeros to NaNs (np.nan) in output.

Parameters
----------
matrix1: 2D array in shape [r1, c]
Expand All @@ -95,6 +102,9 @@ def compute_correlation(matrix1, matrix2):
matrix2: 2D array in shape [r2, c]
MUST be continuous and row-major

return_nans: bool, default:False
If False, return zeros for NaNs; if True, return NaNs

Returns
-------
corr_data: 2D array in shape [r1, r2]
Expand All @@ -107,8 +117,10 @@ def compute_correlation(matrix1, matrix2):
if d1 != d2:
raise ValueError('Dimension discrepancy')
# preprocess two components
matrix1 = _normalize_for_correlation(matrix1, 1)
matrix2 = _normalize_for_correlation(matrix2, 1)
matrix1 = _normalize_for_correlation(matrix1, 1,
return_nans=return_nans)
matrix2 = _normalize_for_correlation(matrix2, 1,
return_nans=return_nans)
corr_data = np.empty((r1, r2), dtype=np.float32, order='C')
# blas routine is column-major
blas.compute_single_matrix_multiplication('T', 'N',
Expand Down
16 changes: 16 additions & 0 deletions tests/fcma/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,21 @@ def test_correlation_computation():
"correlation results between two sets")


def test_correlation_nans():
row1 = 5
col = 10
row2 = 6
mat1 = prng.rand(row1, col).astype(np.float32)
mat2 = prng.rand(row2, col).astype(np.float32)
mat1[0, 0] = np.nan
corr = compute_correlation(mat1, mat2, return_nans=False)
assert np.all(corr == 0, axis=1)[0]
assert np.sum(corr == 0) == row2
corr = compute_correlation(mat1, mat2, return_nans=True)
assert np.all(np.isnan(corr), axis=1)[0]
assert np.sum(np.isnan(corr)) == row2


if __name__ == '__main__':
test_correlation_computation()
test_correlation_nans()