From ed77dce1b8a981c0a30adef2266274c98ec263f0 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 10 Jan 2019 16:09:08 -0500 Subject: [PATCH 01/21] Tolerance for NaNs for ISC/ISFC, and tests --- brainiak/isc.py | 119 ++++++++++++++++++++++++--- tests/isc/test_isc.py | 183 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 290 insertions(+), 12 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index c1bd3684f..d8eb5a5d5 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -61,7 +61,7 @@ MAX_RANDOM_SEED = 2**32 - 1 -def isc(data, pairwise=False, summary_statistic=None): +def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): """Intersubject correlation For each voxel or ROI, compute the Pearson correlation between each @@ -81,8 +81,21 @@ def isc(data, pairwise=False, summary_statistic=None): the last dimension is assumed to correspond to subjects. If only two subjects are supplied, simply compute Pearson correlation (precludes averaging in leave-one-out approach, and does not apply summary statistic). - Output is an ndarray where the first dimension is the number of subjects - or pairs and the second dimension is the number of voxels (or ROIs). + When using leave-one-out approach, NaNs are ignored when computing mean + time series of N-1 subjects (default: tolerate_nans=True). Alternatively, + you may supply a float between 0 and 1 indicating a threshold proportion + of N subjects with non-NaN values required when computing the average time + series for a given voxel. For example, if tolerate_nans=.8, ISCs will be + computed for any voxel where >= 80% of subjects have non-NaN values, + while voxels with < 80% non-NaN values will be assigned NaNs. If set to + False, NaNs are not tolerated and voxels with one or more NaNs among the + N-1 subjects will be assigned NaN. 
Setting tolerate_nans to True or False + will not affect the pairwise approach; however, if a threshold float is + provided, voxels that do not reach this threshold will be excluded. Note + that accommodating NaNs may be notably slower than setting tolerate_nans to + False. Output is an ndarray where the first dimension is the number of + subjects or pairs and the second dimension is the number of voxels (or + ROIs). The implementation is based on the work in [Hasson2004]_. @@ -97,6 +110,9 @@ def isc(data, pairwise=False, summary_statistic=None): summary_statistic : None or str, default:None Return all ISCs or collapse using 'mean' or 'median' + tolerate_nans : bool or float, default:True + Accommodate NaNs (when averaging in leave-one-out approach) + Returns ------- iscs : subjects or pairs by voxels ndarray @@ -112,9 +128,16 @@ def isc(data, pairwise=False, summary_statistic=None): logger.info("Only two subjects! Simply computing Pearson correlation.") summary_statistic = None + # Check tolerate_nans input and use either mean/nanmean and exclude voxels + if tolerate_nans: + mean = np.nanmean + else: + mean = np.mean + data, mask = threshold_nans(data, tolerate_nans) + # Loop over each voxel or ROI voxel_iscs = [] - for v in np.arange(n_voxels): + for v in np.arange(data.shape[1]): voxel_data = data[:, v, :].T if n_subjects == 2: iscs = pearsonr(voxel_data[0, :], voxel_data[1, :])[0] @@ -122,12 +145,16 @@ def isc(data, pairwise=False, summary_statistic=None): iscs = squareform(np.corrcoef(voxel_data), checks=False) elif not pairwise: iscs = np.array([pearsonr(subject, - np.mean(np.delete(voxel_data, - s, axis=0), + mean(np.delete(voxel_data, + s, axis=0), axis=0))[0] for s, subject in enumerate(voxel_data)]) voxel_iscs.append(iscs) - iscs = np.column_stack(voxel_iscs) + iscs_stack = np.column_stack(voxel_iscs) + + # Get ISCs back into correct shape after masking out NaNs + iscs = np.full((iscs_stack.shape[0], n_voxels), np.nan) + iscs[:, np.where(mask)[0]] = 
iscs_stack # Summarize results (if requested) if summary_statistic: @@ -138,7 +165,7 @@ def isc(data, pairwise=False, summary_statistic=None): return iscs -def isfc(data, pairwise=False, summary_statistic=None): +def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): """Intersubject functional correlation (ISFC) @@ -186,12 +213,20 @@ def isfc(data, pairwise=False, summary_statistic=None): # Check response time series input format data, n_TRs, n_voxels, n_subjects = check_timeseries_input(data) + # Check tolerate_nans input and use either mean/nanmean and exclude voxels + if tolerate_nans: + mean = np.nanmean + else: + mean = np.mean + data, mask = threshold_nans(data, tolerate_nans) + # Handle just two subjects properly if n_subjects == 2: isfcs = compute_correlation(np.ascontiguousarray(data[..., 0].T), np.ascontiguousarray(data[..., 1].T)) isfcs = (isfcs + isfcs.T) / 2 - assert isfcs.shape == (n_voxels, n_voxels) + isfcs = isfcs[..., np.newaxis] + assert isfcs.shape == (n_voxels, n_voxels, 1) summary_statistic = None logger.info("Only two subjects! 
Computing ISFC between them.") @@ -217,7 +252,7 @@ def isfc(data, pairwise=False, summary_statistic=None): # Compute leave-one-out ISFCs isfcs = [compute_correlation(np.ascontiguousarray(subject.T), - np.ascontiguousarray(np.mean( + np.ascontiguousarray(mean( np.delete(data, s, axis=0), axis=0).T)) for s, subject in enumerate(data)] @@ -225,11 +260,14 @@ def isfc(data, pairwise=False, summary_statistic=None): # Transpose and average ISFC matrices for both directions isfcs = np.dstack([(isfc_matrix + isfc_matrix.T) / 2 for isfc_matrix in isfcs]) - assert isfcs.shape == (n_voxels, n_voxels, n_subjects) + + # Get ISCs back into correct shape after masking out NaNs + isfcs_all = np.full((n_voxels, n_voxels, isfcs.shape[2]), np.nan) + isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs # Summarize results (if requested) if summary_statistic: - isfcs = compute_summary_statistic(isfcs, + isfcs = compute_summary_statistic(isfcs_all, summary_statistic=summary_statistic, axis=2) @@ -400,6 +438,63 @@ def compute_summary_statistic(iscs, summary_statistic='mean', axis=None): return statistic +def threshold_nans(data, tolerate_nans): + + """Thresholds data based on proportion of subjects with NaNs + + Takes in data and a threshold value (float between 0.0 and 1.0) determining + the permissible proportion of subjects with non-NaN values. For example, if + threshold=.8, any voxel where >= 80% of subjects have non-NaN values will + be left unchanged, while any voxel with < 80% non-NaN values will be + assigned all NaN values and included in the nan_mask output. Note that the + output data has not been masked and will be same shape as the input data, + but may have a different number of NaNs based on the threshold. 
+ + Parameters + ---------- + data : ndarray (n_TRs x n_voxels x n_subjects) + fMRI time series data + + tolerate_nans : bool or float (0.0 <= threshold <= 1.0) + Proportion of subjects with non-NaN values required to keep voxel + + Returns + ------- + data : ndarray (n_TRs x n_voxels x n_subjects) + fMRI time series data with adjusted NaNs + + nan_mask : ndarray (n_voxels,) + Boolean mask array of voxels with too many NaNs based on threshold + + """ + + nans = np.all(np.any(np.isnan(data), axis=0), axis=1) + + # Check tolerate_nans input and use either mean/nanmean and exclude voxels + if tolerate_nans is True: + logger.info("ISC computation will tolerate all NaNs when averaging") + + elif type(tolerate_nans) is float: + if not 0.0 <= tolerate_nans <= 1.0: + raise ValueError("If threshold to tolerate NaNs is a float, " + "it must be between 0.0 and 1.0; got {0}".format( + tolerate_nans)) + nans += ~(np.sum(~np.any(np.isnan(data), axis=0), axis=1) >= + data.shape[-1] * tolerate_nans) + logger.info("ISC computation will tolerate voxels with at least " + "{0} non-NaN values: {1} voxels do not meet " + "threshold".format(tolerate_nans, + np.sum(nans))) + + else: + logger.info("ISC computation will not tolerate NaNs when averaging") + + mask = ~nans + data = data[:, mask, :] + + return data, mask + + def bootstrap_isc(iscs, pairwise=False, summary_statistic='median', n_bootstraps=1000, ci_percentile=95, random_state=None): diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index 6bd681cc3..de1eb2c76 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -150,6 +150,100 @@ def test_isc_output(): logger.info("Finished testing ISC outputs") +# Check for proper handling of NaNs in ISC +def test_isc_nans(): + + # Set parameters for toy time series data + n_subjects = 20 + n_TRs = 60 + n_voxels = 30 + random_state = 42 + + logger.info("Testing ISC options") + + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array', + 
random_state=random_state) + + # Inject NaNs into data + data[0, 0, 0] = np.nan + + # Don't tolerate NaNs, should lose zeroeth voxel + iscs_loo = isc(data, pairwise=False, tolerate_nans=False) + assert np.sum(np.isnan(iscs_loo)) == n_subjects + + # Tolerate all NaNs, only subject with NaNs yields NaN + iscs_loo = isc(data, pairwise=False, tolerate_nans=True) + assert np.sum(np.isnan(iscs_loo)) == 1 + + # Pairwise approach shouldn't care + iscs_pw_T = isc(data, pairwise=True, tolerate_nans=True) + iscs_pw_F = isc(data, pairwise=True, tolerate_nans=False) + assert np.allclose(iscs_pw_T, iscs_pw_F, equal_nan=True) + + assert (np.sum(np.isnan(iscs_pw_T)) == + np.sum(np.isnan(iscs_pw_F)) == + n_subjects - 1) + + # Set proportion of nans to reject (70% and 90% non-NaN) + data[0, 0, :] = np.nan + data[0, 1, :n_subjects - int(n_subjects * .7)] = np.nan + data[0, 2, :n_subjects - int(n_subjects * .9)] = np.nan + + iscs_loo_T = isc(data, pairwise=False, tolerate_nans=True) + iscs_loo_F = isc(data, pairwise=False, tolerate_nans=False) + iscs_loo_95 = isc(data, pairwise=False, tolerate_nans=.95) + iscs_loo_90 = isc(data, pairwise=False, tolerate_nans=.90) + iscs_loo_80 = isc(data, pairwise=False, tolerate_nans=.8) + iscs_loo_70 = isc(data, pairwise=False, tolerate_nans=.7) + iscs_loo_60 = isc(data, pairwise=False, tolerate_nans=.6) + + assert (np.sum(np.isnan(iscs_loo_F)) == + np.sum(np.isnan(iscs_loo_95)) == 60) + assert (np.sum(np.isnan(iscs_loo_80)) == + np.sum(np.isnan(iscs_loo_90)) == 42) + assert (np.sum(np.isnan(iscs_loo_T)) == + np.sum(np.isnan(iscs_loo_60)) == + np.sum(np.isnan(iscs_loo_70)) == 28) + assert np.array_equal(np.sum(np.isnan(iscs_loo_F), axis=0), + np.sum(np.isnan(iscs_loo_95), axis=0)) + assert np.array_equal(np.sum(np.isnan(iscs_loo_80), axis=0), + np.sum(np.isnan(iscs_loo_90), axis=0)) + assert np.all((np.array_equal( + np.sum(np.isnan(iscs_loo_T), axis=0), + np.sum(np.isnan(iscs_loo_60), axis=0)), + np.array_equal( + np.sum(np.isnan(iscs_loo_T), 
axis=0), + np.sum(np.isnan(iscs_loo_70), axis=0)), + np.array_equal( + np.sum(np.isnan(iscs_loo_60), axis=0), + np.sum(np.isnan(iscs_loo_70), axis=0)))) + + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array', + random_state=random_state) + + # Inject NaNs into data + data[0, 0, 0] = np.nan + + # Make sure voxel with NaNs across all subjects is always removed + data[0, 0, :] = np.nan + iscs_loo_T = isc(data, pairwise=False, tolerate_nans=True) + iscs_loo_F = isc(data, pairwise=False, tolerate_nans=False) + assert np.allclose(iscs_loo_T, iscs_loo_F, equal_nan=True) + assert (np.sum(np.isnan(iscs_loo_T)) == + np.sum(np.isnan(iscs_loo_T)) == + n_subjects) + + iscs_pw_T = isc(data, pairwise=True, tolerate_nans=True) + iscs_pw_F = isc(data, pairwise=True, tolerate_nans=False) + assert np.allclose(iscs_pw_T, iscs_pw_F, equal_nan=True) + + assert (np.sum(np.isnan(iscs_pw_T)) == + np.sum(np.isnan(iscs_pw_T)) == + n_subjects * (n_subjects - 1) / 2) + + # Test one-sample bootstrap test def test_bootstrap_isc(): @@ -544,10 +638,99 @@ def test_isfc_options(): logger.info("Finished testing ISFC options") +# Check for proper handling of NaNs in ISFC +def test_isfc_nans(): + + # Set parameters for toy time series data + n_subjects = 20 + n_TRs = 60 + n_voxels = 30 + random_state = 42 + + logger.info("Testing ISC options") + + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array', + random_state=random_state) + + # Inject NaNs into data + data[0, 0, 0] = np.nan + + # Don't tolerate NaNs, should lose zeroeth voxel + isfcs_loo = isfc(data, pairwise=False, tolerate_nans=False) + assert np.sum(isfcs_loo == 0) == n_subjects + + # Tolerate all NaNs, only subject with NaNs yields NaN + isfcs_loo = isfc(data, pairwise=False, tolerate_nans=True) + assert np.sum(np.isnan(isfcs_loo)) == 0 + + # Pairwise approach shouldn't care + isfcs_pw_T = isfc(data, pairwise=True, tolerate_nans=True) + isfcs_pw_F = isfc(data, pairwise=True, 
tolerate_nans=False) + assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) + + assert (np.sum(np.isnan(isfcs_pw_T)) == + np.sum(np.isnan(isfcs_pw_F)) == 0) + + # Set proportion of nans to reject (70% and 90% non-NaN) + data[0, 0, :] = np.nan + data[0, 1, :n_subjects - int(n_subjects * .7)] = np.nan + data[0, 2, :n_subjects - int(n_subjects * .9)] = np.nan + + nans = np.sum(np.any(np.isnan(data), axis=0), axis=1) + + isfcs_loo_T = isfc(data, pairwise=False, tolerate_nans=True) + isfcs_loo_F = isfc(data, pairwise=False, tolerate_nans=False) + isfcs_loo_95 = isfc(data, pairwise=False, tolerate_nans=.95) + isfcs_loo_90 = isfc(data, pairwise=False, tolerate_nans=.90) + isfcs_loo_80 = isfc(data, pairwise=False, tolerate_nans=.8) + isfcs_loo_70 = isfc(data, pairwise=False, tolerate_nans=.7) + isfcs_loo_60 = isfc(data, pairwise=False, tolerate_nans=.6) + + assert (np.sum(np.isnan(iscs_loo_F)) == + np.sum(np.isnan(iscs_loo_95)) == 60) + assert (np.sum(np.isnan(iscs_loo_80)) == + np.sum(np.isnan(iscs_loo_90)) == 42) + assert (np.sum(np.isnan(iscs_loo_T)) == + np.sum(np.isnan(iscs_loo_60)) == + np.sum(np.isnan(iscs_loo_70)) == 28) + assert np.array_equal(np.sum(np.isnan(iscs_loo_F), axis=0), + np.sum(np.isnan(iscs_loo_95), axis=0)) + assert np.array_equal(np.sum(np.isnan(iscs_loo_80), axis=0), + np.sum(np.isnan(iscs_loo_90), axis=0)) + assert np.all((np.array_equal( + np.sum(np.isnan(iscs_loo_T), axis=0), + np.sum(np.isnan(iscs_loo_60), axis=0)), + np.array_equal( + np.sum(np.isnan(iscs_loo_T), axis=0), + np.sum(np.isnan(iscs_loo_70), axis=0)), + np.array_equal( + np.sum(np.isnan(iscs_loo_60), axis=0), + np.sum(np.isnan(iscs_loo_70), axis=0)))) + + # Make sure voxel with NaNs across all subjects is always removed + data[0, 0, :] = np.nan + iscs_loo_T = isc(data, pairwise=False, tolerate_nans=True) + iscs_loo_F = isc(data, pairwise=False, tolerate_nans=False) + assert np.allclose(iscs_loo_T, iscs_loo_F, equal_nan=True) + assert (np.sum(np.isnan(iscs_loo_T)) == + 
np.sum(np.isnan(iscs_loo_T)) == + n_subjects) + + iscs_pw_T = isc(data, pairwise=True, tolerate_nans=True) + iscs_pw_F = isc(data, pairwise=True, tolerate_nans=False) + assert np.allclose(iscs_pw_T, iscs_pw_F, equal_nan=True) + + assert (np.sum(np.isnan(iscs_pw_T)) == + np.sum(np.isnan(iscs_pw_T)) == + n_subjects * (n_subjects - 1) / 2) + + if __name__ == '__main__': test_isc_input() test_isc_options() test_isc_output() + test_isc_nans() test_bootstrap_isc() test_permutation_isc() test_timeshift_isc() From 1106224bd26be52107b4a4b76b56e8e4e979c979 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 10 Jan 2019 17:47:34 -0500 Subject: [PATCH 02/21] Messing with NaN options in isfc --- brainiak/isc.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index d8eb5a5d5..a43f5050f 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -200,7 +200,7 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): pairwise : bool, default: False Whether to use pairwise (True) or leave-one-out (False) approach - summary_statistic : None or str, default:None + summary_statistic : None or str, default: None Return all ISFCs or collapse using 'mean' or 'median' Returns @@ -223,7 +223,8 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): # Handle just two subjects properly if n_subjects == 2: isfcs = compute_correlation(np.ascontiguousarray(data[..., 0].T), - np.ascontiguousarray(data[..., 1].T)) + np.ascontiguousarray(data[..., 1].T), + return_nans=True) isfcs = (isfcs + isfcs.T) / 2 isfcs = isfcs[..., np.newaxis] assert isfcs.shape == (n_voxels, n_voxels, 1) @@ -237,7 +238,8 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): isfc_pair = compute_correlation(np.ascontiguousarray( data[..., pair[0]].T), np.ascontiguousarray( - data[..., pair[1]].T)) + data[..., pair[1]].T), + return_nans=True) isfc_pair = (isfc_pair + isfc_pair.T) / 2 
isfcs.append(isfc_pair) isfcs = np.dstack(isfcs) @@ -254,7 +256,8 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): isfcs = [compute_correlation(np.ascontiguousarray(subject.T), np.ascontiguousarray(mean( np.delete(data, s, axis=0), - axis=0).T)) + axis=0).T), + return_nans=True) for s, subject in enumerate(data)] # Transpose and average ISFC matrices for both directions @@ -264,6 +267,9 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): # Get ISCs back into correct shape after masking out NaNs isfcs_all = np.full((n_voxels, n_voxels, isfcs.shape[2]), np.nan) isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs + + # compute_correlation returns zeros for NaNs, change them back + #isfcs_all[isfcs_all == 0] = np.nan # Summarize results (if requested) if summary_statistic: From d21773a5a404d344b887b50aa8510c55693353e3 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 10 Jan 2019 17:53:43 -0500 Subject: [PATCH 03/21] Option to return NaNs in compute_correlation --- brainiak/fcma/util.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/brainiak/fcma/util.py b/brainiak/fcma/util.py index b513e4fcc..5c22e06b0 100644 --- a/brainiak/fcma/util.py +++ b/brainiak/fcma/util.py @@ -52,7 +52,7 @@ def _normalize_for_correlation(data, axis): return data -def compute_correlation(matrix1, matrix2): +def compute_correlation(matrix1, matrix2, return_nans=False): """compute correlation between two sets of variables Correlate the rows of matrix1 with the rows of matrix2. @@ -83,6 +83,9 @@ def compute_correlation(matrix1, matrix2): {\\sqrt{\\sum\\limits_{j=1}^n x_j^2-n\\bar{x}}} \\frac{(y_i-\\bar{y})}{\\sqrt{\\sum\\limits_{j=1}^n y_j^2-n\\bar{y}}}) + By default (return_nans=False), returns zeros for vectors with NaNs. + If return_nans=True, convert zeros to NaNs (np.nan) in output. 
+ Parameters ---------- matrix1: 2D array in shape [r1, c] @@ -91,6 +94,9 @@ def compute_correlation(matrix1, matrix2): matrix2: 2D array in shape [r2, c] MUST be continuous and row-major + return_nans: bool, default:False + If False, return zeros for NaNs; if True, return NaNs + Returns ------- corr_data: 2D array in shape [r1, r2] @@ -115,4 +121,9 @@ def compute_correlation(matrix1, matrix2): 0.0, corr_data, r2) + + # optionally convert zeros back to NaNs + if return_nans: + corr_data[corr_data == 0] = np.nan + return corr_data From 1d1fc0ca63d1203b662bb3e9bd34b2c40954e699 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 10 Jan 2019 19:53:35 -0500 Subject: [PATCH 04/21] Moved NaN handling into _normalize_for_correlation --- brainiak/fcma/util.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/brainiak/fcma/util.py b/brainiak/fcma/util.py index 5c22e06b0..df246fd5f 100644 --- a/brainiak/fcma/util.py +++ b/brainiak/fcma/util.py @@ -25,7 +25,7 @@ import math -def _normalize_for_correlation(data, axis): +def _normalize_for_correlation(data, axis, return_nans=False): """normalize the data before computing correlation The data will be z-scored and divided by sqrt(n) @@ -38,6 +38,9 @@ def _normalize_for_correlation(data, axis): axis: int specify which dimension of the data should be normalized + return_nans: bool, default:False + If False, return zeros for NaNs; if True, return NaNs + Returns ------- data: 2D array @@ -46,8 +49,9 @@ def _normalize_for_correlation(data, axis): shape = data.shape data = zscore(data, axis=axis, ddof=0) # if zscore fails (standard deviation is zero), - # set all values to be zero - data = np.nan_to_num(data) + # optionally set all values to be zero + if not return_nans: + data = np.nan_to_num(data) data = data / math.sqrt(shape[axis]) return data @@ -109,8 +113,10 @@ def compute_correlation(matrix1, matrix2, return_nans=False): if d1 != d2: raise ValueError('Dimension discrepancy') # preprocess 
two components - matrix1 = _normalize_for_correlation(matrix1, 1) - matrix2 = _normalize_for_correlation(matrix2, 1) + matrix1 = _normalize_for_correlation(matrix1, 1, + return_nans=return_nans) + matrix2 = _normalize_for_correlation(matrix2, 1, + return_nans=return_nans) corr_data = np.empty((r1, r2), dtype=np.float32, order='C') # blas routine is column-major blas.compute_single_matrix_multiplication('T', 'N', @@ -121,9 +127,4 @@ def compute_correlation(matrix1, matrix2, return_nans=False): 0.0, corr_data, r2) - - # optionally convert zeros back to NaNs - if return_nans: - corr_data[corr_data == 0] = np.nan - return corr_data From 6a12190dfa941171717eec904634732bf42a9c5b Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Fri, 11 Jan 2019 13:44:08 -0500 Subject: [PATCH 05/21] NaN handling for ISFC and tests --- brainiak/isc.py | 10 ++---- tests/isc/test_isc.py | 74 +++++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index a43f5050f..bf8eb343a 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -227,7 +227,6 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): return_nans=True) isfcs = (isfcs + isfcs.T) / 2 isfcs = isfcs[..., np.newaxis] - assert isfcs.shape == (n_voxels, n_voxels, 1) summary_statistic = None logger.info("Only two subjects! 
Computing ISFC between them.") @@ -243,8 +242,7 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): isfc_pair = (isfc_pair + isfc_pair.T) / 2 isfcs.append(isfc_pair) isfcs = np.dstack(isfcs) - assert isfcs.shape == (n_voxels, n_voxels, - n_subjects * (n_subjects - 1) / 2) + # Compute ISFCs using leave-one-out approach elif not pairwise: @@ -267,13 +265,11 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): # Get ISCs back into correct shape after masking out NaNs isfcs_all = np.full((n_voxels, n_voxels, isfcs.shape[2]), np.nan) isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs - - # compute_correlation returns zeros for NaNs, change them back - #isfcs_all[isfcs_all == 0] = np.nan + isfcs = isfcs_all # Summarize results (if requested) if summary_statistic: - isfcs = compute_summary_statistic(isfcs_all, + isfcs = compute_summary_statistic(isfcs, summary_statistic=summary_statistic, axis=2) diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index de1eb2c76..ea5a7ebb5 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -223,9 +223,6 @@ def test_isc_nans(): n_voxels=n_voxels, data_type='array', random_state=random_state) - # Inject NaNs into data - data[0, 0, 0] = np.nan - # Make sure voxel with NaNs across all subjects is always removed data[0, 0, :] = np.nan iscs_loo_T = isc(data, pairwise=False, tolerate_nans=True) @@ -658,11 +655,11 @@ def test_isfc_nans(): # Don't tolerate NaNs, should lose zeroeth voxel isfcs_loo = isfc(data, pairwise=False, tolerate_nans=False) - assert np.sum(isfcs_loo == 0) == n_subjects + assert np.sum(np.isnan(isfcs_loo)) == (n_voxels * 2 - 1) * n_subjects # Tolerate all NaNs, only subject with NaNs yields NaN isfcs_loo = isfc(data, pairwise=False, tolerate_nans=True) - assert np.sum(np.isnan(isfcs_loo)) == 0 + assert np.sum(np.isnan(isfcs_loo)) == n_voxels * 2 - 1 # Pairwise approach shouldn't care isfcs_pw_T = isfc(data, pairwise=True, 
tolerate_nans=True) @@ -670,7 +667,8 @@ def test_isfc_nans(): assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) assert (np.sum(np.isnan(isfcs_pw_T)) == - np.sum(np.isnan(isfcs_pw_F)) == 0) + np.sum(np.isnan(isfcs_pw_F)) == + (n_voxels * 2 - 1) * (n_subjects - 1)) # Set proportion of nans to reject (70% and 90% non-NaN) data[0, 0, :] = np.nan @@ -679,6 +677,7 @@ def test_isfc_nans(): nans = np.sum(np.any(np.isnan(data), axis=0), axis=1) + ### TRIMMING NANS AND NOT REPLACING THEM!!! reduced size isfcs_loo_T = isfc(data, pairwise=False, tolerate_nans=True) isfcs_loo_F = isfc(data, pairwise=False, tolerate_nans=False) isfcs_loo_95 = isfc(data, pairwise=False, tolerate_nans=.95) @@ -687,43 +686,47 @@ def test_isfc_nans(): isfcs_loo_70 = isfc(data, pairwise=False, tolerate_nans=.7) isfcs_loo_60 = isfc(data, pairwise=False, tolerate_nans=.6) - assert (np.sum(np.isnan(iscs_loo_F)) == - np.sum(np.isnan(iscs_loo_95)) == 60) - assert (np.sum(np.isnan(iscs_loo_80)) == - np.sum(np.isnan(iscs_loo_90)) == 42) - assert (np.sum(np.isnan(iscs_loo_T)) == - np.sum(np.isnan(iscs_loo_60)) == - np.sum(np.isnan(iscs_loo_70)) == 28) - assert np.array_equal(np.sum(np.isnan(iscs_loo_F), axis=0), - np.sum(np.isnan(iscs_loo_95), axis=0)) - assert np.array_equal(np.sum(np.isnan(iscs_loo_80), axis=0), - np.sum(np.isnan(iscs_loo_90), axis=0)) + assert (np.sum(np.isnan(isfcs_loo_F)) == + np.sum(np.isnan(isfcs_loo_95)) == 3420) + assert (np.sum(np.isnan(isfcs_loo_80)) == + np.sum(np.isnan(isfcs_loo_90)) == 2430) + assert (np.sum(np.isnan(isfcs_loo_T)) == + np.sum(np.isnan(isfcs_loo_60)) == + np.sum(np.isnan(isfcs_loo_70)) == 1632) + assert np.array_equal(np.sum(np.isnan(isfcs_loo_F), axis=0), + np.sum(np.isnan(isfcs_loo_95), axis=0)) + assert np.array_equal(np.sum(np.isnan(isfcs_loo_80), axis=0), + np.sum(np.isnan(isfcs_loo_90), axis=0)) assert np.all((np.array_equal( - np.sum(np.isnan(iscs_loo_T), axis=0), - np.sum(np.isnan(iscs_loo_60), axis=0)), + np.sum(np.isnan(isfcs_loo_T), axis=0), 
+ np.sum(np.isnan(isfcs_loo_60), axis=0)), np.array_equal( - np.sum(np.isnan(iscs_loo_T), axis=0), - np.sum(np.isnan(iscs_loo_70), axis=0)), + np.sum(np.isnan(isfcs_loo_T), axis=0), + np.sum(np.isnan(isfcs_loo_70), axis=0)), np.array_equal( - np.sum(np.isnan(iscs_loo_60), axis=0), - np.sum(np.isnan(iscs_loo_70), axis=0)))) + np.sum(np.isnan(isfcs_loo_60), axis=0), + np.sum(np.isnan(isfcs_loo_70), axis=0)))) + + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array', + random_state=random_state) # Make sure voxel with NaNs across all subjects is always removed data[0, 0, :] = np.nan - iscs_loo_T = isc(data, pairwise=False, tolerate_nans=True) - iscs_loo_F = isc(data, pairwise=False, tolerate_nans=False) - assert np.allclose(iscs_loo_T, iscs_loo_F, equal_nan=True) - assert (np.sum(np.isnan(iscs_loo_T)) == - np.sum(np.isnan(iscs_loo_T)) == - n_subjects) + isfcs_loo_T = isfc(data, pairwise=False, tolerate_nans=True) + isfcs_loo_F = isfc(data, pairwise=False, tolerate_nans=False) + assert np.allclose(isfcs_loo_T, isfcs_loo_F, equal_nan=True) + assert (np.sum(np.isnan(isfcs_loo_T)) == + np.sum(np.isnan(isfcs_loo_T)) == + 1180) - iscs_pw_T = isc(data, pairwise=True, tolerate_nans=True) - iscs_pw_F = isc(data, pairwise=True, tolerate_nans=False) - assert np.allclose(iscs_pw_T, iscs_pw_F, equal_nan=True) + isfcs_pw_T = isfc(data, pairwise=True, tolerate_nans=True) + isfcs_pw_F = isfc(data, pairwise=True, tolerate_nans=False) + assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) - assert (np.sum(np.isnan(iscs_pw_T)) == - np.sum(np.isnan(iscs_pw_T)) == - n_subjects * (n_subjects - 1) / 2) + assert (np.sum(np.isnan(isfcs_pw_T)) == + np.sum(np.isnan(isfcs_pw_T)) == + 11210) if __name__ == '__main__': @@ -736,4 +739,5 @@ def test_isfc_nans(): test_timeshift_isc() test_phaseshift_isc() test_isfc_options() + test_isfc_nans() logger.info("Finished all ISC tests") From 966c182bd731602c679f28a145069305609bbe13 Mon Sep 17 00:00:00 2001 From: Sam 
Nastase Date: Fri, 11 Jan 2019 17:00:33 -0500 Subject: [PATCH 06/21] Reshaped ISFC output to n_subjects x n_voxel_pairs --- brainiak/isc.py | 97 +++++++++++++++++---------- tests/isc/test_isc.py | 151 ++++++++++++++++++++++++++++++++---------- 2 files changed, 179 insertions(+), 69 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index bf8eb343a..cf2cd2dce 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -104,13 +104,13 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): data : list or ndarray (n_TRs x n_voxels x n_subjects) fMRI data for which to compute ISC - pairwise : bool, default:False + pairwise : bool, default: False Whether to use pairwise (True) or leave-one-out (False) approach - summary_statistic : None or str, default:None + summary_statistic : None or str, default: None Return all ISCs or collapse using 'mean' or 'median' - tolerate_nans : bool or float, default:True + tolerate_nans : bool or float, default: True Accommodate NaNs (when averaging in leave-one-out approach) Returns @@ -165,7 +165,8 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): return iscs -def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): +def isfc(data, pairwise=False, summary_statistic=None, + vectorize_isfcs=True, tolerate_nans=True): """Intersubject functional correlation (ISFC) @@ -186,9 +187,26 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): the last dimension is assumed to correspond to subjects. If only two subjects are supplied, simply compute ISFC between these two subjects (precludes averaging in leave-one-out approach, and does not apply summary - statistic). Output is n_voxels by n_voxels array if summary_statistic is - supplied; otherwise output is n_voxels by n_voxels by n_subjects (or - n_pairs) array. + statistic). 
Returns vectorized upper triangle of ISFC matrices for each + subject or pair when vectorized_isfcs=True, or full (redundant) 2D ISFC + matrices when vectorized_isfcs=False. When using leave-one-out approach, + NaNs are ignored when computing mean time series of N-1 subjects (default: + tolerate_nans=True). Alternatively, you may supply a float between 0 and + 1 indicating a threshold proportion of N subjects with non-NaN values + required when computing the average time series for a given voxel. For + example, if tolerate_nans=.8, ISCs will be computed for any voxel where + >= 80% of subjects have non-NaN values, while voxels with < 80% non-NaN + values will be assigned NaNs. If set to False, NaNs are not tolerated + and voxels with one or more NaNs among the N-1 subjects will be assigned + NaN. Setting tolerate_nans to True or False will not affect the pairwise + approach; however, if a threshold float is provided, voxels that do not + reach this threshold will be excluded. Note that accommodating NaNs may + be notably slower than setting tolerate_nans to False. Output is either + n_subjects (or n_pairs) by n_voxels * (n_voxels - 1) / 2 voxel pairs + if vectorize_isfcs=True (see scipy.spatial.distance.squareform), or + n_subjects (or n_pairs) by n_voxels by n_voxels 3D matrix if + vectorize_isfcs=False. If summary_statistic is supplied, output is + 1 by n_voxel_pairs or 1 by n_voxels by n_voxels. The implementation is based on the work in [Simony2016]_. 
@@ -203,10 +221,16 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): summary_statistic : None or str, default: None Return all ISFCs or collapse using 'mean' or 'median' + vectorize_isfcs : bool, default: True + Return upper triangle ISFCs (True) or 2D ISFC matrix (False) + + tolerate_nans : bool or float, default: True + Accommodate NaNs (when averaging in leave-one-out approach) + Returns ------- - isfcs : subjects or pairs by voxels ndarray - ISFC for each subject or pair (or summary statistic) per voxel + isfcs : ndarray + ISFCs for each subject or pair (or summary statistic) per voxel pair """ @@ -243,7 +267,6 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): isfcs.append(isfc_pair) isfcs = np.dstack(isfcs) - # Compute ISFCs using leave-one-out approach elif not pairwise: @@ -265,13 +288,19 @@ def isfc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): # Get ISCs back into correct shape after masking out NaNs isfcs_all = np.full((n_voxels, n_voxels, isfcs.shape[2]), np.nan) isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs - isfcs = isfcs_all + isfcs = np.moveaxis(isfcs_all, 2, 0) + + # Optionally squareform to vectorize ISFC matrices + if vectorize_isfcs: + isfcs = np.vstack([squareform(isfc, checks=False)[np.newaxis, :] + for isfc in isfcs]) + # Summarize results (if requested) if summary_statistic: isfcs = compute_summary_statistic(isfcs, summary_statistic=summary_statistic, - axis=2) + axis=0) return isfcs @@ -414,7 +443,7 @@ def compute_summary_statistic(iscs, summary_statistic='mean', axis=None): iscs : list or ndarray ISC values - summary_statistic : str, default:'mean' + summary_statistic : str, default: 'mean' Summary statistic, 'mean' or 'median' axis : None or int or tuple of ints, optional @@ -531,19 +560,19 @@ def bootstrap_isc(iscs, pairwise=False, summary_statistic='median', iscs : list or ndarray, ISCs by voxels array ISC values for one or more voxels - 
pairwise : bool, default:False + pairwise : bool, default: False Indicator of pairwise or leave-one-out, should match ISCs structure - summary_statistic : str, default:'median' + summary_statistic : str, default: 'median' Summary statistic, either 'median' (default) or 'mean' - n_bootstraps : int, default:1000 + n_bootstraps : int, default: 1000 Number of bootstrap samples (subject-level with replacement) - ci_percentile : int, default:95 + ci_percentile : int, default: 95 Percentile for computing confidence intervals - random_state = int or None, default:None + random_state = int or None, default: None Initial random seed Returns @@ -771,16 +800,16 @@ def permute_one_sample_iscs(iscs, group_parameters, i, pairwise=False, i : int Permutation iteration - pairwise : bool, default:False + pairwise : bool, default: False Indicator of pairwise or leave-one-out, should match ISCs variable - summary_statistic : str, default:'median' + summary_statistic : str, default: 'median' Summary statistic, either 'median' (default) or 'mean' exact_permutations : list List of permutations - prng = None or np.random.RandomState, default:None + prng = None or np.random.RandomState, default: None Initial random seed Returns @@ -839,16 +868,16 @@ def permute_two_sample_iscs(iscs, group_parameters, i, pairwise=False, i : int Permutation iteration - pairwise : bool, default:False + pairwise : bool, default: False Indicator of pairwise or leave-one-out, should match ISCs variable - summary_statistic : str, default:'median' + summary_statistic : str, default: 'median' Summary statistic, either 'median' (default) or 'mean' exact_permutations : list List of permutations - prng = None or np.random.RandomState, default:None + prng = None or np.random.RandomState, default: None Initial random seed Indicator of pairwise or leave-one-out, should match ISCs variable @@ -947,16 +976,16 @@ def permutation_isc(iscs, group_assignment=None, pairwise=False, # noqa: C901 group_assignment : list or ndarray, 
group labels Group labels matching order of ISC input - pairwise : bool, default:False + pairwise : bool, default: False Indicator of pairwise or leave-one-out, should match ISCs variable - summary_statistic : str, default:'median' + summary_statistic : str, default: 'median' Summary statistic, either 'median' (default) or 'mean' - n_permutations : int, default:1000 + n_permutations : int, default: 1000 Number of permutation iteration (randomizing group assignment) - random_state = int, None, or np.random.RandomState, default:None + random_state = int, None, or np.random.RandomState, default: None Initial random seed Returns @@ -1132,13 +1161,13 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', pairwise : bool, default: False Whether to use pairwise (True) or leave-one-out (False) approach - summary_statistic : str, default:'median' + summary_statistic : str, default: 'median' Summary statistic, either 'median' (default) or 'mean' - n_shifts : int, default:1000 + n_shifts : int, default: 1000 Number of randomly shifted samples - random_state = int, None, or np.random.RandomState, default:None + random_state = int, None, or np.random.RandomState, default: None Initial random seed Returns @@ -1266,13 +1295,13 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', pairwise : bool, default: False Whether to use pairwise (True) or leave-one-out (False) approach - summary_statistic : str, default:'median' + summary_statistic : str, default: 'median' Summary statistic, either 'median' (default) or 'mean' - n_shifts : int, default:1000 + n_shifts : int, default: 1000 Number of randomly shifted samples - random_state = int, None, or np.random.RandomState, default:None + random_state = int, None, or np.random.RandomState, default: None Initial random seed Returns diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index ea5a7ebb5..7a2ed2a90 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -229,7 +229,7 @@ def 
test_isc_nans(): iscs_loo_F = isc(data, pairwise=False, tolerate_nans=False) assert np.allclose(iscs_loo_T, iscs_loo_F, equal_nan=True) assert (np.sum(np.isnan(iscs_loo_T)) == - np.sum(np.isnan(iscs_loo_T)) == + np.sum(np.isnan(iscs_loo_F)) == n_subjects) iscs_pw_T = isc(data, pairwise=True, tolerate_nans=True) @@ -237,7 +237,7 @@ def test_isc_nans(): assert np.allclose(iscs_pw_T, iscs_pw_F, equal_nan=True) assert (np.sum(np.isnan(iscs_pw_T)) == - np.sum(np.isnan(iscs_pw_T)) == + np.sum(np.isnan(iscs_pw_F)) == n_subjects * (n_subjects - 1) / 2) @@ -598,12 +598,30 @@ def test_isfc_options(): data = simulated_timeseries(n_subjects, n_TRs, n_voxels=n_voxels, data_type='array') isfcs = isfc(data, pairwise=False, summary_statistic=None) + assert isfcs.shape == (n_subjects, n_voxels * (n_voxels - 1) / 2) + + # Without vectorized upper triangle + isfcs = isfc(data, pairwise=False, summary_statistic=None, + vectorize_isfcs=False) + assert isfcs.shape == (n_subjects, n_voxels, n_voxels) # Just two subjects isfcs = isfc(data[..., :2], pairwise=False, summary_statistic=None) + assert isfcs.shape == (1, n_voxels * (n_voxels - 1) / 2) + + isfcs = isfc(data[..., :2], pairwise=False, summary_statistic=None, + vectorize_isfcs=False) + assert isfcs.shape == (1, n_voxels, n_voxels) # ISFC with pairwise approach isfcs = isfc(data, pairwise=True, summary_statistic=None) + assert isfcs.shape == (n_subjects * (n_subjects - 1) / 2, + n_voxels * (n_voxels - 1) / 2) + + isfcs = isfc(data, pairwise=True, summary_statistic=None, + vectorize_isfcs=False) + assert isfcs.shape == (n_subjects * (n_subjects - 1) / 2, + n_voxels, n_voxels) # ISFC with summary statistics isfcs = isfc(data, pairwise=True, summary_statistic='mean') @@ -612,25 +630,25 @@ def test_isfc_options(): # Check output p-values data = correlated_timeseries(20, 60, noise=.5, random_state=42) - isfcs = isfc(data, pairwise=False) - assert np.all(isfcs[0, 1, :] > .5) and np.all(isfcs[1, 0, :] > .5) - assert np.all(isfcs[:2, 2, :] 
< .5) and np.all(isfcs[2, :2, :] < .5) + isfcs = isfc(data, pairwise=False, vectorize_isfcs=False) + assert np.all(isfcs[:, 0, 1] > .5) and np.all(isfcs[:, 1, 0] > .5) + assert np.all(isfcs[:, :2, 2] < .5) and np.all(isfcs[:, 2, :2] < .5) - isfcs = isfc(data, pairwise=True) - assert np.all(isfcs[0, 1, :] > .5) and np.all(isfcs[1, 0, :] > .5) - assert np.all(isfcs[:2, 2, :] < .5) and np.all(isfcs[2, :2, :] < .5) + isfcs = isfc(data, pairwise=True, vectorize_isfcs=False) + assert np.all(isfcs[:, 0, 1] > .5) and np.all(isfcs[:, 1, 0] > .5) + assert np.all(isfcs[:, :2, 2] < .5) and np.all(isfcs[:, 2, :2] < .5) # Check that ISC and ISFC diagonal are identical iscs = isc(data, pairwise=False) - isfcs = isfc(data, pairwise=False) + isfcs = isfc(data, pairwise=False, vectorize_isfcs=False) for s in np.arange(len(iscs)): - assert np.allclose(isfcs[..., s].diagonal(), iscs[s, :], rtol=1e-03) + assert np.allclose(isfcs[s, ...].diagonal(), iscs[s, :], rtol=1e-03) - # Check that ISC and ISFC diagonal are identical + # Check that ISC and ISFC diagonal are identical (pairwise) iscs = isc(data, pairwise=True) - isfcs = isfc(data, pairwise=True) + isfcs = isfc(data, pairwise=True, vectorize_isfcs=False) for s in np.arange(len(iscs)): - assert np.allclose(isfcs[..., s].diagonal(), iscs[s, :], rtol=1e-03) + assert np.allclose(isfcs[s, ...].diagonal(), iscs[s, :], rtol=1e-03) logger.info("Finished testing ISFC options") @@ -654,38 +672,62 @@ def test_isfc_nans(): data[0, 0, 0] = np.nan # Don't tolerate NaNs, should lose zeroeth voxel - isfcs_loo = isfc(data, pairwise=False, tolerate_nans=False) - assert np.sum(np.isnan(isfcs_loo)) == (n_voxels * 2 - 1) * n_subjects + isfcs_loo = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=False) + assert np.sum(np.isnan(isfcs_loo)) == n_subjects * (n_voxels * 2 - 1) + + # Without vectorized ISFCs + isfcs_loo = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=False) + assert np.sum(np.isnan(isfcs_loo)) == 
n_subjects * (n_voxels - 1) # Tolerate all NaNs, only subject with NaNs yields NaN - isfcs_loo = isfc(data, pairwise=False, tolerate_nans=True) + isfcs_loo = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=True) assert np.sum(np.isnan(isfcs_loo)) == n_voxels * 2 - 1 + isfcs_loo = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=True) + assert np.sum(np.isnan(isfcs_loo)) == n_voxels - 1 + # Pairwise approach shouldn't care - isfcs_pw_T = isfc(data, pairwise=True, tolerate_nans=True) - isfcs_pw_F = isfc(data, pairwise=True, tolerate_nans=False) + isfcs_pw_T = isfc(data, pairwise=True, vectorize_isfcs=False, + tolerate_nans=True) + isfcs_pw_F = isfc(data, pairwise=True, vectorize_isfcs=False, + tolerate_nans=False) assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) - assert (np.sum(np.isnan(isfcs_pw_T)) == np.sum(np.isnan(isfcs_pw_F)) == (n_voxels * 2 - 1) * (n_subjects - 1)) + isfcs_pw_T = isfc(data, pairwise=True, vectorize_isfcs=True, + tolerate_nans=True) + isfcs_pw_F = isfc(data, pairwise=True, vectorize_isfcs=True, + tolerate_nans=False) + assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) + assert (np.sum(np.isnan(isfcs_pw_T)) == + np.sum(np.isnan(isfcs_pw_F)) == + (n_voxels - 1) * (n_subjects - 1)) + # Set proportion of nans to reject (70% and 90% non-NaN) data[0, 0, :] = np.nan data[0, 1, :n_subjects - int(n_subjects * .7)] = np.nan data[0, 2, :n_subjects - int(n_subjects * .9)] = np.nan - nans = np.sum(np.any(np.isnan(data), axis=0), axis=1) - - ### TRIMMING NANS AND NOT REPLACING THEM!!! 
reduced size - isfcs_loo_T = isfc(data, pairwise=False, tolerate_nans=True) - isfcs_loo_F = isfc(data, pairwise=False, tolerate_nans=False) - isfcs_loo_95 = isfc(data, pairwise=False, tolerate_nans=.95) - isfcs_loo_90 = isfc(data, pairwise=False, tolerate_nans=.90) - isfcs_loo_80 = isfc(data, pairwise=False, tolerate_nans=.8) - isfcs_loo_70 = isfc(data, pairwise=False, tolerate_nans=.7) - isfcs_loo_60 = isfc(data, pairwise=False, tolerate_nans=.6) - + isfcs_loo_T = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=True) + isfcs_loo_F = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=False) + isfcs_loo_95 = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=.95) + isfcs_loo_90 = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=.90) + isfcs_loo_80 = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=.8) + isfcs_loo_70 = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=.7) + isfcs_loo_60 = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=.6) assert (np.sum(np.isnan(isfcs_loo_F)) == np.sum(np.isnan(isfcs_loo_95)) == 3420) assert (np.sum(np.isnan(isfcs_loo_80)) == @@ -707,21 +749,60 @@ def test_isfc_nans(): np.sum(np.isnan(isfcs_loo_60), axis=0), np.sum(np.isnan(isfcs_loo_70), axis=0)))) + isfcs_loo_T = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=True) + isfcs_loo_F = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=False) + isfcs_loo_95 = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.95) + isfcs_loo_90 = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.90) + isfcs_loo_80 = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.8) + isfcs_loo_70 = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.7) + isfcs_loo_60 = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.6) + assert (np.sum(np.isnan(isfcs_loo_F)) == + np.sum(np.isnan(isfcs_loo_95)) 
== 1680) + assert (np.sum(np.isnan(isfcs_loo_80)) == + np.sum(np.isnan(isfcs_loo_90)) == 1194) + assert (np.sum(np.isnan(isfcs_loo_T)) == + np.sum(np.isnan(isfcs_loo_60)) == + np.sum(np.isnan(isfcs_loo_70)) == 802) + assert np.array_equal(np.sum(np.isnan(isfcs_loo_F), axis=0), + np.sum(np.isnan(isfcs_loo_95), axis=0)) + assert np.array_equal(np.sum(np.isnan(isfcs_loo_80), axis=0), + np.sum(np.isnan(isfcs_loo_90), axis=0)) + assert np.all((np.array_equal( + np.sum(np.isnan(isfcs_loo_T), axis=0), + np.sum(np.isnan(isfcs_loo_60), axis=0)), + np.array_equal( + np.sum(np.isnan(isfcs_loo_T), axis=0), + np.sum(np.isnan(isfcs_loo_70), axis=0)), + np.array_equal( + np.sum(np.isnan(isfcs_loo_60), axis=0), + np.sum(np.isnan(isfcs_loo_70), axis=0)))) + data = simulated_timeseries(n_subjects, n_TRs, n_voxels=n_voxels, data_type='array', random_state=random_state) # Make sure voxel with NaNs across all subjects is always removed data[0, 0, :] = np.nan - isfcs_loo_T = isfc(data, pairwise=False, tolerate_nans=True) - isfcs_loo_F = isfc(data, pairwise=False, tolerate_nans=False) + isfcs_loo_T = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=True) + isfcs_loo_F = isfc(data, pairwise=False, vectorize_isfcs=False, + tolerate_nans=False) assert np.allclose(isfcs_loo_T, isfcs_loo_F, equal_nan=True) assert (np.sum(np.isnan(isfcs_loo_T)) == - np.sum(np.isnan(isfcs_loo_T)) == + np.sum(np.isnan(isfcs_loo_F)) == 1180) - isfcs_pw_T = isfc(data, pairwise=True, tolerate_nans=True) - isfcs_pw_F = isfc(data, pairwise=True, tolerate_nans=False) + isfcs_pw_T = isfc(data, pairwise=True, vectorize_isfcs=False, + tolerate_nans=True) + isfcs_pw_F = isfc(data, pairwise=True, vectorize_isfcs=False, + tolerate_nans=False) assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) assert (np.sum(np.isnan(isfcs_pw_T)) == From 02d9729a60651233a6ed8054d0559926060f42c7 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Fri, 11 Jan 2019 17:52:47 -0500 Subject: [PATCH 07/21] Fixed whitespace 
errors etc --- brainiak/isc.py | 13 ++++++------- tests/isc/test_isc.py | 6 +++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index cf2cd2dce..21f1f1472 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -147,7 +147,7 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): iscs = np.array([pearsonr(subject, mean(np.delete(voxel_data, s, axis=0), - axis=0))[0] + axis=0))[0] for s, subject in enumerate(voxel_data)]) voxel_iscs.append(iscs) iscs_stack = np.column_stack(voxel_iscs) @@ -222,7 +222,7 @@ def isfc(data, pairwise=False, summary_statistic=None, Return all ISFCs or collapse using 'mean' or 'median' vectorize_isfcs : bool, default: True - Return upper triangle ISFCs (True) or 2D ISFC matrix (False) + Return upper triangle ISFCs (True) or 2D ISFC matrix (False) tolerate_nans : bool or float, default: True Accommodate NaNs (when averaging in leave-one-out approach) @@ -289,13 +289,12 @@ def isfc(data, pairwise=False, summary_statistic=None, isfcs_all = np.full((n_voxels, n_voxels, isfcs.shape[2]), np.nan) isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs isfcs = np.moveaxis(isfcs_all, 2, 0) - + # Optionally squareform to vectorize ISFC matrices if vectorize_isfcs: isfcs = np.vstack([squareform(isfc, checks=False)[np.newaxis, :] for isfc in isfcs]) - # Summarize results (if requested) if summary_statistic: isfcs = compute_summary_statistic(isfcs, @@ -509,16 +508,16 @@ def threshold_nans(data, tolerate_nans): if not 0.0 <= tolerate_nans <= 1.0: raise ValueError("If threshold to tolerate NaNs is a float, " "it must be between 0.0 and 1.0; got {0}".format( - tolerate_nans)) + tolerate_nans)) nans += ~(np.sum(~np.any(np.isnan(data), axis=0), axis=1) >= - data.shape[-1] * tolerate_nans) + data.shape[-1] * tolerate_nans) logger.info("ISC computation will tolerate voxels with at least " "{0} non-NaN values: {1} voxels do not meet " "threshold".format(tolerate_nans, 
np.sum(nans))) else: - logger.info("ISC computation will not tolerate NaNs when averaging") + logger.info("ISC computation will not tolerate NaNs when averaging") mask = ~nans data = data[:, mask, :] diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index 7a2ed2a90..f7625a41b 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -599,7 +599,7 @@ def test_isfc_options(): n_voxels=n_voxels, data_type='array') isfcs = isfc(data, pairwise=False, summary_statistic=None) assert isfcs.shape == (n_subjects, n_voxels * (n_voxels - 1) / 2) - + # Without vectorized upper triangle isfcs = isfc(data, pairwise=False, summary_statistic=None, vectorize_isfcs=False) @@ -608,7 +608,7 @@ def test_isfc_options(): # Just two subjects isfcs = isfc(data[..., :2], pairwise=False, summary_statistic=None) assert isfcs.shape == (1, n_voxels * (n_voxels - 1) / 2) - + isfcs = isfc(data[..., :2], pairwise=False, summary_statistic=None, vectorize_isfcs=False) assert isfcs.shape == (1, n_voxels, n_voxels) @@ -617,7 +617,7 @@ def test_isfc_options(): isfcs = isfc(data, pairwise=True, summary_statistic=None) assert isfcs.shape == (n_subjects * (n_subjects - 1) / 2, n_voxels * (n_voxels - 1) / 2) - + isfcs = isfc(data, pairwise=True, summary_statistic=None, vectorize_isfcs=False) assert isfcs.shape == (n_subjects * (n_subjects - 1) / 2, From fcaf5d51c9f1a4cf1ee8d5bdc73cf2a697eadaaf Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Sat, 12 Jan 2019 19:15:14 -0500 Subject: [PATCH 08/21] tolerate_nans in phaseshift_isc and timeshift_isc tests --- brainiak/isc.py | 56 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index 21f1f1472..5ab3e1963 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -1126,7 +1126,7 @@ def permutation_isc(iscs, group_assignment=None, pairwise=False, # noqa: C901 def timeshift_isc(data, pairwise=False, summary_statistic='median', - n_shifts=1000, 
random_state=None): + n_shifts=1000, tolerate_nans=True, random_state=None): """Circular time-shift randomization for one-sample ISC test @@ -1140,7 +1140,19 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', each item is a time-points by voxels ndarray for a given subject. Multiple input ndarrays must be the same shape. If a single ndarray is supplied, the last dimension is assumed to correspond to subjects. - Returns the observed ISC and p-values (two-tailed test), as well as + When using leave-one-out approach, NaNs are ignored when computing mean + time series of N-1 subjects (default: tolerate_nans=True). Alternatively, + you may supply a float between 0 and 1 indicating a threshold proportion + of N subjects with non-NaN values required when computing the average time + series for a given voxel. For example, if tolerate_nans=.8, ISCs will be + computed for any voxel where >= 80% of subjects have non-NaN values, + while voxels with < 80% non-NaN values will be assigned NaNs. If set to + False, NaNs are not tolerated and voxels with one or more NaNs among the + N-1 subjects will be assigned NaN. Setting tolerate_nans to True or False + will not affect the pairwise approach; however, if a threshold float is + provided, voxels that do not reach this threshold will be excluded. Note + that accommodating NaNs may be notably slower than setting tolerate_nans to + False. Returns the observed ISC and p-values (two-tailed test), as well as the null distribution of ISCs computed on randomly time-shifted data. 
The implementation is based on the work in [Kauppi2010]_ and @@ -1166,6 +1178,9 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', n_shifts : int, default: 1000 Number of randomly shifted samples + tolerate_nans : bool or float, default: True + Accommodate NaNs (when averaging in leave-one-out approach) + random_state = int, None, or np.random.RandomState, default: None Initial random seed @@ -1186,7 +1201,8 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', # Get actual observed ISC observed = isc(data, pairwise=pairwise, - summary_statistic=summary_statistic) + summary_statistic=summary_statistic, + tolerate_nans=tolerate_nans) # Roll axis to get subjects in first dimension for loop if pairwise: @@ -1219,7 +1235,8 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', # Compute null ISC on shifted data for pairwise approach shifted_isc = isc(shifted_data, pairwise=pairwise, - summary_statistic=summary_statistic) + summary_statistic=summary_statistic, + tolerate_nans=tolerate_nans) # In leave-one-out, apply shift only to each left-out participant elif not pairwise: @@ -1231,7 +1248,8 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', nonshifted_mean = np.mean(np.delete(data, s, 2), axis=2) loo_isc = isc(np.dstack((shifted_subject, nonshifted_mean)), pairwise=False, - summary_statistic=None) + summary_statistic=None, + tolerate_nans=tolerate_nans) shifted_isc.append(loo_isc) # Get summary statistics across left-out subjects @@ -1261,7 +1279,7 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', def phaseshift_isc(data, pairwise=False, summary_statistic='median', - n_shifts=1000, random_state=None): + n_shifts=1000, tolerate_nans=True, random_state=None): """Phase randomization for one-sample ISC test @@ -1275,7 +1293,19 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', each item is a time-points by voxels ndarray for a given subject. 
Multiple input ndarrays must be the same shape. If a single ndarray is supplied, the last dimension is assumed to correspond to subjects. - Returns the observed ISC and p-values (two-tailed test), as well as + When using leave-one-out approach, NaNs are ignored when computing mean + time series of N-1 subjects (default: tolerate_nans=True). Alternatively, + you may supply a float between 0 and 1 indicating a threshold proportion + of N subjects with non-NaN values required when computing the average time + series for a given voxel. For example, if tolerate_nans=.8, ISCs will be + computed for any voxel where >= 80% of subjects have non-NaN values, + while voxels with < 80% non-NaN values will be assigned NaNs. If set to + False, NaNs are not tolerated and voxels with one or more NaNs among the + N-1 subjects will be assigned NaN. Setting tolerate_nans to True or False + will not affect the pairwise approach; however, if a threshold float is + provided, voxels that do not reach this threshold will be excluded. Note + that accommodating NaNs may be notably slower than setting tolerate_nans to + False. Returns the observed ISC and p-values (two-tailed test), as well as the null distribution of ISCs computed on phase-randomized data. 
The implementation is based on the work in [Lerner2011]_ and @@ -1300,6 +1330,9 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', n_shifts : int, default: 1000 Number of randomly shifted samples + tolerate_nans : bool or float, default: True + Accommodate NaNs (when averaging in leave-one-out approach) + random_state = int, None, or np.random.RandomState, default: None Initial random seed @@ -1320,7 +1353,8 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', # Get actual observed ISC observed = isc(data, pairwise=pairwise, - summary_statistic=summary_statistic) + summary_statistic=summary_statistic, + tolerate_nans=tolerate_nans) # Iterate through randomized shifts to create null distribution distribution = [] @@ -1359,7 +1393,8 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', # Compute null ISC on shifted data for pairwise approach shifted_isc = isc(shifted_data, pairwise=True, - summary_statistic=summary_statistic) + summary_statistic=summary_statistic, + tolerate_nans=tolerate_nans) # In leave-one-out, apply shift only to each left-out participant elif not pairwise: @@ -1383,7 +1418,8 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', # ISC of shifted left-out subject vs mean of N-1 subjects nonshifted_mean = np.mean(np.delete(data, s, 2), axis=2) loo_isc = isc(np.dstack((shifted_subject, nonshifted_mean)), - pairwise=False, summary_statistic=None) + pairwise=False, summary_statistic=None, + tolerate_nans=tolerate_nans) shifted_isc.append(loo_isc) # Get summary statistics across left-out subjects From 6b943f544dd559225515cd23948918e20321ed4e Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Mon, 14 Jan 2019 22:14:45 -0500 Subject: [PATCH 09/21] Added new function to squareform ISFCs and keep diagonal --- brainiak/isc.py | 124 +++++++++++++++++++++-------- tests/isc/test_isc.py | 181 ++++++++++++++++++++++++++++-------------- 2 files changed, 213 insertions(+), 92 deletions(-) 
diff --git a/brainiak/isc.py b/brainiak/isc.py index 960d2d069..50d55a225 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -65,6 +65,7 @@ "isc", "permutation_isc", "phaseshift_isc", + "squareform_isfc", "timeshift_isc", ] @@ -106,7 +107,8 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): that accommodating NaNs may be notably slower than setting tolerate_nans to False. Output is an ndarray where the first dimension is the number of subjects or pairs and the second dimension is the number of voxels (or - ROIs). + ROIs). If only two subjects are supplied or a summary statistic is invoked, + the output is a ndarray n_voxels long. The implementation is based on the work in [Hasson2004]_. @@ -144,7 +146,7 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): mean = np.nanmean else: mean = np.mean - data, mask = threshold_nans(data, tolerate_nans) + data, mask = _threshold_nans(data, tolerate_nans) # Loop over each voxel or ROI voxel_iscs = [] @@ -173,6 +175,10 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): summary_statistic=summary_statistic, axis=0)[np.newaxis, :] + # Throw away first dimension if singleton + if iscs.shape[0] == 1: + iscs = iscs[0] + return iscs @@ -213,11 +219,11 @@ def isfc(data, pairwise=False, summary_statistic=None, approach; however, if a threshold float is provided, voxels that do not reach this threshold will be excluded. Note that accommodating NaNs may be notably slower than setting tolerate_nans to False. Output is either - n_subjects (or n_pairs) by n_voxels * (n_voxels - 1) / 2 voxel pairs - if vectorize_isfcs=True (see scipy.spatial.distance.squareform), or - n_subjects (or n_pairs) by n_voxels by n_voxels 3D matrix if - vectorize_isfcs=False. If summary_statistic is supplied, output is - 1 by n_voxel_pairs or 1 by n_voxels by n_voxels. 
+ a tuple comprising condensed off-diagonal ISFC values and the diagonal + ISC values if vectorize_isfcs=True, or a single ndarray with shape + n_subjects (or n_pairs) by n_voxels by n_voxels 3D array if + vectorize_isfcs=False (see brainiak.isc.squareform_isfc). If + summary_statistic is supplied, output is collapsed along first dimension. The implementation is based on the work in [Simony2016]_. @@ -233,14 +239,15 @@ def isfc(data, pairwise=False, summary_statistic=None, Return all ISFCs or collapse using 'mean' or 'median' vectorize_isfcs : bool, default: True - Return upper triangle ISFCs (True) or 2D ISFC matrix (False) + Return tuple of condensed ISFCs and ISCs (True) or square (redundant) + ISFCs (False) tolerate_nans : bool or float, default: True Accommodate NaNs (when averaging in leave-one-out approach) Returns ------- - isfcs : ndarray + isfcs : ndarray or tuple of ndarrays ISFCs for each subject or pair (or summary statistic) per voxel pair """ @@ -253,7 +260,7 @@ def isfc(data, pairwise=False, summary_statistic=None, mean = np.nanmean else: mean = np.mean - data, mask = threshold_nans(data, tolerate_nans) + data, mask = _threshold_nans(data, tolerate_nans) # Handle just two subjects properly if n_subjects == 2: @@ -301,18 +308,22 @@ def isfc(data, pairwise=False, summary_statistic=None, isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs isfcs = np.moveaxis(isfcs_all, 2, 0) - # Optionally squareform to vectorize ISFC matrices - if vectorize_isfcs: - isfcs = np.vstack([squareform(isfc, checks=False)[np.newaxis, :] - for isfc in isfcs]) - # Summarize results (if requested) if summary_statistic: isfcs = compute_summary_statistic(isfcs, summary_statistic=summary_statistic, axis=0) - return isfcs + # Throw away first dimension if singleton + if isfcs.shape[0] == 1: + isfcs = isfcs[0] + + # Optionally squareform to vectorize ISFC matrices + if vectorize_isfcs: + isfcs, iscs = squareform_isfc(isfcs) + return isfcs, iscs + else: + return isfcs def 
_check_timeseries_input(data): @@ -479,7 +490,70 @@ def compute_summary_statistic(iscs, summary_statistic='mean', axis=None): return statistic -def threshold_nans(data, tolerate_nans): +def squareform_isfc(isfcs, iscs=None): + + """Converts square ISFCs to condensed ISFCs (and ISCs), and vice-versa + + If input is a 2- or 3-dimensional array of square ISFC matrices, converts + this to the condensed off-diagonal ISFC values (i.e., the vectorized + triangle) and the diagonal ISC values. In this case, input must be a + single array of shape either n_voxels x n_voxels or n_subjects (or + n_pairs) x n_voxels x n_voxels. The condensed ISFC values are vectorized + according to scipy.spatial.distance.squareform, yielding n_voxels * + (n_voxels - 1) / 2 values comprising every voxel pair. Alternatively, if + input is an array of condensed off-diagonal ISFC values and an array of + diagonal ISC values, the square (redundant) ISFC values are returned. + This function mimics scipy.spatial.distance.squareform, but is intended + to retain the diagonal ISC values. + + Parameters + ---------- + isfcs : ndarray + Either condensed or redundant ISFC values + + iscs: ndarray, optional + Diagonal ISC values, required when input is condensed + + Returns + ------- + isfcs : ndarray or tuple of ndarrays + If condensed ISFCs are passed, a single redundant ISFC array is + returned; if redundant ISFCs are passed, both a condensed off- + diagonal ISFC array and the diagonal ISC values are returned + """ + + # Check if incoming ISFCs are square (redundant) + if not type(iscs) == np.ndarray and isfcs.shape[-2] == isfcs.shape[-1]: + if isfcs.ndim == 2: + isfcs = isfcs[np.newaxis, ...] 
+ if isfcs.ndim == 3: + iscs = np.diagonal(isfcs, axis1=1, axis2=2) + isfcs = np.vstack([squareform(isfc, checks=False)[np.newaxis, :] + for isfc in isfcs]) + else: + raise ValueError("Square (redundant) ISFCs must be square " + "with multiple subjects or pairs of subjects " + "indexed by the first dimension") + if isfcs.shape[0] == iscs.shape[0] == 1: + isfcs, iscs = isfcs[0], iscs[0] + return isfcs, iscs + + # Otherwise, convert from condensed to redundant + else: + if isfcs.ndim == iscs.ndim == 1: + isfcs, iscs = isfcs[np.newaxis, :], iscs[np.newaxis, :] + isfcs_stack = [] + for isfc, isc in zip(isfcs, iscs): + isfc_sq = squareform(isfc, checks=False) + np.fill_diagonal(isfc_sq, isc) + isfcs_stack.append(isfc_sq[np.newaxis, ...]) + isfcs = np.vstack(isfcs_stack) + if isfcs.shape[0] == 1: + isfcs = isfcs[0] + return isfcs + + +def _threshold_nans(data, tolerate_nans): """Thresholds data based on proportion of subjects with NaNs @@ -611,7 +685,7 @@ def bootstrap_isc(iscs, pairwise=False, summary_statistic='median', # Compute summary statistic for observed ISCs observed = compute_summary_statistic(iscs, summary_statistic=summary_statistic, - axis=0)[np.newaxis, :] + axis=0) # Set up an empty list to build our bootstrap distribution distribution = [] @@ -688,9 +762,6 @@ def bootstrap_isc(iscs, pairwise=False, summary_statistic='median', side='two-sided', exact=False, axis=0) - # Reshape p-values to fit with data shape - p = p[np.newaxis, :] - return observed, ci, p, distribution @@ -1074,7 +1145,7 @@ def permutation_isc(iscs, group_assignment=None, pairwise=False, # noqa: C901 group_parameters['group_labels'][1], :], summary_statistic=summary_statistic, axis=0)) - observed = np.array(observed)[np.newaxis, :] + observed = np.array(observed) # Set up an empty list to build our permutation distribution distribution = [] @@ -1130,9 +1201,6 @@ def permutation_isc(iscs, group_assignment=None, pairwise=False, # noqa: C901 side='two-sided', exact=False, axis=0) - # Reshape 
p-values to fit with data shape - p = p[np.newaxis, :] - return observed, p, distribution @@ -1283,9 +1351,6 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', side='two-sided', exact=False, axis=0) - # Reshape p-values to fit with data shape - p = p[np.newaxis, :] - return observed, p, distribution @@ -1451,7 +1516,4 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', side='two-sided', exact=False, axis=0) - # Reshape p-values to fit with data shape - p = p[np.newaxis, :] - return observed, p, distribution diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index f7625a41b..7f37a3d07 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -1,7 +1,8 @@ import numpy as np import logging from brainiak.isc import (isc, isfc, bootstrap_isc, permutation_isc, - timeshift_isc, phaseshift_isc) + squareform_isfc, timeshift_isc, + phaseshift_isc) from scipy.spatial.distance import squareform logger = logging.getLogger(__name__) @@ -114,15 +115,19 @@ def test_isc_options(): iscs_loo = isc(data, pairwise=False, summary_statistic=None) assert iscs_loo.shape == (n_subjects, n_voxels) + # Just two subjects + iscs_loo = isc(data[..., :2], pairwise=False, summary_statistic=None) + assert iscs_loo.shape == (n_voxels,) + iscs_pw = isc(data, pairwise=True, summary_statistic=None) assert iscs_pw.shape == (n_subjects*(n_subjects-1)/2, n_voxels) # Check summary statistics isc_mean = isc(data, pairwise=False, summary_statistic='mean') - assert isc_mean.shape == (1, n_voxels) + assert isc_mean.shape == (n_voxels,) isc_median = isc(data, pairwise=False, summary_statistic='median') - assert isc_median.shape == (1, n_voxels) + assert isc_median.shape == (n_voxels,) try: isc(data, pairwise=False, summary_statistic='min') @@ -295,15 +300,15 @@ def test_bootstrap_isc(): observed, ci, p, distribution = bootstrap_isc(iscs, pairwise=False) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 
- assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 iscs = isc(data, pairwise=True) observed, ci, p, distribution = bootstrap_isc(iscs, pairwise=True) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 # Check that ISC computation and bootstrap observed are same iscs = isc(data, pairwise=False) @@ -438,29 +443,31 @@ def test_permutation_isc(): observed, p, distribution = permutation_isc(iscs, pairwise=False) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 iscs = isc(data, pairwise=True) observed, p, distribution = permutation_isc(iscs, pairwise=True) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 # Check that ISC computation and permutation observed are same iscs = isc(data, pairwise=False) observed, p, distribution = permutation_isc(iscs, pairwise=False, summary_statistic='median') - assert np.array_equal(observed, isc(data, pairwise=False, - summary_statistic='median')) + assert np.allclose(observed, isc(data, pairwise=False, + summary_statistic='median'), + rtol=1e-03) # Check that ISC computation and permuation observed are same iscs = isc(data, pairwise=True) observed, p, distribution = permutation_isc(iscs, pairwise=True, summary_statistic='mean') - assert np.array_equal(observed, isc(data, pairwise=True, - summary_statistic='mean')) + assert np.allclose(observed, isc(data, pairwise=True, + summary_statistic='mean'), + rtol=1e-03) logger.info("Finished testing permutaton test") @@ -501,29 +508,31 @@ def test_timeshift_isc(): observed, p, distribution = timeshift_isc(data, pairwise=False) assert np.all(iscs[:, :2] > .5) assert 
np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 iscs = isc(data, pairwise=True) observed, p, distribution = timeshift_isc(data, pairwise=True) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 # Check that ISC computation and permutation observed are same iscs = isc(data, pairwise=False) observed, p, distribution = timeshift_isc(data, pairwise=False, summary_statistic='median') - assert np.array_equal(observed, isc(data, pairwise=False, - summary_statistic='median')) + assert np.allclose(observed, isc(data, pairwise=False, + summary_statistic='median'), + rtol=1e-03) # Check that ISC computation and permuation observed are same iscs = isc(data, pairwise=True) observed, p, distribution = timeshift_isc(data, pairwise=True, summary_statistic='mean') - assert np.array_equal(observed, isc(data, pairwise=True, - summary_statistic='mean')) + assert np.allclose(observed, isc(data, pairwise=True, + summary_statistic='mean'), + rtol=1e-03) logger.info("Finished testing circular time-shift") @@ -558,29 +567,31 @@ def test_phaseshift_isc(): observed, p, distribution = phaseshift_isc(data, pairwise=False) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 iscs = isc(data, pairwise=True) observed, p, distribution = phaseshift_isc(data, pairwise=True) assert np.all(iscs[:, :2] > .5) assert np.all(iscs[:, -1] < .5) - assert p[0, 0] < .05 and p[0, 1] < .05 - assert p[0, 2] > .01 + assert p[0] < .05 and p[1] < .05 + assert p[2] > .01 # Check that ISC computation and permutation observed are same iscs = isc(data, pairwise=False) observed, p, distribution = phaseshift_isc(data, pairwise=False, summary_statistic='median') - assert 
np.array_equal(observed, isc(data, pairwise=False, - summary_statistic='median')) + assert np.allclose(observed, isc(data, pairwise=False, + summary_statistic='median'), + rtol=1e-03) # Check that ISC computation and permuation observed are same iscs = isc(data, pairwise=True) observed, p, distribution = phaseshift_isc(data, pairwise=True, summary_statistic='mean') - assert np.array_equal(observed, isc(data, pairwise=True, - summary_statistic='mean')) + assert np.allclose(observed, isc(data, pairwise=True, + summary_statistic='mean'), + rtol=1e-03) logger.info("Finished testing phase randomization") @@ -597,8 +608,9 @@ def test_isfc_options(): data = simulated_timeseries(n_subjects, n_TRs, n_voxels=n_voxels, data_type='array') - isfcs = isfc(data, pairwise=False, summary_statistic=None) + isfcs, iscs = isfc(data, pairwise=False, summary_statistic=None) assert isfcs.shape == (n_subjects, n_voxels * (n_voxels - 1) / 2) + assert iscs.shape == (n_subjects, n_voxels) # Without vectorized upper triangle isfcs = isfc(data, pairwise=False, summary_statistic=None, @@ -606,17 +618,20 @@ def test_isfc_options(): assert isfcs.shape == (n_subjects, n_voxels, n_voxels) # Just two subjects - isfcs = isfc(data[..., :2], pairwise=False, summary_statistic=None) - assert isfcs.shape == (1, n_voxels * (n_voxels - 1) / 2) + isfcs, iscs = isfc(data[..., :2], pairwise=False, summary_statistic=None) + assert isfcs.shape == (n_voxels * (n_voxels - 1) / 2,) + assert iscs.shape == (n_voxels,) isfcs = isfc(data[..., :2], pairwise=False, summary_statistic=None, vectorize_isfcs=False) - assert isfcs.shape == (1, n_voxels, n_voxels) + assert isfcs.shape == (n_voxels, n_voxels) # ISFC with pairwise approach - isfcs = isfc(data, pairwise=True, summary_statistic=None) + isfcs, iscs = isfc(data, pairwise=True, summary_statistic=None) assert isfcs.shape == (n_subjects * (n_subjects - 1) / 2, n_voxels * (n_voxels - 1) / 2) + assert iscs.shape == (n_subjects * (n_subjects - 1) / 2, + n_voxels) isfcs = 
isfc(data, pairwise=True, summary_statistic=None, vectorize_isfcs=False) @@ -624,8 +639,8 @@ def test_isfc_options(): n_voxels, n_voxels) # ISFC with summary statistics - isfcs = isfc(data, pairwise=True, summary_statistic='mean') - isfcs = isfc(data, pairwise=True, summary_statistic='median') + isfcs, iscs = isfc(data, pairwise=True, summary_statistic='mean') + isfcs, iscs = isfc(data, pairwise=True, summary_statistic='median') # Check output p-values data = correlated_timeseries(20, 60, noise=.5, @@ -643,12 +658,16 @@ def test_isfc_options(): isfcs = isfc(data, pairwise=False, vectorize_isfcs=False) for s in np.arange(len(iscs)): assert np.allclose(isfcs[s, ...].diagonal(), iscs[s, :], rtol=1e-03) + isfcs, iscs_v = isfc(data, pairwise=False) + assert np.allclose(iscs, iscs_v, rtol=1e-03) # Check that ISC and ISFC diagonal are identical (pairwise) iscs = isc(data, pairwise=True) isfcs = isfc(data, pairwise=True, vectorize_isfcs=False) for s in np.arange(len(iscs)): assert np.allclose(isfcs[s, ...].diagonal(), iscs[s, :], rtol=1e-03) + isfcs, iscs_v = isfc(data, pairwise=True) + assert np.allclose(iscs, iscs_v, rtol=1e-03) logger.info("Finished testing ISFC options") @@ -676,9 +695,9 @@ def test_isfc_nans(): tolerate_nans=False) assert np.sum(np.isnan(isfcs_loo)) == n_subjects * (n_voxels * 2 - 1) - # Without vectorized ISFCs - isfcs_loo = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=False) + # With vectorized ISFCs + isfcs_loo, iscs_loo = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=False) assert np.sum(np.isnan(isfcs_loo)) == n_subjects * (n_voxels - 1) # Tolerate all NaNs, only subject with NaNs yields NaN @@ -686,8 +705,8 @@ def test_isfc_nans(): tolerate_nans=True) assert np.sum(np.isnan(isfcs_loo)) == n_voxels * 2 - 1 - isfcs_loo = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=True) + isfcs_loo, iscs_loo = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=True) assert 
np.sum(np.isnan(isfcs_loo)) == n_voxels - 1 # Pairwise approach shouldn't care @@ -700,10 +719,10 @@ def test_isfc_nans(): np.sum(np.isnan(isfcs_pw_F)) == (n_voxels * 2 - 1) * (n_subjects - 1)) - isfcs_pw_T = isfc(data, pairwise=True, vectorize_isfcs=True, - tolerate_nans=True) - isfcs_pw_F = isfc(data, pairwise=True, vectorize_isfcs=True, - tolerate_nans=False) + isfcs_pw_T, iscs_pw_T = isfc(data, pairwise=True, vectorize_isfcs=True, + tolerate_nans=True) + isfcs_pw_F, iscs_pw_T = isfc(data, pairwise=True, vectorize_isfcs=True, + tolerate_nans=False) assert np.allclose(isfcs_pw_T, isfcs_pw_F, equal_nan=True) assert (np.sum(np.isnan(isfcs_pw_T)) == np.sum(np.isnan(isfcs_pw_F)) == @@ -749,20 +768,20 @@ def test_isfc_nans(): np.sum(np.isnan(isfcs_loo_60), axis=0), np.sum(np.isnan(isfcs_loo_70), axis=0)))) - isfcs_loo_T = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=True) - isfcs_loo_F = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=False) - isfcs_loo_95 = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=.95) - isfcs_loo_90 = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=.90) - isfcs_loo_80 = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=.8) - isfcs_loo_70 = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=.7) - isfcs_loo_60 = isfc(data, pairwise=False, vectorize_isfcs=True, - tolerate_nans=.6) + isfcs_loo_T, _ = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=True) + isfcs_loo_F, _ = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=False) + isfcs_loo_95, _ = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.95) + isfcs_loo_90, _ = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.90) + isfcs_loo_80, _ = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.8) + isfcs_loo_70, _ = isfc(data, pairwise=False, vectorize_isfcs=True, + tolerate_nans=.7) + isfcs_loo_60, _ = isfc(data, 
pairwise=False, vectorize_isfcs=True, + tolerate_nans=.6) assert (np.sum(np.isnan(isfcs_loo_F)) == np.sum(np.isnan(isfcs_loo_95)) == 1680) assert (np.sum(np.isnan(isfcs_loo_80)) == @@ -810,6 +829,45 @@ def test_isfc_nans(): 11210) +def test_squareform_isfc(): + + # Set parameters for toy time series data + n_subjects = 20 + n_TRs = 60 + n_voxels = 30 + random_state = 42 + + logger.info("Testing ISC options") + + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array', + random_state=random_state) + + # Generate square redundant ISFCs + isfcs_r = isfc(data, vectorize_isfcs=False) + assert isfcs_r.shape == (n_subjects, n_voxels, n_voxels) + + # Squareform these into condensed ISFCs and ISCs + isfcs_c, iscs_c = squareform_isfc(isfcs_r) + assert isfcs_c.shape == (n_subjects, n_voxels * (n_voxels - 1) / 2) + assert iscs_c.shape == (n_subjects, n_voxels) + + # Go back the other way and check it's the same + isfcs_new = squareform_isfc(isfcs_c, iscs_c) + assert np.array_equal(isfcs_r, isfcs_new) + + # Check against ISC function + assert np.allclose(isc(data), iscs_c, rtol=1e-03) + + # Check for two subjects + isfcs_r = isfc(data[..., :2], vectorize_isfcs=False) + assert isfcs_r.shape == (n_voxels, n_voxels) + isfcs_c, iscs_c = squareform_isfc(isfcs_r) + assert isfcs_c.shape == (n_voxels * (n_voxels - 1) / 2,) + assert iscs_c.shape == (n_voxels,) + assert np.array_equal(isfcs_r, squareform_isfc(isfcs_c, iscs_c)) + + if __name__ == '__main__': test_isc_input() test_isc_options() @@ -821,4 +879,5 @@ def test_isfc_nans(): test_phaseshift_isc() test_isfc_options() test_isfc_nans() + test_squareform_isfc() logger.info("Finished all ISC tests") From daa5d4190db01ebd7d4340774705b1bb6fb5eca6 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Wed, 27 Feb 2019 19:01:22 -0500 Subject: [PATCH 10/21] Replaced old p_from_null with new version --- brainiak/isc.py | 32 +++++++++---------- brainiak/utils/utils.py | 70 ++--------------------------------------- 2 
files changed, 19 insertions(+), 83 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index 50d55a225..1299b9ad4 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -54,7 +54,7 @@ from scipy.fftpack import fft, ifft import itertools as it from brainiak.fcma.util import compute_correlation -from brainiak.utils.utils import compute_p_from_null_distribution +from brainiak.utils.utils import p_from_null logger = logging.getLogger(__name__) @@ -758,9 +758,9 @@ def bootstrap_isc(iscs, pairwise=False, summary_statistic='median', shifted = distribution - observed # Get p-value for actual median from shifted distribution - p = compute_p_from_null_distribution(observed, shifted, - side='two-sided', exact=False, - axis=0) + p = p_from_null(observed, shifted, + side='two-sided', exact=False, + axis=0) return observed, ci, p, distribution @@ -1193,13 +1193,13 @@ def permutation_isc(iscs, group_assignment=None, pairwise=False, # noqa: C901 # Get p-value for actual median from shifted distribution if exact_permutations: - p = compute_p_from_null_distribution(observed, distribution, - side='two-sided', exact=True, - axis=0) + p = p_from_null(observed, distribution, + side='two-sided', exact=True, + axis=0) else: - p = compute_p_from_null_distribution(observed, distribution, - side='two-sided', exact=False, - axis=0) + p = p_from_null(observed, distribution, + side='two-sided', exact=False, + axis=0) return observed, p, distribution @@ -1347,9 +1347,9 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', assert distribution.shape == (n_shifts, n_voxels) # Get p-value for actual median from shifted distribution - p = compute_p_from_null_distribution(observed, distribution, - side='two-sided', exact=False, - axis=0) + p = p_from_null(observed, distribution, + side='two-sided', exact=False, + axis=0) return observed, p, distribution @@ -1512,8 +1512,8 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', assert distribution.shape == 
(n_shifts, n_voxels) # Get p-value for actual median from shifted distribution - p = compute_p_from_null_distribution(observed, distribution, - side='two-sided', exact=False, - axis=0) + p = p_from_null(observed, distribution, + side='two-sided', exact=False, + axis=0) return observed, p, distribution diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index c44ac5488..511fa4c64 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -772,73 +772,9 @@ def ecdf_fun(q): return ecdf_fun -def p_from_null(X, two_sided=False, - max_null_input=None, min_null_input=None): - """Compute p value of true result from null distribution - - Given an array containing both a real result and a set of null results, - computes the fraction of null results larger than the real result (or, - if two_sided=True, the fraction of null results more extreme than the real - result in either the positive or negative direction). - - Note that all real results are compared to a pooled null distribution, - which is the max/min over all null results, providing multiple - comparisons correction. - - Parameters - ---------- - X : ndarray with arbitrary number of dimensions - The last dimension of X should contain the real result in X[..., 0] - and the null results in X[..., 1:] - If max_null_input and min_null_input are provided, - X should contain only the real result - - two_sided : bool, default:False - Whether the p value should be one-sided (testing only for being - above the null) or two-sided (testing for both significantly positive - and significantly negative values) - - max_null_input : ndarray with num_perm (see `brainiak.isfc`) entries - By default this array is derived from the X input array, - which can be very large and takes up huge memory space. - To save memory, the function which calls p_from_null - should provide this array as input. 
- - min_null_input : ndarray with num_perm (see `brainiak.isfc`) entries - See max_null_input - - Returns - ------- - p : ndarray the same shape as X, without the last dimension - p values for each true X value under the null distribution - """ - if (min_null_input is None) or (max_null_input is None): - real_data = X[..., 0] - leading_dims = tuple([int(d) for d in np.arange(X.ndim - 1)]) - # Compute maximum/minimum in each null dataset - max_null = np.max(X[..., 1:], axis=leading_dims) - min_null = np.min(X[..., 1:], axis=leading_dims) - else: - real_data = X - # maximum & minimum in each null dataset should be provided as input - max_null = max_null_input - min_null = min_null_input - # Compute where the true values fall on the null distribution - max_null_ecdf = ecdf(max_null) - if two_sided: - min_null_ecdf = ecdf(min_null) - p = 2 * np.minimum(1 - max_null_ecdf(real_data), - min_null_ecdf(real_data)) - p = np.minimum(p, 1) - else: - p = 1 - max_null_ecdf(real_data) - - return p - - -def compute_p_from_null_distribution(observed, distribution, - side='two-sided', exact=False, - axis=None): +def p_from_null(observed, distribution, + side='two-sided', exact=False, + axis=None): """Compute p-value from null distribution From 69f2d47f2c6b359c8243925f5449efe6ddec698a Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Wed, 27 Feb 2019 20:16:28 -0500 Subject: [PATCH 11/21] Removed ISC test NIfTI data because we're simulating --- tests/isc/mask.nii.gz | Bin 87 -> 0 bytes tests/isc/subj1.nii.gz | Bin 124 -> 0 bytes tests/isc/subj2.nii.gz | Bin 124 -> 0 bytes 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/isc/mask.nii.gz delete mode 100644 tests/isc/subj1.nii.gz delete mode 100644 tests/isc/subj2.nii.gz diff --git a/tests/isc/mask.nii.gz b/tests/isc/mask.nii.gz deleted file mode 100644 index 83b31ffbdea8d48fd5c9c745520306fde30fac26..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 87 
zcmV-d0I2^TiwFp;X{=ZR|7~G&Yc6hSX#k61WFQJKGq8XmBLf7YYGPp!01GtOGce%8 t8=N8XsJaJ^$ArgfkU!7?v0@AidD@0x%|r$-P%Uj#0sybv>&4&!004TWAtnF- diff --git a/tests/isc/subj1.nii.gz b/tests/isc/subj1.nii.gz deleted file mode 100644 index 9cf26af98981dfc65b707115007cfe881f6cd8a0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 124 zcmV-?0E7P@iwFqsSgcqA|8sR>YB4TuX=wn9VPqf*urNR%D+41KqH1Dc5C97_*fTKT z!yB9-@~FB8j>m+@YLGwB0kL8X40+myV9k)w8Yq&Lfe9L4gk3-jl`IU*MB7grl`KTt ePrOP}LJF2&2!}uMDhZo~Pyqm%9mAC10{{SDpDw)s diff --git a/tests/isc/subj2.nii.gz b/tests/isc/subj2.nii.gz deleted file mode 100644 index ec6bb9a44eb799b2f30af4babaeceb26f91dfa43..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 124 zcmV-?0E7P@iwFqvSgcqA|8sR>YBDZvX=wn9VPqf*urNR%D+41KqH1Dc5C97_*fTKT z!yB9-@~FB8j>m+@YLGwB0kL8X40+myV9k)w8Yq&Lfe9L4gk3-jl`IU*MB7grl`KTt ePrOP}LJF2&2!}uMDhZo~Pyqm%9mAC10{{SFbuPgG From dba670a9686841a4232d9867ec440488a20b05eb Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Sun, 3 Mar 2019 17:44:23 -0500 Subject: [PATCH 12/21] Moved phase randomization in phaseshift_isc to utils --- brainiak/isc.py | 49 ++---------- brainiak/utils/utils.py | 104 +++++++++++++++++-------- tests/utils/test_utils.py | 159 +++++++++++++++++++++++++++----------- 3 files changed, 192 insertions(+), 120 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index 1299b9ad4..efd83e625 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -51,10 +51,9 @@ import logging from scipy.spatial.distance import squareform from scipy.stats import pearsonr -from scipy.fftpack import fft, ifft import itertools as it from brainiak.fcma.util import compute_correlation -from brainiak.utils.utils import p_from_null +from brainiak.utils.utils import phase_randomize, p_from_null logger = logging.getLogger(__name__) @@ -747,7 +746,6 @@ def bootstrap_isc(iscs, pairwise=False, summary_statistic='median', # Convert distribution to numpy array distribution = 
np.array(distribution) - assert distribution.shape == (n_bootstraps, n_voxels) # Compute CIs of median from bootstrap distribution (default: 95%) ci = (np.percentile(distribution, (100 - ci_percentile)/2, axis=0), @@ -1189,7 +1187,6 @@ def permutation_isc(iscs, group_assignment=None, pairwise=False, # noqa: C901 # Convert distribution to numpy array distribution = np.array(distribution) - assert distribution.shape == (n_permutations, n_voxels) # Get p-value for actual median from shifted distribution if exact_permutations: @@ -1344,7 +1341,6 @@ def timeshift_isc(data, pairwise=False, summary_statistic='median', # Convert distribution to numpy array distribution = np.vstack(distribution) - assert distribution.shape == (n_shifts, n_voxels) # Get p-value for actual median from shifted distribution p = p_from_null(observed, distribution, @@ -1442,31 +1438,12 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', else: prng = np.random.RandomState(random_state) - # Get randomized phase shifts - if n_TRs % 2 == 0: - # Why are we indexing from 1 not zero here? n_TRs / -1 long? 
- pos_freq = np.arange(1, data.shape[0] // 2) - neg_freq = np.arange(data.shape[0] - 1, data.shape[0] // 2, -1) - else: - pos_freq = np.arange(1, (data.shape[0] - 1) // 2 + 1) - neg_freq = np.arange(data.shape[0] - 1, - (data.shape[0] - 1) // 2, -1) - - phase_shifts = prng.rand(len(pos_freq), 1, n_subjects) * 2 * np.math.pi + # Get shifted version of data + shifted_data = phase_randomize(data, random_state=prng) # In pairwise approach, apply all shifts then compute pairwise ISCs if pairwise: - # Fast Fourier transform along time dimension of data - fft_data = fft(data, axis=0) - - # Shift pos and neg frequencies symmetrically, to keep signal real - fft_data[pos_freq, :, :] *= np.exp(1j * phase_shifts) - fft_data[neg_freq, :, :] *= np.exp(-1j * phase_shifts) - - # Inverse FFT to put data back in time domain for ISC - shifted_data = np.real(ifft(fft_data, axis=0)) - # Compute null ISC on shifted data for pairwise approach shifted_isc = isc(shifted_data, pairwise=True, summary_statistic=summary_statistic, @@ -1475,24 +1452,15 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', # In leave-one-out, apply shift only to each left-out participant elif not pairwise: - # Roll subject axis in phaseshifts for loop - phase_shifts = np.rollaxis(phase_shifts, 2, 0) + # Roll subject axis of phase-randomized data + shifted_data = np.rollaxis(shifted_data, 2, 0) shifted_isc = [] - for s, shift in enumerate(phase_shifts): - - # Apply FFT to left-out subject - fft_subject = fft(data[:, :, s], axis=0) - - # Shift pos and neg frequencies symmetrically, keep signal real - fft_subject[pos_freq, :] *= np.exp(1j * shift) - fft_subject[neg_freq, :] *= np.exp(-1j * shift) - - # Inverse FFT to put data back in time domain for ISC - shifted_subject = np.real(ifft(fft_subject, axis=0)) + for s, shifted_subject in enumerate(shifted_data): # ISC of shifted left-out subject vs mean of N-1 subjects - nonshifted_mean = np.mean(np.delete(data, s, 2), axis=2) + nonshifted_mean = 
np.mean(np.delete(data, s, axis=2), + axis=2) loo_isc = isc(np.dstack((shifted_subject, nonshifted_mean)), pairwise=False, summary_statistic=None, tolerate_nans=tolerate_nans) @@ -1509,7 +1477,6 @@ def phaseshift_isc(data, pairwise=False, summary_statistic='median', # Convert distribution to numpy array distribution = np.vstack(distribution) - assert distribution.shape == (n_shifts, n_voxels) # Get p-value for actual median from shifted distribution p = p_from_null(observed, distribution, diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index 511fa4c64..78e63db47 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -17,9 +17,8 @@ import os.path import psutil from .fmrisim import generate_stimfunction, _double_gamma_hrf, convolve_hrf -from sklearn.utils import check_random_state +import brainiak.isc as isc from scipy.fftpack import fft, ifft -import math import logging logger = logging.getLogger(__name__) @@ -31,7 +30,6 @@ __all__ = [ "center_mass_exp", - "compute_p_from_null_distribution", "concatenate_not_none", "cov2corr", "ecdf", @@ -697,25 +695,42 @@ def usable_cpu_count(): return result -def phase_randomize(D, random_state=0): - """Randomly shift signal phases +def phase_randomize(data, voxelwise=False, random_state=None): + """Randomize phase of time series across subjects - For each timecourse (from each voxel and each subject), computes its DFT - and then randomly shifts the phase of each frequency before inverting - back into the time domain. This yields timecourses with the same power - spectrum (and thus the same autocorrelation) as the original timecourses, - but will remove any meaningful temporal relationships between the - timecourses. + For each subject, apply Fourier transform to voxel time series + and then randomly shift the phase of each frequency before inverting + back into the time domain. 
This yields time series with the same power + spectrum (and thus the same autocorrelation) as the original time series + but will remove any meaningful temporal relationships among time series + across subjects. By default (voxelwise=False), the same phase shift is + applied across all voxels; however if voxelwise=True, different random + phase shifts are applied to each voxel. The typical input is a time by + voxels by subjects ndarray. The first dimension is assumed to be the + time dimension and will be phase randomized. If a 2-dimensional ndarray + is provided, the last dimension is assumed to be subjects, and different + phase randomizations will be applied to each subject. - This procedure is described in: - Simony E, Honey CJ, Chen J, Lositsky O, Yeshurun Y, Wiesel A, Hasson U - (2016) Dynamic reconfiguration of the default mode network during narrative - comprehension. Nat Commun 7. + The implementation is based on the work in [Lerner2011]_ and + [Simony2016]_. + + .. [Lerner2011] "Topographic mapping of a hierarchy of temporal + receptive windows using a narrated story.", Y. Lerner, C. J. Honey, + L. J. Silbert, U. Hasson, 2011, Journal of Neuroscience, 31, 2906-2915. + https://doi.org/10.1523/jneurosci.3684-10.2011 + + .. [Simony2016] "Dynamic reconfiguration of the default mode network + during narrative comprehension.", E. Simony, C. J. Honey, J. Chen, O. + Lositsky, Y. Yeshurun, A. Wiesel, U. Hasson, 2016, Nature + Communications, 7, 12141. 
https://doi.org/10.1038/ncomms12141 Parameters ---------- - D : voxel by time by subject ndarray - fMRI data to be phase randomized + data : ndarray (n_TRs x n_voxels x n_subjects) + Data to be phase randomized (per subject) + + voxelwise : bool, default: False + Apply same (False) or different (True) randomizations across voxels random_state : RandomState or an int seed (0 by default) A random number generator instance to define the state of the @@ -723,28 +738,54 @@ def phase_randomize(D, random_state=0): Returns ---------- - ndarray of same shape as D - phase randomized timecourses + shifted_data : ndarray (n_TRs x n_voxels x n_subjects) + Phase-randomized time series """ - random_state = check_random_state(random_state) + # Check if input is 2-dimensional + data_ndim = data.ndim - F = fft(D, axis=1) - if D.shape[1] % 2 == 0: - pos_freq = np.arange(1, D.shape[1] // 2) - neg_freq = np.arange(D.shape[1] - 1, D.shape[1] // 2, -1) + # Get basic shape of data + data, n_TRs, n_voxels, n_subjects = isc._check_timeseries_input(data) + + # Random seed to be deterministically re-randomized at each iteration + if isinstance(random_state, np.random.RandomState): + prng = random_state else: - pos_freq = np.arange(1, (D.shape[1] - 1) // 2 + 1) - neg_freq = np.arange(D.shape[1] - 1, (D.shape[1] - 1) // 2, -1) + prng = np.random.RandomState(random_state) - shift = random_state.rand(D.shape[0], len(pos_freq), - D.shape[2]) * 2 * math.pi + # Get randomized phase shifts + if n_TRs % 2 == 0: + # Why are we indexing from 1 not zero here? n_TRs / -1 long? 
+ pos_freq = np.arange(1, data.shape[0] // 2) + neg_freq = np.arange(data.shape[0] - 1, data.shape[0] // 2, -1) + else: + pos_freq = np.arange(1, (data.shape[0] - 1) // 2 + 1) + neg_freq = np.arange(data.shape[0] - 1, + (data.shape[0] - 1) // 2, -1) + + if not voxelwise: + phase_shifts = (prng.rand(len(pos_freq), 1, n_subjects) + * 2 * np.math.pi) + else: + phase_shifts = (prng.rand(len(pos_freq), n_voxels, n_subjects) + * 2 * np.math.pi) + + # Fast Fourier transform along time dimension of data + fft_data = fft(data, axis=0) # Shift pos and neg frequencies symmetrically, to keep signal real - F[:, pos_freq, :] *= np.exp(1j * shift) - F[:, neg_freq, :] *= np.exp(-1j * shift) + fft_data[pos_freq, :, :] *= np.exp(1j * phase_shifts) + fft_data[neg_freq, :, :] *= np.exp(-1j * phase_shifts) + + # Inverse FFT to put data back in time domain + shifted_data = np.real(ifft(fft_data, axis=0)) - return np.real(ifft(F, axis=1)) + # Go back to 2-dimensions if input was 2-dimensional + if data_ndim == 2: + shifted_data = shifted_data[:, 0, :] + + return shifted_data def ecdf(x): @@ -775,7 +816,6 @@ def ecdf_fun(q): def p_from_null(observed, distribution, side='two-sided', exact=False, axis=None): - """Compute p-value from null distribution Returns the p-value for an observed test statistic given a null diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 1348790db..ed801bfaf 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -181,35 +181,6 @@ def test_center_mass_exp(): 'close to its mid-point' -def test_phase_randomize(): - from brainiak.utils.utils import phase_randomize - import numpy as np - from scipy.fftpack import fft - import math - from scipy.stats import pearsonr - - # Generate auto-correlated signals - nv = 2 - T = 100 - ns = 3 - D = np.zeros((nv, T, ns)) - for v in range(nv): - for s in range(ns): - D[v, :, s] = np.sin(np.linspace(0, math.pi * 5 * (v + 1), T)) + \ - np.sin(np.linspace(0, math.pi * 6 * (s + 1), T)) - - freq = 
fft(D, axis=1) - D_pr = phase_randomize(D) - freq_pr = fft(D_pr, axis=1) - p_corr = pearsonr(np.angle(freq).flatten(), np.angle(freq_pr).flatten())[0] - - assert np.isclose(abs(freq), abs(freq_pr)).all(), \ - "Amplitude spectrum not preserved under phase randomization" - - assert abs(p_corr) < 0.03, \ - "Phases still correlated after randomization" - - def test_ecdf(): from brainiak.utils.utils import ecdf import numpy as np @@ -224,26 +195,120 @@ def test_ecdf(): def test_p_from_null(): - from brainiak.utils.utils import p_from_null import numpy as np + from brainiak.utils.utils import p_from_null - X = np.zeros((2, 5)) # One true value, 4 null values - X[0, 0] = 1 - X[1, 0] = -2 - X[0, 1:] = [-1.0, 0.00, 0.50, 2.00] - X[1, 1:] = [-1.5, 0.25, -0.25, 0.25] - - Y = X[:, 0] - Y_max = np.max(X[:, 1:], axis=0) - Y_min = np.min(X[:, 1:], axis=0) + # Create random null and observed value in tail + null = np.random.randn(10000) + observed = np.ceil(np.percentile(null, 97.5) * 1000) / 1000 + + # Check two-tailed p-value for observed + p_ts = p_from_null(observed, null) + assert np.isclose(p_ts, 0.05, atol=1e-02) + + # Check two-tailed p-value for observed + p_right = p_from_null(observed, null, side='right') + assert np.isclose(p_right, 0.025, atol=1e-02) + assert np.isclose(p_right, p_ts / 2, atol=1e-02) + + # Check two-tailed p-value for observed + p_left = p_from_null(observed, null, side='left') + assert np.isclose(p_left, 0.975, atol=1e-02) + assert np.isclose(1 - p_left, p_right, atol=1e-02) + assert np.isclose(1 - p_left, p_ts / 2, atol=1e-02) + + # Check 2-dimensional input (i.e., samples by voxels) + null = np.random.randn(10000, 3) + observed = np.ceil(np.percentile(null, 97.5, axis=0) * 1000) / 1000 + + # Check two-tailed p-value for observed + p_ts = p_from_null(observed, null, axis=0) + assert np.allclose(p_ts, 0.05, atol=1e-02) + + # Check two-tailed p-value for observed + p_right = p_from_null(observed, null, side='right', axis=0) + assert 
np.allclose(p_right, 0.025, atol=1e-02) + assert np.allclose(p_right, p_ts / 2, atol=1e-02) + + # Check two-tailed p-value for observed + p_left = p_from_null(observed, null, side='left', axis=0) + assert np.allclose(p_left, 0.975, atol=1e-02) + assert np.allclose(1 - p_left, p_right, atol=1e-02) + assert np.allclose(1 - p_left, p_ts / 2, atol=1e-02) + + # Check for exact test + p_ts = p_from_null(observed, null, exact=True, axis=0) + assert np.allclose(p_ts, 0.05, atol=1e-02) + + # Check two-tailed p-value for exact + p_right = p_from_null(observed, null, side='right', + exact=True, axis=0) + assert np.allclose(p_right, 0.025, atol=1e-02) + assert np.allclose(p_right, p_ts / 2, atol=1e-02) + + # Check two-tailed p-value for exact + p_left = p_from_null(observed, null, side='left', + exact=True, axis=0) + assert np.allclose(p_left, 0.975, atol=1e-02) + assert np.allclose(1 - p_left, p_right, atol=1e-02) + assert np.allclose(1 - p_left, p_ts / 2, atol=1e-02) - p_1side = p_from_null(X, two_sided=False) - assert np.isclose(p_1side, [0.25, 1]).all(), "One-sided p value incorrect" - p_2side = p_from_null(X, two_sided=True) - assert np.isclose(p_2side, [0.5, 0]).all(), "Two-sided p value incorrect" +def test_phase_randomize(): + import numpy as np + from scipy.fftpack import fft + from scipy.stats import pearsonr + from brainiak.utils.utils import phase_randomize - p_2side_m = p_from_null(Y, two_sided=True, - max_null_input=Y_max, - min_null_input=Y_min) - assert np.isclose(p_2side, p_2side_m).all(), "p_null differs with max/min" + data = np.repeat(np.repeat(np.random.randn(60)[:, np.newaxis, np.newaxis], + 30, axis=1), + 20, axis=2) + assert np.array_equal(data[..., 0], data[..., 1]) + + # Phase-randomize data across subjects (same across voxels) + shifted_data = phase_randomize(data, voxelwise=False, random_state=1) + assert shifted_data.shape == data.shape + assert not np.array_equal(shifted_data[..., 0], shifted_data[..., 1]) + assert not 
np.array_equal(shifted_data[..., 0], data[..., 0]) + + # Check that random_state returns same shifts + shifted_data_ = phase_randomize(data, voxelwise=False, random_state=1) + assert np.array_equal(shifted_data, shifted_data_) + + shifted_data_ = phase_randomize(data, voxelwise=False, random_state=2) + assert not np.array_equal(shifted_data, shifted_data_) + + # Phase-randomize subjects and voxels + shifted_data = phase_randomize(data, voxelwise=True, random_state=1) + assert shifted_data.shape == data.shape + assert not np.array_equal(shifted_data[..., 0], shifted_data[..., 1]) + assert not np.array_equal(shifted_data[..., 0], data[..., 0]) + assert not np.array_equal(shifted_data[:, 0, 0], shifted_data[:, 1, 0]) + + # Try with 2-dimensional input + shifted_data = phase_randomize(data[..., 0], + voxelwise=True, + random_state=1) + assert shifted_data.ndim == 2 + assert not np.array_equal(shifted_data[:, 0], shifted_data[:, 1]) + + # Create correlated noisy data + corr_data = np.repeat(np.random.randn(60)[:, np.newaxis, np.newaxis], + 2, axis=2) + np.random.randn(60, 1, 2) + + # Get correlation and frequency domain for data + corr_r = pearsonr(corr_data[:, 0, 0], + corr_data[:, 0, 1])[0] + corr_freq = fft(corr_data, axis=0) + + # Phase-randomize time series and get correlation/frequency + shifted_data = phase_randomize(corr_data) + shifted_r = pearsonr(shifted_data[:, 0, 0], + shifted_data[:, 0, 1])[0] + shifted_freq = fft(shifted_data, axis=0) + + # Check that phase-randomization reduces correlation + assert np.abs(shifted_r) < np.abs(corr_r) + + # Check that amplitude spectrum is preserved + assert np.allclose(np.abs(shifted_freq), np.abs(corr_freq)) From aa0c37d11bc5b3602caee33dde6eb45755177c83 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Sun, 3 Mar 2019 17:46:40 -0500 Subject: [PATCH 13/21] Removed vestigial ecdf from utils and tests --- brainiak/utils/utils.py | 26 -------------------------- tests/utils/test_utils.py | 13 ------------- 2 files changed, 39 
deletions(-) diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index 78e63db47..b285e2cca 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -32,7 +32,6 @@ "center_mass_exp", "concatenate_not_none", "cov2corr", - "ecdf", "from_tri_2_sym", "from_sym_2_tri", "gen_design", @@ -788,31 +787,6 @@ def phase_randomize(data, voxelwise=False, random_state=None): return shifted_data -def ecdf(x): - """Empirical cumulative distribution function - - Given a 1D array of values, returns a function f(q) that outputs the - fraction of values less than or equal to q. - - Parameters - ---------- - x : 1D array - values for which to compute CDF - - Returns - ---------- - ecdf_fun: Callable[[float], float] - function that returns the value of the CDF at a given point - """ - xp = np.sort(x) - yp = np.arange(len(xp) + 1) / len(xp) - - def ecdf_fun(q): - return yp[np.searchsorted(xp, q, side="right")] - - return ecdf_fun - - def p_from_null(observed, distribution, side='two-sided', exact=False, axis=None): diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index ed801bfaf..77f608d1b 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -181,19 +181,6 @@ def test_center_mass_exp(): 'close to its mid-point' -def test_ecdf(): - from brainiak.utils.utils import ecdf - import numpy as np - - x = np.array([1, 4, 3]) - cdf_fun = ecdf(x) - - assert np.isclose(cdf_fun(0), [0]), "Left side of cdf should be 0" - assert np.isclose(cdf_fun(5), [1]), "Right side of cdf should be 1" - assert np.isclose(cdf_fun(1.5), [1 / 3]), "CDF value incorrect" - assert np.isclose(cdf_fun(1), [1 / 3]), "CDF should be right-continuous" - - def test_p_from_null(): import numpy as np from brainiak.utils.utils import p_from_null From 7210616d1af45611912a9b7691068588cf1ccdf1 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Mon, 4 Mar 2019 15:23:13 -0500 Subject: [PATCH 14/21] Moved _check_timeseries_input to utils to fix dependencies --- brainiak/isc.py 
| 66 +-------------------------------- brainiak/utils/utils.py | 66 ++++++++++++++++++++++++++++++++- tests/utils/test_utils.py | 78 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 66 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index efd83e625..f14aafd3c 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -53,7 +53,8 @@ from scipy.stats import pearsonr import itertools as it from brainiak.fcma.util import compute_correlation -from brainiak.utils.utils import phase_randomize, p_from_null +from brainiak.utils.utils import (phase_randomize, p_from_null, + _check_timeseries_input) logger = logging.getLogger(__name__) @@ -325,69 +326,6 @@ def isfc(data, pairwise=False, summary_statistic=None, return isfcs -def _check_timeseries_input(data): - - """Checks response time series input data for ISC analysis - - Input data should be a n_TRs by n_voxels by n_subjects ndarray - (e.g., brainiak.image.MaskedMultiSubjectData) or a list where each - item is a n_TRs by n_voxels ndarray for a given subject. Multiple - input ndarrays must be the same shape. If a 2D array is supplied, - the last dimension is assumed to correspond to subjects. This - function is only intended to be used internally by other - functions in this module (e.g., isc, isfc). 
- - Parameters - ---------- - data : ndarray or list - Time series data - - Returns - ------- - iscs : ndarray - Array of ISC values - - n_TRs : int - Number of time points (TRs) - - n_voxels : int - Number of voxels (or ROIs) - - n_subjects : int - Number of subjects - - """ - - # Convert list input to 3d and check shapes - if type(data) == list: - data_shape = data[0].shape - for i, d in enumerate(data): - if d.shape != data_shape: - raise ValueError("All ndarrays in input list " - "must be the same shape!") - if d.ndim == 1: - data[i] = d[:, np.newaxis] - data = np.dstack(data) - - # Convert input ndarray to 3d and check shape - elif isinstance(data, np.ndarray): - if data.ndim == 2: - data = data[:, np.newaxis, :] - elif data.ndim == 3: - pass - else: - raise ValueError("Input ndarray should have 2 " - "or 3 dimensions (got {0})!".format(data.ndim)) - - # Infer subjects, TRs, voxels and log for user to check - n_TRs, n_voxels, n_subjects = data.shape - logger.info("Assuming {0} subjects with {1} time points " - "and {2} voxel(s) or ROI(s) for ISC analysis.".format( - n_subjects, n_TRs, n_voxels)) - - return data, n_TRs, n_voxels, n_subjects - - def _check_isc_input(iscs, pairwise=False): """Checks ISC inputs for statistical tests diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index b285e2cca..aa38da943 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -17,7 +17,6 @@ import os.path import psutil from .fmrisim import generate_stimfunction, _double_gamma_hrf, convolve_hrf -import brainiak.isc as isc from scipy.fftpack import fft, ifft import logging @@ -745,7 +744,7 @@ def phase_randomize(data, voxelwise=False, random_state=None): data_ndim = data.ndim # Get basic shape of data - data, n_TRs, n_voxels, n_subjects = isc._check_timeseries_input(data) + data, n_TRs, n_voxels, n_subjects = _check_timeseries_input(data) # Random seed to be deterministically re-randomized at each iteration if isinstance(random_state, 
np.random.RandomState): @@ -857,3 +856,66 @@ def p_from_null(observed, distribution, p = (numerator + 1) / (n_samples + 1) return p + + +def _check_timeseries_input(data): + + """Checks response time series input data (e.g., for ISC analysis) + + Input data should be a n_TRs by n_voxels by n_subjects ndarray + (e.g., brainiak.image.MaskedMultiSubjectData) or a list where each + item is a n_TRs by n_voxels ndarray for a given subject. Multiple + input ndarrays must be the same shape. If a 2D array is supplied, + the last dimension is assumed to correspond to subjects. This + function is generally intended to be used internally by other + functions module (e.g., isc, isfc in brainiak.isc). + + Parameters + ---------- + data : ndarray or list + Time series data + + Returns + ------- + data : ndarray + Input time series data with standardized structure + + n_TRs : int + Number of time points (TRs) + + n_voxels : int + Number of voxels (or ROIs) + + n_subjects : int + Number of subjects + + """ + + # Convert list input to 3d and check shapes + if type(data) == list: + data_shape = data[0].shape + for i, d in enumerate(data): + if d.shape != data_shape: + raise ValueError("All ndarrays in input list " + "must be the same shape!") + if d.ndim == 1: + data[i] = d[:, np.newaxis] + data = np.dstack(data) + + # Convert input ndarray to 3d and check shape + elif isinstance(data, np.ndarray): + if data.ndim == 2: + data = data[:, np.newaxis, :] + elif data.ndim == 3: + pass + else: + raise ValueError("Input ndarray should have 2 " + "or 3 dimensions (got {0})!".format(data.ndim)) + + # Infer subjects, TRs, voxels and log for user to check + n_TRs, n_voxels, n_subjects = data.shape + logger.info("Assuming {0} subjects with {1} time points " + "and {2} voxel(s) or ROI(s) for ISC analysis.".format( + n_subjects, n_TRs, n_voxels)) + + return data, n_TRs, n_voxels, n_subjects diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 77f608d1b..7a76ab49a 100644 --- 
a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -299,3 +299,81 @@ def test_phase_randomize(): # Check that amplitude spectrum is preserved assert np.allclose(np.abs(shifted_freq), np.abs(corr_freq)) + + +def test_check_timeseries_input(): + import numpy as np + from itertools import combinations + from brainiak.utils.utils import _check_timeseries_input + + # Set a fixed vector for comparison + vector = np.random.randn(60) + + # List of subjects with one voxel/ROI + list_1d = [vector for _ in np.arange(10)] + (data_list_1d, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(list_1d) + assert n_TRs == 60 + assert n_voxels == 1 + assert n_subjects == 10 + + # Array of subjects with one voxel/ROI + array_2d = np.hstack([vector[:, np.newaxis] + for _ in np.arange(10)]) + (data_array_2d, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(array_2d) + assert n_TRs == 60 + assert n_voxels == 1 + assert n_subjects == 10 + + # List of 2-dimensional arrays + list_2d = [vector[:, np.newaxis] for _ in np.arange(10)] + (data_list_2d, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(list_2d) + assert n_TRs == 60 + assert n_voxels == 1 + assert n_subjects == 10 + + # List of 3-dimensional arrays + list_3d = [vector[:, np.newaxis, np.newaxis] + for _ in np.arange(10)] + (data_list_3d, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(list_3d) + assert n_TRs == 60 + assert n_voxels == 1 + assert n_subjects == 10 + + # 3-dimensional array + array_3d = np.dstack([vector[:, np.newaxis] + for _ in np.arange(10)]) + (data_array_3d, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(array_3d) + assert n_TRs == 60 + assert n_voxels == 1 + assert n_subjects == 10 + + # Check they're the same + for pair in combinations([data_list_1d, data_array_2d, + data_list_2d, data_list_3d, + data_array_3d], 2): + assert np.array_equal(pair[0], pair[1]) + + # List of multivoxel arrays + matrix = np.random.randn(60, 30) + list_mv = [matrix + for _ in 
np.arange(10)] + (data_list_mv, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(list_mv) + assert n_TRs == 60 + assert n_voxels == 30 + assert n_subjects == 10 + + # 3-dimensional array with multiple voxels + array_mv = np.dstack([matrix for _ in np.arange(10)]) + (data_array_mv, n_TRs, + n_voxels, n_subjects) = _check_timeseries_input(array_mv) + assert n_TRs == 60 + assert n_voxels == 30 + assert n_subjects == 10 + + assert np.array_equal(data_list_mv, data_array_mv) From 1d82dcd68275b9c85b4c7cfb567a66b9b88b03fc Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Wed, 6 Mar 2019 20:36:40 -1000 Subject: [PATCH 15/21] Fixing duplicate references --- brainiak/utils/utils.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index aa38da943..8f23da0fc 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -712,16 +712,6 @@ def phase_randomize(data, voxelwise=False, random_state=None): The implementation is based on the work in [Lerner2011]_ and [Simony2016]_. - .. [Lerner2011] "Topographic mapping of a hierarchy of temporal - receptive windows using a narrated story.", Y. Lerner, C. J. Honey, - L. J. Silbert, U. Hasson, 2011, Journal of Neuroscience, 31, 2906-2915. - https://doi.org/10.1523/jneurosci.3684-10.2011 - - .. [Simony2016] "Dynamic reconfiguration of the default mode network - during narrative comprehension.", E. Simony, C. J. Honey, J. Chen, O. - Lositsky, Y. Yeshurun, A. Wiesel, U. Hasson, 2016, Nature - Communications, 7, 12141. 
https://doi.org/10.1038/ncomms12141 - Parameters ---------- data : ndarray (n_TRs x n_voxels x n_subjects) From 7b4e70945ee762e27698af145b35d89bedeea3b1 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Thu, 14 Mar 2019 13:43:56 -0400 Subject: [PATCH 16/21] isfc can now take 'targets' as input (asymmetric) --- brainiak/isc.py | 69 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index f14aafd3c..8a6ef5984 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -182,20 +182,23 @@ def isc(data, pairwise=False, summary_statistic=None, tolerate_nans=True): return iscs -def isfc(data, pairwise=False, summary_statistic=None, +def isfc(data, targets=None, pairwise=False, summary_statistic=None, vectorize_isfcs=True, tolerate_nans=True): """Intersubject functional correlation (ISFC) - For each voxel or ROI, compute the Pearson correlation between each - subject's response time series and other subjects' response time series - for all voxels or ROIs. If pairwise is False (default), use the - leave-one-out approach, where correlation is computed between each - subject and the average of the other subjects. If pairwise is True, - compute correlations between all pairs of subjects. If summary_statistic - is None, return N ISFC values for N subjects (leave-one-out) or N(N-1)/2 - ISFC values for each pair of N subjects, corresponding to the upper - triangle of the correlation matrix (see scipy.spatial.distance.squareform). + For each input voxel or ROI, compute the Pearson correlation between each + subject's response time series and all input voxels or ROIs in other + subjects. If a targets array is provided, instead compute ISFCs between + each input voxel time series and each voxel time series in targets across + subjects (resulting in asymmetric ISFC values). The targets array must have + the same number TRs and subjects as the input data. 
If pairwise is False + (default), use the leave-one-out approach, where correlation is computed + between each subject and the average of the other subjects. If pairwise is + True, compute correlations between all pairs of subjects. If + summary_statistic is None, return N ISFC values for N subjects (leave-one- + out) or N(N-1)/2 ISFC values for each pair of N subjects, corresponding to + the triangle of the correlation matrix (scipy.spatial.distance.squareform). Alternatively, use either 'mean' or 'median' to compute summary statistic of ISFCs (Fisher Z is applied if using mean). Input should be n_TRs by n_voxels by n_subjects array (e.g., brainiak.image.MaskedMultiSubjectData) @@ -222,7 +225,9 @@ def isfc(data, pairwise=False, summary_statistic=None, a tuple comprising condensed off-diagonal ISFC values and the diagonal ISC values if vectorize_isfcs=True, or a single ndarray with shape n_subjects (or n_pairs) by n_voxels by n_voxels 3D array if - vectorize_isfcs=False (see brainiak.isc.squareform_isfc). If + vectorize_isfcs=False (see brainiak.isc.squareform_isfc). If targets array + is provided (yielding asymmetric ISFCs), output ISFCs are not vectorized, + resulting in an n_subjects by n_voxels by n_targets ISFC array. If summary_statistic is supplied, output is collapsed along first dimension. The implementation is based on the work in [Simony2016]_. 
@@ -232,6 +237,9 @@ def isfc(data, pairwise=False, summary_statistic=None, data : list or ndarray (n_TRs x n_voxels x n_subjects) fMRI data for which to compute ISFC + targets : list or ndarray (n_TRs x n_voxels x n_subjects), optional + fMRI data to use as targets for ISFC + pairwise : bool, default: False Whether to use pairwise (True) or leave-one-out (False) approach @@ -254,6 +262,20 @@ def isfc(data, pairwise=False, summary_statistic=None, # Check response time series input format data, n_TRs, n_voxels, n_subjects = _check_timeseries_input(data) + if isinstance(targets, np.ndarray) or isinstance(targets, list): + targets, t_n_TRs, t_n_voxels, t_n_subjects = ( + check_timeseries_input(targets)) + if n_TRs != t_n_TRs: + raise ValueError("Targets array must have same number of TRs as" + "input data") + if n_subjects != t_n_subjects: + raise ValueError("Targets array must have same number of subjects" + "as input data") + symmetric = False + else: + targets = data + t_n_TRs, t_n_voxels, t_n_subjects = data.shape + symmetric = True # Check tolerate_nans input and use either mean/nanmean and exclude voxels if tolerate_nans: @@ -261,6 +283,7 @@ def isfc(data, pairwise=False, summary_statistic=None, else: mean = np.mean data, mask = _threshold_nans(data, tolerate_nans) + targets, targets_mask = _threshold_nans(targets, tolerate_nans) # Handle just two subjects properly if n_subjects == 2: @@ -279,33 +302,39 @@ def isfc(data, pairwise=False, summary_statistic=None, isfc_pair = compute_correlation(np.ascontiguousarray( data[..., pair[0]].T), np.ascontiguousarray( - data[..., pair[1]].T), + targeets[..., pair[1]].T), return_nans=True) - isfc_pair = (isfc_pair + isfc_pair.T) / 2 + if symmetric: + isfc_pair = (isfc_pair + isfc_pair.T) / 2 isfcs.append(isfc_pair) isfcs = np.dstack(isfcs) + assert isfcs.shape == (n_voxels, t_n_voxels, + n_subjects * (n_subjects - 1) / 2) # Compute ISFCs using leave-one-out approach elif not pairwise: # Roll subject axis for loop data = 
np.rollaxis(data, 2, 0) + targets = np.rollaxis(targets, 2, 0) # Compute leave-one-out ISFCs isfcs = [compute_correlation(np.ascontiguousarray(subject.T), np.ascontiguousarray(mean( - np.delete(data, s, axis=0), + np.delete(targets, s, axis=0), axis=0).T), return_nans=True) for s, subject in enumerate(data)] # Transpose and average ISFC matrices for both directions - isfcs = np.dstack([(isfc_matrix + isfc_matrix.T) / 2 - for isfc_matrix in isfcs]) + isfcs = np.dstack([(isfc_matrix + isfc_matrix.T) / 2 if + symmetric else isfc_matrix for + isfc_matrix in isfcs]) + assert isfcs.shape == (n_voxels, t_n_voxels, n_subjects) # Get ISCs back into correct shape after masking out NaNs - isfcs_all = np.full((n_voxels, n_voxels, isfcs.shape[2]), np.nan) - isfcs_all[np.ix_(np.where(mask)[0], np.where(mask)[0])] = isfcs + isfcs_all = np.full((n_voxels, t_n_voxels, isfcs.shape[2]), np.nan) + isfcs_all[np.ix_(np.where(mask)[0], np.where(targets_mask)[0])] = isfcs isfcs = np.moveaxis(isfcs_all, 2, 0) # Summarize results (if requested) @@ -318,8 +347,8 @@ def isfc(data, pairwise=False, summary_statistic=None, if isfcs.shape[0] == 1: isfcs = isfcs[0] - # Optionally squareform to vectorize ISFC matrices - if vectorize_isfcs: + # Optionally squareform to vectorize ISFC matrices (only if symmetric) + if vectorize_isfcs and symmetric: isfcs, iscs = squareform_isfc(isfcs) return isfcs, iscs else: From 90117b676a4cc40a5bb53bab909946fd94aabc8a Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Mon, 18 Mar 2019 12:37:18 -0400 Subject: [PATCH 17/21] Fixed up 'targets' for ISFCs, added tests --- brainiak/isc.py | 90 ++++++++++++++++++++++++++++++++----------- tests/isc/test_isc.py | 78 +++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 23 deletions(-) diff --git a/brainiak/isc.py b/brainiak/isc.py index 8a6ef5984..b949d0dc5 100644 --- a/brainiak/isc.py +++ b/brainiak/isc.py @@ -195,7 +195,8 @@ def isfc(data, targets=None, pairwise=False, summary_statistic=None, the same 
number TRs and subjects as the input data. If pairwise is False (default), use the leave-one-out approach, where correlation is computed between each subject and the average of the other subjects. If pairwise is - True, compute correlations between all pairs of subjects. If + True, compute correlations between all pairs of subjects. If a targets + array is provided, only the leave-one-out approach is supported. If summary_statistic is None, return N ISFC values for N subjects (leave-one- out) or N(N-1)/2 ISFC values for each pair of N subjects, corresponding to the triangle of the correlation matrix (scipy.spatial.distance.squareform). @@ -262,20 +263,12 @@ def isfc(data, targets=None, pairwise=False, summary_statistic=None, # Check response time series input format data, n_TRs, n_voxels, n_subjects = _check_timeseries_input(data) - if isinstance(targets, np.ndarray) or isinstance(targets, list): - targets, t_n_TRs, t_n_voxels, t_n_subjects = ( - check_timeseries_input(targets)) - if n_TRs != t_n_TRs: - raise ValueError("Targets array must have same number of TRs as" - "input data") - if n_subjects != t_n_subjects: - raise ValueError("Targets array must have same number of subjects" - "as input data") - symmetric = False - else: - targets = data - t_n_TRs, t_n_voxels, t_n_subjects = data.shape - symmetric = True + + # Check for optional targets input array + targets, t_n_TRs, t_n_voxels, t_n_subjects, symmetric = ( + _check_targets_input(targets, data)) + if not symmetric: + pairwise = False # Check tolerate_nans input and use either mean/nanmean and exclude voxels if tolerate_nans: @@ -285,8 +278,8 @@ def isfc(data, targets=None, pairwise=False, summary_statistic=None, data, mask = _threshold_nans(data, tolerate_nans) targets, targets_mask = _threshold_nans(targets, tolerate_nans) - # Handle just two subjects properly - if n_subjects == 2: + # Handle just two subjects properly (for symmetric approach) + if symmetric and n_subjects == 2: isfcs = 
compute_correlation(np.ascontiguousarray(data[..., 0].T), np.ascontiguousarray(data[..., 1].T), return_nans=True) @@ -295,24 +288,22 @@ def isfc(data, targets=None, pairwise=False, summary_statistic=None, summary_statistic = None logger.info("Only two subjects! Computing ISFC between them.") - # Compute all pairwise ISFCs + # Compute all pairwise ISFCs (only for symmetric approach) elif pairwise: isfcs = [] for pair in it.combinations(np.arange(n_subjects), 2): isfc_pair = compute_correlation(np.ascontiguousarray( data[..., pair[0]].T), np.ascontiguousarray( - targeets[..., pair[1]].T), + targets[..., pair[1]].T), return_nans=True) if symmetric: isfc_pair = (isfc_pair + isfc_pair.T) / 2 isfcs.append(isfc_pair) isfcs = np.dstack(isfcs) - assert isfcs.shape == (n_voxels, t_n_voxels, - n_subjects * (n_subjects - 1) / 2) # Compute ISFCs using leave-one-out approach - elif not pairwise: + else: # Roll subject axis for loop data = np.rollaxis(data, 2, 0) @@ -330,7 +321,6 @@ def isfc(data, targets=None, pairwise=False, summary_statistic=None, isfcs = np.dstack([(isfc_matrix + isfc_matrix.T) / 2 if symmetric else isfc_matrix for isfc_matrix in isfcs]) - assert isfcs.shape == (n_voxels, t_n_voxels, n_subjects) # Get ISCs back into correct shape after masking out NaNs isfcs_all = np.full((n_voxels, t_n_voxels, isfcs.shape[2]), np.nan) @@ -410,6 +400,60 @@ def _check_isc_input(iscs, pairwise=False): return iscs, n_subjects, n_voxels +def _check_targets_input(targets, data): + + """Checks ISFC targets input array + + For ISFC analysis, targets input array should either be a list + of n_TRs by n_targets arrays (where each array corresponds to + a subject), or an n_TRs by n_targets by n_subjects ndarray. This + function also checks the shape of the targets array against the + input data array. 
+ + Parameters + ---------- + data : list or ndarray (n_TRs x n_voxels x n_subjects) + fMRI data for which to compute ISFC + + targets : list or ndarray (n_TRs x n_voxels x n_subjects) + fMRI data to use as targets for ISFC + + Returns + ------- + targets : ndarray (n_TRs x n_voxels x n_subjects) + ISFC targets with standardized structure + + n_TRs : int + Number of time points (TRs) for targets array + + n_voxels : int + Number of voxels (or ROIs) for targets array + + n_subjects : int + Number of subjects for targets array + + symmetric : bool + Indicator for symmetric vs. asymmetric + """ + + if isinstance(targets, np.ndarray) or isinstance(targets, list): + targets, n_TRs, n_voxels, n_subjects = ( + _check_timeseries_input(targets)) + if data.shape[0] != n_TRs: + raise ValueError("Targets array must have same number of " + "TRs as input data") + if data.shape[2] != n_subjects: + raise ValueError("Targets array must have same number of " + "subjects as input data") + symmetric = False + else: + targets = data + n_TRs, n_voxels, n_subjects = data.shape + symmetric = True + + return targets, n_TRs, n_voxels, n_subjects, symmetric + + def compute_summary_statistic(iscs, summary_statistic='mean', axis=None): """Computes summary statistics for ISCs diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index 7f37a3d07..fe8af9d18 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -669,6 +669,62 @@ def test_isfc_options(): isfcs, iscs_v = isfc(data, pairwise=True) assert np.allclose(iscs, iscs_v, rtol=1e-03) + # Generate 'targets' data and use for ISFC + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array') + n_targets = 15 + targets_data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_targets, + data_type='array') + isfcs = isfc(data, targets=targets_data, pairwise=False, + vectorize_isfcs=False) + assert isfcs.shape == (n_subjects, n_voxels, n_targets) + + # Ensure 'square' output enforced + isfcs = isfc(data, 
targets=targets_data, pairwise=False, + vectorize_isfcs=True) + assert isfcs.shape == (n_subjects, n_voxels, n_targets) + + # Check list input for targets + targets_data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_targets, + data_type='list') + isfcs = isfc(data, targets=targets_data, pairwise=False, + vectorize_isfcs=False) + assert isfcs.shape == (n_subjects, n_voxels, n_targets) + + # Check that mismatching subjects / TRs breaks targets + targets_data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_targets, + data_type='array') + + try: + isfcs = isfc(data, targets=targets_data[..., :-1], + pairwise=False, vectorize_isfcs=False) + except ValueError: + logging.info("Correctly caught mismatching n_subjects") + assert isfcs.shape == (n_subjects, n_voxels, n_targets) + + try: + isfcs = isfc(data, targets=targets_data[:-1, ...], + pairwise=False, vectorize_isfcs=False) + except ValueError: + logging.info("Correctly caught mismatching n_TRs") + + # Check targets for only 2 subjects + isfcs = isfc(data[..., :2], targets=targets_data[..., :2], + pairwise=False, summary_statistic=None) + assert isfcs.shape == (2, n_voxels, n_targets) + + isfcs = isfc(data[..., :2], targets=targets_data[..., :2], + pairwise=True, summary_statistic=None) + assert isfcs.shape == (2, n_voxels, n_targets) + + # Check that supplying targets enforces leave-one-out + isfcs_pw = isfc(data, targets=targets_data, pairwise=True, + vectorize_isfcs=False, tolerate_nans=False) + assert isfcs_pw.shape == (n_subjects, n_voxels, n_targets) + logger.info("Finished testing ISFC options") @@ -828,6 +884,28 @@ def test_isfc_nans(): np.sum(np.isnan(isfcs_pw_T)) == 11210) + # Check for NaN-handling in targets + n_targets = 15 + data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_voxels, data_type='array', + random_state=random_state) + targets_data = simulated_timeseries(n_subjects, n_TRs, + n_voxels=n_targets, + data_type='array') + + # Inject NaNs into targets_data + targets_data[0, 
0, 0] = np.nan + + # Don't tolerate NaNs, should lose zeroeth voxel + isfcs_loo = isfc(data, targets=targets_data, pairwise=False, + vectorize_isfcs=False, tolerate_nans=False) + assert np.sum(np.isnan(isfcs_loo)) == (n_subjects - 1) * (n_targets * 2) + + # Single NaN in targets will get averaged out with tolerate + isfcs_loo = isfc(data, targets=targets_data, pairwise=False, + vectorize_isfcs=False, tolerate_nans=True) + assert np.sum(np.isnan(isfcs_loo)) == 0 + def test_squareform_isfc(): From e45641912163b15d4afeec489836a0bdfdca3d4e Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Mon, 18 Mar 2019 16:35:02 -0400 Subject: [PATCH 18/21] Increasing some bits of code coverage --- brainiak/utils/utils.py | 6 +++--- tests/utils/test_utils.py | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index 8f23da0fc..bdb119cc6 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -827,13 +827,13 @@ def p_from_null(observed, distribution, logger.info("Assuming {0} resampling iterations".format(n_samples)) if side == 'two-sided': - # numerator for two-sided test + # Numerator for two-sided test numerator = np.sum(np.abs(distribution) >= np.abs(observed), axis=axis) elif side == 'left': - # numerator for one-sided test in left tail + # Numerator for one-sided test in left tail numerator = np.sum(distribution <= observed, axis=axis) elif side == 'right': - # numerator for one-sided test in right tail + # Numerator for one-sided test in right tail numerator = np.sum(distribution >= observed, axis=axis) # If exact test all possible permutations and do not adjust diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 7a76ab49a..5311999b7 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -189,6 +189,12 @@ def test_p_from_null(): null = np.random.randn(10000) observed = np.ceil(np.percentile(null, 97.5) * 1000) / 1000 + # Check that we 
catch improper side + try: + _ = p_from_null(observed, null, side='wrong') + except ValueError: + pass + # Check two-tailed p-value for observed p_ts = p_from_null(observed, null) assert np.isclose(p_ts, 0.05, atol=1e-02) @@ -258,6 +264,9 @@ def test_phase_randomize(): assert not np.array_equal(shifted_data[..., 0], shifted_data[..., 1]) assert not np.array_equal(shifted_data[..., 0], data[..., 0]) + # Check that uneven n_TRs doesn't explode + _ = phase_randomize(data[:-1, ...]) + # Check that random_state returns same shifts shifted_data_ = phase_randomize(data, voxelwise=False, random_state=1) assert np.array_equal(shifted_data, shifted_data_) @@ -334,6 +343,13 @@ def test_check_timeseries_input(): assert n_voxels == 1 assert n_subjects == 10 + # Check if lists have mismatching size + list_bad = [list_2d[0][:-1, :]] + list_2d[1:] + try: + (data_list_bad, _, _, _) = _check_timeseries_input(list_bad) + except ValueError: + pass + # List of 3-dimensional arrays list_3d = [vector[:, np.newaxis, np.newaxis] for _ in np.arange(10)] @@ -352,6 +368,13 @@ def test_check_timeseries_input(): assert n_voxels == 1 assert n_subjects == 10 + # Check that 4-dimensional input array throws error + array_4d = array_3d[..., np.newaxis] + try: + (data_array_4d, _, _, _) = _check_timeseries_input(array_4d) + except ValueError: + pass + # Check they're the same for pair in combinations([data_list_1d, data_array_2d, data_list_2d, data_list_3d, From d71ae14d305c0c183c6e0755491027eb36003f19 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Tue, 19 Mar 2019 13:13:12 -0400 Subject: [PATCH 19/21] Modified exception tests to use shmancy pytest.raises --- tests/isc/test_isc.py | 13 ++++--------- tests/utils/test_utils.py | 12 +++--------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/tests/isc/test_isc.py b/tests/isc/test_isc.py index fe8af9d18..fc4ea17c1 100644 --- a/tests/isc/test_isc.py +++ b/tests/isc/test_isc.py @@ -1,5 +1,6 @@ import numpy as np import logging +import 
pytest from brainiak.isc import (isc, isfc, bootstrap_isc, permutation_isc, squareform_isfc, timeshift_isc, phaseshift_isc) @@ -129,10 +130,8 @@ def test_isc_options(): isc_median = isc(data, pairwise=False, summary_statistic='median') assert isc_median.shape == (n_voxels,) - try: + with pytest.raises(ValueError): isc(data, pairwise=False, summary_statistic='min') - except ValueError: - logger.info("Correctly caught unexpected summary statistic") logger.info("Finished testing ISC options") @@ -698,18 +697,14 @@ def test_isfc_options(): n_voxels=n_targets, data_type='array') - try: + with pytest.raises(ValueError): isfcs = isfc(data, targets=targets_data[..., :-1], pairwise=False, vectorize_isfcs=False) - except ValueError: - logging.info("Correctly caught mismatching n_subjects") assert isfcs.shape == (n_subjects, n_voxels, n_targets) - try: + with pytest.raises(ValueError): isfcs = isfc(data, targets=targets_data[:-1, ...], pairwise=False, vectorize_isfcs=False) - except ValueError: - logging.info("Correctly caught mismatching n_TRs") # Check targets for only 2 subjects isfcs = isfc(data[..., :2], targets=targets_data[..., :2], diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 5311999b7..f37c51a7e 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -190,10 +190,8 @@ def test_p_from_null(): observed = np.ceil(np.percentile(null, 97.5) * 1000) / 1000 # Check that we catch improper side - try: + with pytest.raises(ValueError): _ = p_from_null(observed, null, side='wrong') - except ValueError: - pass # Check two-tailed p-value for observed p_ts = p_from_null(observed, null) @@ -345,10 +343,8 @@ def test_check_timeseries_input(): # Check if lists have mismatching size list_bad = [list_2d[0][:-1, :]] + list_2d[1:] - try: + with pytest.raises(ValueError): (data_list_bad, _, _, _) = _check_timeseries_input(list_bad) - except ValueError: - pass # List of 3-dimensional arrays list_3d = [vector[:, np.newaxis, np.newaxis] @@ 
-370,10 +366,8 @@ def test_check_timeseries_input(): # Check that 4-dimensional input array throws error array_4d = array_3d[..., np.newaxis] - try: + with pytest.raises(ValueError): (data_array_4d, _, _, _) = _check_timeseries_input(array_4d) - except ValueError: - pass # Check they're the same for pair in combinations([data_list_1d, data_array_2d, From 4cc6417eba3eadf1f95b72cb01d962361d2b54bc Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Wed, 27 Mar 2019 18:27:28 -0400 Subject: [PATCH 20/21] Added news items for issues and PR --- docs/newsfragments/384.feature | 1 + docs/newsfragments/396.feature | 1 + docs/newsfragments/397.feature | 1 + docs/newsfragments/398.feature | 1 + docs/newsfragments/399.feature | 1 + docs/newsfragments/405.feature | 1 + docs/newsfragments/409.feature | 1 + 7 files changed, 7 insertions(+) create mode 100644 docs/newsfragments/384.feature create mode 100644 docs/newsfragments/396.feature create mode 100644 docs/newsfragments/397.feature create mode 100644 docs/newsfragments/398.feature create mode 100644 docs/newsfragments/399.feature create mode 100644 docs/newsfragments/405.feature create mode 100644 docs/newsfragments/409.feature diff --git a/docs/newsfragments/384.feature b/docs/newsfragments/384.feature new file mode 100644 index 000000000..4a4dde007 --- /dev/null +++ b/docs/newsfragments/384.feature @@ -0,0 +1 @@ +Revamped ISC/ISFC functionality with more statistical tests diff --git a/docs/newsfragments/396.feature b/docs/newsfragments/396.feature new file mode 100644 index 000000000..e6731e504 --- /dev/null +++ b/docs/newsfragments/396.feature @@ -0,0 +1 @@ +utils.phase_randomize outputs phase-scrambled input data, not tied to ISC/ISFC diff --git a/docs/newsfragments/397.feature b/docs/newsfragments/397.feature new file mode 100644 index 000000000..fabbc3014 --- /dev/null +++ b/docs/newsfragments/397.feature @@ -0,0 +1 @@ +utils.p_from_null no longer estimates distribution, simply returns p-value diff --git 
a/docs/newsfragments/398.feature b/docs/newsfragments/398.feature new file mode 100644 index 000000000..9935b6d99 --- /dev/null +++ b/docs/newsfragments/398.feature @@ -0,0 +1 @@ +ISC/ISFC analyses will now tolerate NaNs or tolerate a proportion of NaNs diff --git a/docs/newsfragments/399.feature b/docs/newsfragments/399.feature new file mode 100644 index 000000000..d51d1b372 --- /dev/null +++ b/docs/newsfragments/399.feature @@ -0,0 +1 @@ +ISFC will now output either vectorized triangle and diagonal of square matrices diff --git a/docs/newsfragments/405.feature b/docs/newsfragments/405.feature new file mode 100644 index 000000000..5bc21b1cd --- /dev/null +++ b/docs/newsfragments/405.feature @@ -0,0 +1 @@ +Updates NaN-toleration, ISFC output shape, and targets array for ISFC diff --git a/docs/newsfragments/409.feature b/docs/newsfragments/409.feature new file mode 100644 index 000000000..1382a649f --- /dev/null +++ b/docs/newsfragments/409.feature @@ -0,0 +1 @@ +Asymmetric ISFC analysis can now be performed based on a targets array From 357376b6befa1e2dd96a5ef2985999e8576f5319 Mon Sep 17 00:00:00 2001 From: Sam Nastase Date: Wed, 27 Mar 2019 18:33:17 -0400 Subject: [PATCH 21/21] Updated NEWS directly for outdated PR #384 --- NEWS.rst | 2 ++ docs/newsfragments/384.feature | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) delete mode 100644 docs/newsfragments/384.feature diff --git a/NEWS.rst b/NEWS.rst index 3af1dcb58..1cc6725e1 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -17,6 +17,8 @@ Features - fmrisim: Added fitting procedure for SFNR and SNR parameters. Updated AR to be ARMA, involving both the generation and estimation. (`#372 `_) +- isc: Revamped ISC/ISFC functionality with more statistical tests. (`#384 + `_) - reprsimil: Added an option in BRSA to set the prior of SNR to be "equal" across all voxels. 
(`#387 `_) diff --git a/docs/newsfragments/384.feature b/docs/newsfragments/384.feature deleted file mode 100644 index 4a4dde007..000000000 --- a/docs/newsfragments/384.feature +++ /dev/null @@ -1 +0,0 @@ -Revamped ISC/ISFC functionality with more statistical tests