From 643c2a42cbb8079707ce9082c8622f550d87cb41 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 12:17:06 -0700 Subject: [PATCH 01/14] Added Semi-Supervised SRM --- brainiak/funcalign/srm.py | 2 +- brainiak/funcalign/sssrm.py | 823 ++++++++++++++++++ brainiak/utils/utils.py | 66 ++ examples/funcalign/download-data.sh | 4 +- .../srm_image_prediction_example.ipynb | 23 +- .../funcalign/srm_image_prediction_example.py | 23 +- .../sssrm_image_prediction_example.py | 95 ++ setup.py | 2 + tests/funcalign/test_sssrm.py | 172 ++++ tests/utils/test_utils.py | 26 + 10 files changed, 1220 insertions(+), 16 deletions(-) create mode 100644 brainiak/funcalign/sssrm.py create mode 100644 examples/funcalign/sssrm_image_prediction_example.py create mode 100644 tests/funcalign/test_sssrm.py diff --git a/brainiak/funcalign/srm.py b/brainiak/funcalign/srm.py index ff97f6b8a..56e65b326 100644 --- a/brainiak/funcalign/srm.py +++ b/brainiak/funcalign/srm.py @@ -81,7 +81,7 @@ def _init_w_transforms(data, features): # Set Wi to a random orthogonal voxels by features matrix for subject in range(subjects): voxels[subject] = data[subject].shape[0] - rnd_matrix = np.mat(np.random.random((voxels[subject], features))) + rnd_matrix = np.random.random((voxels[subject], features)) q, r = np.linalg.qr(rnd_matrix) w.append(q) diff --git a/brainiak/funcalign/sssrm.py b/brainiak/funcalign/sssrm.py new file mode 100644 index 000000000..d4cf52539 --- /dev/null +++ b/brainiak/funcalign/sssrm.py @@ -0,0 +1,823 @@ +# Copyright 2016 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Semi-Supervised Shared Response Model (SS-SRM) + +The implementations are based on the following publications: + +.. [Turek2016] "A Semi-Supervised Method for Multi-Subject fMRI Functional + Alignment", + J. Turek, T. Willke, P.-H. Chen, P. Ramadge + under review, 2016. +""" + +# Authors: Javier Turek (Intel Labs), 2016 + +import logging + +import numpy as np +from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin +from sklearn.utils import assert_all_finite +from sklearn.utils.validation import NotFittedError +from sklearn.utils.multiclass import unique_labels +import theano +import theano.tensor as T +import theano.compile.sharedvalue as S +from pymanopt.manifolds import Euclidean +from pymanopt.manifolds import Product +from pymanopt.solvers import ConjugateGradient +from pymanopt import Problem +from pymanopt.manifolds import Stiefel +import gc + +from brainiak.utils import utils +from brainiak.funcalign import srm + +__all__ = [ + "SSSRM" +] + +logger = logging.getLogger(__name__) + + +class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): + """Semi-Supervised Shared Response Model (SS-SRM) + + Given multi-subject data, factorize it as a shared response S among all + subjects and an orthogonal transform W per subject, using also labeled + data to train a Multinomial Logistic Regression (MLR) classifier (with + l2 regularization) in a semi-supervised manner: + + .. math:: X_i \\approx W_i S ,~for~all~i=1\dots N + and + .. math:: f(W_i^T Z_i)=y_i ,~for~all~i=1\dots N + + Parameters + ---------- + + n_iter : int, default: 10 + Number of iterations to run the algorithm. + + features : int, default: 50 + Number of features to compute. + + gamma : float, default: 1.0 + Regularization parameter for the classifier. + + cost : float, default: 0.5 + Balance parameter between the SRM term and the MLR term. + + rand_seed : int, default: 0 + Seed for initializing the random number generator. + + verbose : boolean, default: False + Verbose mode flag. + + + Attributes + ---------- + + w_ : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) for each subject. + + s_ : array, shape=[features, samples] + The shared response. + + theta_ : array, shape=[classes, features] + The MLR class plane parameters. + + bias_ : array, shape=[classes] + The MLR class biases. + + classes_ : array of int, shape=[classes] + Mapping table for each classes to original class label. + + .. note:: + The number of voxels may be different between subjects. However, the + number of samples for the alignment data must be the same across + subjects. The number of labeled samples per subject can be different. + + The Semi-Supervised Shared Response Model is approximated using the + Block-Coordinate Descent (BCD) algorithm proposed in [Turek2016]_. + + This is a single node version. + """ + + def __init__(self, n_iter=10, features=50, gamma=1.0, cost=0.5, + rand_seed=0, verbose=False): + self.n_iter = n_iter + self.features = features + self.gamma = gamma + self.cost = cost + self.rand_seed = rand_seed + self.verbose = verbose + return + + def fit(self, X, y, Z): + """Compute the Semi-Supervised Shared Response Model + + Parameters + ---------- + + X : list of 2D arrays, element i has shape=[voxels_i, n_align] + Each element in the list contains the fMRI data for alignment of + one subject. There are n_align samples for each subject. + + y : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the data samples + in Z. + + Z : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject + for training the MLR classifier. + + """ + if self.verbose: + logger.info('Starting SS-SRM') + + # Check that the cost value is in range (0.0,1.0) + if 0.0 >= self.cost or self.cost >= 1.0: + raise ValueError("Cost parameter should be in range (0.0, 1.0)") + + # Check that the regularizer value is positive + if 0.0 >= self.gamma: + raise ValueError("Gamma parameter should be positive.") + + # Check the number of subjects + if len(X) <= 1: + raise ValueError("There are not enough subjects in the alignment " + "data ({0:d}) to train the model.".format(len(X))) + if len(y) <= 1: + raise ValueError("There are not enough subjects in the labels y " + "({0:d}) to train the model.".format(len(y))) + + if len(Z) <= 1: + raise ValueError("There are not enough subjects in the labeled Z " + "data ({0:d}) to train the model.".format(len(Z))) + + if not (len(X) == len(y)) or not (len(X) == len(Z)): + raise ValueError("Different number of subjects in data") + + # Check for input data sizes + if X[0].shape[1] < self.features: + raise ValueError( + "There are not enough samples to train the model with " + "{0:d} features.".format(self.features)) + + # Check if all subjects have same number of TRs for alignment + number_trs = X[0].shape[1] + number_subjects = len(X) + for subject in range(number_subjects): + assert_all_finite(X[subject]) + if X[subject].shape[1] != number_trs: + raise ValueError("Different number of alignment samples " + "between subjects.") + + # Check if alignment and classification data have the same number of + # voxels per subject. Also check that there labels for all the classif. + # sample + for subject in range(number_subjects): + assert_all_finite(Z[subject]) + if X[subject].shape[0] != Z[subject].shape[0]: + raise ValueError("Different number of voxels between alignment" + " and classification data (subject {0:d})" + ".".format(subject)) + if Z[subject].shape[1] != y[subject].size: + raise ValueError("Different number of samples and labels in " + "subject {0:d}.".format(subject)) + + # Map the classes to [0..C-1] + new_y = self._init_classes(y) + + # Run SS-SRM + self.w_, self.s_, self.theta_, self.bias_ = self._sssrm(X, Z, new_y) + + return self + + def _init_classes(self, y): + """Map all possible classes to the range [0,..,C-1] + + Parameters + ---------- + + y : list of arrays of int, each element has shape=[samples_i,] + Labels of the samples for each subject + + + Returns + ------- + new_y : list of arrays of int, each element has shape=[samples_i,] + Mapped labels of the samples for each subject + + ..note:: + The mapping of the classes is saved in the attribute classes_. + """ + self.classes_ = unique_labels(utils.concatenate_list(y)) + new_y = [None] * len(y) + for s in range(len(y)): + new_y[s] = np.digitize(y[s], self.classes_) - 1 + return new_y + + def transform(self, X, y=None): + """Use the model to transform matrix to Shared Response space + + Parameters + ---------- + + X : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject + note that number of voxels and samples can vary across subjects. + + y : not used as it only applies the mappings + + + Returns + ------- + + s : list of 2D arrays, element i has shape=[features_i, samples_i] + Shared responses from input data (X) + """ + + # Check if the model exist + if hasattr(self, 'w_') is False: + raise NotFittedError("The model fit has not been run yet.") + + # Check the number of subjects + if len(X) != len(self.w_): + raise ValueError("The number of subjects does not match the one" + " in the model.") + + s = [None] * len(X) + for subject in range(len(X)): + s[subject] = self.w_[subject].T.dot(X[subject]) + + return s + + def predict(self, X): + """Classify the output for given data + + Parameters + ---------- + + X : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject + The number of voxels should be according to each subject at + the moment of training the model. + + Returns + ------- + p: list of arrays, element i has shape=[samples_i] + Predictions for each data sample. + """ + # Check if the model exist + if hasattr(self, 'w_') is False: + raise NotFittedError("The model fit has not been run yet.") + + # Check the number of subjects + if len(X) != len(self.w_): + raise ValueError("The number of subjects does not match the one" + " in the model.") + + X_shared = self.transform(X) + p = [None] * len(X_shared) + for subject in range(len(X_shared)): + sumexp, _, exponents = utils.sumexp_stable( + self.theta_.T.dot(X_shared[subject]) + self.bias_) + p[subject] = self.classes_[ + (exponents / sumexp[np.newaxis, :]).argmax(axis=0)] + + return p + + def _sssrm(self, data_align, data_sup, labels): + """Block-Coordinate Descent algorithm for fitting SS-SRM. + + Parameters + ---------- + + data_align : list of 2D arrays, element i has shape=[voxels_i, n_align] + Each element in the list contains the fMRI data for alignment of + one subject. There are n_align samples for each subject. + + data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject for + the classification task. + + labels : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the data samples + in data_sup. + + + Returns + ------- + + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + s : array, shape=[features, samples] + The shared response. + """ + classes = self.classes_.size + + # Initialization: + np.random.seed(self.rand_seed) + # Set Wi's to a random orthogonal voxels by TRs + w, _ = srm._init_w_transforms(data_align, self.features) + + # Initialize the shared response S + s = SSSRM._compute_shared_response(data_align, w) + + # Initialize theta and bias + theta, bias = self._update_classifier(data_sup, labels, w, classes) + + # calculate and print the objective function + if self.verbose: + objective = self._objective_function(data_align, data_sup, labels, + w, s, theta, bias) + logger.info('Objective function %f' % objective) + + # Main loop: + for iteration in range(self.n_iter): + if self.verbose: + logger.info('Iteration %d' % (iteration + 1)) + + # Update the mappings Wi + w = self._update_w(data_align, data_sup, labels, w, s, theta, bias) + + # Output the objective function + if self.verbose: + objective = self._objective_function(data_align, data_sup, + labels, w, s, theta, bias) + logger.info('Objective function after updating Wi %f' + % objective) + + # Update the shared response S + s = SSSRM._compute_shared_response(data_align, w) + + # Output the objective function + if self.verbose: + objective = self._objective_function(data_align, data_sup, + labels, w, s, theta, bias) + logger.info('Objective function after updating S %f' + % objective) + + # Update the MLR classifier, theta and bias + theta, bias = self._update_classifier(data_sup, labels, w, classes) + + # Output the objective function + if self.verbose: + objective = self._objective_function(data_align, data_sup, + labels, w, s, theta, bias) + logger.info('Objective function after updating MLR %f' + % objective) + + return w, s, theta, bias + + def _update_classifier(self, data, labels, w, classes): + """Update the classifier parameters theta and bias + + Parameters + ---------- + + data : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject for + the classification task. + + labels : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the data samples + in data_sup. + + w : list of 2D array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + classes : int + The number of classes in the classifier. + + + Returns + ------- + + theta : array, shape=[features, classes] + The MLR parameter for the class planes. + + bias : array shape=[classes,] + The MLR parameter for class biases. + """ + + # Stack the data and labels for training the classifier + data_stacked, labels_stacked, weights = \ + SSSRM._stack_list(data, labels, w) + + features = w[0].shape[1] + total_samples = weights.size + + data_th = S.shared(data_stacked.astype(theano.config.floatX)) + val_ = S.shared(labels_stacked) + total_samples_S = S.shared(total_samples) + theta_th = T.matrix(name='theta', dtype=theano.config.floatX) + bias_th = T.col(name='bias', dtype=theano.config.floatX) + constf2 = S.shared(self.cost / self.gamma, allow_downcast=True) + weights_th = S.shared(weights) + + log_p_y_given_x = \ + T.log(T.nnet.softmax((theta_th.T.dot(data_th.T)).T + bias_th.T)) + f = -constf2 * T.sum((log_p_y_given_x[T.arange(total_samples_S), val_]) + / weights_th) + 0.5 * T.sum(theta_th ** 2) + + manifold = Product((Euclidean(features, classes), + Euclidean(classes, 1))) + problem = Problem(manifold=manifold, cost=f, arg=[theta_th, bias_th], + verbosity=0) + solver = ConjugateGradient(mingradnorm=1e-6) + solution = solver.solve(problem) + theta = solution[0] + bias = solution[1] + + del constf2 + del theta_th + del bias_th + del data_th + del val_ + del solver + del solution + + return theta, bias + + def _update_w(self, data_align, data_sup, labels, w, s, theta, bias): + """ + + Parameters + ---------- + data_align : list of 2D arrays, element i has shape=[voxels_i, n_align] + Each element in the list contains the fMRI data for alignment of + one subject. There are n_align samples for each subject. + + data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject for + the classification task. + + labels : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the data samples + in data_sup. + + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + s : array, shape=[features, samples] + The shared response. + + theta : array, shape=[classes, features] + The MLR class plane parameters. + + bias : array, shape=[classes] + The MLR class biases. + + Returns + ------- + + w : list of 2D array, element i has shape=[voxels_i, features] + The updated orthogonal transforms (mappings). + """ + subjects = len(data_align) + + s_th = S.shared(s.astype(theano.config.floatX)) + theta_th = S.shared(theta.T.astype(theano.config.floatX)) + bias_th = S.shared(bias.T.astype(theano.config.floatX), + broadcastable=(True, False)) + + for subject in range(subjects): + if self.verbose: + logger.info('Subject Wi %d' % subject) + # Solve for subject i + # Create the theano function + w_th = T.matrix(name='W', dtype=theano.config.floatX) + data_srm_subject = \ + S.shared(data_align[subject].astype(theano.config.floatX)) + constf1 = \ + S.shared((1 - self.cost) * 0.5 / data_align[subject].shape[1], + allow_downcast=True) + f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2) + + if data_sup[subject] is not None: + lr_samples_S = S.shared(data_sup[subject].shape[1]) + data_sup_subject = \ + S.shared(data_sup[subject].astype(theano.config.floatX)) + labels_S = S.shared(labels[subject]) + constf2 = S.shared(-self.cost / self.gamma + / data_sup[subject].shape[1], + allow_downcast=True) + + log_p_y_given_x = T.log(T.nnet.softmax((theta_th.dot( + w_th.T.dot(data_sup_subject))).T + bias_th)) + f2 = constf2 * T.sum( + log_p_y_given_x[T.arange(lr_samples_S), labels_S]) + f = f1 + f2 + else: + f = f1 + + # Define the problem and solve + f_subject = self._objective_function_subject(data_align[subject], + data_sup[subject], + labels[subject], + w[subject], + s, theta, bias) + minstep = np.min((10**-np.floor(np.log10(f_subject))), 1e-1) + manifold = Stiefel(w[subject].shape[0], w[subject].shape[1]) + problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0) + solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep) + w[subject] = np.array(solver.solve( + problem, x=w[subject].astype(theano.config.floatX))) + if data_sup[subject] is not None: + del f2 + del log_p_y_given_x + del data_sup_subject + del labels_S + del solver + del problem + del manifold + del f + del f1 + del data_srm_subject + del w_th + del theta_th + del bias_th + del s_th + + # Run garbage collector to avoid filling up the memory + gc.collect() + return w + + @staticmethod + def _compute_shared_response(data, w): + """ Compute the shared response S + + Parameters + ---------- + + data : list of 2D arrays, element i has shape=[voxels_i, samples] + Each element in the list contains the fMRI data of one subject. + + w : list of 2D arrays, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + + Returns + ------- + + s : array, shape=[features, samples] + The shared response for the subjects data with the mappings in w. + """ + s = np.zeros((w[0].shape[1], data[0].shape[1])) + for m in range(len(w)): + s = s + w[m].T.dot(data[m]) + s /= len(w) + return s + + def _objective_function(self, data_align, data_sup, labels, w, s, theta, + bias): + """Compute the objective function of the Semi-Supervised SRM + + .. math:: (1-C)*Loss_{SRM}(W_i,S;X_i) + .. math:: + C/\gamma * Loss_MLR(\theta, bias; {(W_i^T*Z_i, y_i}) + .. math:: + R(\theta) + + Parameters + ---------- + + data_align : list of 2D arrays, element i has shape=[voxels_i, n_align] + Each element in the list contains the fMRI data for alignment of + one subject. There are n_align samples for each subject. + + data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject for + the classification task. + + labels : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the data samples + in data_sup. + + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + s : array, shape=[features, samples] + The shared response. + + theta : array, shape=[classes, features] + The MLR class plane parameters. + + bias : array, shape=[classes] + The MLR class biases. + + + Returns + ------- + + f_val : float + The SS-SRM objective function evaluated based on the parameters to + this function. + """ + subjects = len(data_align) + + # Compute the SRM loss + f_val = 0.0 + for subject in range(subjects): + samples = data_align[subject].shape[1] + f_val += (1 - self.cost) * (0.5 / samples) \ + * np.linalg.norm(data_align[subject] - w[subject].dot(s), + 'fro')**2 + + # Compute the MLR loss + f_val += self._loss_lr(data_sup, labels, w, theta, bias) + + return f_val + + def _objective_function_subject(self, data_align, data_sup, labels, w, s, + theta, bias): + """Compute the objective function for one subject. + + .. math:: (1-C)*Loss_{SRM}_i(W_i,S;X_i) + .. math:: + C/\gamma * Loss_MLR_i(\theta, bias; {(W_i^T*Z_i, y_i}) + .. math:: + R(\theta) + + Parameters + ---------- + + data_align : 2D array, shape=[voxels_i, samples_align] + Contains the fMRI data for alignment of subject i. + + data_sup : 2D array, shape=[voxels_i, samples_i] + Contains the fMRI data of one subject for the classification task. + + labels : array of int, shape=[samples_i] + The labels for the data samples in data_sup. + + w : array, shape=[voxels_i, features] + The orthogonal transform (mapping) :math:`W_i` for subject i. + + s : array, shape=[features, samples] + The shared response. + + theta : array, shape=[classes, features] + The MLR class plane parameters. + + bias : array, shape=[classes] + The MLR class biases. + + + Returns + ------- + + f_val : float + The SS-SRM objective function for subject i evaluated on the + parameters to this function. + """ + # Compute the SRM loss + f_val = 0.0 + samples = data_align.shape[1] + f_val += (1 - self.cost) * (0.5 / samples) \ + * np.linalg.norm(data_align - w.dot(s), 'fro')**2 + + # Compute the MLR loss + f_val += self._loss_lr_subject(data_sup, labels, w, theta, bias) + + return f_val + + def _loss_lr_subject(self, data, labels, w, theta, bias): + """Compute the Loss MLR for a single subject (without regularization) + + Parameters + ---------- + + data : array, shape=[voxels, samples] + The fMRI data of subject i for the classification task. + + labels : array of int, shape=[samples] + The labels for the data samples in data. + + w : array, shape=[voxels, features] + The orthogonal transform (mapping) :math:`W_i` for subject i. + + theta : array, shape=[classes, features] + The MLR class plane parameters. + + bias : array, shape=[classes] + The MLR class biases. + + Returns + ------- + + loss : float + The loss MLR for the subject + """ + if data is None: + return 0.0 + + samples = data.shape[1] + + thetaT_wi_zi_plus_bias = theta.T.dot(w.T.dot(data)) + bias + sum_exp, max_value, _ = utils.sumexp_stable(thetaT_wi_zi_plus_bias) + sum_exp_values = np.log(sum_exp) + max_value + + aux = 0.0 + for sample in range(samples): + label = labels[sample] + aux += thetaT_wi_zi_plus_bias[label, sample] + return self.cost / samples / self.gamma * (sum_exp_values.sum() - aux) + + def _loss_lr(self, data, labels, w, theta, bias): + """Compute the Loss MLR (with the regularization) + + Parameters + ---------- + + data : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject for + the classification task. + + labels : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the samples in + data. + + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + theta : array, shape=[classes, features] + The MLR class plane parameters. + + bias : array, shape=[classes] + The MLR class biases. + + + Returns + ------- + + loss : float + The loss MLR for the SS-SRM model + """ + subjects = len(data) + loss = 0.0 + for subject in range(subjects): + if labels[subject] is not None: + loss += self._loss_lr_subject(data[subject], labels[subject], + w[subject], theta, bias) + + return loss + 0.5 * np.linalg.norm(theta, 'fro')**2 + + @staticmethod + def _stack_list(data, data_labels, w): + """Construct a numpy array by stacking arrays in a list + + Parameter + ---------- + + data : list of 2D arrays, element i has shape=[voxels_i, samples_i] + Each element in the list contains the fMRI data of one subject for + the classification task. + + data_labels : list of arrays of int, element i has shape=[samples_i] + Each element in the list contains the labels for the samples in + data. + + w : list of array, element i has shape=[voxels_i, features] + The orthogonal transforms (mappings) :math:`W_i` for each subject. + + + Returns + ------- + + data_stacked : 2D array, shape=[samples, features] + The data samples from all subjects are stacked into a single + 2D array, where "samples" is the sum of samples_i. + + labels_stacked : array, shape=[samples,] + The labels from all subjects are stacked into a single + array, where "samples" is the sum of samples_i. + + weights : array, shape=[samples,] + The number of samples of the subject that are related to that + sample. They become a weight per sample in the MLR loss. + """ + labels_stacked = utils.concatenate_list(data_labels) + + weights = np.empty((labels_stacked.size,)) + data_shared = [None] * len(data) + curr_samples = 0 + for s in range(len(data)): + if data[s] is not None: + subject_samples = data[s].shape[1] + curr_samples_end = curr_samples + subject_samples + weights[curr_samples:curr_samples_end] = subject_samples + data_shared[s] = w[s].T.dot(data[s]) + curr_samples += data[s].shape[1] + + data_stacked = utils.concatenate_list(data_shared, axis=1).T + return data_stacked, labels_stacked, weights diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index 2f1087604..1cccd0e0f 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -100,3 +100,69 @@ def fast_inv(a): except np.linalg.linalg.LinAlgError: logging.exception('Error from np.linalg.solve') raise + + +def sumexp_stable(data): + """Compute the sum of exponents for a list of samples + + Parameters + ---------- + + data : array, shape=[features, samples] + A data array containing samples. + + + Returns + ------- + + result_sum : array, shape=[samples,] + The sum of exponents for each sample divided by the exponent + of the maximum feature value in the sample. + + max_value : array, shape=[samples,] + The maximum feature value for each sample. + + result_exp : array, shape=[features, samples] + The exponent of each element in each sample divided by the exponent + of the maximum feature value in the sample. + + ..note:: + This function is more stable than computing the sum(exp(v)). + It useful for computing the softmax_i(v)=exp(v_i)/sum(exp(v)) function. + """ + max_value = data.max(axis=0) + result_exp = np.exp(data - max_value) + result_sum = np.sum(result_exp, axis=0) + return result_sum, max_value, result_exp + + +def concatenate_list(l, axis=0): + """Construct a numpy array by stacking arrays in a list + + Parameter + ---------- + + data : list of arrays, arrays have same shape in all but one dimenstion or + elements are None + The list of arrays to be concatenated. + + axis : int, default = 0 + Axis for the concatenation + + + Returns + ------- + + data_stacked : array + The resulting concatenated array. + + """ + # Get the indexes of the arrays in the list + mask = [] + for i in range(len(l)): + if l[i] is not None: + mask.append(i) + + # Concatenate them + l_stacked = np.concatenate([l[i] for i in mask], axis=axis) + return l_stacked diff --git a/examples/funcalign/download-data.sh b/examples/funcalign/download-data.sh index d12d70d00..41bf1f7d7 100755 --- a/examples/funcalign/download-data.sh +++ b/examples/funcalign/download-data.sh @@ -1,3 +1,3 @@ -curl --location --create-dirs -o data/movie_data.mat https://www.dropbox.com/s/7areadyb6ddvl5h/movie_data.mat?dl=0 +curl --location --create-dirs -o data/movie_data.mat https://www.dropbox.com/s/2vg143mr2gwt6dz/movie_data.mat?dl=0 curl --location --create-dirs -o data/label.mat https://www.dropbox.com/s/ogd26q6fro4l2d2/label.mat?dl=0 -curl --location --create-dirs -o data/image_data.mat https://www.dropbox.com/s/wks4fwgzetmeqb4/image_data.mat?dl=0 +curl --location --create-dirs -o data/image_data.mat https://www.dropbox.com/s/l818vr6o8huatxj/image_data.mat?dl=0 \ No newline at end of file diff --git a/examples/funcalign/srm_image_prediction_example.ipynb b/examples/funcalign/srm_image_prediction_example.ipynb index c82c349bb..86444b23d 100644 --- a/examples/funcalign/srm_image_prediction_example.ipynb +++ b/examples/funcalign/srm_image_prediction_example.ipynb @@ -57,7 +57,7 @@ }, "outputs": [], "source": [ - "movie_data = scipy.io.loadmat('data/movie_data.mat')" + "movie_file = scipy.io.loadmat('data/movie_data.mat')" ] }, { @@ -65,7 +65,8 @@ "metadata": {}, "source": [ "Convert data to a list of arrays matching SRM input.\n", - "Each element is a matrix of voxels by TRs." + "Each element is a matrix of voxels by TRs.\n", + "Also, concatenate data from both hemispheres in the brain." ] }, { @@ -76,8 +77,12 @@ }, "outputs": [], "source": [ - "movie_data = list(movie_data['movie_data_lh'])\n", - "subjects = len(movie_data)" + "movie_data_left = movie_file['movie_data_lh']\n", + "movie_data_right = movie_file['movie_data_rh']\n", + "subjects = movie_data_left.shape[2]\n", + "movie_data = []\n", + "for s in range(subjects):\n", + " movie_data.append(np.concatenate([movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0))" ] }, { @@ -133,14 +138,16 @@ }, "outputs": [], "source": [ - "image_data = scipy.io.loadmat('data/image_data.mat')" + "image_file = scipy.io.loadmat('data/image_data.mat')\n", + "image_data_left = image_file['image_data_lh']\n", + "image_data_right = image_file['image_data_rh']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Convert data to a list of arrays matching SRM input. Each element is a matrix of voxels by TRs." + "Convert data to a list of arrays matching SRM input. Each element is a matrix of voxels by TRs. Also, concatenate data from both hemispheres in the brain." ] }, { @@ -151,7 +158,9 @@ }, "outputs": [], "source": [ - "image_data = list(image_data['image_data_lh'])" + "image_data = []\n", + "for s in range(subjects):\n", + " image_data.append(np.concatenate([image_data_left[:, :, s], image_data_right[:, :, s]], axis=0))" ] }, { diff --git a/examples/funcalign/srm_image_prediction_example.py b/examples/funcalign/srm_image_prediction_example.py index c8e69bf0d..52e2011c1 100644 --- a/examples/funcalign/srm_image_prediction_example.py +++ b/examples/funcalign/srm_image_prediction_example.py @@ -16,21 +16,26 @@ from sklearn.metrics import confusion_matrix from sklearn.svm import NuSVC import numpy as np +import brainiak.funcalign.srm # Load the input data that contains the movie stimuli for unsupervised training with SRM -movie_data = scipy.io.loadmat('data/movie_data.mat') +movie_file = scipy.io.loadmat('data/movie_data.mat') +movie_data_left = movie_file['movie_data_lh'] +movie_data_right = movie_file['movie_data_rh'] +subjects = movie_data_left.shape[2] # Convert data to a list of arrays matching SRM input. # Each element is a matrix of voxels by TRs. -movie_data = list(movie_data['movie_data_lh']) -subjects = len(movie_data) +# Also, concatenate data from both hemispheres in the brain. +movie_data = [] +for s in range(subjects): + movie_data.append(np.concatenate([movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0)) # Z-score the data for subject in range(subjects): movie_data[subject] = stats.zscore(movie_data[subject],axis=1,ddof=1) # Run SRM with the movie data -import brainiak.funcalign.srm help(brainiak.funcalign.srm.SRM) srm = brainiak.funcalign.srm.SRM(n_iter=10, features=50, verbose=True) srm.fit(movie_data) @@ -61,10 +66,16 @@ def plot_confusion_matrix(cm, title="Confusion Matrix"): plt.show() # Load the input data that contains the image stimuli and its labels for training a classifier -image_data = scipy.io.loadmat('data/image_data.mat') +image_file = scipy.io.loadmat('data/image_data.mat') +image_data_left = image_file['image_data_lh'] +image_data_right = image_file['image_data_rh'] + # Convert data to a list of arrays matching SRM input. # Each element is a matrix of voxels by TRs. -image_data = list(image_data['image_data_lh']) +# Also, concatenate data from both hemispheres in the brain. +image_data = [] +for s in range(subjects): + image_data.append(np.concatenate([image_data_left[:, :, s], image_data_right[:, :, s]], axis=0)) assert image_data[0].shape[0] == movie_data[0].shape[0], "Number of voxels in movie data and image data do not match!" diff --git a/examples/funcalign/sssrm_image_prediction_example.py b/examples/funcalign/sssrm_image_prediction_example.py new file mode 100644 index 000000000..71e316d9a --- /dev/null +++ b/examples/funcalign/sssrm_image_prediction_example.py @@ -0,0 +1,95 @@ +# Copyright 2016 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import scipy.io +from scipy.stats import stats +import numpy as np + +# Define the Theano flags to use cpu and float64 before theano is imported in brainiak +import os +os.environ['THEANO_FLAGS'] = 'device=cpu, floatX=float64' + +import brainiak.funcalign.sssrm + + + +# Load the input data that contains the movie stimuli for unsupervised training with SS-SRM +movie_file = scipy.io.loadmat('data/movie_data.mat') +movie_data_left = movie_file['movie_data_lh'] +movie_data_right = movie_file['movie_data_rh'] +subjects = movie_data_left.shape[2] + +# Load the input data that contains the image stimuli and its labels for training a classifier +image_file = scipy.io.loadmat('data/image_data.mat') +image_data_left = image_file['image_data_lh'] +image_data_right = image_file['image_data_rh'] + +# Merge the two hemispheres into one piece of data and +# convert data to a list of arrays matching SS-SRM input. +# Each element is a matrix of voxels by TRs_i. +image_data = [] +movie_data = [] +for s in range(subjects): + image_data.append(np.concatenate([image_data_left[:, :, s], image_data_right[:, :, s]], axis=0)) + movie_data.append(np.concatenate([movie_data_left[:, :, s], movie_data_right[:, :, s]], axis=0)) + +# Read the labels of the image data for training the classifier. +labels = scipy.io.loadmat('data/label.mat') +labels = np.squeeze(labels['label']) +image_samples = labels.size + +# Z-score the data +for subject in range(subjects): + image_data[subject] = stats.zscore(image_data[subject], axis=1, ddof=1) + movie_data[subject] = stats.zscore(movie_data[subject], axis=1, ddof=1) + + +# Run cross validation on the blocks of image stimuli (leave one block out) +# Note: There are 8 blocks of 7 samples (TRs) each +print("Running cross-validation with SS-SRM... (this may take a while)") +accuracy = np.zeros((8,)) +for block in range(8): + print("Block ", block) + + # Create masks with the train and validation samples + idx_validation = np.zeros((image_samples,), dtype=bool) + idx_validation[block*7:(block+1)*7] = True + idx_train = np.ones((image_samples,), dtype=bool) + idx_train[block*7:(block+1)*7] = False + + # Divide the samples and labels in train and validation sets + image_data_train = [None] * subjects + labels_train = [None] * subjects + image_data_validation = [None] * subjects + labels_validation = [None] * subjects + for s in range(subjects): + image_data_train[s] = image_data[s][:, idx_train] + labels_train[s] = labels[idx_train] + image_data_validation[s] = image_data[s][:, idx_validation] + labels_validation[s] = labels[idx_validation] + + # Run SS-SRM with the movie data and training image data + model = brainiak.funcalign.sssrm.SSSRM(n_iter=10, features=50, gamma=1.0, cost=0.2, verbose=True) + model.fit(movie_data, labels_train, image_data_train) + + # Predict on the validation samples and check results + prediction = model.predict(image_data_validation) + predicted = 0 + total_predicted = 0 + for s in range(subjects): + predicted += sum(prediction[s] == labels_validation[s]) + total_predicted += prediction[s].size + accuracy[block] = predicted/total_predicted + print("Accuracy for this block: ",accuracy[block]) + +print("SS-SRM: The average accuracy among all subjects is {0:f} +/- {1:f}".format(np.mean(accuracy), np.std(accuracy))) diff --git a/setup.py b/setup.py index b2757fad0..94d9988f6 100644 --- a/setup.py +++ b/setup.py @@ -109,6 +109,8 @@ def build_extensions(self): 'numpy', 'scikit-learn', 'scipy', + 'pymanopt', + 'theano', 'pybind11>=1.7', ], author='Princeton Neuroscience Institute and Intel Corporation', diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py new file mode 100644 index 000000000..427e31f52 --- /dev/null +++ b/tests/funcalign/test_sssrm.py @@ -0,0 +1,172 @@ +# Copyright 2016 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from sklearn.utils.validation import NotFittedError +import pytest + +def test_sssrm(): + import os + os.environ['THEANO_FLAGS'] = 'device=cpu, floatX=float64' + + import numpy as np + import brainiak.funcalign.sssrm + + model = brainiak.funcalign.sssrm.SSSRM() + assert model, "Invalid SSSRM instance!" + + voxels = 100 + align_samples = 400 + samples = 500 + subjects = 2 + features = 3 + n_labels = 4 + + model = brainiak.funcalign.sssrm.SSSRM(verbose=True, n_iter=5, features=features, gamma=10.0, cost=0.1) + assert model, "Invalid SSSRM instance!" + + # Create a Shared response S with K = 3 + theta = np.linspace(-4 * np.pi, 4 * np.pi, samples) + z = np.linspace(-2, 2, samples) + r = z**2 + 1 + x = r * np.sin(theta) + y = r * np.cos(theta) + + S = np.vstack((x, y, z)) + S_align = S[:, :align_samples] + S_classify = S[:, align_samples:] + X = [] + Z = [] + Z2 = [] + W = [] + y = [] + Q, R = np.linalg.qr(np.random.random((voxels, features))) + W.append(Q) + X.append(Q.dot(S_align) + 0.1 * np.random.random((voxels, align_samples))) + Z.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + Z2.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + y.append(np.repeat(np.arange(n_labels), (samples - align_samples)/n_labels)) + + # Check that transform does NOT run before fitting the model + with pytest.raises(NotFittedError) as excinfo: + model.transform(X) + print("Test: transforming before fitting the model") + + # Check that predict does NOT run before fitting the model + with pytest.raises(NotFittedError) as excinfo: + model.predict(X) + print("Test: predicting before fitting the model") + + # Check that it does NOT run with 1 subject on X + with pytest.raises(ValueError) as excinfo: + model.fit(X, y, Z) + print("Test: running SSSRM with 1 subject (alignment)") + + # Create more subjects align and classification data + for subject in range(1, subjects): + Q, R = np.linalg.qr(np.random.random((voxels, features))) + W.append(Q) + X.append(Q.dot(S_align) + 0.1 * np.random.random((voxels, align_samples))) + Z2.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + + # Check that it does NOT run with 1 subject on y + with pytest.raises(ValueError) as excinfo: + model.fit(X, y, Z) + print("Test: running SSSRM with 1 subject (labels)") + + # Create more subjects labels data + for subject in range(1, subjects): + y.append(np.repeat(np.arange(n_labels), (samples - align_samples)/n_labels)) + + # Check that it does NOT run with 1 subject on Z + with pytest.raises(ValueError) as excinfo: + model.fit(X, y, Z) + print("Test: running SSSRM with 1 subject (classif.)") + + # Check that cost is in (0,1) range + model_bad = brainiak.funcalign.sssrm.SSSRM(verbose=False, n_iter=1, features=features, gamma=10.0, cost=1.5) + assert model_bad, "Invalid SSSRM instance!" + with pytest.raises(ValueError) as excinfo: + model_bad.fit(X, y, Z) + print("Test: running SSSRM with wrong cost") + + # Check that gamma is positive + model_bad = brainiak.funcalign.sssrm.SSSRM(verbose=False, n_iter=1, features=features, gamma=-0.1, cost=0.2) + assert model_bad, "Invalid SSSRM instance!" + with pytest.raises(ValueError) as excinfo: + model_bad.fit(X, y, Z) + print("Test: running SSSRM with wrong gamma") + + # Check that runs with 2 subject + model.fit(X, y, Z2) + print("Test: fitting SSSRM successfully") + + assert len(model.w_) == subjects, "Invalid computation of SSSRM! (wrong # subjects in W)" + for subject in range(subjects): + assert model.w_[subject].shape[0] == voxels, "Invalid computation of SSSRM! (wrong # voxels in W)" + assert model.w_[subject].shape[1] == features, "Invalid computation of SSSRM! (wrong # features in W)" + ortho = np.linalg.norm(model.w_[subject].T.dot(model.w_[subject]) - np.eye(model.w_[subject].shape[1]), 'fro') + assert ortho < 1e-7, "A Wi mapping is not orthonormal in SSSRM." + assert model.s_.shape[0] == features, "Invalid computation of SSSRM! (wrong # features in S)" + assert model.s_.shape[1] == align_samples, "Invalid computation of SSSRM! (wrong # samples in S)" + + # Check that it does run to compute the shared response after the model computation + new_s = model.transform(X) + print("Test: transforming with SSSRM successfully") + + assert len(new_s) == subjects, "Invalid computation of SSSRM! (wrong # subjects after transform)" + for subject in range(subjects): + assert new_s[subject].shape[0] == features, "Invalid computation of SSSRM! (wrong # features after transform)" + assert new_s[subject].shape[1] == align_samples, "Invalid computation of SSSRM! (wrong # samples after transform)" + + # Check that it predicts with the model + pred = model.predict(Z2) + print("Test: predicting with SSSRM successfully") + assert len(pred) == subjects, "Invalid computation of SSSRM! (wrong # subjects after predict)" + for subject in range(subjects): + assert pred[subject].size == samples - align_samples, "SSSRM: wrong # answers in predict" + pred_labels = np.logical_and(pred[subject] >=0, pred[subject] < n_labels) + assert np.all(pred_labels), "SSSRM: wrong class number output in predict" + + # Check that it does NOT run with non-matching number of subjects + with pytest.raises(ValueError) as excinfo: + model.transform(X[1]) + print("Test: transforming with non-matching number of subjects") + + # Check that it does not run without enough samples (TRs). + with pytest.raises(ValueError) as excinfo: + model.set_params(features=(align_samples + 1)) + model.fit(X, y, Z2) + print("Test: not enough samples") + + # Check that it does not run with different number of samples (TRs) + X2 = X + X2[0] = Q.dot(S[:,:-2]) + with pytest.raises(ValueError) as excinfo: + model.fit(X2, y, Z2) + print("Test: different number of samples per subject") + + # Create one more subject + Q, R = np.linalg.qr(np.random.random((voxels, features))) + W.append(Q) + X.append(Q.dot(S_align) + 0.1 * np.random.random((voxels, align_samples))) + Z2.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + + # Check that it does not run with different number of subjects in each input + with pytest.raises(ValueError) as excinfo: + model.fit(X, y, Z2) + print("Test: different number of subjects in the inputs") + + y.append(np.repeat(np.arange(n_labels), (samples - align_samples)/n_labels)) + with pytest.raises(ValueError) as excinfo: + model.fit(X, y, Z) + print("Test: different number of subjects in the inputs") diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index dc71daf06..b94649eb9 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -42,3 +42,29 @@ def test_fast_inv(): with pytest.raises(np.linalg.linalg.LinAlgError) as excinfo: fast_inv(a) assert "Last 2 dimensions of the array must be square" in str(excinfo.value) + + +def test_sumexp(): + from brainiak.utils.utils import sumexp_stable + import numpy as np + + data = np.array([[1, 1],[0, 1]]) + sums, maxs, exps = sumexp_stable(data) + assert sums.size == data.shape[1], "Invalid sum(exp(v)) computation (wrong # samples in sums)" + assert exps.shape[0] == data.shape[0], "Invalid exp(v) computation (wrong # features)" + assert exps.shape[1] == data.shape[1], "Invalid exp(v) computation (wrong # samples)" + assert maxs.size == data.shape[1], "Invalid max computation (wrong # samples in maxs)" + + +def test_concatenate_list(): + from brainiak.utils.utils import concatenate_list + import numpy as np + l = [None] * 5 + + l[1] = np.array([0, 1, 2]) + l[3] = np.array([3, 4]) + try: + r = concatenate_list(l, axis=0) + except: + assert True, "Could not concatenate a list" + assert np.all(np.arange(5) == r), "Invalid concatenation of a list of arrays" \ No newline at end of file From 0c4cfee31f820f80f2883e8a37a0af07803024f7 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 13:58:16 -0700 Subject: [PATCH 02/14] Resolved function complexity check --- brainiak/funcalign/sssrm.py | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/brainiak/funcalign/sssrm.py b/brainiak/funcalign/sssrm.py index d4cf52539..e9f7c846e 100644 --- a/brainiak/funcalign/sssrm.py +++ b/brainiak/funcalign/sssrm.py @@ -142,8 +142,8 @@ def fit(self, X, y, Z): for training the MLR classifier. """ - if self.verbose: - logger.info('Starting SS-SRM') + + logger.info('Starting SS-SRM') # Check that the cost value is in range (0.0,1.0) if 0.0 >= self.cost or self.cost >= 1.0: @@ -154,19 +154,12 @@ def fit(self, X, y, Z): raise ValueError("Gamma parameter should be positive.") # Check the number of subjects - if len(X) <= 1: - raise ValueError("There are not enough subjects in the alignment " - "data ({0:d}) to train the model.".format(len(X))) - if len(y) <= 1: - raise ValueError("There are not enough subjects in the labels y " - "({0:d}) to train the model.".format(len(y))) - - if len(Z) <= 1: - raise ValueError("There are not enough subjects in the labeled Z " - "data ({0:d}) to train the model.".format(len(Z))) + if len(X) <= 1 or len(y) <= 1 or len(Z) <= 1: + raise ValueError("There are not enough subjects in the input " + "data to train the model.") if not (len(X) == len(y)) or not (len(X) == len(Z)): - raise ValueError("Different number of subjects in data") + raise ValueError("Different number of subjects in data.") # Check for input data sizes if X[0].shape[1] < self.features: @@ -175,19 +168,17 @@ def fit(self, X, y, Z): "{0:d} features.".format(self.features)) # Check if all subjects have same number of TRs for alignment + # and if alignment and classification data have the same number of + # voxels per subject. Also check that there labels for all the classif. + # sample number_trs = X[0].shape[1] number_subjects = len(X) for subject in range(number_subjects): assert_all_finite(X[subject]) + assert_all_finite(Z[subject]) if X[subject].shape[1] != number_trs: raise ValueError("Different number of alignment samples " "between subjects.") - - # Check if alignment and classification data have the same number of - # voxels per subject. Also check that there labels for all the classif. - # sample - for subject in range(number_subjects): - assert_all_finite(Z[subject]) if X[subject].shape[0] != Z[subject].shape[0]: raise ValueError("Different number of voxels between alignment" " and classification data (subject {0:d})" @@ -347,8 +338,7 @@ def _sssrm(self, data_align, data_sup, labels): # Main loop: for iteration in range(self.n_iter): - if self.verbose: - logger.info('Iteration %d' % (iteration + 1)) + logger.info('Iteration %d' % (iteration + 1)) # Update the mappings Wi w = self._update_w(data_align, data_sup, labels, w, s, theta, bias) From 17bfb287015c2ae2d601933a656e81a4bb64363a Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 14:09:40 -0700 Subject: [PATCH 03/14] remove the verbose flag --- brainiak/funcalign/sssrm.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/brainiak/funcalign/sssrm.py b/brainiak/funcalign/sssrm.py index e9f7c846e..3285b1d95 100644 --- a/brainiak/funcalign/sssrm.py +++ b/brainiak/funcalign/sssrm.py @@ -80,9 +80,6 @@ class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): rand_seed : int, default: 0 Seed for initializing the random number generator. - verbose : boolean, default: False - Verbose mode flag. - Attributes ---------- @@ -114,13 +111,12 @@ class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): """ def __init__(self, n_iter=10, features=50, gamma=1.0, cost=0.5, - rand_seed=0, verbose=False): + rand_seed=0): self.n_iter = n_iter self.features = features self.gamma = gamma self.cost = cost self.rand_seed = rand_seed - self.verbose = verbose return def fit(self, X, y, Z): @@ -142,7 +138,6 @@ def fit(self, X, y, Z): for training the MLR classifier. """ - logger.info('Starting SS-SRM') # Check that the cost value is in range (0.0,1.0) @@ -331,7 +326,7 @@ def _sssrm(self, data_align, data_sup, labels): theta, bias = self._update_classifier(data_sup, labels, w, classes) # calculate and print the objective function - if self.verbose: + if logger.isEnabledFor(20): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function %f' % objective) @@ -344,7 +339,7 @@ def _sssrm(self, data_align, data_sup, labels): w = self._update_w(data_align, data_sup, labels, w, s, theta, bias) # Output the objective function - if self.verbose: + if logger.isEnabledFor(20): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function after updating Wi %f' @@ -354,7 +349,7 @@ def _sssrm(self, data_align, data_sup, labels): s = SSSRM._compute_shared_response(data_align, w) # Output the objective function - if self.verbose: + if logger.isEnabledFor(20): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function after updating S %f' @@ -364,7 +359,7 @@ def _sssrm(self, data_align, data_sup, labels): theta, bias = self._update_classifier(data_sup, labels, w, classes) # Output the objective function - if self.verbose: + if logger.isEnabledFor(20): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function after updating MLR %f' @@ -485,8 +480,7 @@ def _update_w(self, data_align, data_sup, labels, w, s, theta, bias): broadcastable=(True, False)) for subject in range(subjects): - if self.verbose: - logger.info('Subject Wi %d' % subject) + logger.info('Subject Wi %d' % subject) # Solve for subject i # Create the theano function w_th = T.matrix(name='W', dtype=theano.config.floatX) From a008581bc684ad549ccdb759147000a120a222f0 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 14:58:17 -0700 Subject: [PATCH 04/14] Remove verbose flag from test --- tests/funcalign/test_sssrm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py index 427e31f52..329b058dc 100644 --- a/tests/funcalign/test_sssrm.py +++ b/tests/funcalign/test_sssrm.py @@ -31,7 +31,7 @@ def test_sssrm(): features = 3 n_labels = 4 - model = brainiak.funcalign.sssrm.SSSRM(verbose=True, n_iter=5, features=features, gamma=10.0, cost=0.1) + model = brainiak.funcalign.sssrm.SSSRM(n_iter=5, features=features, gamma=10.0, cost=0.1) assert model, "Invalid SSSRM instance!" # Create a Shared response S with K = 3 @@ -93,14 +93,14 @@ def test_sssrm(): print("Test: running SSSRM with 1 subject (classif.)") # Check that cost is in (0,1) range - model_bad = brainiak.funcalign.sssrm.SSSRM(verbose=False, n_iter=1, features=features, gamma=10.0, cost=1.5) + model_bad = brainiak.funcalign.sssrm.SSSRM(n_iter=1, features=features, gamma=10.0, cost=1.5) assert model_bad, "Invalid SSSRM instance!" with pytest.raises(ValueError) as excinfo: model_bad.fit(X, y, Z) print("Test: running SSSRM with wrong cost") # Check that gamma is positive - model_bad = brainiak.funcalign.sssrm.SSSRM(verbose=False, n_iter=1, features=features, gamma=-0.1, cost=0.2) + model_bad = brainiak.funcalign.sssrm.SSSRM(n_iter=1, features=features, gamma=-0.1, cost=0.2) assert model_bad, "Invalid SSSRM instance!" with pytest.raises(ValueError) as excinfo: model_bad.fit(X, y, Z) From b9c246eb5ffb9498da09ee3ddb8fd5c04f7497ad Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 15:10:34 -0700 Subject: [PATCH 05/14] Added test for data fit of the model --- tests/funcalign/test_sssrm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py index 329b058dc..f218f5562 100644 --- a/tests/funcalign/test_sssrm.py +++ b/tests/funcalign/test_sssrm.py @@ -116,6 +116,8 @@ def test_sssrm(): assert model.w_[subject].shape[1] == features, "Invalid computation of SSSRM! (wrong # features in W)" ortho = np.linalg.norm(model.w_[subject].T.dot(model.w_[subject]) - np.eye(model.w_[subject].shape[1]), 'fro') assert ortho < 1e-7, "A Wi mapping is not orthonormal in SSSRM." + difference = np.linalg.norm(X[subject] - model.w_[subject].dot(model.s_), 'fro') + assert difference < 1e-2, "Model seems incorrectly computed." assert model.s_.shape[0] == features, "Invalid computation of SSSRM! (wrong # features in S)" assert model.s_.shape[1] == align_samples, "Invalid computation of SSSRM! (wrong # samples in S)" From faf7fba0248e3c241052bdac87690147a6064a7a Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 15:31:35 -0700 Subject: [PATCH 06/14] Removed the verbose flag from example. --- examples/funcalign/sssrm_image_prediction_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/funcalign/sssrm_image_prediction_example.py b/examples/funcalign/sssrm_image_prediction_example.py index 71e316d9a..2b0406413 100644 --- a/examples/funcalign/sssrm_image_prediction_example.py +++ b/examples/funcalign/sssrm_image_prediction_example.py @@ -79,7 +79,7 @@ labels_validation[s] = labels[idx_validation] # Run SS-SRM with the movie data and training image data - model = brainiak.funcalign.sssrm.SSSRM(n_iter=10, features=50, gamma=1.0, cost=0.2, verbose=True) + model = brainiak.funcalign.sssrm.SSSRM(n_iter=10, features=50, gamma=1.0, cost=0.2) model.fit(movie_data, labels_train, image_data_train) # Predict on the validation samples and check results From 032bea28515c217328f7b3aade9f313529a81cd6 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 15:36:34 -0700 Subject: [PATCH 07/14] modify 20 by INFO constant --- brainiak/funcalign/sssrm.py | 8 ++++---- tests/funcalign/test_sssrm.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/brainiak/funcalign/sssrm.py b/brainiak/funcalign/sssrm.py index 3285b1d95..b5bbd0327 100644 --- a/brainiak/funcalign/sssrm.py +++ b/brainiak/funcalign/sssrm.py @@ -326,7 +326,7 @@ def _sssrm(self, data_align, data_sup, labels): theta, bias = self._update_classifier(data_sup, labels, w, classes) # calculate and print the objective function - if logger.isEnabledFor(20): + if logger.isEnabledFor(logging.INFO): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function %f' % objective) @@ -339,7 +339,7 @@ def _sssrm(self, data_align, data_sup, labels): w = self._update_w(data_align, data_sup, labels, w, s, theta, bias) # Output the objective function - if logger.isEnabledFor(20): + if logger.isEnabledFor(logging.INFO): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function after updating Wi %f' @@ -349,7 +349,7 @@ def _sssrm(self, data_align, data_sup, labels): s = SSSRM._compute_shared_response(data_align, w) # Output the objective function - if logger.isEnabledFor(20): + if logger.isEnabledFor(logging.INFO): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function after updating S %f' @@ -359,7 +359,7 @@ def _sssrm(self, data_align, data_sup, labels): theta, bias = self._update_classifier(data_sup, labels, w, classes) # Output the objective function - if logger.isEnabledFor(20): + if logger.isEnabledFor(logging.INFO): objective = self._objective_function(data_align, data_sup, labels, w, s, theta, bias) logger.info('Objective function after updating MLR %f' diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py index f218f5562..091195ef9 100644 --- a/tests/funcalign/test_sssrm.py +++ b/tests/funcalign/test_sssrm.py @@ -117,7 +117,8 @@ def test_sssrm(): ortho = np.linalg.norm(model.w_[subject].T.dot(model.w_[subject]) - np.eye(model.w_[subject].shape[1]), 'fro') assert ortho < 1e-7, "A Wi mapping is not orthonormal in SSSRM." difference = np.linalg.norm(X[subject] - model.w_[subject].dot(model.s_), 'fro') - assert difference < 1e-2, "Model seems incorrectly computed." + datanorm = np.linalg.norm(X[subject], 'fro') + assert difference/datanorm < 1.0, "Model seems incorrectly computed." assert model.s_.shape[0] == features, "Invalid computation of SSSRM! (wrong # features in S)" assert model.s_.shape[1] == align_samples, "Invalid computation of SSSRM! (wrong # samples in S)" From f0c040eec92d44a267669267fff85ec563876f22 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 15:42:49 -0700 Subject: [PATCH 08/14] added logging to test to include missing coverage --- tests/funcalign/test_sssrm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py index 091195ef9..ba5d15f71 100644 --- a/tests/funcalign/test_sssrm.py +++ b/tests/funcalign/test_sssrm.py @@ -106,6 +106,10 @@ def test_sssrm(): model_bad.fit(X, y, Z) print("Test: running SSSRM with wrong gamma") + # Set the logging level to INFO + import logging + logging.basicConfig(level=logging.INFO) + # Check that runs with 2 subject model.fit(X, y, Z2) print("Test: fitting SSSRM successfully") From 344cdd4b16ef4d69efefa7f03a6e9742ecff8800 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Tue, 20 Sep 2016 16:04:18 -0700 Subject: [PATCH 09/14] Corrected minor doc errors --- brainiak/funcalign/sssrm.py | 2 ++ brainiak/utils/utils.py | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/brainiak/funcalign/sssrm.py b/brainiak/funcalign/sssrm.py index b5bbd0327..b37f460c7 100644 --- a/brainiak/funcalign/sssrm.py +++ b/brainiak/funcalign/sssrm.py @@ -59,7 +59,9 @@ class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): l2 regularization) in a semi-supervised manner: .. math:: X_i \\approx W_i S ,~for~all~i=1\dots N + and + .. math:: f(W_i^T Z_i)=y_i ,~for~all~i=1\dots N Parameters diff --git a/brainiak/utils/utils.py b/brainiak/utils/utils.py index 1cccd0e0f..70c8d545d 100644 --- a/brainiak/utils/utils.py +++ b/brainiak/utils/utils.py @@ -139,10 +139,10 @@ def sumexp_stable(data): def concatenate_list(l, axis=0): """Construct a numpy array by stacking arrays in a list - Parameter + Parameters ---------- - data : list of arrays, arrays have same shape in all but one dimenstion or + data : list of arrays, arrays have same shape in all but one dimension or elements are None The list of arrays to be concatenated. @@ -155,7 +155,6 @@ def concatenate_list(l, axis=0): data_stacked : array The resulting concatenated array. - """ # Get the indexes of the arrays in the list mask = [] From 92a55a32d6ede93f9c6bb7c40ea7dfcc9731844b Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Thu, 22 Sep 2016 10:43:57 -0700 Subject: [PATCH 10/14] modified pr-check for theano correct testing --- pr-check.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pr-check.sh b/pr-check.sh index 27ed1f4f9..4c9793f31 100755 --- a/pr-check.sh +++ b/pr-check.sh @@ -122,6 +122,7 @@ pip3 install $ignore_installed -U -e . || \ # build documentation cd docs +export THEANO_FLAGS='device=cpu,floatX=float64' git clean -Xf . if [ ! -z $SLURM_NODELIST ] then From 18051d6646d098fa91012af045d3d7644451f2a2 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Wed, 28 Sep 2016 16:51:52 -0700 Subject: [PATCH 11/14] Divided tests --- tests/funcalign/test_sssrm.py | 64 ++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py index ba5d15f71..ea48a61e4 100644 --- a/tests/funcalign/test_sssrm.py +++ b/tests/funcalign/test_sssrm.py @@ -11,19 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from sklearn.utils.validation import NotFittedError import pytest -def test_sssrm(): +def test_instance(): import os os.environ['THEANO_FLAGS'] = 'device=cpu, floatX=float64' - - import numpy as np import brainiak.funcalign.sssrm model = brainiak.funcalign.sssrm.SSSRM() assert model, "Invalid SSSRM instance!" + +def test_wrong_input(): + import os + os.environ['THEANO_FLAGS'] = 'device=cpu, floatX=float64' + + from sklearn.utils.validation import NotFittedError + import numpy as np + import brainiak.funcalign.sssrm + voxels = 100 align_samples = 400 samples = 500 @@ -106,6 +112,55 @@ def test_sssrm(): model_bad.fit(X, y, Z) print("Test: running SSSRM with wrong gamma") + +def test_sssrm(): + import os + os.environ['THEANO_FLAGS'] = 'device=cpu, floatX=float64' + + from sklearn.utils.validation import NotFittedError + import numpy as np + import brainiak.funcalign.sssrm + + voxels = 100 + align_samples = 400 + samples = 500 + subjects = 2 + features = 3 + n_labels = 4 + + model = brainiak.funcalign.sssrm.SSSRM(n_iter=5, features=features, gamma=10.0, cost=0.1) + assert model, "Invalid SSSRM instance!" + + # Create a Shared response S with K = 3 + theta = np.linspace(-4 * np.pi, 4 * np.pi, samples) + z = np.linspace(-2, 2, samples) + r = z**2 + 1 + x = r * np.sin(theta) + y = r * np.cos(theta) + + S = np.vstack((x, y, z)) + S_align = S[:, :align_samples] + S_classify = S[:, align_samples:] + X = [] + Z = [] + Z2 = [] + y = [] + Q, R = np.linalg.qr(np.random.random((voxels, features))) + X.append(Q.dot(S_align) + 0.1 * np.random.random((voxels, align_samples))) + Z.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + Z2.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + y.append(np.repeat(np.arange(n_labels), (samples - align_samples)/n_labels)) + + # Create more subjects align and classification data + for subject in range(1, subjects): + Q, R = np.linalg.qr(np.random.random((voxels, features))) + X.append(Q.dot(S_align) + 0.1 * np.random.random((voxels, align_samples))) + Z2.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) + + # Create more subjects labels data + for subject in range(1, subjects): + y.append(np.repeat(np.arange(n_labels), (samples - align_samples)/n_labels)) + # Set the logging level to INFO import logging logging.basicConfig(level=logging.INFO) @@ -164,7 +219,6 @@ def test_sssrm(): # Create one more subject Q, R = np.linalg.qr(np.random.random((voxels, features))) - W.append(Q) X.append(Q.dot(S_align) + 0.1 * np.random.random((voxels, align_samples))) Z2.append(Q.dot(S_classify) + 0.1 * np.random.random((voxels, samples - align_samples))) From d59cdf460ec62165e787ac9add0c6a950bf89873 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Fri, 30 Sep 2016 10:18:18 -0700 Subject: [PATCH 12/14] Modified documentations --- brainiak/funcalign/sssrm.py | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/brainiak/funcalign/sssrm.py b/brainiak/funcalign/sssrm.py index b37f460c7..2e25dc604 100644 --- a/brainiak/funcalign/sssrm.py +++ b/brainiak/funcalign/sssrm.py @@ -58,11 +58,11 @@ class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): data to train a Multinomial Logistic Regression (MLR) classifier (with l2 regularization) in a semi-supervised manner: - .. math:: X_i \\approx W_i S ,~for~all~i=1\dots N + .. math:: (1-\alpha) Loss_{SRM}(W_i,S;X_i) + .. math:: + \alpha/\gamma Loss_{MLR}(\theta, bias; {(W_i^T*Z_i, y_i}) + .. math:: + R(\theta) - and - - .. math:: f(W_i^T Z_i)=y_i ,~for~all~i=1\dots N + (see Equations (1) and (4) in [Turek2016]_). Parameters ---------- @@ -76,7 +76,7 @@ class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): gamma : float, default: 1.0 Regularization parameter for the classifier. - cost : float, default: 0.5 + alpha : float, default: 0.5 Balance parameter between the SRM term and the MLR term. rand_seed : int, default: 0 @@ -112,12 +112,12 @@ class SSSRM(BaseEstimator, ClassifierMixin, TransformerMixin): This is a single node version. """ - def __init__(self, n_iter=10, features=50, gamma=1.0, cost=0.5, + def __init__(self, n_iter=10, features=50, gamma=1.0, alpha=0.5, rand_seed=0): self.n_iter = n_iter self.features = features self.gamma = gamma - self.cost = cost + self.alpha = alpha self.rand_seed = rand_seed return @@ -142,9 +142,9 @@ def fit(self, X, y, Z): """ logger.info('Starting SS-SRM') - # Check that the cost value is in range (0.0,1.0) - if 0.0 >= self.cost or self.cost >= 1.0: - raise ValueError("Cost parameter should be in range (0.0, 1.0)") + # Check that the alpha value is in range (0.0,1.0) + if 0.0 >= self.alpha or self.alpha >= 1.0: + raise ValueError("Alpha parameter should be in range (0.0, 1.0)") # Check that the regularizer value is positive if 0.0 >= self.gamma: @@ -412,7 +412,7 @@ def _update_classifier(self, data, labels, w, classes): total_samples_S = S.shared(total_samples) theta_th = T.matrix(name='theta', dtype=theano.config.floatX) bias_th = T.col(name='bias', dtype=theano.config.floatX) - constf2 = S.shared(self.cost / self.gamma, allow_downcast=True) + constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True) weights_th = S.shared(weights) log_p_y_given_x = \ @@ -489,7 +489,7 @@ def _update_w(self, data_align, data_sup, labels, w, s, theta, bias): data_srm_subject = \ S.shared(data_align[subject].astype(theano.config.floatX)) constf1 = \ - S.shared((1 - self.cost) * 0.5 / data_align[subject].shape[1], + S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1], allow_downcast=True) f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2) @@ -498,7 +498,7 @@ def _update_w(self, data_align, data_sup, labels, w, s, theta, bias): data_sup_subject = \ S.shared(data_sup[subject].astype(theano.config.floatX)) labels_S = S.shared(labels[subject]) - constf2 = S.shared(-self.cost / self.gamma + constf2 = S.shared(-self.alpha / self.gamma / data_sup[subject].shape[1], allow_downcast=True) @@ -572,8 +572,8 @@ def _objective_function(self, data_align, data_sup, labels, w, s, theta, bias): """Compute the objective function of the Semi-Supervised SRM - .. math:: (1-C)*Loss_{SRM}(W_i,S;X_i) - .. math:: + C/\gamma * Loss_MLR(\theta, bias; {(W_i^T*Z_i, y_i}) + .. math:: (1-\alpha)*Loss_{SRM}(W_i,S;X_i) + .. math:: + \alpha/\gamma * Loss_{MLR}(\theta, bias; {(W_i^T*Z_i, y_i}) .. math:: + R(\theta) Parameters @@ -617,7 +617,7 @@ def _objective_function(self, data_align, data_sup, labels, w, s, theta, f_val = 0.0 for subject in range(subjects): samples = data_align[subject].shape[1] - f_val += (1 - self.cost) * (0.5 / samples) \ + f_val += (1 - self.alpha) * (0.5 / samples) \ * np.linalg.norm(data_align[subject] - w[subject].dot(s), 'fro')**2 @@ -631,7 +631,7 @@ def _objective_function_subject(self, data_align, data_sup, labels, w, s, """Compute the objective function for one subject. .. math:: (1-C)*Loss_{SRM}_i(W_i,S;X_i) - .. math:: + C/\gamma * Loss_MLR_i(\theta, bias; {(W_i^T*Z_i, y_i}) + .. math:: + C/\gamma * Loss_{MLR_i}(\theta, bias; {(W_i^T*Z_i, y_i}) .. math:: + R(\theta) Parameters @@ -669,7 +669,7 @@ def _objective_function_subject(self, data_align, data_sup, labels, w, s, # Compute the SRM loss f_val = 0.0 samples = data_align.shape[1] - f_val += (1 - self.cost) * (0.5 / samples) \ + f_val += (1 - self.alpha) * (0.5 / samples) \ * np.linalg.norm(data_align - w.dot(s), 'fro')**2 # Compute the MLR loss @@ -717,7 +717,7 @@ def _loss_lr_subject(self, data, labels, w, theta, bias): for sample in range(samples): label = labels[sample] aux += thetaT_wi_zi_plus_bias[label, sample] - return self.cost / samples / self.gamma * (sum_exp_values.sum() - aux) + return self.alpha / samples / self.gamma * (sum_exp_values.sum() - aux) def _loss_lr(self, data, labels, w, theta, bias): """Compute the Loss MLR (with the regularization) From 912e84835fbb29adb5db787f48fef290a14f82b3 Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Fri, 30 Sep 2016 10:33:58 -0700 Subject: [PATCH 13/14] Modified cost by alpha --- tests/funcalign/test_sssrm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/funcalign/test_sssrm.py b/tests/funcalign/test_sssrm.py index ea48a61e4..aca4f8ca5 100644 --- a/tests/funcalign/test_sssrm.py +++ b/tests/funcalign/test_sssrm.py @@ -37,7 +37,7 @@ def test_wrong_input(): features = 3 n_labels = 4 - model = brainiak.funcalign.sssrm.SSSRM(n_iter=5, features=features, gamma=10.0, cost=0.1) + model = brainiak.funcalign.sssrm.SSSRM(n_iter=5, features=features, gamma=10.0, alpha=0.1) assert model, "Invalid SSSRM instance!" # Create a Shared response S with K = 3 @@ -98,15 +98,15 @@ def test_wrong_input(): model.fit(X, y, Z) print("Test: running SSSRM with 1 subject (classif.)") - # Check that cost is in (0,1) range - model_bad = brainiak.funcalign.sssrm.SSSRM(n_iter=1, features=features, gamma=10.0, cost=1.5) + # Check that alpha is in (0,1) range + model_bad = brainiak.funcalign.sssrm.SSSRM(n_iter=1, features=features, gamma=10.0, alpha=1.5) assert model_bad, "Invalid SSSRM instance!" with pytest.raises(ValueError) as excinfo: model_bad.fit(X, y, Z) - print("Test: running SSSRM with wrong cost") + print("Test: running SSSRM with wrong alpha") # Check that gamma is positive - model_bad = brainiak.funcalign.sssrm.SSSRM(n_iter=1, features=features, gamma=-0.1, cost=0.2) + model_bad = brainiak.funcalign.sssrm.SSSRM(n_iter=1, features=features, gamma=-0.1, alpha=0.2) assert model_bad, "Invalid SSSRM instance!" with pytest.raises(ValueError) as excinfo: model_bad.fit(X, y, Z) @@ -128,7 +128,7 @@ def test_sssrm(): features = 3 n_labels = 4 - model = brainiak.funcalign.sssrm.SSSRM(n_iter=5, features=features, gamma=10.0, cost=0.1) + model = brainiak.funcalign.sssrm.SSSRM(n_iter=5, features=features, gamma=10.0, alpha=0.1) assert model, "Invalid SSSRM instance!" # Create a Shared response S with K = 3 From 107bb423f2b0b6068c42aefa96e3d515043fd2db Mon Sep 17 00:00:00 2001 From: Javier Turek Date: Fri, 30 Sep 2016 13:32:17 -0700 Subject: [PATCH 14/14] Corrected the test for one utility function --- tests/utils/test_utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 0b2894935..74def6821 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -63,10 +63,9 @@ def test_concatenate_list(): l[1] = np.array([0, 1, 2]) l[3] = np.array([3, 4]) - try: - r = concatenate_list(l, axis=0) - except: - assert True, "Could not concatenate a list" + + r = concatenate_list(l, axis=0) + assert np.all(np.arange(5) == r), "Invalid concatenation of a list of arrays"