Lib updates
This commit is contained in:
		| @@ -10,7 +10,7 @@ from fluents.workflow import Function, OptionsDialog, Options | |||||||
| from fluents.dataset import Dataset | from fluents.dataset import Dataset | ||||||
| from fluents import plots, dataset, workflow, logger | from fluents import plots, dataset, workflow, logger | ||||||
| import scipy | import scipy | ||||||
| from engines import pca, pls | from engines import pca, pls, nipals_lpls | ||||||
| from cx_stats import leverage, variances, hotelling | from cx_stats import leverage, variances, hotelling | ||||||
| from cx_utils import mat_center | from cx_utils import mat_center | ||||||
| from validation import * | from validation import * | ||||||
| @@ -238,14 +238,14 @@ class PLS(Model): | |||||||
|         """Estimates cut off on significant vars by controlling fdr.""" |         """Estimates cut off on significant vars by controlling fdr.""" | ||||||
|  |  | ||||||
|         if self._options['calc_qvals']==True: |         if self._options['calc_qvals']==True: | ||||||
|             qvals_sorted, qvals = pls_qvals(a, b, |             qvals = pls_qvals(a, b, | ||||||
|                                             aopt=None, |                               aopt=None, | ||||||
|                                             alpha=reg, |                               alpha=reg, | ||||||
|                                             n_iter=n_iter, |                               n_iter=n_iter, | ||||||
|                                             algo='pls', |                               algo='pls', | ||||||
|                                             sim_method=sim_method) |                               sim_method=sim_method) | ||||||
|             self.model['qval'] = qvals |             self.model['qval'] = qvals | ||||||
|             self.model['qval_sorted'] = qvals_sorted |             #self.model['qval_sorted'] = qvals_sorted | ||||||
|         else: |         else: | ||||||
|            self.model['qval'] = None |            self.model['qval'] = None | ||||||
|            self.model['qval_sorted'] = None  |            self.model['qval_sorted'] = None  | ||||||
| @@ -276,18 +276,19 @@ class PLS(Model): | |||||||
|         pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))] |         pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))] | ||||||
|         zero_dim = ['_doe',['0']] # null dim, vector (hidden) |         zero_dim = ['_doe',['0']] # null dim, vector (hidden) | ||||||
|  |  | ||||||
|         match_ids = {'E':[ids_0, ids_1], |         match_ids = {'E' : [ids_0, ids_1], | ||||||
|                      'P':[ids_1, pc_ids], |                      'P' : [ids_1, pc_ids], | ||||||
|                      'T':[ids_0, pc_ids], |                      'T' : [ids_0, pc_ids], | ||||||
|                      'W': [ids_1, pc_ids], |                      'W' : [ids_1, pc_ids], | ||||||
|                      'R': [ids_1, pc_ids], |                      'R' : [ids_1, pc_ids], | ||||||
|                      'Q':[ids_3, pc_ids], |                      'Q' : [ids_3, pc_ids], | ||||||
|                      'F':[ids_0, ids_3], |                      'F' : [ids_0, ids_3], | ||||||
|                      'B':[ids_1, ids_3], |                      'B' : [ids_1, ids_3], | ||||||
|                      'qval':[ids_1, zero_dim], |                      'qval' : [ids_1, zero_dim], | ||||||
|                      'qval_sorted':[ids_1, zero_dim], |                      'qval_sorted':[ids_1, zero_dim], | ||||||
|                      'w_tsq':[ids_1, zero_dim], |                      'w_tsq' : [ids_1, zero_dim], | ||||||
|                      'rmsep':[ids_3, pc_ids], |                      'rmsep' : [ids_3, pc_ids], | ||||||
|  |                      'CP': [ids_1, pc_ids] | ||||||
|                      } |                      } | ||||||
|          |          | ||||||
|         array = self.model[name]      |         array = self.model[name]      | ||||||
| @@ -302,7 +303,7 @@ class PLS(Model): | |||||||
|             #except: |             #except: | ||||||
|             #    logger.log('debug', 'Plot: %s failed' %plt) |             #    logger.log('debug', 'Plot: %s failed' %plt) | ||||||
|         return out |         return out | ||||||
|              |      | ||||||
|     def run_o(self, a, b): |     def run_o(self, a, b): | ||||||
|         """Run PLS with present options.""" |         """Run PLS with present options.""" | ||||||
|         options = self._options |         options = self._options | ||||||
| @@ -330,6 +331,17 @@ class PLS(Model): | |||||||
|         self.model['var_y'] = var_y |         self.model['var_y'] = var_y | ||||||
|         self.model['exp_var_y'] = exp_var_y |         self.model['exp_var_y'] = exp_var_y | ||||||
|          |          | ||||||
|  |         if options['calc_corrloads']: | ||||||
|  |             corr_load = scipy.empty_like(self.model['P'].copy()) | ||||||
|  |             T = self.model['T'] | ||||||
|  |             X = self._data['X'] | ||||||
|  |             # For each variable/attribute in original matrix (not meancentered) | ||||||
|  |             for i,score in enumerate(T.T): | ||||||
|  |                 for j, profile in enumerate(X.T): | ||||||
|  |                     corrs = scipy.corrcoef(score, profile) | ||||||
|  |                     corr_load[j,i] = corrs[0,1] | ||||||
|  |             self.model['CP'] = corr_load | ||||||
|  |              | ||||||
|         if options['calc_conf']: |         if options['calc_conf']: | ||||||
|             self.confidence(**options.confidence_options()) |             self.confidence(**options.confidence_options()) | ||||||
|  |  | ||||||
| @@ -353,6 +365,141 @@ class PLS(Model): | |||||||
|             #run with current data and options |             #run with current data and options | ||||||
|             return self.run_o(a, b) |             return self.run_o(a, b) | ||||||
|  |  | ||||||
class LPLS(Model):
    """L-shaped PLS model built from three data blocks named X, Y and Z.

    The numerical work is delegated to the callables stored in the options
    (``engine``, ``val_engine``) and to ``lpls_jk`` and ``hotelling``; this
    class feeds options and data into those calls and keeps the results in
    ``self.model``.
    """

    def __init__(self, id='lpls', name='LPLS'):
        Model.__init__(self, id, name)
        self._options = LplsOptions()

    def validation(self, opt):
        """Store ``rmsep`` and ``aopt`` in the model.

        When ``opt['calc_cv']`` is set, both values are returned by
        ``opt['val_engine']``.  Otherwise ``rmsep`` is None and ``aopt`` is
        copied from the options.
        """
        if opt['calc_cv']:
            # NOTE(review): X, Y and Z are read from self.model here, while
            # run() calls validation() before make_model() -- confirm that
            # these entries exist at this point.
            val_engine = opt['val_engine']
            rmsep, aopt = val_engine(self.model['X'], self.model['Y'],
                                     self.model['Z'], opt['amax'],
                                     opt['n_sets'], opt['xz_alpha'])
        else:
            rmsep, aopt = None, opt['aopt']
        self.model['rmsep'] = rmsep
        self.model['aopt'] = aopt

    def confidence(self, opt):
        """Store confidence measures for the X- and Z-weights.

        Writes ``tsqx`` and ``tsqz`` to the model: the ``hotelling`` results
        for W and L when ``opt['calc_conf']`` is set, None otherwise.
        """
        aopt = self.model['aopt']
        if not opt['calc_conf']:
            self.model['tsqx'] = None
            self.model['tsqz'] = None
            return

        # n_sets must come from the options; there is no such local or
        # module-level name defined in this class.
        Wx, Wz = lpls_jk(self.model['X'], self.model['Y'], self.model['Z'],
                         aopt, opt['n_sets'])
        Wcal = self.model['W'][:, :aopt]
        Lcal = self.model['L'][:, :aopt]
        # ensure that Wcal is scaled: multiply each column by the norm of
        # the matching score column
        tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:, :aopt])
        Wcal = Wcal*tnorm

        # hotelling() takes these five settings positionally, in this order
        hotelling_args = (opt['p_center'], opt['crot'], opt['alpha'],
                          opt['strict'], opt['cov_center'])
        self.model['tsqx'] = hotelling(Wx, Wcal, *hotelling_args)
        self.model['tsqz'] = hotelling(Wz, Lcal, *hotelling_args)

    def permutation_confidence(self, opt):
        """Placeholder: q-values are not computed for L-PLS.

        Sets both ``qval`` and ``qval_sorted`` to None; ``opt`` is unused.
        """
        self.model['qval'] = None
        self.model['qval_sorted'] = None

    def make_model(self, opt):
        """Run ``opt['engine']`` on the raw arrays and merge its result
        (a mapping) into ``self.model``.
        """
        engine = opt['engine']
        dat = engine(self._data['X'], self._data['Y'], self._data['Z'],
                     opt['amax'], opt['xz_alpha'], opt['center_mth'],
                     opt['mode'], opt['scale'], False)
        self.model.update(dat)

    def as_dataset(self, name, dtype='Dataset'):
        """Return the model parameter ``name`` wrapped in a Dataset.

        Returns None when ``name`` is not a model entry.  Raises KeyError
        when ``name`` is a model entry without an identifier mapping below.
        ``dtype`` is accepted but not used.
        """
        if name not in self.model:
            return None

        DX, DY, DZ = self._dataset['X'], self._dataset['Y'], self._dataset['Z']
        dim_name_0, dim_name_1 = DX.get_dim_name()
        _, dim_name_3 = DY.get_dim_name()
        dim_name_4, _ = DZ.get_dim_name()

        # samples
        ids_0 = [dim_name_0, DX.get_identifiers(dim_name_0, sorted=True)]
        # x vars (genes)
        ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)]
        # y vars (sample descriptors)
        ids_3 = [dim_name_3, DY.get_identifiers(dim_name_3, sorted=True)]
        # z vars (variable descriptors)
        ids_4 = [dim_name_4, DZ.get_identifiers(dim_name_4, sorted=True)]
        # components (hidden)
        pc_ids = ['_comp', [str(i) for i in range(self._options['amax'])]]
        # null dim, vector (hidden)
        zero_dim = ['_doe', ['0']]

        match_ids = {'E' : [ids_0, ids_1],
                     'P' : [ids_1, pc_ids],
                     'T' : [ids_0, pc_ids],
                     'W' : [ids_1, pc_ids],
                     'L' : [ids_4, pc_ids],
                     'Q' : [ids_3, pc_ids],
                     'F' : [ids_0, ids_3],
                     'B' : [ids_1, ids_3],
                     'tsqx' : [ids_1, zero_dim],
                     'tsqz' : [ids_4, zero_dim],
                     'K' : [ids_1, pc_ids],
                     'rmsep' : [ids_3, pc_ids]
                     }

        return Dataset(self.model[name], identifiers=match_ids[name],
                       name=name)

    def get_out_plots(self, options):
        """Instantiate every plot class in ``options['out_plots']`` on self."""
        return [plt(self) for plt in options['out_plots']]

    def run(self, a, b, c):
        """Run L-PLS with present options.

        ``a``, ``b`` and ``c`` are the X, Y and Z datasets.  Returns the
        requested output datasets followed by the requested plots.
        """
        options = self._options
        for key, dataset_in in (('X', a), ('Y', b), ('Z', c)):
            self._dataset[key] = dataset_in
            self._data[key] = dataset_in.asarray()

        self.validation(options)
        self.make_model(options)
        if options['calc_conf']:
            self.confidence(options)

        out = [self.as_dataset(p) for p in options['out_data']]
        out.extend(self.get_out_plots(options))
        return out

    def run_gui(self, a, b, c):
        """Run LPLS with option gui.

        Returns the result of run() when the dialog is confirmed, and None
        otherwise.
        """
        dialog = LplsOptionsDialog([a, b, c], self._options)
        dialog.show_all()
        response = dialog.run()
        dialog.hide()

        if response == gtk.RESPONSE_OK:
            # set output data and plots
            dialog.set_output()
            # run with current data and options
            return self.run(a, b, c)
|  |  | ||||||
| class PcaOptions(Options): | class PcaOptions(Options): | ||||||
|     """Options for Principal Component Analysis. |     """Options for Principal Component Analysis. | ||||||
| @@ -403,7 +550,9 @@ class PcaOptions(Options): | |||||||
|                             ] |                             ] | ||||||
|          |          | ||||||
|         opt['out_data'] = ['T','P', 'p_tsq'] |         opt['out_data'] = ['T','P', 'p_tsq'] | ||||||
|         opt['out_plots'] = [blmplots.PcaScorePlot,blmplots.PcaLoadingPlot,blmplots.LineViewXc] |         opt['out_plots'] = [blmplots.PcaScorePlot, | ||||||
|  |                             blmplots.PcaLoadingPlot, | ||||||
|  |                             blmplots.LineViewXc] | ||||||
|  |  | ||||||
|         self.update(opt) |         self.update(opt) | ||||||
|          |          | ||||||
| @@ -444,6 +593,7 @@ class PlsOptions(Options): | |||||||
|         opt['center_mth'] = mat_center |         opt['center_mth'] = mat_center | ||||||
|         opt['scale'] = 'scores' |         opt['scale'] = 'scores' | ||||||
|  |  | ||||||
|  |         opt['calc_corrloads'] = True | ||||||
|         opt['calc_conf'] = False |         opt['calc_conf'] = False | ||||||
|         opt['n_sets'] = 5 |         opt['n_sets'] = 5 | ||||||
|         opt['strict'] = True |         opt['strict'] = True | ||||||
| @@ -468,7 +618,8 @@ class PlsOptions(Options): | |||||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), |                             (blmplots.PlsLoadingPlot, 'Loadings', True), | ||||||
|                             (blmplots.LineViewXc, 'Line view', True), |                             (blmplots.LineViewXc, 'Line view', True), | ||||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), |                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False) |                             (blmplots.RMSEPPlot, 'RMSEP', False), | ||||||
|  |                             (blmplots.PlsCorrelationLoadingPlot, 'Corr. loadings', True) | ||||||
|                             ] |                             ] | ||||||
|          |          | ||||||
|         opt['out_data'] = ['T','P', 'p_tsq'] |         opt['out_data'] = ['T','P', 'p_tsq'] | ||||||
| @@ -494,14 +645,87 @@ class PlsOptions(Options): | |||||||
|                     'strict', 'crot', 'cov_center']  |                     'strict', 'crot', 'cov_center']  | ||||||
|         return self._copy_from_list(opt_list)   |         return self._copy_from_list(opt_list)   | ||||||
|      |      | ||||||
|  |  | ||||||
|  |     def permutation_confidence(self): | ||||||
|  |         opt_list = ['q_pert_method', 'q_iter'] | ||||||
|  |         return self._copy_from_list(opt_list) | ||||||
|  |  | ||||||
|  |  | ||||||
class LplsOptions(Options):
    """Options for L-shaped Partial Least Squares Regression.
    """
    def __init__(self):
        Options.__init__(self)
        self._set_default()

    def _set_default(self):
        """Fill in the default value of every L-PLS option."""
        opt = {}
        opt['engine'] = nipals_lpls
        opt['mode'] = 'normal' # how much info to calculate
        opt['amax'] = 10
        opt['aopt'] = 9
        opt['xz_alpha'] = .5
        opt['auto_aopt'] = False
        opt['center'] = True
        opt['center_mth'] = [2, 0, 1]
        opt['scale'] = 'scores'
        opt['calc_conf'] = False
        opt['n_sets'] = 7
        opt['strict'] = False
        opt['p_center'] = 'med'
        opt['alpha'] = .3
        opt['cov_center'] = 'med'
        opt['crot'] = True

        opt['calc_cv'] = False
        opt['cv_val_method'] = 'random'
        opt['cv_val_sets'] = opt['n_sets']

        # (model key, label, flag) triples.  The keys must be names that
        # LPLS.as_dataset can resolve: 'W' (X-weights), 'L' (Z-weights) and
        # 'tsqx' are the entries the model actually stores.
        # NOTE(review): the meaning of the boolean flag is defined by the
        # options dialog base class -- confirm there.
        opt['all_data'] = [('T', 'scores', True),
                           ('W', 'X-weights', True),
                           ('L', 'Z-weights', True),
                           ('E','residuals', False),
                           ('tsqx', 't2X', False),
                           ('rmsep', 'RMSEP', False)
                           ]

        # (class, name, flag) triples
        opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
                            (blmplots.PlsLoadingPlot, 'Loadings', True),
                            (blmplots.LineViewXc, 'Line view', True),
                            (blmplots.PredictionErrorPlot, 'Residual Error', False),
                            (blmplots.RMSEPPlot, 'RMSEP', False),
                            (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False)
                            ]

        opt['out_data'] = ['T','P']
        opt['out_plots'] = [blmplots.PlsScorePlot,
                            blmplots.PlsLoadingPlot,
                            blmplots.LineViewXc]

        opt['pack'] = False
        opt['calc_qvals'] = False
        opt['q_pert_method'] = 'shuffle_rows'
        opt['q_iter'] = 20

        self.update(opt)

    def make_model_options(self):
        """Options for make_model method."""
        opt_list = ['scale','mode', 'amax', 'engine']
        return self._copy_from_list(opt_list)

    def confidence_options(self):
        """Options for confidence method."""
        opt_list = ['n_sets', 'aopt', 'alpha', 'p_center',
                    'strict', 'crot', 'cov_center']
        return self._copy_from_list(opt_list)

    def validation_options(self):
        """Options for pre_validation method."""
        opt_list = ['amax', 'n_sets', 'cv_val_method']
        return self._copy_from_list(opt_list)
|  |  | ||||||
|     def permutation_confidence(self): |  | ||||||
|         opt_list = ['q_pert_method', 'q_iter'] |  | ||||||
|         return self._copy_from_list(opt_list) |  | ||||||
|  |  | ||||||
| class PcaOptionsDialog(OptionsDialog): | class PcaOptionsDialog(OptionsDialog): | ||||||
|     """Options dialog for Principal Component Analysis. |     """Options dialog for Principal Component Analysis. | ||||||
| @@ -716,6 +940,210 @@ class PcaOptionsDialog(OptionsDialog): | |||||||
|             self._options['strict'] = True  |             self._options['strict'] = True  | ||||||
|  |  | ||||||
|  |  | ||||||
class LplsOptionsDialog(OptionsDialog):
    """Options dialog for L-shaped Partial Least squares regression.

    Every handler writes the widget state straight into ``self._options``.
    """
    def __init__(self, data, options, input_names=None):
        # A fresh list per call avoids sharing one mutable default between
        # dialog instances.
        if input_names is None:
            input_names = ['X', 'Y', 'Z']
        OptionsDialog.__init__(self, data, options, input_names)
        glade_file = os.path.join(fluents.DATADIR, 'lpls_options.glade')

        notebook_name = "vbox1"
        page_name = "Options"
        self.add_page_from_glade(glade_file, notebook_name, page_name)
        # connect signals to handlers
        # NOTE(review): qval_toggled is defined below but not connected
        # here, and "on__toggled" is routed to conf_toggled -- confirm
        # against the signal names in lpls_options.glade.
        dic = {"on_amax_value_changed" : self.on_amax_changed,
               "on_aopt_value_changed" : self.on_aopt_changed,
               "auto_aopt_toggled" : self.auto_aopt_toggled,
               "center_toggled" : self.center_toggled,
               #"on_scale_changed" : self.on_scale_changed,
               "on_val_none" : self.val_toggled,
               "on_val_cv" : self.cv_toggled,
               "on_cv_method_changed" : self.on_cv_method_changed,
               "on_cv_sets_changed" : self.on_cv_sets_changed,
               "on_conf_toggled" : self.conf_toggled,
               "on_subset_loc_changed" : self.on_subset_loc_changed,
               "on_cov_loc_changed" : self.on_cov_loc_changed,
               "on_alpha_changed" : self.on_alpha_changed,
               "on_rot_changed" : self.on_rot_changed,
               "on__toggled" : self.conf_toggled,
               "on_qval_changed" : self.on_qval_changed,
               "on_iter_changed" : self.on_iter_changed
               }

        self.wTree.signal_autoconnect(dic)

        # set/ensure valid default values/ranges
        #
        amax_sb = self.wTree.get_widget("amax_spinbutton")
        max_comp = min(data[0].shape) # max num of components
        if self._options['amax'] > max_comp:
            logger.log('debug', 'amax default too large ... adjusting')
            self._options['amax'] = max_comp
        amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0)

        # aopt spin button: never allow more components than amax
        aopt_sb = self.wTree.get_widget("aopt_spinbutton")
        if self._options['aopt'] > self._options['amax']:
            self._options['aopt'] = self._options['amax']
        aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)

        # scale
        # scale_cb = self.wTree.get_widget("scale_combobox")
        # scale_cb.set_active(0)

        # validation frames
        if not self._options['calc_cv']:
            self.wTree.get_widget("cv_frame").set_sensitive(False)

        self.wTree.get_widget("cv_method").set_active(0)

        # confidence
        self.wTree.get_widget("subset_expander").set_sensitive(
            bool(self._options['calc_conf']))

        cb = self.wTree.get_widget("subset_loc")
        _m = {'med': 0, 'mean': 1, 'full_model': 2}
        cb.set_active(_m.get(self._options['p_center']))

        cb = self.wTree.get_widget("cov_loc")
        _m = {'med': 0, 'mean': 1}
        cb.set_active(_m.get(self._options['cov_center']))

        hs = self.wTree.get_widget("alpha_scale")
        hs.set_value(self._options['alpha'])

        tb = self.wTree.get_widget("qvals")
        tb.set_sensitive(True)

    def on_amax_changed(self, sb):
        """Store the new amax and clamp aopt (and its spin range) to it."""
        amax = sb.get_value_as_int()
        logger.log("debug", "amax changed: new value: %s" %amax)
        # update aopt if needed
        if amax < self._options['aopt']:
            self._options['aopt'] = amax
        aopt_sb = self.wTree.get_widget("aopt_spinbutton")
        aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0)
        self._options['amax'] = amax

    def on_aopt_changed(self, sb):
        """Store the chosen number of components."""
        self._options['aopt'] = sb.get_value_as_int()

    def auto_aopt_toggled(self, tb):
        """Toggle automatic aopt; the manual spin button is disabled while
        it is on.
        """
        auto = bool(tb.get_active())
        self._options['auto_aopt'] = auto
        self.wTree.get_widget("aopt_spinbutton").set_sensitive(not auto)

    def center_toggled(self, tb):
        """Store whether centering is requested."""
        if tb.get_active():
            self._options['center'] = True
        else:
            logger.log("debug", "centering set to False")
            self._options['center'] = False

    #def on_scale_changed(self, cb):
    #    scale = cb.get_active_text()
    #    if scale=='Scores':
    #        self._options['scale'] = 'scores'
    #    elif scale=='Loadings':
    #        self._options['scale'] = 'loads'
    #    else:
    #        raise IOError

    def val_toggled(self, tb):
        """Callback for validation: None. """
        cv_frame = self.wTree.get_widget("cv_frame")
        cv_tb = self.wTree.get_widget("cv_toggle")
        if tb.get_active():
            self._options['calc_cv'] = False
            cv_frame.set_sensitive(False)
            cv_tb.set_sensitive(False)
        else:
            cv_tb.set_sensitive(True)
            if cv_tb.get_active():
                cv_frame.set_sensitive(True)
                self._options['calc_cv'] = True

    def cv_toggled(self, tb):
        """Enable or disable cross-validation together with its frame."""
        use_cv = bool(tb.get_active())
        self.wTree.get_widget("cv_frame").set_sensitive(use_cv)
        self._options['calc_cv'] = use_cv

    def on_cv_method_changed(self, cb):
        """Store the cross-validation method; only 'Random' is handled."""
        method = cb.get_active_text()
        if method == 'Random':
            self._options['cv_val_method'] = 'random'

    def on_cv_sets_changed(self, sb):
        """Store the number of cross-validation sets."""
        self._options['cv_val_sets'] = sb.get_value_as_int()

    def conf_toggled(self, tb):
        """Switch confidence calculation off when ``tb`` is active, on
        otherwise.
        """
        # NOTE(review): active means "off" here, which is only right if the
        # connected widget is a "none" style toggle -- confirm in the glade
        # file.
        enabled = not tb.get_active()
        self._options['calc_conf'] = enabled
        self.wTree.get_widget("subset_expander").set_sensitive(enabled)

    def on_subset_loc_changed(self, cb):
        """Map the combo box text onto a ``p_center`` code; unknown text
        leaves the option untouched.
        """
        method = cb.get_active_text()
        if method=='Full model':
            self._options['p_center'] = 'full_model'
        elif method=='Median':
            self._options['p_center'] = 'med'
        elif method=='Mean':
            self._options['p_center'] = 'mean'

    def on_cov_loc_changed(self, cb):
        """Map the combo box text onto a ``cov_center`` code; unknown text
        leaves the option untouched.
        """
        method = cb.get_active_text()
        if method=='Median':
            self._options['cov_center'] = 'med'
        elif method=='Mean':
            self._options['cov_center'] = 'mean'

    def on_alpha_changed(self, hs):
        """Store the value of the alpha scale widget."""
        self._options['alpha'] = hs.get_value()

    def on_rot_changed(self, rg):
        """Store the rotation choice from a pair of widgets ``(proc, strict)``.
        """
        proc, strict = rg
        # NOTE(review): 'crot' ends up True on both paths and 'strict' is
        # never reset to False -- kept as is, intent to be confirmed.
        self._options['crot'] = True
        if not proc.get_active():
            self._options['strict'] = True

    def qval_toggled(self, tb):
        """Switch q-value calculation off when ``tb`` is active, on
        otherwise, and set the sensitivity of the related widgets to match.
        """
        enabled = not tb.get_active()
        # 'calc_qvals' is the key the options object defines and the model
        # code reads.
        self._options['calc_qvals'] = enabled
        self.wTree.get_widget("qval_method").set_sensitive(enabled)
        self.wTree.get_widget("q_iter").set_sensitive(enabled)

    def on_iter_changed(self, sb):
        """Store the iteration count as an integer."""
        self._options['q_iter'] = sb.get_value_as_int()

    def on_qval_changed(self, cb):
        """Store the perturbation method; only 'Shuffle rows' is handled."""
        q_method = cb.get_active_text()
        if q_method=='Shuffle rows':
            self._options['q_pert_method'] = 'shuffle'
|  |          | ||||||
|  |              | ||||||
| class PlsOptionsDialog(OptionsDialog): | class PlsOptionsDialog(OptionsDialog): | ||||||
|     """Options dialog for Partial Least squares regression. |     """Options dialog for Partial Least squares regression. | ||||||
|     """ |     """ | ||||||
| @@ -918,5 +1346,3 @@ class PlsOptionsDialog(OptionsDialog): | |||||||
|         q_method = cb.get_active_text() |         q_method = cb.get_active_text() | ||||||
|         if method=='Shuffle rows': |         if method=='Shuffle rows': | ||||||
|             self._options['q_pert_method'] = 'shuffle' |             self._options['q_pert_method'] = 'shuffle' | ||||||
|          |  | ||||||
|              |  | ||||||
|   | |||||||
| @@ -190,7 +190,12 @@ class PlsCorrelationLoadingPlot(BlmScatterPlot): | |||||||
|         title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() |         title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() | ||||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP') |         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP') | ||||||
|          |          | ||||||
|          |  | ||||||
class LplsHypoidCorrelationPlot(BlmScatterPlot):
    """Scatter plot of the model part named 'W', titled after the X dataset."""

    def __init__(self, model, absi=0, ordi=1):
        x_name = model._dataset['X'].get_name()
        title = "Hypoid correlations(%s)" % x_name
        BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W')
|  |      | ||||||
| class LineViewXc(plots.LineViewPlot): | class LineViewXc(plots.LineViewPlot): | ||||||
|     """A line view of centered raw data |     """A line view of centered raw data | ||||||
|     """ |     """ | ||||||
| @@ -214,8 +219,8 @@ class PlsQvalScatter(plots.ScatterPlot): | |||||||
|     def __init__(self, model, pc=0): |     def __init__(self, model, pc=0): | ||||||
|         if not model.model.has_key('w_tsq'): |         if not model.model.has_key('w_tsq'): | ||||||
|             return None |             return None | ||||||
|         self._W = model.model['P'] |         self._W = model.model['W'] | ||||||
|         dataset_1 = model.as_dataset('P') |         dataset_1 = model.as_dataset('W') | ||||||
|         dataset_2 = model.as_dataset('w_tsq') |         dataset_2 = model.as_dataset('w_tsq') | ||||||
|         id_dim = dataset_1.get_dim_name(0) #genes |         id_dim = dataset_1.get_dim_name(0) #genes | ||||||
|         sel_dim = dataset_1.get_dim_name(1) #_comp |         sel_dim = dataset_1.get_dim_name(1) #_comp | ||||||
|   | |||||||
| @@ -115,6 +115,7 @@ def expl_var_y(Y, T, Q): | |||||||
|          |          | ||||||
def pls_qvals(a, b, aopt=None, alpha=.3,
              n_iter=20, algo='pls',
              center=True,
              sim_method='shuffle',
              p_center='med', cov_center='med',
              crot=True, strict=False, metric=None):

    """Returns qvals for pls model.

    input:
    a -- data matrix
    b -- data matrix
    aopt -- scalar, opt. number of components
    alpha -- [0,1] regularisation parameter for T2-test
    n_iter -- number of permutations
    center -- bool, column-center a and b before fitting the models?
    sim_method -- permutation method ['shuffle'] (accepted but not read
                  here; rows of b are always shuffled)
    p_center -- location estimator for sub models ['med']
    cov_center -- location estimator for covariance of submodels ['med']
    crot -- bool, use rotations of sub models?
    strict -- bool, use strict (rot/flips only) rotations?
    metric -- optional matrix that a is right-multiplied with

    output:
    qval -- one q-value per column (variable) of a, in the original
            variable order
    """

    m, n = a.shape
    TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets)
    n_false = zeros((n, n_iter), dtype='d')

    # full model
    if center:
        ac = a - a.mean(0)
        bc = b - b.mean(0)
    else:
        # Previously ac/bc were left undefined here (NameError).
        ac, bc = a, b
    # `metric != None` is an elementwise comparison on arrays; test identity.
    if metric is not None:
        ac = dot(ac, metric)
    if algo=='bridge':
        dat = bridge(ac, bc, aopt, 'loads', 'fast')
    else:
        dat = pls(ac, bc, aopt, 'loads', 'fast')
    # NOTE(review): the jack-knife on the full model hard-codes center=True
    # while the permutation calls below rely on pls_jkW's default -- confirm.
    Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
    tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
                         alpha=alpha, crot=crot, strict=strict,
                         cov_center=cov_center)
    t0 = time.time()
    Vs = shuffle_1d(bc, n_iter, axis=0)
    for i, b_shuff in enumerate(Vs):
        t1 = time.time()
        if algo=='bridge':
            dat = bridge(ac, b_shuff, aopt, 'loads','fast')
        else:
            dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
        Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
        TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
                             alpha=alpha, crot=crot, strict=strict,
                             cov_center=cov_center)
        print(time.time() - t1)
    sort_index = argsort(tsq_full)[::-1]
    back_sort_index = sort_index.argsort()
    print(time.time() - t0)

    # count false positives: for every variable j, the number of permuted
    # statistics (per permutation) that reach the observed statistic
    for j in range(n):
        n_false[j,:] = (TSQ >= tsq_full[j]).sum(0)
    false_pos = median(n_false, 1)
    ll = arange(1, len(false_pos)+1, 1)
    # divide by the rank (1 = largest statistic) of each variable
    qval = false_pos/ll.take(back_sort_index)
    print(time.time() - t0)
    #return qval, false_pos, TSQ, tsq_full
    return qval
|  |  | ||||||
def ensure_strict(C, only_flips=True):
    """Ensure that a rotation matrix does only 90 degree rotations.
    In multiplication with pcs this allows flips and reordering.

    input:
    C -- square rotation matrix (array)
    only_flips -- if True only flips are allowed: the result is a diagonal
                  matrix holding the signs of the diagonal of C

    output:
    Matrix of 0/+1/-1 entries. With only_flips False, entries of C whose
    absolute value exceeds .6 become +-1 and all others become 0.
    """
    S = sign(C) # signs
    if only_flips:
        return eye(C.shape[0])*S
    # ndarray.putmask() does not exist in numpy >= 1.0; build the mask directly.
    Cm = (abs(C) > .6).astype(C.dtype)
    if det(Cm)>1:
        raise ValueError("Implement this!")
    return Cm*S
|  |  | ||||||
|  | def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | ||||||
|  |                  n_iter=20, algo='pls', | ||||||
|  |                  sim_method='shuffle', | ||||||
|  |                  p_center='med', cov_center='med', | ||||||
|  |                  crot=True, strict=False, metric=None): | ||||||
|  |  | ||||||
|  |     """Returns qvals for pls model. | ||||||
|  |     Shuffling of variables in X is preprocessed in metric. | ||||||
|  |     Null model is 'If I put genes randomly on network' ... if they are sign: | ||||||
|  |     then this is due to network structure and not covariance with response. | ||||||
|  |  | ||||||
|  |     input: | ||||||
|  |     a -- data matrix | ||||||
|  |     b -- data matrix | ||||||
|     aopt -- scalar, opt. number of components |     aopt -- scalar, opt. number of components | ||||||
|     alpha -- [0,1] regularisation parameter for T2-test |     alpha -- [0,1] regularisation parameter for T2-test | ||||||
|     n_iter -- number of permutations |     n_iter -- number of permutations | ||||||
| @@ -140,25 +231,33 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | |||||||
|     n_false = zeros((n, n_iter), dtype='<f8') |     n_false = zeros((n, n_iter), dtype='<f8') | ||||||
|  |  | ||||||
|     #full model |     #full model | ||||||
|     if metric!=None: |  | ||||||
|         a = dot(a, metric) |     # center? | ||||||
|  |     if center==True: | ||||||
|  |         ac = a - a.mean(0) | ||||||
|  |         bc = b - b.mean(0) | ||||||
|  |     if metric==None: | ||||||
|  |         metric = eye(n,n) | ||||||
|     if algo=='bridge': |     if algo=='bridge': | ||||||
|         dat = bridge(a, b, aopt, 'loads', 'fast') |         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||||
|     else: |     else: | ||||||
|         dat = pls(a, b, aopt, 'loads', 'fast') |         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) |     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) | ||||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, |     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||||
|                          alpha=alpha, crot=crot, strict=strict, |                          alpha=alpha, crot=crot, strict=strict, | ||||||
|                          cov_center=cov_center) |                          cov_center=cov_center) | ||||||
|     t0 = time.time() |     t0 = time.time() | ||||||
|     Vs = shuffle_1d(b, n_iter) |     Vs = shuffle_1d(a, n_iter, 1) | ||||||
|     for i, b_shuff in enumerate(Vs): |     for i, a_shuff in enumerate(Vs): | ||||||
|         t1 = time.time() |         t1 = time.time() | ||||||
|  |         a = a_shuff - a_shuff.mean(0) | ||||||
|  |         a = dot(a, metric) | ||||||
|  |          | ||||||
|         if algo=='bridge': |         if algo=='bridge': | ||||||
|             dat = bridge(a, b_shuff, aopt, 'loads','fast') |             dat = bridge(a, b, aopt, 'loads','fast') | ||||||
|         else: |         else: | ||||||
|             dat = pls(a, b, aopt, 'loads', 'fast') |             dat = pls(a, b, aopt, 'loads', 'fast') | ||||||
|         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric) |         Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) | ||||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, |         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||||
|                              alpha=alpha, crot=crot, strict=strict, |                              alpha=alpha, crot=crot, strict=strict, | ||||||
|                              cov_center=cov_center) |                              cov_center=cov_center) | ||||||
| @@ -177,24 +276,8 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | |||||||
|     sort_qval = false_pos.take(sort_index)/ll |     sort_qval = false_pos.take(sort_index)/ll | ||||||
|     qval = false_pos/ll.take(back_sort_index) |     qval = false_pos/ll.take(back_sort_index) | ||||||
|     print time.time() - t0 |     print time.time() - t0 | ||||||
|     return qval, false_pos, TSQ, tsq_full |     #return qval, false_pos, TSQ, tsq_full | ||||||
|  |     return qval | ||||||
| def ensure_strict(C, only_flips=True): |  | ||||||
|     """Ensure that a rotation matrix does only 90 degree rotations. |  | ||||||
|     In multiplication with pcs this allows flips and reordering. |  | ||||||
|  |  | ||||||
|     if only_flips is True there will onlt be flips allowed |  | ||||||
|     """ |  | ||||||
|     Cm = C |  | ||||||
|     S = sign(C) # signs |  | ||||||
|     if only_flips==True: |  | ||||||
|         C = eye(Cm.shape[0])*S |  | ||||||
|         return C |  | ||||||
|     Cm = zeros_like(C) |  | ||||||
|     Cm.putmask(1.,abs(C)>.6) |  | ||||||
|     if det(Cm)>1: |  | ||||||
|         raise ValueError,"Implement this!" |  | ||||||
|     return Cm*S |  | ||||||
|  |  | ||||||
| def leverage(aopt=1,*args): | def leverage(aopt=1,*args): | ||||||
|     """Returns leverages |     """Returns leverages | ||||||
| @@ -253,3 +336,10 @@ def ssq(E, axis=0, weights=None): | |||||||
|         raise NotImplementedError, "Higher order modes not supported" |         raise NotImplementedError, "Higher order modes not supported" | ||||||
|     return pow(Ew,2).sum(axis) |     return pow(Ew,2).sum(axis) | ||||||
|  |  | ||||||
|  |  | ||||||
def vnorm(x):
    """Return the Euclidean norm of the vector x.

    Evaluated as the square root of dot(x, conj(x)); noted by the author
    to be considerably faster than linalg.norm.
    """
    squared_length = dot(x, x.conj())
    return sqrt(squared_length)
|   | |||||||
| @@ -1,23 +1,25 @@ | |||||||
| from scipy import apply_along_axis,newaxis,zeros,\ | from scipy import apply_along_axis,newaxis,zeros,\ | ||||||
|      median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\ |      median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\ | ||||||
|      trace,zeros_like,sign,sort,real,argsort,rand,array,\ |      trace,zeros_like,sign,sort,real,argsort,rand,array,\ | ||||||
|      matrix |      matrix,nan | ||||||
| from scipy.linalg import norm,svd,inv,eig | from scipy.linalg import norm,svd,inv,eig | ||||||
| from scipy.stats import median,mean | from scipy.stats import median,mean | ||||||
|  |  | ||||||
def normalise(a, axis=0, return_scales=False):
    """Scale a 2d array to unit norm along `axis`.

    With axis 0 every column is divided by its norm, otherwise every row.
    If return_scales is true the norms are returned as well, shaped so
    that they broadcast against a.
    """
    scales = apply_along_axis(norm, axis, a)
    # Reshape the 1d norms so the division broadcasts along the right axis.
    scales = scales[newaxis] if axis==0 else scales[:,newaxis]
    scaled = a/scales
    return (scaled, scales) if return_scales else scaled
|  |  | ||||||
|  | def sub2ind(shape, i, j): | ||||||
| 	"""Indices from subscripts. Only support for 2d""" | 	"""Indices from subscripts. Only support for 2d""" | ||||||
| 	row,col = shape | 	row,col = shape | ||||||
| 	ind = [] | 	ind = [] | ||||||
| @@ -41,13 +43,13 @@ def sorted_eig(a, b=None,sort_by='sm'): | |||||||
|     (This is reversed output compared to matlab) |     (This is reversed output compared to matlab) | ||||||
|      |      | ||||||
|     """ |     """ | ||||||
|     s,v = eig(a,b) |     s,v = eig(a, b) | ||||||
|     s = real(s) # dont expect any imaginary part |     s = real(s) # dont expect any imaginary part | ||||||
|     v = real(v) |     v = real(v) | ||||||
|     ind = argsort(s) |     ind = argsort(s) | ||||||
|     if sort_by=='lm': |     if sort_by=='lm': | ||||||
|         ind = ind[::-1] |         ind = ind[::-1] | ||||||
|     v = v.take(ind,1) |     v = v.take(ind, 1) | ||||||
|     s = s.take(ind) |     s = s.take(ind) | ||||||
|  |  | ||||||
|     return s,v |     return s,v | ||||||
| @@ -67,15 +69,15 @@ def str2num(string_number): | |||||||
|     return num |     return num | ||||||
|  |  | ||||||
def randperm(n):
    """Return a random permutation of the integers 0..n-1 as an int array.

    The permutation is the ordering that sorts n uniform random draws.
    Using argsort yields the same result as the former dict-based lookup,
    without shadowing the builtin `dict` and without dropping indices if
    two draws happen to be equal.
    """
    return argsort(rand(n)).astype('i')
|  |  | ||||||
| def mat_center(X,axis=0,ret_mn=False): | def mat_center(X,axis=0,ret_mn=False): | ||||||
|     """Mean center matrix along axis. |     """Mean center matrix along axis. | ||||||
|   | |||||||
| @@ -3,11 +3,12 @@ | |||||||
| There is no typechecking of any kind here, just focus on speed | There is no typechecking of any kind here, just focus on speed | ||||||
| """ | """ | ||||||
|  |  | ||||||
| from scipy.linalg import svd,norm,inv,pinv,qr | import math | ||||||
|  | from scipy.linalg import svd,inv | ||||||
| from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\ | from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\ | ||||||
|      apply_along_axis,mean,ones,randn,empty_like,outer,c_,\ |      apply_along_axis,mean,ones,randn,empty_like,outer,c_,\ | ||||||
|      rand,sum,cumsum,matrix |      rand,sum,cumsum,matrix | ||||||
|  |      | ||||||
| def pca(a, aopt, scale='scores', mode='normal'): | def pca(a, aopt, scale='scores', mode='normal'): | ||||||
|     """ Principal Component Analysis model |     """ Principal Component Analysis model | ||||||
|     mode: |     mode: | ||||||
| @@ -18,17 +19,18 @@ def pca(a, aopt, scale='scores', mode='normal'): | |||||||
|      |      | ||||||
|     m, n = a.shape |     m, n = a.shape | ||||||
|  |  | ||||||
|     if m*10.>n: |     if m*3>n: | ||||||
|         u, s, vt = esvd(a) |         u, s, v = esvd(a) | ||||||
|     else: |     else: | ||||||
|         u, s, vt = svd(a, full_matrices=0) |         u, s, vt = svd(a, full_matrices=0) | ||||||
|  |         v = vt.T | ||||||
|     eigvals = (1./m)*s |     eigvals = (1./m)*s | ||||||
|     T = u*s |     T = u*s | ||||||
|     T = T[:,:aopt] |     T = T[:,:aopt] | ||||||
|     P = vt[:aopt,:].T |     P = v[:,:aopt] | ||||||
|      |      | ||||||
|     if scale=='loads': |     if scale=='loads': | ||||||
|         tnorm = apply_along_axis(norm, 0, T) |         tnorm = apply_along_axis(vnorm, 0, T) | ||||||
|         T = T/tnorm |         T = T/tnorm | ||||||
|         P = P*tnorm |         P = P*tnorm | ||||||
|  |  | ||||||
| @@ -47,6 +49,7 @@ def pca(a, aopt, scale='scores', mode='normal'): | |||||||
|              |              | ||||||
|     return {'T':T, 'P':P, 'E':E} |     return {'T':T, 'P':P, 'E':E} | ||||||
|  |  | ||||||
|  |  | ||||||
| def pcr(a, b, aopt=2, scale='scores', mode='normal'): | def pcr(a, b, aopt=2, scale='scores', mode='normal'): | ||||||
|     """Returns Principal component regression model.""" |     """Returns Principal component regression model.""" | ||||||
|     m, n = a.shape |     m, n = a.shape | ||||||
| @@ -98,13 +101,13 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): | |||||||
|             u, s, vh = svd(dot(ab.T, ab)) |             u, s, vh = svd(dot(ab.T, ab)) | ||||||
|             w = dot(ab, u[:,:1]) |             w = dot(ab, u[:,:1]) | ||||||
|      |      | ||||||
|         w = w/norm(w) |         w = w/vnorm(w) | ||||||
|         r = w.copy() |         r = w.copy() | ||||||
|         if i>0: |         if i>0: | ||||||
|             for j in range(0,i,1): |             for j in range(0,i,1): | ||||||
|                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] |                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] | ||||||
|         t = dot(a, r) |         t = dot(a, r) | ||||||
|         tt = norm(t)**2 |         tt = vnorm(t)**2 | ||||||
|         p  = dot(a.T, t)/tt |         p  = dot(a.T, t)/tt | ||||||
|         q = dot(r.T, ab).T/tt |         q = dot(r.T, ab).T/tt | ||||||
|         ab = ab - dot(p, q.T)*tt |         ab = ab - dot(p, q.T)*tt | ||||||
| @@ -115,7 +118,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): | |||||||
|  |  | ||||||
|         if mode=='fast' and i==aopt-1: |         if mode=='fast' and i==aopt-1: | ||||||
|             if scale=='loads': |             if scale=='loads': | ||||||
|                 tnorm = apply_along_axis(norm, 0, T) |                 tnorm = apply_along_axis(vnorm, 0, T) | ||||||
|                 T = T/tnorm |                 T = T/tnorm | ||||||
|                 W = W*tnorm |                 W = W*tnorm | ||||||
|             return {'T':T, 'W':W} |             return {'T':T, 'W':W} | ||||||
| @@ -134,7 +137,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): | |||||||
|         F = b - dot(T[:,:aopt], Q[:,:aopt].T) |         F = b - dot(T[:,:aopt], Q[:,:aopt].T) | ||||||
|  |  | ||||||
|     if scale=='loads': |     if scale=='loads': | ||||||
|         tnorm = apply_along_axis(norm, 0, T) |         tnorm = apply_along_axis(vnorm, 0, T) | ||||||
|         T = T/tnorm |         T = T/tnorm | ||||||
|         W = W*tnorm |         W = W*tnorm | ||||||
|         Q = Q*tnorm |         Q = Q*tnorm | ||||||
| @@ -159,7 +162,7 @@ def w_simpls(aat, b, aopt): | |||||||
|         u = dot(b, u[:,:1]) #y-factor scores |         u = dot(b, u[:,:1]) #y-factor scores | ||||||
|         U[:,i] = u.ravel() |         U[:,i] = u.ravel() | ||||||
|         t = dot(aat, u) |         t = dot(aat, u) | ||||||
|         t = t/norm(t) |         t = t/vnorm(t) | ||||||
|         T[:,i] = t.ravel() |         T[:,i] = t.ravel() | ||||||
|         h = dot(aat, t) #score-weights |         h = dot(aat, t) #score-weights | ||||||
|         H[:,i] = h.ravel() |         H[:,i] = h.ravel() | ||||||
| @@ -183,7 +186,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): | |||||||
|     W = u[:,:aopt] |     W = u[:,:aopt] | ||||||
|     K = vt[:aopt,:].T |     K = vt[:aopt,:].T | ||||||
|     T = dot(a, W) |     T = dot(a, W) | ||||||
|     tnorm = apply_along_axis(norm, 0, T) # norm of T-columns |     tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns | ||||||
|  |  | ||||||
|     if mode == 'fast': |     if mode == 'fast': | ||||||
|         if scale=='loads': |         if scale=='loads': | ||||||
| @@ -196,16 +199,6 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): | |||||||
|     B = zeros((aopt, n, l), dtype='f') |     B = zeros((aopt, n, l), dtype='f') | ||||||
|     for i in range(aopt): |     for i in range(aopt): | ||||||
|         B[i] = dot(W[:,:i+1], Q[:,:i+1].T) |         B[i] = dot(W[:,:i+1], Q[:,:i+1].T) | ||||||
|     # leverages |  | ||||||
|     # fixme: probably need an orthogonal basis for row-space leverage |  | ||||||
|     #        T (scores) are not orthogonal |  | ||||||
|     #        Using a qr decomp to get an orthonormal basis for row-space |  | ||||||
|     #Tq = qr(T)[0] |  | ||||||
|     #s_lev,v_lev = leverage(aopt,Tq,W) |  | ||||||
|     # explained variance |  | ||||||
|     #var_x, exp_var_x = variances(a,T,W) |  | ||||||
|     #qnorm = apply_along_axis(norm, 0, Q) |  | ||||||
|     #var_y, exp_var_y = variances(b,U,Q/qnorm) |  | ||||||
|      |      | ||||||
|     if mode == 'detailed': |     if mode == 'detailed': | ||||||
|         E = empty((aopt, m, n)) |         E = empty((aopt, m, n)) | ||||||
| @@ -225,10 +218,132 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): | |||||||
|     return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W} |     return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W} | ||||||
|      |      | ||||||
|  |  | ||||||
|  |  | ||||||
def nipals_lpls(X, Y, Z, amax, alpha=.7, mean_ctr=(2, 0, 1), mode='normal', scale='scores', verbose=False):
    """ L-shaped Partial Least Squares Regression by the nipals algorithm.

    (X!Z)->Y
    :input:
        X : data matrix (m, n)
        Y : data matrix (m, l)
        Z : data matrix; the code forms dot(Z, w) with w of length n, so
            Z must have n columns here (the earlier docstring said (n, o);
            NOTE(review): confirm the orientation callers pass)
        amax : number of components to extract
        alpha : weight of the Z-derived direction when mixing weights
                (w = alpha*k + (1-alpha)*w)
        mean_ctr : centering codes for (X, Y, Z) as understood by center(),
                   or None for no centering
        mode : accepted for interface compatibility, not read here
        scale : 'loads' rescales T, W, Q by the score norms and L, K by
                the K-column norms
        verbose : print progress per component

    :output:
      T : X-scores
      W : X-weights/Z-weights
      P : X-loadings
      Q : Y-loadings
      U : X-Y relation
      L : Z-scores
      K : Z-loads
      B : Regression coefficients X->Y
      b0: Regression coefficient intercept
      evx : X-explained variance
      evy : Y-explained variance
      evz : Z-explained variance

    :Notes:
      Y used to be centered with the X centering code; it now uses its own.
    """
    if mean_ctr is None:
        # -1 tells center() to leave the data alone and return zero means,
        # so b0 below is still defined.
        mean_ctr = (-1, -1, -1)
    xctr, yctr, zctr = mean_ctr
    X, mnX = center(X, xctr)
    Y, mnY = center(Y, yctr)
    Z, mnZ = center(Z, zctr)

    varX = pow(X, 2).sum()
    varY = pow(Y, 2).sum()
    varZ = pow(Z, 2).sum()

    m, n = X.shape
    y_rows, y_cols = Y.shape
    z_rows, z_cols = Z.shape

    # initialize
    U = empty((y_rows, amax))
    Q = empty((y_cols, amax))
    T = empty((m, amax))
    W = empty((n, amax))
    P = empty((n, amax))
    K = empty((z_cols, amax))
    L = empty((z_rows, amax))
    var_x = empty((amax,))
    var_y = empty((amax,))
    var_z = empty((amax,))

    MAX_ITER = 100
    lim = 1e-5
    for a in range(amax):
        if verbose:
            print("\n Working on comp. %s" % a)
        u = Y[:,:1]
        diff = 1
        niter = 0
        while (diff>lim and niter<MAX_ITER):
            niter += 1
            u1 = u.copy()
            w = dot(X.T, u)
            w = w/sqrt(dot(w.T, w))
            l = dot(Z, w)
            k = dot(Z.T, l)
            k = k/sqrt(dot(k.T, k))
            # blend the X-derived and Z-derived directions
            w = alpha*k + (1-alpha)*w
            w = w/sqrt(dot(w.T, w))
            t = dot(X, w)
            c = dot(Y.T, t)
            c = c/sqrt(dot(c.T, c))
            u = dot(Y, c)
            diff = abs(u1 - u).max()
        if verbose:
            print("Converged after %s iterations" % niter)
        tt = dot(t.T, t)
        p = dot(X.T, t)/tt
        q = dot(Y.T, t)/tt
        l = dot(Z, w)

        U[:,a] = u.ravel()
        W[:,a] = w.ravel()
        P[:,a] = p.ravel()
        T[:,a] = t.ravel()
        Q[:,a] = q.ravel()
        L[:,a] = l.ravel()
        K[:,a] = k.ravel()

        # deflate all three blocks
        X = X - dot(t, p.T)
        Y = Y - dot(t, q.T)
        Z = Z - dot(l, w.T)

        var_x[a] = pow(X, 2).sum()
        var_y[a] = pow(Y, 2).sum()
        var_z[a] = pow(Z, 2).sum()

    B = dot(dot(W, inv(dot(P.T, W))), Q.T)
    b0 = mnY - dot(mnX, B)

    # variance explained
    evx = 100.0*(1 - var_x/varX)
    evy = 100.0*(1 - var_y/varY)
    evz = 100.0*(1 - var_z/varZ)
    if scale=='loads':
        tnorm = apply_along_axis(vnorm, 0, T)
        T = T/tnorm
        W = W*tnorm
        Q = Q*tnorm
        knorm = apply_along_axis(vnorm, 0, K)
        L = L*knorm
        K = K/knorm

    return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy, 'evz':evz}
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ########### Helper routines ######### | ||||||
|  |  | ||||||
def m_shape(array):
    """Return the shape `array` has once viewed as a 2d matrix."""
    as_matrix = matrix(array)
    return as_matrix.shape
|  |  | ||||||
def esvd(data, economy=1):
    """SVD with the option of economy sized calculation
    Calculate subspaces of X'X or XX' depending on the shape
    of the matrix.

    input:
    data -- (m, n) matrix
    economy -- accepted for interface compatibility, not read here

    output:
    u -- left singular vectors as columns, (m, min(m, n))
    s -- singular values, recomputed as norms of the projected data
    v -- right singular vectors as columns, (n, min(m, n)),
         so that data ~ dot(u*s, v.T)
    """
    m, n = data.shape
    if m>=n:
        # The Gram matrix must not overwrite `data`: the original data is
        # still needed to project onto the right singular vectors.
        gram = dot(data.T, data)
        _, s, vt = svd(gram)
        v = vt.T
        u = dot(data, v)
        s = sqrt((u*u).sum(0))
        u = u/s
    else:
        gram = dot(data, data.T)
        gram = (gram + gram.T)/2.0 # enforce exact symmetry
        u, s, _ = svd(gram)
        vt = dot(u.T, data)
        s = sqrt((vt*vt).sum(1))
        v = (vt/s[:,newaxis]).T

    return u, s, v
|  |  | ||||||
def vnorm(x):
    """Return the Euclidean norm of x as a Python float.

    x is assumed to be a column array (or a 1d vector).
    """
    sum_of_squares = dot(x.T, x)
    return math.sqrt(sum_of_squares)
|  |  | ||||||
def center(a, axis):
    """Subtract a mean from the matrix a and return (centered, mean).

    axis codes:
      0  -- column center
      1  -- row center
      2  -- double center
      -1 -- nothing (the returned mean is a vector of zeros)

    Any other code raises IOError.
    """
    if axis not in (-1, 0, 1, 2):
        raise IOError("input error: axis must be in [-1,0,1,2]")

    if axis==-1:
        offset = zeros((a.shape[1],))
    elif axis==0:
        offset = a.mean(0)
    elif axis==1:
        offset = a.mean(1)[:,newaxis]
    else:
        # double centering: column means + row means - grand mean
        offset = a.mean(0) + a.mean(1)[:,newaxis] - a.mean()

    centered = a - offset
    return centered, offset
|   | |||||||
| @@ -53,6 +53,7 @@ def gene_hypergeo_test(selection, category_dataset): | |||||||
|                              cat_count) |                              cat_count) | ||||||
|      |      | ||||||
|     pvals = scipy.where(cat_count==0, 2, pvals) |     pvals = scipy.where(cat_count==0, 2, pvals) | ||||||
|  |     pvals = scipy.where(scipy.isnan(pvals), 2, pvals) | ||||||
|     out = {} |     out = {} | ||||||
|     for i in range(pvals.size): |     for i in range(pvals.size): | ||||||
|         out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i]) |         out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i]) | ||||||
|   | |||||||
| @@ -2,7 +2,7 @@ import os,sys | |||||||
| from itertools import izip | from itertools import izip | ||||||
| import networkx as NX | import networkx as NX | ||||||
| from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\ | from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\ | ||||||
|      outer,maximum,sum,diag,real |      outer,maximum,sum,diag,real,atleast_2d | ||||||
| from scipy.linalg import eig,svd,inv,expm,norm | from scipy.linalg import eig,svd,inv,expm,norm | ||||||
| from cx_utils import sorted_eig | from cx_utils import sorted_eig | ||||||
|  |  | ||||||
| @@ -378,6 +378,7 @@ Ke = expm(A) .... expm(-A)? | |||||||
| # 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX) | # 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX) | ||||||
| # 13.09.2006: update for use in numpy | # 13.09.2006: update for use in numpy | ||||||
|  |  | ||||||
|  | # 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last  | ||||||
|  |  | ||||||
| def K_expAdj(W, normalised=True, alpha=1.0): | def K_expAdj(W, normalised=True, alpha=1.0): | ||||||
|     """Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.  |     """Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.  | ||||||
| @@ -433,8 +434,8 @@ def K_vonNeumann(W, normalised=True, alpha=1.0): | |||||||
|     return dot(dot(vr,psigma),vri).astype(t) |     return dot(dot(vr,psigma),vri).astype(t) | ||||||
|  |  | ||||||
| def K_laplacian(W, normalised=True, alpha=1.0): | def K_laplacian(W, normalised=True, alpha=1.0): | ||||||
|     """ This is the matrix square root of the pseudo inverse of L. |     """ This is the matrix pseudo inverse of L. | ||||||
|     Also known as th eaverage commute time matrix. |     Also known as the average commute time matrix. | ||||||
|     """ |     """ | ||||||
|     W = asarray(W) |     W = asarray(W) | ||||||
|     t = W.dtype.char |     t = W.dtype.char | ||||||
| @@ -464,8 +465,7 @@ def K_laplacian(W, normalised=True, alpha=1.0): | |||||||
|     return K |     return K | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False): | ||||||
| def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5): |  | ||||||
|     """Returns diffusion kernel. |     """Returns diffusion kernel. | ||||||
|     input: |     input: | ||||||
|             -- W, adj. matrix |             -- W, adj. matrix | ||||||
| @@ -477,27 +477,45 @@ def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5): | |||||||
|     t = W.dtype.char |     t = W.dtype.char | ||||||
|     if len(W.shape)!=2: |     if len(W.shape)!=2: | ||||||
|         raise ValueError, "Non-matrix input to matrix function." |         raise ValueError, "Non-matrix input to matrix function." | ||||||
|     m,n = W.shape |     m, n = W.shape | ||||||
|     if t in ['F','D']: |     if t in ['F','D']: | ||||||
|         raise TypeError, "Complex input!" |         raise TypeError, "Complex input!" | ||||||
|     D = diag(sum(W,0)) |     D = diag(W.sum(0)) | ||||||
|     L = D-W |     L = D - W | ||||||
|     if normalised==True: |     if normalised==True: | ||||||
|         T = diag(sqrt(1./(sum(W,0)))) |         T = diag(sqrt(1./W.sum(0))) | ||||||
|         L = dot(dot(T,L),T) |         L = dot(dot(T, L), T) | ||||||
|     e,vr = eig(L) |     e, vr = eig(L) | ||||||
|     vri = inv(vr) #inv |     vri = inv(vr) #inv | ||||||
|     cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]] |     cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]] | ||||||
|     cutoff = 1.*abs(cond*maximum.reduce(e)) |     cutoff = 1.*abs(cond*maximum.reduce(e)) | ||||||
|     psigma = eye(m) # if sing vals are 0 exp(0)=1 (unnecessary) |     psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary) | ||||||
|     #psigma = zeros((m,n), dtype='<f8') |     #psigma = zeros((m,n), dtype='<f8') | ||||||
|     for i in range(len(e)): |     for i in range(len(e)): | ||||||
|         if abs(e[i]) > cutoff: |         if abs(e[i]) > cutoff: | ||||||
|             psigma[i,i] = exp(-beta*e[i]) |             psigma[i,i] = exp(-beta*e[i]) | ||||||
|  |         #else: | ||||||
|  |         #    psigma[i,i] = 0.0 | ||||||
|     K = real(dot(dot(vr, psigma), vri)) |     K = real(dot(dot(vr, psigma), vri)) | ||||||
|     I = eye(n, dtype='<f8') |     I = eye(n, dtype='<f8') | ||||||
|     K = (1. - alpha)*I + alpha*K |     K = (1. - alpha)*I + alpha*K | ||||||
|     return K |     return K | ||||||
|  |  | ||||||
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
    """Returns diffusion kernel, using fast pade approximation.

    The kernel is expm(-beta*L), where L = D - W is the graph Laplacian
    (optionally the normalised Laplacian T*L*T with T = D^(-1/2)), mixed
    with the identity as (1 - alpha)*I + alpha*K, like K_diffusion.

    input:
            -- W, adj. matrix (square; anything asarray accepts)
            -- normalised [True/False], use the normalised Laplacian
            -- alpha, weight of the kernel against the identity
               (alpha=1.0 returns the pure kernel)
            -- beta, [0->), (diffusion degree)
            -- ncomp, accepted for interface compatibility; not used
    output:
            -- K, diffusion kernel, same shape as W
    raises:
            -- ValueError if W is not a square 2-d array
    """
    # Coerce first: diag() of a 1 x n numpy matrix returns the diagonal of
    # that matrix instead of building an n x n diagonal matrix.
    W = asarray(W)
    if len(W.shape) != 2 or W.shape[0] != W.shape[1]:
        raise ValueError("Non-matrix input to matrix function.")

    degree = W.sum(0)
    L = diag(degree) - W
    if normalised:
        # NOTE: a node with zero degree gives a division by zero here.
        T = diag(sqrt(1./degree))
        L = dot(dot(T, L), T)

    K = expm(-beta*L)
    if alpha != 1.0:
        # Same identity mixing as K_diffusion; skipped for the default so
        # the pure kernel is returned untouched.
        K = (1. - alpha)*eye(W.shape[0]) + alpha*K
    return K
|  |      | ||||||
|      |      | ||||||
| def K_modularity(W,alpha=1.0): | def K_modularity(W,alpha=1.0): | ||||||
|     """ Returns the matrix square root of Newmans modularity.""" |     """ Returns the matrix square root of Newmans modularity.""" | ||||||
| @@ -530,3 +548,20 @@ def kernel_score(K, W): | |||||||
|     score = diag(dot(W, dot(K, W)) ) |     score = diag(dot(W, dot(K, W)) ) | ||||||
|     tot = sum(score) |     tot = sum(score) | ||||||
|     return score, tot |     return score, tot | ||||||
|  |  | ||||||
|  |  | ||||||
def modularity_matrix(G, nodelist=None):
    """Returns Newman's modularity matrix B = A - k*k'/(2m).

    A is the adjacency matrix, k the vector of node degrees and m the
    number of edges.  Degrees are taken as the column sums of A, so they
    are guaranteed to follow the same node ordering as A, and 2m is their
    total (assumes an undirected graph without self-loops).

    input:
            -- G, graph
            -- nodelist, nodes (and ordering) to use; if empty or None all
               nodes of G are used, otherwise the subgraph induced by
               nodelist is used
    output:
            -- B, modularity matrix (rows/columns ordered as nodelist)
    """
    if not nodelist:
        nodelist = G.nodes()
    else:
        G = NX.subgraph(G, nodelist)

    A = NX.adj_matrix(G, nodelist=nodelist)
    # Flatten to a plain 1-d float array whatever matrix type A is.
    k = asarray(A.sum(0), dtype='d').ravel()
    two_m = k.sum()
    if two_m == 0:
        # No edges: the expected-edge term is undefined; avoid 0/0 and
        # return the (all zero) adjacency matrix.
        return A
    B = A - outer(k, k)/two_m
    return B
|  |  | ||||||
|  |  | ||||||
|  |                          | ||||||
|  |      | ||||||
|   | |||||||
| @@ -41,30 +41,31 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No | |||||||
|      """Random block crossvalidation |      """Random block crossvalidation | ||||||
|     Leave-one-out is a subset, with n_blocks equals a.shape[-1] |     Leave-one-out is a subset, with n_blocks equals a.shape[-1] | ||||||
|     """ |     """ | ||||||
|      index = randperm(a.shape[axis]) |      #index = randperm(a.shape[axis]) | ||||||
|  |      index = arange(a.shape[axis]) | ||||||
|      if n_blocks==None: |      if n_blocks==None: | ||||||
|           n_blocks = a.shape[axis] |           n_blocks = a.shape[axis] | ||||||
|      n_in_set = ceil(float(a.shape[axis])/n_blocks) |      n_in_set = ceil(float(a.shape[axis])/n_blocks) | ||||||
|      out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)] |      out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)] | ||||||
|      for out in out_ind_sets: |      for out in out_ind_sets: | ||||||
|          inn = [i for i in index if i not in out] |           inn = [i for i in index if i not in out] | ||||||
|          acal = a.take(inn, 0) |           acal = a.take(inn, 0) | ||||||
|          atrue = a.take(out, 0) |           atrue = a.take(out, 0) | ||||||
|          bcal = b.take(inn, 0) |           bcal = b.take(inn, 0) | ||||||
|          btrue = b.take(out, 0) |           btrue = b.take(out, 0) | ||||||
|          if center: |           if center: | ||||||
|               mn_a = acal.mean(0)[newaxis] |                mn_a = acal.mean(0)[newaxis] | ||||||
|               acal = acal - mn_a |                acal = acal - mn_a | ||||||
|               atrue = atrue - mn_a |                atrue = atrue - mn_a | ||||||
|               mn_b = bcal.mean(0)[newaxis] |                mn_b = bcal.mean(0)[newaxis] | ||||||
|               bcal = bcal - mn_b |                bcal = bcal - mn_b | ||||||
|               btrue = btrue - mn_b |                btrue = btrue - mn_b | ||||||
|          if metric!=None: |           if metric!=None: | ||||||
|               acal = dot(acal, metric) |                acal = dot(acal, metric) | ||||||
|          if index_out: |           if index_out: | ||||||
|               yield acal, atrue, bcal, btrue, out |                yield acal, atrue, bcal, btrue, out | ||||||
|          else:      |           else:      | ||||||
|               yield acal, atrue, bcal, btrue |                yield acal, atrue, bcal, btrue | ||||||
|  |  | ||||||
|           |           | ||||||
| def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): | def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): | ||||||
| @@ -151,6 +152,7 @@ def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0): | |||||||
|              index = arange(m) |              index = arange(m) | ||||||
|              dummy = map(random.shuffle, array_split(index, blocks)) |              dummy = map(random.shuffle, array_split(index, blocks)) | ||||||
|              a_out = a.take(index, axis) |              a_out = a.take(index, axis) | ||||||
|  |           | ||||||
|          if index_out: |          if index_out: | ||||||
|               yield a_out, index |               yield a_out, index | ||||||
|          else: |          else: | ||||||
| @@ -164,7 +166,8 @@ def shuffle_1d(a, n_sets, axis=0): | |||||||
|      m = a.shape[axis] |      m = a.shape[axis] | ||||||
|      for ii in xrange(n_sets): |      for ii in xrange(n_sets): | ||||||
|          index = randperm(m) |          index = randperm(m) | ||||||
|          yield a.take(index, axis) |          a = a.take(index, axis) | ||||||
|  |          yield a | ||||||
|           |           | ||||||
| def diag_pert(a, n_sets=10, center=True, index_out=False): | def diag_pert(a, n_sets=10, center=True, index_out=False): | ||||||
|     """Alter generator returning sets perturbed with means at diagonals. |     """Alter generator returning sets perturbed with means at diagonals. | ||||||
| @@ -205,18 +208,17 @@ def diag_pert(a, n_sets=10, center=True, index_out=False): | |||||||
|         else: |         else: | ||||||
|              yield a_out |              yield a_out | ||||||
|  |  | ||||||
|  |   | ||||||
def outerprod_centering(aat, ret_mn=True):
    """Returns double centered symmetric outerproduct matrix.

    The column means (which equal the row means for a symmetric matrix)
    are shifted by half the grand mean, so that adding the resulting row
    vector to its own transpose gives, for every cell,
    column mean + row mean - grand mean.

    input:
            -- aat, symmetric outer product matrix
            -- ret_mn [True/False], also return the subtracted matrix
    output:
            -- aatc, the centered matrix
            -- mn_a, the matrix that was subtracted (only if ret_mn)
    """
    col_mean = aat.mean(0)[newaxis]
    shifted = col_mean - 0.5*col_mean.mean()
    # row vector + column vector broadcasts to the full square matrix
    mn_a = shifted + shifted.T
    aatc = aat - mn_a
    if not ret_mn:
        return aatc
    return aatc, mn_a
|       |       | ||||||
|       |       | ||||||
|       |       | ||||||
|   | |||||||
| @@ -12,11 +12,47 @@ from cx_utils import m_shape | |||||||
| def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | ||||||
|     """Returns rmsep and aopt for pls tailored for wide X. |     """Returns rmsep and aopt for pls tailored for wide X. | ||||||
|  |  | ||||||
|  |     The root mean square error of cross validation is calculated | ||||||
|  |     based on random block cross-validation. With number of blocks equal to | ||||||
|  |     number of samples [default] gives leave-one-out cv. | ||||||
|  |     The pls model is based on the simpls algorithm for wide X. | ||||||
|  |  | ||||||
|     comments: |     :Parameters: | ||||||
|              -- X, Y inputs need to be centered (fixme: check) |     X : ndarray  | ||||||
|  |         column centered data matrix of size (samples x variables) | ||||||
|  |     Y : ndarray | ||||||
|  |         column centered response matrix of size (samples x responses) | ||||||
|  |     amax : scalar  | ||||||
|  |         Maximum number of components | ||||||
|  |     n_blocks : scalar | ||||||
|  |         Number of blocks in cross validation | ||||||
|  |      | ||||||
|  |     :Returns:  | ||||||
|  |     rmsep : ndarray | ||||||
|  |         Root Mean Square Error of cross-validated Predictions  | ||||||
|  |     aopt : scalar | ||||||
|  |         Guestimate of the optimal number of components | ||||||
|  |  | ||||||
|  |     :SeeAlso: | ||||||
|  |     - pls_cv_val : Same output, not optimised for wide X | ||||||
|  |     - w_simpls : Simpls algorithm for wide X | ||||||
|  |      | ||||||
|  |     Notes | ||||||
|  |     ----- | ||||||
|  |     Based (cowardly translated) on m-files from the Chemoact toolbox | ||||||
|  |     X, Y inputs need to be centered (fixme: check) | ||||||
|  |      | ||||||
|  |  | ||||||
|  |     Examples | ||||||
|  |     -------- | ||||||
|  |  | ||||||
|  |     >>> import numpy as n | ||||||
|  |     >>> X = n.array([[1., 2., 3.],[]]) | ||||||
|  |     >>> Y = n.array([[1., 2., 3.],[]]) | ||||||
|  |     >>> w_pls(X, Y, 1) | ||||||
|  |     [4,5,6], 1 | ||||||
|     """ |     """ | ||||||
|  |      | ||||||
|     k, l = m_shape(Y) |     k, l = m_shape(Y) | ||||||
|     PRESS = zeros((l, amax+1), dtype='f') |     PRESS = zeros((l, amax+1), dtype='f') | ||||||
|     if n_blocks==None: |     if n_blocks==None: | ||||||
| @@ -30,7 +66,7 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | |||||||
|         if algo=='simpls': |         if algo=='simpls': | ||||||
|             dat = w_simpls(Din, Yin, amax) |             dat = w_simpls(Din, Yin, amax) | ||||||
|             Q, U, H = dat['Q'], dat['U'], dat['H'] |             Q, U, H = dat['Q'], dat['U'], dat['H'] | ||||||
|             That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) )) |             That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) )) | ||||||
|         else: |         else: | ||||||
|             raise NotImplementedError |             raise NotImplementedError | ||||||
|          |          | ||||||
| @@ -40,21 +76,13 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | |||||||
|             E = Yout[:,j][:,newaxis] - TQ |             E = Yout[:,j][:,newaxis] - TQ | ||||||
|             E = E + sum(E, 0)/Din.shape[0] |             E = E + sum(E, 0)/Din.shape[0] | ||||||
|             PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0) |             PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0) | ||||||
|     #Yhat = Y - dot(That,Q.T) |     Yhat = Y - dot(That,Q.T) | ||||||
|     rmsep = sqrt(PRESS/Y.shape[0]) |     rmsep = sqrt(PRESS/Y.shape[0]) | ||||||
|     aopt = find_aopt_from_sep(rmsep) |     aopt = find_aopt_from_sep(rmsep) | ||||||
|     return rmsep, aopt |     return rmsep, Yhat, aopt | ||||||
|  |  | ||||||
| def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | ||||||
|     """ Validation results of pls model.  |      | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     comments: |  | ||||||
|              -- X, Y inputs need to be centered (fixme: check) |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     """     |  | ||||||
|     k, l = m_shape(Y) |     k, l = m_shape(Y) | ||||||
|     PRESS = zeros((l, amax+1), dtype='<f8') |     PRESS = zeros((l, amax+1), dtype='<f8') | ||||||
|     EE = zeros((amax, k, l), dtype='<f8') |     EE = zeros((amax, k, l), dtype='<f8') | ||||||
| @@ -79,7 +107,30 @@ def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | |||||||
|  |  | ||||||
|     rmsep = sqrt(PRESS/(k-1.)) |     rmsep = sqrt(PRESS/(k-1.)) | ||||||
|     aopt = find_aopt_from_sep(rmsep) |     aopt = find_aopt_from_sep(rmsep) | ||||||
|     return rmsep, aopt |     return rmsep, Yhat, aopt | ||||||
|  |  | ||||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
    """Performs crossvalidation to get generalisation error in lpls.

    X and Y are split row-wise into calibration/left-out blocks by
    select_generators.pls_gen (no centering there); Z is passed unchanged
    to every fit.  For each block a model is fitted with nipals_lpls and
    the left-out rows of Y are predicted for every model size.

    input:
            -- X, data matrix
            -- Y, response matrix (2-d)
            -- Z, third matrix of the L-shaped model, forwarded to nipals_lpls
            -- a_max, maximum number of components
            -- nsets, number of cv blocks (None: pls_gen uses one block
               per row)
            -- alpha, forwarded to nipals_lpls
    output:
            -- rmsep, sqrt of the squared prediction error averaged over
               samples, shape (a_max, Y.shape[1])
            -- Yhat, cross-validated predictions, shape (a_max,) + Y.shape
            -- aopt, estimate from find_aopt_from_sep(rmsep)
    """
    cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets, center=False,
                                        index_out=True)
    k, l = Y.shape
    Yhat = empty((a_max, k, l), 'd')
    for xcal, xi, ycal, yi, ind in cv_iter:
        T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal, ycal, Z,
                                                                a_max=a_max,
                                                                alpha=alpha,
                                                                mean_ctr=[2,0,1],
                                                                verbose=False)
        for a in range(a_max):
            # NOTE(review): only the first element of the offset b0[a] is
            # used for all responses -- confirm this is intended when Y
            # has more than one column.
            Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])

    # Y (k x l) broadcasts against Yhat (a_max x k x l).
    sep = (Y - Yhat)**2
    rmsep = sqrt(sep.mean(1))
    aopt = find_aopt_from_sep(rmsep)
    return rmsep, Yhat, aopt
|  |  | ||||||
| def pca_alter_val(a, amax, n_sets=10, method='diag'): | def pca_alter_val(a, amax, n_sets=10, method='diag'): | ||||||
|     """Pca validation by altering elements in X. |     """Pca validation by altering elements in X. | ||||||
| @@ -146,8 +197,7 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | |||||||
|     if n_blocks == None: |     if n_blocks == None: | ||||||
|         n_blocks = b.shape[0] |         n_blocks = b.shape[0] | ||||||
|  |  | ||||||
|     Wcv = empty((n_blocks, a.shape[1], amax), dtype='f') |     Wcv = empty((n_blocks, a.shape[1], amax), dtype='d') | ||||||
|  |  | ||||||
|     if use_pack and metric==None: |     if use_pack and metric==None: | ||||||
|         u, s, inflater = svd(a, full_matrices=0) |         u, s, inflater = svd(a, full_matrices=0) | ||||||
|         a = u*s |         a = u*s | ||||||
| @@ -161,11 +211,10 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | |||||||
|             dat = bridge(a_in, b_in, amax, 'loads', 'fast') |             dat = bridge(a_in, b_in, amax, 'loads', 'fast') | ||||||
|  |  | ||||||
|         W = dat['W'] |         W = dat['W'] | ||||||
|  |  | ||||||
|         if use_pack and metric==None: |         if use_pack and metric==None: | ||||||
|             W = dot(inflater.T, W) |             W = dot(inflater.T, W) | ||||||
|  |  | ||||||
|         Wcv[nn,:,:] = W |         Wcv[nn,:,:] = W[:,:,] | ||||||
|          |          | ||||||
|     return Wcv |     return Wcv | ||||||
|  |  | ||||||
| @@ -200,6 +249,29 @@ def pca_jkP(a, aopt, n_blocks=None, metric=None): | |||||||
|     return PP |     return PP | ||||||
|  |  | ||||||
|  |  | ||||||
def lpls_jk(X, Y, Z, a_max, nsets=None, alpha=.5):
    """Returns jack-knife segments of the lpls weights for X and Z.

    X and Y are split row-wise by select_generators.pls_gen (no centering
    there); Z is passed unchanged to every fit.  One model is fitted with
    nipals_lpls per cv block and its W and L outputs are stored.

    input:
            -- X, data matrix (2-d)
            -- Y, response matrix
            -- Z, third matrix of the L-shaped model (2-d)
            -- a_max, number of components
            -- nsets, number of cv blocks (None: one block per row of X)
            -- alpha, forwarded to nipals_lpls
    output:
            -- WWx, W from each fit, shape (nsets, X.shape[1], a_max)
            -- WWz, L from each fit, shape (nsets, Z.shape[0], a_max)
    """
    m, n = X.shape
    o = Z.shape[0]
    if nsets is None:
        # same default pls_gen applies: one block per sample
        nsets = m
    cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets, center=False,
                                        index_out=False)
    WWx = empty((nsets, n, a_max), 'd')
    WWz = empty((nsets, o, a_max), 'd')
    for i, (xcal, xi, ycal, yi) in enumerate(cv_iter):
        T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal, ycal, Z,
                                                                a_max=a_max,
                                                                alpha=alpha,
                                                                mean_ctr=[2,0,1],
                                                                scale='loads',
                                                                verbose=False)
        WWx[i,:,:] = W
        WWz[i,:,:] = L

    return WWx, WWz
|  |  | ||||||
| def find_aopt_from_sep(sep, method='75perc'): | def find_aopt_from_sep(sep, method='75perc'): | ||||||
|     """Returns an estimate of optimal number of components from rmsecv. |     """Returns an estimate of optimal number of components from rmsecv. | ||||||
|     """ |     """ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user