Lib updates
This commit is contained in:
		| @@ -10,7 +10,7 @@ from fluents.workflow import Function, OptionsDialog, Options | ||||
| from fluents.dataset import Dataset | ||||
| from fluents import plots, dataset, workflow, logger | ||||
| import scipy | ||||
| from engines import pca, pls | ||||
| from engines import pca, pls, nipals_lpls | ||||
| from cx_stats import leverage, variances, hotelling | ||||
| from cx_utils import mat_center | ||||
| from validation import * | ||||
| @@ -238,14 +238,14 @@ class PLS(Model): | ||||
|         """Estimates cut off on significant vars by controlling fdr.""" | ||||
|  | ||||
|         if self._options['calc_qvals']==True: | ||||
|             qvals_sorted, qvals = pls_qvals(a, b, | ||||
|             qvals = pls_qvals(a, b, | ||||
|                               aopt=None, | ||||
|                               alpha=reg, | ||||
|                               n_iter=n_iter, | ||||
|                               algo='pls', | ||||
|                               sim_method=sim_method) | ||||
|             self.model['qval'] = qvals | ||||
|             self.model['qval_sorted'] = qvals_sorted | ||||
|             #self.model['qval_sorted'] = qvals_sorted | ||||
|         else: | ||||
|            self.model['qval'] = None | ||||
|            self.model['qval_sorted'] = None  | ||||
| @@ -276,18 +276,19 @@ class PLS(Model): | ||||
|         pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))] | ||||
|         zero_dim = ['_doe',['0']] # null dim, vector (hidden) | ||||
|  | ||||
|         match_ids = {'E':[ids_0, ids_1], | ||||
|                      'P':[ids_1, pc_ids], | ||||
|                      'T':[ids_0, pc_ids], | ||||
|                      'W': [ids_1, pc_ids], | ||||
|                      'R': [ids_1, pc_ids], | ||||
|                      'Q':[ids_3, pc_ids], | ||||
|                      'F':[ids_0, ids_3], | ||||
|                      'B':[ids_1, ids_3], | ||||
|                      'qval':[ids_1, zero_dim], | ||||
|         match_ids = {'E' : [ids_0, ids_1], | ||||
|                      'P' : [ids_1, pc_ids], | ||||
|                      'T' : [ids_0, pc_ids], | ||||
|                      'W' : [ids_1, pc_ids], | ||||
|                      'R' : [ids_1, pc_ids], | ||||
|                      'Q' : [ids_3, pc_ids], | ||||
|                      'F' : [ids_0, ids_3], | ||||
|                      'B' : [ids_1, ids_3], | ||||
|                      'qval' : [ids_1, zero_dim], | ||||
|                      'qval_sorted':[ids_1, zero_dim], | ||||
|                      'w_tsq':[ids_1, zero_dim], | ||||
|                      'rmsep':[ids_3, pc_ids], | ||||
|                      'w_tsq' : [ids_1, zero_dim], | ||||
|                      'rmsep' : [ids_3, pc_ids], | ||||
|                      'CP': [ids_1, pc_ids] | ||||
|                      } | ||||
|          | ||||
|         array = self.model[name]      | ||||
| @@ -330,6 +331,17 @@ class PLS(Model): | ||||
|         self.model['var_y'] = var_y | ||||
|         self.model['exp_var_y'] = exp_var_y | ||||
|          | ||||
|         if options['calc_corrloads']: | ||||
|             corr_load = scipy.empty_like(self.model['P'].copy()) | ||||
|             T = self.model['T'] | ||||
|             X = self._data['X'] | ||||
|             # For each variable/attribute in original matrix (not meancentered) | ||||
|             for i,score in enumerate(T.T): | ||||
|                 for j, profile in enumerate(X.T): | ||||
|                     corrs = scipy.corrcoef(score, profile) | ||||
|                     corr_load[j,i] = corrs[0,1] | ||||
|             self.model['CP'] = corr_load | ||||
|              | ||||
|         if options['calc_conf']: | ||||
|             self.confidence(**options.confidence_options()) | ||||
|  | ||||
| @@ -353,6 +365,141 @@ class PLS(Model): | ||||
|             #run with current data and options | ||||
|             return self.run_o(a, b) | ||||
|  | ||||
| class LPLS(Model): | ||||
|     def __init__(self, id='lpls', name='LPLS'): | ||||
|         Model.__init__(self, id, name) | ||||
|         self._options = LplsOptions() | ||||
|          | ||||
|     def validation(self, opt): | ||||
|         """Returns rmsep for lpls model. | ||||
|         """ | ||||
|          | ||||
|         if opt['calc_cv']==True: | ||||
|             val_engine = opt['val_engine'] | ||||
|             rmsep, aopt = val_engine(self.model['X'], self.model['Y'], | ||||
|                                      self.model['Z'], opt['amax'], opt['n_sets'], opt['xz_alpha']) | ||||
|             self.model['rmsep'] = rmsep | ||||
|             self.model['aopt'] = aopt | ||||
|         else: | ||||
|             self.model['rmsep'] = None | ||||
|             self.model['aopt'] = opt['aopt'] | ||||
|          | ||||
|     def confidence(self, opt): | ||||
|         """Returns a confidence measure for model parameters | ||||
|         Supported parameters: W | ||||
|         """ | ||||
|         aopt = self.model['aopt'] | ||||
|         if opt['calc_conf']: | ||||
|             Wx, Wz = lpls_jk(self.model['X'], self.model['Y'], self.model['Z'], aopt, n_sets) | ||||
|             Wcal = self.model['W'][:,:aopt] | ||||
|             Lcal = self.model['L'][:,:aopt] | ||||
|             # ensure that Wcal is scaled | ||||
|             tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt]) | ||||
|             Wcal = Wcal*tnorm | ||||
|             a,b,c,d,e = opt['p_center'], opt['crot'], opt['alpha'], opt['strict'], opt['cov_center'] | ||||
|             tsqx = hotelling(Wx, Wcal, a,b,c,d,e) | ||||
|             tsqz = hotelling(Wz, Lcal, a,b,c,d,e) | ||||
|             self.model['tsqx'] = tsqx | ||||
|             self.model['tsqz'] = tsqz | ||||
|         else: | ||||
|             self.model['tsqx'] = None | ||||
|             self.model['tsqz'] = None | ||||
|  | ||||
|     def permutation_confidence(self, opt): | ||||
|         """Estimates cut off on significant vars by controlling fdr. | ||||
|          | ||||
|         """ | ||||
|         self.model['qval'] = None | ||||
|         self.model['qval_sorted'] = None  | ||||
|  | ||||
|     def make_model(self, opt): | ||||
|         """Make model on amax components. | ||||
|         """ | ||||
|         engine = opt['engine'] | ||||
|         dat = engine(self._data['X'], self._data['Y'], self._data['Z'], | ||||
|                      opt['amax'], opt['xz_alpha'], opt['center_mth'], | ||||
|                      opt['mode'], opt['scale'], False) | ||||
|         self.model.update(dat) | ||||
|          | ||||
|     def as_dataset(self, name, dtype='Dataset'): | ||||
|         """Return any model parameter as Dataset | ||||
|         No ids matching | ||||
|         """ | ||||
|         if name not in self.model.keys(): | ||||
|             return | ||||
|         DX, DY, DZ = self._dataset['X'], self._dataset['Y'], self._dataset['Z'] | ||||
|         dim_name_0, dim_name_1 = DX.get_dim_name() | ||||
|         dim_name_2, dim_name_3 = DY.get_dim_name() | ||||
|         dim_name_4, dim_name_5 = DZ.get_dim_name() | ||||
|         #samples | ||||
|         ids_0 = [dim_name_0, DX.get_identifiers(dim_name_0, sorted=True)] | ||||
|          # x vars (genes) | ||||
|         ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)] | ||||
|         # y vars (sample descriptors) | ||||
|         ids_3 = [dim_name_3, DY.get_identifiers(dim_name_3, sorted=True)] | ||||
|         #z-vars (variable descriptors) | ||||
|         ids_4 = [dim_name_4, DZ.get_identifiers(dim_name_4, sorted=True)] | ||||
|         # components (hidden) | ||||
|         pc_ids = ['_comp', map(str, range(self._options['amax']))] | ||||
|         pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))] | ||||
|         zero_dim = ['_doe',['0']] # null dim, vector (hidden) | ||||
|  | ||||
|         match_ids = {'E' : [ids_0, ids_1], | ||||
|                      'P' : [ids_1, pc_ids], | ||||
|                      'T' : [ids_0, pc_ids], | ||||
|                      'W' : [ids_1, pc_ids], | ||||
|                      'L' : [ids_4, pc_ids], | ||||
|                      'Q' : [ids_3, pc_ids], | ||||
|                      'F' : [ids_0, ids_3], | ||||
|                      'B' : [ids_1, ids_3], | ||||
|                      'tsqx' : [ids_1, zero_dim], | ||||
|                      'tsqz' : [ids_4, zero_dim], | ||||
|                      'K' : [ids_1, pc_ids], | ||||
|                      'rmsep' : [ids_3, pc_ids] | ||||
|                      } | ||||
|          | ||||
|         array = self.model[name]      | ||||
|         M = Dataset(array, identifiers=match_ids[name], name=name) | ||||
|         return M | ||||
|  | ||||
|     def get_out_plots(self, options): | ||||
|         out=[] | ||||
|         for plt in options['out_plots']: | ||||
|             out.append(plt(self)) | ||||
|         return out | ||||
|      | ||||
|     def run(self, a, b, c): | ||||
|         """Run L-PLS with present options.""" | ||||
|         options = self._options | ||||
|         self._dataset['X'] = a | ||||
|         self._dataset['Y'] = b | ||||
|         self._dataset['Z'] = c | ||||
|         self._data['X'] = a.asarray() | ||||
|         self._data['Y'] = b.asarray() | ||||
|         self._data['Z'] = c.asarray() | ||||
|         self.validation(options) | ||||
|         self.make_model(options) | ||||
|         if options['calc_conf']: | ||||
|             self.confidence(options) | ||||
|  | ||||
|         out = [self.as_dataset(p) for p in options['out_data']] | ||||
|         for plt in self.get_out_plots(options): | ||||
|             out.append(plt) | ||||
|         return out | ||||
|  | ||||
|     def run_gui(self, a, b, c): | ||||
|         """Run LPLS with option gui. | ||||
|         """ | ||||
|         dialog = LPlsOptionsDialog([a, b, c], self._options) | ||||
|         dialog.show_all() | ||||
|         response = dialog.run() | ||||
|         dialog.hide() | ||||
|  | ||||
|         if response == gtk.RESPONSE_OK: | ||||
|             # set output data and plots | ||||
|             dialog.set_output() | ||||
|             #run with current data and options | ||||
|             return self.run(a, b, c) | ||||
|  | ||||
| class PcaOptions(Options): | ||||
|     """Options for Principal Component Analysis. | ||||
| @@ -403,7 +550,9 @@ class PcaOptions(Options): | ||||
|                             ] | ||||
|          | ||||
|         opt['out_data'] = ['T','P', 'p_tsq'] | ||||
|         opt['out_plots'] = [blmplots.PcaScorePlot,blmplots.PcaLoadingPlot,blmplots.LineViewXc] | ||||
|         opt['out_plots'] = [blmplots.PcaScorePlot, | ||||
|                             blmplots.PcaLoadingPlot, | ||||
|                             blmplots.LineViewXc] | ||||
|  | ||||
|         self.update(opt) | ||||
|          | ||||
| @@ -444,6 +593,7 @@ class PlsOptions(Options): | ||||
|         opt['center_mth'] = mat_center | ||||
|         opt['scale'] = 'scores' | ||||
|  | ||||
|         opt['calc_corrloads'] = True | ||||
|         opt['calc_conf'] = False | ||||
|         opt['n_sets'] = 5 | ||||
|         opt['strict'] = True | ||||
| @@ -468,7 +618,8 @@ class PlsOptions(Options): | ||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), | ||||
|                             (blmplots.LineViewXc, 'Line view', True), | ||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False) | ||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False), | ||||
|                             (blmplots.PlsCorrelationLoadingPlot, 'Corr. loadings', True) | ||||
|                             ] | ||||
|          | ||||
|         opt['out_data'] = ['T','P', 'p_tsq'] | ||||
| @@ -494,14 +645,87 @@ class PlsOptions(Options): | ||||
|                     'strict', 'crot', 'cov_center']  | ||||
|         return self._copy_from_list(opt_list)   | ||||
|      | ||||
|  | ||||
|     def permutation_confidence(self): | ||||
|         opt_list = ['q_pert_method', 'q_iter'] | ||||
|         return self._copy_from_list(opt_list) | ||||
|  | ||||
|  | ||||
| class LplsOptions(Options): | ||||
|     """Options for L-shaped Partial Least Squares Regression. | ||||
|     """ | ||||
|     def __init__(self): | ||||
|         Options.__init__(self) | ||||
|         self._set_default() | ||||
|              | ||||
|     def _set_default(self): | ||||
|         opt = {} | ||||
|         opt['engine'] = nipals_lpls | ||||
|         opt['mode'] = 'normal' # how much info to calculate | ||||
|         opt['amax'] = 10 | ||||
|         opt['aopt'] = 9 | ||||
|         opt['xz_alpha'] = .5 | ||||
|         opt['auto_aopt'] = False | ||||
|         opt['center'] = True | ||||
|         opt['center_mth'] = [2, 0, 1] | ||||
|         opt['scale'] = 'scores' | ||||
|         opt['calc_conf'] = False | ||||
|         opt['n_sets'] = 7 | ||||
|         opt['strict'] = False | ||||
|         opt['p_center'] = 'med' | ||||
|         opt['alpha'] = .3 | ||||
|         opt['cov_center'] = 'med' | ||||
|         opt['crot'] = True | ||||
|  | ||||
|         opt['calc_cv'] = False | ||||
|         opt['cv_val_method'] = 'random' | ||||
|         opt['cv_val_sets'] = opt['n_sets'] | ||||
|  | ||||
|         opt['all_data'] = [('T', 'scores', True), | ||||
|                            ('Wx', 'X-weights', True), | ||||
|                            ('Wz', 'Z-weights', True), | ||||
|                            ('E','residuals', False), | ||||
|                            ('tsq_x', 't2X', False), | ||||
|                            ('rmsep', 'RMSEP', False) | ||||
|                            ] | ||||
|  | ||||
|         # (class, name, sensitive, ticked) | ||||
|         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), | ||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), | ||||
|                             (blmplots.LineViewXc, 'Line view', True), | ||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False), | ||||
|                             (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False) | ||||
|                             ] | ||||
|          | ||||
|         opt['out_data'] = ['T','P'] | ||||
|         opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc] | ||||
|          | ||||
|         #opt['out_data'] = None | ||||
|                              | ||||
|         opt['pack'] = False | ||||
|         opt['calc_qvals'] = False | ||||
|         opt['q_pert_method'] = 'shuffle_rows' | ||||
|         opt['q_iter'] = 20         | ||||
|  | ||||
|         self.update(opt) | ||||
|  | ||||
|     def make_model_options(self): | ||||
|         """Options for make_model method.""" | ||||
|         opt_list = ['scale','mode', 'amax', 'engine'] | ||||
|         return self._copy_from_list(opt_list)    | ||||
|  | ||||
|     def confidence_options(self): | ||||
|         """Options for confidence method.""" | ||||
|         opt_list = ['n_sets', 'aopt', 'alpha', 'p_center', | ||||
|                     'strict', 'crot', 'cov_center']  | ||||
|         return self._copy_from_list(opt_list)   | ||||
|      | ||||
|     def validation_options(self): | ||||
|         """Options for pre_validation method.""" | ||||
|         opt_list = ['amax', 'n_sets', 'cv_val_method'] | ||||
|         return self._copy_from_list(opt_list) | ||||
|  | ||||
|     def permutation_confidence(self): | ||||
|         opt_list = ['q_pert_method', 'q_iter'] | ||||
|         return self._copy_from_list(opt_list) | ||||
|  | ||||
| class PcaOptionsDialog(OptionsDialog): | ||||
|     """Options dialog for Principal Component Analysis. | ||||
| @@ -716,6 +940,210 @@ class PcaOptionsDialog(OptionsDialog): | ||||
|             self._options['strict'] = True  | ||||
|  | ||||
|  | ||||
| class LplsOptionsDialog(OptionsDialog): | ||||
|     """Options dialog for L-shaped Partial Least squares regression. | ||||
|     """ | ||||
|     def __init__(self, data, options, input_names=['X', 'Y', 'Z']): | ||||
|         OptionsDialog.__init__(self, data, options, input_names) | ||||
|         glade_file = os.path.join(fluents.DATADIR, 'lpls_options.glade') | ||||
|  | ||||
|         notebook_name = "vbox1" | ||||
|         page_name = "Options" | ||||
|         self.add_page_from_glade(glade_file, notebook_name, page_name) | ||||
|         # connect signals to handlers | ||||
|         dic = {"on_amax_value_changed" : self.on_amax_changed, | ||||
|                "on_aopt_value_changed" : self.on_aopt_changed, | ||||
|                "auto_aopt_toggled" : self.auto_aopt_toggled, | ||||
|                "center_toggled" : self.center_toggled, | ||||
|                #"on_scale_changed" : self.on_scale_changed, | ||||
|                "on_val_none" : self.val_toggled, | ||||
|                "on_val_cv" : self.cv_toggled, | ||||
|                "on_cv_method_changed" : self.on_cv_method_changed, | ||||
|                "on_cv_sets_changed" : self.on_cv_sets_changed, | ||||
|                "on_conf_toggled" : self.conf_toggled, | ||||
|                "on_subset_loc_changed" : self.on_subset_loc_changed, | ||||
|                "on_cov_loc_changed" : self.on_cov_loc_changed, | ||||
|                "on_alpha_changed" : self.on_alpha_changed, | ||||
|                "on_rot_changed" : self.on_rot_changed, | ||||
|                "on__toggled" : self.conf_toggled, | ||||
|                "on_qval_changed" : self.on_qval_changed, | ||||
|                "on_iter_changed" : self.on_iter_changed | ||||
|                } | ||||
|          | ||||
|         self.wTree.signal_autoconnect(dic) | ||||
|  | ||||
|         # set/ensure valid default values/ranges | ||||
|         # | ||||
|         amax_sb = self.wTree.get_widget("amax_spinbutton") | ||||
|         max_comp =  min(data[0].shape) # max num of components | ||||
|         if self._options['amax']>max_comp: | ||||
|             logger.log('debug', 'amax default too large ... adjusting') | ||||
|             self._options['amax'] = max_comp | ||||
|         amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0) | ||||
|         # aopt spin button | ||||
|         aopt_sb = self.wTree.get_widget("aopt_spinbutton") | ||||
|         if self._options['aopt']>self._options['amax']: | ||||
|             self._options['aopt'] = self._options['amax'] + 1 - 1 | ||||
|         aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0) | ||||
|  | ||||
|         # scale | ||||
|         # scale_cb = self.wTree.get_widget("scale_combobox") | ||||
|         # scale_cb.set_active(0) | ||||
|  | ||||
|         # validation frames | ||||
|         if self._options['calc_cv']==False: | ||||
|             cv_frame = self.wTree.get_widget("cv_frame") | ||||
|             cv_frame.set_sensitive(False) | ||||
|  | ||||
|         cv = self.wTree.get_widget("cv_method").set_active(0) | ||||
|  | ||||
|         # confidence | ||||
|         if self._options['calc_conf']==True: | ||||
|             self.wTree.get_widget("subset_expander").set_sensitive(True) | ||||
|         else: | ||||
|             self.wTree.get_widget("subset_expander").set_sensitive(False) | ||||
|  | ||||
|         cb = self.wTree.get_widget("subset_loc") | ||||
|         _m = {'med': 0, 'mean': 1, 'full_model': 2} | ||||
|         cb.set_active(_m.get(self._options['p_center'])) | ||||
|  | ||||
|         cb = self.wTree.get_widget("cov_loc") | ||||
|         _m = {'med': 0, 'mean': 1} | ||||
|         cb.set_active(_m.get(self._options['cov_center'])) | ||||
|  | ||||
|         hs = self.wTree.get_widget("alpha_scale") | ||||
|         hs.set_value(self._options['alpha']) | ||||
|  | ||||
|         tb = self.wTree.get_widget("qvals") | ||||
|         tb.set_sensitive(True) | ||||
|          | ||||
|          | ||||
|     def on_amax_changed(self, sb): | ||||
|         logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int()) | ||||
|         amax = sb.get_value_as_int() | ||||
|         # update aopt if needed | ||||
|         if amax<self._options['aopt']: | ||||
|             self._options['aopt'] = amax | ||||
|         aopt_sb = self.wTree.get_widget("aopt_spinbutton") | ||||
|         aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0) | ||||
|         self._options['amax'] = sb.get_value_as_int() | ||||
|          | ||||
|     def on_aopt_changed(self, sb): | ||||
|         aopt = sb.get_value_as_int() | ||||
|         self._options['aopt'] = aopt | ||||
|          | ||||
|     def auto_aopt_toggled(self, tb): | ||||
|         aopt_sb = self.wTree.get_widget("aopt_spinbutton") | ||||
|         if tb.get_active(): | ||||
|             self._options['auto_aopt'] = True | ||||
|             aopt_sb.set_sensitive(False) | ||||
|         else: | ||||
|             self._options['auto_aopt'] = False | ||||
|             aopt_sb.set_sensitive(True) | ||||
|  | ||||
|     def center_toggled(self, tb): | ||||
|         if tb.get_active(): | ||||
|             self._options['center'] = True | ||||
|         else: | ||||
|             logger.log("debug", "centering set to False") | ||||
|             self._options['center'] = False | ||||
|  | ||||
|     #def on_scale_changed(self, cb): | ||||
|     #    scale = cb.get_active_text() | ||||
|     #    if scale=='Scores': | ||||
|     #        self._options['scale'] = 'scores' | ||||
|     #    elif scale=='Loadings': | ||||
|     #        self._options['scale'] = 'loads' | ||||
|     #    else: | ||||
|     #        raise IOError | ||||
|  | ||||
|     def val_toggled(self, tb): | ||||
|         """Callback for validation: None. """ | ||||
|         cv_frame = self.wTree.get_widget("cv_frame") | ||||
|         cv_tb = self.wTree.get_widget("cv_toggle") | ||||
|         if tb.get_active(): | ||||
|             self._options['calc_cv'] = False | ||||
|             cv_frame.set_sensitive(False) | ||||
|             cv_tb.set_sensitive(False) | ||||
|         else: | ||||
|             cv_tb.set_sensitive(True) | ||||
|             if cv_tb.get_active(): | ||||
|                 cv_frame.set_sensitive(True) | ||||
|                 self._options['calc_cv'] = True | ||||
|  | ||||
|     def cv_toggled(self, tb): | ||||
|         cv_frame = self.wTree.get_widget("cv_frame") | ||||
|         val_tb = self.wTree.get_widget("val_none_toggle") | ||||
|         if tb.get_active(): | ||||
|             cv_frame.set_sensitive(True) | ||||
|             self._options['calc_cv'] = True | ||||
|         else: | ||||
|             cv_frame.set_sensitive(False) | ||||
|             self._options['calc_cv'] = False | ||||
|  | ||||
|     def on_cv_method_changed(self, cb): | ||||
|         method = cb.get_active_text() | ||||
|         if method == 'Random': | ||||
|             self._options['cv_val_method'] = 'random' | ||||
|  | ||||
|     def on_cv_sets_changed(self, sb): | ||||
|         val = sb.get_value_as_int() | ||||
|         self._options['cv_val_sets'] = val | ||||
|  | ||||
|     def conf_toggled(self, tb): | ||||
|         if tb.get_active(): | ||||
|             self._options['calc_conf'] = False | ||||
|             self.wTree.get_widget("subset_expander").set_sensitive(False) | ||||
|         else: | ||||
|             self._options['calc_conf'] = True | ||||
|             self.wTree.get_widget("subset_expander").set_sensitive(True) | ||||
|  | ||||
|     def on_subset_loc_changed(self, cb): | ||||
|         method = cb.get_active_text() | ||||
|         if method=='Full model': | ||||
|             self._options['p_center'] = 'full_model' | ||||
|         elif method=='Median': | ||||
|             self._options['p_center'] = 'med' | ||||
|         elif method=='Mean': | ||||
|             self._options['p_center'] = 'mean' | ||||
|  | ||||
|     def on_cov_loc_changed(self, cb): | ||||
|         method = cb.get_active_text() | ||||
|         if method=='Median': | ||||
|             self._options['cov_center'] = 'med' | ||||
|         elif method=='Mean': | ||||
|             self._options['cov_center'] = 'mean' | ||||
|  | ||||
|     def on_alpha_changed(self, hs): | ||||
|         self._options['alpha'] = hs.get_value() | ||||
|  | ||||
|     def on_rot_changed(self, rg): | ||||
|         proc, strict = rg | ||||
|         if proc.get_active(): | ||||
|             self._options['crot'] = True | ||||
|         else: | ||||
|             self._options['crot'] = True | ||||
|             self._options['strict'] = True  | ||||
|  | ||||
|     def qval_toggled(self, tb): | ||||
|         if tb.get_active(): | ||||
|             self._options['calc_qval'] = False | ||||
|             self.wTree.get_widget("qval_method").set_sensitive(False) | ||||
|             self.wTree.get_widget("q_iter").set_sensitive(False) | ||||
|         else: | ||||
|             self._options['calc_qval'] = True | ||||
|             self.wTree.get_widget("qval_method").set_sensitive(True) | ||||
|             self.wTree.get_widget("q_iter").set_sensitive(True) | ||||
|  | ||||
|     def on_iter_changed(self, sb): | ||||
|         self._options['q_iter'] = sb.get_value() | ||||
|  | ||||
|     def on_qval_changed(self, cb): | ||||
|         q_method = cb.get_active_text() | ||||
|         if method=='Shuffle rows': | ||||
|             self._options['q_pert_method'] = 'shuffle' | ||||
|          | ||||
|              | ||||
| class PlsOptionsDialog(OptionsDialog): | ||||
|     """Options dialog for Partial Least squares regression. | ||||
|     """ | ||||
| @@ -918,5 +1346,3 @@ class PlsOptionsDialog(OptionsDialog): | ||||
|         q_method = cb.get_active_text() | ||||
|         if method=='Shuffle rows': | ||||
|             self._options['q_pert_method'] = 'shuffle' | ||||
|          | ||||
|              | ||||
|   | ||||
| @@ -191,6 +191,11 @@ class PlsCorrelationLoadingPlot(BlmScatterPlot): | ||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP') | ||||
|          | ||||
|  | ||||
| class LplsHypoidCorrelationPlot(BlmScatterPlot): | ||||
|     def __init__(self, model, absi=0, ordi=1): | ||||
|         title = "Hypoid correlations(%s)" %model._dataset['X'].get_name() | ||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W') | ||||
|      | ||||
| class LineViewXc(plots.LineViewPlot): | ||||
|     """A line view of centered raw data | ||||
|     """ | ||||
| @@ -214,8 +219,8 @@ class PlsQvalScatter(plots.ScatterPlot): | ||||
|     def __init__(self, model, pc=0): | ||||
|         if not model.model.has_key('w_tsq'): | ||||
|             return None | ||||
|         self._W = model.model['P'] | ||||
|         dataset_1 = model.as_dataset('P') | ||||
|         self._W = model.model['W'] | ||||
|         dataset_1 = model.as_dataset('W') | ||||
|         dataset_2 = model.as_dataset('w_tsq') | ||||
|         id_dim = dataset_1.get_dim_name(0) #genes | ||||
|         sel_dim = dataset_1.get_dim_name(1) #_comp | ||||
|   | ||||
| @@ -115,6 +115,7 @@ def expl_var_y(Y, T, Q): | ||||
|          | ||||
| def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|               n_iter=20, algo='pls', | ||||
|               center=True, | ||||
|               sim_method='shuffle', | ||||
|               p_center='med', cov_center='med', | ||||
|               crot=True, strict=False, metric=None): | ||||
| @@ -122,8 +123,98 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|     """Returns qvals for pls model. | ||||
|  | ||||
|     input: | ||||
|     a -- centered data matrix | ||||
|     b -- centered data matrix | ||||
|     a -- data matrix | ||||
|     b -- data matrix | ||||
|     aopt -- scalar, opt. number of components | ||||
|     alpha -- [0,1] regularisation parameter for T2-test | ||||
|     n_iter -- number of permutations | ||||
|     sim_method -- permutation method ['shuffle'] | ||||
|     p_center -- location estimator for sub models ['med'] | ||||
|     cov_center -- location estimator for covariance of submodels ['med'] | ||||
|     crot -- bool, use rotations of sub models? | ||||
|     strict -- bool, use stict (rot/flips only) rotations? | ||||
|     metric -- bool, use row metric? | ||||
|     """ | ||||
|      | ||||
|     m, n = a.shape | ||||
|     TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets) | ||||
|     n_false = zeros((n, n_iter), dtype='d') | ||||
|  | ||||
|     #full model | ||||
|     if center: | ||||
|         ac = a - a.mean(0) | ||||
|         bc = b - b.mean(0) | ||||
|     if metric!=None: | ||||
|         ac = dot(ac, metric) | ||||
|     if algo=='bridge': | ||||
|         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||
|     else: | ||||
|         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True) | ||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                          alpha=alpha, crot=crot, strict=strict, | ||||
|                          cov_center=cov_center) | ||||
|     t0 = time.time() | ||||
|     Vs = shuffle_1d(bc, n_iter, axis=0) | ||||
|     for i, b_shuff in enumerate(Vs): | ||||
|         t1 = time.time() | ||||
|         if algo=='bridge': | ||||
|             dat = bridge(ac, b_shuff, aopt, 'loads','fast') | ||||
|         else: | ||||
|             dat = pls(ac, b_shuff, aopt, 'loads', 'fast') | ||||
|         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                              alpha=alpha, crot=crot, strict=strict, | ||||
|                              cov_center=cov_center) | ||||
|         print time.time() - t1 | ||||
|     sort_index = argsort(tsq_full)[::-1] | ||||
|     back_sort_index = sort_index.argsort() | ||||
|     print time.time() - t0 | ||||
|  | ||||
|     # count false positives | ||||
|     tsq_full_sorted = tsq_full.take(sort_index) | ||||
|     for i in xrange(n_iter): | ||||
|         for j in xrange(n): | ||||
|             n_false[j,i] = sum(TSQ[:,i]>=tsq_full[j]) # number of false pos. genes (0-n) | ||||
|     false_pos = median(n_false, 1) | ||||
|     ll = arange(1, len(false_pos)+1, 1) | ||||
|     sort_qval = false_pos.take(sort_index)/ll | ||||
|     qval = false_pos/ll.take(back_sort_index) | ||||
|     print time.time() - t0 | ||||
|     #return qval, false_pos, TSQ, tsq_full | ||||
|     return qval | ||||
|  | ||||
| def ensure_strict(C, only_flips=True): | ||||
|     """Ensure that a rotation matrix does only 90 degree rotations. | ||||
|     In multiplication with pcs this allows flips and reordering. | ||||
|  | ||||
|     if only_flips is True there will onlt be flips allowed | ||||
|     """ | ||||
|     Cm = C | ||||
|     S = sign(C) # signs | ||||
|     if only_flips==True: | ||||
|         C = eye(Cm.shape[0])*S | ||||
|         return C | ||||
|     Cm = zeros_like(C) | ||||
|     Cm.putmask(1.,abs(C)>.6) | ||||
|     if det(Cm)>1: | ||||
|         raise ValueError,"Implement this!" | ||||
|     return Cm*S | ||||
|  | ||||
| def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | ||||
|                  n_iter=20, algo='pls', | ||||
|                  sim_method='shuffle', | ||||
|                  p_center='med', cov_center='med', | ||||
|                  crot=True, strict=False, metric=None): | ||||
|  | ||||
|     """Returns qvals for pls model. | ||||
|     Shuffling of variables in X is preprocessed in metric. | ||||
|     Null model is 'If I put genes randomly on network' ... if they are sign: | ||||
|     then this is due to network structure and not covariance with response. | ||||
|  | ||||
|     input: | ||||
|     a -- data matrix | ||||
|     b -- data matrix | ||||
|     aopt -- scalar, opt. number of components | ||||
|     alpha -- [0,1] regularisation parameter for T2-test | ||||
|     n_iter -- number of permutations | ||||
| @@ -140,25 +231,33 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|     n_false = zeros((n, n_iter), dtype='<f8') | ||||
|  | ||||
|     #full model | ||||
|     if metric!=None: | ||||
|         a = dot(a, metric) | ||||
|  | ||||
|     # center? | ||||
|     if center==True: | ||||
|         ac = a - a.mean(0) | ||||
|         bc = b - b.mean(0) | ||||
|     if metric==None: | ||||
|         metric = eye(n,n) | ||||
|     if algo=='bridge': | ||||
|         dat = bridge(a, b, aopt, 'loads', 'fast') | ||||
|         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||
|     else: | ||||
|         dat = pls(a, b, aopt, 'loads', 'fast') | ||||
|         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                          alpha=alpha, crot=crot, strict=strict, | ||||
|                          cov_center=cov_center) | ||||
|     t0 = time.time() | ||||
|     Vs = shuffle_1d(b, n_iter) | ||||
|     for i, b_shuff in enumerate(Vs): | ||||
|     Vs = shuffle_1d(a, n_iter, 1) | ||||
|     for i, a_shuff in enumerate(Vs): | ||||
|         t1 = time.time() | ||||
|         a = a_shuff - a_shuff.mean(0) | ||||
|         a = dot(a, metric) | ||||
|          | ||||
|         if algo=='bridge': | ||||
|             dat = bridge(a, b_shuff, aopt, 'loads','fast') | ||||
|             dat = bridge(a, b, aopt, 'loads','fast') | ||||
|         else: | ||||
|             dat = pls(a, b, aopt, 'loads', 'fast') | ||||
|         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|         Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                              alpha=alpha, crot=crot, strict=strict, | ||||
|                              cov_center=cov_center) | ||||
| @@ -177,24 +276,8 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|     sort_qval = false_pos.take(sort_index)/ll | ||||
|     qval = false_pos/ll.take(back_sort_index) | ||||
|     print time.time() - t0 | ||||
|     return qval, false_pos, TSQ, tsq_full | ||||
|  | ||||
| def ensure_strict(C, only_flips=True): | ||||
|     """Ensure that a rotation matrix does only 90 degree rotations. | ||||
|     In multiplication with pcs this allows flips and reordering. | ||||
|  | ||||
|     if only_flips is True there will onlt be flips allowed | ||||
|     """ | ||||
|     Cm = C | ||||
|     S = sign(C) # signs | ||||
|     if only_flips==True: | ||||
|         C = eye(Cm.shape[0])*S | ||||
|         return C | ||||
|     Cm = zeros_like(C) | ||||
|     Cm.putmask(1.,abs(C)>.6) | ||||
|     if det(Cm)>1: | ||||
|         raise ValueError,"Implement this!" | ||||
|     return Cm*S | ||||
|     #return qval, false_pos, TSQ, tsq_full | ||||
|     return qval | ||||
|  | ||||
| def leverage(aopt=1,*args): | ||||
|     """Returns leverages | ||||
| @@ -253,3 +336,10 @@ def ssq(E, axis=0, weights=None): | ||||
|         raise NotImplementedError, "Higher order modes not supported" | ||||
|     return pow(Ew,2).sum(axis) | ||||
|  | ||||
|  | ||||
def vnorm(x):
    """Return the Euclidean norm of the vector *x*.

    Computes sqrt(x . conj(x)) directly, which is considerably
    faster than going through linalg.norm.
    """
    sq = dot(x, x.conj())
    return sqrt(sq)
|   | ||||
| @@ -1,23 +1,25 @@ | ||||
| from scipy import apply_along_axis,newaxis,zeros,\ | ||||
|      median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\ | ||||
|      trace,zeros_like,sign,sort,real,argsort,rand,array,\ | ||||
|      matrix | ||||
|      matrix,nan | ||||
| from scipy.linalg import norm,svd,inv,eig | ||||
| from scipy.stats import median,mean | ||||
|  | ||||
| def normalise(a,axis=0,return_scales=False): | ||||
| 	s = apply_along_axis(norm,axis,a) | ||||
| def normalise(a, axis=0, return_scales=False): | ||||
|     s = apply_along_axis(norm, axis, a) | ||||
|     if axis==0: | ||||
|         s = s[newaxis] | ||||
|     else: | ||||
|         s = s[:,newaxis] | ||||
| 	     | ||||
|     a_s = a/s | ||||
|  | ||||
|     if return_scales: | ||||
| 		return a_s,s | ||||
|        return a_s, s | ||||
|  | ||||
|     return a_s | ||||
|  | ||||
| def sub2ind(shape,i,j): | ||||
| def sub2ind(shape, i, j): | ||||
| 	"""Indices from subscripts. Only support for 2d""" | ||||
| 	row,col = shape | ||||
| 	ind = [] | ||||
| @@ -41,13 +43,13 @@ def sorted_eig(a, b=None,sort_by='sm'): | ||||
|     (This is reversed output compared to matlab) | ||||
|      | ||||
|     """ | ||||
|     s,v = eig(a,b) | ||||
|     s,v = eig(a, b) | ||||
|     s = real(s) # dont expect any imaginary part | ||||
|     v = real(v) | ||||
|     ind = argsort(s) | ||||
|     if sort_by=='lm': | ||||
|         ind = ind[::-1] | ||||
|     v = v.take(ind,1) | ||||
|     v = v.take(ind, 1) | ||||
|     s = s.take(ind) | ||||
|  | ||||
|     return s,v | ||||
| @@ -67,15 +69,15 @@ def str2num(string_number): | ||||
|     return num | ||||
|  | ||||
| def randperm(n): | ||||
|   r=rand(n) | ||||
|   r = rand(n) | ||||
|   dict={} | ||||
|   for i in range(n): | ||||
|      dict[r[i]]=i | ||||
|   r=sort(r) | ||||
|   out=zeros(n) | ||||
|      dict[r[i]] = i | ||||
|   r = sort(r) | ||||
|   out = zeros(n) | ||||
|   for i in range(n): | ||||
|      out[i]=dict[r[i]] | ||||
|   return array(out,dtype='i') | ||||
|      out[i] = dict[r[i]] | ||||
|   return array(out).astype('i') | ||||
|  | ||||
| def mat_center(X,axis=0,ret_mn=False): | ||||
|     """Mean center matrix along axis. | ||||
|   | ||||
| @@ -3,7 +3,8 @@ | ||||
| There is no typechecking of any kind here, just focus on speed | ||||
| """ | ||||
|  | ||||
| from scipy.linalg import svd,norm,inv,pinv,qr | ||||
| import math | ||||
| from scipy.linalg import svd,inv | ||||
| from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\ | ||||
|      apply_along_axis,mean,ones,randn,empty_like,outer,c_,\ | ||||
|      rand,sum,cumsum,matrix | ||||
| @@ -18,17 +19,18 @@ def pca(a, aopt, scale='scores', mode='normal'): | ||||
|      | ||||
|     m, n = a.shape | ||||
|  | ||||
|     if m*10.>n: | ||||
|         u, s, vt = esvd(a) | ||||
|     if m*3>n: | ||||
|         u, s, v = esvd(a) | ||||
|     else: | ||||
|         u, s, vt = svd(a, full_matrices=0) | ||||
|         v = vt.T | ||||
|     eigvals = (1./m)*s | ||||
|     T = u*s | ||||
|     T = T[:,:aopt] | ||||
|     P = vt[:aopt,:].T | ||||
|     P = v[:,:aopt] | ||||
|      | ||||
|     if scale=='loads': | ||||
|         tnorm = apply_along_axis(norm, 0, T) | ||||
|         tnorm = apply_along_axis(vnorm, 0, T) | ||||
|         T = T/tnorm | ||||
|         P = P*tnorm | ||||
|  | ||||
| @@ -47,6 +49,7 @@ def pca(a, aopt, scale='scores', mode='normal'): | ||||
|              | ||||
|     return {'T':T, 'P':P, 'E':E} | ||||
|  | ||||
|  | ||||
| def pcr(a, b, aopt=2, scale='scores', mode='normal'): | ||||
|     """Returns Principal component regression model.""" | ||||
|     m, n = a.shape | ||||
| @@ -98,13 +101,13 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): | ||||
|             u, s, vh = svd(dot(ab.T, ab)) | ||||
|             w = dot(ab, u[:,:1]) | ||||
|      | ||||
|         w = w/norm(w) | ||||
|         w = w/vnorm(w) | ||||
|         r = w.copy() | ||||
|         if i>0: | ||||
|             for j in range(0,i,1): | ||||
|                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] | ||||
|         t = dot(a, r) | ||||
|         tt = norm(t)**2 | ||||
|         tt = vnorm(t)**2 | ||||
|         p  = dot(a.T, t)/tt | ||||
|         q = dot(r.T, ab).T/tt | ||||
|         ab = ab - dot(p, q.T)*tt | ||||
| @@ -115,7 +118,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): | ||||
|  | ||||
|         if mode=='fast' and i==aopt-1: | ||||
|             if scale=='loads': | ||||
|                 tnorm = apply_along_axis(norm, 0, T) | ||||
|                 tnorm = apply_along_axis(vnorm, 0, T) | ||||
|                 T = T/tnorm | ||||
|                 W = W*tnorm | ||||
|             return {'T':T, 'W':W} | ||||
| @@ -134,7 +137,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None): | ||||
|         F = b - dot(T[:,:aopt], Q[:,:aopt].T) | ||||
|  | ||||
|     if scale=='loads': | ||||
|         tnorm = apply_along_axis(norm, 0, T) | ||||
|         tnorm = apply_along_axis(vnorm, 0, T) | ||||
|         T = T/tnorm | ||||
|         W = W*tnorm | ||||
|         Q = Q*tnorm | ||||
| @@ -159,7 +162,7 @@ def w_simpls(aat, b, aopt): | ||||
|         u = dot(b, u[:,:1]) #y-factor scores | ||||
|         U[:,i] = u.ravel() | ||||
|         t = dot(aat, u) | ||||
|         t = t/norm(t) | ||||
|         t = t/vnorm(t) | ||||
|         T[:,i] = t.ravel() | ||||
|         h = dot(aat, t) #score-weights | ||||
|         H[:,i] = h.ravel() | ||||
| @@ -183,7 +186,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): | ||||
|     W = u[:,:aopt] | ||||
|     K = vt[:aopt,:].T | ||||
|     T = dot(a, W) | ||||
|     tnorm = apply_along_axis(norm, 0, T) # norm of T-columns | ||||
|     tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns | ||||
|  | ||||
|     if mode == 'fast': | ||||
|         if scale=='loads': | ||||
| @@ -196,16 +199,6 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): | ||||
|     B = zeros((aopt, n, l), dtype='f') | ||||
|     for i in range(aopt): | ||||
|         B[i] = dot(W[:,:i+1], Q[:,:i+1].T) | ||||
|     # leverages | ||||
|     # fixme: probably need an orthogonal basis for row-space leverage | ||||
|     #        T (scores) are not orthogonal | ||||
|     #        Using a qr decomp to get an orthonormal basis for row-space | ||||
|     #Tq = qr(T)[0] | ||||
|     #s_lev,v_lev = leverage(aopt,Tq,W) | ||||
|     # explained variance | ||||
|     #var_x, exp_var_x = variances(a,T,W) | ||||
|     #qnorm = apply_along_axis(norm, 0, Q) | ||||
|     #var_y, exp_var_y = variances(b,U,Q/qnorm) | ||||
|      | ||||
|     if mode == 'detailed': | ||||
|         E = empty((aopt, m, n)) | ||||
| @@ -225,10 +218,132 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0): | ||||
|     return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W} | ||||
|      | ||||
|  | ||||
|  | ||||
def nipals_lpls(X, Y, Z, amax, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', scale='scores', verbose=False):
    """L-shaped Partial Least Squares Regression by the nipals algorithm.

    (X!Z)->Y
    :input:
        X : data matrix (m, n)
        Y : data matrix (m, l)
        Z : data matrix -- its columns must align with X's variables
            (doc originally said (n, o); dot(Z, w) requires Z.shape[1] == n
            -- TODO confirm intended orientation)
        amax : number of components to extract
        alpha : [0, 1], weight of the Z-information in the X-weights
                (alpha=1 -> pure Z, alpha=0 -> pure X)
        mean_ctr : triple of centering codes for (X, Y, Z) passed to
                   center(), or None for no centering
        mode : unused, kept for interface compatibility
        scale : 'scores' (default) or 'loads' (normalise score columns)

    :output:
      T : X-scores
      W : X-weights/Z-weights
      P : X-loadings
      Q : Y-loadings
      U : X-Y relation
      L : Z-scores
      K : Z-loads
      B : Regression coefficients X->Y
      b0: Regression coefficient intercept
      evx : X-explained variance (%)
      evy : Y-explained variance (%)
      evz : Z-explained variance (%)

    :Notes:
        NIPALS inner loop runs until the Y-scores change by less than
        1e-5 (max 100 iterations per component).
    """
    if mean_ctr is not None:
        xctr, yctr, zctr = mean_ctr
        X, mnX = center(X, xctr)
        # fix: the original centered Y with the X-centering code (xctr)
        Y, mnY = center(Y, yctr)
        Z, mnZ = center(Z, zctr)
    else:
        # fix: no centering previously left mnX/mnY undefined and the
        # b0 computation below raised NameError; use zero offsets instead
        mnX = zeros((1, X.shape[1]))
        mnY = zeros((1, Y.shape[1]))

    # total (uncorrected) sums of squares for explained-variance bookkeeping
    varX = pow(X, 2).sum()
    varY = pow(Y, 2).sum()
    varZ = pow(Z, 2).sum()

    m, n = X.shape
    k, l = Y.shape
    zrows, zcols = Z.shape  # zcols must equal n for dot(Z, w) below

    # initialize
    U = empty((k, amax))
    Q = empty((l, amax))
    T = empty((m, amax))
    W = empty((n, amax))
    P = empty((n, amax))
    K = empty((zcols, amax))
    L = empty((zrows, amax))
    var_x = empty((amax,))
    var_y = empty((amax,))
    var_z = empty((amax,))

    MAX_ITER = 100
    LIM = 1e-5
    for a in range(amax):
        if verbose:
            print("\n Working on comp. %s" % a)
        u = Y[:, :1]  # start from the first Y column
        diff = 1.
        niter = 0
        while diff > LIM and niter < MAX_ITER:
            niter += 1
            u_old = u.copy()
            w = dot(X.T, u)
            w = w / sqrt(dot(w.T, w))
            # pull the weight towards the Z-structure
            zl = dot(Z, w)
            zk = dot(Z.T, zl)
            zk = zk / sqrt(dot(zk.T, zk))
            w = alpha * zk + (1 - alpha) * w
            w = w / sqrt(dot(w.T, w))
            t = dot(X, w)
            c = dot(Y.T, t)
            c = c / sqrt(dot(c.T, c))
            u = dot(Y, c)
            diff = abs(u_old - u).max()
        if verbose:
            print("Converged after %s iterations" % niter)
        tt = dot(t.T, t)
        p = dot(X.T, t) / tt
        q = dot(Y.T, t) / tt
        zl = dot(Z, w)  # final Z-scores for the converged weight

        U[:, a] = u.ravel()
        W[:, a] = w.ravel()
        P[:, a] = p.ravel()
        T[:, a] = t.ravel()
        Q[:, a] = q.ravel()
        L[:, a] = zl.ravel()
        K[:, a] = zk.ravel()

        # deflate all three blocks by the extracted component
        X = X - dot(t, p.T)
        Y = Y - dot(t, q.T)
        Z = (Z.T - dot(w, zl.T)).T

        var_x[a] = pow(X, 2).sum()
        var_y[a] = pow(Y, 2).sum()
        var_z[a] = pow(Z, 2).sum()

    B = dot(dot(W, inv(dot(P.T, W))), Q.T)
    b0 = mnY - dot(mnX, B)

    # variance explained (cumulative, in percent)
    evx = 100.0 * (1 - var_x / varX)
    evy = 100.0 * (1 - var_y / varY)
    evz = 100.0 * (1 - var_z / varZ)
    if scale == 'loads':
        tnorm = apply_along_axis(vnorm, 0, T)
        T = T / tnorm
        W = W * tnorm
        Q = Q * tnorm
        knorm = apply_along_axis(vnorm, 0, K)
        L = L * knorm
        K = K / knorm

    return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy, 'evz':evz}
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| ########### Helper routines ######### | ||||
|  | ||||
def m_shape(array):
    """Return the shape of *array* viewed as a matrix (always 2-d)."""
    as_matrix = matrix(array)
    return as_matrix.shape
|  | ||||
| def esvd(data,economy=1): | ||||
| def esvd(data, economy=1): | ||||
|     """SVD with the option of economy sized calculation | ||||
|     Calculate subspaces of X'X or XX' depending on the shape | ||||
|     of the matrix. | ||||
| @@ -239,17 +354,40 @@ def esvd(data,economy=1): | ||||
|     """ | ||||
|     m, n = data.shape | ||||
|     if m>=n: | ||||
|         u, s, vt = svd(dot(data.T, data)) | ||||
|         data = dot(data.T, data) | ||||
|         u, s, vt = svd(data) | ||||
|         u = dot(data, vt.T) | ||||
|         v = vt.T | ||||
|         for i in xrange(n): | ||||
|             s[i] = norm(u[:,i]) | ||||
|             s[i] = vnorm(u[:,i]) | ||||
|             u[:,i] = u[:,i]/s[i] | ||||
|     else: | ||||
|         u, s, vt = svd(dot(data, data.T)) | ||||
|         data = dot(data, data.T) | ||||
|         data = (data + data.T)/2.0 | ||||
|         u, s, vt = svd(data) | ||||
|         v = dot(u.T, data) | ||||
|         for i in xrange(m): | ||||
|             s[i] = norm(v[i,:]) | ||||
|             s[i] = vnorm(v[i,:]) | ||||
|             v[i,:] = v[i,:]/s[i] | ||||
|  | ||||
|     return u, s, v | ||||
|     return u, s, v.T | ||||
|  | ||||
def vnorm(x):
    """Return the Euclidean norm of *x*.

    Assumes a column array (or 1-d vector); uses math.sqrt on the
    scalar inner product for speed.
    """
    sq = dot(x.T, x)
    return math.sqrt(sq)
|  | ||||
def center(a, axis):
    """Center matrix *a* and return (centered_a, offset).

    axis codes: 0 = column center, 1 = row center, 2 = double center,
    -1 = no centering (offset is a zero vector).
    """
    if axis == -1:
        # nothing to subtract: zero offset per column
        offset = zeros((a.shape[1],))
        return a - offset, offset
    if axis == 0:
        offset = a.mean(0)
        return a - offset, offset
    if axis == 1:
        offset = a.mean(1)[:, newaxis]
        return a - offset, offset
    if axis == 2:
        # double centering: column + row means minus the grand mean
        offset = a.mean(0) + a.mean(1)[:, newaxis] - a.mean()
        return a - offset, offset
    raise IOError("input error: axis must be in [-1,0,1,2]")
|   | ||||
| @@ -53,6 +53,7 @@ def gene_hypergeo_test(selection, category_dataset): | ||||
|                              cat_count) | ||||
|      | ||||
|     pvals = scipy.where(cat_count==0, 2, pvals) | ||||
|     pvals = scipy.where(scipy.isnan(pvals), 2, pvals) | ||||
|     out = {} | ||||
|     for i in range(pvals.size): | ||||
|         out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i]) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ import os,sys | ||||
| from itertools import izip | ||||
| import networkx as NX | ||||
| from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\ | ||||
|      outer,maximum,sum,diag,real | ||||
|      outer,maximum,sum,diag,real,atleast_2d | ||||
| from scipy.linalg import eig,svd,inv,expm,norm | ||||
| from cx_utils import sorted_eig | ||||
|  | ||||
| @@ -378,6 +378,7 @@ Ke = expm(A) .... expm(-A)? | ||||
| # 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX) | ||||
| # 13.09.2206: update for use in numpy | ||||
|  | ||||
| # 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last  | ||||
|  | ||||
| def K_expAdj(W, normalised=True, alpha=1.0): | ||||
|     """Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.  | ||||
| @@ -433,8 +434,8 @@ def K_vonNeumann(W, normalised=True, alpha=1.0): | ||||
|     return dot(dot(vr,psigma),vri).astype(t) | ||||
|  | ||||
| def K_laplacian(W, normalised=True, alpha=1.0): | ||||
|     """ This is the matrix square root of the pseudo inverse of L. | ||||
|     Also known as th eaverage commute time matrix. | ||||
|     """ This is the matrix pseudo inverse of L. | ||||
|     Also known as the average commute time matrix. | ||||
|     """ | ||||
|     W = asarray(W) | ||||
|     t = W.dtype.char | ||||
| @@ -464,8 +465,7 @@ def K_laplacian(W, normalised=True, alpha=1.0): | ||||
|     return K | ||||
|  | ||||
|  | ||||
|  | ||||
| def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5): | ||||
| def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False): | ||||
|     """Returns diffusion kernel. | ||||
|     input: | ||||
|             -- W, adj. matrix | ||||
| @@ -477,28 +477,46 @@ def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5): | ||||
|     t = W.dtype.char | ||||
|     if len(W.shape)!=2: | ||||
|         raise ValueError, "Non-matrix input to matrix function." | ||||
|     m,n = W.shape | ||||
|     m, n = W.shape | ||||
|     if t in ['F','D']: | ||||
|         raise TypeError, "Complex input!" | ||||
|     D = diag(sum(W,0)) | ||||
|     L = D-W | ||||
|     D = diag(W.sum(0)) | ||||
|     L = D - W | ||||
|     if normalised==True: | ||||
|         T = diag(sqrt(1./(sum(W,0)))) | ||||
|         L = dot(dot(T,L),T) | ||||
|     e,vr = eig(L) | ||||
|         T = diag(sqrt(1./W.sum(0))) | ||||
|         L = dot(dot(T, L), T) | ||||
|     e, vr = eig(L) | ||||
|     vri = inv(vr) #inv | ||||
|     cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]] | ||||
|     cutoff = 1.*abs(cond*maximum.reduce(e)) | ||||
|     psigma = eye(m) # if sing vals are 0 exp(0)=1 (unnecessary) | ||||
|     psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary) | ||||
|     #psigma = zeros((m,n), dtype='<f8') | ||||
|     for i in range(len(e)): | ||||
|         if abs(e[i]) > cutoff: | ||||
|             psigma[i,i] = exp(-beta*e[i]) | ||||
|         #else: | ||||
|         #    psigma[i,i] = 0.0 | ||||
|     K = real(dot(dot(vr, psigma), vri)) | ||||
|     I = eye(n, dtype='<f8') | ||||
|     K = (1. - alpha)*I + alpha*K | ||||
|     return K | ||||
|  | ||||
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
    """Returns the diffusion kernel expm(-beta*L), via a fast Pade approximation.

    input:
            -- W, adj. matrix
            -- normalised [True/False], use the symmetrically normalised Laplacian
            -- beta, [0->), (diffusion degree)

    NOTE: *alpha* and *ncomp* are accepted for interface compatibility
    but are currently unused.
    """
    degrees = W.sum(0)
    laplacian = diag(degrees) - W
    if normalised == True:
        scaler = diag(sqrt(1. / degrees))
        laplacian = dot(dot(scaler, laplacian), scaler)
    return expm(-beta * laplacian)
|      | ||||
|      | ||||
| def K_modularity(W,alpha=1.0): | ||||
|     """ Returns the matrix square root of Newmans modularity.""" | ||||
|     W = asarray(W) | ||||
| @@ -530,3 +548,20 @@ def kernel_score(K, W): | ||||
|     score = diag(dot(W, dot(K, W)) ) | ||||
|     tot = sum(score) | ||||
|     return score, tot | ||||
|  | ||||
|  | ||||
def modularity_matrix(G, nodelist=None):
    """Return Newman's modularity matrix B = A - d*d'/(2m) for graph G.

    :input:
        G        -- a networkx graph
        nodelist -- optional subset of nodes; G is restricted to the subgraph

    :output:
        B -- matrix where B_ij is the observed adjacency minus the expected
             number of i-j edges under the configuration (random) null model

    d is the degree vector and m the total number of edges.
    """
    if not nodelist:
        nodelist = G.nodes()
    else:
        G = NX.subgraph(G, nodelist)

    A = NX.adj_matrix(G, nodelist=nodelist)
    d = atleast_2d(G.degree(nbunch=nodelist))
    m = 1.*G.number_of_edges()
    # fix: the original computed A - A/m, ignoring the degree vector d it
    # had just built; Newman's null model subtracts outer(d, d)/(2m)
    B = A - dot(d.T, d)/(2.*m)
    return B
|  | ||||
|  | ||||
|                          | ||||
|      | ||||
|   | ||||
| @@ -41,7 +41,8 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No | ||||
|      """Random block crossvalidation | ||||
|     Leave-one-out is a subset, with n_blocks equals a.shape[-1] | ||||
|     """ | ||||
|      index = randperm(a.shape[axis]) | ||||
|      #index = randperm(a.shape[axis]) | ||||
|      index = arange(a.shape[axis]) | ||||
|      if n_blocks==None: | ||||
|           n_blocks = a.shape[axis] | ||||
|      n_in_set = ceil(float(a.shape[axis])/n_blocks) | ||||
| @@ -151,6 +152,7 @@ def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0): | ||||
|              index = arange(m) | ||||
|              dummy = map(random.shuffle, array_split(index, blocks)) | ||||
|              a_out = a.take(index, axis) | ||||
|           | ||||
|          if index_out: | ||||
|               yield a_out, index | ||||
|          else: | ||||
| @@ -164,7 +166,8 @@ def shuffle_1d(a, n_sets, axis=0): | ||||
|      m = a.shape[axis] | ||||
|      for ii in xrange(n_sets): | ||||
|          index = randperm(m) | ||||
|          yield a.take(index, axis) | ||||
|          a = a.take(index, axis) | ||||
|          yield a | ||||
|           | ||||
| def diag_pert(a, n_sets=10, center=True, index_out=False): | ||||
|     """Alter generator returning sets perturbed with means at diagonals. | ||||
| @@ -207,16 +210,15 @@ def diag_pert(a, n_sets=10, center=True, index_out=False): | ||||
|  | ||||
|   | ||||
| def outerprod_centering(aat, ret_mn=True): | ||||
|     """Returns mean centered symmetric outerproduct matrix. | ||||
|     """Returns double centered symmetric outerproduct matrix. | ||||
|     """ | ||||
|     n = aat.shape[0] | ||||
|     h = aat.sum(0)[:,newaxis] | ||||
|     h = (h - mean(h)/2)/n | ||||
|     mn_a = h + h.T | ||||
|     h = aat.mean(0)[newaxis] | ||||
|     h = h - 0.5*h.mean() | ||||
|     mn_a = h + h.T # beauty of broadcasting | ||||
|     aatc = aat - mn_a | ||||
|     if ret_mn: | ||||
|         return aatc, aat.mean(0) | ||||
|     return aat - mn_a | ||||
|         return aatc, mn_a | ||||
|     return aatc | ||||
|       | ||||
|       | ||||
|       | ||||
|   | ||||
| @@ -12,11 +12,47 @@ from cx_utils import m_shape | ||||
| def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | ||||
|     """Returns rmsep and aopt for pls tailored for wide X. | ||||
|  | ||||
|     The root mean square error of cross validation is calculated | ||||
|     based on random block cross-validation. With number of blocks equal to | ||||
|     number of samples [default] gives leave-one-out cv. | ||||
|     The pls model is based on the simpls algorithm for wide X. | ||||
|  | ||||
|     comments: | ||||
|              -- X, Y inputs need to be centered (fixme: check) | ||||
|     :Parameters: | ||||
|     X : ndarray  | ||||
|         column centered data matrix of size (samples x variables) | ||||
|     Y : ndarray | ||||
|         column centered response matrix of size (samples x responses) | ||||
|     amax : scalar  | ||||
|         Maximum number of components | ||||
|     n_blocks : scalar | ||||
|         Number of blocks in cross validation | ||||
|      | ||||
|     :Returns:  | ||||
|     rmsep : ndarray | ||||
|         Root Mean Square Error of cross-validated Predictions  | ||||
|     aopt : scalar | ||||
|         Guestimate of the optimal number of components | ||||
|  | ||||
|     :SeeAlso: | ||||
|     - pls_cv_val : Same output, not optimised for wide X | ||||
|     - w_simpls : Simpls algorithm for wide X | ||||
|      | ||||
|     Notes | ||||
|     ----- | ||||
|     Based (cowardly translated) on m-files from the Chemoact toolbox | ||||
|     X, Y inputs need to be centered (fixme: check) | ||||
|      | ||||
|  | ||||
|     Examples | ||||
|     -------- | ||||
|  | ||||
|     >>> import numpy as n | ||||
|     >>> X = n.array([[1., 2., 3.],[]]) | ||||
|     >>> Y = n.array([[1., 2., 3.],[]]) | ||||
|     >>> w_pls(X, Y, 1) | ||||
|     [4,5,6], 1 | ||||
|     """ | ||||
|      | ||||
|     k, l = m_shape(Y) | ||||
|     PRESS = zeros((l, amax+1), dtype='f') | ||||
|     if n_blocks==None: | ||||
| @@ -30,7 +66,7 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | ||||
|         if algo=='simpls': | ||||
|             dat = w_simpls(Din, Yin, amax) | ||||
|             Q, U, H = dat['Q'], dat['U'], dat['H'] | ||||
|             That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) )) | ||||
|             That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) )) | ||||
|         else: | ||||
|             raise NotImplementedError | ||||
|          | ||||
| @@ -40,21 +76,13 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | ||||
|             E = Yout[:,j][:,newaxis] - TQ | ||||
|             E = E + sum(E, 0)/Din.shape[0] | ||||
|             PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0) | ||||
|     #Yhat = Y - dot(That,Q.T) | ||||
|     Yhat = Y - dot(That,Q.T) | ||||
|     rmsep = sqrt(PRESS/Y.shape[0]) | ||||
|     aopt = find_aopt_from_sep(rmsep) | ||||
|     return rmsep, aopt | ||||
|     return rmsep, Yhat, aopt | ||||
|  | ||||
| def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | ||||
|     """ Validation results of pls model.  | ||||
|      | ||||
|  | ||||
|  | ||||
|     comments: | ||||
|              -- X, Y inputs need to be centered (fixme: check) | ||||
|  | ||||
|  | ||||
|     """     | ||||
|     k, l = m_shape(Y) | ||||
|     PRESS = zeros((l, amax+1), dtype='<f8') | ||||
|     EE = zeros((amax, k, l), dtype='<f8') | ||||
| @@ -79,7 +107,30 @@ def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | ||||
|  | ||||
|     rmsep = sqrt(PRESS/(k-1.)) | ||||
|     aopt = find_aopt_from_sep(rmsep) | ||||
|     return rmsep, aopt | ||||
|     return rmsep, Yhat, aopt | ||||
|  | ||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
    """Performs crossvalidation to get generalisation error in lpls.

    Splits (X, Y) into nsets cross-validation segments, fits an lpls model
    on each calibration part and predicts the held-out rows for every
    number of components up to a_max.  Returns (rmsep, Yhat, aopt).
    """
    # index_out=True: generator also yields the held-out row indices (ind)
    cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
    k, l = Y.shape
    # Yhat[a] holds the cross-validated predictions using a+1 components
    Yhat = empty((a_max,k,l), 'd')
    for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
        # NOTE(review): the engines nipals_lpls returns a dict and takes an
        # `amax` keyword -- this tuple unpack and the `a_max=` keyword look
        # inconsistent with that signature; confirm which version is current.
        T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
                                                                a_max=a_max,
                                                                alpha=alpha,
                                                                mean_ctr=[2,0,1],
                                                                verbose=False)
        for a in range(a_max):
            # NOTE(review): indexes B[a]/b0[a][0][0] as if the model returns
            # per-component coefficient arrays -- verify against nipals_lpls
            Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
    # winner-takes-all class assignment from the predicted responses
    Yhat_class = zeros_like(Yhat)
    for a in range(a_max):
        for i in range(k):
            Yhat_class[a,i,argmax(Yhat[a,i,:])]=1.0
    # per-class percentage of correct assignments (presumably Y is an
    # indicator/dummy matrix -- TODO confirm); note: computed but not returned
    class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
    # squared prediction error per component
    sep = (Y - Yhat)**2
    rmsep = sqrt(sep.mean(1))
    aopt = find_aopt_from_sep(rmsep)
    return rmsep, Yhat, aopt
|  | ||||
| def pca_alter_val(a, amax, n_sets=10, method='diag'): | ||||
|     """Pca validation by altering elements in X. | ||||
| @@ -146,8 +197,7 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | ||||
|     if n_blocks == None: | ||||
|         n_blocks = b.shape[0] | ||||
|  | ||||
|     Wcv = empty((n_blocks, a.shape[1], amax), dtype='f') | ||||
|  | ||||
|     Wcv = empty((n_blocks, a.shape[1], amax), dtype='d') | ||||
|     if use_pack and metric==None: | ||||
|         u, s, inflater = svd(a, full_matrices=0) | ||||
|         a = u*s | ||||
| @@ -161,11 +211,10 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | ||||
|             dat = bridge(a_in, b_in, amax, 'loads', 'fast') | ||||
|  | ||||
|         W = dat['W'] | ||||
|  | ||||
|         if use_pack and metric==None: | ||||
|             W = dot(inflater.T, W) | ||||
|  | ||||
|         Wcv[nn,:,:] = W | ||||
|         Wcv[nn,:,:] = W[:,:,] | ||||
|          | ||||
|     return Wcv | ||||
|  | ||||
| @@ -200,6 +249,29 @@ def pca_jkP(a, aopt, n_blocks=None, metric=None): | ||||
|     return PP | ||||
|  | ||||
|  | ||||
def lpls_jk(X, Y, Z, a_max, nsets=None, alpha=.5):
    """Returns jack-knife segments of the lpls X-weights and Z-scores.

    Fits an lpls model on every cross-validation calibration segment and
    collects the X-weights (W) and Z-scores (L) per segment, for use in
    perturbation-based stability estimates.  Returns (WWx, WWz).
    """
    cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
    m, n = X.shape
    k, l = Y.shape
    o, p = Z.shape
    # default: leave-one-out (one segment per sample)
    if nsets==None:
        nsets = m
    WWx = empty((nsets, n, a_max), 'd')
    WWz = empty((nsets, o, a_max), 'd')
    #WWy = empty((nsets, l, a_max), 'd')
    for i, (xcal,xi,ycal,yi) in enumerate(cv_iter):
        # NOTE(review): the engines nipals_lpls returns a dict and takes an
        # `amax` keyword -- this tuple unpack and the `a_max=` keyword look
        # inconsistent with that signature; confirm which version is current.
        T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
                                                                a_max=a_max,
                                                                alpha=alpha,
                                                                mean_ctr=[2,0,1],
                                                                scale='loads',
                                                                verbose=False)
        WWx[i,:,:] = W
        WWz[i,:,:] = L
        #WWy[i,:,:] = Q

    return WWx, WWz
|  | ||||
| def find_aopt_from_sep(sep, method='75perc'): | ||||
|     """Returns an estimate of optimal number of components from rmsecv. | ||||
|     """ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user