... just lots of stuff
This commit is contained in:
		| @@ -18,6 +18,39 @@ from packer import Packer | |||||||
| import blmplots | import blmplots | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class NewStyleModel(Function): | ||||||
|  |     def __init__(self, id='johndoe', name='JohnDoe'): | ||||||
|  |         Function.__init__(self, id, name) | ||||||
|  |         self.name = name | ||||||
|  |         self.options = Options | ||||||
|  |         self.input_data = [] | ||||||
|  |         self.parts = {} | ||||||
|  |         self.io_table = {} | ||||||
|  |         self.datasets = [] | ||||||
|  |         self.plots = [] | ||||||
|  |  | ||||||
|  |     def create_dataset(self, param, Dataset=Dataset): | ||||||
|  |         for ds in self.datasets: | ||||||
|  |             if ds.get_name()==param: return ds | ||||||
|  |         if not param in self.parts.keys(): | ||||||
|  |             logger.log('notice', 'Parameter: %s not present' %param) | ||||||
|  |             return | ||||||
|  |         if not param in self.io_table.keys(): | ||||||
|  |             logger.log('notice', 'Parameter: %s not in defined in io table' %param) | ||||||
|  |  | ||||||
|  |         identifiers = self.io_table.get(param) | ||||||
|  |         data = self.parts.get(param) | ||||||
|  |         ds = Dataset(data, identifiers=identifiers, name=param) | ||||||
|  |         self.datasets.append(dataset) | ||||||
|  |         return ds | ||||||
|  |  | ||||||
|  |     def create_plot(self, blmplot): | ||||||
|  |         if blmplot.validate_model(self.parts): | ||||||
|  |             plt = blmplot(self.parts) | ||||||
|  |             self.plots.append(plt) | ||||||
|  |             return plt | ||||||
|  |  | ||||||
|  |      | ||||||
| class Model(Function): | class Model(Function): | ||||||
|     """Base class of bilinear models. |     """Base class of bilinear models. | ||||||
|     """ |     """ | ||||||
| @@ -68,7 +101,7 @@ class PCA(Model): | |||||||
|             logger.log('notice', 'Aopt at first component!') |             logger.log('notice', 'Aopt at first component!') | ||||||
|          |          | ||||||
|     def confidence(self, aopt, n_sets, alpha, p_center, |     def confidence(self, aopt, n_sets, alpha, p_center, | ||||||
|                    crot, strict, cov_center ): |                    crot, strict, cov_center): | ||||||
|         """Returns a confidence measure for model parameters. |         """Returns a confidence measure for model parameters. | ||||||
|         Based on aopt. |         Based on aopt. | ||||||
|  |  | ||||||
| @@ -80,7 +113,7 @@ class PCA(Model): | |||||||
|          |          | ||||||
|         jk_segments = pca_jkP(self.model['E0'], aopt, n_sets) |         jk_segments = pca_jkP(self.model['E0'], aopt, n_sets) | ||||||
|         Pcal = self.model['P'][:,:aopt] |         Pcal = self.model['P'][:,:aopt] | ||||||
|         # add the scale to P |         # ensure scaled P | ||||||
|         tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt]) |         tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt]) | ||||||
|         Pcal = Pcal*tnorm |         Pcal = Pcal*tnorm | ||||||
|         tsq = hotelling(jk_segments, Pcal, p_center, |         tsq = hotelling(jk_segments, Pcal, p_center, | ||||||
| @@ -90,33 +123,14 @@ class PCA(Model): | |||||||
|     def make_model(self, amax, mode, scale): |     def make_model(self, amax, mode, scale): | ||||||
|         """Model on optimal number of components. |         """Model on optimal number of components. | ||||||
|         """ |         """ | ||||||
|         dat = pca(self.model['E0'], amax, scale, mode) |         dat = pca(self.model['E0'], amax, scale, mode)         | ||||||
|          |  | ||||||
|         # explained variance |  | ||||||
|         var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P']) |  | ||||||
|         dat['var_x'] = var_x |  | ||||||
|         dat['exp_var_x'] = exp_var_x |  | ||||||
|  |  | ||||||
|         #fixme### |  | ||||||
|         do_lev_s = False |  | ||||||
|         do_lev_v = False |  | ||||||
|         ##### |  | ||||||
|         if do_lev_s: |  | ||||||
|             # sample leverages |  | ||||||
|             tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts |  | ||||||
|             s_lev = leverage(amax, tnorm) |  | ||||||
|             dat['s_lev'] = s_lev |  | ||||||
|         if do_lev_v: |  | ||||||
|             # variable leverages |  | ||||||
|             v_lev = leverage(amax, dat['P']) |  | ||||||
|             dat['v_lev'] = v_lev |  | ||||||
|          |  | ||||||
|         self.model.update(dat) |         self.model.update(dat) | ||||||
|  |  | ||||||
|     def as_dataset(self, param, dtype='dataset'): |     def as_dataset(self, param, dtype='dataset'): | ||||||
|         """Return model parameter as Dataset. |         """Return model parameter as Dataset. | ||||||
|         """ |         """ | ||||||
|         if not param in self.model.keys(): |         if not param in self.model.keys(): | ||||||
|  |             logger.log('notice', 'Parameter: %s not in model' %param) | ||||||
|             return |             return | ||||||
|         DX = self._dataset['X'] #input dataset |         DX = self._dataset['X'] #input dataset | ||||||
|         dim_name_0, dim_name_1 = DX.get_dim_name() |         dim_name_0, dim_name_1 = DX.get_dim_name() | ||||||
| @@ -128,17 +142,18 @@ class PCA(Model): | |||||||
|         pc_ids = ['_amax', map(str,range(self._options['amax'])) ] |         pc_ids = ['_amax', map(str,range(self._options['amax'])) ] | ||||||
|         pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]  |         pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]  | ||||||
|         zero_dim = ['_doe', ['0']] # null dim, vector (hidden) |         zero_dim = ['_doe', ['0']] # null dim, vector (hidden) | ||||||
|         match_ids = {'E':[ids_0, ids_1], |         match_ids = {'E' : [ids_0, ids_1], | ||||||
|                      'E0':[ids_0, ids_1], |                      'E0' : [ids_0, ids_1], | ||||||
|                      'P':[ids_1, pc_ids], |                      'P' : [ids_1, pc_ids], | ||||||
|                      'T':[ids_0, pc_ids], |                      'T' : [ids_0, pc_ids], | ||||||
|                      'W':[ids_1, pc_ids], |                      'W' : [ids_1, pc_ids], | ||||||
|                      'p_tsq':[ids_1, zero_dim], |                      'p_tsq' : [ids_1, zero_dim], | ||||||
|                      'rmsep':[pc_ids, zero_dim], |                      'rmsep' : [pc_ids, zero_dim], | ||||||
|                      'var_leverages':[ids_1, zero_dim], |                      'var_leverages' : [ids_1, zero_dim], | ||||||
|                      'sample_leverages':[pc_ids, zero_dim], |                      'sample_leverages' : [pc_ids, zero_dim], | ||||||
|                      'exp_var_x': [pc_ids, zero_dim], |                      'exp_var_x' : [pc_ids, zero_dim], | ||||||
|                      'var_x': [pc_ids, zero_dim], |                      'var_x' : [pc_ids, zero_dim], | ||||||
|  |                      'eigvals' : [pc_ids, zero_dim] | ||||||
|                      } |                      } | ||||||
|          |          | ||||||
|         out = Dataset(self.model[param], match_ids[param], name=param) |         out = Dataset(self.model[param], match_ids[param], name=param) | ||||||
| @@ -256,7 +271,7 @@ class PLS(Model): | |||||||
|     def make_model(self, a, b, amax, scale, mode, engine): |     def make_model(self, a, b, amax, scale, mode, engine): | ||||||
|         """Make model on amax components. |         """Make model on amax components. | ||||||
|         """ |         """ | ||||||
|         print "MAking model" |         print "Making model" | ||||||
|         dat = engine(a, b, amax, scale, mode) |         dat = engine(a, b, amax, scale, mode) | ||||||
|         self.model.update(dat) |         self.model.update(dat) | ||||||
|          |          | ||||||
| @@ -478,13 +493,6 @@ class LPLS(Model): | |||||||
|         self._data['Z'] = c.asarray() |         self._data['Z'] = c.asarray() | ||||||
|         self.validation(options) |         self.validation(options) | ||||||
|         self.make_model(options) |         self.make_model(options) | ||||||
|         print self.model['evx'] |  | ||||||
|         evx_str = [str(i)[:3] for i in self.model['evx']] |  | ||||||
|         logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str)) |  | ||||||
|         evy_str = [str(i)[:3] for i in self.model['evy']] |  | ||||||
|         logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str)) |  | ||||||
|         evz_str = [str(i)[:3] for i in self.model['evz']] |  | ||||||
|         logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str)) |  | ||||||
|          |          | ||||||
|         if options['calc_conf']: |         if options['calc_conf']: | ||||||
|             self.confidence(options) |             self.confidence(options) | ||||||
| @@ -553,13 +561,15 @@ class PcaOptions(Options): | |||||||
|         opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True), |         opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True), | ||||||
|                             (blmplots.PcaLoadingPlot, 'Loadings', True), |                             (blmplots.PcaLoadingPlot, 'Loadings', True), | ||||||
|                             (blmplots.LineViewXc, 'Line view', True), |                             (blmplots.LineViewXc, 'Line view', True), | ||||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False) |                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||||
|  |                              (blmplots.PcaScreePlot, 'Scree', True) | ||||||
|                             ] |                             ] | ||||||
|          |          | ||||||
|         opt['out_data'] = ['T','P', 'p_tsq'] |         opt['out_data'] = ['T','P', 'p_tsq'] | ||||||
|         opt['out_plots'] = [blmplots.PcaScorePlot, |         opt['out_plots'] = [blmplots.PcaScorePlot, | ||||||
|                             blmplots.PcaLoadingPlot, |                             blmplots.PcaLoadingPlot, | ||||||
|                             blmplots.LineViewXc] |                             blmplots.LineViewXc, | ||||||
|  |                             blmplots.PcaScreePlot] | ||||||
|  |  | ||||||
|         self.update(opt) |         self.update(opt) | ||||||
|          |          | ||||||
| @@ -621,7 +631,8 @@ class PlsOptions(Options): | |||||||
|  |  | ||||||
|         # (class, name, sensitive, ticked) |         # (class, name, sensitive, ticked) | ||||||
|         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), |         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), | ||||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), |                             (blmplots.PlsXLoadingPlot, 'X-Loadings', True), | ||||||
|  |                             (blmplots.PlsYLoadingPlot, 'Y-Loadings', True), | ||||||
|                             (blmplots.LineViewXc, 'Line view', True), |                             (blmplots.LineViewXc, 'Line view', True), | ||||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), |                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False), |                             (blmplots.RMSEPPlot, 'RMSEP', False), | ||||||
| @@ -629,7 +640,7 @@ class PlsOptions(Options): | |||||||
|                             ] |                             ] | ||||||
|          |          | ||||||
|         opt['out_data'] = ['T','P', 'w_tsq'] |         opt['out_data'] = ['T','P', 'w_tsq'] | ||||||
|         opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc] |         opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc] | ||||||
|          |          | ||||||
|         #opt['out_data'] = None |         #opt['out_data'] = None | ||||||
|                              |                              | ||||||
| @@ -699,11 +710,9 @@ class LplsOptions(Options): | |||||||
|                            ] |                            ] | ||||||
|  |  | ||||||
|         # (class, name, sensitive, ticked) |         # (class, name, sensitive, ticked) | ||||||
|         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), |         opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True), | ||||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), |                             (blmplots.LplsXLoadingPlot, 'Loadings', True), | ||||||
|                             (blmplots.LineViewXc, 'Line view', True), |                             (blmplots.LineViewXc, 'Line view', True), | ||||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), |  | ||||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False), |  | ||||||
|                             (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False), |                             (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False), | ||||||
|                             (blmplots.LplsXCorrelationPlot, 'X corr.', True), |                             (blmplots.LplsXCorrelationPlot, 'X corr.', True), | ||||||
|                             (blmplots.LplsZCorrelationPlot, 'Z corr.', True) |                             (blmplots.LplsZCorrelationPlot, 'Z corr.', True) | ||||||
|   | |||||||
| @@ -8,7 +8,7 @@ fixme: | |||||||
| from matplotlib import cm,patches | from matplotlib import cm,patches | ||||||
| import gtk | import gtk | ||||||
| import fluents | import fluents | ||||||
| from fluents import plots, main | from fluents import plots, main,logger | ||||||
| import scipy | import scipy | ||||||
| from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty | from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty | ||||||
| from scipy.stats import corrcoef | from scipy.stats import corrcoef | ||||||
| @@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot): | |||||||
|         """Set patch sizes.""" |         """Set patch sizes.""" | ||||||
|         pass |         pass | ||||||
|  |  | ||||||
|     def set_expvar_axlabels(self, param="evx"): |     def set_expvar_axlabels(self, param=None): | ||||||
|  |         if param == None: | ||||||
|  |             param = self._expvar_param | ||||||
|  |         else: | ||||||
|  |             self._expvar_param = param | ||||||
|         if not self.model.model.has_key(param): |         if not self.model.model.has_key(param): | ||||||
|             self.model.model[param] = None |             self.model.model[param] = None | ||||||
|         if self.model.model[param]==None: |         if self.model.model[param]==None: | ||||||
|  |             logger.log('notice', 'Param: %s not in model' %param) | ||||||
|  |             print self.model.model.keys() | ||||||
|  |             print self.model.model[param] | ||||||
|             pass #fixme: do expvar calc here if not present |             pass #fixme: do expvar calc here if not present | ||||||
|         else: |         else: | ||||||
|             expvar = self.model.model[param] |             expvar = self.model.model[param] | ||||||
| @@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot): | |||||||
|         self.selection_collection._offsets = xy |         self.selection_collection._offsets = xy | ||||||
|         self.canvas.draw_idle() |         self.canvas.draw_idle() | ||||||
|         pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05 |         pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05 | ||||||
|         new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad) |         new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad) | ||||||
|         self.axes.set_xlim(new_lims, emit=True) |         self.axes.set_xlim(new_lims, emit=True) | ||||||
|         self.set_expvar_axlabels() |         self.set_expvar_axlabels() | ||||||
|         self.canvas.draw_idle() |         self.canvas.draw_idle() | ||||||
| @@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot): | |||||||
|         self.sc._offsets = xy |         self.sc._offsets = xy | ||||||
|         self.selection_collection._offsets = xy |         self.selection_collection._offsets = xy | ||||||
|         pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05 |         pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05 | ||||||
|         new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad) |         new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad) | ||||||
|         self.axes.set_ylim(new_lims, emit=True) |         self.axes.set_ylim(new_lims, emit=True) | ||||||
|         self.set_expvar_axlabels() |         self.set_expvar_axlabels() | ||||||
|         self.canvas.draw_idle() |         self.canvas.draw_idle() | ||||||
| @@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot): | |||||||
|             for indx,txt in self._text_labels.items(): |             for indx,txt in self._text_labels.items(): | ||||||
|                 if indx in index: |                 if indx in index: | ||||||
|                     txt.set_visible(True) |                     txt.set_visible(True) | ||||||
|         self.canvas.draw() |         self.canvas.draw_idle() | ||||||
|                  |                  | ||||||
|     def hide_labels(self): |     def hide_labels(self): | ||||||
|         for txt in self._text_labels.values(): |         for txt in self._text_labels.values(): | ||||||
|             txt.set_visible(False) |             txt.set_visible(False) | ||||||
|         self.canvas.draw() |         self.canvas.draw_idle() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class PcaScreePlot(plots.BarPlot): | ||||||
|  |     def __init__(self, model): | ||||||
|  |         title = "Pca, (%s) Scree" %model._dataset['X'].get_name() | ||||||
|  |         ds = model.as_dataset('eigvals') | ||||||
|  |         if ds==None: | ||||||
|  |             logger.log('notice', 'Model does not contain eigvals') | ||||||
|  |         plots.BarPlot.__init__(self, ds, name=title) | ||||||
|  |  | ||||||
|  |  | ||||||
| class PcaScorePlot(BlmScatterPlot): | class PcaScorePlot(BlmScatterPlot): | ||||||
|     def __init__(self, model, absi=0, ordi=1): |     def __init__(self, model, absi=0, ordi=1): | ||||||
|         title = "Pca scores (%s)" %model._dataset['X'].get_name() |         title = "Pca scores (%s)" %model._dataset['X'].get_name() | ||||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') |         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') | ||||||
|  |         self.set_expvar_axlabels(param="expvarx") | ||||||
|  |  | ||||||
| class PcaLoadingPlot(BlmScatterPlot): | class PcaLoadingPlot(BlmScatterPlot): | ||||||
|     def __init__(self, model, absi=0, ordi=1): |     def __init__(self, model, absi=0, ordi=1): | ||||||
| @@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot): | |||||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') |         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') | ||||||
|  |  | ||||||
|  |  | ||||||
| class PlsLoadingPlot(BlmScatterPlot): | class PlsXLoadingPlot(BlmScatterPlot): | ||||||
|     def __init__(self, model, absi=0, ordi=1): |     def __init__(self, model, absi=0, ordi=1): | ||||||
|         title = "Pls loadings (%s)" %model._dataset['X'].get_name() |         title = "Pls x-loadings (%s)" %model._dataset['X'].get_name() | ||||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq') |         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq') | ||||||
|         #self.set_expvar_axlabels(self, param="expvarx") |         #self.set_expvar_axlabels(self, param="expvarx") | ||||||
|  |  | ||||||
|      |  | ||||||
|  | class PlsYLoadingPlot(BlmScatterPlot): | ||||||
|  |     def __init__(self, model, absi=0, ordi=1): | ||||||
|  |         title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name() | ||||||
|  |         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q') | ||||||
|  |  | ||||||
|  |  | ||||||
| class PlsCorrelationLoadingPlot(BlmScatterPlot): | class PlsCorrelationLoadingPlot(BlmScatterPlot): | ||||||
|     def __init__(self, model, absi=0, ordi=1): |     def __init__(self, model, absi=0, ordi=1): | ||||||
|         title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() |         title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() | ||||||
| @@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot): | |||||||
|      |      | ||||||
|  |  | ||||||
| class InfluencePlot(plots.ScatterPlot): | class InfluencePlot(plots.ScatterPlot): | ||||||
|  |     """ Returns a leverage vs resiudal scatter plot. | ||||||
|     """ |     """ | ||||||
|     """ |     def __init__(self, model, dim, name="Influence"): | ||||||
|     pass |         if not model.model.has_key('levx'): | ||||||
|  |             logger.log('notice', 'Model has no calculations of leverages') | ||||||
|  |             return | ||||||
|  |         if not model.model.has_key('ssqx'): | ||||||
|  |             logger.log('notice', 'Model has no calculations of residuals') | ||||||
|  |             return | ||||||
|  |         ds1 = model.as_dataset('levx') | ||||||
|  |         ds2 = model.as_dataset('ssqx') | ||||||
|  |         plots.ScatterPlot.__init__(self, ds1, ds2, | ||||||
|  |                                    id_dim, sel_dim, id_1, id_2, | ||||||
|  |                                    c=col, s=20, sel_dim_2=sel_dim_2, | ||||||
|  |                                    name='Load Volcano') | ||||||
|          |          | ||||||
|  |  | ||||||
| class RMSEPPlot(plots.BarPlot): | class RMSEPPlot(plots.BarPlot): | ||||||
|   | |||||||
| @@ -11,7 +11,7 @@ import time | |||||||
|  |  | ||||||
|  |  | ||||||
| def hotelling(Pcv, P, p_center='med', cov_center='med', | def hotelling(Pcv, P, p_center='med', cov_center='med', | ||||||
|               alpha=0.3, crot=True, strict=False, metric=None): |               alpha=0.3, crot=True, strict=False): | ||||||
|     """Returns regularized hotelling T^2. |     """Returns regularized hotelling T^2. | ||||||
|      |      | ||||||
|     alpha -- regularisation towards pooled cov estimates |     alpha -- regularisation towards pooled cov estimates | ||||||
| @@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med', | |||||||
|     alpha -- regularisation |     alpha -- regularisation | ||||||
|     crot -- rotate submodels toward full? |     crot -- rotate submodels toward full? | ||||||
|     strict -- only rotate 90 degree ? |     strict -- only rotate 90 degree ? | ||||||
|     metric -- inverse metric matrix (if Pcv and P from metric pca/pls) |  | ||||||
|      |      | ||||||
|     """ |     """ | ||||||
|     m, n = P.shape |     m, n = P.shape | ||||||
|     if metric==None: |  | ||||||
|         metric = eye(m, dtype='<f8') |  | ||||||
|     P = dot(metric.T, asarray(P)) |  | ||||||
|     n_sets, n, amax = Pcv.shape |     n_sets, n, amax = Pcv.shape | ||||||
|     # allocate |     # allocate | ||||||
|     T_sq = empty((n, ),dtype='f') |     T_sq = empty((n, ),dtype='f') | ||||||
| @@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med', | |||||||
|     # rotate sub_models to full model |     # rotate sub_models to full model | ||||||
|     if crot: |     if crot: | ||||||
|         for i, Pi in enumerate(Pcv): |         for i, Pi in enumerate(Pcv): | ||||||
|             Pi = dot(metric.T, Pi) |  | ||||||
|             Pcv[i] = procrustes(P, Pi, strict=strict) |             Pcv[i] = procrustes(P, Pi, strict=strict) | ||||||
|  |  | ||||||
|     # center of pnull |     # center of pnull | ||||||
| @@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | |||||||
|               center=True, |               center=True, | ||||||
|               sim_method='shuffle', |               sim_method='shuffle', | ||||||
|               p_center='med', cov_center='med', |               p_center='med', cov_center='med', | ||||||
|               crot=True, strict=False, metric=None): |               crot=True, strict=False): | ||||||
|  |  | ||||||
|     """Returns qvals for pls model. |     """Returns qvals for pls model. | ||||||
|  |  | ||||||
| @@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | |||||||
|     cov_center -- location estimator for covariance of submodels ['med'] |     cov_center -- location estimator for covariance of submodels ['med'] | ||||||
|     crot -- bool, use rotations of sub models? |     crot -- bool, use rotations of sub models? | ||||||
|     strict -- bool, use stict (rot/flips only) rotations? |     strict -- bool, use stict (rot/flips only) rotations? | ||||||
|     metric -- bool, use row metric? |  | ||||||
|     """ |     """ | ||||||
|      |      | ||||||
|     m, n = a.shape |     m, n = a.shape | ||||||
| @@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | |||||||
|     if center: |     if center: | ||||||
|         ac = a - a.mean(0) |         ac = a - a.mean(0) | ||||||
|         bc = b - b.mean(0) |         bc = b - b.mean(0) | ||||||
|     if metric!=None: |      | ||||||
|         ac = dot(ac, metric) |  | ||||||
|     if algo=='bridge': |     if algo=='bridge': | ||||||
|         dat = bridge(ac, bc, aopt, 'loads', 'fast') |         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||||
|     else: |     else: | ||||||
|         dat = pls(ac, bc, aopt, 'loads', 'fast') |         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True) |     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True) | ||||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, |     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||||
|                          alpha=alpha, crot=crot, strict=strict, |                          alpha=alpha, crot=crot, strict=strict, | ||||||
|                          cov_center=cov_center) |                          cov_center=cov_center) | ||||||
| @@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | |||||||
|             dat = bridge(ac, b_shuff, aopt, 'loads','fast') |             dat = bridge(ac, b_shuff, aopt, 'loads','fast') | ||||||
|         else: |         else: | ||||||
|             dat = pls(ac, b_shuff, aopt, 'loads', 'fast') |             dat = pls(ac, b_shuff, aopt, 'loads', 'fast') | ||||||
|         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric) |         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo) | ||||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, |         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||||
|                              alpha=alpha, crot=crot, strict=strict, |                              alpha=alpha, crot=crot, strict=strict, | ||||||
|                              cov_center=cov_center) |                              cov_center=cov_center) | ||||||
| @@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | |||||||
|                  n_iter=20, algo='pls', |                  n_iter=20, algo='pls', | ||||||
|                  sim_method='shuffle', |                  sim_method='shuffle', | ||||||
|                  p_center='med', cov_center='med', |                  p_center='med', cov_center='med', | ||||||
|                  crot=True, strict=False, metric=None): |                  crot=True, strict=False): | ||||||
|  |  | ||||||
|     """Returns qvals for pls model. |     """Returns qvals for pls model. | ||||||
|     Shuffling of variables in X is preprocessed in metric. |     Shuffling of variables in X. | ||||||
|     Null model is 'If I put genes randomly on network' ... if they are sign: |     Null model is 'If I put genes randomly on network' ... if they are sign: | ||||||
|     then this is due to network structure and not covariance with response. |     then this is due to network structure and not covariance with response. | ||||||
|  |  | ||||||
| @@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | |||||||
|     cov_center -- location estimator for covariance of submodels ['med'] |     cov_center -- location estimator for covariance of submodels ['med'] | ||||||
|     crot -- bool, use rotations of sub models? |     crot -- bool, use rotations of sub models? | ||||||
|     strict -- bool, use stict (rot/flips only) rotations? |     strict -- bool, use stict (rot/flips only) rotations? | ||||||
|     metric -- bool, use row metric? |  | ||||||
|     """ |     """ | ||||||
|      |      | ||||||
|     m, n = a.shape |     m, n = a.shape | ||||||
| @@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | |||||||
|     if center==True: |     if center==True: | ||||||
|         ac = a - a.mean(0) |         ac = a - a.mean(0) | ||||||
|         bc = b - b.mean(0) |         bc = b - b.mean(0) | ||||||
|     if metric==None: |      | ||||||
|         metric = eye(n,n) |  | ||||||
|     if algo=='bridge': |     if algo=='bridge': | ||||||
|         dat = bridge(ac, bc, aopt, 'loads', 'fast') |         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||||
|     else: |     else: | ||||||
|         dat = pls(ac, bc, aopt, 'loads', 'fast') |         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) |     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo) | ||||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, |     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||||
|                          alpha=alpha, crot=crot, strict=strict, |                          alpha=alpha, crot=crot, strict=strict, | ||||||
|                          cov_center=cov_center) |                          cov_center=cov_center) | ||||||
| @@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | |||||||
|     for i, a_shuff in enumerate(Vs): |     for i, a_shuff in enumerate(Vs): | ||||||
|         t1 = time.time() |         t1 = time.time() | ||||||
|         a = a_shuff - a_shuff.mean(0) |         a = a_shuff - a_shuff.mean(0) | ||||||
|         a = dot(a, metric) |  | ||||||
|          |          | ||||||
|         if algo=='bridge': |         if algo=='bridge': | ||||||
|             dat = bridge(a, b, aopt, 'loads','fast') |             dat = bridge(a, b, aopt, 'loads','fast') | ||||||
|         else: |         else: | ||||||
|             dat = pls(a, b, aopt, 'loads', 'fast') |             dat = pls(a, b, aopt, 'loads', 'fast') | ||||||
|         Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) |         Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo) | ||||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, |         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||||
|                              alpha=alpha, crot=crot, strict=strict, |                              alpha=alpha, crot=crot, strict=strict, | ||||||
|                              cov_center=cov_center) |                              cov_center=cov_center) | ||||||
|   | |||||||
| @@ -32,7 +32,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | |||||||
|         lev --leverages, ssq -- sum of squares, expvar -- cumulative |         lev --leverages, ssq -- sum of squares, expvar -- cumulative | ||||||
|         explained variance, aopt -- number of components used |         explained variance, aopt -- number of components used | ||||||
|      |      | ||||||
|     :OtherParameters: |     :OtherParam eters: | ||||||
|     mode : str |     mode : str | ||||||
|         Amount of info retained, ('fast', 'normal', 'detailed') |         Amount of info retained, ('fast', 'normal', 'detailed') | ||||||
|     center_axis : int |     center_axis : int | ||||||
| @@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | |||||||
|         a = a - expand_dims(a.mean(center_axis), center_axis) |         a = a - expand_dims(a.mean(center_axis), center_axis) | ||||||
|     if m>(n+100) or n>(m+100): |     if m>(n+100) or n>(m+100): | ||||||
|         u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var |         u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var | ||||||
|         print s[:10] |  | ||||||
|     else: |     else: | ||||||
|         u, s, vt = svd(a, 0) |         u, s, vt = svd(a, 0) | ||||||
|         v = vt.T |         v = vt.T | ||||||
| @@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | |||||||
|         lev = [] |         lev = [] | ||||||
|         for ai in range(aopt): |         for ai in range(aopt): | ||||||
|             E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T) |             E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T) | ||||||
|             ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)]) |             ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)]) | ||||||
|             if scale=='loads': |             if scale=='loads': | ||||||
|                 lev.append([((s*T)**2).sum(1), (P**2).sum(1)]) |                 lev.append([((s*T)**2).sum(1), (P**2).sum(1)]) | ||||||
|             else: |             else: | ||||||
| @@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | |||||||
|         # variances |         # variances | ||||||
|     expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1] |     expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1] | ||||||
|      |      | ||||||
|     return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt} |     return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]} | ||||||
|  |  | ||||||
| def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0): | def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0): | ||||||
|     """ Principal Component Regression. |     """ Principal Component Regression. | ||||||
| @@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None): | |||||||
|         if i>0: |         if i>0: | ||||||
|             for j in range(0, i, 1): |             for j in range(0, i, 1): | ||||||
|                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] |                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] | ||||||
|         print vnorm(r) |          | ||||||
|         t = dot(a, r) |         t = dot(a, r) | ||||||
|         tt = vnorm(t)**2 |         tt = vnorm(t)**2 | ||||||
|         p  = dot(a.T, t)/tt |         p  = dot(a.T, t)/tt | ||||||
| @@ -375,14 +374,11 @@ def w_pls(aat, b, aopt): | |||||||
|         u = dot(b , q) #y-factor scores |         u = dot(b , q) #y-factor scores | ||||||
|         U[:,i] = u.ravel() |         U[:,i] = u.ravel() | ||||||
|         t = dot(aat, u) |         t = dot(aat, u) | ||||||
|         print "Norm of t: %s" %vnorm(t) |  | ||||||
|         print "s: %s" %s |  | ||||||
|          |          | ||||||
|         t = t/vnorm(t) |         t = t/vnorm(t) | ||||||
|         T[:,i] = t.ravel() |         T[:,i] = t.ravel() | ||||||
|         r = dot(aat, t)#score-weights |         r = dot(aat, t)#score-weights | ||||||
|         #r = r/vnorm(r) |         #r = r/vnorm(r) | ||||||
|         print "Norm R: %s" %vnorm(r) |  | ||||||
|         R[:,i] = r.ravel() |         R[:,i] = r.ravel() | ||||||
|         PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) ) |         PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) ) | ||||||
|         if i<aopt: |         if i<aopt: | ||||||
| @@ -701,10 +697,9 @@ def esvd(data, amax=None): | |||||||
|                 pcrange = None |                 pcrange = None | ||||||
|             else: |             else: | ||||||
|                 pcrange = [n-amax, n] |                 pcrange = [n-amax, n] | ||||||
|             print "symm>n" |  | ||||||
|             s, v = symeig(kernel, range=pcrange, overwrite=True) |             s, v = symeig(kernel, range=pcrange, overwrite=True) | ||||||
|             s = s[::-1] |             s = s[::-1].real | ||||||
|             v = v[:,::-1] |             v = v[:,::-1].real | ||||||
|         else: |         else: | ||||||
|             u, s, vt = svd(kernel) |             u, s, vt = svd(kernel) | ||||||
|             v = vt.T |             v = vt.T | ||||||
| @@ -718,7 +713,6 @@ def esvd(data, amax=None): | |||||||
|                 pcrange = None |                 pcrange = None | ||||||
|             else: |             else: | ||||||
|                 pcrange = [m-amax, m] |                 pcrange = [m-amax, m] | ||||||
|             print "sym (m<n)" |  | ||||||
|             s, u = symeig(kernel, range=pcrange, overwrite=True) |             s, u = symeig(kernel, range=pcrange, overwrite=True) | ||||||
|             s = s[::-1] |             s = s[::-1] | ||||||
|             u = u[:,::-1] |             u = u[:,::-1] | ||||||
| @@ -726,8 +720,8 @@ def esvd(data, amax=None): | |||||||
|             u, s, vt = svd(kernel) |             u, s, vt = svd(kernel) | ||||||
|         s = sqrt(s) |         s = sqrt(s) | ||||||
|         v = dot(data.T, u)/s |         v = dot(data.T, u)/s | ||||||
|     print s[:2] |     # some use of symeig returns the 0 imaginary part | ||||||
|     return u, s, v |     return u.real, s.real, v.real | ||||||
|  |  | ||||||
| def vnorm(x): | def vnorm(x): | ||||||
|     # assume column arrays (or vectors) |     # assume column arrays (or vectors) | ||||||
|   | |||||||
| @@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False): | |||||||
|           else: |           else: | ||||||
|                yield aat_in,aat_out,b_in,b_out |                yield aat_in,aat_out,b_in,b_out | ||||||
|  |  | ||||||
| def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None): | def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0): | ||||||
|      """Random block crossvalidation |      """Random block crossvalidation | ||||||
|     Leave-one-out is a subset, with n_blocks equals a.shape[-1] |     Leave-one-out is a subset, with n_blocks equals a.shape[-1] | ||||||
|     """ |     """ | ||||||
| @@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No | |||||||
|                mn_b = bcal.mean(0)[newaxis] |                mn_b = bcal.mean(0)[newaxis] | ||||||
|                bcal = bcal - mn_b |                bcal = bcal - mn_b | ||||||
|                btrue = btrue - mn_b |                btrue = btrue - mn_b | ||||||
|           if metric!=None: |            | ||||||
|                acal = dot(acal, metric) |  | ||||||
|           if index_out: |           if index_out: | ||||||
|                yield acal, atrue, bcal, btrue, out |                yield acal, atrue, bcal, btrue, out | ||||||
|           else:      |           else:      | ||||||
|                yield acal, atrue, bcal, btrue |                yield acal, atrue, bcal, btrue | ||||||
|  |  | ||||||
|           |           | ||||||
| def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): | def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0): | ||||||
|      """Returns a generator of crossvalidation sample segments. |      """Returns a generator of crossvalidation sample segments. | ||||||
|  |  | ||||||
|      input: |      input: | ||||||
| @@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): | |||||||
|               mn_a = acal.mean(0)[newaxis] |               mn_a = acal.mean(0)[newaxis] | ||||||
|               acal = acal - mn_a |               acal = acal - mn_a | ||||||
|               atrue = atrue - mn_a |               atrue = atrue - mn_a | ||||||
|          if metric!=None: |           | ||||||
|               acal = dot(acal, metric) |  | ||||||
|          if index_out: |          if index_out: | ||||||
|               yield acal, atrue, out |               yield acal, atrue, out | ||||||
|          else: |          else: | ||||||
|   | |||||||
| @@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | |||||||
|     aopt = find_aopt_from_sep(msep) |     aopt = find_aopt_from_sep(msep) | ||||||
|     return sqrt(msep) |     return sqrt(msep) | ||||||
|  |  | ||||||
| def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'): | ||||||
|     k, l = m_shape(Y) |     k, l = m_shape(Y) | ||||||
|     PRESS = zeros((l, amax+1), dtype='<f8') |     PRESS = zeros((l, amax+1), dtype='<f8') | ||||||
|     EE = zeros((amax, k, l), dtype='<f8') |     EE = zeros((amax, k, l), dtype='<f8') | ||||||
|     Yhat = zeros((amax, k, l), dtype='<f8') |     Yhat = zeros((amax, k, l), dtype='<f8') | ||||||
|     V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric) |     V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True) | ||||||
|     for Xin, Xout, Yin, Yout, out in V: |     for Xin, Xout, Yin, Yout, out in V: | ||||||
|         ym = -sum(Yout,0)[newaxis]/Yin.shape[0] |         ym = -sum(Yout,0)[newaxis]/Yin.shape[0] | ||||||
|         Yin = (Yin - ym) |         Yin = (Yin - ym) | ||||||
| @@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets): | |||||||
|  |  | ||||||
|     return sep, aopt |     return sep, aopt | ||||||
|  |  | ||||||
| def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None): | def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True): | ||||||
|     """ Returns CV-segments of paramter W for wide X. |     """ Returns CV-segments of paramter W for wide X. | ||||||
|      |      | ||||||
|     todo: add support for T,Q and B |     todo: add support for T,Q and B | ||||||
| @@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | |||||||
|         n_blocks = b.shape[0] |         n_blocks = b.shape[0] | ||||||
|  |  | ||||||
|     Wcv = empty((n_blocks, a.shape[1], amax), dtype='d') |     Wcv = empty((n_blocks, a.shape[1], amax), dtype='d') | ||||||
|     if use_pack and metric==None: |     if use_pack: | ||||||
|         u, s, inflater = svd(a, full_matrices=0) |         u, s, inflater = svd(a, full_matrices=0) | ||||||
|         a = u*s |         a = u*s | ||||||
|      |      | ||||||
|     V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric) |     V = pls_gen(a, b, n_blocks=n_blocks, center=center) | ||||||
|     for nn,(a_in, a_out, b_in, b_out) in enumerate(V): |     for nn,(a_in, a_out, b_in, b_out) in enumerate(V): | ||||||
|         if algo=='pls': |         if algo=='pls': | ||||||
|             dat = pls(a_in, b_in, amax, 'loads', 'fast') |             dat = pls(a_in, b_in, amax, 'loads', 'fast') | ||||||
| @@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | |||||||
|             dat = bridge(a_in, b_in, amax, 'loads', 'fast') |             dat = bridge(a_in, b_in, amax, 'loads', 'fast') | ||||||
|  |  | ||||||
|         W = dat['W'] |         W = dat['W'] | ||||||
|         if use_pack and metric==None: |         if use_pack: | ||||||
|             W = dot(inflater.T, W) |             W = dot(inflater.T, W) | ||||||
|  |  | ||||||
|         Wcv[nn,:,:] = W[:,:,] |         Wcv[nn,:,:] = W[:,:,] | ||||||
|          |          | ||||||
|     return Wcv |     return Wcv | ||||||
|  |  | ||||||
| def pca_jkP(a, aopt, n_blocks=None, metric=None): | def pca_jkP(a, aopt, n_blocks=None): | ||||||
|     """Returns loading from PCA on CV-segments. |     """Returns loading from PCA on CV-segments. | ||||||
|      |      | ||||||
|     input: |     input: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user