... just lots of stuff
This commit is contained in:
		| @@ -18,6 +18,39 @@ from packer import Packer | ||||
| import blmplots | ||||
|  | ||||
|  | ||||
| class NewStyleModel(Function): | ||||
|     def __init__(self, id='johndoe', name='JohnDoe'): | ||||
|         Function.__init__(self, id, name) | ||||
|         self.name = name | ||||
|         self.options = Options | ||||
|         self.input_data = [] | ||||
|         self.parts = {} | ||||
|         self.io_table = {} | ||||
|         self.datasets = [] | ||||
|         self.plots = [] | ||||
|  | ||||
|     def create_dataset(self, param, Dataset=Dataset): | ||||
|         for ds in self.datasets: | ||||
|             if ds.get_name()==param: return ds | ||||
|         if not param in self.parts.keys(): | ||||
|             logger.log('notice', 'Parameter: %s not present' %param) | ||||
|             return | ||||
|         if not param in self.io_table.keys(): | ||||
|             logger.log('notice', 'Parameter: %s not in defined in io table' %param) | ||||
|  | ||||
|         identifiers = self.io_table.get(param) | ||||
|         data = self.parts.get(param) | ||||
|         ds = Dataset(data, identifiers=identifiers, name=param) | ||||
|         self.datasets.append(dataset) | ||||
|         return ds | ||||
|  | ||||
|     def create_plot(self, blmplot): | ||||
|         if blmplot.validate_model(self.parts): | ||||
|             plt = blmplot(self.parts) | ||||
|             self.plots.append(plt) | ||||
|             return plt | ||||
|  | ||||
|      | ||||
| class Model(Function): | ||||
|     """Base class of bilinear models. | ||||
|     """ | ||||
| @@ -68,7 +101,7 @@ class PCA(Model): | ||||
|             logger.log('notice', 'Aopt at first component!') | ||||
|          | ||||
|     def confidence(self, aopt, n_sets, alpha, p_center, | ||||
|                    crot, strict, cov_center ): | ||||
|                    crot, strict, cov_center): | ||||
|         """Returns a confidence measure for model parameters. | ||||
|         Based on aopt. | ||||
|  | ||||
| @@ -80,7 +113,7 @@ class PCA(Model): | ||||
|          | ||||
|         jk_segments = pca_jkP(self.model['E0'], aopt, n_sets) | ||||
|         Pcal = self.model['P'][:,:aopt] | ||||
|         # add the scale to P | ||||
|         # ensure scaled P | ||||
|         tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt]) | ||||
|         Pcal = Pcal*tnorm | ||||
|         tsq = hotelling(jk_segments, Pcal, p_center, | ||||
| @@ -90,33 +123,14 @@ class PCA(Model): | ||||
|     def make_model(self, amax, mode, scale): | ||||
|         """Model on optimal number of components. | ||||
|         """ | ||||
|         dat = pca(self.model['E0'], amax, scale, mode) | ||||
|          | ||||
|         # explained variance | ||||
|         var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P']) | ||||
|         dat['var_x'] = var_x | ||||
|         dat['exp_var_x'] = exp_var_x | ||||
|  | ||||
|         #fixme### | ||||
|         do_lev_s = False | ||||
|         do_lev_v = False | ||||
|         ##### | ||||
|         if do_lev_s: | ||||
|             # sample leverages | ||||
|             tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts | ||||
|             s_lev = leverage(amax, tnorm) | ||||
|             dat['s_lev'] = s_lev | ||||
|         if do_lev_v: | ||||
|             # variable leverages | ||||
|             v_lev = leverage(amax, dat['P']) | ||||
|             dat['v_lev'] = v_lev | ||||
|          | ||||
|         dat = pca(self.model['E0'], amax, scale, mode)         | ||||
|         self.model.update(dat) | ||||
|  | ||||
|     def as_dataset(self, param, dtype='dataset'): | ||||
|         """Return model parameter as Dataset. | ||||
|         """ | ||||
|         if not param in self.model.keys(): | ||||
|             logger.log('notice', 'Parameter: %s not in model' %param) | ||||
|             return | ||||
|         DX = self._dataset['X'] #input dataset | ||||
|         dim_name_0, dim_name_1 = DX.get_dim_name() | ||||
| @@ -128,17 +142,18 @@ class PCA(Model): | ||||
|         pc_ids = ['_amax', map(str,range(self._options['amax'])) ] | ||||
|         pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]  | ||||
|         zero_dim = ['_doe', ['0']] # null dim, vector (hidden) | ||||
|         match_ids = {'E':[ids_0, ids_1], | ||||
|                      'E0':[ids_0, ids_1], | ||||
|                      'P':[ids_1, pc_ids], | ||||
|                      'T':[ids_0, pc_ids], | ||||
|                      'W':[ids_1, pc_ids], | ||||
|                      'p_tsq':[ids_1, zero_dim], | ||||
|                      'rmsep':[pc_ids, zero_dim], | ||||
|                      'var_leverages':[ids_1, zero_dim], | ||||
|                      'sample_leverages':[pc_ids, zero_dim], | ||||
|                      'exp_var_x': [pc_ids, zero_dim], | ||||
|                      'var_x': [pc_ids, zero_dim], | ||||
|         match_ids = {'E' : [ids_0, ids_1], | ||||
|                      'E0' : [ids_0, ids_1], | ||||
|                      'P' : [ids_1, pc_ids], | ||||
|                      'T' : [ids_0, pc_ids], | ||||
|                      'W' : [ids_1, pc_ids], | ||||
|                      'p_tsq' : [ids_1, zero_dim], | ||||
|                      'rmsep' : [pc_ids, zero_dim], | ||||
|                      'var_leverages' : [ids_1, zero_dim], | ||||
|                      'sample_leverages' : [pc_ids, zero_dim], | ||||
|                      'exp_var_x' : [pc_ids, zero_dim], | ||||
|                      'var_x' : [pc_ids, zero_dim], | ||||
|                      'eigvals' : [pc_ids, zero_dim] | ||||
|                      } | ||||
|          | ||||
|         out = Dataset(self.model[param], match_ids[param], name=param) | ||||
| @@ -256,7 +271,7 @@ class PLS(Model): | ||||
|     def make_model(self, a, b, amax, scale, mode, engine): | ||||
|         """Make model on amax components. | ||||
|         """ | ||||
|         print "MAking model" | ||||
|         print "Making model" | ||||
|         dat = engine(a, b, amax, scale, mode) | ||||
|         self.model.update(dat) | ||||
|          | ||||
| @@ -478,13 +493,6 @@ class LPLS(Model): | ||||
|         self._data['Z'] = c.asarray() | ||||
|         self.validation(options) | ||||
|         self.make_model(options) | ||||
|         print self.model['evx'] | ||||
|         evx_str = [str(i)[:3] for i in self.model['evx']] | ||||
|         logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str)) | ||||
|         evy_str = [str(i)[:3] for i in self.model['evy']] | ||||
|         logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str)) | ||||
|         evz_str = [str(i)[:3] for i in self.model['evz']] | ||||
|         logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str)) | ||||
|          | ||||
|         if options['calc_conf']: | ||||
|             self.confidence(options) | ||||
| @@ -553,13 +561,15 @@ class PcaOptions(Options): | ||||
|         opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True), | ||||
|                             (blmplots.PcaLoadingPlot, 'Loadings', True), | ||||
|                             (blmplots.LineViewXc, 'Line view', True), | ||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False) | ||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||
|                              (blmplots.PcaScreePlot, 'Scree', True) | ||||
|                             ] | ||||
|          | ||||
|         opt['out_data'] = ['T','P', 'p_tsq'] | ||||
|         opt['out_plots'] = [blmplots.PcaScorePlot, | ||||
|                             blmplots.PcaLoadingPlot, | ||||
|                             blmplots.LineViewXc] | ||||
|                             blmplots.LineViewXc, | ||||
|                             blmplots.PcaScreePlot] | ||||
|  | ||||
|         self.update(opt) | ||||
|          | ||||
| @@ -621,7 +631,8 @@ class PlsOptions(Options): | ||||
|  | ||||
|         # (class, name, sensitive, ticked) | ||||
|         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), | ||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), | ||||
|                             (blmplots.PlsXLoadingPlot, 'X-Loadings', True), | ||||
|                             (blmplots.PlsYLoadingPlot, 'Y-Loadings', True), | ||||
|                             (blmplots.LineViewXc, 'Line view', True), | ||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False), | ||||
| @@ -629,7 +640,7 @@ class PlsOptions(Options): | ||||
|                             ] | ||||
|          | ||||
|         opt['out_data'] = ['T','P', 'w_tsq'] | ||||
|         opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc] | ||||
|         opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc] | ||||
|          | ||||
|         #opt['out_data'] = None | ||||
|                              | ||||
| @@ -699,11 +710,9 @@ class LplsOptions(Options): | ||||
|                            ] | ||||
|  | ||||
|         # (class, name, sensitive, ticked) | ||||
|         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), | ||||
|                             (blmplots.PlsLoadingPlot, 'Loadings', True), | ||||
|         opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True), | ||||
|                             (blmplots.LplsXLoadingPlot, 'Loadings', True), | ||||
|                             (blmplots.LineViewXc, 'Line view', True), | ||||
|                             (blmplots.PredictionErrorPlot, 'Residual Error', False), | ||||
|                             (blmplots.RMSEPPlot, 'RMSEP', False), | ||||
|                             (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False), | ||||
|                             (blmplots.LplsXCorrelationPlot, 'X corr.', True), | ||||
|                             (blmplots.LplsZCorrelationPlot, 'Z corr.', True) | ||||
|   | ||||
| @@ -8,7 +8,7 @@ fixme: | ||||
| from matplotlib import cm,patches | ||||
| import gtk | ||||
| import fluents | ||||
| from fluents import plots, main | ||||
| from fluents import plots, main,logger | ||||
| import scipy | ||||
| from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty | ||||
| from scipy.stats import corrcoef | ||||
| @@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot): | ||||
|         """Set patch sizes.""" | ||||
|         pass | ||||
|  | ||||
|     def set_expvar_axlabels(self, param="evx"): | ||||
|     def set_expvar_axlabels(self, param=None): | ||||
|         if param == None: | ||||
|             param = self._expvar_param | ||||
|         else: | ||||
|             self._expvar_param = param | ||||
|         if not self.model.model.has_key(param): | ||||
|             self.model.model[param] = None | ||||
|         if self.model.model[param]==None: | ||||
|             logger.log('notice', 'Param: %s not in model' %param) | ||||
|             print self.model.model.keys() | ||||
|             print self.model.model[param] | ||||
|             pass #fixme: do expvar calc here if not present | ||||
|         else: | ||||
|             expvar = self.model.model[param] | ||||
| @@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot): | ||||
|         self.selection_collection._offsets = xy | ||||
|         self.canvas.draw_idle() | ||||
|         pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05 | ||||
|         new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad) | ||||
|         new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad) | ||||
|         self.axes.set_xlim(new_lims, emit=True) | ||||
|         self.set_expvar_axlabels() | ||||
|         self.canvas.draw_idle() | ||||
| @@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot): | ||||
|         self.sc._offsets = xy | ||||
|         self.selection_collection._offsets = xy | ||||
|         pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05 | ||||
|         new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad) | ||||
|         new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad) | ||||
|         self.axes.set_ylim(new_lims, emit=True) | ||||
|         self.set_expvar_axlabels() | ||||
|         self.canvas.draw_idle() | ||||
| @@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot): | ||||
|             for indx,txt in self._text_labels.items(): | ||||
|                 if indx in index: | ||||
|                     txt.set_visible(True) | ||||
|         self.canvas.draw() | ||||
|         self.canvas.draw_idle() | ||||
|                  | ||||
|     def hide_labels(self): | ||||
|         for txt in self._text_labels.values(): | ||||
|             txt.set_visible(False) | ||||
|         self.canvas.draw() | ||||
|         self.canvas.draw_idle() | ||||
|  | ||||
|  | ||||
| class PcaScreePlot(plots.BarPlot): | ||||
|     def __init__(self, model): | ||||
|         title = "Pca, (%s) Scree" %model._dataset['X'].get_name() | ||||
|         ds = model.as_dataset('eigvals') | ||||
|         if ds==None: | ||||
|             logger.log('notice', 'Model does not contain eigvals') | ||||
|         plots.BarPlot.__init__(self, ds, name=title) | ||||
|  | ||||
|  | ||||
| class PcaScorePlot(BlmScatterPlot): | ||||
|     def __init__(self, model, absi=0, ordi=1): | ||||
|         title = "Pca scores (%s)" %model._dataset['X'].get_name() | ||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') | ||||
|  | ||||
|         self.set_expvar_axlabels(param="expvarx") | ||||
|  | ||||
| class PcaLoadingPlot(BlmScatterPlot): | ||||
|     def __init__(self, model, absi=0, ordi=1): | ||||
| @@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot): | ||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') | ||||
|  | ||||
|  | ||||
| class PlsLoadingPlot(BlmScatterPlot): | ||||
| class PlsXLoadingPlot(BlmScatterPlot): | ||||
|     def __init__(self, model, absi=0, ordi=1): | ||||
|         title = "Pls loadings (%s)" %model._dataset['X'].get_name() | ||||
|         title = "Pls x-loadings (%s)" %model._dataset['X'].get_name() | ||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq') | ||||
|         #self.set_expvar_axlabels(self, param="expvarx") | ||||
|  | ||||
|      | ||||
|  | ||||
| class PlsYLoadingPlot(BlmScatterPlot): | ||||
|     def __init__(self, model, absi=0, ordi=1): | ||||
|         title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name() | ||||
|         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q') | ||||
|  | ||||
|  | ||||
| class PlsCorrelationLoadingPlot(BlmScatterPlot): | ||||
|     def __init__(self, model, absi=0, ordi=1): | ||||
|         title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() | ||||
| @@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot): | ||||
|      | ||||
|  | ||||
| class InfluencePlot(plots.ScatterPlot): | ||||
|     """ Returns a leverage vs resiudal scatter plot. | ||||
|     """ | ||||
|     """ | ||||
|     pass | ||||
|     def __init__(self, model, dim, name="Influence"): | ||||
|         if not model.model.has_key('levx'): | ||||
|             logger.log('notice', 'Model has no calculations of leverages') | ||||
|             return | ||||
|         if not model.model.has_key('ssqx'): | ||||
|             logger.log('notice', 'Model has no calculations of residuals') | ||||
|             return | ||||
|         ds1 = model.as_dataset('levx') | ||||
|         ds2 = model.as_dataset('ssqx') | ||||
|         plots.ScatterPlot.__init__(self, ds1, ds2, | ||||
|                                    id_dim, sel_dim, id_1, id_2, | ||||
|                                    c=col, s=20, sel_dim_2=sel_dim_2, | ||||
|                                    name='Load Volcano') | ||||
|          | ||||
|  | ||||
| class RMSEPPlot(plots.BarPlot): | ||||
|   | ||||
| @@ -11,7 +11,7 @@ import time | ||||
|  | ||||
|  | ||||
| def hotelling(Pcv, P, p_center='med', cov_center='med', | ||||
|               alpha=0.3, crot=True, strict=False, metric=None): | ||||
|               alpha=0.3, crot=True, strict=False): | ||||
|     """Returns regularized hotelling T^2. | ||||
|      | ||||
|     alpha -- regularisation towards pooled cov estimates | ||||
| @@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med', | ||||
|     alpha -- regularisation | ||||
|     crot -- rotate submodels toward full? | ||||
|     strict -- only rotate 90 degree ? | ||||
|     metric -- inverse metric matrix (if Pcv and P from metric pca/pls) | ||||
|      | ||||
|     """ | ||||
|     m, n = P.shape | ||||
|     if metric==None: | ||||
|         metric = eye(m, dtype='<f8') | ||||
|     P = dot(metric.T, asarray(P)) | ||||
|     n_sets, n, amax = Pcv.shape | ||||
|     # allocate | ||||
|     T_sq = empty((n, ),dtype='f') | ||||
| @@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med', | ||||
|     # rotate sub_models to full model | ||||
|     if crot: | ||||
|         for i, Pi in enumerate(Pcv): | ||||
|             Pi = dot(metric.T, Pi) | ||||
|             Pcv[i] = procrustes(P, Pi, strict=strict) | ||||
|  | ||||
|     # center of pnull | ||||
| @@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|               center=True, | ||||
|               sim_method='shuffle', | ||||
|               p_center='med', cov_center='med', | ||||
|               crot=True, strict=False, metric=None): | ||||
|               crot=True, strict=False): | ||||
|  | ||||
|     """Returns qvals for pls model. | ||||
|  | ||||
| @@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|     cov_center -- location estimator for covariance of submodels ['med'] | ||||
|     crot -- bool, use rotations of sub models? | ||||
|     strict -- bool, use stict (rot/flips only) rotations? | ||||
|     metric -- bool, use row metric? | ||||
|     """ | ||||
|      | ||||
|     m, n = a.shape | ||||
| @@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|     if center: | ||||
|         ac = a - a.mean(0) | ||||
|         bc = b - b.mean(0) | ||||
|     if metric!=None: | ||||
|         ac = dot(ac, metric) | ||||
|      | ||||
|     if algo=='bridge': | ||||
|         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||
|     else: | ||||
|         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True) | ||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True) | ||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                          alpha=alpha, crot=crot, strict=strict, | ||||
|                          cov_center=cov_center) | ||||
| @@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3, | ||||
|             dat = bridge(ac, b_shuff, aopt, 'loads','fast') | ||||
|         else: | ||||
|             dat = pls(ac, b_shuff, aopt, 'loads', 'fast') | ||||
|         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|         Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo) | ||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                              alpha=alpha, crot=crot, strict=strict, | ||||
|                              cov_center=cov_center) | ||||
| @@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | ||||
|                  n_iter=20, algo='pls', | ||||
|                  sim_method='shuffle', | ||||
|                  p_center='med', cov_center='med', | ||||
|                  crot=True, strict=False, metric=None): | ||||
|                  crot=True, strict=False): | ||||
|  | ||||
|     """Returns qvals for pls model. | ||||
|     Shuffling of variables in X is preprocessed in metric. | ||||
|     Shuffling of variables in X. | ||||
|     Null model is 'If I put genes randomly on network' ... if they are sign: | ||||
|     then this is due to network structure and not covariance with response. | ||||
|  | ||||
| @@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | ||||
|     cov_center -- location estimator for covariance of submodels ['med'] | ||||
|     crot -- bool, use rotations of sub models? | ||||
|     strict -- bool, use stict (rot/flips only) rotations? | ||||
|     metric -- bool, use row metric? | ||||
|     """ | ||||
|      | ||||
|     m, n = a.shape | ||||
| @@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | ||||
|     if center==True: | ||||
|         ac = a - a.mean(0) | ||||
|         bc = b - b.mean(0) | ||||
|     if metric==None: | ||||
|         metric = eye(n,n) | ||||
|      | ||||
|     if algo=='bridge': | ||||
|         dat = bridge(ac, bc, aopt, 'loads', 'fast') | ||||
|     else: | ||||
|         dat = pls(ac, bc, aopt, 'loads', 'fast') | ||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|     Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo) | ||||
|     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                          alpha=alpha, crot=crot, strict=strict, | ||||
|                          cov_center=cov_center) | ||||
| @@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3, | ||||
|     for i, a_shuff in enumerate(Vs): | ||||
|         t1 = time.time() | ||||
|         a = a_shuff - a_shuff.mean(0) | ||||
|         a = dot(a, metric) | ||||
|          | ||||
|         if algo=='bridge': | ||||
|             dat = bridge(a, b, aopt, 'loads','fast') | ||||
|         else: | ||||
|             dat = pls(a, b, aopt, 'loads', 'fast') | ||||
|         Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) | ||||
|         Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo) | ||||
|         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, | ||||
|                              alpha=alpha, crot=crot, strict=strict, | ||||
|                              cov_center=cov_center) | ||||
|   | ||||
| @@ -32,7 +32,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | ||||
|         lev --leverages, ssq -- sum of squares, expvar -- cumulative | ||||
|         explained variance, aopt -- number of components used | ||||
|      | ||||
|     :OtherParameters: | ||||
|     :OtherParam eters: | ||||
|     mode : str | ||||
|         Amount of info retained, ('fast', 'normal', 'detailed') | ||||
|     center_axis : int | ||||
| @@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | ||||
|         a = a - expand_dims(a.mean(center_axis), center_axis) | ||||
|     if m>(n+100) or n>(m+100): | ||||
|         u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var | ||||
|         print s[:10] | ||||
|     else: | ||||
|         u, s, vt = svd(a, 0) | ||||
|         v = vt.T | ||||
| @@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | ||||
|         lev = [] | ||||
|         for ai in range(aopt): | ||||
|             E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T) | ||||
|             ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)]) | ||||
|             ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)]) | ||||
|             if scale=='loads': | ||||
|                 lev.append([((s*T)**2).sum(1), (P**2).sum(1)]) | ||||
|             else: | ||||
| @@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): | ||||
|         # variances | ||||
|     expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1] | ||||
|      | ||||
|     return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt} | ||||
|     return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]} | ||||
|  | ||||
| def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0): | ||||
|     """ Principal Component Regression. | ||||
| @@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None): | ||||
|         if i>0: | ||||
|             for j in range(0, i, 1): | ||||
|                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] | ||||
|         print vnorm(r) | ||||
|          | ||||
|         t = dot(a, r) | ||||
|         tt = vnorm(t)**2 | ||||
|         p  = dot(a.T, t)/tt | ||||
| @@ -375,14 +374,11 @@ def w_pls(aat, b, aopt): | ||||
|         u = dot(b , q) #y-factor scores | ||||
|         U[:,i] = u.ravel() | ||||
|         t = dot(aat, u) | ||||
|         print "Norm of t: %s" %vnorm(t) | ||||
|         print "s: %s" %s | ||||
|          | ||||
|         t = t/vnorm(t) | ||||
|         T[:,i] = t.ravel() | ||||
|         r = dot(aat, t)#score-weights | ||||
|         #r = r/vnorm(r) | ||||
|         print "Norm R: %s" %vnorm(r) | ||||
|         R[:,i] = r.ravel() | ||||
|         PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) ) | ||||
|         if i<aopt: | ||||
| @@ -701,10 +697,9 @@ def esvd(data, amax=None): | ||||
|                 pcrange = None | ||||
|             else: | ||||
|                 pcrange = [n-amax, n] | ||||
|             print "symm>n" | ||||
|             s, v = symeig(kernel, range=pcrange, overwrite=True) | ||||
|             s = s[::-1] | ||||
|             v = v[:,::-1] | ||||
|             s = s[::-1].real | ||||
|             v = v[:,::-1].real | ||||
|         else: | ||||
|             u, s, vt = svd(kernel) | ||||
|             v = vt.T | ||||
| @@ -718,7 +713,6 @@ def esvd(data, amax=None): | ||||
|                 pcrange = None | ||||
|             else: | ||||
|                 pcrange = [m-amax, m] | ||||
|             print "sym (m<n)" | ||||
|             s, u = symeig(kernel, range=pcrange, overwrite=True) | ||||
|             s = s[::-1] | ||||
|             u = u[:,::-1] | ||||
| @@ -726,8 +720,8 @@ def esvd(data, amax=None): | ||||
|             u, s, vt = svd(kernel) | ||||
|         s = sqrt(s) | ||||
|         v = dot(data.T, u)/s | ||||
|     print s[:2] | ||||
|     return u, s, v | ||||
|     # some use of symeig returns the 0 imaginary part | ||||
|     return u.real, s.real, v.real | ||||
|  | ||||
| def vnorm(x): | ||||
|     # assume column arrays (or vectors) | ||||
|   | ||||
| @@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False): | ||||
|           else: | ||||
|                yield aat_in,aat_out,b_in,b_out | ||||
|  | ||||
| def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None): | ||||
| def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0): | ||||
|      """Random block crossvalidation | ||||
|     Leave-one-out is a subset, with n_blocks equals a.shape[-1] | ||||
|     """ | ||||
| @@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No | ||||
|                mn_b = bcal.mean(0)[newaxis] | ||||
|                bcal = bcal - mn_b | ||||
|                btrue = btrue - mn_b | ||||
|           if metric!=None: | ||||
|                acal = dot(acal, metric) | ||||
|            | ||||
|           if index_out: | ||||
|                yield acal, atrue, bcal, btrue, out | ||||
|           else:      | ||||
|                yield acal, atrue, bcal, btrue | ||||
|  | ||||
|           | ||||
| def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): | ||||
| def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0): | ||||
|      """Returns a generator of crossvalidation sample segments. | ||||
|  | ||||
|      input: | ||||
| @@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): | ||||
|               mn_a = acal.mean(0)[newaxis] | ||||
|               acal = acal - mn_a | ||||
|               atrue = atrue - mn_a | ||||
|          if metric!=None: | ||||
|               acal = dot(acal, metric) | ||||
|           | ||||
|          if index_out: | ||||
|               yield acal, atrue, out | ||||
|          else: | ||||
|   | ||||
| @@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'): | ||||
|     aopt = find_aopt_from_sep(msep) | ||||
|     return sqrt(msep) | ||||
|  | ||||
| def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): | ||||
| def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'): | ||||
|     k, l = m_shape(Y) | ||||
|     PRESS = zeros((l, amax+1), dtype='<f8') | ||||
|     EE = zeros((amax, k, l), dtype='<f8') | ||||
|     Yhat = zeros((amax, k, l), dtype='<f8') | ||||
|     V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric) | ||||
|     V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True) | ||||
|     for Xin, Xout, Yin, Yout, out in V: | ||||
|         ym = -sum(Yout,0)[newaxis]/Yin.shape[0] | ||||
|         Yin = (Yin - ym) | ||||
| @@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets): | ||||
|  | ||||
|     return sep, aopt | ||||
|  | ||||
| def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None): | ||||
| def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True): | ||||
|     """ Returns CV-segments of paramter W for wide X. | ||||
|      | ||||
|     todo: add support for T,Q and B | ||||
| @@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | ||||
|         n_blocks = b.shape[0] | ||||
|  | ||||
|     Wcv = empty((n_blocks, a.shape[1], amax), dtype='d') | ||||
|     if use_pack and metric==None: | ||||
|     if use_pack: | ||||
|         u, s, inflater = svd(a, full_matrices=0) | ||||
|         a = u*s | ||||
|      | ||||
|     V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric) | ||||
|     V = pls_gen(a, b, n_blocks=n_blocks, center=center) | ||||
|     for nn,(a_in, a_out, b_in, b_out) in enumerate(V): | ||||
|         if algo=='pls': | ||||
|             dat = pls(a_in, b_in, amax, 'loads', 'fast') | ||||
| @@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m | ||||
|             dat = bridge(a_in, b_in, amax, 'loads', 'fast') | ||||
|  | ||||
|         W = dat['W'] | ||||
|         if use_pack and metric==None: | ||||
|         if use_pack: | ||||
|             W = dot(inflater.T, W) | ||||
|  | ||||
|         Wcv[nn,:,:] = W[:,:,] | ||||
|          | ||||
|     return Wcv | ||||
|  | ||||
| def pca_jkP(a, aopt, n_blocks=None, metric=None): | ||||
| def pca_jkP(a, aopt, n_blocks=None): | ||||
|     """Returns loading from PCA on CV-segments. | ||||
|      | ||||
|     input: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user