From 7dbf28f65de472f752fc1d048d8cb67e37cb8570 Mon Sep 17 00:00:00 2001 From: flatberg Date: Fri, 24 Aug 2007 09:14:24 +0000 Subject: [PATCH] ... just lots of stuff --- fluents/lib/blmfuncs.py | 109 +++++++++++++++++-------------- fluents/lib/blmplots.py | 58 ++++++++++++---- fluents/lib/cx_stats.py | 30 +++------ fluents/lib/engines.py | 22 +++---- fluents/lib/select_generators.py | 10 ++- fluents/lib/validation.py | 14 ++-- 6 files changed, 134 insertions(+), 109 deletions(-) diff --git a/fluents/lib/blmfuncs.py b/fluents/lib/blmfuncs.py index 24722b4..b7803bd 100644 --- a/fluents/lib/blmfuncs.py +++ b/fluents/lib/blmfuncs.py @@ -18,6 +18,39 @@ from packer import Packer import blmplots +class NewStyleModel(Function): + def __init__(self, id='johndoe', name='JohnDoe'): + Function.__init__(self, id, name) + self.name = name + self.options = Options + self.input_data = [] + self.parts = {} + self.io_table = {} + self.datasets = [] + self.plots = [] + + def create_dataset(self, param, Dataset=Dataset): + for ds in self.datasets: + if ds.get_name()==param: return ds + if not param in self.parts.keys(): + logger.log('notice', 'Parameter: %s not present' %param) + return + if not param in self.io_table.keys(): + logger.log('notice', 'Parameter: %s not in defined in io table' %param) + + identifiers = self.io_table.get(param) + data = self.parts.get(param) + ds = Dataset(data, identifiers=identifiers, name=param) + self.datasets.append(dataset) + return ds + + def create_plot(self, blmplot): + if blmplot.validate_model(self.parts): + plt = blmplot(self.parts) + self.plots.append(plt) + return plt + + class Model(Function): """Base class of bilinear models. """ @@ -68,7 +101,7 @@ class PCA(Model): logger.log('notice', 'Aopt at first component!') def confidence(self, aopt, n_sets, alpha, p_center, - crot, strict, cov_center ): + crot, strict, cov_center): """Returns a confidence measure for model parameters. Based on aopt. @@ -80,7 +113,7 @@ class PCA(Model): jk_segments = pca_jkP(self.model['E0'], aopt, n_sets) Pcal = self.model['P'][:,:aopt] - # add the scale to P + # ensure scaled P tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt]) Pcal = Pcal*tnorm tsq = hotelling(jk_segments, Pcal, p_center, @@ -90,33 +123,14 @@ class PCA(Model): def make_model(self, amax, mode, scale): """Model on optimal number of components. """ - dat = pca(self.model['E0'], amax, scale, mode) - - # explained variance - var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P']) - dat['var_x'] = var_x - dat['exp_var_x'] = exp_var_x - - #fixme### - do_lev_s = False - do_lev_v = False - ##### - if do_lev_s: - # sample leverages - tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts - s_lev = leverage(amax, tnorm) - dat['s_lev'] = s_lev - if do_lev_v: - # variable leverages - v_lev = leverage(amax, dat['P']) - dat['v_lev'] = v_lev - + dat = pca(self.model['E0'], amax, scale, mode) self.model.update(dat) def as_dataset(self, param, dtype='dataset'): """Return model parameter as Dataset. """ if not param in self.model.keys(): + logger.log('notice', 'Parameter: %s not in model' %param) return DX = self._dataset['X'] #input dataset dim_name_0, dim_name_1 = DX.get_dim_name() @@ -128,17 +142,18 @@ class PCA(Model): pc_ids = ['_amax', map(str,range(self._options['amax'])) ] pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ] zero_dim = ['_doe', ['0']] # null dim, vector (hidden) - match_ids = {'E':[ids_0, ids_1], - 'E0':[ids_0, ids_1], - 'P':[ids_1, pc_ids], - 'T':[ids_0, pc_ids], - 'W':[ids_1, pc_ids], - 'p_tsq':[ids_1, zero_dim], - 'rmsep':[pc_ids, zero_dim], - 'var_leverages':[ids_1, zero_dim], - 'sample_leverages':[pc_ids, zero_dim], - 'exp_var_x': [pc_ids, zero_dim], - 'var_x': [pc_ids, zero_dim], + match_ids = {'E' : [ids_0, ids_1], + 'E0' : [ids_0, ids_1], + 'P' : [ids_1, pc_ids], + 'T' : [ids_0, pc_ids], + 'W' : [ids_1, pc_ids], + 'p_tsq' : [ids_1, zero_dim], + 'rmsep' : [pc_ids, zero_dim], + 'var_leverages' : [ids_1, zero_dim], + 'sample_leverages' : [pc_ids, zero_dim], + 'exp_var_x' : [pc_ids, zero_dim], + 'var_x' : [pc_ids, zero_dim], + 'eigvals' : [pc_ids, zero_dim] } out = Dataset(self.model[param], match_ids[param], name=param) @@ -256,7 +271,7 @@ class PLS(Model): def make_model(self, a, b, amax, scale, mode, engine): """Make model on amax components. """ - print "MAking model" + print "Making model" dat = engine(a, b, amax, scale, mode) self.model.update(dat) @@ -478,13 +493,6 @@ class LPLS(Model): self._data['Z'] = c.asarray() self.validation(options) self.make_model(options) - print self.model['evx'] - evx_str = [str(i)[:3] for i in self.model['evx']] - logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str)) - evy_str = [str(i)[:3] for i in self.model['evy']] - logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str)) - evz_str = [str(i)[:3] for i in self.model['evz']] - logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str)) if options['calc_conf']: self.confidence(options) @@ -553,13 +561,15 @@ class PcaOptions(Options): opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True), (blmplots.PcaLoadingPlot, 'Loadings', True), (blmplots.LineViewXc, 'Line view', True), - (blmplots.PredictionErrorPlot, 'Residual Error', False) + (blmplots.PredictionErrorPlot, 'Residual Error', False), + (blmplots.PcaScreePlot, 'Scree', True) ] opt['out_data'] = ['T','P', 'p_tsq'] opt['out_plots'] = [blmplots.PcaScorePlot, blmplots.PcaLoadingPlot, - blmplots.LineViewXc] + blmplots.LineViewXc, + blmplots.PcaScreePlot] self.update(opt) @@ -621,7 +631,8 @@ class PlsOptions(Options): # (class, name, sensitive, ticked) opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), - (blmplots.PlsLoadingPlot, 'Loadings', True), + (blmplots.PlsXLoadingPlot, 'X-Loadings', True), + (blmplots.PlsYLoadingPlot, 'Y-Loadings', True), (blmplots.LineViewXc, 'Line view', True), (blmplots.PredictionErrorPlot, 'Residual Error', False), (blmplots.RMSEPPlot, 'RMSEP', False), @@ -629,7 +640,7 @@ class PlsOptions(Options): ] opt['out_data'] = ['T','P', 'w_tsq'] - opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc] + opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc] #opt['out_data'] = None @@ -699,11 +710,9 @@ class LplsOptions(Options): ] # (class, name, sensitive, ticked) - opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), - (blmplots.PlsLoadingPlot, 'Loadings', True), + opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True), + (blmplots.LplsXLoadingPlot, 'Loadings', True), (blmplots.LineViewXc, 'Line view', True), - (blmplots.PredictionErrorPlot, 'Residual Error', False), - (blmplots.RMSEPPlot, 'RMSEP', False), (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False), (blmplots.LplsXCorrelationPlot, 'X corr.', True), (blmplots.LplsZCorrelationPlot, 'Z corr.', True) diff --git a/fluents/lib/blmplots.py b/fluents/lib/blmplots.py index 8375d96..9be96fa 100644 --- a/fluents/lib/blmplots.py +++ b/fluents/lib/blmplots.py @@ -8,7 +8,7 @@ fixme: from matplotlib import cm,patches import gtk import fluents -from fluents import plots, main +from fluents import plots, main,logger import scipy from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty from scipy.stats import corrcoef @@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot): """Set patch sizes.""" pass - def set_expvar_axlabels(self, param="evx"): + def set_expvar_axlabels(self, param=None): + if param == None: + param = self._expvar_param + else: + self._expvar_param = param if not self.model.model.has_key(param): self.model.model[param] = None if self.model.model[param]==None: + logger.log('notice', 'Param: %s not in model' %param) + print self.model.model.keys() + print self.model.model[param] pass #fixme: do expvar calc here if not present else: expvar = self.model.model[param] @@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot): self.selection_collection._offsets = xy self.canvas.draw_idle() pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05 - new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad) + new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad) self.axes.set_xlim(new_lims, emit=True) self.set_expvar_axlabels() self.canvas.draw_idle() @@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot): self.sc._offsets = xy self.selection_collection._offsets = xy pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05 - new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad) + new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad) self.axes.set_ylim(new_lims, emit=True) self.set_expvar_axlabels() self.canvas.draw_idle() @@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot): for indx,txt in self._text_labels.items(): if indx in index: txt.set_visible(True) - self.canvas.draw() + self.canvas.draw_idle() def hide_labels(self): for txt in self._text_labels.values(): txt.set_visible(False) - self.canvas.draw() + self.canvas.draw_idle() + + +class PcaScreePlot(plots.BarPlot): + def __init__(self, model): + title = "Pca, (%s) Scree" %model._dataset['X'].get_name() + ds = model.as_dataset('eigvals') + if ds==None: + logger.log('notice', 'Model does not contain eigvals') + plots.BarPlot.__init__(self, ds, name=title) class PcaScorePlot(BlmScatterPlot): def __init__(self, model, absi=0, ordi=1): title = "Pca scores (%s)" %model._dataset['X'].get_name() BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') - + self.set_expvar_axlabels(param="expvarx") class PcaLoadingPlot(BlmScatterPlot): def __init__(self, model, absi=0, ordi=1): @@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot): BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') -class PlsLoadingPlot(BlmScatterPlot): +class PlsXLoadingPlot(BlmScatterPlot): def __init__(self, model, absi=0, ordi=1): - title = "Pls loadings (%s)" %model._dataset['X'].get_name() + title = "Pls x-loadings (%s)" %model._dataset['X'].get_name() BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq') #self.set_expvar_axlabels(self, param="expvarx") - + +class PlsYLoadingPlot(BlmScatterPlot): + def __init__(self, model, absi=0, ordi=1): + title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name() + BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q') + + class PlsCorrelationLoadingPlot(BlmScatterPlot): def __init__(self, model, absi=0, ordi=1): title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() @@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot): class InfluencePlot(plots.ScatterPlot): + """ Returns a leverage vs resiudal scatter plot. """ - """ - pass + def __init__(self, model, dim, name="Influence"): + if not model.model.has_key('levx'): + logger.log('notice', 'Model has no calculations of leverages') + return + if not model.model.has_key('ssqx'): + logger.log('notice', 'Model has no calculations of residuals') + return + ds1 = model.as_dataset('levx') + ds2 = model.as_dataset('ssqx') + plots.ScatterPlot.__init__(self, ds1, ds2, + id_dim, sel_dim, id_1, id_2, + c=col, s=20, sel_dim_2=sel_dim_2, + name='Load Volcano') class RMSEPPlot(plots.BarPlot): diff --git a/fluents/lib/cx_stats.py b/fluents/lib/cx_stats.py index 433ce78..43eea92 100644 --- a/fluents/lib/cx_stats.py +++ b/fluents/lib/cx_stats.py @@ -11,7 +11,7 @@ import time def hotelling(Pcv, P, p_center='med', cov_center='med', - alpha=0.3, crot=True, strict=False, metric=None): + alpha=0.3, crot=True, strict=False): """Returns regularized hotelling T^2. alpha -- regularisation towards pooled cov estimates @@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med', alpha -- regularisation crot -- rotate submodels toward full? strict -- only rotate 90 degree ? - metric -- inverse metric matrix (if Pcv and P from metric pca/pls) """ m, n = P.shape - if metric==None: - metric = eye(m, dtype='(n+100) or n>(m+100): u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var - print s[:10] else: u, s, vt = svd(a, 0) v = vt.T @@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): lev = [] for ai in range(aopt): E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T) - ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)]) + ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)]) if scale=='loads': lev.append([((s*T)**2).sum(1), (P**2).sum(1)]) else: @@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0): # variances expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1] - return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt} + return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]} def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0): """ Principal Component Regression. @@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None): if i>0: for j in range(0, i, 1): r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] - print vnorm(r) + t = dot(a, r) tt = vnorm(t)**2 p = dot(a.T, t)/tt @@ -375,14 +374,11 @@ def w_pls(aat, b, aopt): u = dot(b , q) #y-factor scores U[:,i] = u.ravel() t = dot(aat, u) - print "Norm of t: %s" %vnorm(t) - print "s: %s" %s t = t/vnorm(t) T[:,i] = t.ravel() r = dot(aat, t)#score-weights #r = r/vnorm(r) - print "Norm R: %s" %vnorm(r) R[:,i] = r.ravel() PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) ) if in" s, v = symeig(kernel, range=pcrange, overwrite=True) - s = s[::-1] - v = v[:,::-1] + s = s[::-1].real + v = v[:,::-1].real else: u, s, vt = svd(kernel) v = vt.T @@ -718,7 +713,6 @@ def esvd(data, amax=None): pcrange = None else: pcrange = [m-amax, m] - print "sym (m