From 7dbf28f65de472f752fc1d048d8cb67e37cb8570 Mon Sep 17 00:00:00 2001
From: flatberg <flatberg@pvv.ntnu.no>
Date: Fri, 24 Aug 2007 09:14:24 +0000
Subject: [PATCH] ... just lots of stuff

---
 fluents/lib/blmfuncs.py          | 109 +++++++++++++++++--------------
 fluents/lib/blmplots.py          |  58 ++++++++++++----
 fluents/lib/cx_stats.py          |  30 +++------
 fluents/lib/engines.py           |  22 +++----
 fluents/lib/select_generators.py |  10 ++-
 fluents/lib/validation.py        |  14 ++--
 6 files changed, 134 insertions(+), 109 deletions(-)

diff --git a/fluents/lib/blmfuncs.py b/fluents/lib/blmfuncs.py
index 24722b4..b7803bd 100644
--- a/fluents/lib/blmfuncs.py
+++ b/fluents/lib/blmfuncs.py
@@ -18,6 +18,39 @@ from packer import Packer
 import blmplots
 
 
+class NewStyleModel(Function):
+    def __init__(self, id='johndoe', name='JohnDoe'):
+        Function.__init__(self, id, name)
+        self.name = name
+        self.options = Options
+        self.input_data = []
+        self.parts = {}
+        self.io_table = {}
+        self.datasets = []
+        self.plots = []
+
+    def create_dataset(self, param, Dataset=Dataset):
+        for ds in self.datasets:
+            if ds.get_name()==param: return ds
+        if not param in self.parts.keys():
+            logger.log('notice', 'Parameter: %s not present' %param)
+            return
+        if not param in self.io_table.keys():
+            logger.log('notice', 'Parameter: %s not in defined in io table' %param)
+
+        identifiers = self.io_table.get(param)
+        data = self.parts.get(param)
+        ds = Dataset(data, identifiers=identifiers, name=param)
+        self.datasets.append(dataset)
+        return ds
+
+    def create_plot(self, blmplot):
+        if blmplot.validate_model(self.parts):
+            plt = blmplot(self.parts)
+            self.plots.append(plt)
+            return plt
+
+    
 class Model(Function):
     """Base class of bilinear models.
     """
@@ -68,7 +101,7 @@ class PCA(Model):
             logger.log('notice', 'Aopt at first component!')
         
     def confidence(self, aopt, n_sets, alpha, p_center,
-                   crot, strict, cov_center ):
+                   crot, strict, cov_center):
         """Returns a confidence measure for model parameters.
         Based on aopt.
 
@@ -80,7 +113,7 @@ class PCA(Model):
         
         jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
         Pcal = self.model['P'][:,:aopt]
-        # add the scale to P
+        # ensure scaled P
         tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
         Pcal = Pcal*tnorm
         tsq = hotelling(jk_segments, Pcal, p_center,
@@ -90,33 +123,14 @@ class PCA(Model):
     def make_model(self, amax, mode, scale):
         """Model on optimal number of components.
         """
-        dat = pca(self.model['E0'], amax, scale, mode)
-        
-        # explained variance
-        var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P'])
-        dat['var_x'] = var_x
-        dat['exp_var_x'] = exp_var_x
-
-        #fixme###
-        do_lev_s = False
-        do_lev_v = False
-        #####
-        if do_lev_s:
-            # sample leverages
-            tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts
-            s_lev = leverage(amax, tnorm)
-            dat['s_lev'] = s_lev
-        if do_lev_v:
-            # variable leverages
-            v_lev = leverage(amax, dat['P'])
-            dat['v_lev'] = v_lev
-        
+        dat = pca(self.model['E0'], amax, scale, mode)        
         self.model.update(dat)
 
     def as_dataset(self, param, dtype='dataset'):
         """Return model parameter as Dataset.
         """
         if not param in self.model.keys():
+            logger.log('notice', 'Parameter: %s not in model' %param)
             return
         DX = self._dataset['X'] #input dataset
         dim_name_0, dim_name_1 = DX.get_dim_name()
@@ -128,17 +142,18 @@ class PCA(Model):
         pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
         pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ] 
         zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
-        match_ids = {'E':[ids_0, ids_1],
-                     'E0':[ids_0, ids_1],
-                     'P':[ids_1, pc_ids],
-                     'T':[ids_0, pc_ids],
-                     'W':[ids_1, pc_ids],
-                     'p_tsq':[ids_1, zero_dim],
-                     'rmsep':[pc_ids, zero_dim],
-                     'var_leverages':[ids_1, zero_dim],
-                     'sample_leverages':[pc_ids, zero_dim],
-                     'exp_var_x': [pc_ids, zero_dim],
-                     'var_x': [pc_ids, zero_dim],
+        match_ids = {'E' : [ids_0, ids_1],
+                     'E0' : [ids_0, ids_1],
+                     'P' : [ids_1, pc_ids],
+                     'T' : [ids_0, pc_ids],
+                     'W' : [ids_1, pc_ids],
+                     'p_tsq' : [ids_1, zero_dim],
+                     'rmsep' : [pc_ids, zero_dim],
+                     'var_leverages' : [ids_1, zero_dim],
+                     'sample_leverages' : [pc_ids, zero_dim],
+                     'exp_var_x' : [pc_ids, zero_dim],
+                     'var_x' : [pc_ids, zero_dim],
+                     'eigvals' : [pc_ids, zero_dim]
                      }
         
         out = Dataset(self.model[param], match_ids[param], name=param)
@@ -256,7 +271,7 @@ class PLS(Model):
     def make_model(self, a, b, amax, scale, mode, engine):
         """Make model on amax components.
         """
-        print "MAking model"
+        print "Making model"
         dat = engine(a, b, amax, scale, mode)
         self.model.update(dat)
         
@@ -478,13 +493,6 @@ class LPLS(Model):
         self._data['Z'] = c.asarray()
         self.validation(options)
         self.make_model(options)
-        print self.model['evx']
-        evx_str = [str(i)[:3] for i in self.model['evx']]
-        logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str))
-        evy_str = [str(i)[:3] for i in self.model['evy']]
-        logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str))
-        evz_str = [str(i)[:3] for i in self.model['evz']]
-        logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str))
         
         if options['calc_conf']:
             self.confidence(options)
@@ -553,13 +561,15 @@ class PcaOptions(Options):
         opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
                             (blmplots.PcaLoadingPlot, 'Loadings', True),
                             (blmplots.LineViewXc, 'Line view', True),
-                            (blmplots.PredictionErrorPlot, 'Residual Error', False)
+                            (blmplots.PredictionErrorPlot, 'Residual Error', False),
+                             (blmplots.PcaScreePlot, 'Scree', True)
                             ]
         
         opt['out_data'] = ['T','P', 'p_tsq']
         opt['out_plots'] = [blmplots.PcaScorePlot,
                             blmplots.PcaLoadingPlot,
-                            blmplots.LineViewXc]
+                            blmplots.LineViewXc,
+                            blmplots.PcaScreePlot]
 
         self.update(opt)
         
@@ -621,7 +631,8 @@ class PlsOptions(Options):
 
         # (class, name, sensitive, ticked)
         opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
-                            (blmplots.PlsLoadingPlot, 'Loadings', True),
+                            (blmplots.PlsXLoadingPlot, 'X-Loadings', True),
+                            (blmplots.PlsYLoadingPlot, 'Y-Loadings', True),
                             (blmplots.LineViewXc, 'Line view', True),
                             (blmplots.PredictionErrorPlot, 'Residual Error', False),
                             (blmplots.RMSEPPlot, 'RMSEP', False),
@@ -629,7 +640,7 @@ class PlsOptions(Options):
                             ]
         
         opt['out_data'] = ['T','P', 'w_tsq']
-        opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
+        opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc]
         
         #opt['out_data'] = None
                             
@@ -699,11 +710,9 @@ class LplsOptions(Options):
                            ]
 
         # (class, name, sensitive, ticked)
-        opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
-                            (blmplots.PlsLoadingPlot, 'Loadings', True),
+        opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True),
+                            (blmplots.LplsXLoadingPlot, 'Loadings', True),
                             (blmplots.LineViewXc, 'Line view', True),
-                            (blmplots.PredictionErrorPlot, 'Residual Error', False),
-                            (blmplots.RMSEPPlot, 'RMSEP', False),
                             (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False),
                             (blmplots.LplsXCorrelationPlot, 'X corr.', True),
                             (blmplots.LplsZCorrelationPlot, 'Z corr.', True)
diff --git a/fluents/lib/blmplots.py b/fluents/lib/blmplots.py
index 8375d96..9be96fa 100644
--- a/fluents/lib/blmplots.py
+++ b/fluents/lib/blmplots.py
@@ -8,7 +8,7 @@ fixme:
 from matplotlib import cm,patches
 import gtk
 import fluents
-from fluents import plots, main
+from fluents import plots, main,logger
 import scipy
 from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
 from scipy.stats import corrcoef
@@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot):
         """Set patch sizes."""
         pass
 
-    def set_expvar_axlabels(self, param="evx"):
+    def set_expvar_axlabels(self, param=None):
+        if param == None:
+            param = self._expvar_param
+        else:
+            self._expvar_param = param
         if not self.model.model.has_key(param):
             self.model.model[param] = None
         if self.model.model[param]==None:
+            logger.log('notice', 'Param: %s not in model' %param)
+            print self.model.model.keys()
+            print self.model.model[param]
             pass #fixme: do expvar calc here if not present
         else:
             expvar = self.model.model[param]
@@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot):
         self.selection_collection._offsets = xy
         self.canvas.draw_idle()
         pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
-        new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad)
+        new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
         self.axes.set_xlim(new_lims, emit=True)
         self.set_expvar_axlabels()
         self.canvas.draw_idle()
@@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot):
         self.sc._offsets = xy
         self.selection_collection._offsets = xy
         pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
-        new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad)
+        new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
         self.axes.set_ylim(new_lims, emit=True)
         self.set_expvar_axlabels()
         self.canvas.draw_idle()
@@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot):
             for indx,txt in self._text_labels.items():
                 if indx in index:
                     txt.set_visible(True)
-        self.canvas.draw()
+        self.canvas.draw_idle()
                 
     def hide_labels(self):
         for txt in self._text_labels.values():
             txt.set_visible(False)
-        self.canvas.draw()
+        self.canvas.draw_idle()
+
+
+class PcaScreePlot(plots.BarPlot):
+    def __init__(self, model):
+        title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
+        ds = model.as_dataset('eigvals')
+        if ds==None:
+            logger.log('notice', 'Model does not contain eigvals')
+        plots.BarPlot.__init__(self, ds, name=title)
 
 
 class PcaScorePlot(BlmScatterPlot):
     def __init__(self, model, absi=0, ordi=1):
         title = "Pca scores (%s)" %model._dataset['X'].get_name()
         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
-
+        self.set_expvar_axlabels(param="expvarx")
 
 class PcaLoadingPlot(BlmScatterPlot):
     def __init__(self, model, absi=0, ordi=1):
@@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot):
         BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
 
 
-class PlsLoadingPlot(BlmScatterPlot):
+class PlsXLoadingPlot(BlmScatterPlot):
     def __init__(self, model, absi=0, ordi=1):
-        title = "Pls loadings (%s)" %model._dataset['X'].get_name()
+        title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
         BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
         #self.set_expvar_axlabels(self, param="expvarx")
 
-    
+
+class PlsYLoadingPlot(BlmScatterPlot):
+    def __init__(self, model, absi=0, ordi=1):
+        title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
+        BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
+
+
 class PlsCorrelationLoadingPlot(BlmScatterPlot):
     def __init__(self, model, absi=0, ordi=1):
         title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
@@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot):
     
 
 class InfluencePlot(plots.ScatterPlot):
+    """ Returns a leverage vs resiudal scatter plot.
     """
-    """
-    pass
+    def __init__(self, model, dim, name="Influence"):
+        if not model.model.has_key('levx'):
+            logger.log('notice', 'Model has no calculations of leverages')
+            return
+        if not model.model.has_key('ssqx'):
+            logger.log('notice', 'Model has no calculations of residuals')
+            return
+        ds1 = model.as_dataset('levx')
+        ds2 = model.as_dataset('ssqx')
+        plots.ScatterPlot.__init__(self, ds1, ds2,
+                                   id_dim, sel_dim, id_1, id_2,
+                                   c=col, s=20, sel_dim_2=sel_dim_2,
+                                   name='Load Volcano')
         
 
 class RMSEPPlot(plots.BarPlot):
diff --git a/fluents/lib/cx_stats.py b/fluents/lib/cx_stats.py
index 433ce78..43eea92 100644
--- a/fluents/lib/cx_stats.py
+++ b/fluents/lib/cx_stats.py
@@ -11,7 +11,7 @@ import time
 
 
 def hotelling(Pcv, P, p_center='med', cov_center='med',
-              alpha=0.3, crot=True, strict=False, metric=None):
+              alpha=0.3, crot=True, strict=False):
     """Returns regularized hotelling T^2.
     
     alpha -- regularisation towards pooled cov estimates
@@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
     alpha -- regularisation
     crot -- rotate submodels toward full?
     strict -- only rotate 90 degree ?
-    metric -- inverse metric matrix (if Pcv and P from metric pca/pls)
     
     """
     m, n = P.shape
-    if metric==None:
-        metric = eye(m, dtype='<f8')
-    P = dot(metric.T, asarray(P))
     n_sets, n, amax = Pcv.shape
     # allocate
     T_sq = empty((n, ),dtype='f')
@@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
     # rotate sub_models to full model
     if crot:
         for i, Pi in enumerate(Pcv):
-            Pi = dot(metric.T, Pi)
             Pcv[i] = procrustes(P, Pi, strict=strict)
 
     # center of pnull
@@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
               center=True,
               sim_method='shuffle',
               p_center='med', cov_center='med',
-              crot=True, strict=False, metric=None):
+              crot=True, strict=False):
 
     """Returns qvals for pls model.
 
@@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
     cov_center -- location estimator for covariance of submodels ['med']
     crot -- bool, use rotations of sub models?
     strict -- bool, use stict (rot/flips only) rotations?
-    metric -- bool, use row metric?
     """
     
     m, n = a.shape
@@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
     if center:
         ac = a - a.mean(0)
         bc = b - b.mean(0)
-    if metric!=None:
-        ac = dot(ac, metric)
+    
     if algo=='bridge':
         dat = bridge(ac, bc, aopt, 'loads', 'fast')
     else:
         dat = pls(ac, bc, aopt, 'loads', 'fast')
-    Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
+    Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
                          alpha=alpha, crot=crot, strict=strict,
                          cov_center=cov_center)
@@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
             dat = bridge(ac, b_shuff, aopt, 'loads','fast')
         else:
             dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
-        Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
+        Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
                              alpha=alpha, crot=crot, strict=strict,
                              cov_center=cov_center)
@@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
                  n_iter=20, algo='pls',
                  sim_method='shuffle',
                  p_center='med', cov_center='med',
-                 crot=True, strict=False, metric=None):
+                 crot=True, strict=False):
 
     """Returns qvals for pls model.
-    Shuffling of variables in X is preprocessed in metric.
+    Shuffling of variables in X.
     Null model is 'If I put genes randomly on network' ... if they are sign:
     then this is due to network structure and not covariance with response.
 
@@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
     cov_center -- location estimator for covariance of submodels ['med']
     crot -- bool, use rotations of sub models?
     strict -- bool, use stict (rot/flips only) rotations?
-    metric -- bool, use row metric?
     """
     
     m, n = a.shape
@@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
     if center==True:
         ac = a - a.mean(0)
         bc = b - b.mean(0)
-    if metric==None:
-        metric = eye(n,n)
+    
     if algo=='bridge':
         dat = bridge(ac, bc, aopt, 'loads', 'fast')
     else:
         dat = pls(ac, bc, aopt, 'loads', 'fast')
-    Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
+    Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
     tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
                          alpha=alpha, crot=crot, strict=strict,
                          cov_center=cov_center)
@@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
     for i, a_shuff in enumerate(Vs):
         t1 = time.time()
         a = a_shuff - a_shuff.mean(0)
-        a = dot(a, metric)
         
         if algo=='bridge':
             dat = bridge(a, b, aopt, 'loads','fast')
         else:
             dat = pls(a, b, aopt, 'loads', 'fast')
-        Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
+        Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
         TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
                              alpha=alpha, crot=crot, strict=strict,
                              cov_center=cov_center)
diff --git a/fluents/lib/engines.py b/fluents/lib/engines.py
index 1d6334e..809a34b 100644
--- a/fluents/lib/engines.py
+++ b/fluents/lib/engines.py
@@ -32,7 +32,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
         lev --leverages, ssq -- sum of squares, expvar -- cumulative
         explained variance, aopt -- number of components used
     
-    :OtherParameters:
+    :OtherParam eters:
     mode : str
         Amount of info retained, ('fast', 'normal', 'detailed')
     center_axis : int
@@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
         a = a - expand_dims(a.mean(center_axis), center_axis)
     if m>(n+100) or n>(m+100):
         u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
-        print s[:10]
     else:
         u, s, vt = svd(a, 0)
         v = vt.T
@@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
         lev = []
         for ai in range(aopt):
             E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
-            ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
+            ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
             if scale=='loads':
                 lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
             else:
@@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
         # variances
     expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
     
-    return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
+    return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
 
 def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
     """ Principal Component Regression.
@@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
         if i>0:
             for j in range(0, i, 1):
                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
-        print vnorm(r)
+        
         t = dot(a, r)
         tt = vnorm(t)**2
         p  = dot(a.T, t)/tt
@@ -375,14 +374,11 @@ def w_pls(aat, b, aopt):
         u = dot(b , q) #y-factor scores
         U[:,i] = u.ravel()
         t = dot(aat, u)
-        print "Norm of t: %s" %vnorm(t)
-        print "s: %s" %s
         
         t = t/vnorm(t)
         T[:,i] = t.ravel()
         r = dot(aat, t)#score-weights
         #r = r/vnorm(r)
-        print "Norm R: %s" %vnorm(r)
         R[:,i] = r.ravel()
         PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
         if i<aopt:
@@ -701,10 +697,9 @@ def esvd(data, amax=None):
                 pcrange = None
             else:
                 pcrange = [n-amax, n]
-            print "symm>n"
             s, v = symeig(kernel, range=pcrange, overwrite=True)
-            s = s[::-1]
-            v = v[:,::-1]
+            s = s[::-1].real
+            v = v[:,::-1].real
         else:
             u, s, vt = svd(kernel)
             v = vt.T
@@ -718,7 +713,6 @@ def esvd(data, amax=None):
                 pcrange = None
             else:
                 pcrange = [m-amax, m]
-            print "sym (m<n)"
             s, u = symeig(kernel, range=pcrange, overwrite=True)
             s = s[::-1]
             u = u[:,::-1]
@@ -726,8 +720,8 @@ def esvd(data, amax=None):
             u, s, vt = svd(kernel)
         s = sqrt(s)
         v = dot(data.T, u)/s
-    print s[:2]
-    return u, s, v
+    # some use of symeig returns the 0 imaginary part
+    return u.real, s.real, v.real
 
 def vnorm(x):
     # assume column arrays (or vectors)
diff --git a/fluents/lib/select_generators.py b/fluents/lib/select_generators.py
index ac94c26..9d80598 100644
--- a/fluents/lib/select_generators.py
+++ b/fluents/lib/select_generators.py
@@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
           else:
                yield aat_in,aat_out,b_in,b_out
 
-def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None):
+def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
      """Random block crossvalidation
     Leave-one-out is a subset, with n_blocks equals a.shape[-1]
     """
@@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
                mn_b = bcal.mean(0)[newaxis]
                bcal = bcal - mn_b
                btrue = btrue - mn_b
-          if metric!=None:
-               acal = dot(acal, metric)
+          
           if index_out:
                yield acal, atrue, bcal, btrue, out
           else:     
                yield acal, atrue, bcal, btrue
 
          
-def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
+def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
      """Returns a generator of crossvalidation sample segments.
 
      input:
@@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
               mn_a = acal.mean(0)[newaxis]
               acal = acal - mn_a
               atrue = atrue - mn_a
-         if metric!=None:
-              acal = dot(acal, metric)
+         
          if index_out:
               yield acal, atrue, out
          else:
diff --git a/fluents/lib/validation.py b/fluents/lib/validation.py
index 6ce394c..dc9c8f3 100644
--- a/fluents/lib/validation.py
+++ b/fluents/lib/validation.py
@@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
     aopt = find_aopt_from_sep(msep)
     return sqrt(msep)
 
-def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
+def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
     k, l = m_shape(Y)
     PRESS = zeros((l, amax+1), dtype='<f8')
     EE = zeros((amax, k, l), dtype='<f8')
     Yhat = zeros((amax, k, l), dtype='<f8')
-    V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric)
+    V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
     for Xin, Xout, Yin, Yout, out in V:
         ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
         Yin = (Yin - ym)
@@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets):
 
     return sep, aopt
 
-def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None):
+def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
     """ Returns CV-segments of paramter W for wide X.
     
     todo: add support for T,Q and B
@@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
         n_blocks = b.shape[0]
 
     Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
-    if use_pack and metric==None:
+    if use_pack:
         u, s, inflater = svd(a, full_matrices=0)
         a = u*s
     
-    V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric)
+    V = pls_gen(a, b, n_blocks=n_blocks, center=center)
     for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
         if algo=='pls':
             dat = pls(a_in, b_in, amax, 'loads', 'fast')
@@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
             dat = bridge(a_in, b_in, amax, 'loads', 'fast')
 
         W = dat['W']
-        if use_pack and metric==None:
+        if use_pack:
             W = dot(inflater.T, W)
 
         Wcv[nn,:,:] = W[:,:,]
         
     return Wcv
 
-def pca_jkP(a, aopt, n_blocks=None, metric=None):
+def pca_jkP(a, aopt, n_blocks=None):
     """Returns loading from PCA on CV-segments.
     
     input: