From 253305b602e7e560b5e43fdbe938bb8c36ae450c Mon Sep 17 00:00:00 2001 From: flatberg Date: Fri, 14 Dec 2007 00:16:31 +0000 Subject: [PATCH] Fixed conflicts --- pyblm/__init__.py | 1 - pyblm/crossvalidation.py | 85 +++++++++++++--------- pyblm/engines.py | 149 +++++++++++++++++++-------------------- pyblm/models.py | 28 ++++---- pyblm/statistics.py | 78 ++++++++++---------- 5 files changed, 177 insertions(+), 164 deletions(-) diff --git a/pyblm/__init__.py b/pyblm/__init__.py index c4198ea..73566c4 100644 --- a/pyblm/__init__.py +++ b/pyblm/__init__.py @@ -13,4 +13,3 @@ def test(level=1, verbosity=1): print 'Python version %s' % (sys.version.replace('\n', '',),) from numpy.testing import NumpyTest return NumpyTest().test(level, verbosity) - diff --git a/pyblm/crossvalidation.py b/pyblm/crossvalidation.py index 364bf4e..765f42b 100644 --- a/pyblm/crossvalidation.py +++ b/pyblm/crossvalidation.py @@ -155,7 +155,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F validation scheme. *Parameters*: - + X : {array} Main data matrix (m, n) Y : {array} @@ -180,9 +180,9 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F If true, Require orthogonal latent components in Z. verbose : {boolean}, optional Verbosity of console output. For use in debugging. - + *Returns*: - + rmsep : {array} Root mean squred error of prediction yhat : {array} @@ -191,19 +191,19 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F Estimated value of optimal number of components """ - + m, n = X.shape k, l = Y.shape o, p = Z.shape assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l) assert n == p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m, n, o, p) if nsets == None: - nsets = m + nsets = m if nsets > X.shape[0]: print "nsets (%d) is larger than number of variables (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m) nsets = m assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha - + Yhat = empty((a_max, k, l), 'd') for cal, val in cv(k, nsets): # do the training model @@ -217,10 +217,16 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F # predictions for a in range(a_max): Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a])) + # todo: need a better support for classification error + y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?'] + if y_is_class: + pass + #Yhat, err = class_error(Yhat, Y) + #return Yhat, err sep = (Y - Yhat)**2 rmsep = sqrt(sep.mean(1)).T #aopt = find_aopt_from_sep(rmsep) - + # todo: need a better support for classification error error = prediction_error(Yhat, Y, method='1/2') @@ -228,7 +234,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'): """Returns jack-knife segments from PCA. - + *Parameters*: a : {array} @@ -252,9 +258,9 @@ def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'): Loadings collected in a three way matrix (n_segments, m, aopt) *Notes*: - + - Crossvalidation method is currently set to random blocks of samples. - + """ m, n = a.shape if nsets == None: @@ -278,14 +284,14 @@ def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'): Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis = center_axis)['P'] else: raise NotImplementedError(method) - + return Pcv def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False): """ Returns jack-knife segements of W. 
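As a reference for the cross-validation loop in lpls_val above, the short sketch below restates how the collected predictions are turned into a per-component RMSEP; shapes follow the docstring, and the helper name rmsep_sketch is illustrative only, not part of the patch.

import numpy

def rmsep_sketch(Y, Yhat):
    # Y    : (k, l) observed responses
    # Yhat : (a_max, k, l) predictions collected over the cv segments
    sep = (Y - Yhat)**2                 # squared prediction errors, broadcast over components
    return numpy.sqrt(sep.mean(1)).T    # (l, a_max) root mean squared error of prediction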
- + *Parameters*: - + X : {array} Main data matrix (m, n) Y : {array} @@ -294,7 +300,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False): The number of components to calculate (0, min(m,n)) nsets : (integer), optional Number of jack-knife segments - + center_axis : {boolean}, optional - -1 : nothing - 0 : row center @@ -302,12 +308,12 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False): - 2 : double center verbose : {boolean}, optional Verbosity of console output. For use in debugging. - + *Returns*: - + Wcv : {array} Loading-weights jack-knife segements - + """ m, n = X.shape k, l = Y.shape @@ -320,7 +326,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False): print "Segment number: %d" %i dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis) Wcv[i,:,:] = dat['W'] - + return Wcv def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False): @@ -332,10 +338,10 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth= infer the paramter confidence in th model. The segements returned are the X-block weights and Z-block weights. - - + + *Parameters*: - + X : {array} Main data matrix (m, n) Y : {array} @@ -358,15 +364,15 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth= 2 : double center verbose : {boolean}, optional Verbosity of console output. For use in debugging. - + *Returns*: - + Wx : {array} X-block jack-knife segements Wz : {array} Z-block jack-knife segements """ - + m, n = X.shape k, l = Y.shape o, p = Z.shape @@ -388,7 +394,7 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth= def find_aopt_from_sep(err, method='vanilla'): """Returns an estimate of optimal number of components. - + The estimate is based on the error of prediction from crossvalidation. This is pretty much wild guessing and it is recomended to inspect model parameters and prediction errors @@ -406,7 +412,7 @@ def find_aopt_from_sep(err, method='vanilla'): aopt : {integer} A guess on the optimal number of components """ - + if method == 'vanilla': # min rmsep rmsecv = sqrt(err.mean(0)) @@ -434,7 +440,7 @@ def cv(N, K, randomise=True, sequential=False): of length ~N/K, *without* replacement. *Parameters*: - + N : {integer} Total number of samples K : {integer} @@ -443,7 +449,7 @@ def cv(N, K, randomise=True, sequential=False): Use random sampling sequential : {boolean} Use sequential sampling - + *Returns*: training : {array-like} @@ -456,12 +462,12 @@ def cv(N, K, randomise=True, sequential=False): If randomise is true, a copy of index is shuffled before partitioning, otherwise its order is preserved in training and validation. - + Randomise overrides the sequential argument. If randomise is true, sequential is False If sequential is true the index is partioned in continous blocks, otherwise interleaved ordering is used. - + """ if K > N: raise ValueError, "You cannot divide a list of %d samples into more than %d segments. 
Yout tried: %s" %(N, N, K) @@ -510,6 +516,8 @@ def diag_cv(shape, nsets=9, randomise=True): except: raise ValueError("shape needs to be a two-tuple") if nsets>m or nsets>n: + msg = "You may not use more subsets than max(n_rows, n_cols)" + raise ValueError, msg msg = "You may not use more subsets than max(n_rows, n_cols)" nsets = min(m, n) nm = n*m @@ -524,7 +532,20 @@ def diag_cv(shape, nsets=9, randomise=True): validation.update(ind) #training = [j for j in index if j not in validation] yield list(validation) - + +def class_error(y_hat, y, method='vanilla'): + """ Not used. + """ + a_opt, k, l = y_hat.shape + y_hat_c = zeros((k, l), dtype='d') + if method == vanilla: + pass + for a in range(a_opt): + for i in range(k): + y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0 + err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d') + + return y_hat_c, err def prediction_error(y_hat, y, method='squared'): """Loss function on multiclass Y. @@ -651,7 +672,7 @@ def _wkernel_pls_val(X, Y, a_max, n_blocks=None): for Din, Doi, Yin, Yout in V: ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0]) PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0) - + dat = w_simpls(Din, Yin, a_max) Q, U, H = dat['Q'], dat['U'], dat['H'] That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) )) diff --git a/pyblm/engines.py b/pyblm/engines.py index 504f1f9..4d825f7 100644 --- a/pyblm/engines.py +++ b/pyblm/engines.py @@ -20,7 +20,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]): extracts orthogonal components of maximum variance. *Parameters*: - + X : {array} Data measurement matrix, (samples x variables) aopt : {integer} @@ -55,21 +55,21 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]): mode : {string}, optional Amount of info retained, [['normal'], 'fast', 'detailed'] - + *SeeAlso*: `center` : Data centering - + *Notes* - + Uses kernel speed-up if m>>n or m<>> import scipy,engines >>> a=scipy.asarray([[1,2,3],[2,4,5]]) >>> dat=engines.pca(a, 2) @@ -77,7 +77,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]): array([0.,99.8561562, 100.]) """ - + m, n = X.shape max_aopt = min(m, n) if center_axis != None: @@ -94,7 +94,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]): u = u[:,:aopt] s = s[:aopt] v = v[:,:aopt] - + # ranktest tol = 1e-10 eff_rank = sum(s > s[0]*tol) @@ -104,14 +104,14 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]): T = T[:,:aopt] P = v[:,:aopt] e = s**2 - + if scale=='loads': T = T/s P = P*s if mode in ['fast', 'f', 'F']: return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx} - + if mode in ['detailed', 'd', 'D']: E = empty((aopt, m, n)) ssq = [] @@ -135,7 +135,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]): lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)] # variances expvarx = r_[0, 100*e.cumsum()/(X*X).sum()] - + return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq, 'aopt': aopt, 'eigvals': e, 'mnx': mnx} @@ -159,20 +159,20 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]): keys -- values, T -- scores, P -- loadings, E -- residuals, levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative explained variance, aopt -- number of components used - + *OtherParameters*: mode : {string} Amount of info retained, ('fast', 'normal', 'detailed') center_axis : {integer} Center along given axis. 
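To make the scores/loadings bookkeeping in pcr easier to follow, here is a minimal, self-contained sketch of principal component regression on column-centred data. It is an illustration under the stated assumptions, not the module's exact code path (which goes through esvd and the scale/mode options).

import numpy

def pcr_sketch(a, b, aopt):
    a = a - a.mean(0)                       # column-centre predictors
    b = b - b.mean(0)                       # column-centre responses
    u, s, vt = numpy.linalg.svd(a, full_matrices=False)
    T = u[:, :aopt] * s[:aopt]              # X-scores
    P = vt[:aopt].T                         # X-loadings
    Q = numpy.dot(b.T, T) / s[:aopt]**2     # y-loadings from regressing b on the scores
    B = numpy.dot(P, Q.T)                   # regression coefficients in original variable space
    return {'T': T, 'P': P, 'Q': Q, 'B': B}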
If neg.: no centering (-inf,..., matrix modes) - + SeeAlso: - + - pca : other blm - pls : other blm - lpls : other blm - + *Notes* ----- @@ -180,12 +180,12 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]): Uses kernel speed-up if m>>n or m<>> import scipy,engines >>> a=scipy.asarray([[1,2,3],[2,4,5]]) >>> b=scipy.asarray([[1,1],[2,3]]) @@ -222,9 +222,9 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]): F = b - dot(T, Q.T) sepy = F**2 ssqy = [sepy.sum(0), sepy.sum(1)] - + expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]] - + dat.update({'Q': Q, 'F': F, 'evy': expvary, 'ssqy': ssqy, 'mny': mny}) return dat @@ -245,9 +245,9 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): Which component should get the scale center_axis : {-1, integer} Perform centering across given axis, (-1 is no centering) - + *Returns*: - + T : {array} X-scores W : {array} @@ -280,25 +280,25 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): Sum of squared residuals in Y along each dimesion leverage : {array} Sample leverages - + *OtherParameters*: - + mode : ['normal', 'fast', 'detailed'], optional How much details to compute - + *SeeAlso*: - + `center` - data centering - + *Notes* - The output with mode='fast' will only return T and W - + - If residuals turn rank deficient, a lower number of component than given in input will be used. The number of components used is given in results. *Examples* - + >>> import numpy, engines >>> a = numpy.asarray([[1,2,3],[2,4,5]]) >>> b = numpy.asarray([[1,1],[2,3]]) @@ -307,7 +307,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): array([0.,99.8561562, 100.]) """ - + m, n = X.shape try: k, l = Y.shape @@ -322,7 +322,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): Y, mny = center(Y, center_axis[1]) min_aopt = min_aopt - 1 assert(aopt > 0 and aopt < min_aopt) - + W = empty((n, aopt)) P = empty((n, aopt)) R = empty((n, aopt)) @@ -330,7 +330,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): T = empty((m, aopt)) B = empty((aopt, n, l)) tt = empty((aopt,)) - + XY = dot(X.T, Y) for i in range(aopt): if XY.shape[1] == 1: #pls 1 @@ -345,7 +345,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): # with many samples, many x-vars and many non-orth y-vars (where arpack speed # shines) ############# - + #s, w = arpack.eigen_symmetric(dot(XY, XY.T),k=1, tol=1e-10, maxiter=1000) #if s[0] == 0: # print "Arpack did not converge... using svd" @@ -357,15 +357,15 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): # print "Arpack did not converge... 
using svd" q, s, vh = svd(dot(XY.T, XY)) q = q[:,:1] - + w = dot(XY, q) w = w/vnorm(w) - + r = w.copy() if i > 0: for j in range(0, i, 1): r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] - + t = dot(X, r) tt[i] = tti = dot(t.T, t).ravel() p = dot(X.T, t)/tti @@ -385,7 +385,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): R[:,i] = r.ravel() Q[:,i] = q.ravel() B[i] = dot(R[:,:i+1], Q[:,:i+1].T) - + qnorm = apply_along_axis(vnorm, 0, Q) tnorm = sqrt(tt) pp = (P**2).sum(0) @@ -412,13 +412,13 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]): sepy = F**2 ssqy = [sepy.sum(0), sepy.sum(1)] leverage = 1./m + ((T/tnorm)**2).sum(1) - + # variances tp= tt*pp tq = tt*qnorm*qnorm expvarx = r_[0, 100*tp/(X*X).sum()] expvary = r_[0, 100*tq/(Y*Y).sum()] - + if scale == 'loads': T = T/tnorm W = W*tnorm @@ -438,7 +438,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', of these three matrices tries to discover common directions/subspaces. *Parameters*: - + X : {array} Main data matrix (m, n) Y : {array} @@ -457,9 +457,9 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', 0 : row center 1 : column center 2 : double center - + *Returns*: - + T : {array} X-scores W : {array} @@ -504,18 +504,18 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', Saeboe et al., LPLS-regression: a method for improved prediction and classification through inclusion of background information on predictor variables, J. of chemometrics and intell. laboratory syst. - + Martens et.al, Regression of a data matrix on descriptors of both its rows and of its columns via latent variables: L-PLSR, Computational statistics & data analysis, 2005 - + """ m, n = X.shape k, l = Y.shape u, o = Z.shape max_rank = min(m, n) - - + + if center_axis != None: xctr, yctr, zctr = center_axis X, mnX = center(X, xctr) @@ -523,14 +523,14 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', Z, mnZ = center(Z, zctr) max_rank = max_rank -1 assert (a_max > 0 and a_max < max_rank), "Number of comp error:\ - tried: %d, max_rank: %d" %(a_max, max_rank) + tried: %d, max_rank: %d" %(a_max, max_rank) # initial variance varX = (X**2).sum() varY = (Y**2).sum() varZ = (Z**2).sum() - # initialize + # initialize U = empty((k, a_max)) Q = empty((l, a_max)) T = zeros((m, a_max)) @@ -600,7 +600,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', else: k = w l = dot(G, w) - + U[:,a] = u.ravel() W[:,a] = w.ravel() P[:,a] = p.ravel() @@ -617,11 +617,11 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', var_x[a] = pow(E, 2).sum() var_y[a] = pow(F, 2).sum() var_z[a] = pow(G, 2).sum() - + B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T) #b0[a] = mnY - dot(mnX, B[a]) - - + + # variance explained evx = 100.*(1 - var_x/varX) evy = 100.*(1 - var_y/varY) @@ -635,8 +635,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', knorm = apply_along_axis(vnorm, 0, K) L = L*knorm K = K/knorm - - return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ} + + return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ} def lpls_predict(model_dict, x, aopt): """Predict lpls reponses from existing model on new 
data. @@ -646,25 +646,25 @@ def lpls_predict(model_dict, x, aopt): except: x = atleast_2d(x.shape) m, n = x.shape - + if 'B0' in model_dict.keys(): y = model_dict['B0'] + dot() - - + + def vnorm(a): """Returns the norm of a vector. *Parameters*: - + a : {array} Input data, 1-dim, or column vector (m, 1) *Returns*: - + a_norm : {array} Norm of input vector - + """ return msqrt(dot(a.T,a)) @@ -676,7 +676,7 @@ def center(a, axis): a : {array} Input data axis : {integer} - Which centering to perform. + Which centering to perform. 0 = col center, 1 = row center, 2 = double center -1 = nothing @@ -707,16 +707,16 @@ def center(a, axis): else: mn = a.mean()*ones(a.shape) return a - mn, mn - + if axis == -1: mn = zeros((1,a.shape[1],)) - mn = tile(mn, (a.shape[0], 1)) + #mn = tile(mn, (a.shape[0], 1)) elif axis == 0: mn = a.mean(0)[newaxis] - mn = tile(mn, (a.shape[0], 1)) + #mn = tile(mn, (a.shape[0], 1)) elif axis == 1: mn = a.mean(1)[:,newaxis] - mn = tile(mn, (1, a.shape[1])) + #mn = tile(mn, (1, a.shape[1])) elif axis == 2: #fixme: double centering returns column mean as loc-vector, ok? mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean() @@ -774,7 +774,7 @@ def _scale(a, axis): a : {array} Input data axis : {integer} - Which scaling to perform. + Which scaling to perform. 0 = column, 1 = row, -1 = nothing *Returns*: @@ -784,7 +784,7 @@ def _scale(a, axis): mn : {array} Scaling vector/matrix """ - + if axis == -1: sc = zeros((a.shape[1],)) elif axis == 0: @@ -817,21 +817,20 @@ def esvd(data, a_max=None): Singular values v : {array} Left hand eigenvectors - + *Notes*: Uses Anoldi iterations for the symmetric eigendecomp (ARPACK) - + """ - + m, n = data.shape - if m >= n: + if m > n: kernel = dot(data.T, data) - if a_max == None: a_max = n - 1 s, v = arpack.eigen_symmetric(kernel, k=a_max, which='LM', - maxiter=200, tol=1e-5) + maxiter=500, tol=1e-7) s = s[::-1] v = v[:,::-1] #u, s, vt = svd(kernel) @@ -841,9 +840,9 @@ def esvd(data, a_max=None): else: kernel = dot(data, data.T) if a_max == None: - a_max = m -1 + a_max = m - 1 s, u = arpack.eigen_symmetric(kernel, k=a_max, which='LM', - maxiter=200, tol=1e-5) + maxiter=500, tol=1e-7) s = s[::-1] u = u[:,::-1] #u, s, vt = svd(kernel) diff --git a/pyblm/models.py b/pyblm/models.py index b21926b..42e69cd 100644 --- a/pyblm/models.py +++ b/pyblm/models.py @@ -21,7 +21,7 @@ class Model(object): def __init__(self, name="johndoe"): self.name = name self.options = {} - + def save(self, filename='pca.ml'): pass @@ -46,7 +46,7 @@ class PCA(Model): self._x = x self.amax = amax self.aopt = amax - + # properties def amax(): doc = "maximum number of components" @@ -77,7 +77,7 @@ class PCA(Model): del self._tot_var return locals() tot_var = property(**tot_var()) - + def scores(): doc = "pca scores" def fget(self): @@ -94,7 +94,7 @@ class PCA(Model): del self._core_scores return locals() scores = property(**scores()) - + def loadings(): doc = "pca loadings" def fget(self): @@ -111,7 +111,7 @@ class PCA(Model): self._loadings = p return locals() loadings = property(**loadings()) - + def singvals(): doc = "Singular values" def fget(self): @@ -128,7 +128,7 @@ class PCA(Model): del self._singvals return locals() singvals = property(**singvals()) - + def x(): doc = "x is readonly, may not be deleted" def fget(self): @@ -154,12 +154,12 @@ class PCA(Model): del self._xc return locals() xadd = property(**xadd()) - + def xc(): doc = "mean_centered input data" def fget(self): if not hasattr(self, "_xc"): - self._xc = self.x + self.xadd + self._xc = self.x + 
self.xadd return self._xc def fset(self, xc): self._xc = xc @@ -186,7 +186,7 @@ class PCA(Model): del self._xw return locals() xw = property(**xw()) - + def explained_variance(): doc = "explained variance" def fget(self): @@ -215,7 +215,7 @@ class PCA(Model): del self._residuals return locals() residuals = property(**residuals()) - + def leverage(): doc = "objects leverage" def fget(self): @@ -254,7 +254,7 @@ class PCA(Model): self._column_metric = scale(self.xc, axis=1) return self._column_metric def fset(self, w): - + self._column_metric = w # update model def fdel(self): @@ -263,7 +263,7 @@ class PCA(Model): del self._xd return locals() column_metric = property(**column_metric()) - + def blm_update(self, a, b): pass @@ -281,8 +281,8 @@ class PCA(Model): def reweight(self, w): pass - - + + if __name__ == "__main__": from numpy.random import rand X = rand(4,10) diff --git a/pyblm/statistics.py b/pyblm/statistics.py index 6d9db02..b38e881 100644 --- a/pyblm/statistics.py +++ b/pyblm/statistics.py @@ -22,9 +22,9 @@ def hotelling(Pcv, P, p_center='median', cov_center='median', used in multivariate hypothesis testing. In order to avoid small variance samples to become significant this version allows borrowing variance from the pooled covariance. - + *Parameters*: - + Pcv : {array} Crossvalidation segements of paramter P : {array} @@ -39,9 +39,9 @@ def hotelling(Pcv, P, p_center='median', cov_center='median', Rotate sub-segments toward calibration model. strict : {boolean}, optional Only rotate 90 degree - + *Returns*: - + tsq : {array} Hotellings T^2 estimate @@ -50,19 +50,19 @@ def hotelling(Pcv, P, p_center='median', cov_center='median', Gidskehaug et al., A framework for significance analysis of gene expression datausing dimension reduction methods, BMC bioinformatics, 2007 - + *Notes* The rotational freedom in the solution of bilinear models may require that a rotation onto the calibration model. One way of doing that is procrustes rotation. - + """ m, n = P.shape n_sets, n, amax = Pcv.shape T_sq = empty((n,), dtype='d') Cov_i = zeros((n, amax, amax), dtype='d') - + # rotate sub_models to full model if crot: for i, Pi in enumerate(Pcv): @@ -77,20 +77,15 @@ def hotelling(Pcv, P, p_center='median', cov_center='median', P_ctr = P for i in xrange(n): - Pi = Pcv[:,i,:] # (n_sets x amax) + Pi = Pcv[:,i,:] # (n_sets x amax) Pi_ctr = P_ctr[i,:] # (1 x amax) - #Pim = (Pi - Pi_ctr)*msqrt(n_sets-1) - #Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim) - Pim = (Pi - Pi_ctr) - Cov_i[i] = dot(Pim.T, Pim) + Pim = (Pi - Pi_ctr)*msqrt(n_sets-1) + Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim) + if cov_center == 'median': Cov_p = median(Cov_i) - elif cov_center == 'mean': + else cov_center == 'mean': Cov_p = Cov.mean(0) - else: - print "Pooled covariance est. invalid, using median" - print cov_center - Cov_p = median(Cov_i) reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p for i in xrange(n): Pc = P_ctr[i,:] @@ -105,7 +100,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False) onto another by minimising the squared error. 
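The orthogonal Procrustes solution referred to here (Schonemann, 1966) reduces to a single SVD; the sketch below shows only that core rotation, while the module's procrustes additionally handles centring, norming and the strict/flip restriction.

import numpy

def procrustes_sketch(a, b):
    # rotation that maps b onto a in the least-squares sense
    u, s, vt = numpy.linalg.svd(numpy.dot(b.T, a))
    Cm = numpy.dot(u, vt)          # orthogonal rotation (and possible reflection)
    return numpy.dot(b, Cm)        # b rotated onto a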
*Parameters*: - + a : {array} Input array, stationary b : {array} @@ -127,9 +122,9 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False) *Reference*: Schonemann, A generalized solution of the orthogonal Procrustes - problem, Psychometrika, 1966 + problem, Psychometrika, 1966 """ - + if center: mn_a = a.mean(0) a = a - mn_a @@ -145,7 +140,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False) u, s, vt = svd(dot(b.T, a)) Cm = dot(u, vt) # Cm: orthogonal rotation matrix if strict: - Cm = _ensure_strict(Cm) + Cm = _ensure_strict(Cm) b_rot = dot(b, Cm) if verbose: fit = ((b - b_rot)**2).sum() @@ -158,7 +153,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False) def _ensure_strict(C, only_flips=True): """Ensure that a rotation matrix does only 90 degree rotations. - + In multiplication with pcs this allows flips and reordering. if only_flips is True there will onlt be flips allowed @@ -173,13 +168,13 @@ def _ensure_strict(C, only_flips=True): C_rot : {array} Restricted rotation matrix - + *Notes*: - + This function is not ready for use. Use (only_flips=True). That is, for more than two components, the rotation matrix has a tendency to be unstable (det(Cm)>1), when rounding is used. - + """ if only_flips: C = eye(C.shape[0])*sign(C) @@ -199,9 +194,9 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20, The response (Y) is randomly permuted, and the number of false positives is registered by comparing hotellings T2 statistics of the calibration model. - + *Parameters*: - + X : {array} Main data matrix (m, n) Y : {array} @@ -237,14 +232,14 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20, nsets : {integer} Number of crossvalidation segements - + *Reference*: Gidskehaug et al., A framework for significance analysis of gene expression data using dimension reduction methods, BMC bioinformatics, 2007 """ - + m, n = X.shape k, nz = Z.shape assert(n==nz) @@ -255,7 +250,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20, Y = atleast_2d(Y).T my, l = Y.shape assert(m==my) - + pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets) pert_tsq_z = zeros((k, n_iter), dtype='d') # (nzvars x n_subsets) @@ -264,7 +259,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20, Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth) cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha) cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha) - print "morn" + # Perturbations index = arange(m) for i in range(n_iter): @@ -275,7 +270,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20, pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha) # no reason to borrow variance in dag (alpha ->some small value) pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01) - + return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x @@ -286,9 +281,9 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median, The response (Y) is randomly permuted, and the number of false positives is registered by comparing hotellings T2 statistics of the calibration model. 
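A hedged sketch of the permutation scheme described above: the rows of Y are shuffled, the calibration model and its jack-knife segments are refitted, and the perturbed Hotelling T^2 values are collected. The callables fit, jackknife and hotelling stand in for the module's pls/lpls, pls_jk/lpls_jk and hotelling, and are assumptions of this illustration only.

import numpy

def permutation_tsq_sketch(X, Y, aopt, n_iter, fit, jackknife, hotelling):
    m, n = X.shape
    pert_tsq = numpy.zeros((n, n_iter), dtype='d')   # (n x-variables, n_iter permutations)
    index = numpy.arange(m)
    for i in range(n_iter):
        indi = index.copy()
        numpy.random.shuffle(indi)                   # permute the response rows
        dat = fit(X, Y[indi, :], aopt)               # calibration model on permuted Y
        W_jk = jackknife(X, Y[indi, :], aopt)        # jack-knife segments of the weights
        pert_tsq[:, i] = hotelling(W_jk, dat['W'])   # perturbed T^2 per x-variable
    return pert_tsq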
- + *Parameters*: - + X : {array} Main data matrix (m, n) Y : {array} @@ -318,14 +313,14 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median, Only rotate 90 degree nsets : {integer} Number of crossvalidation segements - + *Reference*: Gidskehaug et al., A framework for significance analysis of gene expression data using dimension reduction methods, BMC bioinformatics, 2007 """ - + m, n = X.shape try: my, l = Y.shape @@ -341,7 +336,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median, Wc = pls_jk(X, Y , aopt) cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha) - + # Perturbations pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets) index = arange(m) @@ -351,7 +346,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median, dat = pls(X, Y[indi,:], aopt, scale='loads', center_axis=center_axis) Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis) pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha) - + return cal_tsq_x, pert_tsq_x @@ -362,7 +357,7 @@ def _fdr(tsq, tsqp, loc_method=median): Fdr is a method used in multiple hypothesis testing to correct for multiple comparisons. It controls the expected proportion of incorrectly rejected null hypotheses (type I errors) in a list of rejected hypotheses. - + *Parameters*: tsq : {array} @@ -372,14 +367,14 @@ def _fdr(tsq, tsqp, loc_method=median): loc_method : {py_func} Location method - + *Returns*: fdr : {array} False discovery rate *Notes*: - + This is an internal function for use in fdr estimation of jack-knifed perturbated blm parameters. @@ -403,4 +398,3 @@ def _fdr(tsq, tsqp, loc_method=median): fd_rate = fp/n_signif fd_rate[fd_rate>1] = 1 return fd_rate -
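For completeness, a sketch of the false discovery rate estimate that _fdr describes: for each calibration T^2 value, the typical number of permuted values exceeding it is divided by the number of calibration values called significant at that threshold, and the ratio is capped at one. It assumes tsq is (n,) and tsqp is (n, n_iter), as in the callers above; it is not the module's exact implementation.

import numpy

def fdr_sketch(tsq, tsqp, loc_method=numpy.median):
    fd_rate = numpy.empty(tsq.shape[0], dtype='d')
    for i, t in enumerate(tsq):
        fp = loc_method((tsqp >= t).sum(0))      # typical count of permuted T^2 above t
        n_signif = (tsq >= t).sum()              # calibration values declared significant at t
        fd_rate[i] = min(1.0, float(fp) / n_signif)
    return fd_rate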