pyblm
Mostly clean ups

Arnar Flatberg 2007-11-27 15:05:19 +00:00
parent 2951ca4088
commit 4c809674bb
2 changed files with 98 additions and 84 deletions

File 1 of 2:

@@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls

-def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
     """Performs crossvalidation for generalisation error in lpls.

     The L-PLS crossvalidation is estimated just like an ordinary pls
@@ -80,11 +80,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
         if mean_ctr[0] != 1:
             xi = X[val,:] - dat['mnx']
         else:
-            xi = X[val] - X[val].mean(1)[:,newaxis]
+            xi = X[val] - X[cal].mean(1)[:,newaxis]
         if mean_ctr[2] != 1:
             ym = dat['mny']
         else:
-            ym = Y[val].mean(1)[:,newaxis] #???: check this
+            ym = Y[cal].mean(1)[:,newaxis]
         # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
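The two val-to-cal fixes above make all centering statistics come from the calibration rows (cal): estimating the mean from the held-out rows (val) leaks validation information into the prediction and biases the error estimate optimistically. A simplified sketch of the corrected pattern, using column centering and a hypothetical helper (not the module's code):

    import numpy as np

    def predict_held_out(X, Y, B, cal, val):
        # Centering statistics come from the calibration rows only;
        # the held-out rows never inform their own predictions.
        xi = X[val] - X[cal].mean(0)
        ym = Y[cal].mean(0)
        return ym + np.dot(xi, B)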
@@ -113,7 +113,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
 def pca_jk(a, aopt, nsets=None):
     """Returns jack-knife segments from PCA.

-    Parameters:
+    *Parameters*:

     a : {array}
         data matrix (n x m)
@@ -122,21 +122,15 @@ def pca_jk(a, aopt, nsets=None):
     nsets : {integer}
         number of segments

-    Returns:
+    *Returns*:

     Pcv : {array}
         Loadings collected in a three way matrix (n_segments, m, aopt)

-    Notes:
+    *Notes*:

-    - The loadings are scaled with the (1/samples)*eigenvalues.
     - Crossvalidation method is currently set to random blocks of samples.
-    - todo: add support for T
-    - fixme: more efficient to add this in validation loop?
     """
     if nsets == None:
         nsets = a.shape[0]
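pca_jk refits the PCA with one segment of samples held out at a time and stacks the loadings, so their spread across segments indicates loading stability. A self-contained sketch of that loop (sequential blocks for simplicity, where the module uses random blocks; an SVD-based fit is assumed):

    import numpy as np

    def pca_jk_sketch(a, aopt, nsets=None):
        n = a.shape[0]
        nsets = n if nsets is None else nsets
        Pcv = np.empty((nsets, a.shape[1], aopt))
        blocks = np.array_split(np.arange(n), nsets)
        for i, val in enumerate(blocks):
            cal = np.setdiff1d(np.arange(n), val)   # leave one block out
            ac = a[cal] - a[cal].mean(0)            # center calibration rows
            u, s, vt = np.linalg.svd(ac, full_matrices=False)
            Pcv[i] = vt[:aopt].T                    # loadings for this segment
        return Pcv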
@@ -305,6 +299,7 @@ def cv(N, K, randomise=True, sequential=False):
     of length ~N/K, *without* replacement.

     *Parameters*:

     N : {integer}
         Total number of samples
     K : {integer}
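cv(N, K) yields K calibration/validation index splits with roughly N/K held-out samples each, without replacement. A minimal sketch of such a generator, assuming randomise shuffles the sample order first (illustrative, not the module's exact implementation):

    import numpy as np
    from numpy.random import shuffle

    def cv_sketch(N, K, randomise=True):
        idx = np.arange(N)
        if randomise:
            shuffle(idx)
        for k in range(K):
            val = idx[k::K]                  # every K-th index is held out
            cal = np.setdiff1d(idx, val)
            yield cal, val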

File 2 of 2:

@@ -12,13 +12,14 @@ minimum
 from numpy.linalg import inv,svd
 from scipy.sandbox import arpack

 def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """ Principal Component Analysis.

     PCA is a low rank bilinear approximation to a data matrix that sequentially
     extracts orthogonal components of maximum variance.

-    Parameters:
+    *Parameters*:

     X : {array}
         Data measurement matrix, (samples x variables)
@@ -27,7 +28,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     center_axis : {integer}
         Center along given axis. If neg.: no centering (-inf,..., matrix modes)

-    Returns:
+    *Returns*:

     T : {array}
         Scores, (samples, components)
@@ -47,7 +48,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     leverage : {array}
         Leverages, (samples,)

-    OtherParameters:
+    *OtherParameters*:

     scale : {string}, optional
         Where to put the weights [['scores'], 'loadings']
@@ -55,7 +56,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
         Amount of info retained, [['normal'], 'fast', 'detailed']

-    :SeeAlso:
+    *SeeAlso*:

     `center` : Data centering
@@ -78,9 +79,11 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """
     m, n = X.shape
-    assert(aopt<=min(m,n))
+    min_aopt = min(m, n)
     if center_axis >= 0:
         X = X - expand_dims(X.mean(center_axis), center_axis)
+        min_aopt = min_aopt - 1
+    assert(aopt <= min_aopt)
     if m > (n+100) or n > (m+100):
         u, s, v = esvd(X, aopt)
     else:
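The relocated assertion reflects that centering costs a degree of freedom: after subtracting column means, at most min(m, n) - 1 components remain (the bound bites when samples <= variables, since the centered rows then sum to zero). A quick numerical check of that claim:

    import numpy as np

    X = np.random.rand(3, 5)
    Xc = X - X.mean(0)                    # column-centered (center_axis=0)
    print(np.linalg.matrix_rank(X))       # 3 = min(m, n)
    print(np.linalg.matrix_rank(Xc))      # 2: rows of Xc sum to the zero vector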
@@ -139,7 +142,7 @@ def pcr(a, b, aopt, scale='scores', mode='normal', center_axis=0):
     Performs PCR on given matrix and returns results in a dictionary.

-    Parameters:
+    *Parameters*:

     a : array
         Data measurement matrix, (samples x variables)
@@ -148,18 +151,18 @@ def pcr(a, b, aopt, scale='scores', mode='normal', center_axis=0):
     aopt : int
         Number of components to use, aopt<=min(samples, variables)

-    Returns:
+    *Returns*:

     results : dict
         keys -- values, T -- scores, P -- loadings, E -- residuals,
         levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
         explained variance, aopt -- number of components used

-    OtherParameters:
+    *OtherParameters*:

-    mode : str
+    mode : {string}
         Amount of info retained, ('fast', 'normal', 'detailed')
-    center_axis : int
+    center_axis : {integer}
         Center along given axis. If neg.: no centering (-inf,..., matrix modes)

     SeeAlso:
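For orientation, the documented call pattern for pcr: pass the predictor matrix, the responses, and a component count, then read results out of the returned dict (shapes here are illustrative):

    import numpy as np

    a = np.random.rand(20, 50)       # samples x variables
    b = np.random.rand(20, 2)        # samples x responses
    res = pcr(a, b, aopt=3, mode='normal', center_axis=0)
    print(res['T'].shape)            # scores: (samples, aopt)
    print(res['expvarx'])            # cumulative explained variance in X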
@@ -284,7 +287,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
     *SeeAlso*:

-    `center` : data centering
+    `center` - data centering

     *Notes*
@@ -311,13 +314,15 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         Y = atleast_2d(Y).T
     k, l = Y.shape
     assert(m == k)
-    assert(aopt<min(m, n))
     mnx, mny = 0, 0
+    min_aopt = min(m, n)
     if center_axis >= 0:
         mnx = expand_dims(X.mean(center_axis), center_axis)
         X = X - mnx
+        min_aopt = min_aopt - 1
         mny = expand_dims(Y.mean(center_axis), center_axis)
         Y = Y - mny
+    assert(aopt > 0 and aopt < min_aopt)

     W = empty((n, aopt))
     P = empty((n, aopt))
@@ -356,7 +361,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         T[:,i] = t.ravel()
         W[:,i] = w.ravel()

-        if mode=='fast' and i==aopt-1:
+        if mode == 'fast' and i == (aopt - 1):
             if scale == 'loads':
                 tnorm = sqrt(tt)
                 T = T/tnorm
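The tidied condition guards the 'loads' rescaling: dividing the score columns by tnorm = sqrt(t't) moves the variance weight from scores onto loadings while the product T P' (and thus the fit) is unchanged. A small demonstration of that invariance:

    import numpy as np

    X = np.random.rand(10, 4)
    u, s, vt = np.linalg.svd(X, full_matrices=False)
    T, P = u * s, vt.T                      # score-scaled decomposition
    tnorm = np.sqrt((T ** 2).sum(0))        # column norms of T
    T2, P2 = T / tnorm, P * tnorm           # weight moved onto the loadings
    print(np.allclose(np.dot(T, P.T), np.dot(T2, P2.T)))  # True: same fit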
@@ -495,7 +500,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth=False):
     m, n = X.shape
     k, l = Y.shape
     u, o = Z.shape
-    max_rank = min(m, n)
+    max_rank = min(m, n) + 1
     assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
     tried: %d, max_rank: %d" %(a_max, max_rank)
@@ -617,6 +622,20 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth=False):
     return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz, 'mnx': mnX, 'mny': mnY, 'mnz': mnZ}

+def lpls_predict(model_dict, x, aopt):
+    """Predict lpls responses from existing model on new data.
+    """
+    try:
+        m, n = x.shape
+    except:
+        x = atleast_2d(x)
+        m, n = x.shape
+
+    if 'B0' in model_dict.keys():
+        y = model_dict['B0'] + dot()
+
 def vnorm(a):
     """Returns the norm of a vector.
@@ -714,19 +733,19 @@ def _scale(a, axis):
     return a - sc, sc

 def esvd(data, a_max=None):
-    """ SVD with kernel calculation
+    """SVD with kernel calculation.

     Calculate subspaces of X'X or XX' depending on the shape
     of the matrix.

-    Parameters:
+    *Parameters*:

     data : {array}
         Data matrix
     a_max : {integer}
         Number of components to extract

-    Returns:
+    *Returns*:

     u : {array}
         Right hand eigenvectors
@@ -735,9 +754,9 @@ def esvd(data, a_max=None):
     v : {array}
         Left hand eigenvectors

-    notes:
+    *Notes*:

-    Uses Arnoldi iterations (ARPACK)
+    Uses Arnoldi iterations for the symmetric eigendecomp (ARPACK)
     """