Mostly clean ups

2007-11-27 15:05:19 +00:00
parent 2951ca4088
commit 4c809674bb
2 changed files with 98 additions and 84 deletions
--- a/pyblm/crossvalidation.py
+++ b/pyblm/crossvalidation.py
@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls


-def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
    """Performs crossvalidation for generalisation error in lpls.

    The L-PLS crossvalidation is estimated just like an ordinary pls
@ -80,11 +80,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
        if mean_ctr[0] != 1:
            xi = X[val,:] - dat['mnx']
        else:
-            xi = X[val] - X[val].mean(1)[:,newaxis]
+            xi = X[val] - X[cal].mean(1)[:,newaxis]
        if mean_ctr[2] != 1:
            ym = dat['mny']
        else:
-            ym = Y[val].mean(1)[:,newaxis] #???: check this
+            ym = Y[cal].mean(1)[:,newaxis]
        # predictions
        for a in range(a_max):
            Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
@ -113,7 +113,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
 def pca_jk(a, aopt, n_blocks=None):
    """Returns jack-knife segements from PCA.
    
-    Parameters:
+    *Parameters*:

        a : {array}
            data matrix (n x m)
@ -122,21 +122,15 @@ def pca_jk(a, aopt, n_blocks=None):
        nsets : {integer}
            number of segments

-    Returns:
+    *Returns*:

        Pcv : {array}
            Loadings collected in a three way matrix (n_segments, m, aopt)

-    Notes:
-    
-        - The loadings are scaled with the (1/samples)*eigenvalues.
+    *Notes*:
    
        - Crossvalidation method is currently set to random blocks of samples.
        
-        - todo: add support for T
-
-        - fixme: more efficient to add this in validation loop?
-        
    """
    if nsets == None:
        nsets = a.shape[0]
@ -305,6 +299,7 @@ def cv(N, K, randomise=True, sequential=False):
    of length ~N/K, *without* replacement.

    *Parameters*:
+    
        N : {integer}
            Total number of samples
        K : {integer}
--- a/pyblm/engines.py
+++ b/pyblm/engines.py
@ -12,13 +12,14 @@ minimum
 from numpy.linalg import inv,svd
 from scipy.sandbox import arpack

+
 def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
    """ Principal Component Analysis.

    PCA is a low rank bilinear aprroximation to a data matrix that sequentially
    extracts orthogonal components of maximum variance.

-    Parameters:
+    *Parameters*:
    
        X : {array}
            Data measurement matrix, (samples x variables)
@ -27,7 +28,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
        center_axis : {integer}
            Center along given axis. If neg.: no centering (-inf,..., matrix modes)

-    Returns:
+    *Returns*:

        T : {array}
            Scores, (samples, components)
@ -47,7 +48,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
        leverage : {array}
            Leverages, (samples,)

-    OtherParameters:
+    *OtherParameters*:

        scale : {string}, optional
             Where to put the weights [['scores'], 'loadings']
@ -55,7 +56,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
            Amount of info retained, [['normal'], 'fast', 'detailed']

  
-    :SeeAlso:
+    *SeeAlso*:

        `center` : Data centering
        
@ -78,9 +79,11 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
    """
    
    m, n = X.shape
-    assert(aopt<=min(m,n))
+    min_aopt = min(m, n)
    if center_axis >= 0:
        X = X - expand_dims(X.mean(center_axis), center_axis)
+        min_aopt = min_aopt - 1
+    assert(aopt <= min_aopt)
    if m > (n+100) or n > (m+100):
        u, s, v = esvd(X, aopt)
    else:
@ -139,7 +142,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):

    Performs PCR on given matrix and returns results in a dictionary.

-    Parameters:
+    *Parameters*:

        a : array
        Data measurement matrix, (samples x variables)
@ -148,18 +151,18 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
        aopt : int
        Number of components to use, aopt<=min(samples, variables)

-    Returns:
+    *Returns*:

    results : dict
        keys -- values,  T -- scores, P -- loadings, E -- residuals,
        levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
        explained variance, aopt -- number of components used
    
-    OtherParameters:
+    *OtherParameters*:

-    mode : str
+        mode : {string}
            Amount of info retained, ('fast', 'normal', 'detailed')
-    center_axis : int
+        center_axis : {integer}
            Center along given axis. If neg.: no centering (-inf,..., matrix modes)
  
    SeeAlso:
@ -284,7 +287,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
    
    *SeeAlso*:
    
-        `center` : data centering
+        `center` - data centering
 
    *Notes*

@ -311,13 +314,15 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
        Y = atleast_2d(Y).T
        k, l = Y.shape
    assert(m == k)
-    assert(aopt<min(m, n))
    mnx, mny = 0, 0
+    min_aopt = min(m, n)
    if center_axis >= 0:
        mnx = expand_dims(X.mean(center_axis), center_axis)
        X = X - mnx
+        min_aopt = min_aopt - 1
        mny = expand_dims(Y.mean(center_axis), center_axis)
        Y = Y - mny 
+    assert(aopt > 0 and aopt < min_aopt)
    
    W = empty((n, aopt))
    P = empty((n, aopt))
@ -356,7 +361,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
        T[:,i] = t.ravel()
        W[:,i] = w.ravel()

-        if mode=='fast' and i==aopt-1:
+        if mode == 'fast' and i == (aopt - 1):
            if scale == 'loads':
                tnorm = sqrt(tt)
                T = T/tnorm
@ -495,7 +500,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
    m, n = X.shape
    k, l = Y.shape
    u, o = Z.shape
-    max_rank = min(m, n)
+    max_rank = min(m, n) + 1
    assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
    tried: %d, max_rank: %d" %(a_max, max_rank)
    
@ -617,6 +622,20 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
    
    return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}    

+def lpls_predict(model_dict, x, aopt):
+    """Predict lpls responses from existing model on new data.
+    """
+    try:
+        m, n = x.shape
+    except:
+        x = atleast_2d(x.shape)
+        m, n = x.shape
+    
+    if 'B0' in model_dict.keys():
+        y = model_dict['B0'] + dot()
+        
+    
+
 def vnorm(a):
    """Returns the norm of a vector.

@ -714,19 +733,19 @@ def _scale(a, axis):
    return a - sc, sc

 def esvd(data, a_max=None):
-    """ SVD with kernel calculation
+    """SVD with kernel calculation.

    Calculate subspaces of X'X or XX' depending on the shape
    of the matrix.

-    Parameters:
+    *Parameters*:

        data : {array}
            Data matrix
        a_max : {integer}
            Number of components to extract

-    Returns:
+    *Returns*:

        u : {array}
            Right hand eigenvectors
@ -735,9 +754,9 @@ def esvd(data, a_max=None):
        v : {array}
            Left hand eigenvectors
    
-    notes:
+    *Notes*:

-        Uses Anoldi iterations (ARPACK)
+        Uses Arnoldi iterations for the symmetric eigendecomp (ARPACK)
    
    """