From 2951ca40882b8098c8841246ef60db742f74d9cc Mon Sep 17 00:00:00 2001 From: flatberg Date: Mon, 26 Nov 2007 15:30:52 +0000 Subject: [PATCH] A few updates --- pyblm/crossvalidation.py | 30 ++++++++++++---- pyblm/engines.py | 35 ++++++++++++------ pyblm/models.py | 78 ++++++++++++++++++++++------------------ pyblm/statistics.py | 24 +++++++++---- 4 files changed, 108 insertions(+), 59 deletions(-) diff --git a/pyblm/crossvalidation.py b/pyblm/crossvalidation.py index ab7aded..762f59b 100644 --- a/pyblm/crossvalidation.py +++ b/pyblm/crossvalidation.py @@ -12,7 +12,7 @@ from numpy.random import shuffle from engines import nipals_lpls as lpls -def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=True): +def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True): """Performs crossvalidation for generalisation error in lpls. The L-PLS crossvalidation is estimated just like an ordinary pls @@ -42,6 +42,8 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=Tru 0 : row center 1 : column center 2 : double center + zorth : {boolean} + If true, Require orthogonal latent components in Z. verbose : {boolean}, optional Verbosity of console output. For use in debugging. @@ -70,7 +72,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=Tru Yhat = empty((a_max, k, l), 'd') for cal, val in cv(nsets, k): - dat = lpls(X[cal],Y[cal],Z,a_max=a_max,alpha=alpha,mean_ctr=mean_ctr,verbose=verbose) + # do the training model + dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha, + mean_ctr=mean_ctr, zorth=zorth, verbose=verbose) + + # center test data if mean_ctr[0] != 1: xi = X[val,:] - dat['mnx'] else: @@ -79,14 +85,24 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=Tru ym = dat['mny'] else: ym = Y[val].mean(1)[:,newaxis] #???: check this + # predictions for a in range(a_max): Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a])) + #if permute: + # xcal = X[cal] + # for a in range(1,a_max,1): + # for n in range(10): + # shuffle(cal) + # dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha, + # mean_ctr=mean_ctr, verbose=verbose) + # todo: need a better support for classification error y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?'] if y_is_class: - Yhat, err = class_error(Yhat,Y) - return Yhat, err + pass + #Yhat, err = class_error(Yhat, Y) + #return Yhat, err sep = (Y - Yhat)**2 rmsep = sqrt(sep.mean(1)).T @@ -317,8 +333,8 @@ def cv(N, K, randomise=True, sequential=False): otherwise interleaved ordering is used. """ - if K>N: - raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(N,N,K) + if N>K: + raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(K, K, N) index = xrange(N) if randomise: from random import shuffle @@ -371,7 +387,7 @@ def class_error(Yhat, Y, method='vanilla'): Yhat_c = zeros((k, l), dtype='d') for a in range(a_opt): for i in range(k): - Yhat_c[a,val,argmax(Yhat[a,val,:])] = 1.0 + Yhat_c[a, val, argmax(Yhat[a,val,:])] = 1.0 err = 100*((Yhat_c + Y) == 2).sum(1)/Y.sum(0).astype('d') return Yhat_c, err diff --git a/pyblm/engines.py b/pyblm/engines.py index 5c224ab..878b4ca 100644 --- a/pyblm/engines.py +++ b/pyblm/engines.py @@ -411,7 +411,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1): 'evx': expvarx, 'evy': expvary, 'ssqx': ssqx, 'ssqy': ssqy, 'leverage': leverage, 'mnx': mnx, 'mny': mny} -def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False): +def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth = False, verbose=False): """ L-shaped Partial Least Sqaures Regression by the nipals algorithm. An L-shaped low rank model aproximates three matrices in a hyploid @@ -475,10 +475,14 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', ve scale : {'scores', 'loads'}, optional Option to decide on where the scale goes. + zorth : {False, boolean}, optional + Option to force orthogonality between latent components + in Z verbose : {boolean}, optional Verbosity of console output. For use in debugging. *References* + Saeboe et al., LPLS-regression: a method for improved prediction and classification through inclusion of background information on predictor variables, J. of chemometrics and intell. laboratory syst. @@ -522,18 +526,22 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', ve var_y = empty((a_max,)) var_z = empty((a_max,)) - MAX_ITER = 450 + MAX_ITER = 4500 LIM = finfo(X.dtype).resolution is_rd = False for a in range(a_max): if verbose: print "\nWorking on comp. %s" %a u = F[:,:1] + w = E[:1,:].T + l = G[:,:1] diff = 1 niter = 0 while (diff>LIM and niter1), when rounding is used. """ if only_flips: @@ -279,6 +281,16 @@ def _fdr(tsq, tsqp, loc_method=median): fdr : {array} False discovery rate + *Notes*: + + This is an internal function for use in fdr estimation of jack-knifed + perturbated blm parameters. + + + *Reference*: + Gidskehaug et al., A framework for significance analysis of + gene expression data using dimension reduction methods, BMC + bioinformatics, 2007 """ n, = tsq.shape k, m = tsqp.shape