From 155dfada5cb9775bbd456084871d01ee683a249d Mon Sep 17 00:00:00 2001 From: flatberg Date: Mon, 23 Jul 2007 13:25:34 +0000 Subject: [PATCH] iups --- scripts/lpls/lpls.py | 20 +++++++++++++------- scripts/lpls/plots_lpls.py | 1 + scripts/lpls/rpy_go.py | 6 +++++- scripts/lpls/run_smoker.py | 21 ++++++++++++--------- 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/scripts/lpls/lpls.py b/scripts/lpls/lpls.py index 13a0294..071ce5d 100644 --- a/scripts/lpls/lpls.py +++ b/scripts/lpls/lpls.py @@ -9,7 +9,7 @@ import select_generators sys.path.remove("/home/flatberg/fluents/fluents/lib") -def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True): +def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=True): """ L-shaped Partial Least Sqaures Regression by the nipals algorithm. (X!Z)->Y @@ -113,7 +113,11 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True): evx = 100.0*(1 - var_x/varX) evy = 100.0*(1 - var_y/varY) evz = 100.0*(1 - var_z/varZ) - + if scale=='loads': + tnorm = apply_along_axis(norm, 0, T) + T = T/tnorm + Q = Q*tnorm + W = W*tnorm return T, W, P, Q, U, L, K, B, b0, evx, evy, evz def svd_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True): @@ -307,7 +311,8 @@ def center(a, axis): # 0 = col center, 1 = row center, 2 = double center # -1 = nothing if axis==-1: - return a + mn = zeros((a.shape[1],)) + return a - mn, mn elif axis==0: mn = a.mean(0) return a - mn, mn @@ -364,14 +369,14 @@ def correlation_loadings(D, T, P, test=True): def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5): """Performs crossvalidation to get generalisation error in lpls""" - cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=True,index_out=True) + cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True) k, l = Y.shape Yhat = empty((a_max,k,l), 'd') for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter): T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z, a_max=a_max, alpha=alpha, - mean_ctr=[0,0,1], + mean_ctr=[2,0,1], verbose=False) for a in range(a_max): Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a]) @@ -385,7 +390,7 @@ def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5): return rmsep, Yhat, class_err def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5): - cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=True,index_out=False) + cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False) m, n = X.shape k, l = Y.shape o, p = Z.shape @@ -398,7 +403,8 @@ def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5): T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z, a_max=a_max, alpha=alpha, - mean_ctr=[0,0,1], + mean_ctr=[2,0,1], + scale='loads', verbose=False) WWx[i,:,:] = W WWz[i,:,:] = L diff --git a/scripts/lpls/plots_lpls.py b/scripts/lpls/plots_lpls.py index a350b86..bb1bc1b 100644 --- a/scripts/lpls/plots_lpls.py +++ b/scripts/lpls/plots_lpls.py @@ -39,6 +39,7 @@ def plot_corrloads(R, pc1=0,pc2=1,s=20, c='b', zorder=5,expvar=None,ax=None,draw #pylab.show() def plot_dag(edge_dict, node_color='b', node_size=30,labels=None,nodelist=None,pos=None): + # networkx does not play well with colon in node names clean_edges = {} for head, neigb in edge_dict.items(): head = head.replace(":", "_") diff --git a/scripts/lpls/rpy_go.py b/scripts/lpls/rpy_go.py index 14ada26..787700a 100644 --- a/scripts/lpls/rpy_go.py +++ b/scripts/lpls/rpy_go.py @@ -1,6 +1,5 @@ """ Module for Gene ontology related functions called in R""" import scipy - import rpy silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r) @@ -126,3 +125,8 @@ def parents_dag(go_terms, ontology=['BP']): else: edge_dict[nn] = [head] return edge_dict + +def gene_GO_hypergeo_test(genelist, universe, ontology = ['BP']): + + pvals = geneGoHyperGeoTest(entrezGeneIds, lib=None, ontology=ontology[0], universe=universe) + return pvals diff --git a/scripts/lpls/run_smoker.py b/scripts/lpls/run_smoker.py index 3f152b9..36deb09 100644 --- a/scripts/lpls/run_smoker.py +++ b/scripts/lpls/run_smoker.py @@ -32,14 +32,14 @@ print "SAM done" qq = rpy.r('qobj<-qvalue(sam.out@p.value)') qvals = asarray(qq['qvalues']) # cut off -co = 0.1 +co = 0.001 index = where(qvals<0.01)[0] # Subset data X = DX.asarray() Xr = X[:,index] gene_ids = DX.get_identifiers('gene_ids', index) - +print "\nWorkiing on subset with %s genes " %len(gene_ids) ### Build GO data #### print "Go terms ..." @@ -48,13 +48,15 @@ terms = set() for t in goterms.values(): terms.update(t) terms = list(terms) +print "Number of go-terms: %s" %len(terms) rpy.r.library("GOSim") # Go-term similarity matrix methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin") -meth = methods[2] +meth = methods[0] print "Term-term similarity matrix (method = %s)" %meth if meth=="CoutoEnriched": rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)') +print "Calculating term-term similarity matrix" tmat = rpy.r.getTermSim(terms, verbose=False, method=meth) # check if all terms where found nanindex = where(isnan(tmat[:,0]))[0] @@ -93,20 +95,21 @@ gene_ids = asarray(gene_ids)[newind] print "LPLSR ..." a_max = 5 aopt = 2 -alpha=.5 +alpha=.6 T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha) # Correlation loadings -dx,Rx,ssx= correlation_loadings(Xr, T, P) -dx,Ry,ssx= correlation_loadings(Y, T, Q) -cadx,Rz,ssx= correlation_loadings(Z.T, K, L) +dx,Rx,rssx = correlation_loadings(Xr, T, P) +dx,Ry,rssy = correlation_loadings(Y, T, Q) +cadz,Rz,rssz = correlation_loadings(Z.T, W, L) # Prediction error rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha) # Significance Hotellings T Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt) -tsqx = cx_stats.hotelling(Wx,W[:,:aopt]) -tsqz = cx_stats.hotelling(Wz,L[:,:aopt]) +Ws = W*apply_along_axis(norm, 0, T) +tsqx = cx_stats.hotelling(Wx, Ws[:,:aopt]) +tsqz = cx_stats.hotelling(Wz, L[:,:aopt]) ## plots ##