From 438e7cb91851017dad745b0f39d2e9fbb9012894 Mon Sep 17 00:00:00 2001 From: flatberg Date: Thu, 2 Aug 2007 11:19:16 +0000 Subject: [PATCH] Irrelevant play --- scripts/lpls/lpls.py | 8 ++++---- scripts/lpls/rpy_go.py | 17 ++++++++++++++--- scripts/lpls/run_smoker.py | 39 +++++++++++++++++++------------------- 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/scripts/lpls/lpls.py b/scripts/lpls/lpls.py index 570dba4..5630d6d 100644 --- a/scripts/lpls/lpls.py +++ b/scripts/lpls/lpls.py @@ -365,7 +365,7 @@ def correlation_loadings(D, T, P, test=True): -def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5): +def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,1]): """Performs crossvalidation to get generalisation error in lpls""" cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True) k, l = Y.shape @@ -374,7 +374,7 @@ def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5): T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z, a_max=a_max, alpha=alpha, - mean_ctr=[2,0,1], + mean_ctr=mean_ctr, verbose=False) for a in range(a_max): Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a]) @@ -387,7 +387,7 @@ def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5): rmsep = sqrt(sep.mean(1)) return rmsep, Yhat, class_err -def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5): +def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5, mean_ctr=[2,0,1]): cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False) m, n = X.shape k, l = Y.shape @@ -401,7 +401,7 @@ def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5): T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z, a_max=a_max, alpha=alpha, - mean_ctr=[2,0,1], + mean_ctr=mean_ctr, scale='loads', verbose=False) WWx[i,:,:] = W diff --git a/scripts/lpls/rpy_go.py b/scripts/lpls/rpy_go.py index 713c6e5..9dda37c 100644 --- a/scripts/lpls/rpy_go.py +++ b/scripts/lpls/rpy_go.py @@ -152,7 +152,18 @@ def parents_dag(go_terms, ontology=['BP']): edge_dict[nn] = [head] return edge_dict -def gene_GO_hypergeo_test(genelist, universe, ontology = ['BP']): +def gene_GO_hypergeo_test(genelist,universe="entrezUniverse",ontology="BP",chip = "hgu133a",pval_cutoff=0.01,cond=False,test_direction="over"): - pvals = geneGoHyperGeoTest(entrezGeneIds, lib=None, ontology=ontology[0], universe=universe) - return pvals + #assert(scipy.alltrue([True for i in genelist if i in universe])) + universeGeneIds=universe + params = rpy.r.new("GOHyperGParams", + geneIds=genelist, + annotation="hgu133a", + ontology=ontology, + pvalueCutoff=pval_cutoff, + conditional=cond, + testDirection=test_direction + ) + result = rpy.r.summary(rpy.r.hyperGTest(params)) + + return rpy.r.summary(result), params diff --git a/scripts/lpls/run_smoker.py b/scripts/lpls/run_smoker.py index f1d4e86..a817eae 100644 --- a/scripts/lpls/run_smoker.py +++ b/scripts/lpls/run_smoker.py @@ -12,7 +12,7 @@ from plots_lpls import plot_corrloads ######## DATA ########## # full smoker data -DX = dataset.read_ftsv(open("../../data/smokers-full/Xfull.ftsv")) +DX = dataset.read_ftsv(open("../../data/smokers-full/Smokers.ftsv")) DY = dataset.read_ftsv(open("../../data/smokers-full/Yg.ftsv")) Y = DY.asarray().astype('d') # select subset genes by SAM @@ -32,7 +32,7 @@ print "SAM done" qq = rpy.r('qobj<-qvalue(sam.out@p.value)') qvals = asarray(qq['qvalues']) # cut off -cutoff = 2 +cutoff = 0.05 index = where(qvals0: # Z-matrix #Z, newind = rpy_go.genego_matrix(terms, tmat, gene_ids, terms,func=mean) #Z = Z.T -Z1 = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat1,go_term_sim="OA",term_sim=meth) +Z = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat1,go_term_sim="OA",term_sim=meth) #### do another -meth = methods[4] -rpytmat = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.getTermSim)(terms, method=meth,verbose=False) -tmat = rpy.r.assign("haha", rpytmat) +#meth = methods[4] +#rpytmat = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.getTermSim)(terms, method=meth,verbose=False) +#tmat = rpy.r.assign("haha", rpytmat) # check if all terms where found -nanindex = where(isnan(tmat[:,0]))[0] -if len(nanindex)>0: - raise valueError("NANs in tmat") +#nanindex = where(isnan(tmat[:,0]))[0] +#if len(nanindex)>0: +# raise valueError("NANs in tmat") # Z-matrix #Z, newind = rpy_go.genego_matrix(terms, tmat, gene_ids, terms,func=mean) #Z = Z.T -Z = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat,go_term_sim="OA",term_sim=meth) +#Z = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat,go_term_sim="OA",term_sim=meth) @@ -105,17 +105,20 @@ Xr = X[:,newind] ######## LPLSR ######## print "LPLSR ..." a_max = 5 -aopt = 2 -alpha=.6 -T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha) +aopt = 3 +alpha=.4 +mean_ctr = [2, 0, 1] +T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, + alpha=alpha, + mean_ctr=mean_ctr) # Correlation loadings dx,Rx,rssx = correlation_loadings(Xr, T, P) dx,Ry,rssy = correlation_loadings(Y, T, Q) cadz,Rz,rssz = correlation_loadings(Z.T, W, L) # Prediction error -rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha) -alpha_check=False +rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha,mean_ctr=mean_ctr) +alpha_check=True if alpha_check: Alpha = arange(0.01, 1, .1) Rmsep,Yhat, CE = [],[],[] @@ -127,8 +130,6 @@ if alpha_check: Rmsep = asarray(Rmsep) Yhat = asarray(Yhat) CE = asarray(CE) - - figure(200) @@ -158,7 +159,7 @@ title('Classification accuracy') figure(3) # Hypoid correlations plot_corrloads(Rz, pc1=0, pc2=1, s=tsqz/10.0, c='b', zorder=5, expvar=evz, ax=None) ax = gca() -ylabels = DY.get_identifiers('_cat', sorted=True) +ylabels = DY.get_identifiers('_status', sorted=True) plot_corrloads(Ry, pc1=0, pc2=1, s=150, c='g', zorder=5, expvar=evy, ax=ax,labels=ylabels) figure(3)