diff --git a/scripts/lpls/run_smoker.py b/scripts/lpls/run_smoker.py index 36deb09..cd2257b 100644 --- a/scripts/lpls/run_smoker.py +++ b/scripts/lpls/run_smoker.py @@ -14,7 +14,7 @@ from plots_lpls import plot_corrloads # full smoker data DX = dataset.read_ftsv(open("../../data/smokers-full/Xfull.ftsv")) DY = dataset.read_ftsv(open("../../data/smokers-full/Yg.ftsv")) -Y = DY.asarray() +Y = DY.asarray().astype('d') # select subset genes by SAM rpy.r.library("siggenes") rpy.r.library("qvalue") @@ -88,7 +88,7 @@ Z, newind = rpy_go.genego_matrix(goterms, tmat, gene_ids, terms,func=mean) Z = Z.T # update X matrix (no go-terms available) Xr = Xr[:,newind] -gene_ids = asarray(gene_ids)[newind] +new_gene_ids = asarray(gene_ids)[newind] ######## LPLSR ######## @@ -104,6 +104,19 @@ dx,Ry,rssy = correlation_loadings(Y, T, Q) cadz,Rz,rssz = correlation_loadings(Z.T, W, L) # Prediction error rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha) +alpha_check=False +if alpha_check: + Alpha = arange(0.01, 1, .1) + Rmsep,Yhat, CE = [],[],[] + for a in Alpha: + rmsep , yhat, ce = cv_lpls(Xr, Y, Z, a_max, alpha=alpha) + Rmsep.append(rmsep) + Yhat.append(yhat) + CE.append(yhat) + Rmsep = asarray(Rmsep) + Yhat = asarray(Yhat) + CE = asarray(CE) + # Significance Hotellings T Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt) @@ -119,10 +132,10 @@ bar_col = 'rgb'*5 m = Y.shape[1] for a in range(m): bar(arange(a_max)+a*bar_w+.1, rmsep[:,a], width=bar_w, color=bar_col[a]) -ylim([rmsep.min()-.05, rmsep.max()+.05]) +ylim([rmsep.min()-.05, rmsep.max()+.05]) +title('RMSEP') figure(2) # Hypoid correlations -title('RMSEP') plot_corrloads(Rz, pc1=0, pc2=1, s=tsqz/10.0, c='b', zorder=5, expvar=evz, ax=None) ax = gca() ylabels = DY.get_identifiers('_cat', sorted=True)