Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This commit is contained in:
Arnar Flatberg 2007-07-23 13:25:34 +00:00
parent 9db5991108
commit 155dfada5c
4 changed files with 31 additions and 17 deletions

View File

@ -9,7 +9,7 @@ import select_generators
sys.path.remove("/home/flatberg/fluents/fluents/lib") sys.path.remove("/home/flatberg/fluents/fluents/lib")
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True): def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=True):
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm. """ L-shaped Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y (X!Z)->Y
@ -113,7 +113,11 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
evx = 100.0*(1 - var_x/varX) evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY) evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ) evz = 100.0*(1 - var_z/varZ)
if scale=='loads':
tnorm = apply_along_axis(norm, 0, T)
T = T/tnorm
Q = Q*tnorm
W = W*tnorm
return T, W, P, Q, U, L, K, B, b0, evx, evy, evz return T, W, P, Q, U, L, K, B, b0, evx, evy, evz
def svd_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True): def svd_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
@ -307,7 +311,8 @@ def center(a, axis):
# 0 = col center, 1 = row center, 2 = double center # 0 = col center, 1 = row center, 2 = double center
# -1 = nothing # -1 = nothing
if axis==-1: if axis==-1:
return a mn = zeros((a.shape[1],))
return a - mn, mn
elif axis==0: elif axis==0:
mn = a.mean(0) mn = a.mean(0)
return a - mn, mn return a - mn, mn
@ -364,14 +369,14 @@ def correlation_loadings(D, T, P, test=True):
def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5): def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5):
"""Performs crossvalidation to get generalisation error in lpls""" """Performs crossvalidation to get generalisation error in lpls"""
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=True,index_out=True) cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
k, l = Y.shape k, l = Y.shape
Yhat = empty((a_max,k,l), 'd') Yhat = empty((a_max,k,l), 'd')
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter): for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z, T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
a_max=a_max, a_max=a_max,
alpha=alpha, alpha=alpha,
mean_ctr=[0,0,1], mean_ctr=[2,0,1],
verbose=False) verbose=False)
for a in range(a_max): for a in range(a_max):
Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a]) Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
@ -385,7 +390,7 @@ def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5):
return rmsep, Yhat, class_err return rmsep, Yhat, class_err
def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5): def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5):
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=True,index_out=False) cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
m, n = X.shape m, n = X.shape
k, l = Y.shape k, l = Y.shape
o, p = Z.shape o, p = Z.shape
@ -398,7 +403,8 @@ def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5):
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z, T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
a_max=a_max, a_max=a_max,
alpha=alpha, alpha=alpha,
mean_ctr=[0,0,1], mean_ctr=[2,0,1],
scale='loads',
verbose=False) verbose=False)
WWx[i,:,:] = W WWx[i,:,:] = W
WWz[i,:,:] = L WWz[i,:,:] = L

View File

@ -39,6 +39,7 @@ def plot_corrloads(R, pc1=0,pc2=1,s=20, c='b', zorder=5,expvar=None,ax=None,draw
#pylab.show() #pylab.show()
def plot_dag(edge_dict, node_color='b', node_size=30,labels=None,nodelist=None,pos=None): def plot_dag(edge_dict, node_color='b', node_size=30,labels=None,nodelist=None,pos=None):
# networkx does not play well with colon in node names
clean_edges = {} clean_edges = {}
for head, neigb in edge_dict.items(): for head, neigb in edge_dict.items():
head = head.replace(":", "_") head = head.replace(":", "_")

View File

@ -1,6 +1,5 @@
""" Module for Gene ontology related functions called in R""" """ Module for Gene ontology related functions called in R"""
import scipy import scipy
import rpy import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r) silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
@ -126,3 +125,8 @@ def parents_dag(go_terms, ontology=['BP']):
else: else:
edge_dict[nn] = [head] edge_dict[nn] = [head]
return edge_dict return edge_dict
def gene_GO_hypergeo_test(genelist, universe, ontology = ['BP']):
pvals = geneGoHyperGeoTest(entrezGeneIds, lib=None, ontology=ontology[0], universe=universe)
return pvals

View File

@ -32,14 +32,14 @@ print "SAM done"
qq = rpy.r('qobj<-qvalue(sam.out@p.value)') qq = rpy.r('qobj<-qvalue(sam.out@p.value)')
qvals = asarray(qq['qvalues']) qvals = asarray(qq['qvalues'])
# cut off # cut off
co = 0.1 co = 0.001
index = where(qvals<0.01)[0] index = where(qvals<0.01)[0]
# Subset data # Subset data
X = DX.asarray() X = DX.asarray()
Xr = X[:,index] Xr = X[:,index]
gene_ids = DX.get_identifiers('gene_ids', index) gene_ids = DX.get_identifiers('gene_ids', index)
print "\nWorkiing on subset with %s genes " %len(gene_ids)
### Build GO data #### ### Build GO data ####
print "Go terms ..." print "Go terms ..."
@ -48,13 +48,15 @@ terms = set()
for t in goterms.values(): for t in goterms.values():
terms.update(t) terms.update(t)
terms = list(terms) terms = list(terms)
print "Number of go-terms: %s" %len(terms)
rpy.r.library("GOSim") rpy.r.library("GOSim")
# Go-term similarity matrix # Go-term similarity matrix
methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin") methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
meth = methods[2] meth = methods[0]
print "Term-term similarity matrix (method = %s)" %meth print "Term-term similarity matrix (method = %s)" %meth
if meth=="CoutoEnriched": if meth=="CoutoEnriched":
rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)') rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)')
print "Calculating term-term similarity matrix"
tmat = rpy.r.getTermSim(terms, verbose=False, method=meth) tmat = rpy.r.getTermSim(terms, verbose=False, method=meth)
# check if all terms where found # check if all terms where found
nanindex = where(isnan(tmat[:,0]))[0] nanindex = where(isnan(tmat[:,0]))[0]
@ -93,19 +95,20 @@ gene_ids = asarray(gene_ids)[newind]
print "LPLSR ..." print "LPLSR ..."
a_max = 5 a_max = 5
aopt = 2 aopt = 2
alpha=.5 alpha=.6
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha) T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha)
# Correlation loadings # Correlation loadings
dx,Rx,ssx= correlation_loadings(Xr, T, P) dx,Rx,rssx = correlation_loadings(Xr, T, P)
dx,Ry,ssx= correlation_loadings(Y, T, Q) dx,Ry,rssy = correlation_loadings(Y, T, Q)
cadx,Rz,ssx= correlation_loadings(Z.T, K, L) cadz,Rz,rssz = correlation_loadings(Z.T, W, L)
# Prediction error # Prediction error
rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha) rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha)
# Significance Hotellings T # Significance Hotellings T
Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt) Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt)
tsqx = cx_stats.hotelling(Wx,W[:,:aopt]) Ws = W*apply_along_axis(norm, 0, T)
tsqx = cx_stats.hotelling(Wx, Ws[:,:aopt])
tsqz = cx_stats.hotelling(Wz, L[:,:aopt]) tsqz = cx_stats.hotelling(Wz, L[:,:aopt])