iups
This commit is contained in:
parent
9db5991108
commit
155dfada5c
|
@ -9,7 +9,7 @@ import select_generators
|
||||||
sys.path.remove("/home/flatberg/fluents/fluents/lib")
|
sys.path.remove("/home/flatberg/fluents/fluents/lib")
|
||||||
|
|
||||||
|
|
||||||
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
|
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=True):
|
||||||
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
|
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
|
||||||
|
|
||||||
(X!Z)->Y
|
(X!Z)->Y
|
||||||
|
@ -113,7 +113,11 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
|
||||||
evx = 100.0*(1 - var_x/varX)
|
evx = 100.0*(1 - var_x/varX)
|
||||||
evy = 100.0*(1 - var_y/varY)
|
evy = 100.0*(1 - var_y/varY)
|
||||||
evz = 100.0*(1 - var_z/varZ)
|
evz = 100.0*(1 - var_z/varZ)
|
||||||
|
if scale=='loads':
|
||||||
|
tnorm = apply_along_axis(norm, 0, T)
|
||||||
|
T = T/tnorm
|
||||||
|
Q = Q*tnorm
|
||||||
|
W = W*tnorm
|
||||||
return T, W, P, Q, U, L, K, B, b0, evx, evy, evz
|
return T, W, P, Q, U, L, K, B, b0, evx, evy, evz
|
||||||
|
|
||||||
def svd_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
|
def svd_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
|
||||||
|
@ -307,7 +311,8 @@ def center(a, axis):
|
||||||
# 0 = col center, 1 = row center, 2 = double center
|
# 0 = col center, 1 = row center, 2 = double center
|
||||||
# -1 = nothing
|
# -1 = nothing
|
||||||
if axis==-1:
|
if axis==-1:
|
||||||
return a
|
mn = zeros((a.shape[1],))
|
||||||
|
return a - mn, mn
|
||||||
elif axis==0:
|
elif axis==0:
|
||||||
mn = a.mean(0)
|
mn = a.mean(0)
|
||||||
return a - mn, mn
|
return a - mn, mn
|
||||||
|
@ -364,14 +369,14 @@ def correlation_loadings(D, T, P, test=True):
|
||||||
|
|
||||||
def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
||||||
"""Performs crossvalidation to get generalisation error in lpls"""
|
"""Performs crossvalidation to get generalisation error in lpls"""
|
||||||
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=True,index_out=True)
|
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
Yhat = empty((a_max,k,l), 'd')
|
Yhat = empty((a_max,k,l), 'd')
|
||||||
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
|
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
|
||||||
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
||||||
a_max=a_max,
|
a_max=a_max,
|
||||||
alpha=alpha,
|
alpha=alpha,
|
||||||
mean_ctr=[0,0,1],
|
mean_ctr=[2,0,1],
|
||||||
verbose=False)
|
verbose=False)
|
||||||
for a in range(a_max):
|
for a in range(a_max):
|
||||||
Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
|
Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
|
||||||
|
@ -385,7 +390,7 @@ def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
||||||
return rmsep, Yhat, class_err
|
return rmsep, Yhat, class_err
|
||||||
|
|
||||||
def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5):
|
def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5):
|
||||||
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=True,index_out=False)
|
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
o, p = Z.shape
|
o, p = Z.shape
|
||||||
|
@ -398,7 +403,8 @@ def jk_lpls(X, Y, Z, a_max, nsets=None, alpha=.5):
|
||||||
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
||||||
a_max=a_max,
|
a_max=a_max,
|
||||||
alpha=alpha,
|
alpha=alpha,
|
||||||
mean_ctr=[0,0,1],
|
mean_ctr=[2,0,1],
|
||||||
|
scale='loads',
|
||||||
verbose=False)
|
verbose=False)
|
||||||
WWx[i,:,:] = W
|
WWx[i,:,:] = W
|
||||||
WWz[i,:,:] = L
|
WWz[i,:,:] = L
|
||||||
|
|
|
@ -39,6 +39,7 @@ def plot_corrloads(R, pc1=0,pc2=1,s=20, c='b', zorder=5,expvar=None,ax=None,draw
|
||||||
#pylab.show()
|
#pylab.show()
|
||||||
|
|
||||||
def plot_dag(edge_dict, node_color='b', node_size=30,labels=None,nodelist=None,pos=None):
|
def plot_dag(edge_dict, node_color='b', node_size=30,labels=None,nodelist=None,pos=None):
|
||||||
|
# networkx does not play well with colon in node names
|
||||||
clean_edges = {}
|
clean_edges = {}
|
||||||
for head, neigb in edge_dict.items():
|
for head, neigb in edge_dict.items():
|
||||||
head = head.replace(":", "_")
|
head = head.replace(":", "_")
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
""" Module for Gene ontology related functions called in R"""
|
""" Module for Gene ontology related functions called in R"""
|
||||||
import scipy
|
import scipy
|
||||||
|
|
||||||
import rpy
|
import rpy
|
||||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||||
|
|
||||||
|
@ -126,3 +125,8 @@ def parents_dag(go_terms, ontology=['BP']):
|
||||||
else:
|
else:
|
||||||
edge_dict[nn] = [head]
|
edge_dict[nn] = [head]
|
||||||
return edge_dict
|
return edge_dict
|
||||||
|
|
||||||
|
def gene_GO_hypergeo_test(genelist, universe, ontology = ['BP']):
|
||||||
|
|
||||||
|
pvals = geneGoHyperGeoTest(entrezGeneIds, lib=None, ontology=ontology[0], universe=universe)
|
||||||
|
return pvals
|
||||||
|
|
|
@ -32,14 +32,14 @@ print "SAM done"
|
||||||
qq = rpy.r('qobj<-qvalue(sam.out@p.value)')
|
qq = rpy.r('qobj<-qvalue(sam.out@p.value)')
|
||||||
qvals = asarray(qq['qvalues'])
|
qvals = asarray(qq['qvalues'])
|
||||||
# cut off
|
# cut off
|
||||||
co = 0.1
|
co = 0.001
|
||||||
index = where(qvals<0.01)[0]
|
index = where(qvals<0.01)[0]
|
||||||
|
|
||||||
# Subset data
|
# Subset data
|
||||||
X = DX.asarray()
|
X = DX.asarray()
|
||||||
Xr = X[:,index]
|
Xr = X[:,index]
|
||||||
gene_ids = DX.get_identifiers('gene_ids', index)
|
gene_ids = DX.get_identifiers('gene_ids', index)
|
||||||
|
print "\nWorkiing on subset with %s genes " %len(gene_ids)
|
||||||
### Build GO data ####
|
### Build GO data ####
|
||||||
|
|
||||||
print "Go terms ..."
|
print "Go terms ..."
|
||||||
|
@ -48,13 +48,15 @@ terms = set()
|
||||||
for t in goterms.values():
|
for t in goterms.values():
|
||||||
terms.update(t)
|
terms.update(t)
|
||||||
terms = list(terms)
|
terms = list(terms)
|
||||||
|
print "Number of go-terms: %s" %len(terms)
|
||||||
rpy.r.library("GOSim")
|
rpy.r.library("GOSim")
|
||||||
# Go-term similarity matrix
|
# Go-term similarity matrix
|
||||||
methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
|
methods = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
|
||||||
meth = methods[2]
|
meth = methods[0]
|
||||||
print "Term-term similarity matrix (method = %s)" %meth
|
print "Term-term similarity matrix (method = %s)" %meth
|
||||||
if meth=="CoutoEnriched":
|
if meth=="CoutoEnriched":
|
||||||
rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)')
|
rpy.r('setEnrichmentFactors(alpha=0.1,beta=0.5)')
|
||||||
|
print "Calculating term-term similarity matrix"
|
||||||
tmat = rpy.r.getTermSim(terms, verbose=False, method=meth)
|
tmat = rpy.r.getTermSim(terms, verbose=False, method=meth)
|
||||||
# check if all terms where found
|
# check if all terms where found
|
||||||
nanindex = where(isnan(tmat[:,0]))[0]
|
nanindex = where(isnan(tmat[:,0]))[0]
|
||||||
|
@ -93,19 +95,20 @@ gene_ids = asarray(gene_ids)[newind]
|
||||||
print "LPLSR ..."
|
print "LPLSR ..."
|
||||||
a_max = 5
|
a_max = 5
|
||||||
aopt = 2
|
aopt = 2
|
||||||
alpha=.5
|
alpha=.6
|
||||||
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha)
|
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(Xr,Y,Z, a_max, alpha)
|
||||||
|
|
||||||
# Correlation loadings
|
# Correlation loadings
|
||||||
dx,Rx,ssx= correlation_loadings(Xr, T, P)
|
dx,Rx,rssx = correlation_loadings(Xr, T, P)
|
||||||
dx,Ry,ssx= correlation_loadings(Y, T, Q)
|
dx,Ry,rssy = correlation_loadings(Y, T, Q)
|
||||||
cadx,Rz,ssx= correlation_loadings(Z.T, K, L)
|
cadz,Rz,rssz = correlation_loadings(Z.T, W, L)
|
||||||
# Prediction error
|
# Prediction error
|
||||||
rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha)
|
rmsep , yhat, class_error = cv_lpls(Xr, Y, Z, a_max, alpha=alpha)
|
||||||
|
|
||||||
# Significance Hotellings T
|
# Significance Hotellings T
|
||||||
Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt)
|
Wx, Wz, Wy, = jk_lpls(Xr, Y, Z, aopt)
|
||||||
tsqx = cx_stats.hotelling(Wx,W[:,:aopt])
|
Ws = W*apply_along_axis(norm, 0, T)
|
||||||
|
tsqx = cx_stats.hotelling(Wx, Ws[:,:aopt])
|
||||||
tsqz = cx_stats.hotelling(Wz, L[:,:aopt])
|
tsqz = cx_stats.hotelling(Wz, L[:,:aopt])
|
||||||
|
|
||||||
|
|
||||||
|
|
Reference in New Issue