This commit is contained in:
parent
14d24d02c2
commit
e417547923
|
@ -435,50 +435,74 @@ class SubgraphQuery(workflow.Function):
|
|||
# 1.) Operate on a subset selection
|
||||
selection = main.project.get_selection()
|
||||
if not selection.has_key(self._dim):
|
||||
print "not"
|
||||
logger.log("notice", "Expected gene ids: %s, but got. %s" %(self._dim, selection.keys()))
|
||||
return None
|
||||
if len(selection[self._dim]) == 0:
|
||||
print "not3"
|
||||
logger.log("notice", "No selected genes to query")
|
||||
return None
|
||||
logger.log("notice", "No selected genes to query, using all")
|
||||
Dw = Dw.subdata(self._dim, Dw.get_identifiers(self._dim)[:100])
|
||||
else:
|
||||
Dw = Dw.subdata(self._dim, selection[self._dim])
|
||||
print Dw.shape
|
||||
|
||||
# 2.) Pairwise goodness in loading space
|
||||
indices = self._pairsim(Dw)
|
||||
print indices
|
||||
print indices.shape
|
||||
|
||||
idents1 = Dw.get_identifiers(self._dim, indices[:,0])
|
||||
idents2 = Dw.get_identifiers(self._dim, indices[:,1])
|
||||
idents = zip(idents1, idents2)
|
||||
|
||||
# 3.) Identify close subgraphs
|
||||
|
||||
# 4.) Rank subgraphs
|
||||
|
||||
main.project.set_selection('gene_ids', idents1)
|
||||
#main.project.set_sele
|
||||
logger.log("notice", "Gene ids updated")
|
||||
#plt = GraphQueryScatterPlot(SS, Dw)
|
||||
#return [plt]
|
||||
|
||||
def _pairsim(self, Dw, ptype='cosine', cut_rat=.2):
    """Returns close pairs of rows of the array behind Dw.

    Dw : dataset-like object; only ``Dw.asarray()`` is used here. The
        returned 2-d array is compared row against row.
    ptype : ['cov', 'correlation', 'cosine', 'heat', 'euclidean']
        'cov'  -- row covariance; keeps pairs with
                  cov >= cov.max()*cut_rat.
        'heat' -- exp(-euclidean distance); keeps pairs with
                  heat >= heat.max()*cut_rat.
        'euclidean', 'cosine', 'correlation' -- pdist metric of that
                  name; keeps pairs with dist <= dist.max()*cut_rat.
    cut_rat : fraction of the largest matrix entry used as threshold.

    Returns an integer array of shape (n_pairs, 2) holding row indices.
    'cov' and 'heat' report every matrix cell that passes (both
    orientations and the diagonal); the distance metrics report only
    i <= j.

    Raises ValueError if ptype is not one of the names above.
    """
    W = Dw.asarray()
    if ptype == 'cov':
        # Centre into a new array: an in-place ``W -= ...`` would write
        # into whatever buffer Dw.asarray() handed back.
        Wc = W - W.mean(1)[:, scipy.newaxis]
        wcov = scipy.dot(Wc, Wc.T)/(Wc.shape[1] - 1)
        wcov_min = wcov.max()*cut_rat
        indices = scipy.asarray(scipy.where(wcov >= wcov_min)).T
    elif ptype == 'heat':
        from hcluster import pdist, squareform
        D = squareform(pdist(W))
        H = scipy.exp(-D)
        h_min = H.max()*cut_rat
        indices = scipy.asarray(scipy.where(H >= h_min)).T
    elif ptype in ('euclidean', 'cosine', 'correlation'):
        from hcluster import pdist, squareform
        # The metric belongs to pdist; handing it to squareform left
        # every ptype in this branch computing euclidean distances.
        D = squareform(pdist(W, ptype))
        d_min = D.max()*cut_rat
        rows, cols = scipy.where(D <= d_min)
        # Keep the upper triangle (diagonal included), in the same
        # row-major order as a nested i, j >= i loop. Stays (0, 2)
        # shaped when nothing passes, so indices[:, 0] keeps working.
        upper = rows <= cols
        indices = scipy.asarray([rows[upper], cols[upper]]).T
    else:
        raise ValueError("ptype: %s not valid" % ptype)
    return indices
|
||||
def _subgraphsim(self, Dw, idents, stype='dijkstra'):
    """Unfinished stub for a subgraph similarity step.

    Dw : not used by the current body.
    idents : iterable; each item is handed to ``G.get_edge`` as ``edge``.
    stype : only 'dijkstra' is checked, and that branch does nothing yet.

    There is no return statement, so the method returns None.
    """
    # subgraph
    Gw = nx.XGraph()
    for edge in idents:
        # NOTE(review): ``G`` is never assigned in this method; unless it
        # is a module-level name this raises NameError -- confirm where
        # the source graph is meant to come from.
        e = G.get_edge(edge)
        # NOTE(review): ``e`` is not used and add_edge() receives no
        # arguments; presumably the edge (and its data) should be added
        # to Gw here -- TODO confirm intended call.
        Gw.add_edge()
    if stype == 'dijkstra':
        pass
|
||||
|
||||
class GraphQueryScatterPlot(plots.ScatterPlot):
|
||||
def __init__(self, S, Dw, *args, **kw):
|
||||
|
|
Reference in New Issue