This commit is contained in:
Arnar Flatberg 2008-02-06 09:42:46 +00:00
parent 14d24d02c2
commit e417547923

View File

@@ -435,51 +435,75 @@ class SubgraphQuery(workflow.Function):
# 1.) Operate on a subset selection
selection = main.project.get_selection()
if not selection.has_key(self._dim):
print "not"
logger.log("notice", "Expected gene ids: %s, but got. %s" %(self._dim, selection.keys()))
return None
if len(selection[self._dim]) == 0:
print "not3"
logger.log("notice", "No selected genes to query")
return None
Dw = Dw.subdata(self._dim, selection[self._dim])
print Dw.shape
logger.log("notice", "No selected genes to query, using all")
Dw = Dw.subdata(self._dim, Dw.get_identifiers(self._dim)[:100])
else:
Dw = Dw.subdata(self._dim, selection[self._dim])
# 2.) Pairwise goodness in loading space
indices = self._pairsim(Dw)
print indices
print indices.shape
idents1 = Dw.get_identifiers(self._dim, indices[:,0])
idents2 = Dw.get_identifiers(self._dim, indices[:,1])
idents = zip(idents1, idents2)
# 3.) Identify close subgraphs
# 4.) Rank subgraphs
main.project.set_selection('gene_ids', idents1)
#main.project.set_sele
logger.log("notice", "Gene ids updated")
#plt = GraphQueryScatterPlot(SS, Dw)
#return [plt]
def _pairsim(self, Dw, ptype='cosine',cut_rat=.2):
"""Returns close pairs across given dim.
ptype : ['cov', 'correlation', 'cosine', 'heat', 'euclidean']
"""
# NOTE(review): this text comes from a rendered diff whose +/- markers and
# indentation were stripped, so pre- and post-commit lines are interleaved.
# Lines using names that are not parameters of this function (neigh_type,
# max_cov_ratio) are presumably the removed versions -- TODO confirm against
# the repository before relying on any single line below.
#
# Overall flow visible here: pull the data out of Dw as an array, build a
# pairwise similarity/distance matrix according to ptype, threshold it at
# (matrix max * cut_rat), and return the surviving index pairs.
W = Dw.asarray()
# Two consecutive tests for the 'cov' case: the first reads neigh_type, the
# second reads the ptype parameter. Presumably old line / new line.
if neigh_type == 'cov':
if ptype == 'cov':
# Subtract each row's mean. `-=` operates in place on W; whether W aliases
# storage owned by Dw is not visible here -- TODO confirm asarray() copies.
W -= W.mean(1)[:,scipy.newaxis]
# Row-by-row product of the centred data, scaled by (number of columns - 1).
wcov = scipy.dot(W, W.T)/(W.shape[1]-1)
# Threshold using max_cov_ratio, which is not bound anywhere in this block;
# the cut_rat variant of the same statement appears further down.
wcov_min = wcov.max()*max_cov_ratio
# Keeps only the first element of the where() result (one axis of indices).
indices = scipy.where(wcov >= wcov_min)[0]
# Branch keyed on neigh_type again; it uses a hard-coded 0.1 ratio, and the
# p1, p2 it computes are not used by any later visible line.
elif neigh_type == 'cosine':
import hcluster
dp = hcluster.squareform(hcluster.pdist(W, 'cosine'))
min_dist = dp.max()*0.1
p1, p2 = scipy.where(dp <= min_dist)
# cut_rat-based threshold on wcov; presumably the post-commit replacement for
# the max_cov_ratio line above and part of the ptype == 'cov' branch.
wcov_min = wcov.max()*cut_rat
# Stack the where() result and transpose so each row is one index tuple
# (an (i, j) pair if wcov is 2-D, which dot(W, W.T) suggests).
indices = scipy.asarray(scipy.where(wcov >= wcov_min)).T
elif ptype == 'heat':
from hcluster import pdist, squareform
# pdist is called without a metric argument here, so it uses its default.
D = squareform(pdist(W))
# NOTE(review): `exp` is unqualified and no import of it is visible in this
# block (other calls use the scipy. prefix) -- confirm it is in scope.
H = exp(-D)
h_min = H.max()*cut_rat
indices = scipy.asarray(scipy.where(H >= h_min)).T
elif ptype in ['euclidean', 'cosine', 'correlation']:
from hcluster import pdist, squareform
# NOTE(review): ptype is passed as the second argument of squareform(), not
# of pdist(), so pdist runs with its default metric regardless of ptype.
# Looks like pdist(W, ptype) was intended -- TODO confirm.
D = squareform(pdist(W), ptype)
d_min = D.max()*cut_rat
indices = []
# Scan i <= j only; j starts at i, so the diagonal entries D[i,i] are
# tested too and are included whenever they are <= d_min.
for i in range(D.shape[0]):
for j in range(i, D.shape[0]):
if D[i,j] <= d_min:
indices.append([i,j])
# Debug output written to stdout.
print "W"
print W.shape
indices = scipy.asarray(indices)
else:
# Any ptype not handled above is rejected.
raise ValueError("ptype: %s not valid" %ptype)
return indices
# NOTE(review): this statement follows the return; presumably a removed line
# from the pre-commit method that the diff rendering left in place.
acc_gene_ids = Dw.get_identifiers(self._dim, indices=indices)
def _subgraphsim(self, Dw, idents, stype='dijkstra'):
# NOTE(review): this text comes from a rendered diff whose +/- markers and
# indentation were stripped. The statements up to `pass` read as a new,
# unfinished stub; everything from "# 3.) Subgraphs" onward uses names this
# function never binds (DA, acc_gene_ids) and is presumably pre-commit code
# from another method -- TODO confirm which lines survive in the repository.
# subgraph
Gw = nx.XGraph()
# Iterates the entries of idents, treating each one as an edge key.
for edge in idents:
# NOTE(review): G is read here but is only assigned further down (and is not
# a parameter); `e` is never used afterwards.
e = G.get_edge(edge)
# NOTE(review): add_edge() is called with no arguments -- looks unfinished.
Gw.add_edge()
# The only stype handled is 'dijkstra', and it does nothing yet.
if stype == 'dijkstra':
pass
# 3.) Subgraphs
# Build a networkx graph from DA (not bound in this block).
G = DA.asnetworkx()
# Keep only graph nodes that also occur in acc_gene_ids (not bound in this
# block); the `in` test is evaluated once per node.
common_gids = [i for i in G.nodes() if i in acc_gene_ids]
G = nx.subgraph(G, common_gids)
# Split the restricted graph into its connected components.
S = nx.connected_component_subgraphs(G)
# Debug output: the len() of every component.
n = map(len, S)
print n
# Discard components whose len() is below 3.
SS = [s for s in S if len(s)>=3]
if not SS:
print "No subgraphs here"
return None
# 4.) Identify close subgraphs
# 5.) Rank subgraphs
#main.project.set_selection('gene_ids', acc_gene_ids)
#logger.log("notice", "Gene ids updated")
# Hand the surviving components and the data to the scatter plot and return
# it wrapped in a single-element list.
plt = GraphQueryScatterPlot(SS, Dw)
return [plt]
class GraphQueryScatterPlot(plots.ScatterPlot):
def __init__(self, S, Dw, *args, **kw):
self.S = S