From e417547923415e1ace77f0a0fab51f8d70dd22a0 Mon Sep 17 00:00:00 2001 From: flatberg Date: Wed, 6 Feb 2008 09:42:46 +0000 Subject: [PATCH] cc --- workflows/demo.py | 94 +++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/workflows/demo.py b/workflows/demo.py index 3dee4f7..3c7c666 100644 --- a/workflows/demo.py +++ b/workflows/demo.py @@ -435,51 +435,75 @@ class SubgraphQuery(workflow.Function): # 1.) Operate on a subset selection selection = main.project.get_selection() if not selection.has_key(self._dim): - print "not" logger.log("notice", "Expected gene ids: %s, but got. %s" %(self._dim, selection.keys())) return None if len(selection[self._dim]) == 0: - print "not3" - logger.log("notice", "No selected genes to query") - return None - Dw = Dw.subdata(self._dim, selection[self._dim]) - print Dw.shape + logger.log("notice", "No selected genes to query, using all") + Dw = Dw.subdata(self._dim, Dw.get_identifiers(self._dim)[:100]) + else: + Dw = Dw.subdata(self._dim, selection[self._dim]) # 2.) Pairwise goodness in loading space + indices = self._pairsim(Dw) + print indices + print indices.shape + + idents1 = Dw.get_identifiers(self._dim, indices[:,0]) + idents2 = Dw.get_identifiers(self._dim, indices[:,1]) + idents = zip(idents1, idents2) + + # 3.) Identify close subgraphs + + # 4.) Rank subgraphs + + main.project.set_selection('gene_ids', idents1) + #main.project.set_sele + logger.log("notice", "Gene ids updated") + #plt = GraphQueryScatterPlot(SS, Dw) + #return [plt] + + def _pairsim(self, Dw, ptype='cosine',cut_rat=.2): + """Returns close pairs across given dim. + ptype : ['cov', 'correlation', 'cosine', 'heat', 'euclidean'] + """ W = Dw.asarray() - if neigh_type == 'cov': + if ptype == 'cov': W -= W.mean(1)[:,scipy.newaxis] wcov = scipy.dot(W, W.T)/(W.shape[1]-1) - wcov_min = wcov.max()*max_cov_ratio - indices = scipy.where(wcov >= wcov_min)[0] - elif neigh_type == 'cosine': - import hcluster - dp = hcluster.squareform(hcluster.pdist(W, 'cosine')) - min_dist = dp.max()*0.1 - p1, p2 = scipy.where(dp <= min_dist) + wcov_min = wcov.max()*cut_rat + indices = scipy.asarray(scipy.where(wcov >= wcov_min)).T + elif ptype == 'heat': + from hcluster import pdist, squareform + D = squareform(pdist(W)) + H = exp(-D) + h_min = H.max()*cut_rat + indices = scipy.asarray(scipy.where(H >= h_min)).T + elif ptype in ['euclidean', 'cosine', 'correlation']: + from hcluster import pdist, squareform + D = squareform(pdist(W), ptype) + d_min = D.max()*cut_rat + indices = [] + for i in range(D.shape[0]): + for j in range(i, D.shape[0]): + if D[i,j] <= d_min: + indices.append([i,j]) + print "W" + print W.shape + indices = scipy.asarray(indices) + + else: + raise ValueError("ptype: %s not valid" %ptype) + return indices - acc_gene_ids = Dw.get_identifiers(self._dim, indices=indices) + def _subgraphsim(self, Dw, idents, stype='dijkstra'): + # subgraph + Gw = nx.XGraph() + for edge in idents: + e = G.get_edge(edge) + Gw.add_edge() + if stype == 'dijkstra': + pass - # 3.) Subgraphs - G = DA.asnetworkx() - common_gids = [i for i in G.nodes() if i in acc_gene_ids] - G = nx.subgraph(G, common_gids) - S = nx.connected_component_subgraphs(G) - n = map(len, S) - print n - SS = [s for s in S if len(s)>=3] - if not SS: - print "No subgraphs here" - return None - # 4.) Identify close subgraphs - - # 5.) Rank subgraphs - - #main.project.set_selection('gene_ids', acc_gene_ids) - #logger.log("notice", "Gene ids updated") - plt = GraphQueryScatterPlot(SS, Dw) - return [plt] - class GraphQueryScatterPlot(plots.ScatterPlot): def __init__(self, S, Dw, *args, **kw): self.S = S