cc

2008-02-06 09:42:46 +00:00 · 2008-02-06 09:42:46 +00:00 · e417547923
commit e417547923
parent 14d24d02c2
1 changed files with 59 additions and 35 deletions
--- a/workflows/demo.py
+++ b/workflows/demo.py
@ -435,51 +435,75 @@ class SubgraphQuery(workflow.Function):
        # 1.) Operate on a subset selection
        selection = main.project.get_selection()
        if not selection.has_key(self._dim):
-            print "not"
            logger.log("notice", "Expected gene ids: %s, but got. %s" %(self._dim, selection.keys()))
            return None
        if len(selection[self._dim]) == 0:
-            print "not3"
-            logger.log("notice", "No selected genes to query")
-            return None
-        Dw = Dw.subdata(self._dim, selection[self._dim])
-        print Dw.shape
+            logger.log("notice", "No selected genes to query, using all")
+            Dw = Dw.subdata(self._dim, Dw.get_identifiers(self._dim)[:100])
+        else:
+            Dw = Dw.subdata(self._dim, selection[self._dim])

        # 2.) Pairwise goodness in loading space
+        indices = self._pairsim(Dw)
+        print indices
+        print indices.shape
+        
+        idents1 = Dw.get_identifiers(self._dim, indices[:,0])
+        idents2 = Dw.get_identifiers(self._dim, indices[:,1])
+        idents = zip(idents1, idents2)
+        
+        # 3.) Identify close subgraphs
+        
+        # 4.) Rank subgraphs
+        
+        main.project.set_selection('gene_ids', idents1)
+        #main.project.set_sele
+        logger.log("notice", "Gene ids updated")
+        #plt = GraphQueryScatterPlot(SS, Dw)
+        #return [plt]
+
+    def _pairsim(self, Dw, ptype='cosine',cut_rat=.2):
+        """Return an array of index pairs [i, j] of close rows of Dw.asarray().
+        ptype : one of ['cov', 'correlation', 'cosine', 'heat', 'euclidean']; else ValueError
+        cut_rat : the cut-off is cut_rat times the largest covariance/heat/distance
+        'cov'/'heat' keep all [i, j] >= cut-off; distances keep <= cut-off with i <= j
+        """
        W = Dw.asarray()
-        if neigh_type == 'cov':
+        if ptype == 'cov':
            W -= W.mean(1)[:,scipy.newaxis]
            wcov = scipy.dot(W, W.T)/(W.shape[1]-1)
-            wcov_min = wcov.max()*max_cov_ratio
-            indices = scipy.where(wcov >= wcov_min)[0]
-        elif neigh_type == 'cosine':
-            import hcluster
-            dp = hcluster.squareform(hcluster.pdist(W, 'cosine'))
-            min_dist = dp.max()*0.1
-            p1, p2 = scipy.where(dp <= min_dist)
+            wcov_min = wcov.max()*cut_rat
+            indices = scipy.asarray(scipy.where(wcov >= wcov_min)).T
+        elif ptype == 'heat':
+            from hcluster import pdist, squareform
+            D = squareform(pdist(W))
+            H = scipy.exp(-D)
+            h_min = H.max()*cut_rat
+            indices = scipy.asarray(scipy.where(H >= h_min)).T
+        elif ptype in ['euclidean', 'cosine', 'correlation']:
+            from hcluster import pdist, squareform
+            # The metric is an argument of pdist; squareform only reshapes.
+            D = squareform(pdist(W, ptype))
+            d_min = D.max()*cut_rat
+            indices = []
+            for i in range(D.shape[0]):
+                for j in range(i, D.shape[0]):
+                    if D[i,j] <= d_min:
+                        indices.append([i,j])
+            indices = scipy.asarray(indices)
+        else:
+            raise ValueError("ptype: %s  not valid" %ptype)
+        return indices

-        acc_gene_ids = Dw.get_identifiers(self._dim, indices=indices)
+    def _subgraphsim(self, Dw, idents, stype='dijkstra'):
+        """Unfinished stub: loops over the pairs in idents to fill graph Gw; Dw is unused."""
+        Gw = nx.XGraph()
+        for edge in idents:
+            e = G.get_edge(edge)  # NOTE(review): G is not defined in this method; presumably a module-level graph -- confirm
+            Gw.add_edge()  # NOTE(review): called without arguments, and e is never used
+        if stype == 'dijkstra':
+            pass  # nothing implemented for this stype yet; the method returns None
        
-        # 3.) Subgraphs
-        G = DA.asnetworkx()
-        common_gids = [i for i in G.nodes() if i in acc_gene_ids]
-        G = nx.subgraph(G, common_gids)
-        S = nx.connected_component_subgraphs(G)
-        n = map(len, S)
-        print n
-        SS = [s for s in S if len(s)>=3]
-        if not SS:
-            print "No subgraphs here"
-            return None
-        # 4.) Identify close subgraphs
-        
-        # 5.) Rank subgraphs
-        
-        #main.project.set_selection('gene_ids', acc_gene_ids)
-        #logger.log("notice", "Gene ids updated")
-        plt = GraphQueryScatterPlot(SS, Dw)
-        return [plt]
-
 class GraphQueryScatterPlot(plots.ScatterPlot):
    def __init__(self, S, Dw, *args, **kw):
        self.S = S