Moved lots of stuff back to sandbox

2007-03-14 16:32:49 +00:00 · 2007-03-14 16:32:49 +00:00 · 48047f1395
commit 48047f1395
parent 7f1f639ee7
1 changed files with 24 additions and 143 deletions
--- a/fluents/lib/nx_utils.py
+++ b/fluents/lib/nx_utils.py
@ -8,10 +8,13 @@ from cx_utils import sorted_eig
 import numpy
 eps = numpy.finfo(float).eps.item()
 feps = numpy.finfo(numpy.single).eps.item()
 _array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}
 class NXUTILSException(Exception): pass
 def xgraph_to_graph(G):
    """Convert an Xgraph to an ordinary graph.
@ -65,10 +68,11 @@ def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out
    try:
        from Bio import Cluster as CLS
    except:
-        raise ValueError, "Need installed biopython"
+        raise NXUTILSError("Import of Biopython failed")
-    nVar = len(data)
+    n_var = len(data)
-    nSamp = len(data[data.keys()[0]])
+    n_samp = len(data[data.keys()[0]])
    X = zeros((nVar, nSamp),dtpye='<f8')
    for i, gene in enumerate(ids): #this shuld be right!!
        X[i,:] = data[gene]
@ -233,6 +237,7 @@ def assortative_index(G):
            out.append((d[node], nn_d))
    return array(out).T
 def struct_equivalence(G,n1,n2):
    """Returns the structural equivalence of a node pair.  Two nodes
    are structural equal if they share the same neighbors.
@ -264,146 +269,22 @@ def hamming_distance(n1,n2):
    """
    pass
-def graph_corrcoeff(G):
+def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
-    """Not finnished.
+    """Returns the correlation coefficient for each node. The
-    """
+    correlation coefficient is between the node and its neighbours.
    A,index = NX.adj_matrix(G,with_labels=True)
    #C = zeros(*A.shape(),'d')
    n = 1.*G.number_of_nodes()
    for node in G.nodes():
        a_j = A[index[node],:] #neighbors
        mean_a = sum(a_j)/n# degree(G)/number_of_nodes()
        var_a = sqrt(sum((a_j - mean_a)**2)/n)
        pass
 def graph_and_data_intersection(data, graph, pathways=None,
                                keep_connected=True):
    """Returns the intersection of keys in two dictionaries.
    NB: keep track of identifer sorting after these dict transforms.
    input:
          data -- dict, keys: gene id, value: measurement profile
          graph -- networkx,base.graph, full graph
          pathways -- dict, keys: pathway name, values: nodes in pathway 
    call:
    new_data, new_graph,pathways = graph_and_data_intersection(data,graph,pathways,keep_connected=True)
    """
-    new_graph = graph.copy()
+    if nodelist==None:
-    new_data = {}
+        nodelist=G.nodes()
-    new_pathways = {}
+    if vec == None:
-    graph_set = set(graph.nodes())
+        vec = G.degree(nodelist)
-    data_set = set(data.keys())
+    if len(vec)!=len(nodelist):
-    intersection = data_set & graph_set
+        raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))
    new_graph.delete_nodes_from(graph_set - data_set) #remove difference
    for k in intersection:
        new_data[k] = data[k]
-    if keep_connected:
+    A = NX.ad_matrix(G, nodelist=nodelist)
-        max_iter = 0
+    for i, node in enumerate(nodelist):
-        sub_graphs = NX.connected_component_subgraphs(new_graph)
+        nei_i = A[i,:]==1
-        if len(sub_graphs)==0:
+        vec_i = vec[nei_i]
            new_graph = sub_graphs[0]
        else:
            new_graph = sub_graphs[0]
        old_data = new_data
        while new_graph.number_of_nodes() != len(new_data) and max_iter<100:
            max_iter+=1
            graph_set = sets.Set(new_graph.nodes())
            data_set = sets.Set(new_data.keys())
            intersection = data_set & graph_set
            new_graph.delete_nodes_from(graph_set - data_set)
            new_data={}
            for k in intersection:
                new_data[k] = old_data[k]
            old_data = new_data.copy()
            new_graph = NX.connected_component_subgraphs(new_graph)[0]
    if pathways!=None:
        for pth,nodes in pathways.items():
            new_pathways[pth] = [node for node in nodes if node in new_graph]
    print "\nSUMMARY (graph_and_data_intersection): "
    print "Number of input variables: %s\n\
    Number nodes in input graph: %s" %(len(data),len(graph))
    print "\nUsing intersection of connected graph and nodes with data values"
    print "Number of variables is now: %s" %len(new_data)
    print "Number of nodes in graph: %s" %new_graph.number_of_nodes()
    if pathways!=None:
        return new_data,new_graph,new_pathways
    else:
        return new_data,new_graph
 def rx_graph_and_data_intersection(graph,node_data,pathways,data,keep_connected=False):
    """Returns a (connected) reaction graph with present gene expression data.
    keep_connected==True:
    When a node (gene) is not present in our expression data, the node
    is deleted and all neighbors are connected with edge weight=0.5
    if the are not already neigbors.
    input:
          data -- dict, keys: gene id, value: measurement profile
          graph -- networkx.xbase.xgraph, full wieghted graph
          node_data -- dict, keys: rx id, value: set of gene_ids 
          pathways -- dict, keys: pathway name, values: lidt of nodes in pathway 
    """
    # We do not connect the full graph ... may be performed by using the reference graph?
    graph = NX.connected_component_subgraphs(graph)[0] #largest connected component 
    new_graph = graph.copy()
    new_data = {}
    new_node_data = node_data.copy()
    new_pathways = {}
    genes_in_graph=set()
    genes_in_data = set(data.keys())
    rx_in_graph = set(new_graph.nodes())
    # genes in graph nodes (rx_nodes)
    for rx in rx_in_graph:
        genes_in_graph.update(set(new_node_data.get(rx)))
    keep_genes = genes_in_data.intersection(genes_in_graph) #both in graph and data
    #update node data
    for rx,genes in node_data.items(): # delete node data of nodes not present in graph
            genes = set(genes)
            genes.intersection_update(keep_genes) #remove genes if they are not in inters. 
            if len(genes)==0 or rx not in rx_in_graph: #no gene data or not in graph
                print "removing: " + str(rx)
                del new_node_data[rx]
    rx_in_data= set(new_node_data.keys())
    rx_intersection = rx_in_data.intersection(rx_in_graph)
    for gene in keep_genes:
        new_data[gene] = data.get(gene)
    # update pathways nodes
    for pth,genes in pathways.items():
        if genes:
            genes = set(genes)
            genes.intersection_update(keep_genes) # gene needs to have data
        else:
            pass
        new_pathways[pth] = genes
    bad_nodes = rx_in_graph.difference(rx_in_data) #in graph but no data
    if keep_connected==True:
        dummy = new_graph.copy()
        for rx in bad_nodes:
            dummy.delete_node(rx)
            if len(NX.connected_component_subgraphs(dummy))>1:
                nghbrs = new_graph.neighbors(rx)
                for i in nghbrs:
                    for j in nghbrs:
                        if i!=j:
                            if not new_graph.has_edge(i,j):
                                new_graph.add_edge(i,j,0.5)
    #update graph
    new_graph.delete_nodes_from(list(bad_nodes))
    return new_graph,new_node_data,new_pathways,new_data
 def weighted_laplacian(G,with_labels=False):
    """Return standard Laplacian of graph from a weighted adjacency matrix."""
@ -418,10 +299,10 @@ def weighted_laplacian(G,with_labels=False):
    else:	
        return L            
-def subnetworks(G, T2):
+def grow_subnetworks(G, T2):
    """Return the highest scoring (T2-test) subgraph og G.
-    Use simulated annealing to identify highly scoring subgraphs.
+    Use simulated annealing to identify highly grow subgraphs.
    ref: -- Ideker et.al (Bioinformatics 18, 2002)
         -- Patil and Nielsen (PNAS 2006)