Moved lots of stuff back to sandbox

2007-03-14 16:32:49 +00:00 · 2007-03-14 16:32:49 +00:00 · 48047f1395
parent 7f1f639ee7
commit 48047f1395
1 changed files with 24 additions and 143 deletions
--- a/fluents/lib/nx_utils.py
+++ b/fluents/lib/nx_utils.py
@ -8,10 +8,13 @@ from cx_utils import sorted_eig

 import numpy

+
+
 eps = numpy.finfo(float).eps.item()
 feps = numpy.finfo(numpy.single).eps.item()
 _array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}

+class NXUTILSException(Exception): pass

 def xgraph_to_graph(G):
    """Convert an Xgraph to an ordinary graph.
@ -65,10 +68,11 @@ def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out
    try:
        from Bio import Cluster as CLS
    except:
-        raise ValueError, "Need installed biopython"
-    nVar = len(data)
-    nSamp = len(data[data.keys()[0]])
+        raise NXUTILSError("Import of Biopython failed")
+    n_var = len(data)
+    n_samp = len(data[data.keys()[0]])
    X = zeros((nVar, nSamp),dtpye='<f8')
+
    for i, gene in enumerate(ids): #this shuld be right!!
        X[i,:] = data[gene]
    
@ -233,6 +237,7 @@ def assortative_index(G):
            out.append((d[node], nn_d))
    return array(out).T

+
 def struct_equivalence(G,n1,n2):
    """Returns the structural equivalence of a node pair.  Two nodes
    are structural equal if they share the same neighbors.
@ -264,146 +269,22 @@ def hamming_distance(n1,n2):
    """
    pass

-def graph_corrcoeff(G):
-    """Not finnished.
-    """
-    A,index = NX.adj_matrix(G,with_labels=True)
-    #C = zeros(*A.shape(),'d')
-    n = 1.*G.number_of_nodes()
-    for node in G.nodes():
-        a_j = A[index[node],:] #neighbors
-        mean_a = sum(a_j)/n# degree(G)/number_of_nodes()
-        var_a = sqrt(sum((a_j - mean_a)**2)/n)
-        pass
-
-def graph_and_data_intersection(data, graph, pathways=None,
-                                keep_connected=True):
-
-    """Returns the intersection of keys in two dictionaries.
-
-    NB: keep track of identifer sorting after these dict transforms.
-    
-    input:
-          data -- dict, keys: gene id, value: measurement profile
-          graph -- networkx,base.graph, full graph
-          pathways -- dict, keys: pathway name, values: nodes in pathway 
-    call:
-    new_data, new_graph,pathways = graph_and_data_intersection(data,graph,pathways,keep_connected=True)
+def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
+    """Returns the correlation coefficient for each node. The
+    correlation coefficient is between the node and its neighbours.

    """
-    new_graph = graph.copy()
-    new_data = {}
-    new_pathways = {}
-    graph_set = set(graph.nodes())
-    data_set = set(data.keys())
-    intersection = data_set & graph_set
-    new_graph.delete_nodes_from(graph_set - data_set) #remove difference
-    for k in intersection:
-        new_data[k] = data[k]
+    if nodelist==None:
+        nodelist=G.nodes()
+    if vec == None:
+        vec = G.degree(nodelist)
+    if len(vec)!=len(nodelist):
+        raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))

-    if keep_connected:
-        max_iter = 0
-        sub_graphs = NX.connected_component_subgraphs(new_graph)
-        if len(sub_graphs)==0:
-            new_graph = sub_graphs[0]
-        else:
-            new_graph = sub_graphs[0]
-        old_data = new_data
-        while new_graph.number_of_nodes() != len(new_data) and max_iter<100:
-            max_iter+=1
-            graph_set = sets.Set(new_graph.nodes())
-            data_set = sets.Set(new_data.keys())
-            intersection = data_set & graph_set
-            new_graph.delete_nodes_from(graph_set - data_set)
-            new_data={}
-            for k in intersection:
-                new_data[k] = old_data[k]
-            old_data = new_data.copy()
-            new_graph = NX.connected_component_subgraphs(new_graph)[0]
-    if pathways!=None:
-        for pth,nodes in pathways.items():
-            new_pathways[pth] = [node for node in nodes if node in new_graph]
-    print "\nSUMMARY (graph_and_data_intersection): "
-    print "Number of input variables: %s\n\
-    Number nodes in input graph: %s" %(len(data),len(graph))
-    print "\nUsing intersection of connected graph and nodes with data values"
-    print "Number of variables is now: %s" %len(new_data)
-    print "Number of nodes in graph: %s" %new_graph.number_of_nodes()
-    if pathways!=None:
-        return new_data,new_graph,new_pathways
-    else:
-        return new_data,new_graph
-
-def rx_graph_and_data_intersection(graph,node_data,pathways,data,keep_connected=False):
-    """Returns a (connected) reaction graph with present gene expression data.
-
-    keep_connected==True:
-    When a node (gene) is not present in our expression data, the node
-    is deleted and all neighbors are connected with edge weight=0.5
-    if the are not already neigbors.
-
-    input:
-          data -- dict, keys: gene id, value: measurement profile
-          graph -- networkx.xbase.xgraph, full wieghted graph
-          node_data -- dict, keys: rx id, value: set of gene_ids 
-          pathways -- dict, keys: pathway name, values: lidt of nodes in pathway 
-    """
-    # We do not connect the full graph ... may be performed by using the reference graph?
-    graph = NX.connected_component_subgraphs(graph)[0] #largest connected component 
-
-    new_graph = graph.copy()
-    new_data = {}
-    new_node_data = node_data.copy()
-    new_pathways = {}
-    
-    genes_in_graph=set()
-    genes_in_data = set(data.keys())
-    rx_in_graph = set(new_graph.nodes())
-
-    # genes in graph nodes (rx_nodes)
-    for rx in rx_in_graph:
-        genes_in_graph.update(set(new_node_data.get(rx)))
-    keep_genes = genes_in_data.intersection(genes_in_graph) #both in graph and data
-
-    #update node data
-    for rx,genes in node_data.items(): # delete node data of nodes not present in graph
-            genes = set(genes)
-            genes.intersection_update(keep_genes) #remove genes if they are not in inters. 
-            if len(genes)==0 or rx not in rx_in_graph: #no gene data or not in graph
-                print "removing: " + str(rx)
-                del new_node_data[rx]
-    rx_in_data= set(new_node_data.keys())
-    rx_intersection = rx_in_data.intersection(rx_in_graph)
-
-    for gene in keep_genes:
-        new_data[gene] = data.get(gene)
-
-    # update pathways nodes
-    for pth,genes in pathways.items():
-        if genes:
-            genes = set(genes)
-            genes.intersection_update(keep_genes) # gene needs to have data
-        else:
-            pass
-        new_pathways[pth] = genes
-    bad_nodes = rx_in_graph.difference(rx_in_data) #in graph but no data
-
-    if keep_connected==True:
-        dummy = new_graph.copy()
-        for rx in bad_nodes:
-            dummy.delete_node(rx)
-            if len(NX.connected_component_subgraphs(dummy))>1:
-                nghbrs = new_graph.neighbors(rx)
-                for i in nghbrs:
-                    for j in nghbrs:
-                        if i!=j:
-                            if not new_graph.has_edge(i,j):
-                                new_graph.add_edge(i,j,0.5)
-
-    #update graph
-    new_graph.delete_nodes_from(list(bad_nodes))
-    
-    return new_graph,new_node_data,new_pathways,new_data
+    A = NX.ad_matrix(G, nodelist=nodelist)
+    for i, node in enumerate(nodelist):
+        nei_i = A[i,:]==1
+        vec_i = vec[nei_i]

 def weighted_laplacian(G,with_labels=False):
    """Return standard Laplacian of graph from a weighted adjacency matrix."""
@ -418,10 +299,10 @@ def weighted_laplacian(G,with_labels=False):
    else:	
        return L            

-def subnetworks(G, T2):
+def grow_subnetworks(G, T2):
    """Return the highest scoring (T2-test) subgraph og G.

-    Use simulated annealing to identify highly scoring subgraphs.
+    Use simulated annealing to identify highly grow subgraphs.

    ref: -- Ideker et.al (Bioinformatics 18, 2002)
         -- Patil and Nielsen (PNAS 2006)