Moved lots of stuff back to sandbox
This commit is contained in:
parent
7f1f639ee7
commit
48047f1395
|
@ -8,10 +8,13 @@ from cx_utils import sorted_eig
|
|||
|
||||
import numpy
|
||||
|
||||
|
||||
|
||||
eps = numpy.finfo(float).eps.item()
|
||||
feps = numpy.finfo(numpy.single).eps.item()
|
||||
_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}
|
||||
|
||||
class NXUTILSException(Exception): pass
|
||||
|
||||
def xgraph_to_graph(G):
|
||||
"""Convert an Xgraph to an ordinary graph.
|
||||
|
@ -65,10 +68,11 @@ def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out
|
|||
try:
|
||||
from Bio import Cluster as CLS
|
||||
except:
|
||||
raise ValueError, "Need installed biopython"
|
||||
nVar = len(data)
|
||||
nSamp = len(data[data.keys()[0]])
|
||||
raise NXUTILSError("Import of Biopython failed")
|
||||
n_var = len(data)
|
||||
n_samp = len(data[data.keys()[0]])
|
||||
X = zeros((nVar, nSamp),dtpye='<f8')
|
||||
|
||||
for i, gene in enumerate(ids): #this shuld be right!!
|
||||
X[i,:] = data[gene]
|
||||
|
||||
|
@ -233,6 +237,7 @@ def assortative_index(G):
|
|||
out.append((d[node], nn_d))
|
||||
return array(out).T
|
||||
|
||||
|
||||
def struct_equivalence(G,n1,n2):
|
||||
"""Returns the structural equivalence of a node pair. Two nodes
|
||||
are structural equal if they share the same neighbors.
|
||||
|
@ -264,146 +269,22 @@ def hamming_distance(n1,n2):
|
|||
"""
|
||||
pass
|
||||
|
||||
def graph_corrcoeff(G):
|
||||
"""Not finnished.
|
||||
"""
|
||||
A,index = NX.adj_matrix(G,with_labels=True)
|
||||
#C = zeros(*A.shape(),'d')
|
||||
n = 1.*G.number_of_nodes()
|
||||
for node in G.nodes():
|
||||
a_j = A[index[node],:] #neighbors
|
||||
mean_a = sum(a_j)/n# degree(G)/number_of_nodes()
|
||||
var_a = sqrt(sum((a_j - mean_a)**2)/n)
|
||||
pass
|
||||
|
||||
def graph_and_data_intersection(data, graph, pathways=None,
|
||||
keep_connected=True):
|
||||
|
||||
"""Returns the intersection of keys in two dictionaries.
|
||||
|
||||
NB: keep track of identifer sorting after these dict transforms.
|
||||
|
||||
input:
|
||||
data -- dict, keys: gene id, value: measurement profile
|
||||
graph -- networkx,base.graph, full graph
|
||||
pathways -- dict, keys: pathway name, values: nodes in pathway
|
||||
call:
|
||||
new_data, new_graph,pathways = graph_and_data_intersection(data,graph,pathways,keep_connected=True)
|
||||
def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
|
||||
"""Returns the correlation coefficient for each node. The
|
||||
correlation coefficient is between the node and its neighbours.
|
||||
|
||||
"""
|
||||
new_graph = graph.copy()
|
||||
new_data = {}
|
||||
new_pathways = {}
|
||||
graph_set = set(graph.nodes())
|
||||
data_set = set(data.keys())
|
||||
intersection = data_set & graph_set
|
||||
new_graph.delete_nodes_from(graph_set - data_set) #remove difference
|
||||
for k in intersection:
|
||||
new_data[k] = data[k]
|
||||
if nodelist==None:
|
||||
nodelist=G.nodes()
|
||||
if vec == None:
|
||||
vec = G.degree(nodelist)
|
||||
if len(vec)!=len(nodelist):
|
||||
raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))
|
||||
|
||||
if keep_connected:
|
||||
max_iter = 0
|
||||
sub_graphs = NX.connected_component_subgraphs(new_graph)
|
||||
if len(sub_graphs)==0:
|
||||
new_graph = sub_graphs[0]
|
||||
else:
|
||||
new_graph = sub_graphs[0]
|
||||
old_data = new_data
|
||||
while new_graph.number_of_nodes() != len(new_data) and max_iter<100:
|
||||
max_iter+=1
|
||||
graph_set = sets.Set(new_graph.nodes())
|
||||
data_set = sets.Set(new_data.keys())
|
||||
intersection = data_set & graph_set
|
||||
new_graph.delete_nodes_from(graph_set - data_set)
|
||||
new_data={}
|
||||
for k in intersection:
|
||||
new_data[k] = old_data[k]
|
||||
old_data = new_data.copy()
|
||||
new_graph = NX.connected_component_subgraphs(new_graph)[0]
|
||||
if pathways!=None:
|
||||
for pth,nodes in pathways.items():
|
||||
new_pathways[pth] = [node for node in nodes if node in new_graph]
|
||||
print "\nSUMMARY (graph_and_data_intersection): "
|
||||
print "Number of input variables: %s\n\
|
||||
Number nodes in input graph: %s" %(len(data),len(graph))
|
||||
print "\nUsing intersection of connected graph and nodes with data values"
|
||||
print "Number of variables is now: %s" %len(new_data)
|
||||
print "Number of nodes in graph: %s" %new_graph.number_of_nodes()
|
||||
if pathways!=None:
|
||||
return new_data,new_graph,new_pathways
|
||||
else:
|
||||
return new_data,new_graph
|
||||
|
||||
def rx_graph_and_data_intersection(graph,node_data,pathways,data,keep_connected=False):
|
||||
"""Returns a (connected) reaction graph with present gene expression data.
|
||||
|
||||
keep_connected==True:
|
||||
When a node (gene) is not present in our expression data, the node
|
||||
is deleted and all neighbors are connected with edge weight=0.5
|
||||
if the are not already neigbors.
|
||||
|
||||
input:
|
||||
data -- dict, keys: gene id, value: measurement profile
|
||||
graph -- networkx.xbase.xgraph, full wieghted graph
|
||||
node_data -- dict, keys: rx id, value: set of gene_ids
|
||||
pathways -- dict, keys: pathway name, values: lidt of nodes in pathway
|
||||
"""
|
||||
# We do not connect the full graph ... may be performed by using the reference graph?
|
||||
graph = NX.connected_component_subgraphs(graph)[0] #largest connected component
|
||||
|
||||
new_graph = graph.copy()
|
||||
new_data = {}
|
||||
new_node_data = node_data.copy()
|
||||
new_pathways = {}
|
||||
|
||||
genes_in_graph=set()
|
||||
genes_in_data = set(data.keys())
|
||||
rx_in_graph = set(new_graph.nodes())
|
||||
|
||||
# genes in graph nodes (rx_nodes)
|
||||
for rx in rx_in_graph:
|
||||
genes_in_graph.update(set(new_node_data.get(rx)))
|
||||
keep_genes = genes_in_data.intersection(genes_in_graph) #both in graph and data
|
||||
|
||||
#update node data
|
||||
for rx,genes in node_data.items(): # delete node data of nodes not present in graph
|
||||
genes = set(genes)
|
||||
genes.intersection_update(keep_genes) #remove genes if they are not in inters.
|
||||
if len(genes)==0 or rx not in rx_in_graph: #no gene data or not in graph
|
||||
print "removing: " + str(rx)
|
||||
del new_node_data[rx]
|
||||
rx_in_data= set(new_node_data.keys())
|
||||
rx_intersection = rx_in_data.intersection(rx_in_graph)
|
||||
|
||||
for gene in keep_genes:
|
||||
new_data[gene] = data.get(gene)
|
||||
|
||||
# update pathways nodes
|
||||
for pth,genes in pathways.items():
|
||||
if genes:
|
||||
genes = set(genes)
|
||||
genes.intersection_update(keep_genes) # gene needs to have data
|
||||
else:
|
||||
pass
|
||||
new_pathways[pth] = genes
|
||||
bad_nodes = rx_in_graph.difference(rx_in_data) #in graph but no data
|
||||
|
||||
if keep_connected==True:
|
||||
dummy = new_graph.copy()
|
||||
for rx in bad_nodes:
|
||||
dummy.delete_node(rx)
|
||||
if len(NX.connected_component_subgraphs(dummy))>1:
|
||||
nghbrs = new_graph.neighbors(rx)
|
||||
for i in nghbrs:
|
||||
for j in nghbrs:
|
||||
if i!=j:
|
||||
if not new_graph.has_edge(i,j):
|
||||
new_graph.add_edge(i,j,0.5)
|
||||
|
||||
#update graph
|
||||
new_graph.delete_nodes_from(list(bad_nodes))
|
||||
|
||||
return new_graph,new_node_data,new_pathways,new_data
|
||||
A = NX.ad_matrix(G, nodelist=nodelist)
|
||||
for i, node in enumerate(nodelist):
|
||||
nei_i = A[i,:]==1
|
||||
vec_i = vec[nei_i]
|
||||
|
||||
def weighted_laplacian(G,with_labels=False):
|
||||
"""Return standard Laplacian of graph from a weighted adjacency matrix."""
|
||||
|
@ -418,10 +299,10 @@ def weighted_laplacian(G,with_labels=False):
|
|||
else:
|
||||
return L
|
||||
|
||||
def subnetworks(G, T2):
|
||||
def grow_subnetworks(G, T2):
|
||||
"""Return the highest scoring (T2-test) subgraph og G.
|
||||
|
||||
Use simulated annealing to identify highly scoring subgraphs.
|
||||
Use simulated annealing to identify highly grow subgraphs.
|
||||
|
||||
ref: -- Ideker et.al (Bioinformatics 18, 2002)
|
||||
-- Patil and Nielsen (PNAS 2006)
|
||||
|
|
Reference in New Issue