Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Moved lots of stuff back to sandbox

This commit is contained in:
Arnar Flatberg 2007-03-14 16:32:49 +00:00
parent 7f1f639ee7
commit 48047f1395
1 changed files with 24 additions and 143 deletions

View File

@ -8,10 +8,13 @@ from cx_utils import sorted_eig
import numpy import numpy
eps = numpy.finfo(float).eps.item() eps = numpy.finfo(float).eps.item()
feps = numpy.finfo(numpy.single).eps.item() feps = numpy.finfo(numpy.single).eps.item()
_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1} _array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}
class NXUTILSException(Exception): pass
def xgraph_to_graph(G): def xgraph_to_graph(G):
"""Convert an Xgraph to an ordinary graph. """Convert an Xgraph to an ordinary graph.
@ -65,10 +68,11 @@ def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out
try: try:
from Bio import Cluster as CLS from Bio import Cluster as CLS
except: except:
raise ValueError, "Need installed biopython" raise NXUTILSError("Import of Biopython failed")
nVar = len(data) n_var = len(data)
nSamp = len(data[data.keys()[0]]) n_samp = len(data[data.keys()[0]])
X = zeros((nVar, nSamp),dtpye='<f8') X = zeros((nVar, nSamp),dtpye='<f8')
for i, gene in enumerate(ids): #this shuld be right!! for i, gene in enumerate(ids): #this shuld be right!!
X[i,:] = data[gene] X[i,:] = data[gene]
@ -233,6 +237,7 @@ def assortative_index(G):
out.append((d[node], nn_d)) out.append((d[node], nn_d))
return array(out).T return array(out).T
def struct_equivalence(G,n1,n2): def struct_equivalence(G,n1,n2):
"""Returns the structural equivalence of a node pair. Two nodes """Returns the structural equivalence of a node pair. Two nodes
are structural equal if they share the same neighbors. are structural equal if they share the same neighbors.
@ -264,146 +269,22 @@ def hamming_distance(n1,n2):
""" """
pass pass
def graph_corrcoeff(G): def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
"""Not finnished. """Returns the correlation coefficient for each node. The
""" correlation coefficient is between the node and its neighbours.
A,index = NX.adj_matrix(G,with_labels=True)
#C = zeros(*A.shape(),'d')
n = 1.*G.number_of_nodes()
for node in G.nodes():
a_j = A[index[node],:] #neighbors
mean_a = sum(a_j)/n# degree(G)/number_of_nodes()
var_a = sqrt(sum((a_j - mean_a)**2)/n)
pass
def graph_and_data_intersection(data, graph, pathways=None,
keep_connected=True):
"""Returns the intersection of keys in two dictionaries.
NB: keep track of identifer sorting after these dict transforms.
input:
data -- dict, keys: gene id, value: measurement profile
graph -- networkx,base.graph, full graph
pathways -- dict, keys: pathway name, values: nodes in pathway
call:
new_data, new_graph,pathways = graph_and_data_intersection(data,graph,pathways,keep_connected=True)
""" """
new_graph = graph.copy() if nodelist==None:
new_data = {} nodelist=G.nodes()
new_pathways = {} if vec == None:
graph_set = set(graph.nodes()) vec = G.degree(nodelist)
data_set = set(data.keys()) if len(vec)!=len(nodelist):
intersection = data_set & graph_set raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))
new_graph.delete_nodes_from(graph_set - data_set) #remove difference
for k in intersection:
new_data[k] = data[k]
if keep_connected: A = NX.ad_matrix(G, nodelist=nodelist)
max_iter = 0 for i, node in enumerate(nodelist):
sub_graphs = NX.connected_component_subgraphs(new_graph) nei_i = A[i,:]==1
if len(sub_graphs)==0: vec_i = vec[nei_i]
new_graph = sub_graphs[0]
else:
new_graph = sub_graphs[0]
old_data = new_data
while new_graph.number_of_nodes() != len(new_data) and max_iter<100:
max_iter+=1
graph_set = sets.Set(new_graph.nodes())
data_set = sets.Set(new_data.keys())
intersection = data_set & graph_set
new_graph.delete_nodes_from(graph_set - data_set)
new_data={}
for k in intersection:
new_data[k] = old_data[k]
old_data = new_data.copy()
new_graph = NX.connected_component_subgraphs(new_graph)[0]
if pathways!=None:
for pth,nodes in pathways.items():
new_pathways[pth] = [node for node in nodes if node in new_graph]
print "\nSUMMARY (graph_and_data_intersection): "
print "Number of input variables: %s\n\
Number nodes in input graph: %s" %(len(data),len(graph))
print "\nUsing intersection of connected graph and nodes with data values"
print "Number of variables is now: %s" %len(new_data)
print "Number of nodes in graph: %s" %new_graph.number_of_nodes()
if pathways!=None:
return new_data,new_graph,new_pathways
else:
return new_data,new_graph
def rx_graph_and_data_intersection(graph,node_data,pathways,data,keep_connected=False):
"""Returns a (connected) reaction graph with present gene expression data.
keep_connected==True:
When a node (gene) is not present in our expression data, the node
is deleted and all neighbors are connected with edge weight=0.5
if the are not already neigbors.
input:
data -- dict, keys: gene id, value: measurement profile
graph -- networkx.xbase.xgraph, full wieghted graph
node_data -- dict, keys: rx id, value: set of gene_ids
pathways -- dict, keys: pathway name, values: lidt of nodes in pathway
"""
# We do not connect the full graph ... may be performed by using the reference graph?
graph = NX.connected_component_subgraphs(graph)[0] #largest connected component
new_graph = graph.copy()
new_data = {}
new_node_data = node_data.copy()
new_pathways = {}
genes_in_graph=set()
genes_in_data = set(data.keys())
rx_in_graph = set(new_graph.nodes())
# genes in graph nodes (rx_nodes)
for rx in rx_in_graph:
genes_in_graph.update(set(new_node_data.get(rx)))
keep_genes = genes_in_data.intersection(genes_in_graph) #both in graph and data
#update node data
for rx,genes in node_data.items(): # delete node data of nodes not present in graph
genes = set(genes)
genes.intersection_update(keep_genes) #remove genes if they are not in inters.
if len(genes)==0 or rx not in rx_in_graph: #no gene data or not in graph
print "removing: " + str(rx)
del new_node_data[rx]
rx_in_data= set(new_node_data.keys())
rx_intersection = rx_in_data.intersection(rx_in_graph)
for gene in keep_genes:
new_data[gene] = data.get(gene)
# update pathways nodes
for pth,genes in pathways.items():
if genes:
genes = set(genes)
genes.intersection_update(keep_genes) # gene needs to have data
else:
pass
new_pathways[pth] = genes
bad_nodes = rx_in_graph.difference(rx_in_data) #in graph but no data
if keep_connected==True:
dummy = new_graph.copy()
for rx in bad_nodes:
dummy.delete_node(rx)
if len(NX.connected_component_subgraphs(dummy))>1:
nghbrs = new_graph.neighbors(rx)
for i in nghbrs:
for j in nghbrs:
if i!=j:
if not new_graph.has_edge(i,j):
new_graph.add_edge(i,j,0.5)
#update graph
new_graph.delete_nodes_from(list(bad_nodes))
return new_graph,new_node_data,new_pathways,new_data
def weighted_laplacian(G,with_labels=False): def weighted_laplacian(G,with_labels=False):
"""Return standard Laplacian of graph from a weighted adjacency matrix.""" """Return standard Laplacian of graph from a weighted adjacency matrix."""
@ -418,10 +299,10 @@ def weighted_laplacian(G,with_labels=False):
else: else:
return L return L
def subnetworks(G, T2): def grow_subnetworks(G, T2):
"""Return the highest scoring (T2-test) subgraph og G. """Return the highest scoring (T2-test) subgraph og G.
Use simulated annealing to identify highly scoring subgraphs. Use simulated annealing to identify highly grow subgraphs.
ref: -- Ideker et.al (Bioinformatics 18, 2002) ref: -- Ideker et.al (Bioinformatics 18, 2002)
-- Patil and Nielsen (PNAS 2006) -- Patil and Nielsen (PNAS 2006)