laydi/fluents/lib/nx_utils.py

import os,sys
from itertools import izip
import networkx as NX
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
     outer,maximum,sum,diag,real,atleast_2d
from scipy.linalg import eig,svd,inv,expm,norm
from cx_utils import sorted_eig

import numpy


# Machine epsilon of double- and single-precision floats, stored as plain
# Python floats.
eps = float(numpy.finfo(numpy.float64).eps)
feps = float(numpy.finfo(numpy.float32).eps)
# Maps a dtype character code to a tolerance class used by the kernel
# functions below: 0 selects the single-precision tolerance (feps based),
# 1 the double-precision one (eps based).
_array_precision = dict(f=0, F=0, d=1, D=1, i=1)

class NXUTILSException(Exception):
    """Error type for failures reported by the functions in this module."""


# Functions in this module raise ``NXUTILSError``; bind that name to the
# same class so those raise statements do not die with a NameError.
NXUTILSError = NXUTILSException

def xgraph_to_graph(G):
    """Return a plain graph built from the adjacency lists of an XGraph.

    The graph is exported to a dict-of-lists and re-imported, so edge
    attributes, multiple edges and self-loops are lost in the process.
    """
    adjacency_lists = NX.convert.to_dict_of_lists(G)
    return NX.convert.from_dict_of_lists(adjacency_lists)

def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out='dist'):
    """
    Calculate an affinity matrix from pairwise distances between data rows.

    The rows selected by ``ids`` are stacked into a matrix, their pairwise
    distances are computed with ``Bio.Cluster.distancematrix``, rescaled so
    that the largest distance equals 30, and turned into affinities
    ``31 - scaled_distance`` (values in [1, 31]; larger means closer).

    INPUT

    G:
    The network. Its adjacency matrix, ordered by ``ids``, is multiplied
    with the result for out='dist' and out='heat_kernel'.

    data:
    gene expression data, type dict data[gene] = expression-vector

    ids:
    Sequence of keys of ``data``; fixes the row/column order of the result
    and must match the node list of ``G``.

    mask, weight, t:
    Accepted for backward compatibility; the computation does not use them
    (the heat kernel always uses a diffusion time of 1.0).

    dist:
    The character dist defines the distance function to be used:
    dist=='e': Euclidean distance
    dist=='b': City Block distance
    dist=='h': Harmonically summed Euclidean distance
    dist=='c': Pearson correlation
    dist=='a': absolute value of the correlation
    dist=='u': uncentered correlation
    dist=='x': absolute uncentered correlation
    dist=='s': Spearman's rank correlation
    dist=='k': Kendall's tau
    For other values of dist, the default (Euclidean distance) is used.

    out:
    'dist'        -- affinities multiplied with the adjacency matrix of G
    'heat_kernel' -- exp(-D*A) of the unscaled distances D
    'complete'    -- affinities for all pairs
    anything else -- an empty list

    OUTPUT
    The matrix selected by ``out`` (len(ids) x len(ids)), or [].

    Raises NXUTILSException when Biopython cannot be imported and
    ValueError when ``ids`` is empty.
    """
    try:
        from Bio import Cluster as CLS
    except ImportError:
        raise NXUTILSException("Import of Biopython failed")
    if len(ids) == 0:
        raise ValueError("ids must contain at least one key of data")

    # The matrix must line up with NX.adj_matrix(G, nodelist=ids) below,
    # so its size follows ids rather than len(data).
    n_var = len(ids)
    n_samp = len(data[ids[0]])
    X = zeros((n_var, n_samp), dtype='<f8')
    for i, gene in enumerate(ids):
        X[i, :] = data[gene]

    # distancematrix returns a ragged lower triangle: row i holds the
    # distances to rows 0..i-1 (row 0 is empty).
    D_list = CLS.distancematrix(X, dist=dist)
    D = zeros((n_var, n_var), dtype='<f8')
    for i, row in enumerate(D_list):
        if i > 0:
            D[i, :len(row)] = row

    D = D + D.T  # mirror the lower triangle into a symmetric matrix
    MAX = 30.0
    D_max = D.max() / MAX
    if D_max > 0:
        D_n = D / D_max  # normalised (max = MAX)
    else:
        D_n = D.copy()  # all distances are zero; avoid dividing by zero
    D_n = (MAX + 1.) - D_n  # turn distances into affinities

    A = NX.adj_matrix(G, nodelist=ids)
    if out == 'dist':
        return D_n * A
    elif out == 'heat_kernel':
        return exp(-1.0 * D * A)
    elif out == 'complete':
        return D_n
    else:
        return []

def remove_one_degree_nodes(G, iter=True):
    """Return a copy of G without nodes that have exactly one neighbour.

    Such nodes do not contribute to community structure.

    input:
             G -- graph (not modified)
             iter -- if true, repeat the removal until no node with a
                     single neighbour is left; otherwise do one pass only
    output:
             the pruned copy of G

    The number of removed nodes is printed.
    """
    G_copy = G.copy()
    while True:
        # Collect first, delete afterwards, so the graph is never changed
        # while its nodes are being inspected.
        bad_nodes = [node for node in G_copy.nodes()
                     if len(G_copy.neighbors(node)) == 1]
        if not bad_nodes:
            break
        G_copy.delete_nodes_from(bad_nodes)
        if not iter:
            break

    print("Deleted %s nodes from network" % (len(G) - len(G_copy)))
    return G_copy

def key_players(G, n=1, with_labels=False):
    """
    Resilience measure.

    Each node is removed in turn from a copy of G and the remaining graph
    is split into connected components. The fraction of the remaining
    nodes that lies outside the largest component measures how much the
    removed node holds the network together.

    input:
           G -- graph (not modified)
           n -- unused, kept for backward compatibility
           with_labels -- also return the per-node dictionary

    output:
           out -- array with, per node of G.nodes(), one minus the
                  disconnected fraction (1.0 means removing the node does
                  not split the network)
           labels -- (only if with_labels) dict node -> disconnected fraction

    A running counter is printed for every processed node.
    """
    i = 0
    frac = []
    labels = {}
    for node in G.nodes():
        i += 1
        print(i)
        T = G.copy()
        T.delete_node(node)
        n_nodes = T.number_of_nodes()
        sub_graphs = NX.connected_component_subgraphs(T)
        if len(sub_graphs) > 1:
            # Do not rely on the ordering of the components: look up the
            # size of the largest one explicitly.
            largest = max([sg.number_of_nodes() for sg in sub_graphs])
            fraction = 1.0 - 1.0 * largest / n_nodes
        else:
            fraction = 0.0
        frac.append(fraction)
        labels[node] = fraction

    out = 1.0 - numpy.array(frac)
    if with_labels == True:
        return out, labels
    else:
        return out

def node_weighted_adj_matrix(G, weights=None, ave_type='harmonic', with_labels=False):
    """Return an adjacency matrix whose entries are averaged node weights.

    For every edge (head, tail) the symmetric entries a[head, tail] and
    a[tail, head] are set to an average of the two node weights.

    input: G -- graph
           weights -- dict, keys: nodes, values: weights (required)
           ave_type -- 'geometric': sqrt(w_head * w_tail)
                       'harmonic': 2 * w_head * w_tail / (w_head + w_tail),
                       taken as 0 when the two weights sum to zero
           with_labels -- True/False, return labels?

    output: A -- weighted adjacency matrix
            [index] -- dict mapping node -> row/column position

    Raises ValueError when weights is missing or ave_type is unknown.
    """
    if weights is None:
        raise ValueError("weights must map every node of G to a number")
    if ave_type not in ('geometric', 'harmonic'):
        raise ValueError("Unknown ave_type: %s" % (ave_type,))

    # Map node name to matrix position.
    nodes = G.nodes()
    index = {}
    for count, node in enumerate(nodes):
        index[node] = count

    n = len(nodes)
    a = zeros((n, n))
    for edge in G.edges():
        # Only the two end points are used, so edges that carry extra
        # data after (head, tail) are accepted as well.
        head, tail = edge[0], edge[1]
        w_head = weights[head]
        w_tail = weights[tail]
        if ave_type == 'geometric':
            value = sqrt(w_head * w_tail)
        else:
            total = w_head + w_tail
            if total:
                value = 2.0 * w_head * w_tail / total
            else:
                value = 0.0
        a[index[head], index[tail]] = value
        a[index[tail], index[head]] = value
    if with_labels:
        return a, index
    else:
        return a

def weighted_adj_matrix(G, with_labels=False):
    """Return the adjacency matrix of G with edge data as entries.

    The 0/1 adjacency matrix from NX.adj_matrix is converted to float and
    every entry that corresponds to an edge is overwritten, symmetrically,
    with the value returned by G.get_edge.

    input:  G -- graph whose edges carry a weight (XGraph)
            with_labels -- also return the node -> index dictionary
    output: W, or (W, labels) when with_labels is True
    """
    adjacency, labels = NX.adj_matrix(G, with_labels=True)
    W = adjacency.astype('<f8')
    for source, row in labels.items():
        for target, col in labels.items():
            if not G.has_edge(source, target):
                continue
            weight = G.get_edge(source, target)
            W[row, col] = weight
            W[col, row] = weight
    if with_labels == True:
        return W, labels
    return W

def assortative_index(G):
    """Return node degrees and the average degree of their neighbours.

    Used to measure assortative mixing: if the neighbour average degree
    is positively correlated with the degree, hubs tend to connect to
    other hubs.

    input: G -- graph
    output: array with two rows, the degree of each node and the mean
            degree of its neighbours, in G.nodes() order. Nodes without
            neighbours are skipped.
    """
    d = G.degree(with_labels=True)
    out = []
    for node in G.nodes():
        nn = G.neighbors(node)
        if len(nn) > 0:
            nn_d = numpy.mean([float(d[i]) for i in nn])
            out.append((d[node], nn_d))
    return numpy.array(out).T


def struct_equivalence(G, n1, n2):
    """Return the structural equivalence distance of a node pair.

    Two nodes are structurally equal if they share the same neighbours.

    x_s = (|ne(n1) union ne(n2)| - |ne(n1) intersection ne(n2)|) /
          (|ne(n1) union ne(n2)| + |ne(n1) intersection ne(n2)|)
    ref: Brun et.al 2003

    The result is a float in [0, 1]: 0.0 for identical neighbourhoods
    (including the case where neither node has neighbours) and 1.0 for
    disjoint ones.
    """
    s1 = set(G.neighbors(n1))
    s2 = set(G.neighbors(n2))
    num_union = len(s1.union(s2))
    num_intersection = len(s1.intersection(s2))
    denominator = num_union + num_intersection
    if denominator == 0:
        # Neither node has neighbours: nothing distinguishes them.
        return 0.0
    # float() forces true division on Python 2 as well.
    return (num_union - num_intersection) / float(denominator)

def struct_equivalence_all(G):
    """Unfinished placeholder.

    Only builds the labelled adjacency matrix of G; no equivalence values
    are computed and None is returned.
    """
    adjacency, node_index = NX.adj_matrix(G, with_labels=True)
    return None

def hamming_distance(n1, n2):
    """Unfinished placeholder: ignores its arguments and returns None."""
    return None

def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
    """Unfinished: intended to correlate each node with its neighbours.

    input:
           G -- graph
           vec -- one value per node in nodelist (default: node degrees)
           nodelist -- node ordering (default: G.nodes())
           sim -- unused

    Currently only validates the input and extracts, per node, the values
    of its neighbours; no coefficient is computed and None is returned.

    Raises NXUTILSException when vec and nodelist differ in length.
    """
    # Identity tests: '== None' on an array compares element-wise.
    if nodelist is None:
        nodelist = G.nodes()
    if vec is None:
        vec = G.degree(nodelist)
    if len(vec) != len(nodelist):
        raise NXUTILSException(
            "The node value vector is not of same length (%s) as the nodelist(%s)"
            % (len(vec), len(nodelist)))

    # Plain arrays so that a row of A can be used as a boolean mask on vec.
    vec = numpy.asarray(vec)
    A = numpy.asarray(NX.adj_matrix(G, nodelist=nodelist))
    for i, node in enumerate(nodelist):
        nei_i = A[i, :] == 1
        vec_i = vec[nei_i]  # neighbour values of node; not used yet (TODO)

def weighted_laplacian(G, with_labels=False):
    """Return the standard Laplacian L = D - A of a weighted graph.

    A is the matrix from weighted_adj_matrix(G) and D the diagonal matrix
    of its column sums.

    input:  G -- graph
            with_labels -- also return the node -> index dictionary
    output: L, or (L, index) when with_labels is true
    """
    # Build the adjacency matrix once; the labels come along for free.
    A, index = weighted_adj_matrix(G, with_labels=True)
    A = asarray(A)
    L = diag(A.sum(0)) - A
    if with_labels:
        return L, index
    else:
        return L

def grow_subnetworks(G, T2):
    """Unfinished: search for the highest scoring (T2-test) subgraph of G.

    Intended to use simulated annealing to identify high-scoring subgraphs.

    input:
           G -- graph
           T2 -- mapping indexed by node (read as T2[node])

    As written the function cannot complete and returns nothing; see the
    NOTE(review) comments in the body.

    ref: -- Ideker et.al (Bioinformatics 18, 2002)
         -- Patil and Nielsen (PNAS 2006)
    
    """
    N = 1000
    # Every node starts in the "off" state.
    states = [(node, False) for node in G.nodes()]
    t2_last = 0.0
    for i in xrange(N):
        if i==0: #assign random states
            # NOTE(review): `rand` is not imported in the visible part of
            # this file. The filter also drops the unselected nodes from
            # `states` instead of keeping them with a False flag.
            states = [(state[0], True) for state in states if rand(1)>.5]
        sub_nodes = [state[0] for state in states if state[1]]
        Gsub = NX.subgraph(G, sub_nodes)
        # NOTE(review): key_players in this file calls
        # NX.connected_component_subgraphs (singular "component"); the
        # spelling here differs -- confirm which one exists.
        Gsub = NX.connected_components_subgraphs(Gsub)[0]
        # NOTE(review): t2 is a list of per-node scores but is compared
        # with the float t2_last below; no aggregate score is computed and
        # t2_last is never updated.
        t2 = [T2[node] for node in Gsub]
        if t2>t2_last:
            pass
        else:
            # NOTE(review): numpy.exp() is called without an argument; the
            # acceptance probability is not implemented.
            p = numpy.exp()
            
    
"""Below are methods for calculating graph metrics

Four main decompositions :
0.) Adjacency diffusion kernel expm(A),
1.) von neumann kernels (diagonalisation of adjacency matrix)

2.) laplacian kernels (geometric series of adj.)

3.) diffusion kernels (exponential series of adj.)

---- Kv
von_neumann : Kv = (I-alpha*A)^-1 (mod: A(I-alpha*A)^-1)? ,
geom. series

---- Kl
laplacian: Kl = (I-alpha*L)^-1 , geom. series

---- Kd
laplacian_diffusion: Kd = expm(-alpha*L)
exp. series

---- Ke
Exponential diffusion.
Ke = expm(A) .... expm(-A)?

"""

# TODO:
# check for numerically unstable eigenvalues and set them to zero,
# otherwise some inverses will explode -> ok, using pinv for inverses
#
# This gives results that look numerically unstable
#
# -- divided adj by sum(A[:]), check this one (paper by Lebart scales with number of edges)
#
#
#
# the Neumann kernel is defined in Kandola to be K = A*(I-A)^-1
# lowest eigenvectors are the same as the highest of K = A*A ?
# this needs clarification

# diffusion is still wrong! ... ok
# diff needs normalisation?! check the meaning of exp(-s) = exp(1/s) -L = 1/degree ... etc
# Is it the negative of the exp. of the adj. matrix in Kandola?
#
# Normalised=False returns only nans (no idea why!!) ... fixed ok

# 31.1: diff is ok, exp(0)=1 not zero!
# 07.03.2005: normalisation is ok: -> normalisation will emphasize high degree nodes
# 10.03.2005: symeig is unstable and returns nans for some eigenvectors? switching back to eig
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
# 13.09.2006: update for use in numpy

# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last 

def K_expAdj(W, normalised=True, alpha=1.0):
    """Exponential-type kernel of an adjacency matrix (cf. Kandola).

    input:
            -- W, real square adjacency matrix
            -- normalised [True/False], use T*W*T with
               T = diag(1/sqrt(column sums of W))
            -- alpha, scale factor of the exponentiated spectrum

    The (normalised) matrix is eigen-decomposed as vr*diag(e)*inv(vr).
    With s = real(e)**2 + max(real(e)**2), the result is
    vr*diag(p)*inv(vr) where p_i = 0.5*alpha*exp(s_i) if |s_i| exceeds a
    precision dependent cutoff and p_i = 1 otherwise.

    NOTE(review): this is not literally expm(W); confirm the intended
    definition against the reference.
    Columns that sum to zero give infinite scaling factors when
    normalised is True.

    Raises ValueError for non 2-d input and TypeError for complex input.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape) != 2:
        raise ValueError("Non-matrix input to matrix function.")
    m, n = W.shape
    if t in ['F', 'D']:
        raise TypeError("Complex input!")
    if normalised == True:
        T = diag(sqrt(1. / (sum(W, 0))))
        W = dot(dot(T, W), T)
    e, vr = eig(W)
    s = real(e)**2  # from eigenvalues to singular values
    vri = inv(vr)
    s = maximum.reduce(s) + s
    # dtype codes missing from the table (e.g. 64-bit integers) are
    # treated as double precision instead of raising KeyError.
    cond = {0: feps*1e3, 1: eps*1e6}[_array_precision.get(t, 1)]
    cutoff = abs(cond * maximum.reduce(s))
    psigma = eye(m)
    for i, s_i in enumerate(s):
        if abs(s_i) > cutoff:
            psigma[i, i] = .5 * alpha * exp(s_i)

    return dot(dot(vr, psigma), vri)

def K_vonNeumann(W, normalised=True, alpha=1.0):
    """Kernel from the inverted, shifted spectrum of the adjacency matrix.

    input:
            -- W, real square adjacency matrix
            -- normalised [True/False], use T*W*T with
               T = diag(1/sqrt(column sums of W))
            -- alpha, unused

    The (normalised) matrix is eigen-decomposed as vr*diag(e)*inv(vr).
    The real parts of the eigenvalues are shifted by their maximum, those
    above a precision dependent cutoff are inverted (the rest are set to
    zero) and the matrix vr*diag(1/e)*inv(vr) is returned.

    The result is float32 for float32 input and float64 otherwise, so
    integer adjacency matrices no longer truncate the kernel to integers.
    Columns that sum to zero give infinite scaling factors when
    normalised is True.

    Raises ValueError for non 2-d input and TypeError for complex input.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape) != 2:
        raise ValueError("Non-matrix input to matrix function.")
    m, n = W.shape
    if t in ['F', 'D']:
        raise TypeError("Complex input!")
    # Work and return in floating point whatever the input dtype is.
    if t in ('f', 'd'):
        out_t = t
    else:
        out_t = 'd'

    if normalised == True:
        T = diag(sqrt(1. / (sum(W, 0))))
        W = dot(dot(T, W), T)
    e, vr = eig(W)
    vri = inv(vr)
    e = real(e)  # we only work with real pos. eigvals
    e = maximum.reduce(e) + e
    # dtype codes missing from the table are treated as double precision.
    cond = {0: feps*1e3, 1: eps*1e6}[_array_precision.get(t, 1)]
    cutoff = cond * maximum.reduce(e)
    psigma = zeros((m, n), out_t)
    for i, e_i in enumerate(e):
        if e_i > cutoff:
            psigma[i, i] = 1.0 / e_i  # these are eig.vals (=sqrt(sing.vals))
    return real(dot(dot(vr, psigma), vri)).astype(out_t)

def K_laplacian(W, normalised=True, alpha=1.0):
    """Pseudo inverse of the graph Laplacian, blended with the identity.

    Also known as the average commute time matrix.

    input:
            -- W, real square adjacency matrix
            -- normalised [True/False], use T*L*T with
               T = diag(1/sqrt(column sums of W))
            -- alpha, weight of the kernel in (1-alpha)*I + alpha*K

    L = D - W (D = diagonal of column sums) is eigen-decomposed;
    eigenvalues above a precision dependent cutoff are inverted, the
    others are set to zero.

    The kernel is computed in float32 for float32 input and in float64
    otherwise, so integer adjacency matrices no longer truncate it.
    Columns that sum to zero give infinite scaling factors when
    normalised is True.

    Raises ValueError for non 2-d input and TypeError for complex input.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape) != 2:
        raise ValueError("Non-matrix input to matrix function.")
    m, n = W.shape
    if t in ['F', 'D']:
        raise TypeError("Complex input!")
    # Work in floating point whatever the input dtype is.
    if t in ('f', 'd'):
        out_t = t
    else:
        out_t = 'd'

    D = diag(sum(W, 0))
    L = D - W
    if normalised == True:
        T = diag(sqrt(1. / sum(W, 0)))
        L = dot(dot(T, L), T)
    e, vr = eig(L)
    e = real(e)
    vri = inv(vr)
    # dtype codes missing from the table are treated as double precision.
    cond = {0: feps*1e3, 1: eps*1e6}[_array_precision.get(t, 1)]
    cutoff = cond * maximum.reduce(e)
    psigma = zeros((m,), out_t)  # if s close to zero -> set 1/s = 0
    for i, e_i in enumerate(e):
        if e_i > cutoff:
            psigma[i] = 1.0 / e_i
    K = real(dot(dot(vr, diag(psigma)), vri)).astype(out_t)
    I = eye(n)
    K = (1 - alpha) * I + alpha * K
    return K


def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
    """Returns diffusion kernel, computed by eigen-decomposition.

    input:
            -- W, real square adjacency matrix
            -- normalised [True/False], use T*L*T with
               T = diag(1/sqrt(column sums of W))
            -- alpha, [0,1] (degree of network influence), weight of the
               kernel in (1-alpha)*I + alpha*K
            -- beta, [0->), (diffusion degree)
            -- use_cut, unused

    L = D - W is decomposed as vr*diag(e)*inv(vr) and the kernel is
    vr*diag(exp(-beta*e))*inv(vr); eigenvalues whose magnitude is below a
    precision dependent cutoff are treated as exactly zero (exp(0) = 1).
    Columns that sum to zero give infinite scaling factors when
    normalised is True.

    Raises ValueError for non 2-d input and TypeError for complex input.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape) != 2:
        raise ValueError("Non-matrix input to matrix function.")
    m, n = W.shape
    if t in ['F', 'D']:
        raise TypeError("Complex input!")
    D = diag(W.sum(0))
    L = D - W
    if normalised == True:
        T = diag(sqrt(1. / W.sum(0)))
        L = dot(dot(T, L), T)
    e, vr = eig(L)
    # Keep the real parts only, like the sibling kernels do: the values
    # are written into a real matrix below.
    e = real(e)
    vri = inv(vr)
    # dtype codes missing from the table are treated as double precision.
    cond = 1.0 * {0: feps*1e3, 1: eps*1e6}[_array_precision.get(t, 1)]
    cutoff = 1. * abs(cond * maximum.reduce(e))
    psigma = eye(m)  # eigenvalues that count as zero keep exp(0) = 1
    for i, e_i in enumerate(e):
        if abs(e_i) > cutoff:
            psigma[i, i] = exp(-beta * e_i)
    K = real(dot(dot(vr, psigma), vri))
    I = eye(n, dtype='<f8')
    K = (1. - alpha) * I + alpha * K
    return K

def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
    """Returns diffusion kernel expm(-beta*L), using the matrix exponential.

    input:
            -- W, adj. matrix (array with a .sum method)
            -- normalised [True/False], use T*L*T with
               T = diag(1/sqrt(column sums of W))
            -- beta, [0->), (diffusion degree)
            -- alpha, ncomp: unused
    """
    degrees = W.sum(0)
    laplacian = diag(degrees) - W
    if normalised == True:
        scaling = diag(sqrt(1. / degrees))
        laplacian = dot(dot(scaling, laplacian), scaling)
    return expm(-beta * laplacian)
    
    
def K_modularity(W, alpha=1.0):
    """Returns the matrix square root of Newman's modularity matrix.

    input:
            -- W, adjacency matrix
            -- alpha, weight of the kernel in (1-alpha)*I + alpha*K

    The modularity matrix B = W - d*d'/sum(d) (d = column sums of W) is
    decomposed with sorted_eig; eigenvalues above 1e-7 are replaced by
    their square root, all others by zero.
    """
    W = asarray(W)
    _, size = W.shape
    degree = sum(W, 0)
    total = 1. * sum(degree)
    B = W - (outer(degree, degree) / total)
    eigvals, eigvecs = sorted_eig(B, sort_by='lm')
    root = zeros((size, size), dtype='<f8')
    for pos, value in enumerate(eigvals):
        if value > 1e-7:
            root[pos, pos] = sqrt(value)
    K = dot(dot(eigvecs, root), eigvecs.T)
    return (1 - alpha) * eye(size) + alpha * K

def kernel_score(K, W):
    """Returns the modularity score.

    K -- (modularity) kernel
    W -- adjacency matrix (possibly weighted), 2-d; it is not modified

    output:
           score -- diagonal of Wn*K*Wn, where Wn is W with every column
                    scaled to unit Euclidean length (all-zero columns are
                    left as they are)
           tot -- sum of score

    NOTE(review): the product is Wn*K*Wn, not Wn'*K*Wn; the two agree for
    symmetric W -- confirm which one is intended.

    Raises ValueError when W is not 2-d.
    """
    # Float copy: the caller's matrix stays untouched and integer input
    # is not truncated by the normalisation.
    Wn = numpy.array(W, dtype=float)
    if Wn.ndim != 2:
        raise ValueError("W must be a 2-d matrix")
    col_norms = numpy.sqrt((Wn ** 2).sum(axis=0))
    col_norms[col_norms == 0] = 1.0  # avoid 0/0 for empty columns
    Wn = Wn / col_norms
    score = diag(dot(Wn, dot(K, Wn)))
    tot = sum(score)
    return score, tot


def modularity_matrix(G, nodelist=None):
    """Return the modularity matrix B = A - d*d'/(2m) of a graph.

    input:
           G -- graph
           nodelist -- optional node subset/ordering; when given, the
                       matrix is computed for the subgraph on these nodes

    A is the adjacency matrix, d the vector of node degrees and m the
    number of edges, so 2m equals the sum of the degrees (the same
    normalisation K_modularity uses). For a graph without edges the
    adjacency matrix itself is returned as floats.
    """
    if not nodelist:
        nodelist = G.nodes()
    else:
        G = NX.subgraph(G, nodelist)

    A = NX.adj_matrix(G, nodelist=nodelist)
    d = atleast_2d(G.degree(nbunch=nodelist))
    two_m = 2.0 * G.number_of_edges()
    if two_m == 0:
        # No edges: the null-model term vanishes; avoid dividing by zero.
        return 1. * A
    B = A - dot(d.T, d) / two_m
    return B
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`import os,sys`
			`from itertools import izip`
			`import networkx as NX`
			`from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\`
Lib updates 2007-07-23 19:33:21 +02:00			`outer,maximum,sum,diag,real,atleast_2d`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`from scipy.linalg import eig,svd,inv,expm,norm`
			`from cx_utils import sorted_eig`

			`import numpy`

Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00

First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`eps = numpy.finfo(float).eps.item()`
			`feps = numpy.finfo(numpy.single).eps.item()`
			`_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}`

Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00			`class NXUTILSException(Exception): pass`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00
			`def xgraph_to_graph(G):`
			`"""Convert an Xgraph to an ordinary graph.`
			`Edge attributes, mult.edges and self-loops are lost in the process.`
			`"""`

			`GG = NX.convert.from_dict_of_lists(NX.convert.to_dict_of_lists(G))`
			`return GG`

			`def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out='dist'):`
			`"""`
			`Function for calculating a general affinity matrix, based upon distances.`
			`Affiniy = 1 - distance ((10-1) 1 is far apart)`
			`INPUT`

			`data:`
			`gene expression data, type dict data[gene] = expression-vector`

			`G:`
			`The network (networkx.base.Graph object)`

			`mask:`
			`The array mask shows which data are missing. If mask[i][j]==0, then`
			`data[i][j] is missing.`

			`weights:`
			`The array weight contains the weights to be used when calculating distances.`

			`transpose:`
			`If transpose==0, then genes are clustered. If transpose==1, microarrays are`
			`clustered.`

			`dist:`
			`The character dist defines the distance function to be used:`
			`dist=='e': Euclidean distance`
			`dist=='b': City Block distance`
			`dist=='h': Harmonically summed Euclidean distance`
			`dist=='c': Pearson correlation`
			`dist=='a': absolute value of the correlation`
			`dist=='u': uncentered correlation`
			`dist=='x': absolute uncentered correlation`
			`dist=='s': Spearman's rank correlation`
			`dist=='k': Kendall's tau`
			`For other values of dist, the default (Euclidean distance) is used.`

			`OUTPUT`
			`D :`
			`Similariy matrix (nGenes x nGenes), symetric, d_ij e in [0,1]`
			`Normalized so max weight = 1.0`
			`"""`
			`try:`
			`from Bio import Cluster as CLS`
			`except:`
Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00			`raise NXUTILSError("Import of Biopython failed")`
			`n_var = len(data)`
			`n_samp = len(data[data.keys()[0]])`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`X = zeros((nVar, nSamp),dtpye='<f8')`
Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00
			`for i, gene in enumerate(ids): #this shuld be right!!`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`X[i,:] = data[gene]`


			`#X = transpose(X) # distancematrix needs matrix as (nGenes,nSamples)`

			`D_list = CLS.distancematrix(X, dist=dist)`
			`D = zeros((nVar,nVar),dtype='<f8')`
			`for i,row in enumerate(D_list):`
			`if i>0:`
			`D[i,:len(row)]=row`

			`D = D + D.T`
			`MAX = 30.0`
			`D_max = max(ravel(D))/MAX`
			`D_n = D/D_max #normalised (max = 10.0)`
			`D_n = (MAX+1.) - D_n #using correlation (inverse distance for dists)`

			`A = NX.adj_matrix(G, nodelist=ids)`
			`if out=='dist':`
			`return D_n*A`
			`elif out=='heat_kernel':`
			`t=1.0`
			`K = exp(-tDA)`
			`return K`
			`elif out=='complete':`
			`return D_n`
			`else:`
			`return []`

			`def remove_one_degree_nodes(G, iter=True):`
			`"""Removes all nodes with only one neighbour. These nodes does`
			`not contribute to community structure.`
			`input:`
			`G -- graph`
			`iter -- True/False iteratively remove?`
			`"""`
			`G_copy = G.copy()`
			`if iter==True:`
			`while 1:`
			`bad_nodes=[]`
			`for node in G_copy.nodes():`
			`if len(G_copy.neighbors(node))==1:`
			`bad_nodes.append(node)`
			`if len(bad_nodes)>0:`
			`G_copy.delete_nodes_from(bad_nodes)`
			`else:`
			`break`
			`else:`
			`bad_nodes=[]`
			`for ngb in G_copy.neighbors_iter():`
			`if len(G_copy.neighbors(node))==1:`
			`bad_nodes.append(node)`
			`if len(bad_nodes)>0:`
			`G_copy.delete_nodes_from(bad_nodes)`

			`print "Deleted %s nodes from network" %(len(G)-len(G_copy))`
			`return G_copy`

			`def key_players(G, n=1, with_labels=False):`
			`"""`
			`Resilince measure`
			`Identification of key nodes by fraction of nodes in`
			`disconnected subgraph when the node is removed.`

			`output:`
			`fraction of nodes disconnected when node i is removed`
			`"""`
			`i=0`
			`frac=[]`
			`labels = {}`
			`for node in G.nodes():`
			`i+=1`
			`print i`
			`T = G.copy()`
			`T.delete_node(node)`
			`n_nodes = T.number_of_nodes()`
			`sub_graphs = NX.connected_component_subgraphs(T)`
			`n = len(sub_graphs)`
			`if n>1:`
			`strong_comp = sub_graphs[0]`
			`fraction = 1.0 - 1.0*strong_comp.number_of_nodes()/n_nodes`
			`frac.append(fraction)`
			`labels[node]=fraction`

			`else:`
			`frac.append(0.0)`
			`labels[node]=0.0`

			`out = 1.0 - array(frac)`
			`if with_labels==True:`
			`return out,labels`
			`else:`
			`return out`

			`def node_weighted_adj_matrix(G, weights=None, ave_type='harmonic', with_labels=False):`
			`"""Return a weighted adjacency matrix of graph. The weights are`
			`node weights.`
			`input: G -- graph`
			`weights -- dict, keys: nodes, values: weights`
			`with_labels -- True/False, return labels?`

			`output: A -- weighted eadjacency matrix`
			`[index] -- node labels`

			`"""`
			`n=G.order()`
			`# make an dictionary that maps vertex name to position`
			`index={}`
			`count=0`
			`for node in G.nodes():`
			`index[node]=count`
			`count = count+1`

			`a = zeros((n,n))`
			`if type(G)=='networkx.xbase.XGraph':`
			`raise`
			`for head,tail in G.edges():`
			`if ave_type == 'geometric':`
			`a[index[head],index[tail]]= sqrt(weights[head]*weights[tail])`
			`a[index[tail],index[head]]= a[index[head],index[tail]]`
			`elif ave_type == 'harmonic':`
			`a[index[head],index[tail]] = mean(weights[head],weights[tail])`
			`a[index[tail],index[head]]= mean(weights[head],weights[tail])`
			`if with_labels:`
			`return a,index`
			`else:`
			`return a`

			`def weighted_adj_matrix(G, with_labels=False):`
			`"""Adjacency matrix of an XGraph whos weights are given in edges.`
			`"""`
Multiple lib changes 2007-01-25 12:58:10 +01:00			`A, labels = NX.adj_matrix(G, with_labels=True)`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`W = A.astype('<f8')`
Multiple lib changes 2007-01-25 12:58:10 +01:00			`for orf, i in labels.items():`
			`for orf2, j in labels.items():`
			`if G.has_edge(orf, orf2):`
			`edge_weight = G.get_edge(orf, orf2)`
			`W[i,j] = edge_weight`
			`W[j,i] = edge_weight`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`if with_labels==True:`
Multiple lib changes 2007-01-25 12:58:10 +01:00			`return W, labels`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`else:`
			`return W`

			`def assortative_index(G):`
			`"""Ouputs two vectors: the degree and the neighbor average degree.`
			`Used to measure the assortative mixing. If the average degree is`
			`pos. correlated with the degree we know that hubs tend to connect`
			`to other hubs.`

			`input: G, graph connected!!`
			`ouput: d,mn_d: degree, and average degree of neighb.`
			`(degree sorting from degree(with_labels=True))`
			`"""`
			`d = G.degree(with_labels=True)`
			`out=[]`
			`for node in G.nodes():`
			`nn = G.neighbors(node)`
			`if len(nn)>0:`
			`nn_d = mean([float(d[i]) for i in nn])`
			`out.append((d[node], nn_d))`
			`return array(out).T`

Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`def struct_equivalence(G,n1,n2):`
			`"""Returns the structural equivalence of a node pair. Two nodes`
			`are structural equal if they share the same neighbors.`

			`x_s = [ne(n1) union ne(n2) - ne(n1) intersection ne(n2)]/[ne(n1)`
			`union ne(n2) + ne(n1) intersection ne(n2)]`
			`ref: Brun et.al 2003`
			`"""`

			`#[ne(n1) union ne(n2) - ne(n1) intersection ne(n2`
			`s1 = set(G.neighbors(n1))`
			`s2 = set(G.neighbors(n2))`
			`num_union = len(s1.union(s2))`
			`num_intersection = len(s1.intersection(s2))`
			`if num_union & num_intersection:`
			`xs=0`
			`else:`
			`xs = (num_union - num_intersection)/(num_union + num_intersection)`
			`return xs`

			`def struct_equivalence_all(G):`
			`"""Not finnished.`
			`"""`
			`A,labels = NX.adj_matrix(G,with_labels=True)`
			`pass`

			`def hamming_distance(n1,n2):`
			`"""Not finnsihed.`
			`"""`
			`pass`

Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00			`def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):`
			`"""Returns the correlation coefficient for each node. The`
			`correlation coefficient is between the node and its neighbours.`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00
			`"""`
Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00			`if nodelist==None:`
			`nodelist=G.nodes()`
			`if vec == None:`
			`vec = G.degree(nodelist)`
			`if len(vec)!=len(nodelist):`
			`raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))`

			`A = NX.ad_matrix(G, nodelist=nodelist)`
			`for i, node in enumerate(nodelist):`
			`nei_i = A[i,:]==1`
			`vec_i = vec[nei_i]`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00
			`def weighted_laplacian(G,with_labels=False):`
			`"""Return standard Laplacian of graph from a weighted adjacency matrix."""`
			`n= G.order()`
			`I = scipy.eye(n)`
			`A = weighted_adj_matrix(G)`
			`D = I*scipy.sum(A, 0)`
			`L = D-A`
			`if with_labels:`
			`A,index = weighted_adj_matrix(G, with_labels=True)`
			`return L, index`
			`else:`
			`return L`

Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00			`def grow_subnetworks(G, T2):`
Multiple lib changes 2007-01-25 12:58:10 +01:00			`"""Return the highest scoring (T2-test) subgraph og G.`

Moved lots of stuff back to sandbox 2007-03-14 17:32:49 +01:00			`Use simulated annealing to identify highly grow subgraphs.`
Multiple lib changes 2007-01-25 12:58:10 +01:00
			`ref: -- Ideker et.al (Bioinformatics 18, 2002)`
			`-- Patil and Nielsen (PNAS 2006)`

			`"""`
			`N = 1000`
			`states = [(node, False) for node in G.nodes()]`
			`t2_last = 0.0`
			`for i in xrange(N):`
			`if i==0: #assign random states`
			`states = [(state[0], True) for state in states if rand(1)>.5]`
			`sub_nodes = [state[0] for state in states if state[1]]`
			`Gsub = NX.subgraph(G, sub_nodes)`
			`Gsub = NX.connected_components_subgraphs(Gsub)[0]`
			`t2 = [T2[node] for node in Gsub]`
			`if t2>t2_last:`
			`pass`
			`else:`
			`p = numpy.exp()`


First import of chemometrics utils 2006-12-18 12:59:12 +01:00
			`"""Below are methods for calculating graph metrics`

			`Four main decompositions :`
			`0.) Adjacency diffusion kernel expm(A),`
			`1.) von neumann kernels (diagonalisation of adjacency matrix)`

			`2.) laplacian kernels (geometric series of adj.)`

			`3.) diffusion kernels (exponential series of adj.)`

			`---- Kv`
			`von_neumann : Kv = (I-alphaA)^-1 (mod: A(I-alphaA)^-1)? ,`
			`geom. series`

			`---- Kl`
			`laplacian: Kl = (I-alpha*L)^-1 , geom. series`

			`---- Kd`
			`laplacian_diffusion: Kd = expm(-alpha*L)`
			`exp. series`

			`---- Ke`
			`Exponential diffusion.`
			`Ke = expm(A) .... expm(-A)?`

			`"""`

			`# TODO:`
			`# check for numerical unstable eigenvalues and set to zero`
			`# othervise some inverses wil explode ->ok ..using pinv for inverses`
			`#`
			`# This gives results that look numerical unstable`
			`#`
			`# -- divided adj by sum(A[:]), check this one (paper by Lebart scales with number of edges)`
			`#`
			`#`
			`#`
			`# the neumann kernel is defined in Kandola to be K = A*(I-A)^-1`
			`# lowest eigenvectors are same as the highest of K = A*A ?`
			`# this needs clarification`

			`# diffusion is still wrong! ... ok`
			`# diff needs normalisation?! check the meaning of exp(-s) = exp(1/s) -L = 1/degree ... etc`
			`# Is it the negative of exp. of adj. metrix in Kandola?`
			`#`
			`# Normalised=False returns only nans (no idea why!!) ... fixed ok`

			`# 31.1: diff is ok exp(0)=1 not zero!`
			`# 07.03.2005: normalisation is ok: -> normalisation will emphasize high degree nodes`
			`# 10.03.2005: symeig is unstable an returns nans of some eigenvectors? switching back to eig`
			`# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)`
			`# 13.09.2206: update for use in numpy`

Lib updates 2007-07-23 19:33:21 +02:00			`# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00
Multiple lib changes 2007-01-25 12:58:10 +01:00			`def K_expAdj(W, normalised=True, alpha=1.0):`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.`
			`"""`
			`W = asarray(W)`
			`t = W.dtype.char`
			`if len(W.shape)!=2:`
			`raise ValueError, "Non-matrix input to matrix function."`
			`m,n = W.shape`
			`if t in ['F','D']:`
			`raise TypeError, "Complex input!"`
			`if normalised==True:`
			`T = diag( sqrt( 1./(sum(W,0))) )`
			`W = dot(dot(T, W), T)`
			`e,vr = eig(W)`
			`s = real(e)**2 # from eigenvalues to singularvalues`
			`vri = inv(vr)`
			`s = maximum.reduce(s) + s`
			`cond = {0: feps1e3, 1: eps1e6}[_array_precision[t]]`
			`cutoff = abs(cond*maximum.reduce(s))`
			`psigma = eye(m)`
			`for i in range(len(s)):`
			`if abs(s[i]) > cutoff:`
			`psigma[i,i] = .5alphaexp(s[i])`

			`return dot(dot(vr,psigma),vri)`

Multiple lib changes 2007-01-25 12:58:10 +01:00			`def K_vonNeumann(W, normalised=True, alpha=1.0):`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`""" The geometric series of path lengths.`
			`Returns matrix square root of pseudo inverse of the adjacency matrix.`
			`"""`
			`W = asarray(W)`
			`t = W.dtype.char`
			`if len(W.shape)!=2:`
			`raise ValueError, "Non-matrix input to matrix function."`
			`m,n = W.shape`
			`if t in ['F','D']:`
			`raise TypeError, "Complex input!"`

			`if normalised==True:`
			`T = diag(sqrt(1./(sum(W,0))))`
			`W = dot(dot(T,W),T)`
			`e,vr = eig(W)`
			`vri = inv(vr)`
			`e = real(e) # we only work with real pos. eigvals`
			`e = maximum.reduce(e) + e`
			`cond = {0: feps1e3, 1: eps1e6}[_array_precision[t]]`
			`cutoff = cond*maximum.reduce(e)`
			`psigma = zeros((m,n),t)`
			`for i in range(len(e)):`
			`if e[i] > cutoff:`
			`psigma[i,i] = 1.0/e[i] #these are eig.vals (=sqrt(sing.vals))`
			`return dot(dot(vr,psigma),vri).astype(t)`

			`def K_laplacian(W, normalised=True, alpha=1.0):`
Lib updates 2007-07-23 19:33:21 +02:00			`""" This is the matrix pseudo inverse of L.`
			`Also known as the average commute time matrix.`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`"""`
			`W = asarray(W)`
			`t = W.dtype.char`
			`if len(W.shape)!=2:`
			`raise ValueError, "Non-matrix input to matrix function."`
			`m,n = W.shape`
			`if t in ['F','D']:`
			`raise TypeError, "Complex input!"`
			`D = diag(sum(W,0))`
			`L = D - W`
			`if normalised==True:`
Multiple lib changes 2007-01-25 12:58:10 +01:00			`T = diag(sqrt(1./sum(W, 0)))`
			`L = dot(dot(T, L), T)`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`e,vr = eig(L)`
			`e = real(e)`
			`vri = inv(vr)`
			`cond = {0: feps1e3, 1: eps1e6}[_array_precision[t]]`
			`cutoff = cond*maximum.reduce(e)`
			`psigma = zeros((m,),t) # if s close to zero -> set 1/s = 0`
			`for i in range(len(e)):`
			`if e[i] > cutoff:`
			`psigma[i] = 1.0/e[i]`
Multiple lib changes 2007-01-25 12:58:10 +01:00			`K = dot(dot(vr, diag(psigma)), vri).astype(t)`
First import of chemometrics utils 2006-12-18 12:59:12 +01:00			`K = real(K)`
			`I = eye(n)`
			`K = (1-alpha)I + alphaK`
			`return K`


def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
    """Returns diffusion kernel.

    Computed as vr * diag(exp(-beta*e)) * inv(vr), where e, vr are the
    eigenvalues/eigenvectors of the Laplacian L = D - W.

    input:
        -- W, adj. matrix
        -- normalised [True/False]
        -- alpha, [0,1] (degree of network influence)
        -- beta, [0->), (diffusion degree)
        -- use_cut, accepted but not used by this implementation

    Raises ValueError for non 2-d input and TypeError for complex input.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape) != 2:
        raise ValueError("Non-matrix input to matrix function.")
    m, n = W.shape
    if t in ['F', 'D']:
        raise TypeError("Complex input!")
    degrees = W.sum(0)
    L = diag(degrees) - W
    if normalised:
        # NOTE: a zero column sum (isolated node) divides by zero here.
        T = diag(sqrt(1./degrees))
        L = dot(dot(T, L), T)
    e, vr = eig(L)
    vri = inv(vr)
    # dtype chars missing from the table (e.g. 'l' for int64) are treated
    # as double precision instead of raising KeyError.
    cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision.get(t, 1)]
    cutoff = 1.*abs(cond*maximum.reduce(e))
    # Keep the spectrum in the dtype eig() returned so that no imaginary
    # part is dropped on assignment; the real part is taken once, on the
    # final product. Eigenvalues at or below the cutoff keep exp(0) = 1.
    psigma = eye(m, dtype=e.dtype)
    for i, e_i in enumerate(e):
        if abs(e_i) > cutoff:
            psigma[i, i] = exp(-beta*e_i)
    K = real(dot(dot(vr, psigma), vri))
    I = eye(n, dtype='<f8')
    K = (1. - alpha)*I + alpha*K
    return K

			`def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):`
			`"""Returns diffusion kernel, using fast pade approximation.`
			`input:`
			`-- W, adj. matrix`
			`-- normalised [True/False]`
			`-- beta, [0->), (diffusion degree)`
			`"""`

			`D = diag(W.sum(0))`
			`L = D - W`
			`if normalised==True:`
			`T = diag(sqrt(1./W.sum(0)))`
			`L = dot(dot(T, L), T)`
			`return expm(-beta*L)`

def K_modularity(W, alpha=1.0):
    """ Returns the matrix square root of Newmans modularity.

    Builds B = W - outer(d, d)/sum(d) with d the column sums of W, takes
    its eigen-decomposition from sorted_eig and keeps only eigenvalues
    above 1e-7, replacing each by its square root.

    input:
        -- W, adj. matrix (2-dimensional)
        -- alpha, mixing weight: result is (1-alpha)*I + alpha*K
    """
    W = asarray(W)
    nrows, n = W.shape
    d = sum(W, 0)
    total_degree = 1.*sum(d)
    B = W - (outer(d, d)/total_degree)
    s, v = sorted_eig(B, sort_by='lm')
    # Eigenvalues at or below the threshold contribute nothing.
    psigma = zeros((n, n), dtype='<f8')
    for i, s_i in enumerate(s):
        if s_i > 1e-7:
            psigma[i, i] = sqrt(s_i)
    K = dot(dot(v, psigma), v.T)
    I = eye(n)
    K = (1 - alpha)*I + alpha*K
    return K

			`def kernel_score(K, W):`
			`"""Returns the modularity score.`
			`K -- (modularity) kernel`
			`W -- adjacency matrix (possibly weighted)`
			`"""`
			`# normalize W (: W'W=I)`
			`m, n = shape(W)`
			`for i in range(n):`
			`W[:,i] = W[:,i]/norm(W[:,i])`
			`score = diag(dot(W, dot(K, W)) )`
			`tot = sum(score)`
			`return score, tot`
Lib updates 2007-07-23 19:33:21 +02:00

			`def modularity_matrix(G, nodelist=None):`
			`if not nodelist:`
			`nodelist = G.nodes()`
			`else:`
			`G = NX.subgraph(G, nodelist)`

			`A = NX.adj_matrix(G, nodelist=nodelist)`
			`d = atleast_2d(G.degree(nbunch=nodelist))`
			`m = 1.*G.number_of_edges()`
Tralala ... 2007-11-07 13:34:13 +01:00			`B = A - dot(d.T, d)/m`
Lib updates 2007-07-23 19:33:21 +02:00			`return B`