import os,sys
from itertools import izip

import networkx as NX
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
     outer,sum,atleast_2d,ravel,array,mean
from scipy.linalg import eig,svd,inv,expm,norm
from numpy.random import rand
from cx_utils import sorted_eig
import numpy

eps = numpy.finfo(float).eps.item()
feps = numpy.finfo(numpy.single).eps.item()
_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1, 'i': 1}

class NXUTILSException(Exception): pass

def xgraph_to_graph(G):
    """Convert an XGraph to an ordinary graph.

    Edge attributes, multiple edges and self-loops are lost in the process.
    """
    GG = NX.convert.from_dict_of_lists(NX.convert.to_dict_of_lists(G))
    return GG

def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out='dist'):
    """Calculate a general affinity matrix, based upon distances.

    Affinities are inverted, rescaled distances: high values mean close
    (similar), 1.0 means far apart.

    input:
        data: gene expression data, type dict: data[gene] = expression-vector
        G: the network (networkx graph object)
        ids: node list, aligned with the rows of the data
        mask: shows which data are missing; if mask[i][j]==0, then
              data[i][j] is missing
        weight: weights to be used when calculating distances
        dist: defines the distance function to be used:
            dist=='e': Euclidean distance
            dist=='b': City Block distance
            dist=='h': Harmonically summed Euclidean distance
            dist=='c': Pearson correlation
            dist=='a': absolute value of the correlation
            dist=='u': uncentered correlation
            dist=='x': absolute uncentered correlation
            dist=='s': Spearman's rank correlation
            dist=='k': Kendall's tau
            For other values of dist, the default (Euclidean distance)
            is used.

    output:
        D: similarity matrix (nGenes x nGenes), symmetric, normalised
           against the maximum distance
    """
    try:
        from Bio import Cluster as CLS
    except:
        raise NXUTILSException("Import of Biopython failed")
    n_var = len(data)
    n_samp = len(data[data.keys()[0]])
    X = zeros((n_var, n_samp), dtype='<f8')
    for i, gene in enumerate(ids):
        X[i,:] = data[gene]
    # distancematrix returns the lower triangle as a list of rows,
    # where row i has length i
    D_list = CLS.distancematrix(X, mask=mask, weight=weight, dist=dist)
    D = zeros((n_var, n_var), dtype='<f8')
    for i, row in enumerate(D_list):
        if i>0:
            D[i,:len(row)] = row
    D = D + D.T
    MAX = 30.0
    D_max = max(ravel(D))/MAX
    D_n = D/D_max # normalised (max = 30.0)
    D_n = (MAX + 1.) - D_n # using correlation (inverse distance for dists)
    A = NX.adj_matrix(G, nodelist=ids)
    if out=='dist':
        return D_n*A
    elif out=='heat_kernel':
        t = 1.0
        K = exp(-t*D*A)
        return K
    elif out=='complete':
        return D_n
    else:
        return []

def remove_one_degree_nodes(G, iter=True):
    """Removes all nodes with only one neighbour.

    These nodes do not contribute to community structure.

    input: G -- graph
           iter -- True/False, iteratively remove?
    """
    G_copy = G.copy()
    if iter==True:
        while 1:
            bad_nodes = []
            for node in G_copy.nodes():
                if len(G_copy.neighbors(node))==1:
                    bad_nodes.append(node)
            if len(bad_nodes)>0:
                G_copy.delete_nodes_from(bad_nodes)
            else:
                break
    else:
        bad_nodes = []
        for node in G_copy.nodes():
            if len(G_copy.neighbors(node))==1:
                bad_nodes.append(node)
        if len(bad_nodes)>0:
            G_copy.delete_nodes_from(bad_nodes)
    print "Deleted %s nodes from network" %(len(G)-len(G_copy))
    return G_copy

def key_players(G, n=1, with_labels=False):
    """Resilience measure.

    Identifies key nodes by the fraction of nodes left in a
    disconnected subgraph when the node is removed.

    output: fraction of nodes disconnected when node i is removed
    """
    i = 0
    frac = []
    labels = {}
    for node in G.nodes():
        i += 1
        print i
        T = G.copy()
        T.delete_node(node)
        n_nodes = T.number_of_nodes()
        sub_graphs = NX.connected_component_subgraphs(T)
        n_comp = len(sub_graphs)
        if n_comp>1:
            strong_comp = sub_graphs[0]
            fraction = 1.0 - 1.0*strong_comp.number_of_nodes()/n_nodes
            frac.append(fraction)
            labels[node] = fraction
        else:
            frac.append(0.0)
            labels[node] = 0.0
    out = 1.0 - array(frac)
    if with_labels==True:
        return out, labels
    else:
        return out
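# Usage sketch (assumption: the toy graph and the helper name below are
# illustrative only, not part of the original module API):
def _example_key_players():
    """Rank nodes of a small toy graph by the resilience measure."""
    G = NX.Graph()
    G.add_edges_from([(1,2), (2,3), (3,4), (4,5), (3,6)])
    scores, labels = key_players(G, with_labels=True)
    # cut-nodes (here node 3) disconnect a large fraction of the network
    # when removed, so they get the highest fractions in labels
    for node in G.nodes():
        print node, labels[node]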
def node_weighted_adj_matrix(G, weights=None, ave_type='harmonic', with_labels=False):
    """Return a weighted adjacency matrix of graph. The weights are
    node weights.

    input: G -- graph
           weights -- dict, keys: nodes, values: weights
           with_labels -- True/False, return labels?

    output: A -- weighted adjacency matrix
            [index] -- node labels
    """
    n = G.order()
    # make a dictionary that maps vertex name to position
    index = {}
    count = 0
    for node in G.nodes():
        index[node] = count
        count = count + 1
    a = zeros((n, n))
    if isinstance(G, NX.XGraph):
        raise NXUTILSException("Use weighted_adj_matrix for edge-weighted XGraphs")
    for head, tail in G.edges():
        if ave_type == 'geometric':
            a[index[head],index[tail]] = sqrt(weights[head]*weights[tail])
            a[index[tail],index[head]] = a[index[head],index[tail]]
        elif ave_type == 'harmonic':
            # harmonic mean of the two node weights
            a[index[head],index[tail]] = 2.*weights[head]*weights[tail]/(weights[head] + weights[tail])
            a[index[tail],index[head]] = a[index[head],index[tail]]
    if with_labels:
        return a, index
    else:
        return a

def weighted_adj_matrix(G, with_labels=False):
    """Adjacency matrix of an XGraph whose weights are given in edges.
    """
    A, labels = NX.adj_matrix(G, with_labels=True)
    W = A.astype('<f8')
    for head, tail, w in G.edges():
        W[labels[head],labels[tail]] = w
        W[labels[tail],labels[head]] = w
    if with_labels==True:
        return W, labels
    else:
        return W

def average_neighbour_degree(G):
    """Return (degree, mean neighbour degree) for every connected node.
    """
    d = G.degree(with_labels=True)
    out = []
    for node in G.nodes():
        nn = G.neighbors(node)
        if len(nn)>0:
            nn_d = mean([float(d[i]) for i in nn])
            out.append((d[node], nn_d))
    return array(out).T

def struct_equivalence(G, n1, n2):
    """Returns the structural equivalence of a node pair.

    Two nodes are structurally equal if they share the same neighbors.

    x_s = [ne(n1) union ne(n2) - ne(n1) intersection ne(n2)]/
          [ne(n1) union ne(n2) + ne(n1) intersection ne(n2)]

    ref: Brun et.al 2003
    """
    s1 = set(G.neighbors(n1))
    s2 = set(G.neighbors(n2))
    num_union = len(s1.union(s2))
    num_intersection = len(s1.intersection(s2))
    if (num_union + num_intersection)==0:
        xs = 0.0
    else:
        xs = 1.0*(num_union - num_intersection)/(num_union + num_intersection)
    return xs

def struct_equivalence_all(G):
    """Not finished.
    """
    A, labels = NX.adj_matrix(G, with_labels=True)
    pass

def hamming_distance(n1, n2):
    """Not finished.
    """
    pass

def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
    """Returns the correlation coefficient for each node. The
    correlation coefficient is between the node and its neighbours.

    Not finished.
    """
    if nodelist==None:
        nodelist = G.nodes()
    if vec==None:
        vec = G.degree(nodelist)
    if len(vec)!=len(nodelist):
        raise NXUTILSException("The node value vector is not of same length (%s) as the nodelist (%s)" %(len(vec), len(nodelist)))
    vec = asarray(vec)
    A = asarray(NX.adj_matrix(G, nodelist=nodelist))
    for i, node in enumerate(nodelist):
        nei_i = A[i,:]==1
        vec_i = vec[nei_i]

def weighted_laplacian(G, with_labels=False):
    """Return standard Laplacian of graph from a weighted adjacency matrix."""
    n = G.order()
    I = eye(n)
    A = weighted_adj_matrix(G)
    D = I*sum(A, 0)
    L = D - A
    if with_labels:
        A, index = weighted_adj_matrix(G, with_labels=True)
        return L, index
    else:
        return L
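# Usage sketch (assumption: the toy graph and the helper name are
# illustrative only):
def _example_struct_equivalence():
    """Structural equivalence of two nodes on a toy graph."""
    G = NX.Graph()
    G.add_edges_from([('a','c'), ('a','d'), ('b','c'), ('b','d'), ('b','e')])
    # 'a' and 'b' share the neighbours {c, d}, but 'b' also links 'e':
    # x_s = (3-2)/(3+2) = 0.2 (x_s = 0 would mean identical neighbourhoods)
    print struct_equivalence(G, 'a', 'b')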
def grow_subnetworks(G, T2):
    """Return the highest scoring (T2-test) subgraph of G.

    Uses simulated annealing to identify high-scoring subgraphs.

    ref: -- Ideker et.al (Bioinformatics 18, 2002)
         -- Patil and Nielsen (PNAS 2006)
    """
    N = 1000
    # random initial states: each node is either in (True) or out (False)
    # of the subnetwork
    states = dict([(node, rand()>.5) for node in G.nodes()])
    t2_last = 0.0
    nodes = G.nodes()
    for i in xrange(N):
        temp = 1. - float(i)/N # linear cooling schedule (assumed)
        # propose a new state by toggling one randomly chosen node
        node = nodes[int(rand()*len(nodes))]
        states[node] = not states[node]
        sub_nodes = [n for n in nodes if states[n]]
        Gsub = NX.subgraph(G, sub_nodes)
        components = NX.connected_component_subgraphs(Gsub)
        if not components:
            states[node] = not states[node] # revert empty proposal
            continue
        Gsub = components[0]
        t2 = sum([T2[n] for n in Gsub]) # subgraph score
        if t2 > t2_last:
            t2_last = t2 # accept improvements unconditionally
        else:
            # accept a worse state with Boltzmann probability
            p = numpy.exp((t2 - t2_last)/max(temp, eps))
            if rand() < p:
                t2_last = t2
            else:
                states[node] = not states[node] # revert
    return NX.subgraph(G, [n for n in nodes if states[n]])

"""Below are methods for calculating graph kernels.

Four main decompositions:
0.) adjacency diffusion kernel, expm(A)
1.) von Neumann kernels (diagonalisation of the adjacency matrix)
2.) Laplacian kernels (geometric series of adj.)
3.) diffusion kernels (exponential series of adj.)

---- Kv von_neumann:           Kv = (I - alpha*A)^-1 (mod: A(I - alpha*A)^-1 ?), geom. series
---- Kl laplacian:             Kl = (I - alpha*L)^-1, geom. series
---- Kd laplacian_diffusion:   Kd = expm(-alpha*L), exp. series
---- Ke exponential diffusion: Ke = expm(A) .... expm(-A)?
"""

# TODO:
# check for numerically unstable eigenvalues and set them to zero,
# otherwise some inverses will explode -> ok ... using pinv for inverses
#
# This gives results that look numerically unstable
#
# -- divided adj by sum(A[:]), check this one (paper by Lebart scales with
#    number of edges)
#
# the neumann kernel is defined in Kandola to be K = A*(I-A)^-1
# lowest eigenvectors are the same as the highest of K = A*A ?
# this needs clarification
# diffusion is still wrong! ... ok
# diff needs normalisation?! check the meaning of exp(-s) = exp(1/s), -L = 1/degree ... etc
# Is it the negative of the exp. of the adj. matrix in Kandola?
#
# Normalised=False returns only nans (no idea why!!) ... fixed, ok
# 31.01:      diff is ok, exp(0)=1, not zero!
# 07.03.2005: normalisation is ok -> normalisation will emphasize high degree nodes
# 10.03.2005: symeig is unstable and returns nans for some eigenvectors,
#             switching back to eig
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
# 13.09.2006: update for use in numpy
# 27.04.2007: diffusion now uses pade approximations to the matrix exponential. Also the last

def K_expAdj(W, normalised=True, alpha=1.0):
    """Matrix exponential of adjacency matrix, mentioned in Kandola as
    a general diffusion kernel.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape)!=2:
        raise ValueError, "Non-matrix input to matrix function."
    m,n = W.shape
    if t in ['F','D']:
        raise TypeError, "Complex input!"
    if normalised==True:
        T = diag(sqrt(1./(sum(W,0))))
        W = dot(dot(T, W), T)
    e,vr = eig(W)
    s = real(e)**2 # from eigenvalues to singular values
    vri = inv(vr)
    s = maximum.reduce(s) + s
    cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
    cutoff = abs(cond*maximum.reduce(s))
    psigma = eye(m)
    for i in range(len(s)):
        if abs(s[i]) > cutoff:
            psigma[i,i] = .5*alpha*exp(s[i])
    return dot(dot(vr, psigma), vri)

def K_vonNeumann(W, normalised=True, alpha=1.0):
    """The geometric series of path lengths.

    Returns the matrix square root of the pseudo-inverse of the
    adjacency matrix.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape)!=2:
        raise ValueError, "Non-matrix input to matrix function."
    m,n = W.shape
    if t in ['F','D']:
        raise TypeError, "Complex input!"
    if normalised==True:
        T = diag(sqrt(1./(sum(W,0))))
        W = dot(dot(T, W), T)
    e,vr = eig(W)
    vri = inv(vr)
    e = real(e) # we only work with real pos. eigvals
    e = maximum.reduce(e) + e
    cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
    cutoff = cond*maximum.reduce(e)
    psigma = zeros((m,n),t)
    for i in range(len(e)):
        if e[i] > cutoff:
            psigma[i,i] = 1.0/e[i] # these are eig.vals (=sqrt(sing.vals))
    return dot(dot(vr, psigma), vri).astype(t)
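# Usage sketch (assumption: the toy graph and the helper name are
# illustrative only). Both kernels take a dense adjacency matrix; entries
# K[i,j] reflect the weighted sum of walks connecting nodes i and j.
def _example_kernels():
    """Compute exponential and von Neumann kernels for a toy graph."""
    G = NX.Graph()
    G.add_edges_from([(0,1), (1,2), (2,3), (3,0), (1,3)])
    A = asarray(NX.adj_matrix(G)).astype('<f8')
    Ke = K_expAdj(A, normalised=True, alpha=1.0)
    Kv = K_vonNeumann(A, normalised=True)
    print Ke[0,1], Kv[0,1]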
def K_laplacian(W, normalised=True, alpha=1.0):
    """The matrix pseudo-inverse of the Laplacian L.

    Also known as the average commute time matrix.
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape)!=2:
        raise ValueError, "Non-matrix input to matrix function."
    m,n = W.shape
    if t in ['F','D']:
        raise TypeError, "Complex input!"
    D = diag(sum(W,0))
    L = D - W
    if normalised==True:
        T = diag(sqrt(1./sum(W,0)))
        L = dot(dot(T, L), T)
    e,vr = eig(L)
    e = real(e)
    vri = inv(vr)
    cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
    cutoff = cond*maximum.reduce(e)
    psigma = zeros((m,),t) # if s is close to zero -> set 1/s = 0
    for i in range(len(e)):
        if e[i] > cutoff:
            psigma[i] = 1.0/e[i]
    K = dot(dot(vr, diag(psigma)), vri).astype(t)
    K = real(K)
    I = eye(n)
    K = (1. - alpha)*I + alpha*K
    return K

def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
    """Returns diffusion kernel.

    input:
        -- W, adj. matrix
        -- normalised [True/False]
        -- alpha, [0,1] (degree of network influence)
        -- beta, [0->), (diffusion degree)
    """
    W = asarray(W)
    t = W.dtype.char
    if len(W.shape)!=2:
        raise ValueError, "Non-matrix input to matrix function."
    m, n = W.shape
    if t in ['F','D']:
        raise TypeError, "Complex input!"
    D = diag(W.sum(0))
    L = D - W
    if normalised==True:
        T = diag(sqrt(1./W.sum(0)))
        L = dot(dot(T, L), T)
    e, vr = eig(L)
    vri = inv(vr)
    cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
    cutoff = 1.*abs(cond*maximum.reduce(e))
    psigma = eye(m) # if eigvals are 0, exp(0)=1 (cutoff therefore unnecessary)
    #psigma = zeros((m,n), dtype='<f8')
    for i in range(len(e)):
        if abs(e[i]) > cutoff:
            psigma[i,i] = exp(-beta*e[i])
        #else:
        #    psigma[i,i] = 0.0
    K = real(dot(dot(vr, psigma), vri))
    I = eye(n, dtype='<f8')
    K = (1. - alpha)*I + alpha*K
    return K

def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5):
    """Returns diffusion kernel, using the Pade approximation to the
    matrix exponential (scipy.linalg.expm).

    input:
        -- W, adj. matrix
        -- normalised [True/False]
        -- beta, [0->), (diffusion degree)
    """
    D = diag(W.sum(0))
    L = D - W
    if normalised==True:
        T = diag(sqrt(1./W.sum(0)))
        L = dot(dot(T, L), T)
    return expm(-beta*L)

def K_modularity(W, alpha=1.0):
    """Returns the matrix square root of Newman's modularity."""
    W = asarray(W)
    t = W.dtype.char
    m, n = W.shape
    d = sum(W, 0)
    m = 1.*sum(d)
    B = W - (outer(d, d)/m)
    s,v = sorted_eig(B, sort_by='lm')
    psigma = zeros((n, n), dtype='<f8')
    for i in range(len(s)):
        if s[i]>1e-7:
            psigma[i,i] = sqrt(s[i])
            #psigma[i,i] = s[i]
    K = dot(dot(v, psigma), v.T)
    I = eye(n)
    K = (1. - alpha)*I + alpha*K
    return K

def kernel_score(K, W):
    """Returns the modularity score.

    K -- (modularity) kernel
    W -- adjacency matrix (possibly weighted)
    """
    # normalise W (: W'W = I)
    m, n = shape(W)
    for i in range(n):
        W[:,i] = W[:,i]/norm(W[:,i])
    # score_i = w_i' K w_i for each normalised column w_i
    score = diag(dot(W.T, dot(K, W)))
    tot = sum(score)
    return score, tot

def modularity_matrix(G, nodelist=None):
    """Returns Newman's modularity matrix: B = A - d*d'/(2m)."""
    if not nodelist:
        nodelist = G.nodes()
    else:
        G = NX.subgraph(G, nodelist)
    A = NX.adj_matrix(G, nodelist=nodelist)
    d = atleast_2d(G.degree(nbunch=nodelist))
    m = 1.*G.number_of_edges()
    B = A - dot(d.T, d)/(2.*m)
    return B
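# Usage sketch (assumption: the toy graph, helper name and printed quantity
# are illustrative only; note that kernel_score normalises its W argument
# in place, hence the copy):
def _example_modularity_score():
    """Score a toy two-community graph with the modularity kernel."""
    G = NX.Graph()
    G.add_edges_from([(0,1), (0,2), (1,2), (2,3), (3,4), (3,5), (4,5)])
    A = asarray(NX.adj_matrix(G)).astype('<f8')
    K = K_modularity(A, alpha=1.0)
    score, tot = kernel_score(K, A.copy())
    print tot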