Errors when identifiers don't match shape, + whitespace
This commit is contained in:
parent
48bb47ec86
commit
ed2848beb3
|
@ -36,28 +36,30 @@ class Dataset:
|
||||||
|
|
||||||
data = Dataset(rand(10,20)) (generates dims and ids (no links))
|
data = Dataset(rand(10,20)) (generates dims and ids (no links))
|
||||||
"""
|
"""
|
||||||
def __init__(self,array,identifiers=None,name='Unnamed dataset'):
|
def __init__(self, array, identifiers=None, name='Unnamed dataset'):
|
||||||
self._dims = [] #existing dimensions in this dataset
|
self._dims = [] #existing dimensions in this dataset
|
||||||
self._map = {} # internal mapping for dataset: identifier <--> index
|
self._map = {} # internal mapping for dataset: identifier <--> index
|
||||||
self._name = name
|
self._name = name
|
||||||
self._identifiers = identifiers
|
self._identifiers = identifiers
|
||||||
self._type = 'n'
|
self._type = 'n'
|
||||||
try:
|
|
||||||
|
if len(array.shape)==1:
|
||||||
array = atleast_2d(asarray(array))
|
array = atleast_2d(asarray(array))
|
||||||
except:
|
|
||||||
print "Cant cast array as numpy-array"
|
|
||||||
return
|
|
||||||
# vectors are column vectors
|
# vectors are column vectors
|
||||||
if array.shape[0]==1:
|
if array.shape[0]==1:
|
||||||
array = array.T
|
array = array.T
|
||||||
self.shape = array.shape
|
self.shape = array.shape
|
||||||
|
|
||||||
if identifiers!=None:
|
if identifiers!=None:
|
||||||
self._set_identifiers(identifiers,self._all_dims)
|
identifier_shape = [len(i[1]) for i in identifiers]
|
||||||
|
if len(identifier_shape)!=len(self.shape):
|
||||||
|
raise ValueError, "Identifier list length must equal array dims"
|
||||||
|
for ni, na in zip(identifier_shape, self.shape):
|
||||||
|
if ni!=na:
|
||||||
|
raise ValueError, "identifier-array mismatch in %s: (idents: %s, array: %s)" %(self._name, ni, na)
|
||||||
|
self._set_identifiers(identifiers, self._all_dims)
|
||||||
else:
|
else:
|
||||||
self._identifiers = self._create_identifiers(self.shape,self._all_dims)
|
self._identifiers = self._create_identifiers(self.shape, self._all_dims)
|
||||||
self._set_identifiers(self._identifiers,self._all_dims)
|
self._set_identifiers(self._identifiers, self._all_dims)
|
||||||
|
|
||||||
self._array = array
|
self._array = array
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
@ -94,17 +96,16 @@ class Dataset:
|
||||||
all_dims.add(dim_suggestion)
|
all_dims.add(dim_suggestion)
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
def _set_identifiers(self,identifiers,all_dims):
|
def _set_identifiers(self, identifiers, all_dims):
|
||||||
"""Creates internal mapping of identifiers structure."""
|
"""Creates internal mapping of identifiers structure."""
|
||||||
for dim,ids in identifiers:
|
for dim, ids in identifiers:
|
||||||
pos_map = ReverseDict()
|
pos_map = ReverseDict()
|
||||||
if dim not in self._dims:
|
if dim not in self._dims:
|
||||||
self._dims.append(dim)
|
self._dims.append(dim)
|
||||||
all_dims.add(dim)
|
all_dims.add(dim)
|
||||||
else:
|
else:
|
||||||
raise ValueError, "Dimension names must be unique whitin dataset"
|
raise ValueError, "Dimension names must be unique whitin dataset"
|
||||||
|
for pos, id in enumerate(ids):
|
||||||
for pos,id in enumerate(ids):
|
|
||||||
pos_map[id] = pos
|
pos_map[id] = pos
|
||||||
self._map[dim] = pos_map
|
self._map[dim] = pos_map
|
||||||
|
|
||||||
|
@ -121,11 +122,10 @@ class Dataset:
|
||||||
"""Returns the numeric array (data) of dataset"""
|
"""Returns the numeric array (data) of dataset"""
|
||||||
return self._array
|
return self._array
|
||||||
|
|
||||||
def add_array(self,array):
|
def add_array(self, array):
|
||||||
"""Adds array as an ArrayType object.
|
"""Adds array as an ArrayType object.
|
||||||
A one-dim array is transformed to a two-dim array (row-vector)
|
A one-dim array is transformed to a two-dim array (row-vector)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.shape!=array.shape:
|
if self.shape!=array.shape:
|
||||||
raise ValueError, "Input array must be of similar dimensions as dataset"
|
raise ValueError, "Input array must be of similar dimensions as dataset"
|
||||||
self._array = atleast_2d(asarray(array))
|
self._array = atleast_2d(asarray(array))
|
||||||
|
@ -138,7 +138,7 @@ class Dataset:
|
||||||
"""Returns all dimensions in project"""
|
"""Returns all dimensions in project"""
|
||||||
return self._all_dims
|
return self._all_dims
|
||||||
|
|
||||||
def get_dim_name(self,axis=None):
|
def get_dim_name(self, axis=None):
|
||||||
"""Returns dim name for an axis, if no axis is provided it
|
"""Returns dim name for an axis, if no axis is provided it
|
||||||
returns a list of dims"""
|
returns a list of dims"""
|
||||||
if type(axis)==int:
|
if type(axis)==int:
|
||||||
|
@ -178,7 +178,6 @@ class Dataset:
|
||||||
You can optionally provide a list of identifiers to retrieve a
|
You can optionally provide a list of identifiers to retrieve a
|
||||||
index subset.
|
index subset.
|
||||||
|
|
||||||
|
|
||||||
Identifiers are the unique names (strings) for a variable in a
|
Identifiers are the unique names (strings) for a variable in a
|
||||||
given dim. Index (Indices) are the Identifiers position in a
|
given dim. Index (Indices) are the Identifiers position in a
|
||||||
matrix in a given dim. If none of the input identifiers are
|
matrix in a given dim. If none of the input identifiers are
|
||||||
|
@ -218,8 +217,8 @@ class CategoryDataset(Dataset):
|
||||||
.
|
.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,array,identifiers=None,name='C'):
|
def __init__(self, array, identifiers=None, name='C'):
|
||||||
Dataset.__init__(self,array,identifiers=identifiers,name=name)
|
Dataset.__init__(self, array, identifiers=identifiers, name=name)
|
||||||
self.has_dictlists = False
|
self.has_dictlists = False
|
||||||
self._type = 'c'
|
self._type = 'c'
|
||||||
|
|
||||||
|
@ -229,7 +228,7 @@ class CategoryDataset(Dataset):
|
||||||
ex: data['gene_id'] = ['map0030','map0010', ...]
|
ex: data['gene_id'] = ['map0030','map0010', ...]
|
||||||
"""
|
"""
|
||||||
data={}
|
data={}
|
||||||
for name,ind in self._map[self.get_dim_name(0)].items():
|
for name, ind in self._map[self.get_dim_name(0)].items():
|
||||||
data[name] = self.get_identifiers(self.get_dim_name(1),
|
data[name] = self.get_identifiers(self.get_dim_name(1),
|
||||||
list(self._array[ind,:].nonzero()))
|
list(self._array[ind,:].nonzero()))
|
||||||
self._dictlists = data
|
self._dictlists = data
|
||||||
|
@ -240,7 +239,7 @@ class CategoryDataset(Dataset):
|
||||||
"""Returns data as a list of Selection objects.
|
"""Returns data as a list of Selection objects.
|
||||||
"""
|
"""
|
||||||
ret_list = []
|
ret_list = []
|
||||||
for cat_name,ind in self._map[self.get_dim_name(1)].items():
|
for cat_name, ind in self._map[self.get_dim_name(1)].items():
|
||||||
ids = self.get_identifiers(self.get_dim_name(0),
|
ids = self.get_identifiers(self.get_dim_name(0),
|
||||||
self._array[:,ind].nonzero()[0])
|
self._array[:,ind].nonzero()[0])
|
||||||
selection = Selection(cat_name)
|
selection = Selection(cat_name)
|
||||||
|
@ -254,26 +253,26 @@ class GraphDataset(Dataset):
|
||||||
|
|
||||||
A dataset class for representing graphs using an (weighted)
|
A dataset class for representing graphs using an (weighted)
|
||||||
adjacency matrix
|
adjacency matrix
|
||||||
(aka. restricted to square symmetric matrices)
|
(restricted to square symmetric matrices)
|
||||||
|
|
||||||
If the library NetworkX is installed, there is support for
|
If the library NetworkX is installed, there is support for
|
||||||
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
|
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
|
def __init__(self, array=None, identifiers=None, shape=None, all_dims=[],**kwds):
|
||||||
Dataset.__init__(self,array=array,identifiers=identifiers,name='A')
|
Dataset.__init__(self, array=array, identifiers=identifiers, name='A')
|
||||||
self._graph = None
|
self._graph = None
|
||||||
self._type = 'g'
|
self._type = 'g'
|
||||||
|
|
||||||
def asnetworkx(self,nx_type='graph'):
|
def asnetworkx(self, nx_type='graph'):
|
||||||
dim = self.get_dim_name()[0]
|
dim = self.get_dim_name()[0]
|
||||||
ids = self.get_identifiers(dim,sorted=True)
|
ids = self.get_identifiers(dim, sorted=True)
|
||||||
adj_mat = self.asarray()
|
adj_mat = self.asarray()
|
||||||
G = self._graph_from_adj_matrix(adj_mat,labels=ids)
|
G = self._graph_from_adj_matrix(adj_mat, labels=ids)
|
||||||
self._graph = G
|
self._graph = G
|
||||||
return G
|
return G
|
||||||
|
|
||||||
def _graph_from_adj_matrix(self,A,labels=None):
|
def _graph_from_adj_matrix(self, A, labels=None):
|
||||||
"""Creates a networkx graph class from adjacency
|
"""Creates a networkx graph class from adjacency
|
||||||
(possibly weighted) matrix and ordered labels.
|
(possibly weighted) matrix and ordered labels.
|
||||||
|
|
||||||
|
@ -286,7 +285,7 @@ class GraphDataset(Dataset):
|
||||||
except:
|
except:
|
||||||
print "Failed in import of NetworkX"
|
print "Failed in import of NetworkX"
|
||||||
return
|
return
|
||||||
m,n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours
|
m, n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours
|
||||||
if m!=n:
|
if m!=n:
|
||||||
raise IOError, "Adjacency matrix must be square"
|
raise IOError, "Adjacency matrix must be square"
|
||||||
|
|
||||||
|
@ -298,17 +297,18 @@ class GraphDataset(Dataset):
|
||||||
if labels==None: # if labels not provided mark vertices with numbers
|
if labels==None: # if labels not provided mark vertices with numbers
|
||||||
labels = [str(i) for i in range(m)]
|
labels = [str(i) for i in range(m)]
|
||||||
|
|
||||||
for nbrs,head in izip(A,labels):
|
for nbrs, head in izip(A, labels):
|
||||||
for i,nbr in enumerate(nbrs):
|
for i, nbr in enumerate(nbrs):
|
||||||
if nbr:
|
if nbr:
|
||||||
tail = labels[i]
|
tail = labels[i]
|
||||||
if type(G)==nx.XGraph:
|
if type(G)==nx.XGraph:
|
||||||
G.add_edge(head,tail,nbr)
|
G.add_edge(head, tail, nbr)
|
||||||
else:
|
else:
|
||||||
G.add_edge(head,tail)
|
G.add_edge(head, tail)
|
||||||
return G
|
return G
|
||||||
|
|
||||||
Dataset._all_dims=set()
|
Dataset._all_dims = set()
|
||||||
|
|
||||||
|
|
||||||
class ReverseDict(dict):
|
class ReverseDict(dict):
|
||||||
"""
|
"""
|
||||||
|
@ -336,30 +336,34 @@ def to_file(filepath,dataset,name=None):
|
||||||
"""
|
"""
|
||||||
if not name:
|
if not name:
|
||||||
name = dataset._name
|
name = dataset._name
|
||||||
data = shelve.open(filepath,flag='c',protocol=2)
|
data = shelve.open(filepath, flag='c', protocol=2)
|
||||||
if data: #we have an append
|
if data: #we have an append
|
||||||
names = data.keys()
|
names = data.keys()
|
||||||
if name in names:
|
if name in names:
|
||||||
print "Data with name: %s overwritten" %dataset._name
|
print "Data with name: %s overwritten" %dataset._name
|
||||||
sub_data = {'array':dataset._array,'idents':dataset._identifiers,'type':dataset._type}
|
|
||||||
|
sub_data = {'array':dataset._array,
|
||||||
|
'idents':dataset._identifiers,
|
||||||
|
'type':dataset._type}
|
||||||
data[name] = sub_data
|
data[name] = sub_data
|
||||||
data.close()
|
data.close()
|
||||||
|
|
||||||
def from_file(filepath):
|
def from_file(filepath):
|
||||||
"""Read dataset from file """
|
"""Read dataset(s) from file """
|
||||||
data = shelve.open(filepath,flag='r')
|
data = shelve.open(filepath, flag='r')
|
||||||
out_data = []
|
out_data = []
|
||||||
for name in data.keys():
|
for name in data.keys():
|
||||||
sub_data = data[name]
|
sub_data = data[name]
|
||||||
if sub_data['type']=='c':
|
if sub_data['type']=='c':
|
||||||
out_data.append(CategoryDataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
|
out_data.append(CategoryDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
||||||
elif sub_data['type']=='g':
|
elif sub_data['type']=='g':
|
||||||
out_data.append(GraphDataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
|
out_data.append(GraphDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
||||||
else:
|
else:
|
||||||
out_data.append(Dataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
|
out_data.append(Dataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
||||||
|
|
||||||
return out_data
|
return out_data
|
||||||
|
|
||||||
|
|
||||||
class Selection(dict):
|
class Selection(dict):
|
||||||
"""Handles selected identifiers along each dimension of a dataset"""
|
"""Handles selected identifiers along each dimension of a dataset"""
|
||||||
|
|
||||||
|
|
Reference in New Issue