Raise errors when identifiers don't match the array shape, + whitespace cleanup
This commit is contained in:
parent
48bb47ec86
commit
ed2848beb3
@ -36,28 +36,30 @@ class Dataset:
|
||||
|
||||
data = Dataset(rand(10,20)) (generates dims and ids (no links))
|
||||
"""
|
||||
def __init__(self,array,identifiers=None,name='Unnamed dataset'):
|
||||
def __init__(self, array, identifiers=None, name='Unnamed dataset'):
|
||||
self._dims = [] #existing dimensions in this dataset
|
||||
self._map = {} # internal mapping for dataset: identifier <--> index
|
||||
self._name = name
|
||||
self._identifiers = identifiers
|
||||
self._type = 'n'
|
||||
try:
|
||||
array = atleast_2d(asarray(array))
|
||||
except:
|
||||
print "Cant cast array as numpy-array"
|
||||
return
|
||||
# vectors are column vectors
|
||||
if array.shape[0]==1:
|
||||
array = array.T
|
||||
self.shape = array.shape
|
||||
|
||||
if len(array.shape)==1:
|
||||
array = atleast_2d(asarray(array))
|
||||
# vectors are column vectors
|
||||
if array.shape[0]==1:
|
||||
array = array.T
|
||||
self.shape = array.shape
|
||||
if identifiers!=None:
|
||||
self._set_identifiers(identifiers,self._all_dims)
|
||||
identifier_shape = [len(i[1]) for i in identifiers]
|
||||
if len(identifier_shape)!=len(self.shape):
|
||||
raise ValueError, "Identifier list length must equal array dims"
|
||||
for ni, na in zip(identifier_shape, self.shape):
|
||||
if ni!=na:
|
||||
raise ValueError, "identifier-array mismatch in %s: (idents: %s, array: %s)" %(self._name, ni, na)
|
||||
self._set_identifiers(identifiers, self._all_dims)
|
||||
else:
|
||||
self._identifiers = self._create_identifiers(self.shape,self._all_dims)
|
||||
self._set_identifiers(self._identifiers,self._all_dims)
|
||||
|
||||
self._identifiers = self._create_identifiers(self.shape, self._all_dims)
|
||||
self._set_identifiers(self._identifiers, self._all_dims)
|
||||
self._array = array
|
||||
|
||||
def __iter__(self):
|
||||
@ -94,17 +96,16 @@ class Dataset:
|
||||
all_dims.add(dim_suggestion)
|
||||
return ids
|
||||
|
||||
def _set_identifiers(self,identifiers,all_dims):
|
||||
def _set_identifiers(self, identifiers, all_dims):
|
||||
"""Creates internal mapping of identifiers structure."""
|
||||
for dim,ids in identifiers:
|
||||
for dim, ids in identifiers:
|
||||
pos_map = ReverseDict()
|
||||
if dim not in self._dims:
|
||||
self._dims.append(dim)
|
||||
all_dims.add(dim)
|
||||
else:
|
||||
raise ValueError, "Dimension names must be unique whitin dataset"
|
||||
|
||||
for pos,id in enumerate(ids):
|
||||
for pos, id in enumerate(ids):
|
||||
pos_map[id] = pos
|
||||
self._map[dim] = pos_map
|
||||
|
||||
@ -121,11 +122,10 @@ class Dataset:
|
||||
"""Returns the numeric array (data) of dataset"""
|
||||
return self._array
|
||||
|
||||
def add_array(self,array):
|
||||
def add_array(self, array):
|
||||
"""Adds array as an ArrayType object.
|
||||
A one-dim array is transformed to a two-dim array (row-vector)
|
||||
"""
|
||||
|
||||
if self.shape!=array.shape:
|
||||
raise ValueError, "Input array must be of similar dimensions as dataset"
|
||||
self._array = atleast_2d(asarray(array))
|
||||
@ -138,7 +138,7 @@ class Dataset:
|
||||
"""Returns all dimensions in project"""
|
||||
return self._all_dims
|
||||
|
||||
def get_dim_name(self,axis=None):
|
||||
def get_dim_name(self, axis=None):
|
||||
"""Returns dim name for an axis, if no axis is provided it
|
||||
returns a list of dims"""
|
||||
if type(axis)==int:
|
||||
@ -178,7 +178,6 @@ class Dataset:
|
||||
You can optionally provide a list of identifiers to retrieve a
|
||||
index subset.
|
||||
|
||||
|
||||
Identifiers are the unique names (strings) for a variable in a
|
||||
given dim. Index (Indices) are the Identifiers position in a
|
||||
matrix in a given dim. If none of the input identifiers are
|
||||
@ -218,8 +217,8 @@ class CategoryDataset(Dataset):
|
||||
.
|
||||
"""
|
||||
|
||||
def __init__(self,array,identifiers=None,name='C'):
|
||||
Dataset.__init__(self,array,identifiers=identifiers,name=name)
|
||||
def __init__(self, array, identifiers=None, name='C'):
|
||||
Dataset.__init__(self, array, identifiers=identifiers, name=name)
|
||||
self.has_dictlists = False
|
||||
self._type = 'c'
|
||||
|
||||
@ -229,7 +228,7 @@ class CategoryDataset(Dataset):
|
||||
ex: data['gene_id'] = ['map0030','map0010', ...]
|
||||
"""
|
||||
data={}
|
||||
for name,ind in self._map[self.get_dim_name(0)].items():
|
||||
for name, ind in self._map[self.get_dim_name(0)].items():
|
||||
data[name] = self.get_identifiers(self.get_dim_name(1),
|
||||
list(self._array[ind,:].nonzero()))
|
||||
self._dictlists = data
|
||||
@ -240,7 +239,7 @@ class CategoryDataset(Dataset):
|
||||
"""Returns data as a list of Selection objects.
|
||||
"""
|
||||
ret_list = []
|
||||
for cat_name,ind in self._map[self.get_dim_name(1)].items():
|
||||
for cat_name, ind in self._map[self.get_dim_name(1)].items():
|
||||
ids = self.get_identifiers(self.get_dim_name(0),
|
||||
self._array[:,ind].nonzero()[0])
|
||||
selection = Selection(cat_name)
|
||||
@ -254,26 +253,26 @@ class GraphDataset(Dataset):
|
||||
|
||||
A dataset class for representing graphs using an (weighted)
|
||||
adjacency matrix
|
||||
(aka. restricted to square symmetric matrices)
|
||||
(restricted to square symmetric matrices)
|
||||
|
||||
If the library NetworkX is installed, there is support for
|
||||
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
|
||||
"""
|
||||
|
||||
def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
|
||||
Dataset.__init__(self,array=array,identifiers=identifiers,name='A')
|
||||
def __init__(self, array=None, identifiers=None, shape=None, all_dims=[],**kwds):
|
||||
Dataset.__init__(self, array=array, identifiers=identifiers, name='A')
|
||||
self._graph = None
|
||||
self._type = 'g'
|
||||
|
||||
def asnetworkx(self,nx_type='graph'):
|
||||
def asnetworkx(self, nx_type='graph'):
|
||||
dim = self.get_dim_name()[0]
|
||||
ids = self.get_identifiers(dim,sorted=True)
|
||||
ids = self.get_identifiers(dim, sorted=True)
|
||||
adj_mat = self.asarray()
|
||||
G = self._graph_from_adj_matrix(adj_mat,labels=ids)
|
||||
G = self._graph_from_adj_matrix(adj_mat, labels=ids)
|
||||
self._graph = G
|
||||
return G
|
||||
|
||||
def _graph_from_adj_matrix(self,A,labels=None):
|
||||
def _graph_from_adj_matrix(self, A, labels=None):
|
||||
"""Creates a networkx graph class from adjacency
|
||||
(possibly weighted) matrix and ordered labels.
|
||||
|
||||
@ -286,7 +285,7 @@ class GraphDataset(Dataset):
|
||||
except:
|
||||
print "Failed in import of NetworkX"
|
||||
return
|
||||
m,n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours
|
||||
m, n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours
|
||||
if m!=n:
|
||||
raise IOError, "Adjacency matrix must be square"
|
||||
|
||||
@ -298,17 +297,18 @@ class GraphDataset(Dataset):
|
||||
if labels==None: # if labels not provided mark vertices with numbers
|
||||
labels = [str(i) for i in range(m)]
|
||||
|
||||
for nbrs,head in izip(A,labels):
|
||||
for i,nbr in enumerate(nbrs):
|
||||
for nbrs, head in izip(A, labels):
|
||||
for i, nbr in enumerate(nbrs):
|
||||
if nbr:
|
||||
tail = labels[i]
|
||||
if type(G)==nx.XGraph:
|
||||
G.add_edge(head,tail,nbr)
|
||||
G.add_edge(head, tail, nbr)
|
||||
else:
|
||||
G.add_edge(head,tail)
|
||||
G.add_edge(head, tail)
|
||||
return G
|
||||
|
||||
Dataset._all_dims=set()
|
||||
Dataset._all_dims = set()
|
||||
|
||||
|
||||
class ReverseDict(dict):
|
||||
"""
|
||||
@ -336,30 +336,34 @@ def to_file(filepath,dataset,name=None):
|
||||
"""
|
||||
if not name:
|
||||
name = dataset._name
|
||||
data = shelve.open(filepath,flag='c',protocol=2)
|
||||
data = shelve.open(filepath, flag='c', protocol=2)
|
||||
if data: #we have an append
|
||||
names = data.keys()
|
||||
if name in names:
|
||||
print "Data with name: %s overwritten" %dataset._name
|
||||
sub_data = {'array':dataset._array,'idents':dataset._identifiers,'type':dataset._type}
|
||||
|
||||
sub_data = {'array':dataset._array,
|
||||
'idents':dataset._identifiers,
|
||||
'type':dataset._type}
|
||||
data[name] = sub_data
|
||||
data.close()
|
||||
|
||||
def from_file(filepath):
|
||||
"""Read dataset from file """
|
||||
data = shelve.open(filepath,flag='r')
|
||||
"""Read dataset(s) from file """
|
||||
data = shelve.open(filepath, flag='r')
|
||||
out_data = []
|
||||
for name in data.keys():
|
||||
sub_data = data[name]
|
||||
if sub_data['type']=='c':
|
||||
out_data.append(CategoryDataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
|
||||
out_data.append(CategoryDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
||||
elif sub_data['type']=='g':
|
||||
out_data.append(GraphDataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
|
||||
out_data.append(GraphDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
||||
else:
|
||||
out_data.append(Dataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
|
||||
out_data.append(Dataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
|
||||
|
||||
return out_data
|
||||
|
||||
|
||||
|
||||
class Selection(dict):
|
||||
"""Handles selected identifiers along each dimension of a dataset"""
|
||||
|
||||
|
Reference in New Issue
Block a user