Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Errors when identifiers don't match shape, + whitespace

This commit is contained in:
Arnar Flatberg 2007-01-31 11:54:54 +00:00
parent 48bb47ec86
commit ed2848beb3
1 changed files with 51 additions and 47 deletions

View File

@ -36,28 +36,30 @@ class Dataset:
data = Dataset(rand(10,20)) (generates dims and ids (no links)) data = Dataset(rand(10,20)) (generates dims and ids (no links))
""" """
def __init__(self,array,identifiers=None,name='Unnamed dataset'): def __init__(self, array, identifiers=None, name='Unnamed dataset'):
self._dims = [] #existing dimensions in this dataset self._dims = [] #existing dimensions in this dataset
self._map = {} # internal mapping for dataset: identifier <--> index self._map = {} # internal mapping for dataset: identifier <--> index
self._name = name self._name = name
self._identifiers = identifiers self._identifiers = identifiers
self._type = 'n' self._type = 'n'
try:
array = atleast_2d(asarray(array))
except:
print "Cant cast array as numpy-array"
return
# vectors are column vectors
if array.shape[0]==1:
array = array.T
self.shape = array.shape
if len(array.shape)==1:
array = atleast_2d(asarray(array))
# vectors are column vectors
if array.shape[0]==1:
array = array.T
self.shape = array.shape
if identifiers!=None: if identifiers!=None:
self._set_identifiers(identifiers,self._all_dims) identifier_shape = [len(i[1]) for i in identifiers]
if len(identifier_shape)!=len(self.shape):
raise ValueError, "Identifier list length must equal array dims"
for ni, na in zip(identifier_shape, self.shape):
if ni!=na:
raise ValueError, "identifier-array mismatch in %s: (idents: %s, array: %s)" %(self._name, ni, na)
self._set_identifiers(identifiers, self._all_dims)
else: else:
self._identifiers = self._create_identifiers(self.shape,self._all_dims) self._identifiers = self._create_identifiers(self.shape, self._all_dims)
self._set_identifiers(self._identifiers,self._all_dims) self._set_identifiers(self._identifiers, self._all_dims)
self._array = array self._array = array
def __iter__(self): def __iter__(self):
@ -94,17 +96,16 @@ class Dataset:
all_dims.add(dim_suggestion) all_dims.add(dim_suggestion)
return ids return ids
def _set_identifiers(self,identifiers,all_dims): def _set_identifiers(self, identifiers, all_dims):
"""Creates internal mapping of identifiers structure.""" """Creates internal mapping of identifiers structure."""
for dim,ids in identifiers: for dim, ids in identifiers:
pos_map = ReverseDict() pos_map = ReverseDict()
if dim not in self._dims: if dim not in self._dims:
self._dims.append(dim) self._dims.append(dim)
all_dims.add(dim) all_dims.add(dim)
else: else:
raise ValueError, "Dimension names must be unique whitin dataset" raise ValueError, "Dimension names must be unique whitin dataset"
for pos, id in enumerate(ids):
for pos,id in enumerate(ids):
pos_map[id] = pos pos_map[id] = pos
self._map[dim] = pos_map self._map[dim] = pos_map
@ -121,11 +122,10 @@ class Dataset:
"""Returns the numeric array (data) of dataset""" """Returns the numeric array (data) of dataset"""
return self._array return self._array
def add_array(self,array): def add_array(self, array):
"""Adds array as an ArrayType object. """Adds array as an ArrayType object.
A one-dim array is transformed to a two-dim array (row-vector) A one-dim array is transformed to a two-dim array (row-vector)
""" """
if self.shape!=array.shape: if self.shape!=array.shape:
raise ValueError, "Input array must be of similar dimensions as dataset" raise ValueError, "Input array must be of similar dimensions as dataset"
self._array = atleast_2d(asarray(array)) self._array = atleast_2d(asarray(array))
@ -138,7 +138,7 @@ class Dataset:
"""Returns all dimensions in project""" """Returns all dimensions in project"""
return self._all_dims return self._all_dims
def get_dim_name(self,axis=None): def get_dim_name(self, axis=None):
"""Returns dim name for an axis, if no axis is provided it """Returns dim name for an axis, if no axis is provided it
returns a list of dims""" returns a list of dims"""
if type(axis)==int: if type(axis)==int:
@ -178,7 +178,6 @@ class Dataset:
You can optionally provide a list of identifiers to retrieve a You can optionally provide a list of identifiers to retrieve a
index subset. index subset.
Identifiers are the unique names (strings) for a variable in a Identifiers are the unique names (strings) for a variable in a
given dim. Index (Indices) are the Identifiers position in a given dim. Index (Indices) are the Identifiers position in a
matrix in a given dim. If none of the input identifiers are matrix in a given dim. If none of the input identifiers are
@ -218,8 +217,8 @@ class CategoryDataset(Dataset):
. .
""" """
def __init__(self,array,identifiers=None,name='C'): def __init__(self, array, identifiers=None, name='C'):
Dataset.__init__(self,array,identifiers=identifiers,name=name) Dataset.__init__(self, array, identifiers=identifiers, name=name)
self.has_dictlists = False self.has_dictlists = False
self._type = 'c' self._type = 'c'
@ -229,7 +228,7 @@ class CategoryDataset(Dataset):
ex: data['gene_id'] = ['map0030','map0010', ...] ex: data['gene_id'] = ['map0030','map0010', ...]
""" """
data={} data={}
for name,ind in self._map[self.get_dim_name(0)].items(): for name, ind in self._map[self.get_dim_name(0)].items():
data[name] = self.get_identifiers(self.get_dim_name(1), data[name] = self.get_identifiers(self.get_dim_name(1),
list(self._array[ind,:].nonzero())) list(self._array[ind,:].nonzero()))
self._dictlists = data self._dictlists = data
@ -240,7 +239,7 @@ class CategoryDataset(Dataset):
"""Returns data as a list of Selection objects. """Returns data as a list of Selection objects.
""" """
ret_list = [] ret_list = []
for cat_name,ind in self._map[self.get_dim_name(1)].items(): for cat_name, ind in self._map[self.get_dim_name(1)].items():
ids = self.get_identifiers(self.get_dim_name(0), ids = self.get_identifiers(self.get_dim_name(0),
self._array[:,ind].nonzero()[0]) self._array[:,ind].nonzero()[0])
selection = Selection(cat_name) selection = Selection(cat_name)
@ -254,26 +253,26 @@ class GraphDataset(Dataset):
A dataset class for representing graphs using an (weighted) A dataset class for representing graphs using an (weighted)
adjacency matrix adjacency matrix
(aka. restricted to square symmetric matrices) (restricted to square symmetric matrices)
If the library NetworkX is installed, there is support for If the library NetworkX is installed, there is support for
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure. representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
""" """
def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds): def __init__(self, array=None, identifiers=None, shape=None, all_dims=[],**kwds):
Dataset.__init__(self,array=array,identifiers=identifiers,name='A') Dataset.__init__(self, array=array, identifiers=identifiers, name='A')
self._graph = None self._graph = None
self._type = 'g' self._type = 'g'
def asnetworkx(self,nx_type='graph'): def asnetworkx(self, nx_type='graph'):
dim = self.get_dim_name()[0] dim = self.get_dim_name()[0]
ids = self.get_identifiers(dim,sorted=True) ids = self.get_identifiers(dim, sorted=True)
adj_mat = self.asarray() adj_mat = self.asarray()
G = self._graph_from_adj_matrix(adj_mat,labels=ids) G = self._graph_from_adj_matrix(adj_mat, labels=ids)
self._graph = G self._graph = G
return G return G
def _graph_from_adj_matrix(self,A,labels=None): def _graph_from_adj_matrix(self, A, labels=None):
"""Creates a networkx graph class from adjacency """Creates a networkx graph class from adjacency
(possibly weighted) matrix and ordered labels. (possibly weighted) matrix and ordered labels.
@ -286,7 +285,7 @@ class GraphDataset(Dataset):
except: except:
print "Failed in import of NetworkX" print "Failed in import of NetworkX"
return return
m,n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours m, n = A.shape# adjacency matrix must be of type that evals to true/false for neigbours
if m!=n: if m!=n:
raise IOError, "Adjacency matrix must be square" raise IOError, "Adjacency matrix must be square"
@ -298,17 +297,18 @@ class GraphDataset(Dataset):
if labels==None: # if labels not provided mark vertices with numbers if labels==None: # if labels not provided mark vertices with numbers
labels = [str(i) for i in range(m)] labels = [str(i) for i in range(m)]
for nbrs,head in izip(A,labels): for nbrs, head in izip(A, labels):
for i,nbr in enumerate(nbrs): for i, nbr in enumerate(nbrs):
if nbr: if nbr:
tail = labels[i] tail = labels[i]
if type(G)==nx.XGraph: if type(G)==nx.XGraph:
G.add_edge(head,tail,nbr) G.add_edge(head, tail, nbr)
else: else:
G.add_edge(head,tail) G.add_edge(head, tail)
return G return G
Dataset._all_dims=set() Dataset._all_dims = set()
class ReverseDict(dict): class ReverseDict(dict):
""" """
@ -336,30 +336,34 @@ def to_file(filepath,dataset,name=None):
""" """
if not name: if not name:
name = dataset._name name = dataset._name
data = shelve.open(filepath,flag='c',protocol=2) data = shelve.open(filepath, flag='c', protocol=2)
if data: #we have an append if data: #we have an append
names = data.keys() names = data.keys()
if name in names: if name in names:
print "Data with name: %s overwritten" %dataset._name print "Data with name: %s overwritten" %dataset._name
sub_data = {'array':dataset._array,'idents':dataset._identifiers,'type':dataset._type}
sub_data = {'array':dataset._array,
'idents':dataset._identifiers,
'type':dataset._type}
data[name] = sub_data data[name] = sub_data
data.close() data.close()
def from_file(filepath): def from_file(filepath):
"""Read dataset from file """ """Read dataset(s) from file """
data = shelve.open(filepath,flag='r') data = shelve.open(filepath, flag='r')
out_data = [] out_data = []
for name in data.keys(): for name in data.keys():
sub_data = data[name] sub_data = data[name]
if sub_data['type']=='c': if sub_data['type']=='c':
out_data.append(CategoryDataset(sub_data['array'],identifiers=sub_data['idents'],name=name)) out_data.append(CategoryDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
elif sub_data['type']=='g': elif sub_data['type']=='g':
out_data.append(GraphDataset(sub_data['array'],identifiers=sub_data['idents'],name=name)) out_data.append(GraphDataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
else: else:
out_data.append(Dataset(sub_data['array'],identifiers=sub_data['idents'],name=name)) out_data.append(Dataset(sub_data['array'], identifiers=sub_data['idents'], name=name))
return out_data return out_data
class Selection(dict): class Selection(dict):
"""Handles selected identifiers along each dimension of a dataset""" """Handles selected identifiers along each dimension of a dataset"""