removed shape in constructor, added all_dims to dataset base class, updated get_identifiers and get_indices

Denne linjen og de som er under vil bli ignorert--

M    dataset.py
This commit is contained in:
Arnar Flatberg 2006-08-01 09:45:53 +00:00
parent 61da4f562e
commit 676ea4e0b9

View File

@ -1,5 +1,6 @@
from scipy import atleast_2d,asarray,ArrayType
from scipy import atleast_2d,asarray,ArrayType,shape
from scipy import sort as array_sort
from itertools import izip
class Dataset:
"""The Dataset base class.
@ -32,45 +33,26 @@ class Dataset:
data = Dataset(rand(10,20)) (generates dims and ids (no links))
"""
def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
self._name = kwds.get("name","Unnamed data")
def __init__(self,array,identifiers=None,name='Unnamed dataset'):
self._dims = [] #existing dimensions in this dataset
self._map = {} # internal mapping for dataset: identifier <--> index
self.has_array = False
self.shape = None
if array==None:
if shape == None:
raise ValueError, "Must define shape if array is None"
else:
self.shape = shape
if identifiers!=None:
self._set_identifiers(identifiers,all_dims)
else:
ids = self._create_identifiers(shape,all_dims)
self._set_identifiers(ids,all_dims)
elif isinstance(array,ArrayType):
self._name = name
if isinstance(array,ArrayType):
array = atleast_2d(asarray(array))
self.shape = array.shape
if shape != None:
if self.shape!=shape:
raise ValueError, "Differing in array and provided. %s != %s" % (self.shape, shape)
if identifiers!=None:
self._set_identifiers(identifiers,all_dims)
self._set_identifiers(identifiers,self._all_dims)
else:
ids = self._create_identifiers(self.shape,all_dims)
self._set_identifiers(ids,all_dims)
ids = self._create_identifiers(self.shape,self._all_dims)
self._set_identifiers(ids,self._all_dims)
self._array = array
self.has_array = True
else:
raise ValueError, "array input must be of ArrayType or None"
self._all_dims = all_dims
raise ValueError, "Array input must be of ArrayType"
def __str__(self):
return self._name + ":" + self._dims.__str__()
return self._name + ":\n" + "Dim names: " + self._dims.__str__()
def __iter__(self):
"""Returns an iterator over dimensions of dataset."""
@ -100,11 +82,10 @@ class Dataset:
dim_suggestion = dim_names[axis]
else:
dim_suggestion = 'dim'
while dim_suggestion in all_dims:
dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims)
dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims)
identifier_creation = [str(axis) + "_" + i for i in map(str,range(n))]
ids.append((dim_suggestion,identifier_creation))
all_dims.append(dim_suggestion)
all_dims.add(dim_suggestion)
return ids
def _set_identifiers(self,identifiers,all_dims):
@ -113,24 +94,22 @@ class Dataset:
pos_map={}
if dim not in self._dims:
self._dims.append(dim)
all_dims.append(dim)
all_dims.add(dim)
else:
raise ValueError, "Dimension names must be unique"
raise ValueError, "Dimension names must be unique whitin dataset"
for pos,id in enumerate(ids):
pos_map[id] = pos
self._map[dim] = pos_map
shape_chk = [len(i) for j,i in identifiers]
if shape_chk != list(self.shape):
raise ValueError, "Shape input: %s and array: %s mismatch" %(shape_chk,self.shape)
def _suggest_dim_name(self,dim_name,all_dims):
"""Suggests a unique name for dim and returns it"""
c = 0
while dim_name in all_dims:
dim_name = dim_name + "_" + str(c)
new_name = dim_name
while new_name in all_dims:
new_name = dim_name + "_" + str(c)
c+=1
return dim_name
return new_name
def asarray(self):
"""Returns the numeric array (data) of dataset"""
@ -144,15 +123,9 @@ class Dataset:
A one-dim array is transformed to a two-dim array (row-vector)
"""
if self.has_array:
raise ValueError, "Dataset has array"
else:
if (len(self._map)!=len(array.shape)):
raise ValueError, "range(array_dims) and range(dim_names) mismatch"
if self.shape!=array.shape:
raise ValueError, "Input array must be of similar dimensions as dataset"
self._array = atleast_2d(asarray(array))
self.has_array = True
if self.shape!=array.shape:
raise ValueError, "Input array must be of similar dimensions as dataset"
self._array = atleast_2d(asarray(array))
def get_name(self):
"""Returns dataset name"""
@ -162,34 +135,50 @@ class Dataset:
"""Returns all dimensions in project"""
return self._all_dims
def get_dim_names(self):
    """Return the names of this dataset's dimensions as a new list."""
    # Copy so callers cannot alter the dataset's own dimension list.
    return list(self._dims)
def get_dim_name(self, axis=None):
    """Return the dim name for an axis, or all dim names if axis is None.

    axis -- position of the wanted dimension. Anything usable as a list
            index is accepted (plain ints, longs, integer scalars taken
            from an index array). The earlier exact ``type(axis)==int``
            test silently returned the full list for such values.

    Raises IndexError if axis is out of range for the dimension list.
    """
    if axis is None:
        return [dim for dim in self]
    return self._dims[axis]
def get_identifiers(self, dim, indices=None):
"""Returns identifiers along dim, sorted by position (index).
def get_identifiers(self, dim, indices=None,sorted=True):
"""Returns identifiers along dim, sorted by position (index) is optional.
You can optionally provide a list of indices to get only the
identifiers of a given position.
Identifiers are the unique names (strings) for a variable in a given dim.
Index (Indices) are the Identifiers position in a matrix in a given dim.
"""
items = self._map[dim].items()
backitems=[ [v[1],v[0]] for v in items]
backitems.sort()
sorted_ids=[ backitems[i][1] for i in range(0,len(backitems))]
# we use id as scipy-arrays return a new array on boolean
# operations
if id(indices) != id(None):
return [sorted_ids[index] for index in indices]
if sorted==True:
items = self._map[dim].items()
backitems = [ [v[1],v[0]] for v in items]
backitems.sort()
ids = [ backitems[i][1] for i in range(0,len(backitems))]
else:
return sorted_ids
ids = self._map[dim].keys()
def get_indices(self, dim, idents):
"""Get indices for identifiers along dimension."""
reverse = {}
for key, value in self._map[dim].items():
reverse[value] = key
return [self._map[dim][key] for key in idents]
if indices != None:
ids = [self._map[index] for index in indices]
return ids
def get_indices(self, dim, idents=None):
    """Returns indices for identifiers along dimension.

    You can optionally provide a list of identifiers to retrieve an index
    subset; the result then follows the order of idents. Without idents,
    every index along dim is returned in ascending order.

    Identifiers are the unique names (strings) for a variable in a given dim.
    Index (Indices) are the Identifiers position in a matrix in a given dim.

    Raises KeyError if dim or one of the identifiers is unknown.
    """
    positions = self._map[dim]
    # Identity test: comparing an array of identifiers with == None is
    # evaluated element-wise and cannot be used as a condition.
    if idents is None:
        index = sorted(positions.values())
    else:
        index = [positions[key] for key in idents]
    return asarray(index)
class CategoryDataset(Dataset):
"""The category dataset class.
@ -236,10 +225,47 @@ class GraphDataset(Dataset):
If the library NetworkX is installed, there is support for
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
"""
def __init__(self):
Dataset.__init(self)
def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
    """Create a graph dataset from an adjacency array.

    array and identifiers are handed to Dataset.__init__ unchanged.
    A ``name`` keyword is used as the dataset name; it defaults to 'A'.
    shape and all_dims are not used here.
    NOTE(review): they look like leftovers of the older constructor
    signature kept so existing calls still work -- confirm.
    """
    # Previously the name was always 'A' and **kwds was ignored.
    Dataset.__init__(self, array=array, identifiers=identifiers,
                     name=kwds.get('name', 'A'))
    self.has_graph = False
def asnetworkx(self, nx_type='graph'):
    """Return the dataset as a networkx graph.

    The dataset array is read as an adjacency matrix, and the identifiers
    of the first dimension are used as vertex labels.

    nx_type -- graph class to build, passed on to _graph_from_adj_matrix
               (it used to be accepted here but never forwarded).

    Sets self.has_graph to True once the graph has been built.
    """
    dim = self.get_dim_names()[0]
    ids = self.get_identifiers(dim)
    adj_mat = self.asarray()
    G = self._graph_from_adj_matrix(adj_mat, labels=ids, nx_type=nx_type)
    self.has_graph = True
    return G
def _graph_from_adj_matrix(self,A,labels=None,nx_type='graph'):
"""Creates a networkx graph class from adjacency matrix and ordered labels.
nx_type = ['graph',['xgraph']]
labels = None, results in string-numbered labels
"""
import networkx as nx
m,n = shape(A)# adjacency matrix must be of type that evals to true/false for neigbours
if m!=n:
raise IOError, "Adjacency matrix must be square"
if nx_type=='graph':
G = nx.Graph()
elif nx_type=='x_graph':
G = nx.XGraph()
else:
raise IOError, "Unknown graph type: %s" %nx_type
if labels==None: # if labels not provided mark vertices with numbers
labels = [str(i) for i in range(m)]
for nbrs,head in izip(A,labels):
for i,nbr in enumerate(nbrs):
if nbr:
tail = labels[i]
G.add_edge(head,tail)
return G
# Class-level set of dimension names. Being a class attribute, it is shared
# by every Dataset (and subclass) instance that does not assign its own
# _all_dims.
Dataset._all_dims=set()
class Selection:
"""Handles selected identifiers along each dimension of a dataset"""
def __init__(self):