diff --git a/system/dataset.py b/system/dataset.py index 75db010..6f6db4e 100644 --- a/system/dataset.py +++ b/system/dataset.py @@ -1,6 +1,7 @@ -from scipy import atleast_2d,asarray,ArrayType,shape,nonzero +from scipy import atleast_2d,asarray,ArrayType,shape,nonzero,io,transpose from scipy import sort as array_sort from itertools import izip +import shelve class Dataset: """The Dataset base class. @@ -37,14 +38,19 @@ class Dataset: self._dims = [] #existing dimensions in this dataset self._map = {} # internal mapping for dataset: identifier <--> index self._name = name + self._identifiers = identifiers + self._type = 'n' if isinstance(array,ArrayType): array = atleast_2d(asarray(array)) + # vectors are column vectors + if array.shape[0]==1: + array = transpose(array) self.shape = array.shape if identifiers!=None: self._set_identifiers(identifiers,self._all_dims) else: - ids = self._create_identifiers(self.shape,self._all_dims) - self._set_identifiers(ids,self._all_dims) + self._identifiers = self._create_identifiers(self.shape,self._all_dims) + self._set_identifiers(self._identifiers,self._all_dims) self._array = array @@ -176,9 +182,8 @@ class Dataset: index = array_sort(self._map[dim].values()) else: index = [self._map[dim][key] for key in idents] - return asarray(index) - - + return asarray(index) + class CategoryDataset(Dataset): """The category dataset class. 
def to_file(filepath, dataset, name=None):
    """Write a dataset to a shelve file.

    A file may hold several datasets, each stored under its own name.
    Writing to an existing file adds to it; if an entry with the same
    name is already present it is replaced and a notice is printed.

    Arguments:
      filepath -- path handed to shelve.open (the dbm backend may add
                  its own suffix to the file(s) it creates)
      dataset  -- object providing the attributes _name, _array,
                  _identifiers and _type
      name     -- key to store the dataset under; when empty or None
                  dataset._name is used
    """
    # Shelf objects are not context managers on every Python version,
    # closing() guarantees the file is released even if the write fails.
    from contextlib import closing

    if not name:
        name = dataset._name
    sub_data = {
        'array': dataset._array,
        'idents': dataset._identifiers,
        'type': dataset._type,
    }
    with closing(shelve.open(filepath, protocol=2)) as data:
        if name in data:
            # Report the key that is really replaced (it may differ from
            # dataset._name when an explicit name was given).
            print("Data with name: %s overwritten" % name)
        data[name] = sub_data


def from_file(filepath):
    """Read every dataset stored in a shelve file.

    Each stored entry is rebuilt according to its 'type' tag:
    'c' gives a CategoryDataset, 'g' a GraphDataset and anything else a
    plain Dataset. The entry's key is passed on as the dataset name.

    Arguments:
      filepath -- path handed to shelve.open

    Returns a list of datasets, in the order the shelf yields its keys.

    NOTE: shelve unpickles the stored values, so only read files that
    come from a trusted source.
    """
    from contextlib import closing

    out_data = []
    with closing(shelve.open(filepath)) as data:
        for name in data.keys():
            sub_data = data[name]
            array = sub_data['array']
            idents = sub_data['idents']
            kind = sub_data['type']
            if kind == 'c':
                restored = CategoryDataset(array, identifiers=idents, name=name)
            elif kind == 'g':
                restored = GraphDataset(array, identifiers=idents, name=name)
            else:
                restored = Dataset(array, identifiers=idents, name=name)
            out_data.append(restored)
    return out_data