Added from_file and to_file
This commit is contained in:
parent 06905934db
commit b14dc3c9c2
@@ -1,6 +1,7 @@
-from scipy import atleast_2d,asarray,ArrayType,shape,nonzero
+from scipy import atleast_2d,asarray,ArrayType,shape,nonzero,io,transpose
 from scipy import sort as array_sort
 from itertools import izip
+import shelve
 
 class Dataset:
     """The Dataset base class.
@@ -37,14 +38,19 @@ class Dataset:
         self._dims = [] #existing dimensions in this dataset
         self._map = {} # internal mapping for dataset: identifier <--> index
         self._name = name
+        self._identifiers = identifiers
+        self._type = 'n'
         if isinstance(array,ArrayType):
             array = atleast_2d(asarray(array))
+            # vectors are column vectors
+            if array.shape[0]==1:
+                array = transpose(array)
             self.shape = array.shape
             if identifiers!=None:
                 self._set_identifiers(identifiers,self._all_dims)
             else:
-                ids = self._create_identifiers(self.shape,self._all_dims)
-                self._set_identifiers(ids,self._all_dims)
+                self._identifiers = self._create_identifiers(self.shape,self._all_dims)
+                self._set_identifiers(self._identifiers,self._all_dims)
             self._array = array
 
 
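Note (not part of the diff): the hunk above makes __init__ keep the identifiers on the instance and normalize vector input, so a 1 x N array is stored as an N x 1 column vector. A minimal sketch of that behaviour, assuming Dataset can be constructed standalone with default identifiers (the values below are made up):

    # illustrative only; not in the commit
    v = Dataset(asarray([[1., 2., 3.]]), name='v')  # input shape (1, 3)
    print v.shape         # (3, 1): row vectors become column vectors
    print v._identifiers  # auto-generated identifiers, now kept on the instance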
@@ -178,7 +184,6 @@ class Dataset:
         index = [self._map[dim][key] for key in idents]
         return asarray(index)
 
-
 
 class CategoryDataset(Dataset):
     """The category dataset class.
@@ -203,6 +208,7 @@ class CategoryDataset(Dataset):
     def __init__(self,array,identifiers=None,name='C'):
         Dataset.__init__(self,array,identifiers=identifiers,name=name)
         self.has_dictlists = False
+        self._type = 'c'
 
     def as_dict_lists(self):
         """Returns data as dict of indices along first dim"""
@@ -226,6 +232,7 @@ class GraphDataset(Dataset):
     def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
         Dataset.__init__(self,array=array,identifiers=identifiers,name='A')
         self.has_graph = False
+        self._type = 'g'
 
     def asnetworkx(self,nx_type='graph'):
         dim = self.get_dim_name()[0]
@@ -286,3 +293,34 @@ class ReverseDict(dict):
     def __setitem__(self, key, value):
         dict.__setitem__(self, key, value)
         self.reverse[value] = key
+
+def to_file(filepath,dataset,name=None):
+    """Write dataset to file. A file may contain multiple datasets.
+
+    append to file by using option mode='a'
+    """
+    if not name:
+        name = dataset._name
+    data = shelve.open(filepath,protocol=2)
+    if data: #we have an append
+        names = data.keys()
+        if name in names:
+            print "Data with name: %s overwritten" %dataset._name
+    sub_data = {'array':dataset._array,'idents':dataset._identifiers,'type':dataset._type}
+    data[name] = sub_data
+    data.close()
+
+def from_file(filepath):
+    """Read dataset from file """
+    data = shelve.open(filepath)
+    out_data = []
+    for name in data.keys():
+        sub_data = data[name]
+        if sub_data['type']=='c':
+            out_data.append(CategoryDataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
+        elif sub_data['type']=='g':
+            out_data.append(GraphDataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
+        else:
+            out_data.append(Dataset(sub_data['array'],identifiers=sub_data['idents'],name=name))
+
+    return out_data
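For context, a short usage sketch of the new shelve-backed helpers (not part of the commit; the file name 'data.shelve' and the example array are assumptions). to_file stores the dataset's array, identifiers and type under its name, and from_file rebuilds the matching Dataset subclass from the stored 'type' field:

    # illustrative only; not in the commit
    ds = CategoryDataset(asarray([[1, 0], [0, 1], [1, 1]]), name='mydata')
    to_file('data.shelve', ds)               # keyed by 'mydata' in the shelve file
    for loaded in from_file('data.shelve'):  # returns a list of datasets
        print loaded._name, loaded.shape     # Python 2 print, matching the module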