removed shape in constructor, added all_dims to dataset base class, updated get_identifiers and get_indices
Denne linjen og de som er under vil bli ignorert-- M dataset.py
This commit is contained in:
parent
61da4f562e
commit
676ea4e0b9
@ -1,5 +1,6 @@
|
||||
from scipy import atleast_2d,asarray,ArrayType
|
||||
|
||||
from scipy import atleast_2d,asarray,ArrayType,shape
|
||||
from scipy import sort as array_sort
|
||||
from itertools import izip
|
||||
|
||||
class Dataset:
|
||||
"""The Dataset base class.
|
||||
@ -32,45 +33,26 @@ class Dataset:
|
||||
|
||||
data = Dataset(rand(10,20)) (generates dims and ids (no links))
|
||||
"""
|
||||
def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
|
||||
self._name = kwds.get("name","Unnamed data")
|
||||
def __init__(self,array,identifiers=None,name='Unnamed dataset'):
|
||||
self._dims = [] #existing dimensions in this dataset
|
||||
self._map = {} # internal mapping for dataset: identifier <--> index
|
||||
self.has_array = False
|
||||
self.shape = None
|
||||
|
||||
if array==None:
|
||||
if shape == None:
|
||||
raise ValueError, "Must define shape if array is None"
|
||||
else:
|
||||
self.shape = shape
|
||||
if identifiers!=None:
|
||||
self._set_identifiers(identifiers,all_dims)
|
||||
else:
|
||||
ids = self._create_identifiers(shape,all_dims)
|
||||
self._set_identifiers(ids,all_dims)
|
||||
elif isinstance(array,ArrayType):
|
||||
self._name = name
|
||||
if isinstance(array,ArrayType):
|
||||
array = atleast_2d(asarray(array))
|
||||
self.shape = array.shape
|
||||
if shape != None:
|
||||
if self.shape!=shape:
|
||||
raise ValueError, "Differing in array and provided. %s != %s" % (self.shape, shape)
|
||||
if identifiers!=None:
|
||||
self._set_identifiers(identifiers,all_dims)
|
||||
self._set_identifiers(identifiers,self._all_dims)
|
||||
else:
|
||||
ids = self._create_identifiers(self.shape,all_dims)
|
||||
self._set_identifiers(ids,all_dims)
|
||||
ids = self._create_identifiers(self.shape,self._all_dims)
|
||||
self._set_identifiers(ids,self._all_dims)
|
||||
|
||||
self._array = array
|
||||
self.has_array = True
|
||||
|
||||
else:
|
||||
raise ValueError, "array input must be of ArrayType or None"
|
||||
|
||||
self._all_dims = all_dims
|
||||
raise ValueError, "Array input must be of ArrayType"
|
||||
|
||||
def __str__(self):
|
||||
return self._name + ":" + self._dims.__str__()
|
||||
return self._name + ":\n" + "Dim names: " + self._dims.__str__()
|
||||
|
||||
def __iter__(self):
|
||||
"""Returns an iterator over dimensions of dataset."""
|
||||
@ -100,11 +82,10 @@ class Dataset:
|
||||
dim_suggestion = dim_names[axis]
|
||||
else:
|
||||
dim_suggestion = 'dim'
|
||||
while dim_suggestion in all_dims:
|
||||
dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims)
|
||||
dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims)
|
||||
identifier_creation = [str(axis) + "_" + i for i in map(str,range(n))]
|
||||
ids.append((dim_suggestion,identifier_creation))
|
||||
all_dims.append(dim_suggestion)
|
||||
all_dims.add(dim_suggestion)
|
||||
return ids
|
||||
|
||||
def _set_identifiers(self,identifiers,all_dims):
|
||||
@ -113,24 +94,22 @@ class Dataset:
|
||||
pos_map={}
|
||||
if dim not in self._dims:
|
||||
self._dims.append(dim)
|
||||
all_dims.append(dim)
|
||||
all_dims.add(dim)
|
||||
else:
|
||||
raise ValueError, "Dimension names must be unique"
|
||||
raise ValueError, "Dimension names must be unique whitin dataset"
|
||||
|
||||
for pos,id in enumerate(ids):
|
||||
pos_map[id] = pos
|
||||
self._map[dim] = pos_map
|
||||
shape_chk = [len(i) for j,i in identifiers]
|
||||
if shape_chk != list(self.shape):
|
||||
raise ValueError, "Shape input: %s and array: %s mismatch" %(shape_chk,self.shape)
|
||||
|
||||
|
||||
def _suggest_dim_name(self,dim_name,all_dims):
|
||||
"""Suggests a unique name for dim and returns it"""
|
||||
c = 0
|
||||
while dim_name in all_dims:
|
||||
dim_name = dim_name + "_" + str(c)
|
||||
new_name = dim_name
|
||||
while new_name in all_dims:
|
||||
new_name = dim_name + "_" + str(c)
|
||||
c+=1
|
||||
return dim_name
|
||||
return new_name
|
||||
|
||||
def asarray(self):
|
||||
"""Returns the numeric array (data) of dataset"""
|
||||
@ -144,15 +123,9 @@ class Dataset:
|
||||
A one-dim array is transformed to a two-dim array (row-vector)
|
||||
"""
|
||||
|
||||
if self.has_array:
|
||||
raise ValueError, "Dataset has array"
|
||||
else:
|
||||
if (len(self._map)!=len(array.shape)):
|
||||
raise ValueError, "range(array_dims) and range(dim_names) mismatch"
|
||||
if self.shape!=array.shape:
|
||||
raise ValueError, "Input array must be of similar dimensions as dataset"
|
||||
self._array = atleast_2d(asarray(array))
|
||||
self.has_array = True
|
||||
if self.shape!=array.shape:
|
||||
raise ValueError, "Input array must be of similar dimensions as dataset"
|
||||
self._array = atleast_2d(asarray(array))
|
||||
|
||||
def get_name(self):
|
||||
"""Returns dataset name"""
|
||||
@ -162,34 +135,50 @@ class Dataset:
|
||||
"""Returns all dimensions in project"""
|
||||
return self._all_dims
|
||||
|
||||
def get_dim_names(self):
    """Returns dim names.

    The result is a new list holding the names in self._dims, so the
    caller may modify it without touching the dataset.
    """
    return list(self._dims)
|
||||
def get_dim_name(self,axis=None):
    """Returns dim name for an axis, if no axis is provided it returns a list of dims.

    axis -- integer position into the dataset's dims. Any other value
            (the default is None) gives the list of all dims, built by
            iterating over the dataset itself.
    """
    # isinstance instead of type(axis)==int, so that int subclasses are
    # accepted as an axis; bool is excluded to keep its previous result.
    if isinstance(axis, int) and not isinstance(axis, bool):
        return self._dims[axis]
    return list(self)
|
||||
|
||||
def get_identifiers(self, dim, indices=None):
|
||||
"""Returns identifiers along dim, sorted by position (index).
|
||||
def get_identifiers(self, dim, indices=None,sorted=True):
|
||||
"""Returns identifiers along dim, sorted by position (index) is optional.
|
||||
|
||||
You can optionally provide a list of indices to get only the
|
||||
identifiers of a given position.
|
||||
|
||||
Identifiers are the unique names (strings) for a variable in a given dim.
|
||||
Index (Indices) are the Identifiers position in a matrix in a given dim.
|
||||
"""
|
||||
items = self._map[dim].items()
|
||||
backitems=[ [v[1],v[0]] for v in items]
|
||||
backitems.sort()
|
||||
sorted_ids=[ backitems[i][1] for i in range(0,len(backitems))]
|
||||
|
||||
# we use id as scipy-arrays return a new array on boolean
|
||||
# operations
|
||||
if id(indices) != id(None):
|
||||
return [sorted_ids[index] for index in indices]
|
||||
if sorted==True:
|
||||
items = self._map[dim].items()
|
||||
backitems = [ [v[1],v[0]] for v in items]
|
||||
backitems.sort()
|
||||
ids = [ backitems[i][1] for i in range(0,len(backitems))]
|
||||
|
||||
else:
|
||||
return sorted_ids
|
||||
ids = self._map[dim].keys()
|
||||
|
||||
def get_indices(self, dim, idents):
|
||||
"""Get indices for identifiers along dimension."""
|
||||
reverse = {}
|
||||
for key, value in self._map[dim].items():
|
||||
reverse[value] = key
|
||||
return [self._map[dim][key] for key in idents]
|
||||
if indices != None:
|
||||
ids = [self._map[index] for index in indices]
|
||||
|
||||
return ids
|
||||
|
||||
|
||||
def get_indices(self, dim, idents=None):
    """Returns indices for identifiers along dimension.

    You can optionally provide a list of identifiers to retrieve a index subset.

    Identifiers are the unique names (strings) for a variable in a given dim.
    Index (Indices) are the Identifiers position in a matrix in a given dim.

    The result is always an array: every index of dim in ascending order
    when idents is None, otherwise the indices of idents in the order given.
    An unknown dim or identifier raises KeyError.
    """
    dim_map = self._map[dim]
    # 'is None' rather than '==None': an array of identifiers compares
    # elementwise and cannot be used as a single condition.
    if idents is None:
        # list() so the values are sorted as a plain sequence
        index = array_sort(list(dim_map.values()))
    else:
        index = [dim_map[key] for key in idents]
    return asarray(index)
|
||||
|
||||
class CategoryDataset(Dataset):
|
||||
"""The category dataset class.
|
||||
@ -236,10 +225,47 @@ class GraphDataset(Dataset):
|
||||
If the library NetworkX is installed, there is support for
|
||||
representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
|
||||
"""
|
||||
def __init__(self):
|
||||
Dataset.__init(self)
|
||||
def __init__(self,array=None,identifiers=None,shape=None,all_dims=None,**kwds):
    """Creates a graph dataset.

    array, identifiers -- passed on unchanged to Dataset.__init__.
    shape, all_dims    -- accepted, but not used by this constructor.
    name (keyword)     -- dataset name; 'A' when not given.
    """
    Dataset.__init__(self,array=array,identifiers=identifiers,name=kwds.get('name','A'))
    self.has_graph = False  # asnetworkx() sets this to True
|
||||
|
||||
def asnetworkx(self,nx_type='graph'):
    """Returns the dataset as a networkx graph.

    The dataset array is read as an adjacency matrix and the identifiers
    of the first dim are used as vertex labels.

    nx_type -- graph type to build, forwarded to _graph_from_adj_matrix.
    """
    dim = self.get_dim_names()[0]
    ids = self.get_identifiers(dim)
    adj_mat = self.asarray()
    # nx_type has to be forwarded, otherwise the caller's choice is ignored
    G = self._graph_from_adj_matrix(adj_mat,labels=ids,nx_type=nx_type)
    self.has_graph = True
    return G
|
||||
|
||||
def _graph_from_adj_matrix(self,A,labels=None,nx_type='graph'):
|
||||
"""Creates a networkx graph class from adjacency matrix and ordered labels.
|
||||
nx_type = ['graph',['xgraph']]
|
||||
labels = None, results in string-numbered labels
|
||||
|
||||
"""
|
||||
import networkx as nx
|
||||
m,n = shape(A)# adjacency matrix must be of type that evals to true/false for neigbours
|
||||
if m!=n:
|
||||
raise IOError, "Adjacency matrix must be square"
|
||||
if nx_type=='graph':
|
||||
G = nx.Graph()
|
||||
elif nx_type=='x_graph':
|
||||
G = nx.XGraph()
|
||||
else:
|
||||
raise IOError, "Unknown graph type: %s" %nx_type
|
||||
|
||||
if labels==None: # if labels not provided mark vertices with numbers
|
||||
labels = [str(i) for i in range(m)]
|
||||
|
||||
|
||||
for nbrs,head in izip(A,labels):
|
||||
for i,nbr in enumerate(nbrs):
|
||||
if nbr:
|
||||
tail = labels[i]
|
||||
G.add_edge(head,tail)
|
||||
return G
|
||||
# Class-level set of dim names. It is an attribute of the Dataset class
# itself, so every instance reads and updates the same set; the constructor
# passes it on as all_dims to the identifier helpers, which add names to it.
Dataset._all_dims=set()
|
||||
|
||||
class Selection:
|
||||
"""Handles selected identifiers along each dimension of a dataset"""
|
||||
def __init__(self):
|
||||
|
Reference in New Issue
Block a user