From 676ea4e0b99504e190c394ef981436e4a0ff02ee Mon Sep 17 00:00:00 2001
From: flatberg <flatberg@pvv.ntnu.no>
Date: Tue, 1 Aug 2006 09:45:53 +0000
Subject: [PATCH] removed shape in constructor, added all_dims to dataset base
 class, updated get_identifiers and get_indices

M    dataset.py
---
 system/dataset.py | 172 ++++++++++++++++++++++++++--------------------
 1 file changed, 99 insertions(+), 73 deletions(-)

diff --git a/system/dataset.py b/system/dataset.py
index d4918be..6254e0d 100644
--- a/system/dataset.py
+++ b/system/dataset.py
@@ -1,5 +1,6 @@
-from scipy import atleast_2d,asarray,ArrayType
-
+from scipy import atleast_2d,asarray,ArrayType,shape
+from scipy import sort as array_sort
+from itertools import izip
 
 class Dataset:
     """The Dataset base class.
@@ -32,45 +33,26 @@ class Dataset:
 
     data = Dataset(rand(10,20)) (generates dims and ids (no links))
     """
-    def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
-        self._name = kwds.get("name","Unnamed data")
+    def __init__(self,array,identifiers=None,name='Unnamed dataset'):
         self._dims = [] #existing dimensions in this dataset
         self._map = {} # internal mapping for dataset:  identifier <--> index
-        self.has_array = False
-        self.shape = None
-    
-        if array==None:
-            if shape == None:
-                raise ValueError, "Must define shape if array is None"
-            else:
-                self.shape = shape
-                if identifiers!=None:
-                    self._set_identifiers(identifiers,all_dims)
-                else:
-                    ids = self._create_identifiers(shape,all_dims)
-                    self._set_identifiers(ids,all_dims)
-        elif isinstance(array,ArrayType):
+        self._name = name
+        if isinstance(array,ArrayType):
             array = atleast_2d(asarray(array))
             self.shape = array.shape
-            if shape != None:
-                if self.shape!=shape:
-                    raise ValueError, "Differing in array and provided. %s != %s" % (self.shape, shape)
             if identifiers!=None:
-                self._set_identifiers(identifiers,all_dims)
+                self._set_identifiers(identifiers,self._all_dims)
             else:
-                ids = self._create_identifiers(self.shape,all_dims)
-                self._set_identifiers(ids,all_dims)
+                ids = self._create_identifiers(self.shape,self._all_dims)
+                self._set_identifiers(ids,self._all_dims)
             
             self._array = array
-            self.has_array = True
             
         else:
-            raise ValueError, "array input must be of ArrayType or None"
-
-        self._all_dims = all_dims
+            raise ValueError, "Array input must be of ArrayType"
                         
     def __str__(self):
-        return self._name + ":" + self._dims.__str__()
+        return self._name + ":\n" + "Dim names: " +  self._dims.__str__()
 
     def __iter__(self):
         """Returns an iterator over dimensions of dataset."""
@@ -100,11 +82,10 @@ class Dataset:
                 dim_suggestion = dim_names[axis]
             else:
                 dim_suggestion = 'dim'
-            while dim_suggestion in all_dims:
-                dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims) 
+            dim_suggestion = self._suggest_dim_name(dim_suggestion,all_dims) 
             identifier_creation = [str(axis) + "_" + i for i in map(str,range(n))]
             ids.append((dim_suggestion,identifier_creation))
-            all_dims.append(dim_suggestion)
+            all_dims.add(dim_suggestion)
         return ids
 
     def _set_identifiers(self,identifiers,all_dims):
@@ -113,24 +94,22 @@ class Dataset:
             pos_map={}
             if dim not in self._dims:
                 self._dims.append(dim)
-                all_dims.append(dim)
+                all_dims.add(dim)
             else:
-                raise ValueError, "Dimension names must be unique"
+                raise ValueError, "Dimension names must be unique whitin dataset"
             
             for pos,id in enumerate(ids):
                 pos_map[id] = pos
             self._map[dim] = pos_map
-        shape_chk = [len(i) for j,i in identifiers]
-        if shape_chk != list(self.shape):
-            raise ValueError, "Shape input: %s and array: %s mismatch" %(shape_chk,self.shape)
-
+            
     def _suggest_dim_name(self,dim_name,all_dims):
         """Suggests a unique name for dim and returns it"""
         c = 0
-        while dim_name in all_dims:
-            dim_name = dim_name + "_" + str(c)
+        new_name = dim_name
+        while new_name in all_dims:
+            new_name = dim_name + "_" + str(c)
             c+=1
-        return dim_name
+        return new_name
         
     def asarray(self):
         """Returns the numeric array (data) of dataset"""
@@ -144,15 +123,9 @@ class Dataset:
         A one-dim array is transformed to a two-dim array (row-vector)
         """
         
-        if self.has_array:
-            raise ValueError, "Dataset has array"
-        else:
-            if (len(self._map)!=len(array.shape)):
-                raise ValueError, "range(array_dims) and range(dim_names) mismatch"
-            if self.shape!=array.shape:
-                raise ValueError, "Input array must be of similar dimensions as dataset"
-            self._array = atleast_2d(asarray(array))
-            self.has_array = True
+        if self.shape!=array.shape:
+            raise ValueError, "Input array must be of similar dimensions as dataset"
+        self._array = atleast_2d(asarray(array))
 
     def get_name(self):
         """Returns dataset name"""
@@ -162,34 +135,50 @@ class Dataset:
         """Returns all dimensions in project"""
         return self._all_dims
 
-    def get_dim_names(self):
-        """Returns dim names""" 
-        return [dim for dim in self._dims]
+    def get_dim_name(self,axis=None):
+        """Returns the dim name for an axis; if no axis is provided, returns a list of all dim names."""
+        if type(axis)==int:
+            return self._dims[axis]
+        else:
+            return [dim for dim in self]
 
-    def get_identifiers(self, dim, indices=None):
-        """Returns identifiers along dim, sorted by position (index).
+    def get_identifiers(self, dim, indices=None,sorted=True):
+        """Returns identifiers along dim, optionally sorted by position (index).
 
         You can optionally provide a list of indices to get only the
         identifiers of a given position.
+
+        Identifiers are the unique names (strings) of the variables in a given dim.
+        Indices are the positions of those identifiers along that dim in the matrix.
         """
-        items = self._map[dim].items()
-        backitems=[ [v[1],v[0]] for v in items]
-        backitems.sort()
-        sorted_ids=[ backitems[i][1] for i in range(0,len(backitems))]
-
-        # we use id as scipy-arrays return a new array on boolean
-        # operations
-        if id(indices) != id(None):
-            return [sorted_ids[index] for index in indices]
+        if sorted==True:
+            items = self._map[dim].items()
+            backitems = [ [v[1],v[0]] for v in items]
+            backitems.sort()
+            ids = [ backitems[i][1] for i in range(0,len(backitems))]
+            
         else:
-            return sorted_ids
+            ids = self._map[dim].keys()
 
-    def get_indices(self, dim, idents):
-        """Get indices for identifiers along dimension."""
-        reverse = {}
-        for key, value in self._map[dim].items():
-            reverse[value] = key
-        return [self._map[dim][key] for key in idents]
+        if indices != None:
+            ids = [self._map[index] for index in indices]
+
+        return ids
+        
+
+    def get_indices(self, dim, idents=None):
+        """Returns indices for identifiers along dimension.
+
+        You can optionally provide a list of identifiers to retrieve an index subset.
+        
+        
+        Identifiers are the unique names (strings) of the variables in a given dim.
+        Indices are the positions of those identifiers along that dim in the matrix."""
+        if idents==None:
+            index = array_sort(self._map[dim].values())
+        else:
+            index = [self.map[dim][key] for key in idents]
+        return asarray(index)
      
 class CategoryDataset(Dataset):
     """The category dataset class.
@@ -236,10 +225,47 @@ class GraphDataset(Dataset):
     If the library NetworkX is installed, there is support for
     representing the graph as a NetworkX.Graph, or NetworkX.XGraph structure.
     """
-    def __init__(self):
-        Dataset.__init(self)
+    def __init__(self,array=None,identifiers=None,shape=None,all_dims=[],**kwds):
+        Dataset.__init__(self,array=array,identifiers=identifiers,name='A')
         self.has_graph = False
         
+    def asnetworkx(self,nx_type='graph'):
+        dim = self.get_dim_names()[0]
+        ids = self.get_identifiers(dim)
+        adj_mat = self.asarray()
+        G = self._graph_from_adj_matrix(adj_mat,labels=ids)
+        self.has_graph = True
+        return G
+    
+    def _graph_from_adj_matrix(self,A,labels=None,nx_type='graph'):
+        """Creates a networkx graph class from adjacency matrix and ordered labels.
+        nx_type: 'graph' (builds nx.Graph) or 'x_graph' (builds nx.XGraph)
+        labels: if None, vertices are labelled with their index as a string
+        
+        """
+        import networkx as nx
+        m,n = shape(A)# adjacency matrix must be of type that evals to true/false for neigbours
+        if m!=n:
+            raise IOError, "Adjacency matrix must be square"
+        if nx_type=='graph':
+            G = nx.Graph()
+        elif nx_type=='x_graph':
+            G = nx.XGraph()
+        else:
+            raise IOError, "Unknown graph type: %s" %nx_type
+
+        if labels==None: # if labels not provided mark vertices with numbers
+            labels = [str(i) for i in range(m)]
+
+	
+        for nbrs,head in izip(A,labels):
+            for i,nbr in enumerate(nbrs):
+                if nbr:
+                    tail = labels[i]
+                    G.add_edge(head,tail)
+        return G
+Dataset._all_dims=set()
+
 class Selection:
     """Handles selected identifiers along each dimension of a dataset"""
     def __init__(self):