BREAKING STUFF!
Rename fluents to laydi.
laydi/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
import main
laydi/annotations.py (new file, 105 lines)
@@ -0,0 +1,105 @@
from laydi import dataset

_dim_annotation_handlers = {}

def get_dim_annotations(dimname, annotation, ids):
"""Returns a list of annotations corresponding to the given ids in
|
||||
dimension dimname"""
|
||||
global _dim_annotation_handlers
|
||||
|
||||
if _dim_annotation_handlers.has_key(dimname):
|
||||
return _dim_annotation_handlers[dimname].get_annotations(annotation, ids)
|
||||
return None
|
||||
|
||||
def set_dim_handler(dimname, handler):
|
||||
"""Set the handler for the given dimension."""
|
||||
global _dim_annotation_handlers
|
||||
_dim_annotation_handlers[dimname] = handler
|
||||
|
||||
def get_dim_handler(dimname):
|
||||
"""Get the handler for the given dimension."""
|
||||
global _dim_annotation_handlers
|
||||
if _dim_annotation_handlers.has_key(dimname):
|
||||
return _dim_annotation_handlers[dimname]
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
class AnnotationHandler:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def get_annotations(self, annotationname, ids, default=None):
|
||||
return None
|
||||
|
||||
def get_annotation_names(self):
|
||||
return []
|
||||
|
||||
|
||||
class DictAnnotationHandler(AnnotationHandler):
|
||||
|
||||
def __init__(self, d=None):
|
||||
if d == None:
|
||||
d = {}
|
||||
self._dict = d
|
||||
|
||||
def get_annotations(self, annotationname, ids, default=None):
|
||||
d = self._dict
|
||||
retval = []
|
||||
for id in ids:
|
||||
if d[annotationname].has_key(id):
|
||||
retval.append(d[annotationname][id])
|
||||
else:
|
||||
retval.append(default)
|
||||
return retval
|
||||
|
||||
def add_annotations(self, annotationname, d):
|
||||
self._dict[annotationname] = d
|
||||
|
||||
def get_annotation_names(self):
|
||||
return self._dict.keys()
|
||||
|
||||
|
||||
def read_annotations_file(filename):
    """Read annotations from file.

    Reads annotations from a tab-delimited file of the format::

      dimname   annotation_name1   annotation_name2 ...
      id1       Foo                0.43
      id2       Bar                0.59
    """
    ann = DictAnnotationHandler()
    dimname = None
    annotation_dicts = []
    annotation_names = []

    fd = open(filename)

    ## Read the first line, which contains the dimension name and
    ## annotation names.
    line = fd.readline()
    values = [x.strip() for x in line.split('\t')]
    dimname = values[0]
    annotation_names = values[1:]
    annotation_dicts = [{} for x in annotation_names]

    ## Read the lines containing the annotations. The first value on
    ## each line is an id along the dimension. Advance past the header
    ## line before reading data rows.
    line = fd.readline()
    while line:
        values = [x.strip() for x in line.split('\t')]
        for i, x in enumerate(values[1:]):
            annotation_dicts[i][values[0]] = x
        line = fd.readline()

    fd.close()

    ## Add everything to the annotation object and add the object to
    ## the specified dimension.
    for i, a in enumerate(annotation_names):
        ann.add_annotations(a, annotation_dicts[i])

    _dim_annotation_handlers[dimname] = ann
    return ann
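Usage sketch (not part of the commit): given a tab-delimited file whose header row names the dimension and the annotation columns, read_annotations_file registers a DictAnnotationHandler that get_dim_annotations can then query. The file name and ids below are hypothetical.

# Hypothetical example of using the annotations module.
# genes.annot (tab-delimited):
#   genes<TAB>symbol<TAB>score
#   id1<TAB>Foo<TAB>0.43
#   id2<TAB>Bar<TAB>0.59
import annotations
ann = annotations.read_annotations_file('genes.annot')
print annotations.get_dim_annotations('genes', 'symbol', ['id1', 'id2'])
# -> ['Foo', 'Bar']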
laydi/cfgparse.py (new file, 1762 lines; diff suppressed because it is too large)
laydi/dataset.py (new file, 748 lines)
@@ -0,0 +1,748 @@
from scipy import ndarray, atleast_2d, asarray, intersect1d, zeros, empty, \
     sparse, where
from scipy import sort as array_sort
from itertools import izip
import shelve
import copy
import re

class Dataset(object):
    """The Dataset base class.

    A Dataset is an n-way array with defined string identifiers across
    all dimensions.

    Example of use::

      dim_name_rows = 'rows'
      names_rows = ('row_a', 'row_b')
      ids_1 = [dim_name_rows, names_rows]

      dim_name_cols = 'cols'
      names_cols = ('col_a', 'col_b', 'col_c', 'col_d')
      ids_2 = [dim_name_cols, names_cols]

      Array_X = rand(2, 4)
      data = Dataset(Array_X, (ids_1, ids_2), name="Testing")

      dim_names = [dim for dim in data]

      column_identifiers = [id for id in data['cols'].keys()]
      column_index = [index for index in data['cols'].values()]

      'cols' in data  # -> True

    data = Dataset(rand(10, 20)) generates dims and ids (no links).
    """
    def __init__(self, array, identifiers=None, name='Unnamed dataset'):
        self._dims = [] # existing dimensions in this dataset
        self._map = {} # internal mapping for dataset: identifier <--> index
        self._name = name
        self._identifiers = identifiers

        if not isinstance(array, sparse.spmatrix):
            array = atleast_2d(asarray(array))
            # vectors are treated as column vectors
            if array.shape[0] == 1:
                array = array.T
        self.shape = array.shape

        if identifiers != None:
            self._validate_identifiers(identifiers)
            self._set_identifiers(identifiers, self._all_dims)
        else:
            self._identifiers = self._create_identifiers(self.shape, self._all_dims)
            self._set_identifiers(self._identifiers, self._all_dims)
        self._array = array
    def __iter__(self):
        """Returns an iterator over dimensions of dataset."""
        return self._dims.__iter__()

    def __contains__(self, dim):
        """Returns True if dim is a dimension name in dataset."""
        # return self._dims.__contains__(dim)
        return self._map.__contains__(dim)

    def __len__(self):
        """Returns the number of dimensions in the dataset."""
        return len(self._map)

    def __getitem__(self, dim):
        """Returns the identifiers along the dimension dim."""
        return self._map[dim]
    def _create_identifiers(self, shape, all_dims):
        """Creates dimension names and identifier names, and returns
        identifiers."""

        dim_names = ['rows', 'cols']
        ids = []
        for axis, n in enumerate(shape):
            if axis < 2:
                dim_suggestion = dim_names[axis]
            else:
                dim_suggestion = 'dim'
            dim_suggestion = self._suggest_dim_name(dim_suggestion, all_dims)
            identifier_creation = [str(axis) + "_" + i for i in map(str, range(n))]
            ids.append((dim_suggestion, identifier_creation))
            all_dims.add(dim_suggestion)
        return ids
    def _set_identifiers(self, identifiers, all_dims):
        """Creates the internal mapping of the identifiers structure."""
        for dim, ids in identifiers:
            pos_map = ReverseDict()
            if dim not in self._dims:
                self._dims.append(dim)
                all_dims.add(dim)
            else:
                raise ValueError, "Dimension names must be unique within dataset"
            for pos, id in enumerate(ids):
                pos_map[id] = pos
            self._map[dim] = pos_map
    def _suggest_dim_name(self, dim_name, all_dims):
        """Suggests a unique name for dim and returns it."""
        c = 0
        new_name = dim_name
        while new_name in all_dims:
            new_name = dim_name + "_" + str(c)
            c += 1
        return new_name

    def asarray(self):
        """Returns the numeric array (data) of dataset."""
        if isinstance(self._array, sparse.spmatrix):
            return self._array.toarray()
        return self._array
    def set_array(self, array):
        """Adds array as an ArrayType object.

        A one-dim array is transformed to a two-dim array (row-vector).
        """
        if not isinstance(array, type(self._array)):
            raise ValueError("Input array of type: %s does not match existing array type: %s" % (type(array), type(self._array)))
        if self.shape != array.shape:
            raise ValueError, "Input array must have the same shape as the dataset"
        self._array = atleast_2d(asarray(array))
    def get_name(self):
        """Returns dataset name."""
        return self._name

    def get_all_dims(self):
        """Returns all dimensions in project."""
        return self._all_dims

    def get_dim_name(self, axis=None):
        """Returns the dim name for an axis; if no axis is provided, it
        returns the list of dims."""
        if type(axis) == int:
            return self._dims[axis]
        else:
            return [dim for dim in self._dims]

    def common_dims(self, ds):
        """Returns a list of the common dimensions in the two datasets."""
        dims = self.get_dim_name()
        ds_dims = ds.get_dim_name()
        return [d for d in dims if d in ds_dims]
    def get_identifiers(self, dim, indices=None, sorted=False):
        """Returns the identifiers along dim, optionally sorted by
        position (index).

        You can optionally provide a list/ndarray of indices to get
        only the identifiers of a given position.

        Identifiers are the unique names (strings) for a variable in a
        given dim. Indices are the identifiers' positions in a matrix
        in a given dim.
        """
        if indices != None:
            if len(indices) == 0: # empty list or empty array
                return []
            # be sure to match intersection
            #indices = intersect1d(self.get_indices(dim), indices)
            ids = [self._map[dim].reverse[i] for i in indices]
        else:
            if sorted == True:
                ids = [self._map[dim].reverse[i]
                       for i in array_sort(self._map[dim].values())]
            else:
                ids = self._map[dim].keys()

        return ids
    def get_indices(self, dim, idents=None):
        """Returns indices for identifiers along dimension.

        You can optionally provide a list of identifiers to retrieve an
        index subset.

        Identifiers are the unique names (strings) for a variable in a
        given dim. Indices are the identifiers' positions in a matrix
        in a given dim. If none of the input identifiers are found, an
        empty index is returned.
        """
        # None means all indices (sorted); only validate explicit input.
        if idents == None:
            index = array_sort(self._map[dim].values())
        else:
            if not isinstance(idents, list) and not isinstance(idents, set):
                raise ValueError("idents needs to be a list/set got: %s" % type(idents))
            index = [self._map[dim][key]
                     for key in idents if self._map[dim].has_key(key)]
        return asarray(index)
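A short illustration of the identifier/index mapping (hypothetical data; rand as used in the Dataset docstring):

# Hypothetical sketch of get_indices/get_identifiers round-tripping.
from scipy import rand  # assumed available, as in the Dataset docstring
data = Dataset(rand(2, 4), (['rows', ('row_a', 'row_b')],
                            ['cols', ('col_a', 'col_b', 'col_c', 'col_d')]))
idx = data.get_indices('cols', ['col_b', 'col_d'])  # -> array([1, 3])
print data.get_identifiers('cols', indices=idx)     # -> ['col_b', 'col_d']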
    def existing_identifiers(self, dim, idents):
        """Filters a list of identifiers to find those that are present in the
        dataset.

        The most common use of this function is to get a list of
        identifiers that correspond one-to-one with the list of indices
        produced when get_indices is given an identifier list. That is,
        ds.get_indices(dim, idents) and ds.existing_identifiers(dim, idents)
        will have the same order.

        @param dim: A dimension present in the dataset.
        @param idents: A list of identifiers along the given dimension.
        @return: A list of identifiers in the same order as idents, but
        without elements not present in the dataset.
        """
        if not isinstance(idents, list) and not isinstance(idents, set):
            raise ValueError("idents needs to be a list/set got: %s" % type(idents))

        return [key for key in idents if self._map[dim].has_key(key)]

    def copy(self):
        """Returns a deep copy of the dataset."""
        return copy.deepcopy(self)
    def subdata(self, dim, idents):
        """Returns a new dataset based on dimension and given identifiers."""
        ds = self.copy()
        indices = ds.get_indices(dim, idents)
        idents = ds.get_identifiers(dim, indices=indices)
        if not idents:
            raise ValueError("None of the identifiers from: \n%s \nwere found in %s" % (str(idents), ds._name))
        ax = [i for i, name in enumerate(ds._dims) if name == dim][0]
        subarr = ds._array.take(indices, ax)
        new_indices = range(len(idents))
        ds._map[dim] = ReverseDict(zip(idents, new_indices))
        ds.shape = tuple(len(ds._map[d]) for d in ds._dims)
        ds.set_array(subarr)
        return ds
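A hedged sketch of subdata, continuing the hypothetical example above: selecting two column identifiers yields a new Dataset with a re-indexed map and updated shape.

# Hypothetical continuation of the sketch above.
sub = data.subdata('cols', ['col_a', 'col_c'])
print sub.shape                                  # -> (2, 2)
print sub.get_identifiers('cols', sorted=True)   # -> ['col_a', 'col_c']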
    def transpose(self):
        """Returns a copy of the transpose of the dataset.

        For the moment, only 2D arrays are supported.
        """
        assert(len(self.shape) == 2)
        ds = self.copy()
        ds._array = ds._array.T
        ds._dims.reverse()
        ds.shape = ds._array.shape
        return ds

    def _validate_identifiers(self, identifiers):
        for dim_name, ids in identifiers:
            if len(set(ids)) != len(ids):
                raise ValueError("Identifiers not unique in : %s" % dim_name)
        identifier_shape = [len(i[1]) for i in identifiers]
        if len(identifier_shape) != len(self.shape):
            raise ValueError("Identifier list length must equal array dims")
        for ni, na in zip(identifier_shape, self.shape):
            if ni != na:
                raise ValueError, "Identifier-array mismatch: %s: (idents: %s, array: %s)" % (self._name, ni, na)

class CategoryDataset(Dataset):
    """The category dataset class.

    A dataset for representing class information as binary
    (0/1) matrices.

    There is support for using a less memory-demanding sparse format. The
    preferred (default) format for a category dataset is compressed sparse
    row (csr).

    Always has the linked dimension in the first dim.
    Example matrix::

      .       go_term1  go_term2 ...
      gene_1
      gene_2
      gene_3
      .
      .
      .
    """
    def __init__(self, array, identifiers=None, name='C'):
        Dataset.__init__(self, array, identifiers=identifiers, name=name)

    def as_spmatrix(self):
        if isinstance(self._array, sparse.spmatrix):
            return self._array
        else:
            arr = self.asarray()
            return sparse.csr_matrix(arr.astype('i'))

    def to_spmatrix(self):
        if isinstance(self._array, sparse.spmatrix):
            self._array = self._array.tocsr()
        else:
            self._array = sparse.csr_matrix(self._array)
    def as_dictlists(self):
        """Returns data as a dict of identifiers along the first dim.

        ex: data['gene_1'] = ['map0030', 'map0010', ...]

        fixme: Deprecated?
        """
        data = {}
        for name, ind in self._map[self.get_dim_name(0)].items():
            if isinstance(self._array, ndarray):
                indices = self._array[ind,:].nonzero()[0]
            elif isinstance(self._array, sparse.spmatrix):
                if not isinstance(self._array, sparse.csr_matrix):
                    array = self._array.tocsr()
                else:
                    array = self._array
                indices = array[ind,:].indices
            if len(indices) == 0: # should we allow categories with no members?
                continue
            data[name] = self.get_identifiers(self.get_dim_name(1), indices)
        self._dictlists = data
        return data

    def as_selections(self):
        """Returns data as a list of Selection objects.

        The list of selections is not ordered (sorted) by any means.
        """
        ret_list = []
        for cat_name, ind in self._map[self.get_dim_name(1)].items():
            if isinstance(self._array, sparse.spmatrix):
                if not isinstance(self._array, sparse.csc_matrix):
                    self._array = self._array.tocsc()
                indices = self._array[:,ind].indices
            else:
                indices = self._array[:,ind].nonzero()[0]
            if len(indices) == 0:
                continue
            ids = self.get_identifiers(self.get_dim_name(0), indices)
            selection = Selection(cat_name)
            selection.select(self.get_dim_name(0), ids)
            ret_list.append(selection)
        return ret_list

class GraphDataset(Dataset):
    """The graph dataset class.

    A dataset class for representing graphs. The constructor may take an
    incidence matrix (possibly sparse) or, if the networkx library is
    installed, a networkx (X)Graph structure.
    """

    def __init__(self, input, identifiers=None, name='A', nodepos=None):
        if isinstance(input, sparse.spmatrix):
            arr = input
        else:
            try:
                arr = asarray(input)
            except:
                raise ValueError("Could not identify input")
        Dataset.__init__(self, array=arr, identifiers=identifiers, name=name)
        self._graph = None
        self.nodepos = nodepos
    def as_spmatrix(self):
        if isinstance(self._array, sparse.spmatrix):
            return self._array
        else:
            arr = self.asarray()
            return sparse.csr_matrix(arr.astype('i'))

    def to_spmatrix(self):
        if isinstance(self._array, sparse.spmatrix):
            self._array = self._array.tocsr()
        else:
            self._array = sparse.csr_matrix(self._array)
    def asnetworkx(self):
        if self._graph != None:
            return self._graph
        dim0, dim1 = self.get_dim_name()
        node_ids = self.get_identifiers(dim0, sorted=True)
        edge_ids = self.get_identifiers(dim1, sorted=True)
        G, weights = self._graph_from_incidence_matrix(self._array,
                                                       node_ids=node_ids,
                                                       edge_ids=edge_ids)
        self._graph = G
        return G
    def from_networkx(cls, G, node_dim, edge_dim, sp_format=True):
        """Create graph dataset from networkx graph.

        When G is a Graph/DiGraph, edge identifiers will be created;
        otherwise (XGraph/XDiGraph) it is assumed that the edge
        attributes are the edge identifiers.
        """
        import networkx as nx
        n = G.number_of_nodes()
        m = G.number_of_edges()

        if isinstance(G, nx.DiGraph):
            G = nx.XDiGraph(G)
        elif isinstance(G, nx.Graph):
            G = nx.XGraph(G)

        edge_ids = [e[2] for e in G.edges()]
        node_ids = map(str, G.nodes())
        n2ind = {}
        for ind, node in enumerate(node_ids):
            n2ind[node] = ind

        # incidence matrix: nodes x edges
        if sp_format:
            I = sparse.lil_matrix((n, m))
        else:
            I = zeros((n, m), dtype='i')

        for i, (h, t, eid) in enumerate(G.edges()):
            if eid != None:
                edge_ids[i] = eid
            else:
                edge_ids[i] = 'e_' + str(i)
            hind = n2ind[str(h)]
            tind = n2ind[str(t)]
            I[hind, i] = 1
            if G.is_directed():
                I[tind, i] = -1
            else:
                I[tind, i] = 1
        idents = [[node_dim, node_ids], [edge_dim, edge_ids]]
        if G.name != '':
            name = G.name
        else:
            name = 'A'
        ds = GraphDataset(I, idents, name)
        return ds

    from_networkx = classmethod(from_networkx)
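A minimal from_networkx sketch, assuming the pre-1.0 networkx API (XGraph/XDiGraph) that this code targets; the graph and dimension names are made up.

# Hypothetical usage; requires the old networkx API used above.
import networkx as nx
G = nx.Graph()
G.name = 'toy'
G.add_edge('a', 'b')
G.add_edge('b', 'c')
gds = GraphDataset.from_networkx(G, 'nodes', 'edges', sp_format=False)
print gds.shape  # -> (3, 2): incidence matrix, nodes x edges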
    def _incidence2adjacency(self, I):
        """Incidence to adjacency matrix.

        I*I.T - eye(n)?
        """
        raise NotImplementedError
    def _graph_from_incidence_matrix(self, I, node_ids, edge_ids):
        """Creates a networkx graph class from an incidence
        (possibly weighted) matrix and ordered labels.

        labels = None results in string-numbered labels.
        """
        try:
            import networkx as nx
        except:
            print "Failed in import of NetworkX"
            return None

        m, n = I.shape
        assert(m == len(node_ids))
        assert(n == len(edge_ids))
        weights = []
        directed = False
        G = nx.XDiGraph(name=self._name)
        if isinstance(I, sparse.spmatrix):
            I = I.tocsr()
        for ename, col in izip(edge_ids, I.T):
            if isinstance(I, sparse.spmatrix):
                node_ind = col.indices
                w1, w2 = col.data
            else:
                node_ind = where(col != 0)[0]
                w1, w2 = col[node_ind]
            node1 = node_ids[node_ind[0]]
            node2 = node_ids[node_ind[1]]
            if w1 < 0: # w1 is tail
                directed = True
                assert(w2 > 0 and (w1 + w2) == 0)
                G.add_edge(node2, node1, ename)
                weights.append(w2)
            else: # w2 is tail, or graph is undirected
                assert(w1 > 0)
                if w2 < 0:
                    directed = True
                G.add_edge(node1, node2, ename)
                weights.append(w1)
        if not directed:
            G = G.to_undirected()
        return G, asarray(weights)

Dataset._all_dims = set()

class ReverseDict(dict):
    """A dictionary which can look up values by key, and keys by value.

    All values and keys must be hashable and unique.

    example:
    >>> d = ReverseDict((['a', 1], ['b', 2]))
    >>> print d['a']
    1
    >>> print d.reverse[1]
    a
    """
    def __init__(self, *args, **kw):
        dict.__init__(self, *args, **kw)
        self.reverse = dict([[v, k] for k, v in self.items()])

    def __setitem__(self, key, value):
        dict.__setitem__(self, key, value)
        try:
            self.reverse[value] = key
        except:
            self.reverse = {value: key}

class Selection(dict):
    """Handles selected identifiers along each dimension of a dataset."""

    def __init__(self, title='Unnamed Selection'):
        dict.__init__(self)
        self.title = title

    def __getitem__(self, key):
        if not self.has_key(key):
            return None
        return dict.__getitem__(self, key)

    def dims(self):
        return self.keys()

    def axis_len(self, axis):
        # the selection itself holds the axis -> selected labels mapping
        if self.has_key(axis):
            return len(self[axis])
        return 0

    def select(self, axis, labels):
        self[axis] = labels

def write_ftsv(fd, ds, decimals=7, sep='\t', fmt=None, sp_format=True):
    """Writes a dataset in fluents tab separated values (ftsv) form.

    @param fd: An open file descriptor to the output file, or a filename.
    @param ds: The dataset to be written.
    @param decimals: Number of decimals, only supported for Dataset.
    @param fmt: String formatting.

    The function handles datasets of these classes:
    Dataset, CategoryDataset and GraphDataset.
    """
    opened = False
    if isinstance(fd, str):
        fd = open(fd, 'w')
        opened = True

    # Write header information
    if isinstance(ds, CategoryDataset):
        type = 'category'
        if fmt == None:
            fmt = '%d'
    elif isinstance(ds, GraphDataset):
        type = 'network'
        if fmt == None:
            fmt = '%d'
    elif isinstance(ds, Dataset):
        type = 'dataset'
        if fmt == None:
            fmt = '%%.%df' % decimals
        else:
            fmt = '%%.%d' % decimals + fmt
    else:
        raise Exception("Unknown object type")
    fd.write('# type: %s' % type + '\n')

    for dim in ds.get_dim_name():
        fd.write("# dimension: %s" % dim)
        for ident in ds.get_identifiers(dim, sorted=True):
            fd.write(" " + ident)
        fd.write("\n")

    fd.write("# name: %s" % ds.get_name() + '\n')
    # xy node positions
    if type == 'network' and ds.nodepos != None:
        fd.write("# nodepos:")
        node_dim = ds.get_dim_name(0)
        for ident in ds.get_identifiers(node_dim, sorted=True):
            fd.write(" %s,%s" % ds.nodepos[ident])
        fd.write("\n")

    # Write data
    if hasattr(ds, "as_spmatrix") and sp_format == True:
        m = ds.as_spmatrix()
    else:
        m = ds.asarray()
    if isinstance(m, sparse.spmatrix):
        _write_sparse_elements(fd, m, fmt, sep)
    else:
        _write_elements(fd, m, fmt, sep)

    if opened:
        fd.close()
def read_ftsv(fd, sep=None):
    """Reads a dataset in fluents tab separated values (ftsv) form and returns it.

    @param fd: An open file descriptor, or a filename.
    @return: A Dataset, CategoryDataset or GraphDataset, depending on the
    information read.
    """
    opened = False
    if isinstance(fd, str):
        fd = open(fd)
        opened = True

    split_re = re.compile('^#\s*(\w+)\s*:\s*(.+)')
    dimensions = []
    identifiers = {}
    type = 'dataset'
    name = 'Unnamed dataset'
    sp_format = False
    nodepos = None
    # graphtype = 'graph'

    # Read header lines from file.
    line = fd.readline()
    while line:
        m = split_re.match(line)
        if m:
            key, val = m.groups()

            # The line is on the form:
            # dimension: dimname id1 id2 id3 ...
            if key == 'dimension':
                values = [v.strip() for v in val.split(' ')]
                dimensions.append(values[0])
                identifiers[values[0]] = values[1:]

            # Read type of dataset.
            # Should be dataset, category, or network
            elif key == 'type':
                type = val

            elif key == 'name':
                name = val

            # Storage format.
            # If sp_format is True, the data is stored as coordinate triplets.
            elif key == 'sp_format':
                if val in ['False', 'false', '0', 'F', 'f']:
                    sp_format = False
                elif val in ['True', 'true', '1', 'T', 't']:
                    sp_format = True
                else:
                    raise ValueError("sp_format: %s not valid " % sp_format)

            elif key == 'nodepos':
                node_dim = dimensions[0]
                idents = identifiers[node_dim]
                nodepos = {}
                xys = val.split(" ")
                for node_id, xy in zip(idents, xys):
                    x, y = map(float, xy.split(","))
                    nodepos[node_id] = (x, y)

        else:
            break
        line = fd.readline()

    # Dimensions on the form [(dim1, [id1, id2, id3 ...]) ...]
    dims = [(x, identifiers[x]) for x in dimensions]
    dim_lengths = [len(identifiers[x]) for x in dimensions]

    # Create matrix and assign element reader
    if sp_format:
        matrix = sparse.lil_matrix(dim_lengths)
    elif type == 'category':
        matrix = empty(dim_lengths, dtype='i')
    else:
        matrix = empty(dim_lengths)

    if sp_format:
        matrix = _read_sparse_elements(fd, matrix)
    else:
        matrix = _read_elements(fd, matrix)

    # Create dataset of the specified type
    if type == 'category':
        ds = CategoryDataset(matrix, dims, name)
    elif type == 'network':
        ds = GraphDataset(matrix, dims, name=name, nodepos=nodepos)
    else:
        ds = Dataset(matrix, dims, name)

    if opened:
        fd.close()

    return ds
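A hedged round trip through the ftsv format; write_ftsv and read_ftsv both accept a filename or an open file. The header written looks like '# type: dataset', '# dimension: rows row_a row_b ...', '# name: ...'.

# Hypothetical round trip (the filename is made up).
write_ftsv('data.ftsv', data)
data2 = read_ftsv('data.ftsv')
print data2.get_name(), data2.shape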
def _write_sparse_elements(fd, arr, fmt='%d', sep=None):
    """Sparse coordinate format: one 'row col value' triplet per line."""
    fd.write('# sp_format: True\n\n')
    fmt = '%d %d ' + fmt + '\n'
    csr = arr.tocsr()
    for ii in xrange(csr.size):
        ir, ic = csr.rowcol(ii)
        data = csr.getdata(ii)
        fd.write(fmt % (ir, ic, data))

def _write_elements(fd, arr, fmt='%f', sep='\t'):
    """Standard value separated format."""
    fmt = fmt + sep
    fd.write('\n')
    y, x = arr.shape
    for j in range(y):
        for i in range(x):
            fd.write(fmt % arr[j, i])
        fd.write('\n')

def _read_elements(fd, arr, sep=None):
    line = fd.readline()
    i = 0
    while line:
        values = line.split(sep)
        for j, val in enumerate(values):
            arr[i, j] = float(val)
        i += 1
        line = fd.readline()
    return arr

def _read_sparse_elements(fd, arr, sep=None):
    line = fd.readline()
    while line:
        i, j, val = line.split()
        arr[int(i), int(j)] = float(val)
        line = fd.readline()
    return arr.tocsr()
laydi/dialogs.py (new file, 108 lines)
@@ -0,0 +1,108 @@
import pygtk
# pygtk.require('2.0')
import gtk
import sys
import os
import gobject
import logger, project, workflow, main

DATADIR = os.path.dirname(sys.modules['laydi'].__file__)
GLADEFILENAME = os.path.join(DATADIR, 'fluents.glade')

class CreateProjectDruid(gtk.Window):
    """A druid for creating a new project.

    The CreateProjectDruid gets a list of all classes derived from
    Workflow, and asks the user to select one of these. A new project of
    the selected class is added to the application."""

    def __init__(self):
        gtk.Window.__init__(self)
        self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'new_project_druid')
        self.workflows = self.make_workflow_list()
        self.selected = None

        renderer = gtk.CellRendererText()
        wf_name = gtk.TreeViewColumn('Workflow Name', renderer, text=0)
        self['workflow_list'].insert_column(wf_name, 0)

        self.wf_info = gtk.TextBuffer()
        self['workflow_info'].set_buffer(self.wf_info)

    def __getitem__(self, key):
        return self.widget_tree.get_widget(key)

    def make_workflow_list(self):
        store = gtk.ListStore(gobject.TYPE_STRING, gobject.TYPE_PYOBJECT)
        for wf in workflow.workflow_list():
            store.insert_after(None, (wf.name, wf))
        return store

    def run(self):
        self['workflow_list'].set_model(self.workflows)

        self['druidpagestart1'].show()
        self['druidpagefinish1'].show()
        self['new_project_druid'].show()

        self['druidpagefinish1'].connect('finish', self.finish)
        self['workflow_list'].connect('cursor_changed', self.selection_updated)
        self['druid'].connect('cancel', self.cancel)
        self.connect('destroy', self.delete)

    def delete(self, widget):
        return False

    def hide(self):
        self['druidpagestart1'].hide()
        self['druidpagefinish1'].hide()
        self['new_project_druid'].hide()
        gtk.Window.hide(self)

    def finish(self, *rest):
        tree, it = self['workflow_list'].get_selection().get_selected()
        wf_class = self.workflows.get_value(it, 1)
        proj = project.Project()
        main.set_workflow(wf_class())
        # self.app.set_workflow(wf(self.app))
        # self.app.set_project(proj)
        main.set_project(proj)
        self.hide()
        self.destroy()

    def cancel(self, *ignored):
        self.hide()
        self.destroy()

    def selection_updated(self, *rest):
        tree, it = self['workflow_list'].get_selection().get_selected()
        wf = self.workflows.get_value(it, 1)
        self.wf_info.set_text(wf.description)


def get_text(title, text):
    """Asks the user to type in a string; returns it, or None on cancel."""
    dlg = gtk.Dialog(title)
    dlg.show()

    text = gtk.Label(text)
    text.show()

    entry = gtk.Entry()
    entry.show()
    entry.set_activates_default(True)
    dlg.vbox.pack_start(text)
    dlg.vbox.pack_start(entry)

    dlg.add_button(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)
    dlg.add_button(gtk.STOCK_OK, gtk.RESPONSE_OK)
    dlg.set_default_response(gtk.RESPONSE_OK)
    response = dlg.run()

    retval = None

    if response == gtk.RESPONSE_OK:
        retval = entry.get_text()
    dlg.destroy()
    return retval
laydi/fluents.glade (new file, 1223 lines; diff suppressed because it is too large)
laydi/fluents.py (new file, 402 lines)
@@ -0,0 +1,402 @@
#!/usr/bin/python

import os
import sys

import pygtk
pygtk.require('2.0')
import gobject
import gtk
import gtk.gdk
import gtk.glade
import gnome
import gnome.ui
import scipy
import pango
import project, workflow, dataset, view, navigator, dialogs, selections, plots, main
from logger import logger, LogView


PROGRAM_NAME = 'laydi'
VERSION = '0.1.0'
DATADIR = os.path.join(main.PYDIR, 'laydi')
#ICONDIR = os.path.join(DATADIR,"..","icons")
ICONDIR = main.ICONDIR
GLADEFILENAME = os.path.join(main.PYDIR, 'laydi/fluents.glade')
_icon_mapper = {dataset.Dataset: 'dataset',
                dataset.CategoryDataset: 'category_dataset',
                dataset.GraphDataset: 'graph_dataset',
                plots.Plot: 'line_plot'}

class IconFactory:
    """Factory for icons that ensures that each icon is only loaded once."""

    def __init__(self, path):
        self._path = path
        self._icons = {}

    def get(self, iconname):
        """Returns the gdk-loaded PixBuf for the given icon.
        Reads the icon from file if necessary."""

        # if iconname isn't a string, try to autoconvert
        if not isinstance(iconname, str):
            for cls in _icon_mapper.keys():
                if isinstance(iconname, cls):
                    iconname = _icon_mapper[cls]

        if self._icons.has_key(iconname):
            return self._icons[iconname]

        icon_fname = os.path.join(self._path, '%s.png' % iconname)
        icon = gtk.gdk.pixbuf_new_from_file(icon_fname)
        self._icons[iconname] = icon
        return icon

icon_factory = IconFactory(ICONDIR)

class TableSizeSelection(gtk.Window):

    def __init__(self):
        self._SIZE = size = 5
        gtk.Window.__init__(self, gtk.WINDOW_POPUP)
        self._table = gtk.Table(size, size, True)
        self._items = []

        ## Create a size x size table of EventBox objects, doubly stored
        ## because gtk.Table does not support indexed retrieval.
        for y in range(size):
            line = []
            for x in range(size):
                ebox = gtk.EventBox()
                ebox.add(gtk.Frame())
                ebox.set_size_request(20, 20)
                ebox.set_visible_window(True)
                self._table.attach(ebox, x, x+1, y, y+1, gtk.FILL, gtk.FILL)
                line.append(ebox)
            self._items.append(line)

        self.set_border_width(5)
        self.add(self._table)
        self.connect_signals()
    def _get_child_pos(self, child):
        size = self._SIZE
        for x in range(size):
            for y in range(size):
                if self._items[y][x] == child:
                    return (x, y)
        return None

    def connect_signals(self):
        size = self._SIZE
        for x in range(size):
            for y in range(size):
                self._items[y][x].add_events(gtk.gdk.ENTER_NOTIFY_MASK)
                self._items[y][x].connect("enter-notify-event",
                                          self._on_enter_notify)
                self._items[y][x].connect("button-release-event",
                                          self._on_button_release)

    def _on_enter_notify(self, widget, event):
        size = self._SIZE
        x, y = self._get_child_pos(widget)
        for i in range(size):
            for j in range(size):
                if i <= x and j <= y:
                    self._items[j][i].set_state(gtk.STATE_SELECTED)
                else:
                    self._items[j][i].set_state(gtk.STATE_NORMAL)
        self.x = x
        self.y = y

    def _on_button_release(self, widget, event):
        size = self._SIZE
        self.emit('table-size-set', self.x+1, self.y+1)
        self.hide_all()

        for x in range(size):
            for y in range(size):
                self._items[y][x].set_state(gtk.STATE_NORMAL)

class ViewFrameToolButton (gtk.ToolItem):

    def __init__(self):
        gtk.ToolItem.__init__(self)

        fname = os.path.join(ICONDIR, "table_size.png")
        image = gtk.Image()
        image.set_from_file(fname)

        self._button = gtk.Button()
        self._button.set_image(image)
        self._button.set_property("can-focus", False)

        eb = gtk.EventBox()
        eb.add(self._button)
        self.add(eb)
        self._item = TableSizeSelection()
        self._button.connect("button-press-event", self._on_show_menu)
        image.show()
        self._image = image

        self._item.connect("table-size-set", self._on_table_size_set)
        self._button.set_relief(gtk.RELIEF_NONE)
        self.show_all()

    def _on_show_menu(self, widget, event):
        x, y = self._image.window.get_origin()
        x2, y2, w, h, b = self._image.window.get_geometry()

        self._item.move(x, y+h)
        self._item.show_all()

    def _on_table_size_set(self, widget, width, height):
        main.application['main_view'].resize_table(width, height)

class FluentApp:

    def __init__(self):
        # Application variables
        # self.project = None
        self.current_data = None
        self._last_view = None
        self._plot_toolbar = None
        self._toolbar_state = None

        gtk.glade.set_custom_handler(self.custom_object_factory)
        self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'appwindow')
        # self.workflow = wf

        self.idlist_crt = selections.IdListController(self['identifier_list'])
        self.sellist_crt = selections.SelectionListController(self['selection_tree'],
                                                              self.idlist_crt)
        self.dimlist_crt = selections.DimListController(self['dim_list'],
                                                        self.sellist_crt)
        self.sellist_crt.set_dimlist_controller(self.dimlist_crt)

    def init_gui(self):
        self['appwindow'].set_size_request(800, 600)

        # Set up workflow
        self.wf_view = workflow.WorkflowView(main.workflow)
        self.wf_view.show()
        self['workflow_vbox'].pack_end(self.wf_view)

        self._wf_menu = workflow.WorkflowMenu(main.workflow)
        self._wf_menu.show()
        wf_menuitem = gtk.MenuItem('Fu_nctions')
        wf_menuitem.set_submenu(self._wf_menu)
        wf_menuitem.show()

        self['menubar1'].insert(wf_menuitem, 2)

        # Connect signals
        signals = {'on_quit1_activate' : (gtk.main_quit),
                   'on_appwindow_delete_event' : (gtk.main_quit),
                   'on_zoom_in_button_clicked' : (self.on_single_view),
                   'on_zoom_out_button_clicked' : (self.on_multiple_view),
                   'on_new1_activate' : (self.on_create_project),
                   'on_button_new_clicked' : (self.on_create_project),
                   'on_workflow_refresh_clicked' : (self.on_workflow_refresh_clicked),
                   'on_index1_activate' : (self.on_help_index),
                   'on_about1_activate' : (self.on_help_about),
                   'on_report_bug1_activate' : (self.on_help_report_bug),
                   'on_small_view1_activate' : (self.on_multiple_view),
                   'on_large_view1_activate' : (self.on_single_view),

                   'on_left1_activate' : (self.on_left),
                   'on_right1_activate' : (self.on_right),
                   'on_up1_activate' : (self.on_up),
                   'on_down1_activate' : (self.on_down),

                   'on_navigator1_activate' : (self.on_show_navigator),
                   'on_workflow1_activate' : (self.on_show_workflow),
                   'on_information1_activate' : (self.on_show_infopane),
                   }
        self.widget_tree.signal_autoconnect(signals)

        self['main_view'].connect('view-changed', self.on_view_changed)

        # Log that we've set up the app now
        logger.debug('Program started')

        # Add the ViewFrame table size selector to the toolbar
        tb = ViewFrameToolButton()
        self['toolbar'].add(tb)
    def set_project(self, proj):
        logger.notice('Welcome to your new project. Grasp That Data!')
        self.navigator_view.add_project(proj)
        self.dimlist_crt.set_project(proj)
        self.sellist_crt.set_project(proj)

    def set_workflow(self, workflow):
        main.workflow = workflow
        self.wf_view.set_workflow(main.workflow)

    def show(self):
        self.init_gui()

    def change_plot(self, plot):
        """Sets the plot in the currently active ViewFrame. If the plot is
        already shown in another ViewFrame it will be moved from there."""
        # Set current selection in the plot before showing it.
        plot.selection_changed(None, main.project.get_selection())

        self['main_view'].insert_view(plot)
        self._update_toolbar(plot)

    def change_plots(self, plots):
        """Changes all plots."""
        self['main_view'].set_all_plots(plots)
        v = self.get_active_view_frame().get_view()
        self._update_toolbar(v)

    def get_active_view_frame(self):
        return self['main_view'].get_active_view_frame()

    def _update_toolbar(self, view):
        """Sets the plot-specific toolbar to the toolbar of the currently
        active plot."""

        # don't do anything on no change
        if self._last_view == view:
            return
        self._last_view = view

        logger.debug("view changed to %s" % view)

        window = self['plot_toolbar_dock']
        if self._plot_toolbar:
            toolbar_state = self._plot_toolbar.get_mode()
            window.remove(self._plot_toolbar)
        else:
            toolbar_state = "default"

        if view:
            self._plot_toolbar = view.get_toolbar()
            self._plot_toolbar.set_mode(toolbar_state)
        else:
            self._plot_toolbar = None

        if self._plot_toolbar:
            window.add(self._plot_toolbar)

    # Methods to create GUI widgets from CustomWidgets in the glade file.
    # The custom_object_factory calls other functions to generate specific
    # widgets.

    def custom_object_factory(self, glade, fun_name, widget_name, s1, s2, i1, i2):
        """Called by the glade file reader to create custom GUI widgets."""
        handler = getattr(self, fun_name)
        return handler(s1, s2, i1, i2)

    def create_logview(self, str1, str2, int1, int2):
        self.log_view = LogView(logger)
        self.log_view.show()
        return self.log_view

    def create_main_view(self, str1, str2, int1, int2):
        self.main_view = view.MainView()
        self.main_view.show()
        return self.main_view

    def create_navigator_view(self, str1, str2, int1, int2):
        self.navigator_view = navigator.NavigatorView()
        self.navigator_view.show()
        return self.navigator_view

    def create_dim_list(self, str1, str2, int1, int2):
        self.dim_list = selections.DimList()
        self.dim_list.show()
        return self.dim_list

    def create_selection_tree(self, str1, str2, int1, int2):
        self.selection_tree = selections.SelectionTree()
        self.selection_tree.show()
        return self.selection_tree

    def create_identifier_list(self, str1, str2, int1, int2):
        self.identifier_list = selections.IdentifierList()
        self.identifier_list.show()
        return self.identifier_list

    def __getitem__(self, key):
        return self.widget_tree.get_widget(key)

    # Event handlers.
    # These methods are called by the gtk framework in response to events and
    # should not be called directly.

    def on_single_view(self, *ignored):
        self['main_view'].goto_large()

    def on_multiple_view(self, *ignored):
        self['main_view'].goto_small()
    def on_create_project(self, *rest):
        # CreateProjectDruid takes no constructor arguments
        d = dialogs.CreateProjectDruid()
        d.run()
    def on_help_about(self, *rest):
        widget_tree = gtk.glade.XML(GLADEFILENAME, 'aboutdialog')
        about = widget_tree.get_widget('aboutdialog')
        about.run()

    def on_help_index(self, *ignored):
        gnome.help_display_uri('https://dev.pvv.org/projects/fluents/wiki/help')

    def on_help_report_bug(self, *ignored):
        gnome.help_display_uri('https://dev.pvv.org/projects/fluents/newticket')

    def on_workflow_refresh_clicked(self, *ignored):
        try:
            reload(sys.modules[main.workflow.__class__.__module__])
        except Exception, e:
            logger.warning('Cannot reload workflow')
            logger.warning(e)
        else:
            logger.notice('Successfully reloaded workflow')

    def on_view_changed(self, widget, vf):
        self._update_toolbar(vf.get_view())

    def on_show_navigator(self, item):
        if item.get_active():
            self['data_vbox'].show()
        else:
            self['data_vbox'].hide()

    def on_show_workflow(self, item):
        if item.get_active():
            self['workflow_vbox'].show()
        else:
            self['workflow_vbox'].hide()

    def on_show_infopane(self, item):
        if item.get_active():
            self['bottom_notebook'].show()
        else:
            self['bottom_notebook'].hide()

    def on_left(self, item):
        self.main_view.move_focus_left()

    def on_right(self, item):
        self.main_view.move_focus_right()

    def on_up(self, item):
        self.main_view.move_focus_up()

    def on_down(self, item):
        self.main_view.move_focus_down()


gobject.signal_new('table-size-set', TableSizeSelection,
                   gobject.SIGNAL_RUN_LAST,
                   gobject.TYPE_NONE,
                   (gobject.TYPE_INT, gobject.TYPE_INT))
laydi/lib/R_utils.py (new file, 284 lines)
@@ -0,0 +1,284 @@
"""A collection of functions that use R.

Most functions use libraries from bioconductor.

depends on (not updated):
-- bioconductor min. install
-- hgu133a
-- hgu133plus2
"""

import scipy
import Numeric as N
import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)

def get_locusid(probelist=None, org="hgu133a"):
    """Returns a dictionary of locus link ids for each affy probeset,
    and the reverse mapping.

    input:
      [probelist] -- list of affy probesets
      [org] -- chip type (organism)

    out:
      aff2loc, loc2aff

    The mapping is one-to-one for affy -> locus_id.
    However, there may be several affy probesets for one locus_id.

    From the bioc mail archive: BioC takes the GenBank ids associated
    with the probes (provided by the manufacturer) and then maps them
    to Entrez Gene ids using data from UniGene, Entrez Gene, and other
    available data sources we trust. The Entrez Gene id a probe is
    assigned to is determined by votes from all the sources used. If
    there is no agreement among the sources, we take the smallest
    Entrez Gene id.
    """
    silent_eval("library(" + org + ")")
    silent_eval('locus_ids = as.list(' + org + 'LOCUSID)')
    silent_eval('pp<-as.list(locus_ids[!is.na(locus_ids)])')
    loc_ids = rpy.r("pp")
    for id in loc_ids:
        loc_ids[id] = str(loc_ids[id])

    aff2loc = {}
    if probelist:
        for pid in probelist:
            try:
                aff2loc[pid] = loc_ids[pid]
            except:
                print "Affy probeset: %s has no locus id" % pid
        print "\nCONVERSION SUMMARY:\n \
Number of probesets input: %s \n \
Number of translated locus ids: %s \n \
Number missing: %s" % (len(probelist), len(aff2loc), len(probelist) - len(aff2loc))
    else:
        aff2loc = loc_ids
    # reverse mapping
    loc2aff = {}
    for k, v in aff2loc.items():
        if loc2aff.has_key(v):
            loc2aff[v].append(k)
        else:
            loc2aff[v] = [k]

    return aff2loc, loc2aff
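Hypothetical usage sketch; requires rpy and the bioconductor annotation package for the chip. The probeset ids below are examples only.

# Hypothetical usage of get_locusid.
aff2loc, loc2aff = get_locusid(probelist=['1007_s_at', '1053_at'], org='hgu133a')
print aff2loc.get('1007_s_at')  # locus link id as a string, if mapped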
def get_kegg_paths(org="hgu133plus2", id_type='aff', probelist=None):
    """Returns a dictionary of KEGG maps.

    input:
      org -- chip_type (see bioconductor.org)
      id_type -- id ['aff','loc']

    key: affy_id, value = list of kegg map ids
    example: '65884_at': ['00510', '00513']
    """
    silent_eval("library(" + org + ")")
    silent_eval('xx<-as.list(' + org + 'PATH)')
    silent_eval('xp <- xx[!is.na(xx)]')
    aff2path = rpy.r("xp")
    dummy = rpy.r("xx")

    if id_type == 'loc':
        aff2loc, loc2aff = get_locusid(org=org)
        loc2path = {}
        for id, path in aff2path.items():
            # merge paths when several probesets map to the same locus id
            loc = aff2loc[id]
            if loc2path.has_key(loc):
                pp = [path.append(i) for i in loc2path[loc]]
                print "Found duplicate in path: %s" % path
            loc2path[loc] = path
        aff2path = loc2path
    out = {}

    if probelist:
        for pid in probelist:
            try:
                out[pid] = aff2path[pid]
            except:
                print "Could not find id: %s" % pid
    else:
        out = aff2path
    for k, v in out.items():
        # if the value is a string, convert it to a list
        try:
            v + ''
            out[k] = [v]
        except:
            out[k] = v

    return out
def get_probe_list(org="hgu133plus2"):
    rpy.r.library(org)
    silent_eval('probe_list<-ls(' + org + 'ACCNUM )')
    pl = rpy.r("probe_list")
    return pl
def get_GO_from_aff(org="hgu133plus2", id_type='aff', probelist=None):
    """Returns a dictionary of GO terms.

    input:
      org -- chip_type (see bioconductor.org)
      id_type -- id ['aff','loc']

    key: id, value: GO annotation structure
    example key: '65884_at'
    """
    silent_eval("library(" + org + ")")
    silent_eval('xx<-as.list(' + org + 'GO)')
    silent_eval('xp <- xx[!is.na(xx)]')
    aff2path = rpy.r("xp")
    dummy = rpy.r("xx")
    if id_type == 'loc':
        aff2loc, loc2aff = get_locusid(org=org)
        loc2path = {}
        for id, path in aff2path.items():
            loc = aff2loc[id]
            if loc2path.has_key(loc):
                pp = [path.append(i) for i in loc2path[loc]]
                print "Found duplicate in path: %s" % path
            loc2path[loc] = path
        aff2path = loc2path
    out = {}
    if probelist:
        for pid in probelist:
            try:
                out[pid] = aff2path[pid]
            except:
                print "Could not find id: %s" % pid
        return out
    return aff2path
def get_kegg_as_category(org="hgu133plus2", id_type='aff', probelist=None):
    """Returns KEGG pathway memberships as a dummy (1/0) matrix (genes x maps)."""
    kegg = get_kegg_paths(org=org, id_type=id_type, probelist=probelist)
    maps = set()
    for kpth in kegg.values():
        maps.update(kpth)

    n_maps = len(maps)
    n_genes = len(kegg)
    gene2index = dict(zip(kegg.keys(), range(n_genes)))
    map2index = dict(zip(maps, range(n_maps)))
    C = scipy.zeros((n_genes, n_maps))
    for k, v in kegg.items():
        for m in v:
            C[gene2index[k], map2index[m]] = 1

    return C, list(maps), kegg.keys()
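Hypothetical usage of the dummy matrix; the result can be wrapped as a CategoryDataset (defined in laydi/dataset.py).

# Hypothetical sketch; requires rpy and the hgu133plus2 package.
C, maps, genes = get_kegg_as_category(org='hgu133plus2')
print C.shape  # (n_genes, n_maps); C[i, j] == 1 if gene i lies on map j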
def impute(X, k=10, rowmax=0.5, colmax=0.8, maxp=1500, seed=362436069):
    """Imputes missing expression data using nearest neighbor averaging
    (bioconductor's impute package).

    input:

    data: An expression matrix with genes in the rows, samples in the
          columns

    k: Number of neighbors to be used in the imputation (default=10)

    rowmax: The maximum percent missing data allowed in any row
          (default 50%). Rows with more than 'rowmax'% missing are
          imputed using the overall mean per sample.

    colmax: The maximum percent missing data allowed in any column
          (default 80%). If any column has more than 'colmax'% missing
          data, the program halts and reports an error.

    maxp: The largest block of genes imputed using the knn algorithm
          inside 'impute.knn' (default 1500); larger blocks are divided
          by two-means clustering (recursively) prior to imputation. If
          'maxp=p', only knn imputation is done.

    seed: The seed used for the random number generator (default
          362436069) for reproducibility.

    call:
    impute(data, k=10, rowmax=0.5, colmax=0.8, maxp=1500, rng.seed=362436069)
    """
    rpy.r.library("impute")
    X = N.asarray(X) # cast as numeric array
    m, n = scipy.shape(X)
    if m > n:
        print "Warning (impute): more samples than variables. running transpose"
        t_flag = True
    else:
        X = N.transpose(X)
        t_flag = False

    rpy.r.assign("X", X)
    rpy.r.assign("k", k)
    rpy.r.assign("rmax", rowmax)
    rpy.r.assign("cmax", colmax)
    rpy.r.assign("maxp", maxp)
    rpy.r.assign("seed", seed)

    # pass the seed through so results are reproducible
    call = "out<-impute.knn(X,k=k,rowmax=rmax,colmax=cmax,maxp=maxp,rng.seed=seed)"
    silent_eval(call)
    out = rpy.r("out")
    if not t_flag:
        E = out['data']
        E = scipy.asarray(E)
        E = E.T
    else:
        E = out['data']
        E = scipy.asarray(E)
    return E

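Hypothetical impute call on random data with one missing value; requires rpy and bioconductor's impute package.

# Hypothetical usage sketch.
import scipy
X = scipy.rand(20, 100)
X[0, 0] = scipy.nan  # introduce a missing value
E = impute(X, k=5)   # imputed copy, same shape as X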
def get_chip_annotation(org="hgu133a", annot='pmid', id_type='loc', probelist=None):
    """Returns a dictionary of annotations.

    input:
      org -- chip_type (see bioconductor.org)
      annot -- annotation ['genename', 'pmid', 'symbol']
      id_type -- id ['aff','loc']

    key: id, value = list of annotations
    example: '65884_at': ['15672394', '138402']
    """
    _valid_annot = ['genename', 'pmid', 'symbol', 'enzyme', 'chr', 'chrloc']
    if annot.lower() not in _valid_annot:
        raise ValueError("Annotation must be one of %s" % _valid_annot)
    silent_eval("library(" + org + ")")
    silent_eval("dummy<-as.list(" + org + annot.upper() + ")")
    silent_eval('annotations <- dummy[!is.na(dummy)]')
    aff2annot = rpy.r("annotations")
    if id_type == 'loc':
        aff2loc, loc2aff = get_locusid(org=org)
        loc2annot = {}
        for geneid, annotation in aff2annot.items():
            annotation = ensure_list(annotation)
            print annotation
            loc = aff2loc[geneid]
            if loc2annot.has_key(loc):
                for extra in loc2annot[loc]:
                    annotation.append(extra)
                print "Found duplicate in gene: %s" % geneid
            loc2annot[loc] = annotation
        aff2annot = loc2annot

    out = {}
    if probelist:
        for pid in probelist:
            try:
                out[pid] = aff2annot.get(pid, 'none')
            except:
                print "Could not find id: %s" % pid
    else:
        out = aff2annot

    return out

def ensure_list(value):
    if isinstance(value, list):
        return value
    else:
        return [value]
laydi/lib/__init__.py (new file, 0 lines)
laydi/lib/blmfuncs.py (new file, 1378 lines; diff suppressed because it is too large)
laydi/lib/blmplots.py (new file, 458 lines)
@@ -0,0 +1,458 @@
"""Specialised plots for functions defined in blmfuncs.py.

fixme:
-- If a scatterplot is not inited with a colorvector there will be no
   colorbar, but when adding colors the colorbar should be created.
"""

from matplotlib import cm, patches
import gtk
import laydi
from laydi import plots, main, logger
import scipy
from scipy import dot, sum, diag, arange, log, mean, newaxis, sqrt, \
     apply_along_axis, empty
from scipy.stats import corrcoef

def correlation_loadings(data, T, test=True):
    """Returns correlation loadings.

    :input:
      - data: [nsamps, nvars], data (non-centered)
      - T: [nsamps, a_max], scores
    :output:
      - R: [nvars, a_max], correlation loadings
    """
    nsamps, nvars = data.shape
    nsampsT, a_max = T.shape

    if nsamps != nsampsT:
        raise IOError("data/T mismatch")

    # center
    data = data - data.mean(0)
    R = empty((nvars, a_max), 'd')
    for a in range(a_max):
        for k in range(nvars):
            R[k, a] = corrcoef(data[:, k], T[:, a])[0, 1]

    return R
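A small self-contained check of correlation_loadings with random data; the names below are hypothetical.

# Hypothetical usage with random data and a dummy score matrix.
import scipy
data = scipy.rand(10, 5)  # 10 samples, 5 variables
T = scipy.rand(10, 2)     # e.g. scores from a 2-component model
R = correlation_loadings(data, T)
print R.shape             # -> (5, 2)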
class BlmScatterPlot(plots.ScatterPlot):
    """Scatter plot used for scores and loadings in bilinear models."""

    def __init__(self, title, model, absi=0, ordi=1, part_name='T', color_by=None):
        self.model = model
        if not model.model.has_key(part_name):
            raise ValueError("Model part: %s not found in model" % part_name)
        self._T = model.model[part_name]
        if self._T.shape[1] == 1:
            logger.log('notice', 'Scores have only one component')
            absi = ordi = 0
        self._absi = absi
        self._ordi = ordi
        self._cmap = cm.summer

        dataset_1 = model.as_dataset(part_name)
        id_dim = dataset_1.get_dim_name(0)
        sel_dim = dataset_1.get_dim_name(1)
        id_1, = dataset_1.get_identifiers(sel_dim, [absi])
        id_2, = dataset_1.get_identifiers(sel_dim, [ordi])
        col = 'b'
        if model.model.has_key(color_by):
            col = model.model[color_by].ravel()
        plots.ScatterPlot.__init__(self, dataset_1, dataset_1, id_dim, sel_dim,
                                   id_1, id_2, c=col, s=40, name=title)
        self._mappable.set_cmap(self._cmap)
        self.sc = self._mappable
        self.add_pc_spin_buttons(self._T.shape[1], absi, ordi)
def set_facecolor(self, colors):
|
||||
"""Set patch facecolors.
|
||||
"""
|
||||
pass
|
||||
|
||||
def set_alphas(self, alphas):
|
||||
"""Set alpha channel for all patches."""
|
||||
pass
|
||||
|
||||
def set_sizes(self, sizes):
|
||||
"""Set patch sizes."""
|
||||
pass
|
||||
|
||||
def set_expvar_axlabels(self, param=None):
|
||||
if param == None:
|
||||
param = self._expvar_param
|
||||
else:
|
||||
self._expvar_param = param
|
||||
if not self.model.model.has_key(param):
|
||||
self.model.model[param] = None
|
||||
if self.model.model[param]==None:
|
||||
logger.log('notice', 'Param: %s not in model' %param)
|
||||
pass #fixme: do expvar calc here if not present
|
||||
else:
|
||||
expvar = self.model.model[param]
|
||||
xstr = "Comp: %s , %.1f " %(self._absi, expvar[self._absi+1])
|
||||
ystr = "Comp: %s , %.1f " %(self._ordi, expvar[self._ordi+1])
|
||||
self.axes.set_xlabel(xstr)
|
||||
self.axes.set_ylabel(ystr)
|
||||
|
||||
def add_pc_spin_buttons(self, amax, absi, ordi):
|
||||
sb_a = gtk.SpinButton(climb_rate=1)
|
||||
sb_a.set_range(1, amax)
|
||||
sb_a.set_value(absi+1)
|
||||
sb_a.set_increments(1, 5)
|
||||
sb_a.connect('value_changed', self.set_absicca)
|
||||
sb_o = gtk.SpinButton(climb_rate=1)
|
||||
sb_o.set_range(1, amax)
|
||||
sb_o.set_value(ordi+1)
|
||||
sb_o.set_increments(1, 5)
|
||||
sb_o.connect('value_changed', self.set_ordinate)
|
||||
hbox = gtk.HBox()
|
||||
gtk_label_a = gtk.Label("A:")
|
||||
gtk_label_o = gtk.Label(" O:")
|
||||
toolitem = gtk.ToolItem()
|
||||
toolitem.set_expand(False)
|
||||
toolitem.set_border_width(2)
|
||||
toolitem.add(hbox)
|
||||
hbox.pack_start(gtk_label_a)
|
||||
hbox.pack_start(sb_a)
|
||||
hbox.pack_start(gtk_label_o)
|
||||
hbox.pack_start(sb_o)
|
||||
self._toolbar.insert(toolitem, -1)
|
||||
toolitem.set_tooltip(self._toolbar.tooltips, "Set Principal component")
|
||||
self._toolbar.show_all() #do i need this?
|
||||
|
||||
def set_absicca(self, sb):
|
||||
self._absi = sb.get_value_as_int() - 1
|
||||
xy = self._T[:,[self._absi, self._ordi]]
|
||||
self.xaxis_data = xy[:,0]
|
||||
self.yaxis_data = xy[:,1]
|
||||
self.sc._offsets = xy
|
||||
self.selection_collection._offsets = xy
|
||||
self.canvas.draw_idle()
|
||||
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
|
||||
new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
|
||||
self.axes.set_xlim(new_lims, emit=True)
|
||||
self.set_expvar_axlabels()
|
||||
self.canvas.draw_idle()
|
||||
|
||||
def set_ordinate(self, sb):
|
||||
self._ordi = sb.get_value_as_int() - 1
|
||||
xy = self._T[:,[self._absi, self._ordi]]
|
||||
self.xaxis_data = xy[:,0]
|
||||
self.yaxis_data = xy[:,1]
|
||||
self.sc._offsets = xy
|
||||
self.selection_collection._offsets = xy
|
||||
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
|
||||
new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
|
||||
self.axes.set_ylim(new_lims, emit=True)
|
||||
self.set_expvar_axlabels()
|
||||
self.canvas.draw_idle()
|
||||
|
||||
def show_labels(self, index=None):
|
||||
if self._text_labels == None:
|
||||
x = self.xaxis_data
|
||||
y = self.yaxis_data
|
||||
self._text_labels = {}
|
||||
for name, n in self.dataset_1[self.current_dim].items():
|
||||
txt = self.axes.text(x[n],y[n], name)
|
||||
txt.set_visible(False)
|
||||
self._text_labels[n] = txt
|
||||
if index!=None:
|
||||
self.hide_labels()
|
||||
for indx,txt in self._text_labels.items():
|
||||
if indx in index:
|
||||
txt.set_visible(True)
|
||||
self.canvas.draw_idle()
|
||||
|
||||
def hide_labels(self):
|
||||
for txt in self._text_labels.values():
|
||||
txt.set_visible(False)
|
||||
self.canvas.draw_idle()
|
||||
|
||||
|
||||
class PcaScreePlot(plots.BarPlot):
|
||||
def __init__(self, model):
|
||||
title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
|
||||
ds = model.as_dataset('eigvals')
|
||||
if ds==None:
|
||||
logger.log('notice', 'Model does not contain eigvals')
|
||||
return
|
||||
plots.BarPlot.__init__(self, ds, name=title)
|
||||
|
||||
|
||||
class PcaScorePlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pca scores (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
||||
self.set_expvar_axlabels(param="expvarx")
|
||||
|
||||
class PcaLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pca loadings (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='p_tsq')
|
||||
self.set_expvar_axlabels(param="expvarx")
|
||||
|
||||
class PlsScorePlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pls scores (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
||||
|
||||
|
||||
class PlsXLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
|
||||
#self.set_expvar_axlabels(self, param="expvarx")
|
||||
|
||||
|
||||
class PlsYLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
|
||||
|
||||
|
||||
class PlsCorrelationLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP')
|
||||
|
||||
|
||||
class LplsScorePlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "L-pls scores (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
||||
self.set_expvar_axlabels("evx")
|
||||
|
||||
|
||||
class LplsXLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Lpls x-loadings (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='tsqx')
|
||||
self.set_expvar_axlabels("evx")
|
||||
|
||||
|
||||
class LplsZLoadingPlot(BlmScatterPlot, plots.PlotThresholder):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Lpls z-loadings (%s)" %model._dataset['Z'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='L', color_by='tsqz')
|
||||
self.set_expvar_axlabels(param="evz")
|
||||
plots.PlotThresholder.__init__(self, "IC")
|
||||
|
||||
|
||||
def _update_color_from_dataset(self, ds):
|
||||
BlmScatterPlot._update_color_from_dataset(self, ds)
|
||||
self.set_threshold_dataset(ds)
|
||||
|
||||
|
||||
class LplsXCorrelationPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Lpls x-corr. loads (%s)" %model._dataset['X'].get_name()
|
||||
if not model.model.has_key('Rx'):
|
||||
R = correlation_loadings(model._data['X'], model.model['T'])
|
||||
model.model['Rx'] = R
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Rx')
|
||||
self.set_expvar_axlabels("evx")
|
||||
radius = 1
|
||||
center = (0,0)
|
||||
c100 = patches.Circle(center,radius=radius,
|
||||
facecolor='gray',
|
||||
alpha=.1,
|
||||
zorder=1)
|
||||
c50 = patches.Circle(center, radius= sqrt(radius/2.0),
|
||||
facecolor='gray',
|
||||
alpha=.1,
|
||||
zorder=2)
|
||||
self.axes.add_patch(c100)
|
||||
self.axes.add_patch(c50)
|
||||
self.axes.axhline(lw=1.5,color='k')
|
||||
self.axes.axvline(lw=1.5,color='k')
|
||||
self.axes.set_xlim([-1.05,1.05])
|
||||
self.axes.set_ylim([-1.05, 1.05])
|
||||
self.canvas.show()
|
||||
|
||||
class LplsZCorrelationPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Lpls z-corr. loads (%s)" %model._dataset['Z'].get_name()
|
||||
if not model.model.has_key('Rz'):
|
||||
R = correlation_loadings(model._data['Z'].T, model.model['W'])
|
||||
model.model['Rz'] = R
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Rz')
|
||||
self.set_expvar_axlabels("evz")
|
||||
radius = 1
|
||||
center = (0,0)
|
||||
c100 = patches.Circle(center,radius=radius,
|
||||
facecolor='gray',
|
||||
alpha=.1,
|
||||
zorder=1)
|
||||
c50 = patches.Circle(center, radius=sqrt(radius/2.0),
|
||||
facecolor='gray',
|
||||
alpha=.1,
|
||||
zorder=2)
|
||||
self.axes.add_patch(c100)
|
||||
self.axes.add_patch(c50)
|
||||
self.axes.axhline(lw=1.5,color='k')
|
||||
self.axes.axvline(lw=1.5,color='k')
|
||||
self.axes.set_xlim([-1.05,1.05])
|
||||
self.axes.set_ylim([-1.05, 1.05])
|
||||
self.canvas.show()
|
||||
|
||||
|
||||
class LplsHypoidCorrelationPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Hypoid correlations(%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W')
|
||||
|
||||
|
||||
class LplsExplainedVariancePlot(plots.Plot):
|
||||
def __init__(self, model):
|
||||
self.model = model
|
||||
plots.Plot.__init__(self, "Explained variance")
|
||||
xax = scipy.arange(model.model['evx'].shape[0])
|
||||
self.axes.plot(xax, model.model['evx'], 'b-', label='X', linewidth=1.5)
|
||||
self.axes.plot(xax, model.model['evy'], 'k-', label='Y', linewidth=1.5)
|
||||
self.axes.plot(xax, model.model['evz'], 'g-', label='Z', linewidth=1.5)
|
||||
self.canvas.draw()
|
||||
|
||||
class LineViewXc(plots.LineViewPlot):
|
||||
"""A line view of centered raw data
|
||||
"""
|
||||
def __init__(self, model, name='Profiles'):
|
||||
dx = model._dataset['X']
|
||||
plots.LineViewPlot.__init__(self, dx, 1, None, False,name)
|
||||
self.add_center_check_button(self.data_is_centered)
|
||||
|
||||
def add_center_check_button(self, ticked):
|
||||
"""Add a checker button for centerd view of data."""
|
||||
cb = gtk.CheckButton("Center")
|
||||
cb.set_active(ticked)
|
||||
cb.connect('toggled', self._toggle_center)
|
||||
toolitem = gtk.ToolItem()
|
||||
toolitem.set_expand(False)
|
||||
toolitem.set_border_width(2)
|
||||
toolitem.add(cb)
|
||||
self._toolbar.insert(toolitem, -1)
|
||||
toolitem.set_tooltip(self._toolbar.tooltips, "Column center the line view")
|
||||
self._toolbar.show_all() #do i need this?
|
||||
|
||||
def _toggle_center(self, active):
|
||||
if self.data_is_centered:
|
||||
self._data = self._data + self._mn_data
|
||||
self.data_is_centered = False
|
||||
else:
|
||||
self._mn_data = self._data.mean(0)
|
||||
self._data = self._data - self._mn_data
|
||||
self.data_is_centered = True
|
||||
self.make_lines()
|
||||
self.set_background()
|
||||
self.set_current_selection(main.project.get_selection())
|
||||
|
||||
|
||||
class ParalellCoordinates(plots.Plot):
|
||||
"""Parallell coordinates for score loads with many comp.
|
||||
"""
|
||||
def __init__(self, model, p='loads'):
|
||||
pass
|
||||
|
||||
|
||||
class PlsQvalScatter(plots.ScatterPlot):
|
||||
"""A vulcano like plot of loads vs qvals
|
||||
"""
|
||||
def __init__(self, model, pc=0):
|
||||
if not model.model.has_key('w_tsq'):
|
||||
return None
|
||||
self._W = model.model['W']
|
||||
dataset_1 = model.as_dataset('W')
|
||||
dataset_2 = model.as_dataset('w_tsq')
|
||||
id_dim = dataset_1.get_dim_name(0) #genes
|
||||
sel_dim = dataset_1.get_dim_name(1) #_comp
|
||||
sel_dim_2 = dataset_2.get_dim_name(1) #_zero_dim
|
||||
id_1, = dataset_1.get_identifiers(sel_dim, [0])
|
||||
id_2, = dataset_2.get_identifiers(sel_dim_2, [0])
|
||||
if model.model.has_key('w_tsq'):
|
||||
col = model.model['w_tsq'].ravel()
|
||||
#col = normalise(col)
|
||||
else:
|
||||
col = 'g'
|
||||
plots.ScatterPlot.__init__(self, dataset_1, dataset_2,
|
||||
id_dim, sel_dim, id_1, id_2,
|
||||
c=col, s=20, sel_dim_2=sel_dim_2,
|
||||
name='Load Volcano')
|
||||
|
||||
|
||||
class PredictionErrorPlot(plots.Plot):
|
||||
"""A boxplot of prediction error vs. comp. number.
|
||||
"""
|
||||
def __init__(self, model, name="Prediction Error"):
|
||||
if not model.model.has_key('sep'):
|
||||
logger.log('notice', 'Model has no calculations of sep')
|
||||
return None
|
||||
plots.Plot.__init__(self, name)
|
||||
self._frozen = True
|
||||
self.current_dim = 'johndoe' #fixme: dummy dim name; this plot has no selection dimension
|
||||
self.axes = self.fig.add_subplot(111)
|
||||
|
||||
# draw
|
||||
sep = model.model['sep']
|
||||
aopt = model.model['aopt']
|
||||
bx_plot_lines = self.axes.boxplot(sqrt(sep))
|
||||
aopt_marker = self.axes.axvline(aopt, linewidth=10,
|
||||
color='r',zorder=0,
|
||||
alpha=.5)
|
||||
|
||||
# add canvas
|
||||
self.add(self.canvas)
|
||||
self.canvas.show()
|
||||
|
||||
def set_current_selection(self, selection):
|
||||
pass
|
||||
|
||||
|
||||
class TRBiplot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Target rotation biplot (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'B')
|
||||
B = model.model.get('B')
|
||||
# normalize B row-wise
|
||||
Bnorm = scipy.apply_along_axis(scipy.linalg.norm, 1, B)
|
||||
x = model._dataset['X'].copy()
|
||||
Xc = x._array - mean(x._array,0)[newaxis]
|
||||
w_rot = B/Bnorm[:,newaxis]
|
||||
t_rot = dot(Xc, w_rot) #fixme: rotated scores are computed but never used
|
||||
|
||||
|
||||
class InfluencePlot(plots.ScatterPlot):
|
||||
""" Returns a leverage vs resiudal scatter plot.
|
||||
"""
|
||||
def __init__(self, model, dim, name="Influence"):
|
||||
if not model.model.has_key('levx'):
|
||||
logger.log('notice', 'Model has no calculations of leverages')
|
||||
return
|
||||
if not model.model.has_key('ssqx'):
|
||||
logger.log('notice', 'Model has no calculations of residuals')
|
||||
return
|
||||
ds1 = model.as_dataset('levx')
|
||||
ds2 = model.as_dataset('ssqx')
|
||||
plots.ScatterPlot.__init__(self, ds1, ds2,
|
||||
id_dim, sel_dim, id_1, id_2,
|
||||
c=col, s=20, sel_dim_2=sel_dim_2,
|
||||
name='Load Volcano')
|
||||
|
||||
|
||||
class RMSEPPlot(plots.BarPlot):
|
||||
def __init__(self, model, name="RMSEP"):
|
||||
if not model.model.has_key('rmsep'):
|
||||
logger.log('notice', 'Model has no calculations of rmsep')
|
||||
return
|
||||
dataset = model.as_dataset('rmsep')
|
||||
plots.BarPlot.__init__(self, dataset, name=name)
|
||||
|
||||
|
||||
def normalise(x):
|
||||
"""Scale vector x to [0,1]
|
||||
"""
|
||||
x = x - x.min()
|
||||
x = x/x.max()
|
||||
return x
|
||||
66
laydi/lib/cv_index.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from numpy import array_split,arange
|
||||
|
||||
|
||||
def cv(n, k, randomise=False, sequential=False):
|
||||
"""
|
||||
Generates k (training, validation) index pairs.
|
||||
|
||||
Each pair is a partition of arange(n), where validation is an iterable
|
||||
of length ~n/k.
|
||||
|
||||
If randomise is true, a copy of index is shuffled before partitioning,
|
||||
otherwise its order is preserved in training and validation.
|
||||
|
||||
Randomise overrides the sequential argument: if randomise is true,
|
||||
sequential is forced to False.
|
||||
|
||||
If sequential is true the index is partitioned in contiguous blocks,
|
||||
otherwise interleaved ordering is used.
|
||||
"""
|
||||
index = range(n)
|
||||
if randomise:
|
||||
from random import shuffle
|
||||
shuffle(index)
|
||||
sequential = False
|
||||
if sequential:
|
||||
for validation in array_split(index, k):
|
||||
training = [i for i in index if i not in validation]
|
||||
yield training, validation
|
||||
else:
|
||||
for fold in xrange(k):
|
||||
training = [i for i in index if i % k != fold]
|
||||
validation = [i for i in index if i % k == fold]
|
||||
yield training, validation
|
||||
|
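Editor's note: what the fixed cv() generator yields for n=6, k=3 with the default interleaved ordering:

# >>> for train, val in cv(6, 3):
# ...     print train, val
# [1, 2, 4, 5] [0, 3]
# [0, 2, 3, 5] [1, 4]
# [0, 1, 3, 4] [2, 5]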
||||
def shuffle_diag(shape, K, randomise=False, sequential=False):
|
||||
"""
|
||||
Generates k (training, validation) index pairs.
|
||||
"""
|
||||
m, n = shape
|
||||
|
||||
if K>m or K>n:
|
||||
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
||||
raise ValueError, msg
|
||||
|
||||
mon = max(m, n)
|
||||
#index = xrange(n)
|
||||
index = [i for i in range(m*n) if i % m == 0]
|
||||
if randomise:
|
||||
from random import shuffle
|
||||
index = list(index)
|
||||
shuffle(index)
|
||||
sequential = False
|
||||
|
||||
if sequential:
|
||||
start_inds = array_split(index, K)
|
||||
else:
|
||||
for k in xrange(K):
|
||||
start_inds = [index[i] for i in xrange(n) if i % K == k] #fixme: overwritten on every pass; only the last subset's starts survive
|
||||
|
||||
for start in start_inds:
|
||||
ind = arange(start, n*m, mon+1)
|
||||
yield ind
|
||||
|
||||
438
laydi/lib/cx_stats.py
Normal file
@@ -0,0 +1,438 @@
|
||||
import time
|
||||
import cPickle
|
||||
|
||||
from scipy import zeros,zeros_like,sqrt,dot,trace,sign,round_,argmax,\
|
||||
sort,ravel,newaxis,asarray,diag,sum,outer,argsort,arange,ones_like,\
|
||||
all,apply_along_axis,eye,atleast_2d,empty
|
||||
from scipy.linalg import svd,inv,norm,det,sqrtm
|
||||
from scipy.stats import mean,median
|
||||
|
||||
#import plots_lpls
|
||||
|
||||
from cx_utils import mat_center
|
||||
from validation import pls_jkW, lpls_jk
|
||||
from select_generators import shuffle_1d
|
||||
from engines import pca, pls, bridge
|
||||
from engines import nipals_lpls as lpls
|
||||
|
||||
|
||||
|
||||
def hotelling(Pcv, P, p_center='med', cov_center='med',
|
||||
alpha=0.3, crot=True, strict=False):
|
||||
"""Returns regularized hotelling T^2.
|
||||
|
||||
alpha -- [0,1] regularisation towards pooled covariance estimate
|
||||
p_center -- location method for submodels
|
||||
cov_center -- location method for submodel covariances
|
||||
crot -- rotate submodels towards the full model?
|
||||
strict -- only rotate 90 degrees?
|
||||
|
||||
"""
|
||||
m, n = P.shape
|
||||
n_sets, n, amax = Pcv.shape
|
||||
# allocate
|
||||
T_sq = empty((n, ),dtype='d')
|
||||
Cov_i = zeros((n, amax, amax),dtype='d')
|
||||
|
||||
# rotate sub_models to full model
|
||||
if crot:
|
||||
for i, Pi in enumerate(Pcv):
|
||||
Pcv[i] = procrustes(P, Pi, strict=strict)
|
||||
|
||||
# center of pnull
|
||||
if p_center=='med':
|
||||
P_ctr = median(Pcv, 0)
|
||||
elif p_center=='mean':
|
||||
# fixme: mean is unstable
|
||||
P_ctr = mean(Pcv, 0)
|
||||
else: #use full
|
||||
P_ctr = P
|
||||
|
||||
for i in xrange(n):
|
||||
Pi = Pcv[:,i,:] # (n_sets x amax)
|
||||
Pi_ctr = P_ctr[i,:] # (1 x amax)
|
||||
Pim = (Pi - Pi_ctr[newaxis])*sqrt(n_sets-1)
|
||||
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
||||
|
||||
if cov_center == 'med':
|
||||
Cov = median(Cov_i, 0)
|
||||
else:
|
||||
Cov = mean(Cov_i, 0)
|
||||
|
||||
reg_cov = (1. - alpha)*Cov_i + alpha*Cov
|
||||
for i in xrange(n):
|
||||
#Pc = P_ctr[i,:][:,newaxis]
|
||||
Pc = P_ctr[i,:]
|
||||
sigma = reg_cov[i]
|
||||
# T_sq[i] = (dot(Pc, inv(sigma) )*Pc).sum() #slow
|
||||
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc) # dont need to care about transposes
|
||||
#T_sq[i] = dot(dot(Pc.T, inv(sigma)), Pc).ravel()
|
||||
return T_sq
|
||||
|
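Editor's note: a shape-level smoke test for hotelling() on random jack-knife loadings; sizes are hypothetical, not project data, and it assumes the scipy.stats location estimators accept these arrays:

import numpy
n_sets, nvars, amax = 7, 50, 2
P = numpy.random.rand(nvars, amax)            # full-model loadings
Pcv = numpy.random.rand(n_sets, nvars, amax)  # sub-model loadings
tsq = hotelling(Pcv, P)
assert tsq.shape == (nvars,)
assert numpy.all(tsq >= 0)  # T^2 is a quadratic form in a PSD matrix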
||||
def procrustes(A, B, strict=True, center=False, verbose=False):
|
||||
"""Rotation of B to A.
|
||||
|
||||
strict -- Only do flipping and shuffling
|
||||
center -- Center before rotation, translate back after
|
||||
verbose -- Print ssq
|
||||
|
||||
No scaling calculated.
|
||||
Output B_rot = Rotated B
|
||||
"""
|
||||
if center:
|
||||
A,mn_A = mat_center(A, ret_mn=True)
|
||||
B,mn_B = mat_center(B, ret_mn=True)
|
||||
u,s,vh = svd(dot(B.T, A))
|
||||
v = vh.T
|
||||
Cm = dot(u, v.T) #orthogonal rotation matrix
|
||||
if strict: # just inverting and flipping
|
||||
Cm = ensure_strict(Cm)
|
||||
b_rot = dot(B, Cm)
|
||||
|
||||
if verbose:
|
||||
print Cm.round()
|
||||
fit = sum(ravel(B - b_rot)**2)
|
||||
print "Sum of squares: %s" %fit
|
||||
if center:
|
||||
return mn_B + b_rot
|
||||
else:
|
||||
return b_rot
|
||||
|
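Editor's note: a sanity check for procrustes() -- a sign-flipped copy of a loading matrix is rotated back onto the original (random data):

import numpy
A = numpy.random.rand(10, 2)
B_rot = procrustes(A, -A, strict=True)
assert numpy.allclose(A, B_rot)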
||||
def expl_var_x(Xc, T):
|
||||
"""Returns explained variance of X.
|
||||
T should carry variance in length, Xc has zero col-mean.
|
||||
"""
|
||||
exp_var_x = diag(dot(T.T, T))*100/(sum(Xc**2))
|
||||
return exp_var_x
|
||||
|
||||
def expl_var_y(Y, T, Q):
|
||||
"""Returns explained variance of Y.
|
||||
"""
|
||||
# centered Y
|
||||
exp_var_y = zeros((Q.shape[1], ))
|
||||
for a in range(Q.shape[1]):
|
||||
Ya = outer(T[:,a], Q[:,a])
|
||||
exp_var_y[a] = 100*sum(Ya**2)/sum(Y**2)
|
||||
return exp_var_y
|
||||
|
||||
def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||
n_iter=20, algo='pls',
|
||||
center=True,
|
||||
sim_method='shuffle',
|
||||
p_center='med', cov_center='med',
|
||||
crot=True, strict=False):
|
||||
|
||||
"""Returns qvals for pls model.
|
||||
|
||||
input:
|
||||
a -- data matrix
|
||||
b -- data matrix
|
||||
aopt -- scalar, opt. number of components
|
||||
alpha -- [0,1] regularisation parameter for T2-test
|
||||
n_iter -- number of permutations
|
||||
sim_method -- permutation method ['shuffle']
|
||||
p_center -- location estimator for sub models ['med']
|
||||
cov_center -- location estimator for covariance of submodels ['med']
|
||||
crot -- bool, use rotations of sub models?
|
||||
strict -- bool, use strict (rot/flips only) rotations?
|
||||
"""
|
||||
|
||||
m, n = a.shape
|
||||
TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets)
|
||||
n_false = zeros((n, n_iter), dtype='d')
|
||||
|
||||
#full model
|
||||
if center:
|
||||
ac = a - a.mean(0)
|
||||
bc = b - b.mean(0)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||
else:
|
||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
|
||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
#t0 = time.time()
|
||||
Vs = shuffle_1d(bc, n_iter, axis=0)
|
||||
for i, b_shuff in enumerate(Vs):
|
||||
#t1 = time.time()
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
|
||||
else:
|
||||
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
|
||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
#print time.time() - t1
|
||||
|
||||
return fdr(tsq_full, TSQ, 'median')
|
||||
|
||||
|
||||
def ensure_strict(C, only_flips=True):
|
||||
"""Ensure that a rotation matrix does only 90 degree rotations.
|
||||
In multiplication with pcs this allows flips and reordering.
|
||||
|
||||
if only_flips is True, only flips are allowed
|
||||
"""
|
||||
Cm = C
|
||||
S = sign(C) # signs
|
||||
if only_flips==True:
|
||||
C = eye(Cm.shape[0])*S
|
||||
return C
|
||||
Cm = zeros_like(C)
|
||||
Cm.putmask(1.,abs(C)>.6)
|
||||
if det(Cm)>1:
|
||||
raise ValueError,"Implement this!"
|
||||
return Cm*S
|
||||
|
||||
def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||
n_iter=20, algo='pls',
|
||||
sim_method='shuffle',
|
||||
p_center='med', cov_center='med',
|
||||
crot=True, strict=False):
|
||||
|
||||
"""Returns qvals for pls model.
|
||||
Shuffling of variables in X.
|
||||
Null model is 'If I put genes randomly on network' ... if they are sign:
|
||||
then this is due to network structure and not covariance with response.
|
||||
|
||||
input:
|
||||
a -- data matrix
|
||||
b -- data matrix
|
||||
aopt -- scalar, opt. number of components
|
||||
alpha -- [0,1] regularisation parameter for T2-test
|
||||
n_iter -- number of permutations
|
||||
sim_method -- permutation method ['shuffle']
|
||||
p_center -- location estimator for sub models ['med']
|
||||
cov_center -- location estimator for covariance of submodels ['med']
|
||||
crot -- bool, use rotations of sub models?
|
||||
strict -- bool, use strict (rot/flips only) rotations?
|
||||
"""
|
||||
|
||||
m, n = a.shape
|
||||
TSQ = zeros((n, n_iter), dtype='<f8') # (nvars x n_subsets)
|
||||
n_false = zeros((n, n_iter), dtype='<f8')
|
||||
|
||||
#full model
|
||||
|
||||
# center?
|
||||
if center==True:
|
||||
ac = a - a.mean(0)
|
||||
bc = b - b.mean(0)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||
else:
|
||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
|
||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
t0 = time.time()
|
||||
Vs = shuffle_1d(a, n_iter, 1)
|
||||
for i, a_shuff in enumerate(Vs):
|
||||
t1 = time.time()
|
||||
a = a_shuff - a_shuff.mean(0)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(a, b, aopt, 'loads','fast')
|
||||
else:
|
||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
|
||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
print time.time() - t1
|
||||
sort_index = argsort(tsq_full)[::-1]
|
||||
back_sort_index = sort_index.argsort()
|
||||
print time.time() - t0
|
||||
|
||||
# count false positives
|
||||
tsq_full_sorted = tsq_full.take(sort_index)
|
||||
for i in xrange(n_iter):
|
||||
for j in xrange(n):
|
||||
n_false[j,i] = sum(TSQ[:,i]>=tsq_full[j])
|
||||
false_pos = median(n_false, 1)
|
||||
ll = arange(1, len(false_pos)+1, 1)
|
||||
sort_qval = false_pos.take(sort_index)/ll
|
||||
qval = false_pos/ll.take(back_sort_index)
|
||||
print time.time() - t0
|
||||
#return qval, false_pos, TSQ, tsq_full
|
||||
|
||||
return qval
|
||||
|
||||
def leverage(aopt=1,*args):
|
||||
"""Returns leverages
|
||||
input : aopt, number of components to base leverage calculations on
|
||||
*args, matrices of normed blm-paramters
|
||||
output: leverages
|
||||
|
||||
For PCA typical inputs are normalised T or normalised P
|
||||
For PLSR typical inputs are normalised T or normalised W
|
||||
"""
|
||||
if aopt<1:
|
||||
raise ValueError,"Leverages only make sense for aopt>0"
|
||||
lev = []
|
||||
for u in args:
|
||||
lev_u = 1./u.shape[0] + dot(u[:,:aopt], u[:,:aopt].T).diagonal()
|
||||
lev.append(lev_u)
|
||||
return lev
|
||||
|
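Editor's note: a quick identity check for leverage() -- with orthonormal scores the leverages sum to aopt + 1 (random orthonormal basis via QR):

import numpy
T, _ = numpy.linalg.qr(numpy.random.rand(10, 3))
lev, = leverage(2, T)
assert numpy.allclose(lev.sum(), 2 + 1.0)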
||||
def variances(a, t, p):
|
||||
"""Returns explained variance and ind. var from blm-params.
|
||||
input:
|
||||
a -- full centered matrix
|
||||
t,p -- parameters from a bilinear approx of the above matrix.
|
||||
output:
|
||||
var -- variance of each component
|
||||
var_exp -- cumulative explained variance in percentage
|
||||
|
||||
Typical inputs are: X(centered),T,P for PCA or
|
||||
X(centered),T,P / Y(centered),T,Q for PLSR.
|
||||
"""
|
||||
|
||||
tot_var = sum(a**2)
|
||||
var = 100*(sum(p**2, 0)*sum(t**2, 0))/tot_var
|
||||
var_exp = var.cumsum()
|
||||
return var, var_exp
|
||||
|
||||
def residual_diagnostics(Y, Yhat, aopt=1):
|
||||
"""Root mean errors and press values.
|
||||
R2 vals
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def ssq(E, axis=0, weights=None):
|
||||
"""Sum of squares, supports weights."""
|
||||
n = E.shape[axis]
|
||||
if weights==None:
|
||||
weights = eye(n)
|
||||
else:
|
||||
weights = diag(weights)
|
||||
if axis==0:
|
||||
Ew = dot(weights, E)
|
||||
elif axis==1:
|
||||
Ew = dot(E, weights)
|
||||
else:
|
||||
raise NotImplementedError, "Higher order modes not supported"
|
||||
return pow(Ew,2).sum(axis)
|
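Editor's note: a small worked call of ssq() with the default identity weights:

# >>> E = asarray([[0., 1.], [2., 3.], [4., 5.]])
# >>> ssq(E)        # column-wise sums of squares
# array([ 20.,  35.])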
||||
|
||||
|
||||
def vnorm(x):
|
||||
"""Returns the euclidian norm of a vector.
|
||||
|
||||
This is considerably faster than linalg.norm
|
||||
"""
|
||||
return sqrt(dot(x,x.conj()))
|
||||
|
||||
def mahalanobis(a, loc=None, acov=None, invcov=None):
|
||||
"""Returns the distance of each observation in a
|
||||
from the location estimate (loc) of the data,
|
||||
relative to the shape of the data.
|
||||
|
||||
|
||||
a : data matrix (n observations in rows, p variables in columns)
|
||||
loc : location estimate of the data (p-dimensional vector)
|
||||
acov or invcov : scatter estimate of the data or the inverse of the scatter estimate (pxp matrix)
|
||||
|
||||
:Returns:
|
||||
A vector containing the distances of all the observations to loc.
|
||||
|
||||
"""
|
||||
n, p = a.shape
|
||||
if loc==None:
|
||||
loc = a.mean(0)
|
||||
loc = atleast_2d(loc)
|
||||
if loc.shape[1]==1:
|
||||
loc = loc.T; #ensure rowvector
|
||||
assert(loc.shape[1]==p)
|
||||
xc = a - loc
|
||||
if acov==None and invcov==None:
|
||||
acov = dot(xc.T, xc)
|
||||
|
||||
if invcov != None:
|
||||
covmat = atleast_2d(invcov)
|
||||
if min(covmat.shape)==1:
|
||||
covmat = diag(invcov.ravel())
|
||||
else:
|
||||
covmat = atleast_2d(acov)
|
||||
if min(covmat.shape)==1:
|
||||
covmat = diag(covmat.ravel())
|
||||
covmat = inv(covmat)
|
||||
# mdist = diag(dot(dot(xc, covmat),xc.T))
|
||||
mdist = (dot(xc, covmat)*xc).sum(1)
|
||||
return mdist
|
||||
|
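Editor's note: with the identity as invcov, mahalanobis() reduces to the squared Euclidean distance from the column means; a random check:

import numpy
a = numpy.random.rand(15, 4)
d = mahalanobis(a, invcov=numpy.eye(4))
xc = a - a.mean(0)
assert numpy.allclose(d, (xc**2).sum(1))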
||||
def lpls_qvals(a, b, c, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
||||
sim_method='shuffle',p_center='med', cov_center='med',crot=True,
|
||||
strict=False, mean_ctr=[2,0,2], nsets=None):
|
||||
|
||||
"""Returns qvals for l-pls model.
|
||||
|
||||
input:
|
||||
a -- data matrix
|
||||
b -- data matrix
|
||||
c -- data matrix
|
||||
aopt -- scalar, opt. number of components
|
||||
alpha -- [0,1] regularisation parameter for T2-test
|
||||
zx_alpha -- [0,1] how much z info to include
|
||||
n_iter -- number of permutations
|
||||
sim_method -- permutation method ['shuffle']
|
||||
p_center -- location estimator for sub models ['med']
|
||||
cov_center -- location estimator for covariance of submodels ['med']
|
||||
crot -- bool, use rotations of sub models?
|
||||
strict -- bool, use strict (rot/flips only) rotations?
|
||||
"""
|
||||
|
||||
m, n = a.shape
|
||||
p, k = c.shape
|
||||
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
|
||||
pert_tsq_z = zeros((p, n_iter), dtype='d') # (nzvars x n_subsets)
|
||||
|
||||
# Full model
|
||||
#print "Full model start"
|
||||
dat = lpls(a, b, c, aopt, scale='loads', mean_ctr=mean_ctr)
|
||||
Wc, Lc = lpls_jk(a, b, c , aopt, nsets=nsets)
|
||||
#print "Full hot"
|
||||
cal_tsq_x = hotelling(Wc, dat['W'], alpha = alpha)
|
||||
cal_tsq_z = hotelling(Lc, dat['L'], alpha = 0)
|
||||
|
||||
# Perturbations
|
||||
Vs = shuffle_1d(b, n_iter, axis=0)
|
||||
for i, b_shuff in enumerate(Vs):
|
||||
print i
|
||||
dat = lpls(a, b_shuff,c, aopt, scale='loads', mean_ctr=mean_ctr)
|
||||
Wi, Li = lpls_jk(a, b_shuff, c, aopt, nsets=nsets)
|
||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
|
||||
|
||||
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
|
||||
|
||||
|
||||
|
||||
def fdr(tsq, tsqp, loc_method='mean'):
|
||||
n, = tsq.shape
|
||||
k, m = tsqp.shape
|
||||
assert(n==k)
|
||||
n_false = empty((n, m), 'd')
|
||||
sort_index = argsort(tsq)[::-1]
|
||||
r_index = argsort(sort_index)
|
||||
for i in xrange(m):
|
||||
for j in xrange(n):
|
||||
n_false[j,i] = (tsqp[:,i]>tsq[j]).sum()
|
||||
#cPickle.dump(n_false, open("/tmp/nfalse.dat_"+str(n), "w"))
|
||||
if loc_method=='mean':
|
||||
fp = mean(n_false,1)
|
||||
elif loc_method == 'median':
|
||||
fp = median(n_false.T)
|
||||
else:
|
||||
raise ValueError
|
||||
n_signif = (arange(n) + 1.0)[r_index]
|
||||
fd_rate = fp/n_signif
|
||||
return fd_rate
|
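Editor's note: a tiny worked example of fdr() with hypothetical numbers. Variable 0 (T^2 = 9) is never beaten by a permuted value, so its rate is 0; variable 1 (T^2 = 1) is beaten by every permuted value and gets rate 1:

import numpy
tsq  = numpy.array([9., 1., 4.])  # observed T^2, one value per variable
tsqp = numpy.array([[2., 3.]]*3)  # permuted T^2, (nvars x n_perm)
rate = fdr(tsq, tsqp, loc_method='mean')
assert numpy.allclose(rate, [0., 1., 0.])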
||||
115
laydi/lib/cx_utils.py
Normal file
@@ -0,0 +1,115 @@
|
||||
from scipy import apply_along_axis,newaxis,zeros,\
|
||||
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
||||
trace,zeros_like,sign,sort,real,argsort,rand,array,\
|
||||
matrix,nan
|
||||
from scipy.linalg import norm,svd,inv,eig
|
||||
from scipy.stats import median,mean
|
||||
|
||||
def normalise(a, axis=0, return_scales=False):
|
||||
s = apply_along_axis(norm, axis, a)
|
||||
if axis==0:
|
||||
s = s[newaxis]
|
||||
else:
|
||||
s = s[:,newaxis]
|
||||
|
||||
a_s = a/s
|
||||
|
||||
if return_scales:
|
||||
return a_s, s
|
||||
|
||||
return a_s
|
||||
|
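Editor's note: a quick check that normalise() produces unit-norm columns by default (random data):

import numpy
a_s = normalise(numpy.random.rand(6, 3))
assert numpy.allclose(numpy.sqrt((a_s**2).sum(0)), 1.0)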
||||
def sub2ind(shape, i, j):
|
||||
"""Indices from subscripts. Only support for 2d"""
|
||||
row,col = shape
|
||||
ind = []
|
||||
for k in xrange(len(i)):
|
||||
for m in xrange(len(j)):
|
||||
ind.append(i[k]*col + j[m])
|
||||
return ind
|
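Editor's note: sub2ind() flattens with row-major (C) ordering; e.g. on a 3x4 grid:

# >>> sub2ind((3, 4), [0, 2], [1, 3])
# [1, 3, 9, 11]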
||||
|
||||
|
||||
def sorted_eig(a, b=None,sort_by='sm'):
|
||||
"""
|
||||
Just eig with real part of output sorted:
|
||||
This is for convenience only, not general!
|
||||
|
||||
sort_by='sm': return the eigenvectors by eigenvalues
|
||||
of smallest magnitude first. (default)
|
||||
'lm': returns largest eigenvalues first
|
||||
|
||||
output: just as eig with 2 outputs
|
||||
-- s,v (eigvals,eigenvectors)
|
||||
(This is reversed output compared to matlab)
|
||||
|
||||
"""
|
||||
s,v = eig(a, b)
|
||||
s = real(s) # dont expect any imaginary part
|
||||
v = real(v)
|
||||
ind = argsort(s)
|
||||
if sort_by=='lm':
|
||||
ind = ind[::-1]
|
||||
v = v.take(ind, 1)
|
||||
s = s.take(ind)
|
||||
|
||||
return s,v
|
||||
|
||||
def str2num(string_number):
|
||||
"""Convert input (string number) into number, if float(string_number) fails, a nan is inserted.
|
||||
"""
|
||||
missings = ['','nan','NaN','NA']
|
||||
try:
|
||||
num = float(string_number)
|
||||
except:
|
||||
if string_number in missings:
|
||||
num = nan
|
||||
else:
|
||||
print "Found strange entry: %s" %string_number
|
||||
raise
|
||||
return num
|
||||
|
||||
def randperm(n):
|
||||
r = rand(n)
|
||||
dict={}
|
||||
for i in range(n):
|
||||
dict[r[i]] = i
|
||||
r = sort(r)
|
||||
out = zeros(n)
|
||||
for i in range(n):
|
||||
out[i] = dict[r[i]]
|
||||
return array(out).astype('i')
|
||||
|
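Editor's note: randperm(n) returns a random permutation of 0..n-1, so sorting it recovers the identity:

# >>> sorted(randperm(5))
# [0, 1, 2, 3, 4]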
||||
def mat_center(X,axis=0,ret_mn=False):
|
||||
"""Mean center matrix along axis.
|
||||
|
||||
X -- matrix, data
|
||||
axis -- dim,
|
||||
ret_mn -- bool, return mean
|
||||
|
||||
output:
|
||||
Xc, [mnX]
|
||||
|
||||
NB: axis = 1 is column-centering, axis=0=row-centering
|
||||
default is row centering (axis=0)
|
||||
"""
|
||||
|
||||
try:
|
||||
rows,cols = X.shape
|
||||
except ValueError:
|
||||
print "The X data needs to be two-dimensional"
|
||||
|
||||
if axis==0:
|
||||
mnX = mean(X,axis)[newaxis]
|
||||
Xs = X - mnX
|
||||
|
||||
elif axis==1:
|
||||
mnX = mean(X,axis)[newaxis]
|
||||
Xs = (X.T - mnX).T
|
||||
if ret_mn:
|
||||
return Xs,mnX
|
||||
else:
|
||||
return Xs
|
||||
|
||||
def m_shape(array):
|
||||
"""Returns the array shape on the form of a numpy.matrix."""
|
||||
return matrix(array).shape
|
||||
879
laydi/lib/engines.py
Normal file
@@ -0,0 +1,879 @@
|
||||
"""Module contain algorithms for low-rank models.
|
||||
|
||||
There is almost no typechecking of any kind here, just focus on speed
|
||||
"""
|
||||
|
||||
import math
|
||||
import warnings
|
||||
from scipy.linalg import svd,inv
|
||||
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
||||
apply_along_axis,mean,ones,randn,empty_like,outer,r_,c_,\
|
||||
rand,sum,cumsum,matrix, expand_dims,minimum,where,arange,inner,tile
|
||||
has_sym = True
|
||||
has_arpack = True
|
||||
try:
|
||||
from symeig import symeig
|
||||
except:
|
||||
has_sym = False
|
||||
try:
|
||||
from scipy.sandbox import arpack
|
||||
except:
|
||||
has_arpack = False
|
||||
|
||||
|
||||
def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
||||
""" Principal Component Analysis.
|
||||
|
||||
Performs PCA on given matrix and returns results in a dictionary.
|
||||
|
||||
:Parameters:
|
||||
a : array
|
||||
Data measurement matrix, (samples x variables)
|
||||
aopt : int
|
||||
Number of components to use, aopt<=min(samples, variables)
|
||||
|
||||
:Returns:
|
||||
results : dict
|
||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
||||
explained variance, aopt -- number of components used
|
||||
|
||||
:OtherParameters:
|
||||
mode : str
|
||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||
center_axis : int
|
||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||
|
||||
:SeeAlso:
|
||||
- pcr : other blm
|
||||
- pls : other blm
|
||||
- lpls : other blm
|
||||
|
||||
Notes
|
||||
-----
|
||||
Uses kernel speed-up if m>>n or m<<n.
|
||||
|
||||
If residuals turn rank deficient, a lower number of component than given
|
||||
in input will be used. The number of components used is given in
|
||||
results-dict.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import scipy,engines
|
||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||
>>> dat=engines.pca(a, 2)
|
||||
>>> dat['expvarx']
|
||||
array([0.,99.8561562, 100.])
|
||||
|
||||
"""
|
||||
m, n = a.shape
|
||||
assert(aopt<=min(m,n))
|
||||
if center_axis>=0:
|
||||
a = a - expand_dims(a.mean(center_axis), center_axis)
|
||||
if m>(n+100) or n>(m+100):
|
||||
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
|
||||
else:
|
||||
u, s, vt = svd(a, 0)
|
||||
v = vt.T
|
||||
e = s**2
|
||||
tol = 1e-10
|
||||
eff_rank = sum(s>s[0]*tol)
|
||||
aopt = minimum(aopt, eff_rank)
|
||||
T = u*s
|
||||
s = s[:aopt]
|
||||
T = T[:,:aopt]
|
||||
P = v[:,:aopt]
|
||||
|
||||
if scale=='loads':
|
||||
T = T/s
|
||||
P = P*s
|
||||
|
||||
if mode == 'fast':
|
||||
return {'T':T, 'P':P, 'aopt':aopt}
|
||||
|
||||
if mode=='detailed':
|
||||
E = empty((aopt, m, n))
|
||||
ssq = []
|
||||
lev = []
|
||||
for ai in range(aopt):
|
||||
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||
ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
|
||||
if scale=='loads':
|
||||
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
||||
else:
|
||||
lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
|
||||
else:
|
||||
# residuals
|
||||
E = a - dot(T, P.T)
|
||||
#E = a
|
||||
SEP = E**2
|
||||
ssq = [SEP.sum(0), SEP.sum(1)]
|
||||
# leverages
|
||||
if scale=='loads':
|
||||
lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
|
||||
else:
|
||||
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
|
||||
# variances
|
||||
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
|
||||
|
||||
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
|
||||
|
||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||
""" Principal Component Regression.
|
||||
|
||||
Performs PCR on given matrix and returns results in a dictionary.
|
||||
|
||||
:Parameters:
|
||||
a : array
|
||||
Data measurement matrix, (samples x variables)
|
||||
b : array
|
||||
Data response matrix, (samples x responses)
|
||||
aopt : int
|
||||
Number of components to use, aopt<=min(samples, variables)
|
||||
|
||||
:Returns:
|
||||
results : dict
|
||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
||||
explained variance, aopt -- number of components used
|
||||
|
||||
:OtherParameters:
|
||||
mode : str
|
||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||
center_axis : int
|
||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||
|
||||
:SeeAlso:
|
||||
- pca : other blm
|
||||
- pls : other blm
|
||||
- lpls : other blm
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Uses kernel speed-up if m>>n or m<<n.
|
||||
|
||||
If residuals turn rank deficient, a lower number of component than given
|
||||
in input will be used. The number of components used is given in results-dict.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import scipy,engines
|
||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||
>>> b=scipy.asarray([[1,1],[2,3]])
|
||||
>>> dat=engines.pcr(a, 2)
|
||||
>>> dat['expvarx']
|
||||
array([0.,99.8561562, 100.])
|
||||
|
||||
"""
|
||||
k, l = m_shape(b)
|
||||
if center_axis>=0:
|
||||
b = b - expand_dims(b.mean(center_axis), center_axis)
|
||||
dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
|
||||
T = dat['T']
|
||||
weights = apply_along_axis(vnorm, 0, T)**2
|
||||
if scale=='loads':
|
||||
Q = dot(b.T, T*weights)
|
||||
else:
|
||||
Q = dot(b.T, T/weights)
|
||||
|
||||
if mode=='fast':
|
||||
dat.update({'Q':Q})
|
||||
return dat
|
||||
if mode=='detailed':
|
||||
F = empty((aopt, k, l))
|
||||
for i in range(aopt):
|
||||
F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
|
||||
else:
|
||||
F = b - dot(T, Q.T)
|
||||
expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]
|
||||
#fixme: Y-var leverages
|
||||
dat.update({'Q':Q, 'F':F, 'expvary':expvary})
|
||||
return dat
|
||||
|
||||
def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
|
||||
"""Partial Least Squares Regression.
|
||||
|
||||
Performs PLS on given matrix and returns results in a dictionary.
|
||||
|
||||
:Parameters:
|
||||
a : array
|
||||
Data measurement matrix, (samples x variables)
|
||||
b : array
|
||||
Data response matrix, (samples x responses)
|
||||
aopt : int
|
||||
Number of components to use, aopt<=min(samples, variables)
|
||||
|
||||
:Returns:
|
||||
results : dict
|
||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
||||
explained variance of descriptors, expvary -- cumulative explained
|
||||
variance of responses, aopt -- number of components used
|
||||
|
||||
:OtherParameters:
|
||||
mode : str
|
||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||
center_axis : int
|
||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||
|
||||
:SeeAlso:
|
||||
- pca : other blm
|
||||
- pcr : other blm
|
||||
- lpls : other blm
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Uses kernel speed-up if m>>n or m<<n.
|
||||
|
||||
If residuals turn rank deficient, a lower number of component than given
|
||||
in input will be used. The number of components used is given in results-dict.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import scipy,engines
|
||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||
>>> b=scipy.asarray([[1,1],[2,3]])
|
||||
>>> dat=engines.pls(a, b, 2)
|
||||
>>> dat['expvarx']
|
||||
array([0.,99.8561562, 100.])
|
||||
|
||||
"""
|
||||
|
||||
m, n = m_shape(a)
|
||||
if ab!=None:
|
||||
mm, l = m_shape(ab)
|
||||
assert(m==mm)
|
||||
else:
|
||||
k, l = m_shape(b)
|
||||
|
||||
if center_axis>=0:
|
||||
a = a - expand_dims(a.mean(center_axis), center_axis)
|
||||
b = b - expand_dims(b.mean(center_axis), center_axis)
|
||||
|
||||
W = empty((n, aopt))
|
||||
P = empty((n, aopt))
|
||||
R = empty((n, aopt))
|
||||
Q = empty((l, aopt))
|
||||
T = empty((m, aopt))
|
||||
B = empty((aopt, n, l))
|
||||
tt = empty((aopt,))
|
||||
|
||||
if ab==None:
|
||||
ab = dot(a.T, b)
|
||||
for i in range(aopt):
|
||||
if ab.shape[1]==1: #pls 1
|
||||
w = ab.reshape(n, l)
|
||||
w = w/vnorm(w)
|
||||
elif n<l: # more yvars than xvars
|
||||
if has_sym:
|
||||
s, w = symeig(dot(ab, ab.T),range=[n,n],overwrite=True)
|
||||
else:
|
||||
w, s, vh = svd(dot(ab, ab.T))
|
||||
w = w[:,:1]
|
||||
else: # standard wide xdata
|
||||
if has_sym:
|
||||
s, q = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
|
||||
else:
|
||||
q, s, vh = svd(dot(ab.T, ab))
|
||||
q = q[:,:1]
|
||||
w = dot(ab, q)
|
||||
w = w/vnorm(w)
|
||||
r = w.copy()
|
||||
if i>0:
|
||||
for j in range(0, i, 1):
|
||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||
|
||||
t = dot(a, r)
|
||||
tt[i] = tti = dot(t.T, t).ravel()
|
||||
p = dot(a.T, t)/tti
|
||||
q = dot(r.T, ab).T/tti
|
||||
ab = ab - dot(p, q.T)*tti
|
||||
T[:,i] = t.ravel()
|
||||
W[:,i] = w.ravel()
|
||||
|
||||
if mode=='fast' and i==aopt-1:
|
||||
if scale=='loads':
|
||||
tnorm = sqrt(tt)
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
return {'T':T, 'W':W}
|
||||
|
||||
P[:,i] = p.ravel()
|
||||
R[:,i] = r.ravel()
|
||||
Q[:,i] = q.ravel()
|
||||
#B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
|
||||
|
||||
|
||||
|
||||
qnorm = apply_along_axis(vnorm, 0, Q)
|
||||
tnorm = sqrt(tt)
|
||||
pp = (P**2).sum(0)
|
||||
if mode=='detailed':
|
||||
E = empty((aopt, m, n))
|
||||
F = empty((aopt, k, l))
|
||||
ssqx, ssqy = [], []
|
||||
leverage = empty((aopt, m))
|
||||
h2x = [] #hotellings T^2
|
||||
h2y = []
|
||||
for ai in range(aopt):
|
||||
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||
F[ai,:,:] = b - dot(T[:,:ai+1], Q[:,:ai+1].T)
|
||||
ssqx.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
|
||||
ssqy.append([(F[ai,:,:]**2).mean(0), (F[ai,:,:]**2).mean(1)])
|
||||
leverage[ai,:] = 1./m + ((T[:,:ai+1]/tnorm[:ai+1])**2).sum(1)
|
||||
h2y.append(1./k + ((Q[:,:ai+1]/qnorm[:ai+1])**2).sum(1))
|
||||
else:
|
||||
# residuals
|
||||
E = a - dot(T, P.T)
|
||||
F = b - dot(T, Q.T)
|
||||
sepx = E**2
|
||||
ssqx = [sepx.sum(0), sepx.sum(1)]
|
||||
sepy = F**2
|
||||
ssqy = [sepy.sum(0), sepy.sum(1)]
|
||||
# leverage
|
||||
leverage = 1./m + ((T/tnorm)**2).sum(1)
|
||||
h2x = []
|
||||
h2y = []
|
||||
# variances
|
||||
tp= tt*pp
|
||||
tq = tt*qnorm*qnorm
|
||||
expvarx = r_[0, 100*tp/(a*a).sum()]
|
||||
expvary = r_[0, 100*tq/(b*b).sum()]
|
||||
|
||||
if scale=='loads':
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
Q = Q*tnorm
|
||||
P = P*tnorm
|
||||
|
||||
return {'Q':Q, 'P':P, 'T':T, 'W':W, 'R':R, 'E':E, 'F':F,
|
||||
'expvarx':expvarx, 'expvary':expvary, 'ssqx':ssqx, 'ssqy':ssqy,
|
||||
'leverage':leverage, 'h2':h2x}
|
||||
|
||||
def w_simpls(aat, b, aopt):
|
||||
""" Simpls for wide matrices.
|
||||
Fast pls for crossval, used in calc rmsep for wide X
|
||||
There is no P or W. T is normalised
|
||||
"""
|
||||
bb = b.copy()
|
||||
m, m = aat.shape
|
||||
U = empty((m, aopt)) # W
|
||||
T = empty((m, aopt))
|
||||
H = empty((m, aopt)) # R
|
||||
PROJ = empty((m, aopt)) # P?
|
||||
for i in range(aopt):
|
||||
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
||||
u = dot(b, q[:,:1]) #y-factor scores
|
||||
U[:,i] = u.ravel()
|
||||
t = dot(aat, u)
|
||||
t = t/vnorm(t)
|
||||
T[:,i] = t.ravel()
|
||||
h = dot(aat, t) #score-weights
|
||||
H[:,i] = h.ravel()
|
||||
PROJ[:,:i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, H[:,:i+1])) )
|
||||
if i<aopt:
|
||||
b = b - dot(PROJ[:,:i+1], dot(H[:,:i+1].T,b) )
|
||||
C = dot(bb.T, T)
|
||||
|
||||
return {'T':T, 'U':U, 'Q':C, 'H':H}
|
||||
|
||||
def w_pls(aat, b, aopt):
|
||||
""" Pls for wide matrices.
|
||||
Fast pls for crossval, used in calc rmsep for wide X
|
||||
There is no P or W. T is normalised
|
||||
|
||||
aat = centered kernel matrix
|
||||
b = centered y
|
||||
"""
|
||||
bb = b.copy()
|
||||
k, l = m_shape(b)
|
||||
m, m = m_shape(aat)
|
||||
U = empty((m, aopt)) # W
|
||||
T = empty((m, aopt))
|
||||
R = empty((m, aopt)) # R
|
||||
PROJ = empty((m, aopt)) # P?
|
||||
|
||||
for i in range(aopt):
|
||||
if has_sym:
|
||||
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
|
||||
else:
|
||||
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
||||
q = q[:,:1]
|
||||
u = dot(b , q) #y-factor scores
|
||||
U[:,i] = u.ravel()
|
||||
t = dot(aat, u)
|
||||
|
||||
t = t/vnorm(t)
|
||||
T[:,i] = t.ravel()
|
||||
r = dot(aat, t)#score-weights
|
||||
#r = r/vnorm(r)
|
||||
R[:,i] = r.ravel()
|
||||
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
||||
if i<aopt:
|
||||
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
|
||||
C = dot(bb.T, T)
|
||||
|
||||
return {'T':T, 'U':U, 'Q':C, 'R':R}
|
||||
|
||||
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||
"""Undeflated Ridged svd(X'Y)
|
||||
"""
|
||||
m, n = m_shape(a)
|
||||
k, l = m_shape(b)
|
||||
u, s, vt = svd(b, full_matrices=0)
|
||||
g0 = dot(u*s, u.T)
|
||||
g = (1 - r)*g0 + r*eye(m)
|
||||
ag = dot(a.T, g)
|
||||
u, s, vt = svd(ag, full_matrices=0)
|
||||
W = u[:,:aopt]
|
||||
K = vt[:aopt,:].T
|
||||
T = dot(a, W)
|
||||
tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns
|
||||
|
||||
if mode == 'fast':
|
||||
if scale=='loads':
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
return {'T':T, 'W':W}
|
||||
|
||||
U = dot(g0, K) #fixme check this
|
||||
Q = dot(b.T, dot(T, inv(dot(T.T, T)) ))
|
||||
B = zeros((aopt, n, l), dtype='f')
|
||||
for i in range(aopt):
|
||||
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
||||
|
||||
if mode == 'detailed':
|
||||
E = empty((aopt, m, n))
|
||||
F = empty((aopt, k, l))
|
||||
for i in range(aopt):
|
||||
E[i] = a - dot(T[:,:i+1], W[:,:i+1].T)
|
||||
F[i] = b - dot(a, B[i])
|
||||
else: #normal
|
||||
F = b - dot(a, B[-1])
|
||||
E = a - dot(T, W.T)
|
||||
|
||||
if scale=='loads':
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
Q = Q*tnorm
|
||||
|
||||
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
||||
|
||||
|
||||
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
|
||||
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
|
||||
|
||||
(X!Z)->Y
|
||||
:input:
|
||||
X : data matrix (m, n)
|
||||
Y : data matrix (m, l)
|
||||
Z : data matrix (n, o)
|
||||
|
||||
:output:
|
||||
T : X-scores
|
||||
W : X-weights/Z-weights
|
||||
P : X-loadings
|
||||
Q : Y-loadings
|
||||
U : X-Y relation
|
||||
L : Z-scores
|
||||
K : Z-loads
|
||||
B : Regression coefficients X->Y
|
||||
b0: Regression coefficient intercept
|
||||
evx : X-explained variance
|
||||
evy : Y-explained variance
|
||||
evz : Z-explained variance
|
||||
mnx : X location
|
||||
mny : Y location
|
||||
mnz : Z location
|
||||
|
||||
:Notes:
|
||||
|
||||
"""
|
||||
if mean_ctr!=None:
|
||||
xctr, yctr, zctr = mean_ctr
|
||||
X, mnX = center(X, xctr)
|
||||
Y, mnY = center(Y, yctr)
|
||||
Z, mnZ = center(Z, zctr)
|
||||
|
||||
varX = (X**2).sum()
|
||||
varY = (Y**2).sum()
|
||||
varZ = (Z**2).sum()
|
||||
|
||||
m, n = X.shape
|
||||
k, l = Y.shape
|
||||
u, o = Z.shape
|
||||
|
||||
# initialize
|
||||
U = empty((k, a_max))
|
||||
Q = empty((l, a_max))
|
||||
T = empty((m, a_max))
|
||||
W = empty((n, a_max))
|
||||
P = empty((n, a_max))
|
||||
K = empty((o, a_max))
|
||||
L = empty((u, a_max))
|
||||
B = empty((a_max, n, l))
|
||||
#b0 = empty((a_max, 1, l))
|
||||
var_x = empty((a_max,))
|
||||
var_y = empty((a_max,))
|
||||
var_z = empty((a_max,))
|
||||
|
||||
MAX_ITER = 250
|
||||
LIM = 1e-1
|
||||
for a in range(a_max):
|
||||
if verbose:
|
||||
print "\nWorking on comp. %s" %a
|
||||
u = Y[:,:1]
|
||||
diff = 1
|
||||
niter = 0
|
||||
while (diff>LIM and niter<MAX_ITER):
|
||||
niter += 1
|
||||
u1 = u.copy()
|
||||
w = dot(X.T, u)
|
||||
w = w/sqrt(dot(w.T, w))
|
||||
#w = w/dot(w.T, w)
|
||||
l = dot(Z, w)
|
||||
k = dot(Z.T, l)
|
||||
k = k/sqrt(dot(k.T, k))
|
||||
#k = k/dot(k.T, k)
|
||||
w = alpha*k + (1-alpha)*w
|
||||
#print sqrt(dot(w.T, w))
|
||||
w = w/sqrt(dot(w.T, w))
|
||||
t = dot(X, w)
|
||||
c = dot(Y.T, t)
|
||||
c = c/sqrt(dot(c.T, c))
|
||||
u = dot(Y, c)
|
||||
diff = dot((u-u1).T, (u-u1))
|
||||
if verbose:
|
||||
print "Converged after %s iterations" %niter
|
||||
print "Error: %.2E" %diff
|
||||
tt = dot(t.T, t)
|
||||
p = dot(X.T, t)/tt
|
||||
q = dot(Y.T, t)/tt
|
||||
l = dot(Z, w)
|
||||
|
||||
U[:,a] = u.ravel()
|
||||
W[:,a] = w.ravel()
|
||||
P[:,a] = p.ravel()
|
||||
T[:,a] = t.ravel()
|
||||
Q[:,a] = q.ravel()
|
||||
L[:,a] = l.ravel()
|
||||
K[:,a] = k.ravel()
|
||||
|
||||
X = X - dot(t, p.T)
|
||||
Y = Y - dot(t, q.T)
|
||||
Z = (Z.T - dot(w, l.T)).T
|
||||
|
||||
var_x[a] = pow(X, 2).sum()
|
||||
var_y[a] = pow(Y, 2).sum()
|
||||
var_z[a] = pow(Z, 2).sum()
|
||||
|
||||
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
|
||||
#b0[a] = mnY - dot(mnX, B[a])
|
||||
|
||||
|
||||
# variance explained
|
||||
evx = 100.0*(1 - var_x/varX)
|
||||
evy = 100.0*(1 - var_y/varY)
|
||||
evz = 100.0*(1 - var_z/varZ)
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
Q = Q*tnorm
|
||||
knorm = apply_along_axis(vnorm, 0, K)
|
||||
L = L*knorm
|
||||
K = K/knorm
|
||||
|
||||
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
|
||||
|
||||
|
||||
|
||||
def nipals_pls(X, Y, a_max, alpha=.7, ax_center=0, mode='normal', scale='scores', verbose=False):
|
||||
"""Partial Least Sqaures Regression by the nipals algorithm.
|
||||
|
||||
(X!Z)->Y
|
||||
:input:
|
||||
X : data matrix (m, n)
|
||||
Y : data matrix (m, l)
|
||||
|
||||
:output:
|
||||
T : X-scores
|
||||
W : X-weights/Z-weights
|
||||
P : X-loadings
|
||||
Q : Y-loadings
|
||||
U : X-Y relation
|
||||
B : Regression coefficients X->Y
|
||||
b0: Regression coefficient intercept
|
||||
evx : X-explained variance
|
||||
evy : Y-explained variance
|
||||
evz : Z-explained variance
|
||||
|
||||
:Notes:
|
||||
|
||||
"""
|
||||
if ax_center>=0:
|
||||
mn_x = expand_dims(X.mean(ax_center), ax_center)
|
||||
mn_y = expand_dims(Y.mean(ax_center), ax_center)
|
||||
X = X - mn_x
|
||||
Y = Y - mn_y
|
||||
|
||||
varX = pow(X, 2).sum()
|
||||
varY = pow(Y, 2).sum()
|
||||
|
||||
m, n = X.shape
|
||||
k, l = Y.shape
|
||||
|
||||
# initialize
|
||||
U = empty((k, a_max))
|
||||
Q = empty((l, a_max))
|
||||
T = empty((m, a_max))
|
||||
W = empty((n, a_max))
|
||||
P = empty((n, a_max))
|
||||
B = empty((a_max, n, l))
|
||||
b0 = empty((a_max, m, l))
|
||||
var_x = empty((a_max,))
|
||||
var_y = empty((a_max,))
|
||||
|
||||
t1 = X[:,:1]
|
||||
for a in range(a_max):
|
||||
if verbose:
|
||||
print "\n Working on comp. %s" %a
|
||||
u = Y[:,:1]
|
||||
diff = 1
|
||||
MAX_ITER = 100
|
||||
lim = 1e-16
|
||||
niter = 0
|
||||
while (diff>lim and niter<MAX_ITER):
|
||||
niter += 1
|
||||
#u1 = u.copy()
|
||||
w = dot(X.T, u)
|
||||
w = w/sqrt(dot(w.T, w))
|
||||
#l = dot(Z, w)
|
||||
#k = dot(Z.T, l)
|
||||
#k = k/sqrt(dot(k.T, k))
|
||||
#w = alpha*k + (1-alpha)*w
|
||||
#w = w/sqrt(dot(w.T, w))
|
||||
t = dot(X, w)
|
||||
q = dot(Y.T, t)
|
||||
q = q/sqrt(dot(q.T, q))
|
||||
u = dot(Y, q)
|
||||
diff = vnorm(t1 - t)
|
||||
t1 = t.copy()
|
||||
if verbose:
|
||||
print "Converged after %s iterations" %niter
|
||||
#tt = dot(t.T, t)
|
||||
#p = dot(X.T, t)/tt
|
||||
#q = dot(Y.T, t)/tt
|
||||
#l = dot(Z, w)
|
||||
p = dot(X.T, t)/dot(t.T, t)
|
||||
p_norm = vnorm(p)
|
||||
t = t*p_norm
|
||||
w = w*p_norm
|
||||
p = p/p_norm
|
||||
|
||||
U[:,a] = u.ravel()
|
||||
W[:,a] = w.ravel()
|
||||
P[:,a] = p.ravel()
|
||||
T[:,a] = t.ravel()
|
||||
Q[:,a] = q.ravel()
|
||||
|
||||
X = X - dot(t, p.T)
|
||||
Y = Y - dot(t, q.T)
|
||||
|
||||
var_x[a] = pow(X, 2).sum()
|
||||
var_y[a] = pow(Y, 2).sum()
|
||||
|
||||
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
|
||||
b0[a] = mn_y - dot(mn_x, B[a])
|
||||
|
||||
# variance explained
|
||||
evx = 100.0*(1 - var_x/varX)
|
||||
evy = 100.0*(1 - var_y/varY)
|
||||
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
Q = Q*tnorm
|
||||
|
||||
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy,
|
||||
'mnx': mn_x, 'mny': mn_y, 'xc': X, 'yc': Y}
|
||||
|
||||
|
||||
########### Helper routines #########
|
||||
|
||||
def m_shape(array):
|
||||
return matrix(array).shape
|
||||
|
||||
def esvd(data, amax=None):
|
||||
"""SVD with the option of economy sized calculation
|
||||
Calculate subspaces of X'X or XX' depending on the shape
|
||||
of the matrix.
|
||||
|
||||
Good for extreme fat or thin matrices
|
||||
|
||||
:notes:
|
||||
Numpy supports this by setting full_matrices=0
|
||||
"""
|
||||
has_arpack = True
|
||||
try:
|
||||
import arpack
|
||||
except:
|
||||
has_arpack = False
|
||||
m, n = data.shape
|
||||
if m>=n:
|
||||
kernel = dot(data.T, data)
|
||||
if has_arpack:
|
||||
if amax==None:
|
||||
amax = n
|
||||
s, v = arpack.eigen_symmetric(kernel,k=amax, which='LM',
|
||||
maxiter=200,tol=1e-5)
|
||||
elif has_sym:
|
||||
if amax==None:
|
||||
amax = n
|
||||
pcrange = None
|
||||
else:
|
||||
pcrange = [n-amax, n]
|
||||
s, v = symeig(kernel, range=pcrange, overwrite=True)
|
||||
s = s[::-1].real
|
||||
v = v[:,::-1].real
|
||||
else:
|
||||
u, s, vt = svd(kernel)
|
||||
v = vt.T
|
||||
s = sqrt(s)
|
||||
u = dot(data, v)/s
|
||||
else:
|
||||
kernel = dot(data, data.T)
|
||||
if has_sym:
|
||||
if amax==None:
|
||||
amax = m
|
||||
pcrange = None
|
||||
else:
|
||||
pcrange = [m-amax, m]
|
||||
s, u = symeig(kernel, range=pcrange, overwrite=True)
|
||||
s = s[::-1]
|
||||
u = u[:,::-1]
|
||||
else:
|
||||
u, s, vt = svd(kernel)
|
||||
s = sqrt(s)
|
||||
v = dot(data.T, u)/s
|
||||
# some use of symeig returns the 0 imaginary part
|
||||
return u.real, s.real, v.real
|
||||
|
||||
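# --- Illustrative note (not from the original commit): the plain-numpy
# equivalent of esvd()'s economy-size behaviour, as mentioned in the
# docstring above:
#
#     from numpy.linalg import svd
#     u, s, vt = svd(data, full_matrices=0)   # u: (m, min(m, n))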
def vnorm(x):
    # assumes column arrays (or vectors)
    return math.sqrt(dot(x.T, x))
def center(a, axis):
    # 0 = col center, 1 = row center, 2 = double center
    # -1 = nothing

    # check if we have a vector
    is_vec = len(a.shape) == 1
    if not is_vec:
        is_vec = a.shape[0] == 1 or a.shape[1] == 1
    if is_vec:
        if axis == 2:
            warnings.warn("Double centering of vector ignored, using ordinary centering")
        if axis == -1:
            mn = 0
        else:
            mn = a.mean()
        return a - mn, mn

    # !!!fixme: use broadcasting
    if axis == -1:
        mn = zeros((1, a.shape[1],))
    elif axis == 0:
        mn = a.mean(0)[newaxis]
    elif axis == 1:
        mn = a.mean(1)[:, newaxis]
    elif axis == 2:
        mn = a.mean(0)[newaxis] + a.mean(1)[:, newaxis] - a.mean()
        # for double centering, the column means are returned as mn
        return a - mn, a.mean(0)[newaxis]
    else:
        # the original raised IOError, which was misleading
        raise ValueError("input error: axis must be in [-1,0,1,2]")

    return a - mn, mn


def scale(a, axis):
    if axis == -1:
        sc = ones((a.shape[1],))
    elif axis == 0:
        sc = a.std(0)
    elif axis == 1:
        sc = a.std(1)[:, newaxis]
    else:
        raise ValueError("input error: axis must be in [-1,0,1]")

    # the original returned a - sc (with a zeros default), which looks
    # like a bug: scaling should divide by the standard deviations
    return a/sc, sc
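# --- Illustrative sketch (not from the original commit): typical
# column-wise preprocessing with the helpers above. `X` is a hypothetical
# (samples x variables) array.
#
#     Xc, mn = center(X, axis=0)      # remove column means
#     Xs, sc = scale(Xc, axis=0)      # divide by column std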
## #PCA CALCS (legacy MATLAB code kept for reference: Q and T^2 limits)

## % Calculate Q limit using unused eigenvalues
## temp = diag(s);
## if n < m
##     emod = temp(lv+1:n,:);
## else
##     emod = temp(lv+1:m,:);
## end
## th1 = sum(emod);
## th2 = sum(emod.^2);
## th3 = sum(emod.^3);
## h0 = 1 - ((2*th1*th3)/(3*th2^2));
## if h0 <= 0.0
##     h0 = .0001;
##     disp(' ')
##     disp('Warning: Distribution of unused eigenvalues indicates that')
##     disp('         you should probably retain more PCs in the model.')
## end
## q = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
## disp(' ')
## disp('The 95% Q limit is')
## disp(q)
## if plots >= 1
##     lim = [q q];
##     plot(scl,res,scllim,lim,'--b')
##     str = sprintf('Process Residual Q with 95 Percent Limit Based on %g PC Model',lv);
##     title(str)
##     xlabel('Sample Number')
##     ylabel('Residual')
##     pause
## end
## % Calculate T^2 limit using ftest routine
## if lv > 1
##     if m > 300
##         tsq = (lv*(m-1)/(m-lv))*ftest(.95,300,lv,2);
##     else
##         tsq = (lv*(m-1)/(m-lv))*ftest(.95,m-lv,lv,2);
##     end
##     disp(' ')
##     disp('The 95% T^2 limit is')
##     disp(tsq)
##     % Calculate the value of T^2 by normalizing the scores to
##     % unit variance and summing them up
##     if plots >= 1.0
##         temp2 = scores*inv(diag(ssq(1:lv,2).^.5));
##         tsqvals = sum((temp2.^2)');
##         tlim = [tsq tsq];
##         plot(scl,tsqvals,scllim,tlim,'--b')
##         str = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',lv);
##         title(str)
##         xlabel('Sample Number')
##         ylabel('Value of T^2')
##     end
## else
##     disp('T^2 not calculated when number of latent variables = 1')
##     tsq = 1.96^2;
## end

95
laydi/lib/hypergeom.py
Normal file
@@ -0,0 +1,95 @@
import scipy
import scipy.stats   # needed by p_hyper_geom below; missing in the original

try:
    # FIXME: remove rpy in a more proper way (the bogus module name
    # deliberately forces the ImportError)
    import rpy_does_not_exist as rpy
    has_rpy = True
    silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
except ImportError:
    has_rpy = False

def gene_hypergeo_test(selection, category_dataset):
    """Returns the pvals from a hypergeometric test of significance.

    input:
    -- selection: list of selected identifiers along 0 dim of cat.set
    -- category_dataset: categories along dim 1 (cols)
    """
    gene_dim_name = category_dataset.get_dim_name(0)
    category_dim_name = category_dataset.get_dim_name(1)

    # categories
    all_cats = category_dataset.get_identifiers(category_dim_name, sorted=True)

    # gene id universe
    all_genes = category_dataset.get_identifiers(gene_dim_name)

    # significant genes
    good_genes_all = list(selection)
    gg_index = category_dataset.get_indices(gene_dim_name, good_genes_all)

    # significant genes pr. category
    good_genes_cat = []
    for col in category_dataset.asarray().T:
        index = scipy.where(col == 1)[0]
        index = scipy.intersect1d(index, gg_index)
        if index.size == 0:
            good_genes_cat.append([])
        else:
            good_genes_cat.append(category_dataset.get_identifiers(gene_dim_name, index))
    count = map(len, good_genes_cat)
    count = scipy.asarray([max(i, 0) for i in count])
    cat_count = category_dataset.asarray().sum(0)
    if has_rpy:
        rpy.r.assign("x", count - 1)  # number of sign. genes in category i
        rpy.r.assign("m", len(good_genes_all))  # number of sign. genes total
        rpy.r.assign("n", len(all_genes) - len(good_genes_all))  # num. genes not sign.
        rpy.r.assign("k", cat_count)  # num. genes in cat i
        silent_eval('pvals <- phyper(x, m, n, k, lower.tail=FALSE)')
        pvals = rpy.r("pvals")
    else:
        pvals = p_hyper_geom(count, len(good_genes_all),
                             len(all_genes) - len(good_genes_all),
                             cat_count)

    # flag empty categories with an out-of-range "p-value" of 2
    pvals = scipy.where(cat_count == 0, 2, pvals)
    pvals = scipy.where(scipy.isnan(pvals), 2, pvals)
    out = {}
    for i in range(pvals.size):
        out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i])
    return out


def p_hyper_geom(x, m, n, k):
    """Distribution function for the hypergeometric distribution.

    Inputs:
    -- x: vector of quantiles representing the number of white balls
          drawn without replacement from an urn which contains both
          black and white balls.
    -- m: the number of white balls in the urn.
    -- n: the number of black balls in the urn.
    -- k: [vector] the number of balls drawn from the urn

    Comments:
    Similar to R's phyper with lower.tail=FALSE (note, though, that
    the cumulative sums below accumulate the lower tail).
    """
    M = m + n
    multiple_draws = False
    if isinstance(k, scipy.ndarray) and k.size > 1:
        multiple_draws = True
        n_draws = k.size
        if n_draws < x.size:
            print "n_draws: %d and n_found: %d Length mismatch, zero padded" % (k.size, x.size)
    # remap to scipy's (M, n, N) argument order
    N = k
    n = m
    if not multiple_draws:
        out = scipy.stats.hypergeom.pmf(x, M, n, N).cumsum()
    else:
        out = scipy.zeros((max(n_draws, x.size),))
        for i in xrange(N.size):
            out[i] = scipy.stats.hypergeom.pmf(x, M, n, N[i]).cumsum()[i]
    return out
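if __name__ == '__main__':
    # Illustrative sketch, not part of the original commit: an upper-tail
    # hypergeometric p-value computed directly with scipy, for comparison
    # with p_hyper_geom above. All numbers are made up.
    x, m, n, k = 5, 20, 80, 10      # found, white, black, drawn
    tail = scipy.stats.hypergeom.pmf(scipy.arange(x, min(m, k) + 1),
                                     m + n, m, k).sum()
    print "P[X >= %d] = %f" % (x, tail)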
567
laydi/lib/nx_utils.py
Normal file
@@ -0,0 +1,567 @@
import os, sys
from itertools import izip
import networkx as NX
# note: ravel, mean and array were missing from the original import list,
# although they are used below; duplicates (maximum, diag, real) removed
from scipy import shape, diag, dot, asarray, sqrt, real, zeros, eye, exp, \
     maximum, outer, sum, atleast_2d, ravel, mean, array
from scipy.linalg import eig, svd, inv, expm, norm
from cx_utils import sorted_eig

import numpy

eps = numpy.finfo(float).eps.item()
feps = numpy.finfo(numpy.single).eps.item()
_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1, 'i': 1}

class NXUTILSException(Exception): pass

def xgraph_to_graph(G):
    """Convert an XGraph to an ordinary graph.

    Edge attributes, multiple edges and self-loops are lost in the
    process.
    """
    GG = NX.convert.from_dict_of_lists(NX.convert.to_dict_of_lists(G))
    return GG
def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out='dist'):
    """
    Function for calculating a general affinity matrix, based upon distances.
    Affinity = 1 - distance ((10-1), 1 is far apart)

    INPUT

    data:
        gene expression data, type dict data[gene] = expression-vector

    G:
        The network (networkx.base.Graph object)

    mask:
        The array mask shows which data are missing. If mask[i][j]==0, then
        data[i][j] is missing.

    weights:
        The array weight contains the weights to be used when calculating distances.

    transpose:
        If transpose==0, then genes are clustered. If transpose==1, microarrays are
        clustered.

    dist:
        The character dist defines the distance function to be used:
        dist=='e': Euclidean distance
        dist=='b': City Block distance
        dist=='h': Harmonically summed Euclidean distance
        dist=='c': Pearson correlation
        dist=='a': absolute value of the correlation
        dist=='u': uncentered correlation
        dist=='x': absolute uncentered correlation
        dist=='s': Spearman's rank correlation
        dist=='k': Kendall's tau
        For other values of dist, the default (Euclidean distance) is used.

    OUTPUT
    D:
        Similarity matrix (nGenes x nGenes), symmetric, d_ij in [0,1],
        normalized so max weight = 1.0
    """
    try:
        from Bio import Cluster as CLS
    except ImportError:
        raise NXUTILSException("Import of Biopython failed")
    n_var = len(data)
    n_samp = len(data[data.keys()[0]])
    # the original mixed nVar/nSamp with n_var/n_samp and misspelt dtype
    X = zeros((n_var, n_samp), dtype='<f8')

    for i, gene in enumerate(ids):  # this should be right!!
        X[i, :] = data[gene]

    # X = transpose(X)  # distancematrix needs matrix as (nGenes, nSamples)

    D_list = CLS.distancematrix(X, dist=dist)
    D = zeros((n_var, n_var), dtype='<f8')
    for i, row in enumerate(D_list):
        if i > 0:
            D[i, :len(row)] = row

    D = D + D.T
    MAX = 30.0
    D_max = max(ravel(D))/MAX
    D_n = D/D_max  # normalised (max = 30.0)
    D_n = (MAX + 1.) - D_n  # using correlation (inverse distance for dists)

    A = NX.adj_matrix(G, nodelist=ids)
    if out == 'dist':
        return D_n*A
    elif out == 'heat_kernel':
        t = 1.0
        K = exp(-t*D*A)
        return K
    elif out == 'complete':
        return D_n
    else:
        return []
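# --- Illustrative sketch (not from the original commit): a hypothetical
# call computing a correlation-based affinity over a network. `expression`
# is an assumed dict of gene -> expression vector.
#
#     data = dict((node, expression[node]) for node in G.nodes())
#     A = get_affinity_matrix(G, data, G.nodes(), dist='c', out='dist')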
def remove_one_degree_nodes(G, iter=True):
    """Removes all nodes with only one neighbour. These nodes do not
    contribute to community structure.

    input:
        G    -- graph
        iter -- True/False, remove iteratively?
    """
    G_copy = G.copy()
    if iter == True:
        while 1:
            bad_nodes = []
            for node in G_copy.nodes():
                if len(G_copy.neighbors(node)) == 1:
                    bad_nodes.append(node)
            if len(bad_nodes) > 0:
                G_copy.delete_nodes_from(bad_nodes)
            else:
                break
    else:
        # single pass (the original iterated neighbors_iter() and
        # referenced an undefined `node`; iterating the nodes is the
        # likely intent)
        bad_nodes = []
        for node in G_copy.nodes():
            if len(G_copy.neighbors(node)) == 1:
                bad_nodes.append(node)
        if len(bad_nodes) > 0:
            G_copy.delete_nodes_from(bad_nodes)

    print "Deleted %s nodes from network" % (len(G) - len(G_copy))
    return G_copy
def key_players(G, n=1, with_labels=False):
    """
    Resilience measure.
    Identification of key nodes by the fraction of nodes in the
    disconnected subgraph when the node is removed.

    output:
        fraction of nodes disconnected when node i is removed
    """
    i = 0
    frac = []
    labels = {}
    for node in G.nodes():
        i += 1
        print i
        T = G.copy()
        T.delete_node(node)
        n_nodes = T.number_of_nodes()
        sub_graphs = NX.connected_component_subgraphs(T)
        # note: the original rebound the parameter `n` here
        n_comp = len(sub_graphs)
        if n_comp > 1:
            strong_comp = sub_graphs[0]
            fraction = 1.0 - 1.0*strong_comp.number_of_nodes()/n_nodes
            frac.append(fraction)
            labels[node] = fraction
        else:
            frac.append(0.0)
            labels[node] = 0.0

    out = 1.0 - array(frac)
    if with_labels == True:
        return out, labels
    else:
        return out
def node_weighted_adj_matrix(G, weights=None, ave_type='harmonic', with_labels=False):
    """Return a weighted adjacency matrix of graph. The weights are
    node weights.

    input:  G -- graph
            weights -- dict, keys: nodes, values: weights
            with_labels -- True/False, return labels?

    output: A -- weighted adjacency matrix
            [index] -- node labels
    """
    n = G.order()
    # make a dictionary that maps vertex name to position
    index = {}
    count = 0
    for node in G.nodes():
        index[node] = count
        count = count + 1

    a = zeros((n, n))
    if isinstance(G, NX.xbase.XGraph):
        # the original compared type(G) to a string, which is always False
        raise NXUTILSException("XGraph input not supported")
    for head, tail in G.edges():
        if ave_type == 'geometric':
            a[index[head], index[tail]] = sqrt(weights[head]*weights[tail])
            a[index[tail], index[head]] = a[index[head], index[tail]]
        elif ave_type == 'harmonic':
            # harmonic mean of the two node weights (the original called
            # mean(w1, w2), which is not a valid signature)
            hm = 2.0*weights[head]*weights[tail]/(weights[head] + weights[tail])
            a[index[head], index[tail]] = hm
            a[index[tail], index[head]] = hm
    if with_labels:
        return a, index
    else:
        return a
def weighted_adj_matrix(G, with_labels=False):
    """Adjacency matrix of an XGraph whose weights are given in edges.
    """
    A, labels = NX.adj_matrix(G, with_labels=True)
    W = A.astype('<f8')
    for orf, i in labels.items():
        for orf2, j in labels.items():
            if G.has_edge(orf, orf2):
                edge_weight = G.get_edge(orf, orf2)
                W[i, j] = edge_weight
                W[j, i] = edge_weight
    if with_labels == True:
        return W, labels
    else:
        return W
def assortative_index(G):
    """Outputs two vectors: the degree and the neighbour average degree.
    Used to measure assortative mixing. If the average degree is
    positively correlated with the degree, we know that hubs tend to
    connect to other hubs.

    input:  G, graph (must be connected)
    output: d, mn_d: degree, and average degree of neighbours
            (degree sorting from degree(with_labels=True))
    """
    d = G.degree(with_labels=True)
    out = []
    for node in G.nodes():
        nn = G.neighbors(node)
        if len(nn) > 0:
            nn_d = mean([float(d[i]) for i in nn])
            out.append((d[node], nn_d))
    return array(out).T
def struct_equivalence(G, n1, n2):
    """Returns the structural equivalence of a node pair. Two nodes
    are structurally equal if they share the same neighbors.

    x_s = [ne(n1) union ne(n2) - ne(n1) intersection ne(n2)]/[ne(n1)
    union ne(n2) + ne(n1) intersection ne(n2)]
    ref: Brun et.al 2003
    """
    s1 = set(G.neighbors(n1))
    s2 = set(G.neighbors(n2))
    num_union = len(s1.union(s2))
    num_intersection = len(s1.intersection(s2))
    if num_union == num_intersection:
        # identical neighbourhoods (the original tested `num_union &
        # num_intersection`, a bitwise and, which looks like a typo)
        xs = 0
    else:
        # float division; integer division, as in the original,
        # always truncated to 0
        xs = float(num_union - num_intersection)/(num_union + num_intersection)
    return xs
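# --- Illustrative example (not from the original commit): in the path
# graph 1-2-3, nodes 1 and 3 share their single neighbour {2}, so they
# are structurally equivalent and the measure is 0.
#
#     G = NX.Graph()
#     G.add_edge(1, 2); G.add_edge(2, 3)
#     print struct_equivalence(G, 1, 3)   # -> 0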
def struct_equivalence_all(G):
    """Not finished.
    """
    A, labels = NX.adj_matrix(G, with_labels=True)
    pass

def hamming_distance(n1, n2):
    """Not finished.
    """
    pass

def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
    """Returns the correlation coefficient for each node. The
    correlation coefficient is between the node and its neighbours.

    NOTE: not finished -- the loop below collects the neighbour values
    but never computes or returns the coefficients.
    """
    if nodelist == None:
        nodelist = G.nodes()
    if vec == None:
        vec = G.degree(nodelist)
    if len(vec) != len(nodelist):
        # the original applied the format operator to the raised
        # exception instead of the message string
        raise NXUTILSException("The node value vector is not of same length (%s) as the nodelist (%s)" % (len(vec), len(nodelist)))

    A = NX.adj_matrix(G, nodelist=nodelist)
    for i, node in enumerate(nodelist):
        nei_i = A[i, :] == 1
        vec_i = vec[nei_i]
def weighted_laplacian(G, with_labels=False):
    """Return standard Laplacian of graph from a weighted adjacency matrix."""
    n = G.order()
    # the original used scipy.eye/scipy.sum, but only the bare names are
    # imported above
    I = eye(n)
    A = weighted_adj_matrix(G)
    D = I*sum(A, 0)
    L = D - A
    if with_labels:
        A, index = weighted_adj_matrix(G, with_labels=True)
        return L, index
    else:
        return L
def grow_subnetworks(G, T2):
    """Return the highest scoring (T2-test) subgraph of G.

    Uses simulated annealing to identify high-scoring subgraphs.

    ref: -- Ideker et.al (Bioinformatics 18, 2002)
         -- Patil and Nielsen (PNAS 2006)

    NOTE: not finished -- the annealing acceptance step below is
    incomplete (numpy.exp() is called without an argument).
    """
    from numpy.random import rand  # missing from the module imports
    N = 1000
    states = [(node, False) for node in G.nodes()]
    t2_last = 0.0
    for i in xrange(N):
        if i == 0:  # assign random states
            states = [(state[0], True) for state in states if rand(1) > .5]
        sub_nodes = [state[0] for state in states if state[1]]
        Gsub = NX.subgraph(G, sub_nodes)
        Gsub = NX.connected_component_subgraphs(Gsub)[0]
        t2 = [T2[node] for node in Gsub]
        # NB: compares a list to a float (always True in Python 2)
        if t2 > t2_last:
            pass
        else:
            p = numpy.exp()  # FIXME: incomplete in the original
"""Below are methods for calculating graph metrics
|
||||
|
||||
Four main decompositions :
|
||||
0.) Adjacency diffusion kernel expm(A),
|
||||
1.) von neumann kernels (diagonalisation of adjacency matrix)
|
||||
|
||||
2.) laplacian kernels (geometric series of adj.)
|
||||
|
||||
3.) diffusion kernels (exponential series of adj.)
|
||||
|
||||
---- Kv
|
||||
von_neumann : Kv = (I-alpha*A)^-1 (mod: A(I-alpha*A)^-1)? ,
|
||||
geom. series
|
||||
|
||||
---- Kl
|
||||
laplacian: Kl = (I-alpha*L)^-1 , geom. series
|
||||
|
||||
---- Kd
|
||||
laplacian_diffusion: Kd = expm(-alpha*L)
|
||||
exp. series
|
||||
|
||||
---- Ke
|
||||
Exponential diffusion.
|
||||
Ke = expm(A) .... expm(-A)?
|
||||
|
||||
"""
|
||||
|
||||
# TODO:
|
||||
# check for numerical unstable eigenvalues and set to zero
|
||||
# othervise some inverses wil explode ->ok ..using pinv for inverses
|
||||
#
|
||||
# This gives results that look numerical unstable
|
||||
#
|
||||
# -- divided adj by sum(A[:]), check this one (paper by Lebart scales with number of edges)
|
||||
#
|
||||
#
|
||||
#
|
||||
# the neumann kernel is defined in Kandola to be K = A*(I-A)^-1
|
||||
# lowest eigenvectors are same as the highest of K = A*A ?
|
||||
# this needs clarification
|
||||
|
||||
# diffusion is still wrong! ... ok
|
||||
# diff needs normalisation?! check the meaning of exp(-s) = exp(1/s) -L = 1/degree ... etc
|
||||
# Is it the negative of exp. of adj. metrix in Kandola?
|
||||
#
|
||||
# Normalised=False returns only nans (no idea why!!) ... fixed ok
|
||||
|
||||
# 31.1: diff is ok exp(0)=1 not zero!
|
||||
# 07.03.2005: normalisation is ok: -> normalisation will emphasize high degree nodes
|
||||
# 10.03.2005: symeig is unstable an returns nans of some eigenvectors? switching back to eig
|
||||
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
|
||||
# 13.09.2206: update for use in numpy
|
||||
|
||||
# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last
|
||||
|
||||
def K_expAdj(W, normalised=True, alpha=1.0):
|
||||
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
||||
"""
|
||||
W = asarray(W)
|
||||
t = W.dtype.char
|
||||
if len(W.shape)!=2:
|
||||
raise ValueError, "Non-matrix input to matrix function."
|
||||
m,n = W.shape
|
||||
if t in ['F','D']:
|
||||
raise TypeError, "Complex input!"
|
||||
if normalised==True:
|
||||
T = diag( sqrt( 1./(sum(W,0))) )
|
||||
W = dot(dot(T, W), T)
|
||||
e,vr = eig(W)
|
||||
s = real(e)**2 # from eigenvalues to singularvalues
|
||||
vri = inv(vr)
|
||||
s = maximum.reduce(s) + s
|
||||
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
||||
cutoff = abs(cond*maximum.reduce(s))
|
||||
psigma = eye(m)
|
||||
for i in range(len(s)):
|
||||
if abs(s[i]) > cutoff:
|
||||
psigma[i,i] = .5*alpha*exp(s[i])
|
||||
|
||||
return dot(dot(vr,psigma),vri)
|
||||
|
||||
def K_vonNeumann(W, normalised=True, alpha=1.0):
|
||||
""" The geometric series of path lengths.
|
||||
Returns matrix square root of pseudo inverse of the adjacency matrix.
|
||||
"""
|
||||
W = asarray(W)
|
||||
t = W.dtype.char
|
||||
if len(W.shape)!=2:
|
||||
raise ValueError, "Non-matrix input to matrix function."
|
||||
m,n = W.shape
|
||||
if t in ['F','D']:
|
||||
raise TypeError, "Complex input!"
|
||||
|
||||
if normalised==True:
|
||||
T = diag(sqrt(1./(sum(W,0))))
|
||||
W = dot(dot(T,W),T)
|
||||
e,vr = eig(W)
|
||||
vri = inv(vr)
|
||||
e = real(e) # we only work with real pos. eigvals
|
||||
e = maximum.reduce(e) + e
|
||||
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
||||
cutoff = cond*maximum.reduce(e)
|
||||
psigma = zeros((m,n),t)
|
||||
for i in range(len(e)):
|
||||
if e[i] > cutoff:
|
||||
psigma[i,i] = 1.0/e[i] #these are eig.vals (=sqrt(sing.vals))
|
||||
return dot(dot(vr,psigma),vri).astype(t)
|
||||
|
||||
def K_laplacian(W, normalised=True, alpha=1.0):
|
||||
""" This is the matrix pseudo inverse of L.
|
||||
Also known as the average commute time matrix.
|
||||
"""
|
||||
W = asarray(W)
|
||||
t = W.dtype.char
|
||||
if len(W.shape)!=2:
|
||||
raise ValueError, "Non-matrix input to matrix function."
|
||||
m,n = W.shape
|
||||
if t in ['F','D']:
|
||||
raise TypeError, "Complex input!"
|
||||
D = diag(sum(W,0))
|
||||
L = D - W
|
||||
if normalised==True:
|
||||
T = diag(sqrt(1./sum(W, 0)))
|
||||
L = dot(dot(T, L), T)
|
||||
e,vr = eig(L)
|
||||
e = real(e)
|
||||
vri = inv(vr)
|
||||
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
||||
cutoff = cond*maximum.reduce(e)
|
||||
psigma = zeros((m,),t) # if s close to zero -> set 1/s = 0
|
||||
for i in range(len(e)):
|
||||
if e[i] > cutoff:
|
||||
psigma[i] = 1.0/e[i]
|
||||
K = dot(dot(vr, diag(psigma)), vri).astype(t)
|
||||
K = real(K)
|
||||
I = eye(n)
|
||||
K = (1-alpha)*I + alpha*K
|
||||
return K
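# --- Illustrative sketch (not from the original commit): the kernels in
# this module all take a (possibly weighted) adjacency matrix, e.g. a
# triangle graph:
#
#     W = asarray([[0., 1., 1.], [1., 0., 1.], [1., 1., 0.]])
#     K = K_laplacian(W, normalised=True, alpha=1.0)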
|
||||
|
||||
|
||||
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
|
||||
"""Returns diffusion kernel.
|
||||
input:
|
||||
-- W, adj. matrix
|
||||
-- normalised [True/False]
|
||||
-- alpha, [0,1] (degree of network influence)
|
||||
-- beta, [0->), (diffusion degree)
|
||||
"""
|
||||
W = asarray(W)
|
||||
t = W.dtype.char
|
||||
if len(W.shape)!=2:
|
||||
raise ValueError, "Non-matrix input to matrix function."
|
||||
m, n = W.shape
|
||||
if t in ['F','D']:
|
||||
raise TypeError, "Complex input!"
|
||||
D = diag(W.sum(0))
|
||||
L = D - W
|
||||
if normalised==True:
|
||||
T = diag(sqrt(1./W.sum(0)))
|
||||
L = dot(dot(T, L), T)
|
||||
e, vr = eig(L)
|
||||
vri = inv(vr) #inv
|
||||
cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
||||
cutoff = 1.*abs(cond*maximum.reduce(e))
|
||||
psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary)
|
||||
#psigma = zeros((m,n), dtype='<f8')
|
||||
for i in range(len(e)):
|
||||
if abs(e[i]) > cutoff:
|
||||
psigma[i,i] = exp(-beta*e[i])
|
||||
#else:
|
||||
# psigma[i,i] = 0.0
|
||||
K = real(dot(dot(vr, psigma), vri))
|
||||
I = eye(n, dtype='<f8')
|
||||
K = (1. - alpha)*I + alpha*K
|
||||
return K
|
||||
|
||||
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
|
||||
"""Returns diffusion kernel, using fast pade approximation.
|
||||
input:
|
||||
-- W, adj. matrix
|
||||
-- normalised [True/False]
|
||||
-- beta, [0->), (diffusion degree)
|
||||
"""
|
||||
|
||||
D = diag(W.sum(0))
|
||||
L = D - W
|
||||
if normalised==True:
|
||||
T = diag(sqrt(1./W.sum(0)))
|
||||
L = dot(dot(T, L), T)
|
||||
return expm(-beta*L)
|
||||
|
||||
|
||||
def K_modularity(W, alpha=1.0):
|
||||
""" Returns the matrix square root of Newmans modularity."""
|
||||
W = asarray(W)
|
||||
t = W.dtype.char
|
||||
m, n = W.shape
|
||||
d = sum(W, 0)
|
||||
m = 1.*sum(d)
|
||||
B = W - (outer(d, d)/m)
|
||||
s,v = sorted_eig(B, sort_by='lm')
|
||||
psigma = zeros( (n, n), dtype='<f8' )
|
||||
for i in range(len(s)):
|
||||
if s[i]>1e-7:
|
||||
psigma[i,i] = sqrt(s[i])
|
||||
#psigma[i,i] = s[i]
|
||||
K = dot(dot(v, psigma), v.T)
|
||||
I = eye(n)
|
||||
K = (1 - alpha)*I + alpha*K
|
||||
return K
|
||||
|
||||
def kernel_score(K, W):
|
||||
"""Returns the modularity score.
|
||||
K -- (modularity) kernel
|
||||
W -- adjacency matrix (possibly weighted)
|
||||
"""
|
||||
# normalize W (: W'W=I)
|
||||
m, n = shape(W)
|
||||
for i in range(n):
|
||||
W[:,i] = W[:,i]/norm(W[:,i])
|
||||
score = diag(dot(W, dot(K, W)) )
|
||||
tot = sum(score)
|
||||
return score, tot
|
||||
|
||||
|
||||
def modularity_matrix(G, nodelist=None):
|
||||
if not nodelist:
|
||||
nodelist = G.nodes()
|
||||
else:
|
||||
G = NX.subgraph(G, nodelist)
|
||||
|
||||
A = NX.adj_matrix(G, nodelist=nodelist)
|
||||
d = atleast_2d(G.degree(nbunch=nodelist))
|
||||
m = 1.*G.number_of_edges()
|
||||
B = A - dot(d.T, d)/m
|
||||
return B
|
||||
|
||||
|
||||
|
||||
|
||||
28
laydi/lib/packer.py
Normal file
@@ -0,0 +1,28 @@

# note: these imports were missing from the original file
from scipy import dot, argmin
from scipy.linalg import svd

class Packer:
    """A compression object used to speed up model calculations.

    Often used in conjunction with crossvalidation and perturbation
    analysis.
    """
    def __init__(self, array):
        self._shape = array.shape
        self._array = array
        self._packed_data = None
        self._inflater = None  # set by collapse(); was unset in the original

    def expand(self, a):
        if self._inflater != None:
            return dot(self._inflater, a)

    def collapse(self, axis=None, mode='svd'):
        if axis == None:  # `if not axis`, as in the original, would also override an explicit 0
            axis = argmin(self._array.shape)  # default is the smallest dim
        if axis == 1:
            self._array = self._array.T
        u, s, vt = svd(self._array, full_matrices=0)
        self._inflater = vt.T
        self._packed_data = u*s
        return self._packed_data

    def get_packed_data(self):
        return self._packed_data
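if __name__ == '__main__':
    # Illustrative sketch, not part of the original commit: compress a
    # wide matrix, model in the packed space, then map results back.
    from numpy.random import rand
    X = rand(5, 200)                    # few samples, many variables
    packer = Packer(X.copy())
    Xp = packer.collapse()              # 5 x 5 packed data
    W = rand(5, 2)                      # e.g. weights from a model fit on Xp
    print packer.expand(W).shape        # -> (200, 2), back in variable space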
223
laydi/lib/select_generators.py
Normal file
@@ -0,0 +1,223 @@

"""Matrix cross validation selection generators
"""
from scipy import take, arange, ceil, repeat, newaxis, mean, asarray, dot, \
     ones, random, array_split, floor, vstack, minimum
from cx_utils import randperm

def w_pls_gen(aat, b, n_blocks=None, center=True, index_out=False):
    """Random block crossvalidation for the wide (XX.T) trick in PLS.
    Leave-one-out is a special case, with n_blocks equal to the number
    of samples.

    aat      -- outer product of X
    b        -- Y
    n_blocks -- number of cross-validation blocks
    center   -- center the calibration sets (aat_in, b_in)

    Returns:
    -- aat_in, aat_out, b_in, b_out, [out]
    """
    m, n = aat.shape
    index = randperm(m)
    if n_blocks == None:
        n_blocks = m
    n_values_in_block = m/n_blocks
    if n_blocks == m:
        index = arange(m)
    out_ind = [index[i*n_values_in_block:(i+1)*n_values_in_block]
               for i in range(n_blocks)]

    for out in out_ind:
        inn = [i for i in index if i not in out]
        aat_in = aat[inn, :][:, inn]
        aat_out = aat[out, :][:, inn]
        b_in = b[inn, :]
        b_out = b[out, :]
        if center:
            aat_in, mn = outerprod_centering(aat_in)
            b_in = b_in - b_in.mean(0)
        if index_out:
            yield aat_in, aat_out, b_in, b_out, out
        else:
            yield aat_in, aat_out, b_in, b_out
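# --- Illustrative sketch (not from the original commit): consuming the
# generator above in a cross-validation loop. Names are hypothetical.
#
#     XXt = dot(X, X.T)
#     for aat_in, aat_out, b_in, b_out in w_pls_gen(XXt, Y, n_blocks=10):
#         pass  # fit on (aat_in, b_in), predict b_out from aat_out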
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
|
||||
"""Random block crossvalidation
|
||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||
"""
|
||||
index = randperm(a.shape[axis])
|
||||
#index = arange(a.shape[axis])
|
||||
if n_blocks==None:
|
||||
n_blocks = a.shape[axis]
|
||||
n_in_set = ceil(float(a.shape[axis])/n_blocks)
|
||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
||||
for out in out_ind_sets:
|
||||
inn = [i for i in index if i not in out]
|
||||
acal = a.take(inn, 0)
|
||||
atrue = a.take(out, 0)
|
||||
bcal = b.take(inn, 0)
|
||||
btrue = b.take(out, 0)
|
||||
if center:
|
||||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
mn_b = bcal.mean(0)[newaxis]
|
||||
bcal = bcal - mn_b
|
||||
btrue = btrue - mn_b
|
||||
|
||||
if index_out:
|
||||
yield acal, atrue, bcal, btrue, out
|
||||
else:
|
||||
yield acal, atrue, bcal, btrue
|
||||
|
||||
|
||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
|
||||
"""Returns a generator of crossvalidation sample segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (m x n)
|
||||
-- n_sets, number of segments/subsets to generate.
|
||||
-- center, bool, choice of centering each subset
|
||||
-- index_out, bool, return subset index
|
||||
-- axis, int, which axis to get subset from
|
||||
|
||||
ouput:
|
||||
-- V, generator with (n_sets) members (subsets)
|
||||
|
||||
"""
|
||||
m = a.shape[axis]
|
||||
index = randperm(m)
|
||||
if n_sets==None:
|
||||
n_sets = m
|
||||
n_in_set = ceil(float(m)/n_sets)
|
||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
|
||||
for out in out_ind_sets:
|
||||
inn = [i for i in index if i not in out]
|
||||
acal = a.take(inn, 0)
|
||||
atrue = a.take(out, 0)
|
||||
if center:
|
||||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
|
||||
if index_out:
|
||||
yield acal, atrue, out
|
||||
else:
|
||||
yield acal, atrue
|
||||
|
||||
def w_pls_gen_jk(a, b, n_sets=None, center=True,
|
||||
index_out=False, axis=0):
|
||||
"""Random block crossvalidation for wide X (m>>n)
|
||||
Leave-one-out is a subset, with n_sets equals a.shape[-1]
|
||||
|
||||
Returns : X_m and X_m'Y_m
|
||||
"""
|
||||
m = a.shape[axis]
|
||||
ab = dot(a.T, b)
|
||||
index = randperm(m)
|
||||
if n_sets==None:
|
||||
n_sets = m
|
||||
n_in_set = ceil(float(m)/n_sets)
|
||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
|
||||
for out in out_ind_sets:
|
||||
inn = [i for i in index if i not in out]
|
||||
nin = len(inn)
|
||||
nout = len(out)
|
||||
a_in = a[inn,:]
|
||||
mn_a = 0
|
||||
mAB = 0
|
||||
if center:
|
||||
mn_a = a_in.mean(0)[newaxis]
|
||||
mAin = dot(-ones((1,nout)), a[out,:])/nin
|
||||
mBin = dot(-ones((1,nout)), b[out,:])/nin
|
||||
mAB = dot(mAin.T, (mBin*nin))
|
||||
ab_in = ab - dot(a[out,].T, b[out,:]) - mAB
|
||||
a_in = a_in - mn_a
|
||||
|
||||
if index_out:
|
||||
yield a_in, ab_in, out
|
||||
else:
|
||||
yield a_in, ab_in
|
||||
|
||||
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
|
||||
"""Random block shuffling along 1d axis
|
||||
Returns : Shuffled a by axis
|
||||
"""
|
||||
m = a.shape[axis]
|
||||
if blocks==None:
|
||||
blocks = m
|
||||
for ii in xrange(n_sets):
|
||||
index = randperm(m)
|
||||
if blocks==m:
|
||||
a_out = a.take(index, axis)
|
||||
else:
|
||||
index = arange(m)
|
||||
dummy = map(random.shuffle, array_split(index, blocks))
|
||||
a_out = a.take(index, axis)
|
||||
|
||||
if index_out:
|
||||
yield a_out, index
|
||||
else:
|
||||
yield a_out
|
||||
|
||||
def shuffle_1d(a, n_sets, axis=0):
|
||||
"""Random shuffling along 1d axis.
|
||||
|
||||
Returns : Shuffled a by axis
|
||||
"""
|
||||
m = a.shape[axis]
|
||||
for ii in xrange(n_sets):
|
||||
index = randperm(m)
|
||||
a = a.take(index, axis)
|
||||
yield a
|
||||
|
||||
def diag_pert(a, n_sets=10, center=True, index_out=False):
|
||||
"""Alter generator returning sets perturbed with means at diagonals.
|
||||
|
||||
input:
-- a, data matrix (m x n)
-- n_sets, number of perturbed sets generated
"""
|
||||
|
||||
m, n = a.shape
|
||||
tr=False
|
||||
if m>n:
|
||||
a = a.T
|
||||
m, n = a.shape
|
||||
tr = True
|
||||
if n_sets>m or n_sets>n:
|
||||
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
||||
raise ValueError, msg
|
||||
nm=n*m
|
||||
start_inds = array_split(randperm(m),n_sets) # we use random start diags
|
||||
if center:
|
||||
a = a - mean(a, 0)[newaxis]
|
||||
for v in range(n_sets):
|
||||
a_out = a.copy()
|
||||
out = []
|
||||
for start in start_inds[v]:
|
||||
ind = arange(start+v, nm, n+1)
|
||||
[out.append(i) for i in ind]
|
||||
if center:
|
||||
a_out.put(a.mean(),ind)
|
||||
else:
|
||||
a_out.put(0, ind)
|
||||
if tr:
|
||||
a_out = a_out.T
|
||||
|
||||
if index_out:
|
||||
yield a_out, asarray(out)
|
||||
else:
|
||||
yield a_out
|
||||
|
||||
|
||||
def outerprod_centering(aat, ret_mn=True):
|
||||
"""Returns double centered symmetric outerproduct matrix.
|
||||
"""
|
||||
h = aat.mean(0)[newaxis]
|
||||
h = h - 0.5*h.mean()
|
||||
mn_a = h + h.T # beauty of broadcasting
|
||||
aatc = aat - mn_a
|
||||
if ret_mn:
|
||||
return aatc, h
|
||||
return aatc
|
||||
|
||||
|
||||
|
||||
315
laydi/lib/validation.py
Normal file
@@ -0,0 +1,315 @@
|
||||
"""This module implements some common validation schemes from pca and pls.
|
||||
"""
|
||||
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
|
||||
apply_along_axis,eye,kron,array,sort,zeros_like,argmax,atleast_2d
|
||||
from scipy.stats import median
|
||||
from scipy.linalg import triu,inv,svd,norm
|
||||
|
||||
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
|
||||
from engines import w_simpls,pls,bridge,pca,nipals_lpls
|
||||
from cx_utils import m_shape
|
||||
|
||||
|
||||
def w_pls_cv_val(X, Y, amax, n_blocks=None):
|
||||
"""Returns rmsep and aopt for pls tailored for wide X.
|
||||
|
||||
The root mean square error of cross validation is calculated
|
||||
based on random block cross-validation. With number of blocks equal to
|
||||
number of samples [default] gives leave-one-out cv.
|
||||
The pls model is based on the simpls algorithm for wide X.
|
||||
|
||||
:Parameters:
|
||||
X : ndarray
|
||||
column centered data matrix of size (samples x variables)
|
||||
Y : ndarray
|
||||
column centered response matrix of size (samples x responses)
|
||||
amax : scalar
|
||||
Maximum number of components
|
||||
n_blocks : scalar
|
||||
Number of blocks in cross validation
|
||||
|
||||
:Returns:
|
||||
rmsep : ndarray
|
||||
Root Mean Square Error of cross-validated Predictions
|
||||
aopt : scalar
|
||||
Guestimate of the optimal number of components
|
||||
|
||||
:SeeAlso:
|
||||
- pls_cv_val : Same output, not optimised for wide X
|
||||
- w_simpls : Simpls algorithm for wide X
|
||||
|
||||
Notes
|
||||
-----
|
||||
Based (cowardly translated) on m-files from the Chemoact toolbox
|
||||
X, Y inputs need to be centered (fixme: check)
|
||||
|
||||
|
||||
Examples
--------
(illustrative only -- the example in the original was a broken placeholder)

>>> import numpy as n
>>> X = n.random.randn(20, 100)
>>> Y = n.random.randn(20, 2)
>>> rmsep, aopt = w_pls_cv_val(X - X.mean(0), Y - Y.mean(0), amax=5)
"""
|
||||
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='f')
|
||||
if n_blocks==None:
|
||||
n_blocks = Y.shape[0]
|
||||
XXt = dot(X, X.T)
|
||||
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||
for Din, Doi, Yin, Yout in V:
|
||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||
|
||||
dat = w_simpls(Din, Yin, amax)
|
||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
||||
|
||||
Yhat = []
|
||||
for j in range(l):
|
||||
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
|
||||
E = Yout[:,j][:,newaxis] - TQ
|
||||
E = E + sum(E, 0)/Din.shape[0]
|
||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||
#Yhat = Yin - dot(That,Q.T)
|
||||
msep = PRESS/(Y.shape[0])
|
||||
aopt = find_aopt_from_sep(msep)
|
||||
return sqrt(msep), aopt
|
||||
|
||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||
EE = zeros((amax, k, l), dtype='<f8')
|
||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
|
||||
for Xin, Xout, Yin, Yout, out in V:
|
||||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||
Yin = (Yin - ym)
|
||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||
|
||||
if algo=='pls':
|
||||
dat = pls(Xin, Yin, amax, mode='normal')
|
||||
elif algo=='bridge':
|
||||
dat = simpls(Xin, Yin, amax, mode='normal')
|
||||
|
||||
for a in range(amax):
|
||||
Ba = dat['B'][a,:,:]
|
||||
Yhat[a,out[:],:] = dot(Xout, Ba)
|
||||
E = Yout - dot(Xout, Ba)
|
||||
EE[a,out,:] = E
|
||||
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
||||
|
||||
#rmsep = sqrt(PRESS/(k-1.))
|
||||
msep = PRESS
|
||||
aopt = find_aopt_from_sep(msep)
|
||||
return msep, Yhat, aopt
|
||||
|
||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2]):
|
||||
"""Performs crossvalidation to get generalisation error in lpls"""
|
||||
assert(nsets<=X.shape[0])
|
||||
|
||||
cv_iter = pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
|
||||
k, l = Y.shape
|
||||
Yc = empty((k, l), 'd')
|
||||
Yhat = empty((a_max, k, l), 'd')
|
||||
Yhatc = empty((a_max, k, l), 'd')
|
||||
sep2 = empty((a_max, k, l), 'd')
|
||||
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
|
||||
print ind
|
||||
dat = nipals_lpls(xcal,ycal,Z,
|
||||
a_max=a_max,
|
||||
alpha=alpha,
|
||||
mean_ctr=mean_ctr,
|
||||
verbose=False)
|
||||
|
||||
B = dat['B']
|
||||
#b0 = dat['b0']
|
||||
for a in range(a_max):
|
||||
if mean_ctr[0] in [0, 2]:
|
||||
xi = xi - dat['mnx']
|
||||
else:
|
||||
xi = xi - xi.mean(1)[:,newaxis] #???: cheating?
|
||||
if mean_ctr[1] in [0, 2]:
|
||||
ym = dat['mny']
|
||||
else:
|
||||
ym = yi.mean(1)[:,newaxis] #???: check this
|
||||
|
||||
Yhat[a,ind,:] = atleast_2d(ym + dot(xi, B[a]))
|
||||
#Yhat[a,ind,:] = atleast_2d(b0[a] + dot(xi, B[a]))
|
||||
|
||||
# todo: need a better support for class validation
|
||||
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
|
||||
#print Y.dtype.char
|
||||
if y_is_class:
|
||||
Yhat_class = zeros_like(Yhat)
|
||||
for a in range(a_max):
|
||||
for i in range(k):
|
||||
Yhat_class[a,i,argmax(Yhat[a,i,:])] = 1.0
|
||||
class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
|
||||
|
||||
sep = (Y - Yhat)**2
|
||||
rmsep = sqrt(sep.mean(1)).T
|
||||
#rmsep2 = sqrt(sep2.mean(1))
|
||||
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
|
||||
return rmsep, Yhat, aopt
|
||||
|
||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||
"""Pca validation by altering elements in X.
|
||||
|
||||
comments:
|
||||
-- may do all jk estimates in this loop
|
||||
"""
|
||||
|
||||
V = diag_pert(a, n_sets, center=True, index_out=True)
|
||||
sep = empty((n_sets, amax), dtype='f')
|
||||
for i, (xi, ind) in enumerate(V):
|
||||
dat_i = pca(xi, amax, mode='detailed')
|
||||
Ti, Pi = dat_i['T'],dat_i['P']
|
||||
for j in xrange(amax):
|
||||
Xhat = dot(Ti[:,:j+1], Pi[:,:j+1].T)
|
||||
a_sub = a.ravel().take(ind)
|
||||
EE = a_sub - Xhat.ravel().take(ind)
|
||||
tot = (a_sub**2).sum()
|
||||
sep[i,j] = (EE**2).sum()/tot
|
||||
sep = sqrt(sep)
|
||||
aopt = find_aopt_from_sep(sep)
|
||||
return sep, aopt
|
||||
|
||||
def pca_cv_val(a, amax, n_sets):
    """Returns PRESS from cross-validated pca using random segments.

    input:
    -- a, data matrix (m x n)
    -- amax, maximum number of components used
    -- n_sets, number of segments to calculate
    output:
    -- sep, (amax x m x n), squared error of prediction (press)
    -- aopt, guestimated optimal number of components
    """
    m, n = a.shape
    E = empty((amax, m, n), dtype='f')
    xtot = (a**2).sum()  # this needs centering
    # the original hardcoded n_sets=7 here
    V = pca_gen(a, n_sets=n_sets, center=True, index_out=True)
    for xi, xout, ind in V:
        dat_i = pca(xi, amax, mode='fast')
        Pi = dat_i['P']
        # the original rebound the data matrix `a` as the loop index and
        # referenced an undefined `X`
        for j in xrange(amax):
            Pj = Pi[:, :j+1]
            E[j][ind, :] = (a[ind, :] - dot(xout, dot(Pj, Pj.T)))**2

    sep = []
    for j in xrange(amax):
        sep.append(E[j].sum()/xtot)
    sep = array(sep)
    aopt = find_aopt_from_sep(sep)

    return sep, aopt
|
||||
|
||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
|
||||
""" Returns CV-segments of paramter W for wide X.
|
||||
|
||||
todo: add support for T,Q and B
|
||||
"""
|
||||
if n_blocks == None:
|
||||
n_blocks = b.shape[0]
|
||||
|
||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
|
||||
if use_pack:
|
||||
u, s, inflater = svd(a, full_matrices=0)
|
||||
a = u*s
|
||||
|
||||
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
|
||||
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
|
||||
if algo=='pls':
|
||||
dat = pls(a_in, b_in, amax, 'loads', 'fast')
|
||||
|
||||
elif algo=='bridge':
|
||||
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
||||
|
||||
W = dat['W']
|
||||
if use_pack:
|
||||
W = dot(inflater.T, W)
|
||||
|
||||
Wcv[nn,:,:] = W[:,:,]
|
||||
|
||||
return Wcv
|
||||
|
||||
def pca_jkP(a, aopt, n_blocks=None):
|
||||
"""Returns loading from PCA on CV-segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (n x m)
|
||||
-- aopt, number of components in model.
|
||||
-- nblocks, number of segments
|
||||
output:
|
||||
-- PP, loadings collected in a three way matrix
|
||||
(n_segments, m, aopt)
|
||||
|
||||
comments:
|
||||
* The loadings are scaled with the (1/samples)*eigenvalues.
|
||||
* Crossvalidation method is currently set to random blocks of samples.
|
||||
|
||||
todo: add support for T
|
||||
fixme: more efficient to add this in validation loop
|
||||
"""
|
||||
if n_blocks == None:
|
||||
n_blocks = a.shape[0]
|
||||
|
||||
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
|
||||
V = pca_gen(a, n_sets=n_blocks, center=True)
|
||||
for nn,(a_in, a_out) in enumerate(V):
|
||||
dat = pca(a_in, aopt, mode='fast', scale='loads')
|
||||
P = dat['P']
|
||||
PP[nn,:,:] = P
|
||||
|
||||
return PP
|
||||
|
||||
|
||||
def lpls_jk(X, Y, Z, a_max, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2]):
|
||||
cv_iter = pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
|
||||
m, n = X.shape
|
||||
k, l = Y.shape
|
||||
o, p = Z.shape
|
||||
if nsets==None:
|
||||
nsets = m
|
||||
WWx = empty((nsets, n, a_max), 'd')
|
||||
WWz = empty((nsets, o, a_max), 'd')
|
||||
#WWy = empty((nsets, l, a_max), 'd')
|
||||
for i, (xcal, xi, ycal, yi) in enumerate(cv_iter):
|
||||
dat = nipals_lpls(xcal,ycal,Z,a_max=a_max,alpha=xz_alpha,
|
||||
mean_ctr=mean_ctr,scale='loads',verbose=False)
|
||||
WWx[i,:,:] = dat['W']
|
||||
WWz[i,:,:] = dat['L']
|
||||
#WWy[i,:,:] = dat['Q']
|
||||
|
||||
return WWx, WWz
|
||||
|
||||
def find_aopt_from_sep(sep, method='75perc'):
|
||||
"""Returns an estimate of optimal number of components from rmsecv.
|
||||
"""
|
||||
sep = sep.copy()
|
||||
if method=='vanilla':
|
||||
# min rmsep
|
||||
rmsecv = sqrt(sep.mean(0))
|
||||
return rmsecv.argmin() + 1
|
||||
|
||||
elif method=='75perc':
|
||||
prct = .75 #percentile
|
||||
ind = 1.*sep.shape[0]*prct
|
||||
med = median(sep)
|
||||
prc_75 = []
|
||||
for col in sep.T:
|
||||
col.sort() #this is inplace -> ruins sep, so we are doing a copy
|
||||
prc_75.append(col[int(ind)])
|
||||
prc_75 = array(prc_75)
|
||||
for i in range(1, sep.shape[1], 1):
|
||||
if med[i-1]<prc_75[i]:
|
||||
return i
|
||||
return len(med)
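# --- Illustrative sketch (not from the original commit): typical use of
# the helpers in this module. Names are hypothetical; Xc and Yc are
# assumed centered.
#
#     msep, yhat, aopt = pls_val(Xc, Yc, amax=10, n_blocks=10)
#     print "suggested number of components:", aopt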
|
||||
168
laydi/logger.py
Normal file
@@ -0,0 +1,168 @@
|
||||
|
||||
import gobject
|
||||
import gtk
|
||||
import time
|
||||
|
||||
class Logger:
|
||||
def __init__(self):
|
||||
self.store = gtk.ListStore(gobject.TYPE_STRING,
|
||||
gobject.TYPE_STRING,
|
||||
gobject.TYPE_STRING)
|
||||
self.levels = ['debug', 'notice', 'warning', 'error']
|
||||
self.level_text = {'debug': 'Debug',
|
||||
'notice': 'Notice',
|
||||
'warning': 'Warning',
|
||||
'error': 'Error'}
|
||||
self.components = {}
|
||||
self.colors = { 'debug': 'grey',
|
||||
'notice': 'black',
|
||||
'warning': 'brown',
|
||||
'error': 'red' }
|
||||
|
||||
def log(self, level, message):
|
||||
iter = self.store.append()
|
||||
self.store.set_value(iter, 0, level)
|
||||
self.store.set_value(iter, 1, message)
|
||||
self.store.set_value(iter, 2, self.colors[level])
|
||||
|
||||
def level_number(self, level):
|
||||
return self.levels.index(level)
|
||||
|
||||
def debug(self, message):
|
||||
self.log('debug', message)
|
||||
|
||||
def notice(self, message):
|
||||
self.log('notice', message)
|
||||
|
||||
def warning(self, message):
|
||||
self.log('warning', message)
|
||||
|
||||
def error(self, message):
|
||||
self.log('error', message)
|
||||
|
||||
|
||||
class LogView(gtk.TreeView):
|
||||
|
||||
def __init__(self, logger=None, level='notice'):
|
||||
self.logger = logger
|
||||
self.model = logger.store
|
||||
self.level = level
|
||||
self.level_no = logger.level_number(level)
|
||||
|
||||
# Set up filter
|
||||
self.filter = self.model.filter_new()
|
||||
gtk.TreeView.__init__(self, self.filter)
|
||||
self.filter.set_visible_func(self.level_filter)
|
||||
|
||||
# Set up log level column
|
||||
renderer = gtk.CellRendererText()
|
||||
self.level_col = gtk.TreeViewColumn('Level', renderer, text=0)
|
||||
self.level_col.add_attribute(renderer, "foreground", 2)
|
||||
self.append_column(self.level_col)
|
||||
|
||||
# Set up message column
|
||||
renderer = gtk.CellRendererText()
|
||||
self.message_col = gtk.TreeViewColumn('Message', renderer, text=1)
|
||||
self.message_col.add_attribute(renderer, "foreground", 2)
|
||||
self.append_column(self.message_col)
|
||||
|
||||
# Activate context menu
|
||||
self.menu = LogMenu(self.logger, self)
|
||||
self.connect('popup_menu', self.popup_menu)
|
||||
self.connect('button_press_event', self.mouse_popup_menu)
|
||||
|
||||
# Make sure tree view displays bottom entry when entered
|
||||
def scroll_to_last(model, path, it):
|
||||
if path:
|
||||
self.scroll_to_cell(path)
|
||||
self.model.connect('row-changed', scroll_to_last)
|
||||
|
||||
def set_level(self, level):
|
||||
self.level = level
|
||||
self.level_no = self.logger.levels.index(level)
|
||||
self.filter.refilter()
|
||||
self.queue_draw()
|
||||
|
||||
def popup_menu(self, *rest):
|
||||
self.menu.popup(None, None, None, 0, 0)
|
||||
|
||||
def mouse_popup_menu(self, widget, event):
|
||||
if event.button == 3:
|
||||
self.menu.popup(None, None, None, event.button, event.time)
|
||||
|
||||
def level_filter(self, store, iter):
|
||||
if store.get_value(iter,0):
|
||||
value = self.logger.levels.index(store.get_value(iter, 0))
|
||||
return value >= self.level_no
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
class LogLevelMenu(gtk.Menu):
|
||||
def __init__(self, logger, view):
|
||||
self.logger = logger
|
||||
self.view = view
|
||||
items = []
|
||||
gtk.Menu.__init__(self)
|
||||
|
||||
for level in logger.levels:
|
||||
if len(items) == 0:
|
||||
group = None
|
||||
else:
|
||||
group = items[0]
|
||||
item = gtk.RadioMenuItem(group, logger.level_text[level], level)
|
||||
item.connect('activate', self.set_log_level, level)
|
||||
items.append(item)
|
||||
self.append(item)
|
||||
item.show()
|
||||
|
||||
item_no = logger.level_number(view.level)
|
||||
items[item_no].set_active(True)
|
||||
|
||||
def set_log_level(self, widget, level, *rest):
|
||||
if widget.active:
|
||||
self.view.set_level(level)
|
||||
|
||||
class LogComponentMenu(gtk.Menu):
|
||||
def __init__(self, logger, view):
|
||||
gtk.Menu.__init__(self)
|
||||
components = logger.components.keys()
|
||||
components.sort(str.__gt__)
|
||||
|
||||
for c in components:
|
||||
item = gtk.MenuItem(c)
|
||||
self.append(item)
|
||||
item.show()
|
||||
|
||||
# for component in logger.components
|
||||
class LogMenu(gtk.Menu):
|
||||
def __init__(self, logger, view):
|
||||
gtk.Menu.__init__(self)
|
||||
self.logger = logger
|
||||
|
||||
# View Log Level
|
||||
self.view_menu = LogLevelMenu(logger, view)
|
||||
self.view_item = gtk.MenuItem('View Log Level')
|
||||
self.view_item.set_submenu(self.view_menu)
|
||||
self.append(self.view_item)
|
||||
self.view_item.show()
|
||||
|
||||
# View Components
|
||||
self.component_menu = LogComponentMenu(logger, view)
|
||||
self.component_item = gtk.MenuItem('View Components')
|
||||
self.component_item.set_submenu(self.component_menu)
|
||||
self.append(self.component_item)
|
||||
self.component_item.show()
|
||||
|
||||
# Clear Log
|
||||
clear_item = gtk.MenuItem('Clear Log')
|
||||
clear_item.connect('activate', self.activate_clear_button)
|
||||
self.append(clear_item)
|
||||
clear_item.show()
|
||||
|
||||
def activate_clear_button(self, item):
|
||||
self.logger.store.clear()
|
||||
|
||||
logger = Logger()
|
||||
log = logger.log
|
||||
|
||||
101
laydi/main.py
Normal file
@@ -0,0 +1,101 @@
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
import paths
|
||||
|
||||
# Site specific directories set by configure script.
|
||||
PREFIX = paths.PREFIX
|
||||
BINDIR = paths.BINDIR
|
||||
DATADIR = paths.DATADIR
|
||||
DOCDIR = paths.DOCDIR
|
||||
PYDIR = paths.PYDIR
|
||||
|
||||
ICONDIR = os.path.join(DATADIR, 'icons')
|
||||
|
||||
#: Dictionary of observers
|
||||
_observers = {}
|
||||
|
||||
#: The current Navigator object.
|
||||
navigator = None
|
||||
|
||||
#: The current application
|
||||
application = None
|
||||
|
||||
#: The current project
|
||||
project = None
|
||||
|
||||
#: The current workflow
|
||||
workflow = None
|
||||
|
||||
#: A cfgparse/optparse options object.
|
||||
options = None
|
||||
|
||||
def notify_observers(name):
|
||||
call = "%s_changed" % name
|
||||
for s in _observers.get(name, []):
|
||||
getattr(s, call)(getattr(sys.modules[__name__], name))
|
||||
|
||||
def _add_observer(name, observer):
|
||||
"""Adds observer as an observer of the named object."""
|
||||
if not _observers.has_key(name):
|
||||
_observers[name] = []
|
||||
_observers[name].append(observer)
|
||||
|
||||
def _remove_observer(name, observer):
    """Removes observer as an observer of the named object."""
    if not _observers.has_key(name):
        return
    # the original called remove() on the dict itself
    _observers[name].remove(observer)
|
||||
|
||||
def add_navigator_observer(observer):
|
||||
_add_observer('navigator', observer)
|
||||
|
||||
def add_project_observer(observer):
|
||||
_add_observer('project', observer)
|
||||
|
||||
def add_workflow_observer(observer):
|
||||
_add_observer('workflow', observer)
|
||||
|
||||
def add_application_observer(observer):
|
||||
_add_observer('application', observer)
|
||||
|
||||
def remove_navigator_observer(observer):
|
||||
_remove_observer('navigator', observer)
|
||||
|
||||
def remove_project_observer(observer):
|
||||
_remove_observer('project', observer)
|
||||
|
||||
def remove_workflow_observer(observer):
|
||||
_remove_observer('workflow', observer)
|
||||
|
||||
def remove_application_observer(observer):
|
||||
_remove_observer('application', observer)
|
||||
|
||||
def remove_options_observer(observer):
|
||||
_remove_observer('options', observer)
|
||||
|
||||
def set_navigator(nav):
|
||||
global navigator
|
||||
navigator = nav
|
||||
notify_observers('navigator')
|
||||
|
||||
def set_application(app):
|
||||
global application
|
||||
application = app
|
||||
notify_observers('application')
|
||||
|
||||
def set_project(p):
|
||||
global project
|
||||
project = p
|
||||
notify_observers('project')
|
||||
|
||||
def set_workflow(wf):
|
||||
global workflow
|
||||
workflow = wf
|
||||
notify_observers('workflow')
|
||||
|
||||
def set_options(opt):
|
||||
global options
|
||||
options = opt
|
||||
notify_observers('options')
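# --- Illustrative sketch (not from the original commit): how a workflow
# observer hooks into this module. `MyObserver` is hypothetical; it only
# needs a `workflow_changed` method, cf. notify_observers() above.
#
#     class MyObserver:
#         def workflow_changed(self, wf):
#             print "workflow is now:", wf
#
#     add_workflow_observer(MyObserver())
#     set_workflow(some_workflow)     # triggers workflow_changed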
|
||||
|
||||
444
laydi/navigator.py
Normal file
@@ -0,0 +1,444 @@
|
||||
import gtk
|
||||
import gobject
|
||||
import plots
|
||||
import time
|
||||
import fluents  # FIXME: leftover from the fluents -> laydi rename
|
||||
from logger import logger
|
||||
import dataset, plots, project, workflow, main
|
||||
import scipy
|
||||
|
||||
class NavigatorView (gtk.TreeView):
|
||||
"""The NavigatorView is a tree view of the project.
|
||||
|
||||
There is always one NavigatorView, that shows the functions, plots and
|
||||
datasets in the current project.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
if main.project:
|
||||
self.data_tree = main.project.data_tree
|
||||
else:
|
||||
self.data_tree = None
|
||||
|
||||
gtk.TreeView.__init__(self)
|
||||
|
||||
# Various properties
|
||||
self.set_enable_tree_lines(True)
|
||||
self.set_headers_visible(False)
|
||||
self.get_hadjustment().set_value(0)
|
||||
|
||||
# Selection Mode
|
||||
self.get_selection().set_mode(gtk.SELECTION_MULTIPLE)
|
||||
self.get_selection().connect('changed',self.on_selection_changed)
|
||||
self._previous_selection = []
|
||||
|
||||
# Setting up TextRenderers etc
|
||||
self.connect('row_activated', self.on_row_activated)
|
||||
self.connect('cursor_changed', self.on_cursor_changed)
|
||||
|
||||
# Activate context menu
|
||||
self.menu = NavigatorMenu(self)
|
||||
self.connect('popup_menu', self.popup_menu)
|
||||
self.connect('button_press_event', self.on_mouse_event)
|
||||
|
||||
self.textrenderer = textrenderer = gtk.CellRendererText()
|
||||
pixbufrenderer = gtk.CellRendererPixbuf()
|
||||
self.object_col = gtk.TreeViewColumn('Object')
|
||||
self.object_col.pack_start(pixbufrenderer,expand=False)
|
||||
self.object_col.pack_start(textrenderer,expand=False)
|
||||
self.object_col.set_attributes(textrenderer, cell_background=3,
|
||||
foreground=4, text=0)
|
||||
self.object_col.set_attributes(pixbufrenderer, pixbuf=5)
|
||||
self.append_column(self.object_col)
|
||||
|
||||
# send events to plots / itself
|
||||
self.enable_model_drag_source(gtk.gdk.BUTTON1_MASK,
|
||||
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
|
||||
gtk.gdk.ACTION_LINK | gtk.gdk.ACTION_MOVE)
|
||||
|
||||
self.connect("drag-data-get",self.slot_drag_data)
|
||||
|
||||
logger.debug('Initializing navigator window.')
|
||||
|
||||
    def slot_drag_data(self, treeview, context, selection, target_id, etime):
        """Sets the data for a drag event."""
        treeselection = treeview.get_selection()
        model, paths = treeselection.get_selected_rows()
        if paths:
            self.data_tree.drag_data_get(paths[0], selection)

    def add_project(self, project):
        """Dependency injection."""
        self.data_tree = project.data_tree
        self.set_model(project.data_tree)
        self.data_tree.connect('row-changed', self.on_row_changed)

    def on_selection_changed(self, selection):
        """Update the list of currently selected datasets."""

        # update prev selection right away in case of multiple events
        model, paths = selection.get_selected_rows()
        if not paths:  # a plot is marked: do nothing
            return

        tmp = self._previous_selection
        self._previous_selection = paths
        tree = self.data_tree

        # set timestamp on newly selected objects
        [tree.set_value(tree.get_iter(path), 6, time.time())
         for path in paths if path not in tmp]

        objs = [tree.get_iter(path) for path in paths]
        objs = [(tree[iter][6], tree[iter][2]) for iter in objs]

        objs.sort()
        objs = [obj for timestamp, obj in objs]

        if objs and isinstance(objs[0], dataset.Dataset):
            logger.debug('Selecting dataset')
            main.project.current_data = objs
        else:
            logger.debug('Deselecting dataset')
            main.project.current_data = []

    def on_row_changed(self, treestore, pos, iter):
        """Set correct focus and colours when rows have changed."""
        obj = treestore[iter][2]
        obj_type = treestore[iter][1]

        if not (obj or obj_type):
            return
        self.expand_to_path(pos)

        if isinstance(obj, dataset.Dataset):
            self.set_cursor(pos)
            self.grab_focus()

    def on_row_activated(self, widget, path, column):
        tree_iter = self.data_tree.get_iter(path)
        obj = self.data_tree.get_value(tree_iter, 2)

        if isinstance(obj, plots.Plot):
            logger.debug('Activating plot')
            main.application.change_plot(obj)
        elif isinstance(obj, dataset.Dataset):
            pass
        elif obj == None:
            children = []
            i = self.data_tree.iter_children(tree_iter)
            while i:
                child = self.data_tree.get(i, 2)[0]
                if isinstance(child, plots.Plot):
                    children.append(child)
                i = self.data_tree.iter_next(i)
            main.application.change_plots(children)
        else:
            t = type(obj)
            logger.notice('Activated datatype was %s. Don\'t know what to do.' % t)

    def popup_menu(self, *rest):
        self.menu.popup(None, None, None, 0, 0)

    def on_mouse_event(self, widget, event):
        path = widget.get_path_at_pos(int(event.x), int(event.y))

        if path:
            iter = self.data_tree.get_iter(path[0])
            obj = self.data_tree.get_value(iter, 2)
        else:
            iter = None
            obj = None

        if isinstance(obj, dataset.Dataset):
            self.menu.set_dataset(obj, iter)
        else:
            self.menu.set_dataset(None, iter)

        if event.button == 3:
            self.menu.popup(None, None, None, event.button, event.time)

    def on_cursor_changed(self, widget):
        """Update statusbar to contain dataset information.

        Lists the dimensions of a dataset in the statusbar of the program
        if a dataset is focused in the navigator.
        """
        path = widget.get_cursor()[0]
        tree_iter = self.data_tree.get_iter(path)
        obj = self.data_tree.get_value(tree_iter, 2)

        if isinstance(obj, dataset.Dataset):
            dims = zip(obj.get_dim_name(), obj.shape)
            dim_text = ", ".join(["%s (%d)" % dim for dim in dims])
        else:
            dim_text = ""
        main.application['appbar1'].push(dim_text)

class NavigatorMenu(gtk.Menu):
    def __init__(self, navigator):
        gtk.Menu.__init__(self)
        self.navigator = navigator
        self.dataset = None
        self.tree_iter = None

        # Populate main menu
        self.load_item = gtk.MenuItem('Load dataset')
        self.load_item.connect('activate', self.on_load_dataset, navigator)
        self.append(self.load_item)
        self.load_item.show()

        self.save_item = gtk.MenuItem('Save dataset')
        self.save_item.connect('activate', self.on_save_dataset, navigator)
        self.append(self.save_item)
        self.save_item.show()

        self.delete_item = gtk.MenuItem('Delete')
        self.delete_item.connect('activate', self.on_delete, navigator)
        self.append(self.delete_item)
        self.delete_item.show()

        self.split_item = gtk.MenuItem('Split on selection')
        self.split_item.connect('activate', self.on_split, navigator)
        self.append(self.split_item)
        self.split_item.show()

        # Build transform sub menu
        self.trans_menu = gtk.Menu()

        self.trans_tr_item = gtk.MenuItem('Transpose')
        self.trans_tr_item.connect('activate', self.on_transpose, navigator)
        self.trans_menu.append(self.trans_tr_item)
        self.trans_tr_item.show()

        self.trans_stdr_item = gtk.MenuItem('Std. rows')
        self.trans_stdr_item.connect('activate', self.on_standardise_rows, navigator)
        self.trans_menu.append(self.trans_stdr_item)
        self.trans_stdr_item.show()

        self.trans_stdc_item = gtk.MenuItem('Std. cols')
        self.trans_stdc_item.connect('activate', self.on_standardise_cols, navigator)
        self.trans_menu.append(self.trans_stdc_item)
        self.trans_stdc_item.show()

        self.trans_log_item = gtk.MenuItem('Log')
        self.trans_log_item.connect('activate', self.on_log, navigator)
        self.trans_menu.append(self.trans_log_item)
        self.trans_log_item.show()

        self.trans_item = gtk.MenuItem("Transformation")
        self.append(self.trans_item)
        self.trans_item.set_submenu(self.trans_menu)
        self.trans_item.show()

        # Build plot sub menu
        self.plot_menu = gtk.Menu()

        self.plot_image_item = gtk.MenuItem('Image Plot')
        self.plot_image_item.connect('activate', self.on_plot_image, navigator)
        self.plot_menu.append(self.plot_image_item)
        self.plot_image_item.show()

        self.plot_hist_item = gtk.MenuItem('Histogram')
        self.plot_hist_item.connect('activate', self.on_plot_hist, navigator)
        self.plot_menu.append(self.plot_hist_item)
        self.plot_hist_item.show()

        self.plot_scatter_item = gtk.MenuItem('Scatter')
        self.plot_scatter_item.connect('activate', self.on_plot_scatter, navigator)
        self.plot_menu.append(self.plot_scatter_item)
        self.plot_scatter_item.show()

        self.plot_line_item = gtk.MenuItem('Line view')
        self.plot_line_item.connect('activate', self.on_plot_line, navigator)
        self.plot_menu.append(self.plot_line_item)
        self.plot_line_item.show()

        self.plot_item = gtk.MenuItem('Plot')
        self.append(self.plot_item)
        self.plot_item.set_submenu(self.plot_menu)
        self.plot_item.show()

    def set_dataset(self, ds, it):
        self.dataset = ds
        self.tree_iter = it

        if ds == None:
            self.save_item.set_property('sensitive', False)
            self.plot_item.set_property('sensitive', False)
            self.trans_item.set_property('sensitive', False)
        else:
            self.save_item.set_property('sensitive', True)
            self.plot_item.set_property('sensitive', True)
            self.trans_item.set_property('sensitive', True)

    def load_dataset(self, filename):
        """Load the dataset from the given file and add it to the project."""
        ds = dataset.read_ftsv(filename)

        if isinstance(ds, dataset.GraphDataset):
            icon = fluents.icon_factory.get("graph_dataset")
        elif isinstance(ds, dataset.CategoryDataset):
            icon = fluents.icon_factory.get("category_dataset")
        else:
            icon = fluents.icon_factory.get("dataset")

        main.project.add_dataset(ds)
        main.project.data_tree_insert(None, ds.get_name(), ds, None, "black", icon)

    def on_load_dataset(self, item, navigator):
        # Set up file chooser.
        dialog = gtk.FileChooserDialog('Load dataset')
        dialog.set_action(gtk.FILE_CHOOSER_ACTION_OPEN)
        dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                           gtk.STOCK_OPEN, gtk.RESPONSE_OK)
        dialog.set_select_multiple(True)
        dialog.set_current_folder(main.options.datadir)

        retval = dialog.run()
        if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
            pass
        elif retval == gtk.RESPONSE_OK:
            for filename in dialog.get_filenames():
                self.load_dataset(filename)
        else:
            print "unknown; ", retval
        dialog.destroy()

    def on_save_dataset(self, item, navigator):
        dialog = gtk.FileChooserDialog('Save dataset')
        dialog.set_action(gtk.FILE_CHOOSER_ACTION_SAVE)
        dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                           gtk.STOCK_SAVE, gtk.RESPONSE_OK)
        dialog.set_current_name("%s.ftsv" % self.dataset.get_name())
        retval = dialog.run()
        if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
            logger.debug("Cancelled save dataset")
        elif retval == gtk.RESPONSE_OK:
            logger.debug("Saving dataset as: %s" % dialog.get_filename())
            fd = open(dialog.get_filename(), 'w')
            dataset.write_ftsv(fd, self.dataset)
            fd.close()
        else:
            print "unknown; ", retval
        dialog.destroy()

    def on_delete(self, item, navigator):
        tm, rows = navigator.get_selection().get_selected_rows()
        iters = [tm.get_iter(r) for r in rows]
        iters.reverse()
        for i in iters:
            main.project.delete_data(i)
            # tm.remove(i)

    def on_plot_image(self, item, navigator):
        plot = plots.ImagePlot(self.dataset, name='Image Plot')
        icon = fluents.icon_factory.get("line_plot")
        main.project.data_tree_insert(self.tree_iter, 'Image Plot', plot, None, "black", icon)
        # fixme: image plot selections are not well defined
        #plot.set_selection_listener(project.set_selection)
        #project._selection_observers.append(plot)

    def on_plot_hist(self, item, navigator):
        project = main.project
        plot = plots.HistogramPlot(self.dataset, name='Histogram')
        icon = fluents.icon_factory.get("line_plot")
        project.data_tree_insert(self.tree_iter, 'Histogram', plot, None, "black", icon)
        plot.set_selection_listener(project.set_selection)
        project._selection_observers.append(plot)

    def on_plot_scatter(self, item, navigator):
        project = main.project
        datasets = main.project.current_data
        ds_major = datasets[0]
        dims_major = ds_major.get_dim_name()
        ids_major = ds_major.get_identifiers(dims_major[1], sorted=True)
        if len(datasets) > 1:
            # More than one active dataset: try to use the first two.
            ds_minor = datasets[1]
            dims_minor = ds_minor.get_dim_name()
            if dims_minor != dims_major or ds_minor.shape[0] != ds_major.shape[0]:
                # The selected datasets do not match: fall back to
                # plotting the first dataset against itself.
                ds_minor = ds_major
        else:
            # Only one dataset selected.
            ds_minor = ds_major

        plot = plots.ScatterPlot(ds_major, ds_minor,
                                 dims_major[0], dims_major[1],
                                 ids_major[0], ids_major[1],
                                 name='Scatter (%s)' % ds_major.get_name())
        plot.add_axes_spin_buttons(len(ids_major), 0, 1)
        icon = fluents.icon_factory.get("line_plot")
        project.data_tree_insert(self.tree_iter, 'Scatter', plot, None, "black", icon)
        plot.set_selection_listener(project.set_selection)
        project._selection_observers.append(plot)

    def on_plot_line(self, item, navigator):
        project = main.project
        ds = self.dataset
        dims = ds.get_dim_name()
        ids = ds.get_identifiers(dims[1])
        plot = plots.LineViewPlot(ds, name='Line (%s)' % ds.get_name())
        icon = fluents.icon_factory.get("line_plot")
        project.data_tree_insert(self.tree_iter, 'Line view', plot, None, "black", icon)
        plot.set_selection_listener(project.set_selection)
        project._selection_observers.append(plot)

    def on_transpose(self, item, navigator):
        project = main.project
        ds = self.dataset.transpose()
        ds._name = ds._name + ".T"
        icon = fluents.icon_factory.get(ds)
        project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)

    def on_standardise_rows(self, item, navigator):
        project = main.project
        ds = self.dataset.copy()
        ds._name = self.dataset._name + ".rsc"
        axis = 1
        ds._array = ds.asarray() / scipy.expand_dims(ds.asarray().std(axis), axis)
        icon = fluents.icon_factory.get(ds)
        project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)

    def on_standardise_cols(self, item, navigator):
        project = main.project
        ds = self.dataset.copy()
        ds._name = self.dataset._name + ".csc"
        axis = 0
        ds._array = ds.asarray() / scipy.expand_dims(ds.asarray().std(axis), axis)
        icon = fluents.icon_factory.get(ds)
        project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)

    def on_log(self, item, navigator):
        project = main.project
        try:
            if not scipy.all(self.dataset.asarray() > 0):
                raise ValueError
        except:
            logger.log('warning', 'Dataset needs to be strictly positive for a log transform')
            return

        ds = self.dataset.copy()
        ds._array = scipy.log(ds.asarray())
        icon = fluents.icon_factory.get(ds)
        ds._name = ds._name + ".log"
        project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)

    def on_split(self, item, navigator):
        if self.dataset is None:
            logger.warn("Only datasets can be split.")
            return

        dim = self.dataset.get_dim_name(0)

        project = main.project
        sel_ids = set(project.get_selection()[dim])
        sel_ds = self.dataset.subdata(dim, sel_ids)

        unsel_ids = set(self.dataset.get_identifiers(dim)) - sel_ids
        unsel_ds = self.dataset.subdata(dim, unsel_ids)

        icon = fluents.icon_factory.get(self.dataset)
        project.data_tree_insert(self.tree_iter, 'Selected', sel_ds, None, "black", icon)
        project.data_tree_insert(self.tree_iter, 'Unselected', unsel_ds, None, "black", icon)
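The 'Std. rows' and 'Std. cols' handlers above both divide the data array by its standard deviation along one axis, using scipy.expand_dims so the division broadcasts correctly. A minimal standalone sketch of the same trick, using numpy (whose expand_dims behaves like the scipy call above; the array values are invented):

import numpy

x = numpy.array([[1.0, 2.0, 3.0],
                 [10.0, 20.0, 30.0]])

axis = 1   # axis=1 standardises rows (".rsc"); axis=0 columns (".csc")
std = x.std(axis)                         # per-row standard deviations, shape (2,)
x_std = x / numpy.expand_dims(std, axis)  # expand_dims gives shape (2, 1) for broadcasting

print x_std.std(1)   # each row now has standard deviation 1.0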
7
laydi/paths.py.m4
Normal file
@@ -0,0 +1,7 @@
PREFIX = "M4_PREFIX"
BINDIR = "M4_BINDIR"
DATADIR = "M4_DATADIR"
DOCDIR = "M4_DOCDIR"
PYDIR = "M4_PYDIR"
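The M4_* strings in paths.py.m4 are placeholders that the build step is expected to substitute with m4. Purely as a hedged illustration (the concrete directories depend on the build configuration and are not taken from this commit), the generated paths.py might look like:

# Hypothetical result of the m4 substitution -- values are illustrative only.
PREFIX = "/usr/local"
BINDIR = "/usr/local/bin"
DATADIR = "/usr/local/share/laydi"
DOCDIR = "/usr/local/share/doc/laydi"
PYDIR = "/usr/local/lib/python2.4/site-packages"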
1138
laydi/pca_options.glade
Normal file
File diff suppressed because it is too large
1195
laydi/plots.py
Normal file
File diff suppressed because it is too large
1092
laydi/pls_options.glade
Normal file
File diff suppressed because it is too large
154
laydi/project.py
Normal file
@@ -0,0 +1,154 @@
import os
import scipy
import gobject
import gtk
import fluents
import logger, dataset, plots, main


class Project:
    """A Project contains datasets, selections etc.

    The project, of which the application has only one at any given time,
    is the container for all datasets, plots and selections in use. The data
    in the project is organized in a gtk.TreeStore that is displayed in the
    navigator.
    """

    def __init__(self, name="Testing"):
        self.data_tree = gtk.TreeStore(str,
                                       str,
                                       object,
                                       str,
                                       str,
                                       gobject.TYPE_OBJECT,
                                       float)

        self.name = name
        self.dim_names = []
        self._selection_observers = []
        self._dataset_observers = []
        self.current_data = []
        self.datasets = []
        self.sel_obj = dataset.Selection('Current Selection')
        self.selections = []
        self._last_selection = None
        self._dataset_iter_map = {}

    def add_selection_observer(self, observer):
        self._selection_observers.append(observer)
        observer.selection_changed(None, self.get_selection())

    def notify_selection_listeners(self, dim_name):
        """Notifies observers"""
        for observer in self._selection_observers:
            observer.selection_changed(dim_name, self.get_selection())

    def add_dataset_observer(self, observer):
        self._dataset_observers.append(observer)
        observer.dataset_changed()

    def notify_dataset_listeners(self):
        """Notifies observers when new datasets are added"""
        for observer in self._dataset_observers:
            observer.dataset_changed()

    def set_selection(self, dim_name, selection):
        """Sets the current selection and notifies observers"""
        self.sel_obj[dim_name] = set(selection)
        self.notify_selection_listeners(dim_name)
        self._last_selection = selection

    def get_selection(self):
        """Returns the current selection object"""
        return self.sel_obj

    def delete_data(self, it):
        """Delete elements from the project."""
        child = self.data_tree.iter_children(it)
        while child != None:
            c = self.data_tree.iter_next(child)
            self.delete_data(child)
            child = c
        main.application.main_view.remove_view(self.data_tree.get(it, 2)[0])
        self.data_tree.remove(it)

    def add_data(self, parents, data, fun='Function'):
        """Adds a set of data and plots to the navigator.

        This method is usually called after a Function in a workflow
        has finished and returns its output."""

        if len(parents) > 0:
            parent_iter = self._dataset_iter_map[parents[0]]
        else:
            parent_iter = None

        # Add the function node to the tree
        icon = fluents.icon_factory.get("folder_grey")
        it = self.data_tree_insert(parent_iter, fun, None, None, "black", icon)

        # Add all returned datasets/plots/selections
        for d in data:
            # Any kind of dataset
            if isinstance(d, dataset.Dataset):
                if isinstance(d, dataset.GraphDataset):
                    icon = fluents.icon_factory.get("graph_dataset")
                elif isinstance(d, dataset.CategoryDataset):
                    icon = fluents.icon_factory.get("category_dataset")
                else:
                    icon = fluents.icon_factory.get("dataset")

                self.add_dataset(d)
                self.data_tree_insert(it, d.get_name(), d, None, "black", icon)

            # Any kind of plot
            elif isinstance(d, plots.Plot):
                icon = fluents.icon_factory.get("line_plot")
                self.data_tree_insert(it, d.get_title(), d, None, "black", icon)
                d.set_selection_listener(self.set_selection)
                self._selection_observers.append(d)

            # Selections are not added to the data tree
            elif isinstance(d, dataset.Selection):
                self.add_selection(d)

    def data_tree_insert(self, parent, text, data, bg, fg, icon, selected=0):
        """Inserts data into the tree view.

        @param text: The title of the object.
        @param data: A dataset, plot or function object.
        @param bg: Background color.
        @param fg: Foreground (font) color.
        @param icon: Pixmap icon.
        """
        tree = self.data_tree
        it = tree.append(parent)
        tree[it] = [text, type(data), data, bg, fg, icon, selected]
        self._dataset_iter_map[data] = it
        return it

    def add_dataset(self, dataset):
        """Appends a new Dataset to the project."""
        logger.log('debug', 'Adding dataset: %s' % dataset.get_name())
        self.datasets.append(dataset)
        for dim_name in dataset.get_all_dims():
            if dim_name not in self.dim_names:
                self.dim_names.append(dim_name)
                self.sel_obj[dim_name] = set()
                self.notify_selection_listeners(dim_name)
        self.notify_dataset_listeners()

    def add_selection(self, selection):
        """Adds a new selection to the project."""
        self.selections.append(selection)
        self.notify_dataset_listeners()

    def object_at(self, path):
        """Returns the object at a given path in the tree."""
        it = self.data_tree.get_iter(path)
        obj = self.data_tree[it][2]
        if obj:
            obj.show()
        return obj

    #def set_current_data(self, obj):
    #    self.current_data = obj

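Project exposes a small observer protocol: anything with a selection_changed(dim_name, selection) method can be registered through add_selection_observer, and anything with a dataset_changed() method through add_dataset_observer. A minimal sketch of a conforming observer; the class name and print statements are invented, and `project` is assumed to be a Project instance:

class StatusObserver:
    """Illustrative observer; any object with these two methods works."""

    def selection_changed(self, dim_name, selection):
        # invoked by Project.notify_selection_listeners()
        print "selection changed in dimension:", dim_name

    def dataset_changed(self):
        # invoked by Project.notify_dataset_listeners()
        print "dataset list changed"

obs = StatusObserver()
project.add_selection_observer(obs)  # immediately fires selection_changed(None, ...)
project.add_dataset_observer(obs)    # immediately fires dataset_changed()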
659
laydi/selections.py
Normal file
@@ -0,0 +1,659 @@
import pygtk
import gtk
import gtk.gdk
import gtk.glade
import gnome
import gnome.ui
import gobject
import scipy

import logger, dataset, main
import annotations
from lib import hypergeom


class SimpleMenu(gtk.Menu):
    def __init__(self):
        gtk.Menu.__init__(self)

    def add_simple_item(self, title, function, *args):
        item = gtk.MenuItem(title)
        item.connect('activate', function, *args)
        self.append(item)
        item.show()


class IdListController:
    """Controller class for the identifier list."""

    def __init__(self, idlist):
        self._idlist = idlist
        self._idlist.get_selection().set_mode(gtk.SELECTION_MULTIPLE)
        self._idlist.set_rubber_banding(True)

        # dimname: current_annotation_name
        self._annotation = {}

        # current dimension
        self._dimension = None

        # id, annotation
        self._idstore = gtk.ListStore(gobject.TYPE_STRING,
                                      gobject.TYPE_STRING)
        self._idstore.set_sort_func(0, self._numeric_compare)

        # Annotation tree column
        self._annotation_column = None

        ## Set up identifier list
        idlist.set_model(self._idstore)

        renderer = gtk.CellRendererText()
        dim_column = gtk.TreeViewColumn('Identifiers', renderer, text=0)
        dim_column.set_sort_indicator(True)
        dim_column.set_sort_column_id(0)
        dim_column.set_sort_order(gtk.SORT_ASCENDING)
        idlist.insert_column(dim_column, 0)
        idlist.connect('button-press-event', self._button_pressed)

        ## Enable dropping
        idlist.drag_dest_set(gtk.DEST_DEFAULT_ALL,
                             [("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
                             gtk.gdk.ACTION_LINK)
        idlist.connect('drag-data-received', self._drag_data_received)

        ## Set up identifier list context menu
        menu = self._menu = SimpleMenu()
        menu.add_simple_item('Import...', self._on_import_list)
        menu.add_simple_item('Export...', self._on_export_list)
        menu.add_simple_item('Add to selection', self._on_make_selection)
        item = gtk.MenuItem('Show annotations')
        menu.append(item)
        item.show()
        self._menu_ann = item

    ##
    ## Public interface
    ##
    def set_dimension(self, dimname):
        """Set dimension"""
        if dimname == self._dimension:
            return

        self._dimension = dimname
        self.set_annotation(self._annotation.get(dimname, None))

        if not self._annotation.has_key(dimname):
            self._annotation[dimname] = None

    def set_annotation(self, annotation):
        """Set the displayed annotation to annotation. If annotation is None,
        the annotation column is hidden. Otherwise the annotation column is
        shown and filled with values from the given annotation field."""

        if annotation == None:
            if self._annotation_column != None:
                self._idlist.remove_column(self._annotation_column)
                self._annotation_column = None
        else:
            idlist = [x[0] for x in self._idstore]
            annlist = annotations.get_dim_annotations(self._dimension,
                                                      annotation,
                                                      idlist)

            for i, x in enumerate(self._idstore):
                x[1] = annlist[i]

            if self._annotation_column == None:
                renderer = gtk.CellRendererText()
                col = gtk.TreeViewColumn(annotation, renderer, text=1)
                col.set_sort_indicator(True)
                col.set_sort_column_id(1)
                col.set_sort_order(gtk.SORT_ASCENDING)
                self._idlist.append_column(col)
                self._annotation_column = col
            self._annotation_column.set_title(annotation)

        self._annotation[self._dimension] = annotation

    def set_selection(self, selection):
        """Set the selection to be displayed.

        The selection is not stored; the values are copied into the
        TreeStore."""
        self._idstore.clear()

        # Return if no selection
        if selection == None:
            return

        # Otherwise show selection, possibly with annotations.
        idlist = list(selection[self._dimension])
        if self._annotation[self._dimension] != None:
            annlist = annotations.get_dim_annotations(self._dimension,
                                                      self._annotation[self._dimension],
                                                      idlist)
            for id, ann in zip(idlist, annlist):
                self._idstore.append((id, ann))
        else:
            for e in idlist:
                self._idstore.append((e, None))

    ##
    ## Private interface
    ##
    def _update_annotations_menu(self):
        """Updates the annotations menu with the available annotations for the
        current dim."""

        dim_h = annotations.get_dim_handler(self._dimension)
        if not dim_h:
            print "set_sensitive(False)"
            self._menu_ann.set_sensitive(False)
        else:
            annotations_menu = gtk.Menu()
            print "set_sensitive(True)"
            self._menu_ann.set_sensitive(True)
            dh = annotations.get_dim_handler(self._dimension)
            ann_names = dh.get_annotation_names()

            for ann in ann_names:
                item = gtk.MenuItem(ann)
                item.connect('activate', self._on_annotation_activated, ann)
                annotations_menu.append(item)
                item.show()

            self._menu_ann.set_submenu(annotations_menu)

    def import_annotation_file(self):
        """Pops up a file dialog and asks the user to select the annotation
        files to be loaded. One or more files can be selected. Each file is
        loaded into an annotations.DictAnnotationHandler object."""

        dialog = gtk.FileChooserDialog('Load annotations')
        dialog.set_action(gtk.FILE_CHOOSER_ACTION_OPEN)
        dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                           gtk.STOCK_OPEN, gtk.RESPONSE_OK)
        dialog.set_select_multiple(True)
        retval = dialog.run()
        if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
            pass
        elif retval == gtk.RESPONSE_OK:
            for filename in dialog.get_filenames():
                annotations.read_annotations_file(filename)
        else:
            print "unknown; ", retval
        dialog.destroy()

    def export_annotations(self):
        """Pops up a file dialog and asks the user to select a file to save
        the currently displayed annotations to.
        """

        dialog = gtk.FileChooserDialog('Save annotations')
        dialog.set_action(gtk.FILE_CHOOSER_ACTION_SAVE)
        dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                           gtk.STOCK_SAVE, gtk.RESPONSE_OK)
        retval = dialog.run()
        if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
            pass
        elif retval == gtk.RESPONSE_OK:
            filename = dialog.get_filename()
            fd = open(filename, 'w')
            dim = self._dimension
            print >> fd, "%s\t%s" % (dim, self._annotation[dim])
            for id, value in self._idstore:
                print >> fd, "%s\t%s" % (id, value)
            fd.close()
        else:
            print "unknown; ", retval
        dialog.destroy()

    def set_rank(self, ds):
        print "Set rank."

        ra = scipy.sum(ds.asarray(), 1)
        ranks = {}
        dim = ds.get_dim_name()[0]
        for key, value in ds[dim].items():
            ranks[key] = ra[value]

        ann_h = annotations.get_dim_handler(self._dimension)
        if ann_h is None:
            ann_h = annotations.DictAnnotationHandler()
            annotations.set_dim_handler(self._dimension, ann_h)

        ann_h.add_annotations('Rank', ranks)

    ##
    ## GTK Callbacks
    ##

    def _numeric_compare(self, treemodel, iter1, iter2):
        column = treemodel.get_sort_column_id()[0]

        item1 = treemodel.get_value(iter1, column)
        item2 = treemodel.get_value(iter2, column)

        try:
            item1 = float(item1)
            item2 = float(item2)
        except:
            logger.log("notice", "Could not convert to float: %s, %s" % (item1, item2))

        return cmp(item1, item2)

    def _popup_menu(self, *rest):
        self._update_annotations_menu()
        self._menu.popup(None, None, None, 0, 0)

    def _on_annotation_activated(self, menuitem, annotation):
        self.set_annotation(annotation)

    def _button_pressed(self, widget, event):
        if event.button == 3:
            self._update_annotations_menu()
            self._menu.popup(None, None, None, event.button, event.time)

    def _on_export_list(self, menuitem):
        self.export_annotations()

    def _on_import_list(self, menuitem):
        self.import_annotation_file()

    def _on_make_selection(self, menuitem):
        selection = self._idlist.get_selection()
        model, paths = selection.get_selected_rows()
        if paths == None:
            return
        iters = [self._idstore.get_iter(p) for p in paths]
        ids = [self._idstore.get_value(i, 0) for i in iters]
        main.project.set_selection(self._dimension, ids)

    def _drag_data_received(self, widget, drag_context, x, y,
                            selection, info, timestamp):
        treestore, path = selection.tree_get_row_drag_data()
        i = treestore.get_iter(path)
        obj = treestore.get_value(i, 2)
        if isinstance(obj, dataset.Dataset):
            if self._dimension in obj.get_dim_name():
                self.set_rank(obj)
        widget.emit_stop_by_name('drag-data-received')


class SelectionListController:
    def __init__(self, seltree, idlist_controller):
        self._seltree = seltree
        self._sel_stores = {}
        self._detail_cols = []
        self._dimension = None
        self._idlist_controller = idlist_controller
        self._details_on = False

        # Selection column
        renderer = gtk.CellRendererText()
        sel_column = gtk.TreeViewColumn('Selection', renderer, text=0)
        sel_column.set_resizable(True)
        sel_column.set_max_width(200)
        seltree.insert_column(sel_column, 0)

        # Detail columns
        cols = [('In CS', 3), ('All', 4), ('Rank', 5)]
        for name, store_col_num in cols:
            col = gtk.TreeViewColumn(name, renderer, text=store_col_num)
            col.set_sort_indicator(True)
            col.set_sort_column_id(store_col_num)
            col.set_sort_order(gtk.SORT_ASCENDING)

            self._detail_cols.append(col)

        # Signals
        seltree.connect('row-activated', self._on_row_activated)
        seltree.connect('cursor-changed', self._on_cursor_changed)
        seltree.connect('button-press-event', self._on_button_pressed)
        seltree.drag_dest_set(gtk.DEST_DEFAULT_ALL,
                              [("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
                              gtk.gdk.ACTION_LINK)
        seltree.connect('drag-data-received', self._drag_data_received)

        # Selections context menu
        self._seltree_menu = SimpleMenu()
        self._seltree_menu.add_simple_item('Sort by selection',
                                           self._on_seltree_sort)
        self._seltree_menu.add_simple_item('Show details',
                                           self._enable_details, True)
        self._seltree_menu.add_simple_item('Hide details',
                                           self._enable_details, False)

    #
    # Public interface
    #
    def activate(self):
        self._seltree.set_cursor((0,))

    def set_project(self, project):
        """Dependency injection."""
        main.project.add_selection_observer(self)

    def set_dimlist_controller(self, dimlist_controller):
        """Dependency injection of the dimension list controller."""
        self._dimlist_controller = dimlist_controller

    def set_dimension(self, dim):
        """Set the current dimension, changing the model of the treeview
        to match dim. After this the current dimension of the identifier list
        is updated."""
        self._ensure_selection_store(dim)
        self._seltree.set_model(self._sel_stores[dim])
        self._idlist_controller.set_dimension(dim)
        self._dimension = dim

    def selection_changed(self, dimname, selection):
        """Callback function from Project."""
        for dim in selection.dims():
            self._ensure_selection_store(dim)
            store = self._sel_stores[dim]

            if not self._get_current_selection_iter(selection, dim):
                n = len(selection[dim])
                values = (selection.title, selection, dim, n, n, 0)
                store.insert_after(None, None, values)
            else:
                # update size of current selection
                for row in store:
                    if row[1] == selection:
                        row[3] = row[4] = len(selection[dim])

        path = self._seltree.get_cursor()
        if path and self._sel_stores.has_key(self._dimension):
            it = self._sel_stores[self._dimension].get_iter(path[0])
            sel = self._sel_stores[self._dimension].get_value(it, 1)
            self._idlist_controller.set_selection(sel)

    def add_dataset(self, dataset):
        """Converts a CategoryDataset to Selection objects and adds it to
        the selection tree. The name of the dataset will be the parent
        node in the tree, and the identifiers along the first axis will
        be added as the names of the subselections."""
        dim_name = dataset.get_dim_name(0)
        self._ensure_selection_store(dim_name)
        store = self._sel_stores[dim_name]
        di = self._get_dataset_iter(dataset)
        if not di:
            n_tot = dataset.shape[0]
            selection = main.project.get_selection().get(dim_name)
            ds_idents = dataset.get_identifiers(dim_name)
            n_cs = len(selection.intersection(ds_idents))
            values = (dataset.get_name(), dataset, dim_name, n_cs, n_tot, 2)

            i = store.insert_after(None, None, values)
            for selection in dataset.as_selections():
                n_sel = len(selection[dim_name])
                values = (selection.title, selection, dim_name, 0, n_sel, 0)
                store.insert_after(i, None, values)

    #
    # Private interface
    #
    def _add_selection_store(self, dim):
        """Add a new gtk.TreeStore for the selections on a dimension."""
        # Create a new store. It holds two kinds of rows, one for
        # CategoryDatasets and one for Selections. The elements are:
        # title, link to the dataset or selection, name of the dimension,
        # number of members in the intersection with the current selection,
        # number of members in the selection, and the rank of the selection.
        store = gtk.TreeStore(gobject.TYPE_STRING,
                              gobject.TYPE_PYOBJECT,
                              gobject.TYPE_STRING,
                              gobject.TYPE_INT,
                              gobject.TYPE_INT,
                              gobject.TYPE_FLOAT)

        # Set selection store for this dimension
        self._sel_stores[dim] = store

    def _ensure_selection_store(self, dim):
        """Ensure that the object has a gtk.TreeStore for the given dimension"""
        # Do not overwrite existing stores
        if self._sel_stores.has_key(dim):
            return
        self._add_selection_store(dim)

    def _get_dataset_iter(self, ds):
        """Returns the iterator to the selection tree row containing a
        given dataset."""

        store = self._sel_stores[ds.get_dim_name(0)]

        i = store.get_iter_first()
        while i:
            if store.get_value(i, 1) == ds:
                return i
            i = store.iter_next(i)
        return None

    def _get_current_selection_iter(self, selection, dimension):
        if not self._sel_stores.has_key(dimension):
            return None

        store = self._sel_stores[dimension]

        i = store.get_iter_first()
        while i:
            if store.get_value(i, 1) == selection:
                if store.get_value(i, 2) == dimension:
                    return i
            i = store.iter_next(i)
        return None

    def _sort_selections(self, dataset):
        """Ranks selections by their intersection with the current selection.

        Ranks are determined by the hypergeometric distribution.
        """
        dim_name = dataset.get_dim_name(0)
        sel_store = self._sel_stores[dim_name]
        selection_obj = main.project.get_selection()
        current_selection = selection_obj.get(dim_name)
        if current_selection == None:
            return

        pvals = hypergeom.gene_hypergeo_test(current_selection, dataset)

        for row in sel_store:
            if row[1] == dataset:
                for child in row.iterchildren():
                    name = child[0]
                    child[3] = pvals[name][0]
                    child[4] = pvals[name][1]
                    child[5] = pvals[name][2]

        sel_store.set_sort_column_id(5, gtk.SORT_ASCENDING)

    #
    # GTK callbacks
    #
    def _enable_details(self, widget, bool):
        if self._details_on == bool:
            return
        self._details_on = bool
        if bool == True:
            for col in self._detail_cols:
                self._seltree.insert_column(col, -1)
        else:
            for col in self._detail_cols:
                self._seltree.remove_column(col)

    def _drag_data_received(self, widget, drag_context, x, y,
                            selection, info, timestamp):
        treestore, path = selection.tree_get_row_drag_data()
        i = treestore.get_iter(path)
        obj = treestore.get_value(i, 2)
        if isinstance(obj, dataset.CategoryDataset):
            self.add_dataset(obj)
            self._dimlist_controller.set_dimension(obj.get_dim_name(0))
        widget.emit_stop_by_name('drag-data-received')

    def _on_cursor_changed(self, widget):
        """Show the list of identifier strings."""
        store = self._sel_stores[self._dimension]

        p = self._seltree.get_cursor()[0]
        i = store.get_iter(p)
        obj = store.get_value(i, 1)

        if isinstance(obj, dataset.Selection):
            self._idlist_controller.set_selection(obj)
        else:
            self._idlist_controller.set_selection(None)

    def _on_row_activated(self, widget, path, column):
        store = self._sel_stores[self._dimension]
        i = store.get_iter(path)
        obj = store.get_value(i, 1)
        if isinstance(obj, dataset.Dataset):
            seltree = self._seltree
            if seltree.row_expanded(path):
                seltree.collapse_row(path)
            else:
                seltree.expand_row(path, True)
        elif isinstance(obj, dataset.Selection):
            main.project.set_selection(self._dimension,
                                       obj[self._dimension])

    def _on_button_pressed(self, widget, event):
        """Button press callback."""
        if event.button == 3:
            self._seltree_menu.popup(None, None, None, event.button, event.time)

    def _on_seltree_sort(self, menuitem):
        """Sort the selection tree if the row is a category dataset."""
        store = self._sel_stores[self._dimension]
        p = self._seltree.get_cursor()[0]
        i = store.get_iter(p)
        obj = store.get_value(i, 1)
        if isinstance(obj, dataset.CategoryDataset):
            self._sort_selections(obj)


class DimListController:
    def __init__(self, dimlist, seltree_controller):

        self._current_dim = None
        self._seltree_controller = seltree_controller

        self.show_hidden = False

        ## dimstore is a list of all dimensions in the application
        self.dimstore = gtk.ListStore(gobject.TYPE_STRING)

        # filter for hiding dims prefixed with underscore
        self.dimstore_filter = self.dimstore.filter_new()
        self.dimstore_filter.set_visible_func(self._dimension_filter)

        ## The widgets we are controlling
        self.dimlist = dimlist

        ## Set up dimensions list
        dimlist.set_model(self.dimstore_filter)

        renderer = gtk.CellRendererText()
        dim_column = gtk.TreeViewColumn('Dimension', renderer, text=0)
        dimlist.insert_column(dim_column, 0)

        # Signals
        dimlist.connect('row-activated', self._dim_row_activated)
        dimlist.connect('cursor-changed', self._dim_cursor_changed)
        dimlist.connect('button-press-event', self._dimlist_button_pressed)

        # Set up dimension context menu
        self._dimlist_menu = SimpleMenu()
        self._dimlist_menu.add_simple_item('Hide', self._on_dim_hide)
        self._dimlist_menu.add_simple_item('Show all', self._on_dim_show)

    ##
    ## Public interface
    ##
    def set_project(self, project):
        """Dependency injection."""
        # self.project = project
        self.dim_names = project.dim_names
        self.update_dims()
        project.add_dataset_observer(self)

    def get_dimension(self, dim):
        """Returns the iterator to the dimension with the given name, or
        None if not found."""

        i = self.dimstore_filter.get_iter_first()
        while i:
            if self.dimstore_filter.get_value(i, 0) == dim:
                return i
            i = self.dimstore_filter.iter_next(i)
        return None

    def set_dimension(self, dimname):
        """Sets the current dimension."""
        self._current_dim = dimname

        dim = self.get_dimension(self._current_dim)
        path = self.dimstore_filter.get_path(dim)

        if self.dimlist.get_cursor()[0] != path:
            self.dimlist.set_cursor(self.dimstore_filter.get_path(dim))
        self._seltree_controller.set_dimension(dimname)

    def dataset_changed(self):
        """Callback function from Project."""
        self.update_dims()

    def update_dims(self):
        """Update the list of dimensions shown"""
        for dim in self.dim_names:
            if not self.get_dimension(dim):
                self.dimstore.insert_after(None, (dim,))
        self.dimstore_filter.refilter()

    #
    # Private interface
    #
    def _dimension_filter(self, store, row):
        """Filters out dimensions with underscore prefix."""
        if self.show_hidden:
            return True

        visible = False
        name = store.get_value(row, 0)
        if name != None:
            visible = name[0] != "_"
        return visible

    #
    # GTK Callbacks.
    #
    def _on_dim_hide(self, menuitem):
        """Menu item callback function which hides underscore prefixed
        dimensions."""
        self.show_hidden = False
        self.dimstore_filter.refilter()

    def _on_dim_show(self, menuitem):
        """Menu item callback function that shows underscore prefixed
        dimension names."""
        self.show_hidden = True
        self.dimstore_filter.refilter()

    def _dim_cursor_changed(self, widget):
        cursor = self.dimlist.get_cursor()[0]
        i = self.dimstore_filter.get_iter(cursor)
        row = self.dimstore_filter.get_value(i, 0)
        self.set_dimension(row)
        self._seltree_controller.activate()

    def _dim_row_activated(self, widget, path, column):
        #self._seltree_controller.set_dimension(dim)
        pass

    def _dimlist_button_pressed(self, widget, event):
        if event.button == 3:
            self._dimlist_menu.popup(None, None, None, event.button, event.time)
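IdListController.set_rank above builds a DictAnnotationHandler on demand and registers it for the current dimension. The same annotations API can be driven by hand. A sketch, where the dimension name 'genes' and the id/annotation values are invented for illustration:

import annotations

# Fetch or create the handler for the (invented) dimension 'genes'.
handler = annotations.get_dim_handler('genes')
if handler is None:
    handler = annotations.DictAnnotationHandler()
    annotations.set_dim_handler('genes', handler)

# Register one annotation field, mapping identifiers to values.
handler.add_annotations('Symbol', {'id1': 'Foo', 'id2': 'Bar'})

# Look the annotations up for a list of identifiers.
print annotations.get_dim_annotations('genes', 'Symbol', ['id1', 'id2'])
# -> ['Foo', 'Bar']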
1002
laydi/view.py
Normal file
File diff suppressed because it is too large
480
laydi/workflow.py
Normal file
@@ -0,0 +1,480 @@
import gtk, gobject
import sys
import os
import inspect
import logger
import fluents
import main


def _workflow_classes(dir, modname):
    """Returns a list of all subclasses of Workflow in a given module"""
    workflow_classes = []

    module = __import__('%s' % (modname,))

    d = module.__dict__
    for wf in d.values():
        try:
            if issubclass(wf, Workflow):
                workflow_classes.append(wf)
        except TypeError, e:
            pass
    return workflow_classes


def workflow_list():
    """Returns a list containing all new workflows"""
    retval = []

    # List all .py files that can contain workflow classes
    wf_path = sys.modules['workflows'].__path__
    wf_files = []

    for dir in wf_path:
        for fn in os.listdir(dir):
            if fn.endswith('.py') and ('#' not in fn):
                wf_files.append(fn[:-3])

    # Try to load each file and look for Workflow derived classes
    for fn in wf_files:
        try:
            for wf in _workflow_classes(dir, fn):
                retval.append(wf)
        except Exception, e:
            logger.log('warning', 'Cannot load workflow: %s' % fn)
            logger.log('warning', e)

    return retval

def find_workflow(basename):
    """Searches for a workflow with a given filename."""
    print "find_workflow"

    # List all .py files that can contain workflow classes
    wf_path = main.options.workflowdir.split(';')
    wf_file = None

    for dir in wf_path:
        fn = os.path.join(dir, "%s.py" % basename)
        if os.path.isfile(fn):
            wf_file = fn
            return _workflow_classes(dir, basename)[0]

    return None

class Workflow:
    """Defines a workflow that contains a set of analysis stages.

    A Workflow is a set of analysis stages for a certain type of analysis.
    Each stage contains some possible operations to accomplish that
    task.
    """

    name = "Workflow"
    ident = None
    description = "Workflow Description"

    def __init__(self):
        print "Setting stages"
        self.stages = []
        self.stages_by_id = {}

    def get_data_file_name(self, filename):
        """Checks if a file with the given name exists in the data directory.

        Returns the file name if the file exists in the data directory, which
        is defined as datadir/workflowname. If the file does not exist, or the
        workflow does not have an identifier, this method returns None."""
        if self.ident == None:
            return None
        fn = os.path.join(main.options.datadir, self.ident, filename)
        print fn
        if os.path.isfile(fn):
            return fn
        return None

    def add_stage(self, stage):
        self.stages.append(stage)
        self.stages_by_id[stage.id] = stage

    def print_tree(self):
        print self.name
        for stage in self.stages:
            print '  %s' % stage.name
            for fun in stage.functions:
                print '    %s' % fun.name

#    def add_project(self, project):
#        if project == None:
#            logger.log('notice', 'Project is empty')
#        logger.log('notice', 'Project added in: %s' % self.name)
#        self.project = project


class EmptyWorkflow(Workflow):
    name = 'Empty Workflow'

    def __init__(self):
        print "initing empty workflow"
        Workflow.__init__(self)

class Stage:
    """A stage is a part of the data analysis process.

    Each stage contains a set of functions that can be used to
    accomplish the task. A typical early stage is 'preprocessing', which
    can be done in several ways, each represented by a function.
    """

    def __init__(self, id, name):
        self.id = id
        self.name = name
        self.functions = []
        self.functions_by_id = {}

    def add_function(self, fun):
        self.functions.append(fun)
        self.functions_by_id[fun.id] = fun


class Function:
    """A Function object encapsulates a function on a data set.

    Each Function instance encapsulates some function that can be applied
    to one or more types of data.
    """

    def __init__(self, id, name):
        self.id = id
        self.name = name

    # just return a Validation object
    def validate_input(self):
        return Validation(True, "Validation Not Implemented")

    def run(self):
        pass


class Validation:
    def __init__(self, result, reason):
        self.succeeded = result
        self.reason = reason

class WorkflowView (gtk.VBox):

    def __init__(self, wf):
        gtk.VBox.__init__(self)
        self.workflow = wf
        self.setup_workflow(wf)

    def setup_workflow(self, wf):
        # Add each stage in the process
        for stage in wf.stages:
            exp = gtk.Expander(stage.name)
            btn_align = gtk.Alignment(xscale=0.9)
            btn_align.set_padding(0, 4, 20, 0)
            btn_align.show()
            btn_box = gtk.VBox()
            btn_align.add(btn_box)
            btn_box.show()
            exp.add(btn_align)

            # Add functions in each stage
            for fun in stage.functions:
                btn = gtk.Button(fun.name)
                btn.connect('clicked',
                            lambda button, f=fun: run_function(f))

                btn_box.add(btn)
                btn.show()

            exp.show()
            self.pack_start(exp, expand=False, fill=False)

    def remove_workflow(self):
        for c in self.get_children():
            c.hide()
            self.remove(c)

    def set_workflow(self, workflow):
        self.workflow = workflow
        self.remove_workflow()
        self.setup_workflow(workflow)

class Options(dict):
    """Options base class.
    """
    def __init__(self, *args, **kw):
        dict.__init__(self, *args, **kw)
        self['out_plots'] = []
        self['out_data'] = []
        self['all_plots'] = []
        self['all_data'] = []

    def _copy_from_list(self, key_list):
        """Returns suboptions (dictionary) from a list of keys.
        """
        d = {}
        for key in key_list:
            d[key] = self.get(key, None)
        return d

class OptionsDialog(gtk.Dialog):
    """The basic input/output dialog box.

    This defines the first page of the function options-gui.
    Any function that invokes an options-gui will inherit from this class.
    """
    def __init__(self, data, options, input_names=['X', 'Y']):
        gtk.Dialog.__init__(self, 'Input-Output dialog',
                            None,
                            gtk.DIALOG_DESTROY_WITH_PARENT,
                            (gtk.STOCK_OK, gtk.RESPONSE_OK,
                             gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))

        self._options = options
        self._data = data
        self._editable = True
        self.set_size_request(550, 450)

        # create notebook
        self.nb = nb = gtk.Notebook()

        # 1. page: input/output

        # inputs
        input_frame = gtk.Frame("Input")
        hbox = gtk.HBox(True, 8)
        align = gtk.Alignment(1, 1, 1, 1)
        align.set_padding(8, 8, 8, 8)
        align.add(hbox)
        input_frame.add(align)
        for i, name in enumerate(input_names):
            frame = gtk.Frame(name)
            frame.set_label_align(0.5, 0.5)
            label = gtk.Label(data[i]._name + "\n" + str(data[i]._array.shape))
            frame.add(label)
            hbox.add(frame)

        # outputs
        output_frame = gtk.Frame("Output")
        output_hbox = gtk.HBox(True, 4)
        output_align = gtk.Alignment(1, 1, 1, 1)
        output_align.set_padding(8, 8, 8, 8)  # left padding: 8
        output_align.add(output_hbox)
        output_frame.add(output_align)

        # plots
        plot_list = gtk.ListStore(str, 'gboolean', gtk.gdk.Pixbuf)
        plot_treeview = gtk.TreeView(plot_list)

        # Add plots
        plot_icon = fluents.icon_factory.get('line_plot')
        for plt, name, use in self._options['all_plots']:
            plot_list.append((name, use, plot_icon))

        # Renderer for icon
        icon_renderer = gtk.CellRendererPixbuf()
        icon_renderer.set_property('pixbuf', plot_icon)

        # Renderer for active toggle.
        active_renderer = gtk.CellRendererToggle()
        active_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        active_renderer.connect('toggled', toggled, plot_list)
        active_column = gtk.TreeViewColumn('Use', active_renderer, active=1)

        # Renderer for plot title.
        title_renderer = gtk.CellRendererText()
        title_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_EDITABLE)
        title_column = gtk.TreeViewColumn('Plot', title_renderer, text=0)
        title_column.pack_start(icon_renderer, expand=False)

        # Add columns to tree view.
        plot_treeview.append_column(active_column)
        plot_treeview.append_column(title_column)

        ## datasets
        dataset_list = gtk.ListStore(str, 'gboolean', gtk.gdk.Pixbuf)
        dataset_treeview = gtk.TreeView(dataset_list)

        # Add datasets
        data_icon = fluents.icon_factory.get('dataset')
        for dat, name, use in self._options['all_data']:
            dataset_list.append((name, use, data_icon))

        # Renderer for icon
        icon_renderer = gtk.CellRendererPixbuf()
        icon_renderer.set_property('pixbuf', data_icon)

        # Renderer for active toggle.
        active_renderer = gtk.CellRendererToggle()
        active_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        active_renderer.connect('toggled', toggled, dataset_list)
        active_column = gtk.TreeViewColumn('Use', active_renderer, active=1)

        # Renderer for dataset title.
        title_renderer = gtk.CellRendererText()
        title_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_EDITABLE)
        title_column = gtk.TreeViewColumn('Dataset', title_renderer, text=0)
        title_column.pack_start(icon_renderer, expand=False)

        # Add columns to tree view.
        dataset_treeview.append_column(active_column)
        dataset_treeview.append_column(title_column)

        # add treeviews to output frame
        output_hbox.add(plot_treeview)
        output_hbox.add(dataset_treeview)

        # vbox for input/spacer/output
        vbox1 = gtk.VBox()
        vbox1.add(input_frame)
        vbox1.add(gtk.HSeparator())
        vbox1.add(output_frame)

        # add vbox to notebook
        nb.insert_page(vbox1, gtk.Label("Input/Output"), 0)
        self.vbox.add(nb)

        # keep refs to the liststores
        self.dataset_list = dataset_list
        self.plot_list = plot_list

    def run(self):
        self.vbox.show_all()
        return gtk.Dialog.run(self)

    def set_options(self, options):
        self._options = options

    def update_options(self, options):
        self._options.update(options)

    def set_output(self):
        # get toggled output data
        out_data = [item[0] for name, mark, ic in self.dataset_list
                    for item in self._options['all_data']
                    if mark == True and name == item[1]]
        # get toggled plots
        out_plots = [item[0] for name, mark, ic in self.plot_list
                     for item in self._options['all_plots']
                     if mark == True and name == item[1]]
        # update options
        self._options['out_data'] = out_data
        self._options['out_plots'] = out_plots

    def set_editable(self, editable):
        self._editable = editable

    def set_data(self, data):
        self._data = data

    def get_data(self):
        return self._data

    def get_options(self):
        return self._options

    def add_page_from_glade(self, glade_file, widget_name, page_title):
        """Adds a new page to the existing notebook.

        The input widget (added as a page in the notebook) is defined
        in the glade file.

        input:
            glade_file -- path to glade file
            widget_name -- name of widget from glade file
        """

        try:
            self.wTree = gtk.glade.XML(glade_file)
        except:
            logger.log('notice', 'Could not find glade file: %s' % glade_file)

        widget = self.wTree.get_widget(widget_name)
        win = widget.get_parent()
        win.hide()
        widget.unparent()
        self.nb.insert_page(widget, gtk.Label(page_title), -1)
        self.nb.set_current_page(0)


def toggled(renderer, path, store):
    it = store.get_iter(path)
    old_value = store.get_value(it, 1)
    store.set_value(it, 1, not old_value)

class WorkflowMenu (gtk.Menu):

    def __init__(self, workflow):
        gtk.Menu.__init__(self)
        self._workflow = workflow
        for stage in workflow.stages:
            self.append(self._create_stage_item(stage))

    def _create_stage_item(self, stage):
        stage_menu_item = gtk.MenuItem(stage.name)
        stage_menu_item.show()
        stage_menu = gtk.Menu()
        stage_menu_item.set_submenu(stage_menu)

        for fun in stage.functions:
            stage_menu.append(self._create_function_item(fun))
        return stage_menu_item

    def _create_function_item(self, func):
        menuitem = gtk.MenuItem(func.name)
        menuitem.connect('activate',
                         lambda item, f=func: run_function(f))
        menuitem.show()
        return menuitem

def run_function(function):
    logger.log('debug', 'Starting function: %s' % function.name)
    parent_data = main.project.current_data

    validation = function.validate_input()

    if not validation.succeeded:
        logger.log('warning', 'Invalid input data: ' + str(validation.reason))
        return

    args, varargs, varkw, defaults = inspect.getargspec(function.run)

    # the first argument is 'self' and the second should be the selection,
    # and we don't care about those...
    args.remove('self')
    if "selection" in args:
        pass_selection = True
        args.remove('selection')
    else:
        pass_selection = False

    if varargs and len(parent_data) < len(args):
        logger.log('warning', "Function requires minimum %d datasets selected." % len(args))
        return
    elif not varargs and args and len(args) != len(parent_data):
        # functions requiring datasets have to get exactly the right number
        logger.log('warning', "Function requires %d datasets, but only %d selected." % (len(args), len(parent_data)))
        return

    if not args:
        # we allow functions requiring no data to be run even if a
        # dataset is selected
        data = []
    else:
        data = parent_data

    if pass_selection:
        # if the function has a 'selection' argument, we pass in
        # the selection
        new_data = function.run(selection=main.project.get_selection(), *data)
    else:
        new_data = function.run(*data)

    if new_data != None:
        main.project.add_data(parent_data, new_data, function.name)

    logger.log('debug', 'Function ended: %s' % function.name)
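To tie workflow.py together: a Workflow holds Stages, a Stage holds Functions, and run_function() feeds the currently selected datasets to Function.run(). A hedged sketch of a tiny workflow built on these classes; all names and the log-transform body are invented for illustration, and the dataset calls (copy, asarray, _array, _name) mirror those used elsewhere in this commit:

import scipy


class LogFunction(Function):
    def __init__(self):
        Function.__init__(self, 'log', 'Log transform')

    def run(self, ds):
        # run_function() passes the currently selected dataset here.
        new_ds = ds.copy()
        new_ds._array = scipy.log(new_ds.asarray())
        new_ds._name = new_ds._name + ".log"
        return [new_ds]


class DemoWorkflow(Workflow):
    name = 'Demo Workflow'
    ident = 'demo'

    def __init__(self):
        Workflow.__init__(self)
        preproc = Stage('preproc', 'Preprocessing')
        preproc.add_function(LogFunction())
        self.add_stage(preproc)


wf = DemoWorkflow()
wf.print_tree()    # prints the workflow, its stage, and its function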