New PCA workflow and dataset updates
This commit is contained in:
parent
c09f2ceb92
commit
800e7dc42e
4
fluent
4
fluent
|
@ -19,6 +19,7 @@ import logger
|
||||||
import plots
|
import plots
|
||||||
import navigator
|
import navigator
|
||||||
import go_workflow
|
import go_workflow
|
||||||
|
import pca_workflow
|
||||||
import scipy
|
import scipy
|
||||||
|
|
||||||
PROGRAM_NAME = 'fluent'
|
PROGRAM_NAME = 'fluent'
|
||||||
|
@ -45,7 +46,8 @@ class FluentApp:
|
||||||
self.current_data = None
|
self.current_data = None
|
||||||
gtk.glade.set_custom_handler(self.custom_object_factory)
|
gtk.glade.set_custom_handler(self.custom_object_factory)
|
||||||
self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'appwindow')
|
self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'appwindow')
|
||||||
self.workflow = go_workflow.EinarsWorkflow(self)
|
self.workflow = pca_workflow.PCAWorkflow(self)
|
||||||
|
self.workflow.add_project(self.project)
|
||||||
|
|
||||||
def custom_object_factory(self, glade, function_name, widget_name,\
|
def custom_object_factory(self, glade, function_name, widget_name,\
|
||||||
str1, str2, int1, int2):
|
str1, str2, int1, int2):
|
||||||
|
|
|
@ -12,12 +12,16 @@ class Dataset:
|
||||||
"""
|
"""
|
||||||
def __init__(self,input_array,def_list):
|
def __init__(self,input_array,def_list):
|
||||||
self._data = asarray(input_array)
|
self._data = asarray(input_array)
|
||||||
self.dims = shape(self._data)
|
dims = shape(self._data)
|
||||||
self.def_list = def_list
|
self.def_list = def_list
|
||||||
self._ids_set = set()
|
self._ids_set = set()
|
||||||
self.ids={}
|
self.ids={}
|
||||||
self._dim_num = {}
|
self._dim_num = {}
|
||||||
self._dim_names = []
|
self._dim_names = []
|
||||||
|
if len(dims)==1: # a vector is defined to be column vector!
|
||||||
|
self.dims = (dims[0],1)
|
||||||
|
else:
|
||||||
|
self.dims = dims
|
||||||
if len(def_list)!=len(self.dims):
|
if len(def_list)!=len(self.dims):
|
||||||
raise ValueError,"array dims and identifyer mismatch"
|
raise ValueError,"array dims and identifyer mismatch"
|
||||||
for axis,(dim_name,ids) in enumerate(def_list):
|
for axis,(dim_name,ids) in enumerate(def_list):
|
||||||
|
@ -25,7 +29,7 @@ class Dataset:
|
||||||
#if dim_name not in project.c_p.dim_names:
|
#if dim_name not in project.c_p.dim_names:
|
||||||
# dim_name = project.c_p.suggest_dim_name(dim_name)
|
# dim_name = project.c_p.suggest_dim_name(dim_name)
|
||||||
if not ids:
|
if not ids:
|
||||||
logger.log('debug','Creating identifiers along: '+dim_name)
|
logger.log('debug','Creating identifiers along: '+ str(dim_name))
|
||||||
ids = self._create_identifiers(axis)
|
ids = self._create_identifiers(axis)
|
||||||
for num,name in enumerate(ids):
|
for num,name in enumerate(ids):
|
||||||
enum_ids[name] = num
|
enum_ids[name] = num
|
||||||
|
@ -40,13 +44,21 @@ class Dataset:
|
||||||
raise ValueError,"dim size and identifyer mismatch"
|
raise ValueError,"dim size and identifyer mismatch"
|
||||||
|
|
||||||
def names(self, axis=0):
    """Return the identifier names along one dimension, in index order.

    Args:
        axis: Either an integer position into ``self._dim_names`` or the
            name of a dimension. Anything that is not an integer is
            treated as a dimension name.

    Returns:
        A new list with the identifier names of that dimension, sorted by
        the value each name maps to in ``self.ids[dim_name]``.

    Raises:
        IndexError: If ``axis`` is an integer outside ``self._dim_names``.
        ValueError: If the resolved name is not a dimension of the dataset.
    """
    if isinstance(axis, int):
        dim_name = self._dim_names[axis]
    else:
        # Every non-index argument is taken as a name, so dim_name is
        # always bound before the membership check below.
        dim_name = axis
    if dim_name not in self._dim_names:
        raise ValueError("%s not a dimension in dataset" % (dim_name,))
    index_of = self.ids[dim_name]
    # Order the names by their stored value; the names themselves are
    # never compared with each other.
    return sorted(index_of, key=index_of.get)
|
||||||
|
|
||||||
def extract_data(self,ids,dim_name):
|
def extract_data(self,ids,dim_name):
|
||||||
"""Extracts data along a dimension by identifiers"""
|
"""Extracts data along a dimension by identifiers"""
|
||||||
|
|
|
@ -27,6 +27,9 @@ class Workflow:
|
||||||
for fun in stage.functions:
|
for fun in stage.functions:
|
||||||
print ' %s' % fun.name
|
print ' %s' % fun.name
|
||||||
|
|
||||||
|
def add_project(self, project):
    """Attach *project* to this workflow by storing it as ``self.project``."""
    self.project = project
|
||||||
|
|
||||||
class Stage:
|
class Stage:
|
||||||
"""A stage is a part of the data analysis process.
|
"""A stage is a part of the data analysis process.
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
import gtk
|
||||||
|
import logger
|
||||||
|
from workflow import *
|
||||||
|
from scipy import array
|
||||||
|
from data import read_affy_annot
|
||||||
|
import plots
|
||||||
|
|
||||||
|
class PCAWorkflow(Workflow):
    """Workflow with four stages: load, preprocess, annot and model.

    Each stage holds exactly one function and the stages are registered
    in the order listed in ``__init__``.
    """

    def __init__(self, app):
        """Build the stages and register them on this workflow.

        Args:
            app: Passed straight through to ``Workflow.__init__``.
        """
        Workflow.__init__(self, app)
        self.name = 'PCAs Workflow'

        # (stage id, stage title, factory for the stage's single function).
        # Factories are used so that every function object is created only
        # after its stage exists, one stage at a time.
        stage_specs = (
            ('load', 'Load Data',
             lambda: Function('load_mootha', 'Load Microarrays')),
            ('preprocess', 'Preprocessing',
             lambda: Function('log2', 'Logarithm')),
            ('annot', 'Affy annotations',
             lambda: LoadAnnotationsFunction()),
            ('model', 'Model',
             lambda: Function('pca', 'PCA')),
        )
        for stage_id, stage_title, make_function in stage_specs:
            stage = Stage(stage_id, stage_title)
            stage.add_function(make_function())
            self.add_stage(stage)

        logger.log('debug', '\tPCA\'s workflow is now active')
|
||||||
|
|
||||||
|
class LoadAnnotationsFunction(Function):
    """Workflow function that asks for an Affy annotation file and parses it.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        Function.__init__(self, 'load', 'Load Annotations')
        # Result of the most recent successful parse, or None.
        self.annotations = None
        # Open handle of the file picked in the current run, or None when
        # no file has been picked.
        self.file = None

    def load_affy_file(self, filename):
        """Open *filename* and keep the handle in ``self.file``.

        Raises:
            IOError: If the file cannot be opened.
        """
        f = open(filename)
        logger.log('notice', 'Loading annotation file: %s' % filename)
        self.file = f

    def on_response(self, dialog, response):
        """Dialog 'response' handler: open the chosen file on OK."""
        if response == gtk.RESPONSE_OK:
            filename = dialog.get_filename()
            # OK can be pressed with nothing selected; there is then no
            # file to open.
            if filename:
                logger.log('notice', 'Reading file: %s' % filename)
                self.load_affy_file(filename)

    def run(self, data):
        """Show a file chooser, then parse the selected annotation file.

        Args:
            data: Not used by this function.

        Returns:
            A one-element list holding the parsed annotations, or None
            when no file was selected.
        """
        # Forget a handle left over from an earlier run so that a
        # cancelled dialog cannot re-read an old file.
        self.file = None

        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open Affy Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        dialog.run()
        dialog.destroy()

        if self.file is None:
            return None

        ### Reading and parsing here
        # Store the result on the instance so that the returned list holds
        # the parsed data rather than the initial None.
        # NOTE(review): the handle in self.file stays open; whether
        # read_affy_annot consumes it eagerly is not visible here - confirm
        # before closing it.
        self.annotations = read_affy_annot(self.file)
        return [self.annotations]
|
||||||
|
|
||||||
|
class PCAFunction(Function):
    """Workflow function that runs a two-component PCA on a Dataset.

    NOTE(review): ``dataset`` and ``pca`` are not imported by name in the
    visible part of this module; they have to come from
    ``from workflow import *`` or from an import that is not shown here -
    confirm.
    """

    def __init__(self):
        Function.__init__(self, 'X', 'a_opt')
        self.output = None

    def run(self, data):
        """Decompose *data* and wrap each result array in a Dataset.

        Args:
            data: The input; anything that is not a ``dataset.Dataset``
                is rejected.

        Returns:
            ``[T, P, E, tsq]`` - the four arrays returned by ``pca``, each
            wrapped in a ``dataset.Dataset`` - or None when *data* is not
            a Dataset.
        """
        logger.log('debug', 'datatype: %s' % type(data))
        if not isinstance(data, dataset.Dataset):
            return None
        # dims is a tuple; wrap it so that % formats it as one value
        # instead of unpacking it into several format arguments.
        logger.log('debug', 'dimensions: %s' % (data.dims,))

        ## calculations
        T, P, E, tsq = pca(data._data, a_opt=2)

        # Dimension definitions: [dimension name, identifier names].
        comp_def = ['comp', ['1', '2']]
        singel_def = ['1', ['s']]
        col_def = [data._dim_names[0], data.names(0)]
        row_def = [data._dim_names[1], data.names(1)]

        T = dataset.Dataset(T, [col_def, comp_def])
        # Built from P itself; T has already been rebound to a Dataset.
        P = dataset.Dataset(P, [row_def, comp_def])
        E = dataset.Dataset(E, [col_def, row_def])
        tsq = dataset.Dataset(tsq, [row_def, singel_def])

        ## plots
        # Placeholder: the plot is created but not yet filled or returned.
        loading_plot = plots.ScatterPlot()

        return [T, P, E, tsq]
|
||||||
|
|
Reference in New Issue