From ff8833a22cb072a2e6826892724967dca9b10cb9 Mon Sep 17 00:00:00 2001 From: einarr Date: Thu, 21 Jun 2007 10:28:10 +0000 Subject: [PATCH] Added read functions for the medium and large smokers sets. --- workflows/smalltest.py | 53 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/workflows/smalltest.py b/workflows/smalltest.py index 3476bfc..cb61526 100644 --- a/workflows/smalltest.py +++ b/workflows/smalltest.py @@ -19,9 +19,16 @@ class SmallTestWorkflow(workflow.Workflow): # DATA IMPORT load = workflow.Stage('load', 'Data') load.add_function(DatasetLoadFunctionSmokerSmall()) + load.add_function(DatasetLoadFunctionSmokerMedium()) + load.add_function(DatasetLoadFunctionSmokerFull()) #load.add_function(DatasetLoadFunctionCYCLE()) self.add_stage(load) + # PREPROCESSING + prep = workflow.Stage('prep', 'Preprocessing') + prep.add_function(LogFunction()) + self.add_stage(prep) + # NETWORK PREPROCESSING net = workflow.Stage('net', 'Network integration') net.add_function(DiffKernelFunction()) @@ -58,7 +65,7 @@ class SmallTestWorkflow(workflow.Workflow): class DatasetLoadFunctionSmokerSmall(workflow.Function): """Loader for all ftsv files of smokers small datasets.""" def __init__(self): - workflow.Function.__init__(self, 'load_data', 'Smoker') + workflow.Function.__init__(self, 'load_small', 'Smoker (Small)') def run(self): path = 'data/smokers-small/' @@ -70,6 +77,39 @@ class DatasetLoadFunctionSmokerSmall(workflow.Function): out.append(dataset.read_ftsv(input_file)) return out + +class DatasetLoadFunctionSmokerMedium(workflow.Function): + """Loader for all ftsv files of smokers medium datasets.""" + def __init__(self): + workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)') + + def run(self): + path = 'data/smokers-medium/' + files = os.listdir(path) + out = [] + for fname in files: + if fname.endswith('.ftsv'): + input_file = open(os.path.join(path, fname)) + out.append(dataset.read_ftsv(input_file)) + 
return out + + +class DatasetLoadFunctionSmokerFull(workflow.Function): + """Loader for all ftsv files of smokers full datasets.""" + def __init__(self): + workflow.Function.__init__(self, 'load_full', 'Smoker (Full)') + + def run(self): + path = 'data/smokers-full/' + files = os.listdir(path) + out = [] + for fname in files: + if fname.endswith('.ftsv'): + input_file = open(os.path.join(path, fname)) + out.append(dataset.read_ftsv(input_file)) + return out + + +class DatasetLoadFunctionCYCLE(workflow.Function): """Loader for pickled CYCLE datasets.""" def __init__(self): @@ -220,3 +260,14 @@ class KEGGQuery(workflow.Function): webbrowser.open(web_str) +class LogFunction(workflow.Function): + def __init__(self): + workflow.Function.__init__(self, 'log', 'Log') + + def run(self, data): + logger.log('notice', 'Taking the log of dataset %s' % data.get_name()) + d = data.copy() + d._array = scipy.log(d._array) + d._name = 'log(%s)' % data.get_name() + return [d] +