Added read functions for the medium and large smokers sets.

This commit is contained in:
Einar Ryeng 2007-06-21 10:28:10 +00:00
parent d6298a2d99
commit ff8833a22c

View File

@ -19,9 +19,16 @@ class SmallTestWorkflow(workflow.Workflow):
# DATA IMPORT
load = workflow.Stage('load', 'Data')
load.add_function(DatasetLoadFunctionSmokerSmall())
load.add_function(DatasetLoadFunctionSmokerMedium())
load.add_function(DatasetLoadFunctionSmokerFull())
#load.add_function(DatasetLoadFunctionCYCLE())
self.add_stage(load)
# PREPROCESSING
prep = workflow.Stage('prep', 'Preprocessing')
prep.add_function(LogFunction())
self.add_stage(prep)
# NETWORK PREPROCESSING
net = workflow.Stage('net', 'Network integration')
net.add_function(DiffKernelFunction())
@ -58,7 +65,7 @@ class SmallTestWorkflow(workflow.Workflow):
class DatasetLoadFunctionSmokerSmall(workflow.Function):
"""Loader for all ftsv files of smokers small datasets."""
def __init__(self):
workflow.Function.__init__(self, 'load_data', 'Smoker')
workflow.Function.__init__(self, 'load_small', 'Smoker (Small)')
def run(self):
path = 'data/smokers-small/'
@ -70,6 +77,39 @@ class DatasetLoadFunctionSmokerSmall(workflow.Function):
out.append(dataset.read_ftsv(input_file))
return out
class DatasetLoadFunctionSmokerMedium(workflow.Function):
    """Loader for all ftsv files of the smokers medium dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)')

    def run(self):
        """Read every ``.ftsv`` file found in ``data/smokers-medium/``.

        Returns a list with one ``dataset.read_ftsv`` result per matching
        file, in the order ``os.listdir`` reports the files.  Errors raised
        by ``os.listdir``, ``open`` or ``dataset.read_ftsv`` propagate to
        the caller.
        """
        path = 'data/smokers-medium/'
        out = []
        for fname in os.listdir(path):
            if not fname.endswith('.ftsv'):
                continue
            input_file = open(os.path.join(path, fname))
            # Close the handle even if parsing fails; try/finally is used
            # so no newer syntax is required.
            # NOTE(review): assumes read_ftsv consumes the file before
            # returning and keeps no reference to it -- confirm.
            try:
                out.append(dataset.read_ftsv(input_file))
            finally:
                input_file.close()
        return out
class DatasetLoadFunctionSmokerFull(workflow.Function):
    """Loader for all ftsv files of the smokers full dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_full', 'Smoker (Full)')

    def run(self):
        """Read every ``.ftsv`` file found in ``data/smokers-full/``.

        Returns a list with one ``dataset.read_ftsv`` result per matching
        file, in the order ``os.listdir`` reports the files.  Errors raised
        by ``os.listdir``, ``open`` or ``dataset.read_ftsv`` propagate to
        the caller.
        """
        path = 'data/smokers-full/'
        out = []
        for fname in os.listdir(path):
            if not fname.endswith('.ftsv'):
                continue
            input_file = open(os.path.join(path, fname))
            # Close the handle even if parsing fails; try/finally is used
            # so no newer syntax is required.
            # NOTE(review): assumes read_ftsv consumes the file before
            # returning and keeps no reference to it -- confirm.
            try:
                out.append(dataset.read_ftsv(input_file))
            finally:
                input_file.close()
        return out
class DatasetLoadFunctionCYCLE(workflow.Function):
"""Loader for pickled CYCLE datasets."""
def __init__(self):
@ -220,3 +260,14 @@ class KEGGQuery(workflow.Function):
webbrowser.open(web_str)
class LogFunction(workflow.Function):
    """Workflow function that log-transforms a copy of a dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'log', 'Log')

    def run(self, data):
        """Return a one-element list holding a log-transformed copy of data.

        The copy is made with ``data.copy()``; its ``_array`` is replaced by
        ``scipy.log`` of that array and its ``_name`` becomes
        ``log(<original name>)``.
        """
        source_name = data.get_name()
        logger.log('notice', 'Taking the log of dataset %s' % source_name)

        # All changes are made on the copy, not on ``data`` itself.
        # NOTE(review): whether ``data`` stays untouched depends on copy()
        # not sharing ``_array`` with the original -- confirm.
        transformed = data.copy()
        transformed._array = scipy.log(transformed._array)
        transformed._name = 'log(%s)' % source_name
        return [transformed]