Added read functions for the medium and large smokers sets.
This commit is contained in:
parent
d6298a2d99
commit
ff8833a22c
|
@ -19,9 +19,16 @@ class SmallTestWorkflow(workflow.Workflow):
|
||||||
# DATA IMPORT
|
# DATA IMPORT
|
||||||
load = workflow.Stage('load', 'Data')
|
load = workflow.Stage('load', 'Data')
|
||||||
load.add_function(DatasetLoadFunctionSmokerSmall())
|
load.add_function(DatasetLoadFunctionSmokerSmall())
|
||||||
|
load.add_function(DatasetLoadFunctionSmokerMedium())
|
||||||
|
load.add_function(DatasetLoadFunctionSmokerFull())
|
||||||
#load.add_function(DatasetLoadFunctionCYCLE())
|
#load.add_function(DatasetLoadFunctionCYCLE())
|
||||||
self.add_stage(load)
|
self.add_stage(load)
|
||||||
|
|
||||||
|
# PREPROCESSING
|
||||||
|
prep = workflow.Stage('prep', 'Preprocessing')
|
||||||
|
prep.add_function(LogFunction())
|
||||||
|
self.add_stage(prep)
|
||||||
|
|
||||||
# NETWORK PREPROCESSING
|
# NETWORK PREPROCESSING
|
||||||
net = workflow.Stage('net', 'Network integration')
|
net = workflow.Stage('net', 'Network integration')
|
||||||
net.add_function(DiffKernelFunction())
|
net.add_function(DiffKernelFunction())
|
||||||
|
@ -58,7 +65,7 @@ class SmallTestWorkflow(workflow.Workflow):
|
||||||
class DatasetLoadFunctionSmokerSmall(workflow.Function):
|
class DatasetLoadFunctionSmokerSmall(workflow.Function):
|
||||||
"""Loader for all ftsv files of smokers small datasets."""
|
"""Loader for all ftsv files of smokers small datasets."""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'load_data', 'Smoker')
|
workflow.Function.__init__(self, 'load_small', 'Smoker (Small)')
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
path = 'data/smokers-small/'
|
path = 'data/smokers-small/'
|
||||||
|
@ -70,6 +77,39 @@ class DatasetLoadFunctionSmokerSmall(workflow.Function):
|
||||||
out.append(dataset.read_ftsv(input_file))
|
out.append(dataset.read_ftsv(input_file))
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetLoadFunctionSmokerMedium(workflow.Function):
    """Loader for all ftsv files of the smokers medium datasets."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)')

    def run(self):
        """Read every ``.ftsv`` file found in ``data/smokers-medium/``.

        Returns a list with one entry per matching file, each entry being
        whatever ``dataset.read_ftsv`` returns for that file. Files are
        visited in the order reported by ``os.listdir``.
        """
        path = 'data/smokers-medium/'
        out = []
        for fname in os.listdir(path):
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                # Close the handle even if parsing fails, so that loading
                # many files does not leak open file descriptors.
                # NOTE(review): assumes read_ftsv consumes the file before
                # returning -- confirm against dataset.read_ftsv.
                try:
                    out.append(dataset.read_ftsv(input_file))
                finally:
                    input_file.close()
        return out
|
||||||
|
|
||||||
|
|
||||||
|
class DatasetLoadFunctionSmokerFull(workflow.Function):
    """Loader for all ftsv files of the smokers full datasets."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_full', 'Smoker (Full)')

    def run(self):
        """Read every ``.ftsv`` file found in ``data/smokers-full/``.

        Returns a list with one entry per matching file, each entry being
        whatever ``dataset.read_ftsv`` returns for that file. Files are
        visited in the order reported by ``os.listdir``.
        """
        path = 'data/smokers-full/'
        out = []
        for fname in os.listdir(path):
            if fname.endswith('.ftsv'):
                input_file = open(os.path.join(path, fname))
                # Close the handle even if parsing fails, so that loading
                # many files does not leak open file descriptors.
                # NOTE(review): assumes read_ftsv consumes the file before
                # returning -- confirm against dataset.read_ftsv.
                try:
                    out.append(dataset.read_ftsv(input_file))
                finally:
                    input_file.close()
        return out
|
||||||
|
|
||||||
|
|
||||||
class DatasetLoadFunctionCYCLE(workflow.Function):
|
class DatasetLoadFunctionCYCLE(workflow.Function):
|
||||||
"""Loader for pickled CYCLE datasets."""
|
"""Loader for pickled CYCLE datasets."""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -220,3 +260,14 @@ class KEGGQuery(workflow.Function):
|
||||||
webbrowser.open(web_str)
|
webbrowser.open(web_str)
|
||||||
|
|
||||||
|
|
||||||
|
class LogFunction(workflow.Function):
    """Workflow function that produces a log-transformed dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'log', 'Log')

    def run(self, data):
        """Return a one-element list holding a log-transformed copy of data.

        The work is done on ``data.copy()``: the copy's ``_array`` is
        replaced by ``scipy.log`` of itself and its ``_name`` is set to
        ``log(<original name>)``.
        """
        message = 'Taking the log of dataset %s' % data.get_name()
        logger.log('notice', message)

        transformed = data.copy()
        transformed._array = scipy.log(transformed._array)
        transformed._name = 'log(%s)' % data.get_name()
        return [transformed]
|
||||||
|
|
||||||
|
|
Reference in New Issue