diff --git a/workflows/smokers.py b/workflows/smokers.py index 03c6563..6b16cf5 100644 --- a/workflows/smokers.py +++ b/workflows/smokers.py @@ -20,10 +20,23 @@ class SmallTestWorkflow(workflow.Workflow): # DATA IMPORT load = workflow.Stage('load', 'Data') - load.add_function(DatasetLoadFunctionSmokerSmall()) - load.add_function(DatasetLoadFunctionSmokerMedium()) - load.add_function(DatasetLoadFunctionSmokerFull()) - load.add_function(DatasetLoadFunctionSmokerGO()) + + load_small = LoadDataFunction('load-small', 'Smokers small', + self, 'small') + load.add_function(load_small) + + load_medium = LoadDataFunction('load-medium', 'Smokers medium', + self, 'medium') + load.add_function(load_medium) + + load_full = LoadDataFunction('load-full', 'Smokers full', + self, 'full') + load.add_function(load_full) + + load_go = LoadDataFunction('load-go', 'Smokers GO', + self, 'go') + load.add_function(load_go) + #load.add_function(DatasetLoadFunctionCYCLE()) self.add_stage(load) @@ -71,68 +84,23 @@ class SmallTestWorkflow(workflow.Workflow): logger.log('debug', 'Small test workflow is now active') -class DatasetLoadFunctionSmokerSmall(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_small', 'Smoker (Small)') +class LoadDataFunction(workflow.Function): + """Loads all datasets in a given directory.""" + def __init__(self, ident, label, wf, dir=''): + workflow.Function.__init__(self, ident, label) + self._dir = dir + self._wf = wf def run(self): - path = os.path.join(main.options.datadir, 'smokers-small/') + path = os.path.join(main.options.datadir, self._wf.ident, self._dir) files = os.listdir(path) out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) + for fn in files: + if fn.endswith('.ftsv'): + out.append(dataset.read_ftsv(os.path.join(path, fn))) return out -class DatasetLoadFunctionSmokerMedium(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)') - - def run(self): - path = os.path.join(main.options.datadir, 'smokers-medium/') - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - - -class DatasetLoadFunctionSmokerFull(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_full', 'Smoker (Full)') - - def run(self): - path = os.path.join(main.options.datadir, 'smokers-full/') - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - -class DatasetLoadFunctionSmokerGO(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_go', 'Smoker (GO)') - - def run(self): - path = os.path.join(main.options.datadir, 'smokers-go/') - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - class DatasetLoadFunctionCYCLE(workflow.Function): """Loader for pickled CYCLE datasets.""" def __init__(self):