diff --git a/workflows/go_workflow.py b/workflows/go_workflow.py index 1ff4ef2..ecc002b 100644 --- a/workflows/go_workflow.py +++ b/workflows/go_workflow.py @@ -112,6 +112,7 @@ class GoWorkflow (workflow.Workflow): load = workflow.Stage('load', 'Load GO Annotations') load.add_function(LoadGOFunction()) load.add_function(LoadAnnotationsFunction()) + load.add_function(LoadTextDatasetFunction()) self.add_stage(load) go = workflow.Stage('go', 'Gene Ontology') @@ -143,6 +144,7 @@ class LoadTextDatasetFunction(workflow.Function): identifiers = {} type = 'dataset' name = 'Unnamed dataset' + graphtype = 'graph' # Read header lines from file. line = fd.readline() @@ -168,13 +170,45 @@ class LoadTextDatasetFunction(workflow.Function): elif key == 'name': name = val + elif key == 'graphtype': + graphtype = val + else: break line = f.readline() - + # Dimensions in the form [(dim1, [id1, id2, id3 ..]) ...] + dims = [(x, identifiers[x]) for x in dimensions] + dim_lengths = [len(identifiers[x]) for x in dimensions] + + # Create dataset of specified type + if type == 'category': + matrix = zeros(dim_lengths, dtype=bool) + ds = dataset.CategoryDataset(matrix, dims) + elif type == 'network': + matrix = zeros(dim_lengths) + ds = dataset.GraphDataset(matrix, dims) + else: + matrix = zeros(dim_lengths) + ds = dataset.Dataset(matrix, dims) + + line = f.readline() + y = 0 + while line: + values = line.split() + for x, v in enumerate(values): + matrix[x,y] = float(v) + y += 1 + line = f.readline() + + # Build NetworkX graph from matrix. + if type == 'network': + matrix = zeros(dim_lengths) + ds = dataset.NetworkDataset(matrix, dims) + + def run(self): - f = open('/home/einarr/foodata.tds') + f = open('/home/einarr/foodata.fcsv') return read_text_dataset(f)