From 6be624e87294e10a1b62b5d2783239091bbfe06c Mon Sep 17 00:00:00 2001 From: einarr Date: Thu, 24 Mar 2011 08:46:17 +0000 Subject: [PATCH] Script to generate mapping files between identifiers found in tab separated files. --- scripts/illumina/laydi-mapping-illumina | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100755 scripts/illumina/laydi-mapping-illumina diff --git a/scripts/illumina/laydi-mapping-illumina b/scripts/illumina/laydi-mapping-illumina new file mode 100755 index 0000000..211024e --- /dev/null +++ b/scripts/illumina/laydi-mapping-illumina @@ -0,0 +1,93 @@ +#!/usr/bin/python + +import getopt +import os, os.path +import sys + +OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "ILMN_Gene", "Definition", ] + +def print_help(): + print "laydi-mapping-illumina" + print + print "Usage: laydi-mapping-illumina " + print + print "Description:" + print " Produce mapping files from Illumina text annotation files" + print " Illumina files can be downloaded from:" + print " http://www.switchtoi.com/annotationfiles.ilmn" + print + print " NOTE: and are the column names in the illumina text file," + print " not laydi dimensions." + print + + +def parse_cmdline(): + short_opts = "h" + long_opts = ["help"] + options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts) + + for key, val in options: + if key in ["-h", "--help"]: + print_help() + sys.exit(0) + + if len(params) != 3: + print_help() + sys.exit(1) + + return params + + +def build_map(fn, from_dim, to_dim): + retval = {} + fd = open(fn) + line = fd.readline() + while line != "" and line.strip() != "[Probes]": + line = fd.readline() + if line == "": + return None + + line = fd.readline() + cols = [x.strip() for x in line.split("\t")] + from_col = cols.index(from_dim) + to_col = cols.index(to_dim) + + line = fd.readline() + while line != "" and not line.strip().startswith("["): + key = line.split("\t")[from_col] + val = line.split("\t")[to_col] + if not retval.has_key(key): + retval[key] = [val] + else: + retval[key].append(val) + + line = fd.readline() + + return retval + + +def write_map(fd, d, from_dim, to_dim): + opened_here = False + if isinstance(fd, str): + fd = open(fd, "w") + opened_here = True + + print >> fd, "# from: %s" % from_dim + print >> fd, "# to: %s" % to_dim + print >> fd, "# description: " + print >> fd + + for k, v in d.items(): + print >> fd, k, + for e in v: + print >> fd, e, + print >> fd + if opened_here: + fd.close() + + +if __name__ == '__main__': + fn, from_dim, to_dim = parse_cmdline() + m = build_map(fn, from_dim, to_dim) + write_map(sys.stdout, m, from_dim, to_dim) +