#!/usr/bin/python
"""Produce laydi mapping files from Illumina text annotation files.

The script reads the tab-separated table that follows the ``[Probes]``
marker of an Illumina annotation file, maps the values of one column to
the values of another and writes the result to standard output.

The code deliberately avoids the ``print`` statement/function so that it
runs unchanged on both Python 2 and Python 3.
"""

import getopt
import os
import os.path
import sys

OUTPUT_COLS = [
    "Array_Address_Id",
    "Entrez_Gene_ID",
    "Accession",
    "ILMN_Gene",
    "Definition",
]


def print_help():
    """Write the usage message to standard output."""
    lines = [
        "laydi-mapping-illumina",
        "",
        # Exactly three positional arguments are required by parse_cmdline().
        "Usage: laydi-mapping-illumina <file> <from> <to>",
        "",
        "Description:",
        " Produce mapping files from Illumina text annotation files",
        " Illumina files can be downloaded from:",
        " http://www.switchtoi.com/annotationfiles.ilmn",
        "",
        " NOTE: <from> and <to> are the column names in the illumina text file,",
        " not laydi dimensions.",
        "",
    ]
    sys.stdout.write("\n".join(lines) + "\n")


def parse_cmdline():
    """Parse ``sys.argv`` and return ``[filename, from_column, to_column]``.

    Prints the help text and exits with status 0 for ``-h``/``--help``, and
    with status 1 when an unknown option is given or when the number of
    positional arguments is not exactly three.
    """
    short_opts = "h"
    long_opts = ["help"]

    try:
        options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Report a bad option as a usage error instead of a traceback.
        sys.stderr.write("laydi-mapping-illumina: %s\n" % err)
        print_help()
        sys.exit(1)

    for key, _val in options:
        if key in ("-h", "--help"):
            print_help()
            sys.exit(0)

    if len(params) != 3:
        print_help()
        sys.exit(1)

    return params


def _column_index(cols, name):
    """Return the position of *name* in *cols* or raise ``ValueError``."""
    if name not in cols:
        raise ValueError("column %r not found in [Probes] header" % (name,))
    return cols.index(name)


def build_map(fn, from_dim, to_dim):
    """Build a mapping between two columns of the ``[Probes]`` table in *fn*.

    Parameters:
        fn        path of the Illumina text annotation file.
        from_dim  header name of the column providing the keys.
        to_dim    header name of the column providing the values.

    Returns a dict mapping each key to the list of values found for it (in
    file order), or ``None`` when the file has no ``[Probes]`` section.

    Raises ``ValueError`` when either column name is missing from the table
    header and ``IndexError`` when a data row is too short to contain the
    requested columns.
    """
    retval = {}
    with open(fn) as fd:
        # Skip everything up to and including the "[Probes]" marker.
        for line in fd:
            if line.strip() == "[Probes]":
                break
        else:
            return None

        # The first line after the marker holds the column names.
        header = next(fd, "")
        cols = [x.strip() for x in header.split("\t")]
        from_col = _column_index(cols, from_dim)
        to_col = _column_index(cols, to_dim)

        for line in fd:
            stripped = line.strip()
            if stripped.startswith("["):
                # Start of the next section: the probe table is finished.
                break
            if not stripped:
                continue
            # Drop the line terminator first; otherwise the last column
            # would carry a trailing newline into the mapping.
            fields = line.rstrip("\r\n").split("\t")
            retval.setdefault(fields[from_col], []).append(fields[to_col])
    return retval


def write_map(fd, d, from_dim, to_dim):
    """Write mapping *d* in laydi mapping format.

    Parameters:
        fd        a writable file object, or a ``str`` path that is opened
                  for writing and closed again by this function.
        d         dict mapping each key to a list of values.
        from_dim  name written to the ``# from:`` header line.
        to_dim    name written to the ``# to:`` header line.

    The output is a four-line header followed by one line per key holding
    the key and its values separated by single spaces.
    """
    opened_here = False
    if isinstance(fd, str):
        fd = open(fd, "w")
        opened_here = True

    try:
        fd.write("# from: %s\n" % from_dim)
        fd.write("# to: %s\n" % to_dim)
        fd.write("# description: \n")
        fd.write("\n")
        for k, v in d.items():
            fd.write(" ".join([str(k)] + [str(e) for e in v]) + "\n")
    finally:
        # Only close handles this function opened; never the caller's.
        if opened_here:
            fd.close()


def main():
    """Command-line entry point: build the mapping and print it to stdout."""
    fn, from_dim, to_dim = parse_cmdline()
    try:
        m = build_map(fn, from_dim, to_dim)
    except ValueError as err:
        sys.stderr.write("laydi-mapping-illumina: %s\n" % err)
        sys.exit(1)
    if m is None:
        sys.stderr.write(
            "laydi-mapping-illumina: no [Probes] section found in %s\n" % fn)
        sys.exit(1)
    write_map(sys.stdout, m, from_dim, to_dim)


if __name__ == '__main__':
    main()