#!/usr/bin/python
"""Produce a laydi annotation file from an Illumina text annotation file.

The input file is scanned for its ``[Probes]`` section.  The line following
that marker is taken as the tab-separated column header, and every
subsequent line up to the next ``[...]`` section marker (or end of file) is
treated as a probe record.  Only the columns named in ``OUTPUT_COLS`` are
written, tab-separated, to the output file.
"""

import getopt
import os
import os.path
import sys

# Alternative, wider column selection kept for reference:
#OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "Chromosome", "Definition", "Ontology_Component", "Ontology_Process", "Ontology_Function", "ILMN_Gene"]

# Names of the input columns to export, in output order.
OUTPUT_COLS = [
    "Array_Address_Id",
    "Entrez_Gene_ID",
    "Accession",
    "ILMN_Gene",
    "Definition",
]

# Line prefix that opens the probe table in the input file.
_PROBES_MARKER = "[Probes]"


def print_help():
    """Print the usage text to standard output."""
    help_lines = (
        "laydi-annot-illumina",
        "",
        "Usage: laydi-annot-illumina <annotation-file>",
        "",
        "Description:",
        " Produce laydi annotation files from Illumina text annotation files",
        " Illumina files can be downloaded from:",
        " http://www.switchtoi.com/annotationfiles.ilmn",
        "",
    )
    for text in help_lines:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(text)


def parse_cmdline():
    """Parse ``sys.argv`` and return the single input file name.

    Exits with status 0 after printing the help text when ``-h``/``--help``
    is given, and with status 1 (after printing the help text) when an
    unknown option is given or the number of positional arguments is not
    exactly one.
    """
    short_opts = "h"
    long_opts = ["help"]

    try:
        options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Report bad options as a usage error instead of a traceback.
        sys.stderr.write("%s\n" % (err,))
        print_help()
        sys.exit(1)

    for key, _val in options:
        if key in ("-h", "--help"):
            print_help()
            sys.exit(0)

    if len(params) != 1:
        print_help()
        sys.exit(1)

    return params[0]


def convert_annotations(fn_in, fn_out):
    """Copy the ``OUTPUT_COLS`` columns of the probe table to ``fn_out``.

    Parameters:
        fn_in: path of the Illumina annotation text file to read.
        fn_out: path of the tab-separated annotation file to write.

    Raises:
        ValueError: if ``fn_in`` has no ``[Probes]`` section, or if the
            probe table header lacks one of the ``OUTPUT_COLS`` columns.
        IndexError: if a probe record has fewer fields than the header
            position of a requested column.
    """
    with open(fn_in) as fd_in:
        # Skip everything up to and including the "[Probes]" marker.  The
        # for/else form terminates at end of file instead of spinning
        # forever when the marker is absent.
        for line in fd_in:
            if line.startswith(_PROBES_MARKER):
                break
        else:
            raise ValueError(
                "%s: no %s section found" % (fn_in, _PROBES_MARKER))

        # Strip the line terminator before splitting; otherwise the last
        # column name carries a trailing newline and can never be matched.
        colnames = next(fd_in, "").rstrip("\r\n").split("\t")
        missing = [name for name in OUTPUT_COLS if name not in colnames]
        if missing:
            raise ValueError(
                "%s: missing annotation column(s): %s"
                % (fn_in, ", ".join(missing)))
        export_colnums = [colnames.index(name) for name in OUTPUT_COLS]

        # The output is opened only once the input has been validated, so a
        # malformed input does not truncate an existing output file.
        with open(fn_out, "w") as fd_out:
            # NOTE(review): the original computed an unused header list
            # ["probe-id"] + OUTPUT_COLS[1:] but wrote OUTPUT_COLS.  The
            # emitted header is kept as it was -- confirm which header the
            # consumer of these files expects.
            fd_out.write("\t".join(OUTPUT_COLS) + "\n")

            for line in fd_in:
                if line.startswith("["):
                    # Start of the next section: the probe table is over.
                    break
                record = line.rstrip("\r\n")
                if not record:
                    continue
                values = record.split("\t")
                output_values = [values[idx] for idx in export_colnums]
                fd_out.write("\t".join(output_values) + "\n")


def main():
    """Convert the file named on the command line to ``<basename>.annot``.

    The output file is written to the current working directory.
    """
    fn_in = parse_cmdline()
    fn_out = os.path.splitext(os.path.split(fn_in)[1])[0] + ".annot"

    print("Reading: %s" % (fn_in,))
    print("Writing: %s" % (fn_out,))
    print("")
    print("Annotations:")
    print(", ".join(OUTPUT_COLS))

    convert_annotations(fn_in, fn_out)


if __name__ == "__main__":
    main()