#!/usr/bin/python

import getopt
import os, os.path
import sys

# Columns exported to the annotation file, in output order.
# An earlier, wider selection (kept here for reference) additionally
# exported: Chromosome, Ontology_Component, Ontology_Process and
# Ontology_Function.
OUTPUT_COLS = [
    "Array_Address_Id",
    "Entrez_Gene_ID",
    "Accession",
    "ILMN_Gene",
    "Definition",
]

def print_help():
    """Write the usage and description text to standard output.

    The text is emitted with ``sys.stdout.write`` rather than the ``print``
    statement so the function is valid on both Python 2 and Python 3; the
    bytes written are the same as before.
    """
    help_lines = (
        "laydi-annot-illumina",
        "",
        "Usage: laydi-annot-illumina <illumina-annotation-file.txt>",
        "",
        "Description:",
        "    Produce laydi annotation files from Illumina text annotation files",
        "    Illumina files can be downloaded from:",
        "    http://www.switchtoi.com/annotationfiles.ilmn",
        "",
    )
    # Every entry, including the empty ones, ends with exactly one newline.
    sys.stdout.write("\n".join(help_lines) + "\n")

def parse_cmdline():
    """Parse ``sys.argv`` and return the input file name.

    ``-h`` / ``--help`` print the help text and exit with status 0.
    Exactly one positional argument is required; any other count prints the
    help text and exits with status 1.  An unrecognised option is reported
    on standard error and then handled the same way, instead of escaping as
    an uncaught ``getopt.GetoptError`` traceback.

    Returns:
        The single positional command-line argument (the input file path).
    """
    short_opts = "h"
    long_opts = ["help"]
    try:
        options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # Tell the user which option was rejected before showing the usage.
        sys.stderr.write("laydi-annot-illumina: %s\n" % (err,))
        print_help()
        sys.exit(1)

    for key, _ in options:
        if key in ("-h", "--help"):
            print_help()
            sys.exit(0)

    if len(params) != 1:
        print_help()
        sys.exit(1)

    return params[0]

def convert_annotations(fn_in, fn_out, columns=None):
    """Extract selected columns of the ``[Probes]`` table into a new file.

    Everything up to and including the line starting with ``[Probes]`` is
    skipped.  The next line is read as the tab-separated column header.
    Each following line, up to end of file or the next line starting with
    ``[``, is a data row.  The output is tab-separated: one header line
    holding the selected column names, then one line per data row with the
    selected values in the same order.

    Args:
        fn_in: Path of the Illumina annotation text file to read.
        fn_out: Path of the file to write; it is overwritten if it exists.
        columns: Names of the columns to export, in output order.  Defaults
            to the module-level ``OUTPUT_COLS``.

    Raises:
        ValueError: If the input has no ``[Probes]`` line, or the header
            lacks one of the requested columns.
        IndexError: If a data row has fewer fields than a requested column
            needs.
    """
    if columns is None:
        columns = OUTPUT_COLS

    with open(fn_in) as fd_in:
        # Skip headers.  The for/else stops at end of file; the previous
        # readline() loop spun forever when the marker was absent.
        for line in fd_in:
            if line.startswith("[Probes]"):
                break
        else:
            raise ValueError("no [Probes] section found in %s" % (fn_in,))

        # Strip the line ending so the last column name (and, below, the
        # last value of each row) does not carry a trailing "\n" / "\r\n".
        colnames = next(fd_in, "").rstrip("\r\n").split("\t")
        missing = [name for name in columns if name not in colnames]
        if missing:
            raise ValueError("column(s) not found in %s: %s"
                             % (fn_in, ", ".join(missing)))
        export_colnums = [colnames.index(name) for name in columns]

        # The output is opened only after the input has been validated, so
        # a bad input file does not leave an empty output file behind.
        with open(fn_out, "w") as fd_out:
            # NOTE(review): the original code built an unused header whose
            # first entry was "probe-id"; the names are written unchanged
            # here, as before -- confirm which header laydi expects.
            fd_out.write("\t".join(columns) + "\n")

            for line in fd_in:
                if line.startswith("["):
                    # Start of the next section: the probe table is over.
                    break
                line = line.rstrip("\r\n")
                if not line:
                    # Tolerate blank lines inside or after the table.
                    continue
                values = line.split("\t")
                fd_out.write(
                    "\t".join([values[i] for i in export_colnums]) + "\n")

if __name__ == "__main__":
    # Script entry point: convert the file named on the command line.
    fn_in = parse_cmdline()

    # The output goes to the current directory, named after the input file
    # with its extension replaced by ".annot".
    fn_out = os.path.splitext(os.path.basename(fn_in))[0] + ".annot"

    # Opening the output for writing truncates it, so refuse to run when the
    # derived output path is the input file itself.
    if os.path.abspath(fn_in) == os.path.abspath(fn_out):
        sys.stderr.write(
            "laydi-annot-illumina: output file %s would overwrite the input\n"
            % (fn_out,))
        sys.exit(1)

    # sys.stdout.write keeps this block valid on both Python 2 and Python 3.
    sys.stdout.write("Reading: %s\n" % (fn_in,))
    sys.stdout.write("Writing: %s\n" % (fn_out,))
    sys.stdout.write("\n")
    sys.stdout.write("Annotations:\n")
    sys.stdout.write(", ".join(OUTPUT_COLS) + "\n")

    convert_annotations(fn_in, fn_out)
    
