def smdb_annot(orflist=None, input_fname='registry.genenames.tab', output_fname='yeast.annot'):
    """Write a tab-separated annotation file from registry.genenames.tab.

    The input is the gene registry of the Stanford yeast microarray
    database, available from:
    ftp://genome-ftp.stanford.edu/pub/yeast/data_download/gene_registry/registry.genenames.tab

    Each input line is expected to hold these tab-separated columns:

        0 = Locus name
        1 = Other name
        2 = Description
        3 = Gene product
        4 = Phenotype
        5 = ORF name
        6 = SGDID

    NB! Other name, Gene product and Phenotype may have more than one
    mapping. These are separated by | and are copied through unchanged.

    Args:
        orflist: optional collection of ORF (open reading frame) names to
            keep. When it is None or empty, every named ORF is written.
        input_fname: path of the registry file to read.
        output_fname: path of the annotation file to write (overwritten).

    Output:
        Writes a header line followed by one line per selected ORF. The ORF
        name is moved to the first column so the rows line up with the
        header; the remaining columns keep their input order. Empty fields
        are written as "NA". Lines without an ORF name (including blank or
        truncated lines) are skipped. Returns None.
    """
    header = "Orf\tLocus_id\tOther_name\tDescription\tGene_product\tPhenotype\tSGD_ID\n"
    n_columns = 7  # number of columns named in the header

    # Build the lookup set once; an empty or None orflist means "no filter".
    wanted = set(orflist) if orflist else None

    with open(input_fname, 'r') as infile, open(output_fname, 'w') as outfile:
        outfile.write(header)
        for line in infile:
            els = line.rstrip("\r\n").split('\t')
            if len(els) <= 5:
                # Blank or truncated line: there is no ORF column to read.
                continue
            orf_name = els.pop(5)
            if orf_name == '':
                # we don't care about non-named orfs
                continue
            if wanted is not None and orf_name not in wanted:
                # Skip only this record; later lines may still match.
                continue
            row = [orf_name] + [e if e != '' else "NA" for e in els]
            # Pad rows that lack trailing columns (e.g. no SGDID) so every
            # row has at least as many fields as the header.
            row.extend(["NA"] * (n_columns - len(row)))
            outfile.write("\t".join(row) + "\n")