Annotations on yeast from Stanford mdb.

This commit is contained in:
Arnar Flatberg 2007-03-23 20:51:04 +00:00
parent 0904a59310
commit 947cf8385f

44
scripts/yeast_annot.py Normal file
View File

@ -0,0 +1,44 @@
def smdb_annot(orflist=None, input_fname='registry.genenames.tab', output_fname='yeast.annot'):
    """Write a tab-separated annotation file from registry.genenames.tab.

    The input is the gene registry of the Stanford yeast microarray
    database, available from:
    ftp://genome-ftp.stanford.edu/pub/yeast/data_download/gene_registry/registry.genenames.tab

    Input columns (tab-separated):
        0 = Locus name
        1 = Other name
        2 = Description
        3 = Gene product
        4 = Phenotype
        5 = ORF name
        6 = SGDID
    NB! Other name, Gene product and Phenotype may have more than one
    mapping. These are separated by | and are copied through unchanged.

    Parameters:
        orflist      -- optional iterable of ORF names (open reading
                        frames) to keep; None or empty keeps every
                        named ORF.
        input_fname  -- path of the registry file to read.
        output_fname -- path of the annotation file to write
                        (overwritten if it exists).

    Output: a header line followed by one line per selected ORF with the
    columns Orf, Locus_id, Other_name, Description, Gene_product,
    Phenotype, SGD_ID. Empty fields are written as "NA". Lines without
    an ORF name (including blank or truncated lines) are skipped.
    Returns None.
    """
    orf_column = 5
    n_other_columns = 6  # every input column except the ORF name
    header = "Orf\tLocus_id\tOther_name\tDescription\tGene_product\tPhenotype\tSGD_ID\n"

    # Build the lookup once; an empty/None orflist means "no filtering".
    wanted = set(orflist) if orflist else None

    with open(input_fname, 'r') as infile:
        lines = infile.read().splitlines()

    with open(output_fname, 'w') as outfile:
        outfile.write(header)
        for line in lines:
            els = line.split('\t')
            if len(els) <= orf_column:
                # Blank or truncated line: there is no ORF column to read.
                continue
            orf_name = els.pop(orf_column)
            if orf_name == '':
                # we dont care about non-named orfs
                continue
            if wanted is not None and orf_name not in wanted:
                # Skip this row only; later rows may still be wanted.
                continue
            # Pad rows that lack trailing columns so they match the header.
            els.extend([''] * (n_other_columns - len(els)))
            # ORF name goes first, as announced by the header.
            fields = [orf_name] + [e if e != '' else 'NA' for e in els]
            outfile.write("\t".join(fields) + "\n")