From 28f0f53e8aab490fe8318f3cd4d298139474997f Mon Sep 17 00:00:00 2001 From: einarr Date: Tue, 3 Apr 2007 13:08:48 +0000 Subject: [PATCH] go-distance script soon calculating resnik distances. --- scripts/geneontology/go-distance/Makefile | 2 +- scripts/geneontology/go-distance/godist.c | 60 ++++++++++++++++++++--- scripts/geneontology/go-distance/godist.h | 6 +++ 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/scripts/geneontology/go-distance/Makefile b/scripts/geneontology/go-distance/Makefile index 2d05f5d..b2d3978 100644 --- a/scripts/geneontology/go-distance/Makefile +++ b/scripts/geneontology/go-distance/Makefile @@ -5,7 +5,7 @@ godist.o: godist.c godist.h gcc -ggdb -c godist.c go-distance: godist.o main.o - gcc -ggdb -o go-distance godist.o main.o + gcc -ggdb -o go-distance godist.o main.o -lm clean: -rm go-distance godist.o main.o diff --git a/scripts/geneontology/go-distance/godist.c b/scripts/geneontology/go-distance/godist.c index 575fbdc..63ae7d0 100644 --- a/scripts/geneontology/go-distance/godist.c +++ b/scripts/geneontology/go-distance/godist.c @@ -1,4 +1,5 @@ +#include #include #include #include @@ -10,7 +11,8 @@ void print_terms(); void add_link(char*, char*); struct node* get_bp(); struct node* get_term(char *); -float calc_ic(struct node *, unsigned int); +void calc_ic(struct node *, unsigned int); +struct node *common_subsumer(struct node *, struct node *); /* initialisation */ @@ -19,6 +21,7 @@ int godist_init() { hcreate(MAX_NODES); term_array_size = 0; link_count = 0; + struct node *n; /* Read ontology terms from file */ printf("Reading GO terms from go-terms.txt..."); @@ -51,13 +54,24 @@ int godist_init() { fclose(tree_fd); printf(" %d edges\n", link_count); + printf("Calculating accumulated evidence..."); + fflush(stdout); for (i=0; iacc_evidence[i]; + printf("Using %d annotations.\n", total_ann); + print_term(get_bp()); - + /* print_term(get_term("GO:0040007")); print_term(get_term("GO:0007275")); print_term(get_term("GO:0007582")); @@ -69,9 +83,14 @@ int godist_init() { print_term(get_term("GO:0009987")); print_term(get_term("GO:0050896")); print_term(get_term("GO:0050789")); - - calc_ic(get_bp(), 0xffff); +*/ + printf("Calculation information content..."); + fflush(stdout); + calculate_ics(0xffff); + printf("\n"); +/* calc_ic(get_bp(), 0xffff);*/ /* find_multi_parented();*/ + common_subsumer(get_term("GO:0000003"), get_term("GO:0000004")); } void godist_exit() { @@ -232,12 +251,39 @@ void find_multi_parented() { } } -float calc_ic(struct node *n, unsigned int evidence) { +void calculate_ics(unsigned int evidence) { + int i; + for (i=0; iacc_evidence[i]; - printf("%f", ann); + n->ic = -log(ann/total_ann); + printf("%f\n", n->ic); +} + +struct node *common_subsumer(struct node *n1, struct node *n2) { + struct node *anc1[MAX_NODES]; + struct node *anc2[MAX_NODES]; + int ancc1=0, ancc2=0; + + add_ancestors(&ancc1, anc1, n1); + add_ancestors(&ancc2, anc2, n2); + printf("Ancestors: %d %d\n", ancc1, ancc2); +} + +void add_ancestors(int *ancc, struct node *anc[], struct node *n) { + int i=0; + anc[(*ancc)++] = n; + for (i=0; iparentc; i++) + add_ancestors(ancc, anc, n->parents[i]); +} + +float resnik(struct node *n1, struct node *n2) { } diff --git a/scripts/geneontology/go-distance/godist.h b/scripts/geneontology/go-distance/godist.h index a074f6e..4a3f918 100644 --- a/scripts/geneontology/go-distance/godist.h +++ b/scripts/geneontology/go-distance/godist.h @@ -48,6 +48,8 @@ struct node { struct node* term_array[MAX_NODES]; long term_array_size; int link_count; +int total_ann; +int evidence; /* bitvector with one bit per evidence code */ /* Ontology initialisation functions. */ int godist_init(); @@ -58,6 +60,10 @@ void accumulate_evidence(struct node*); /* Distance metric functions */ float resnik_distance(char *term1, char *term2); float fussimeg_distance(char *term1, char *term2); +void calc_ic(struct node *n, unsigned int evidence); + +void clear_flags(struct node *n); +void print_term(struct node *n); #endif