The go-distance script will soon calculate Resnik distances.

This commit is contained in:
Einar Ryeng 2007-04-03 13:08:48 +00:00
parent a5c3a1f154
commit 28f0f53e8a
3 changed files with 60 additions and 8 deletions

View File

@ -5,7 +5,7 @@ godist.o: godist.c godist.h
gcc -ggdb -c godist.c
go-distance: godist.o main.o
gcc -ggdb -o go-distance godist.o main.o
gcc -ggdb -o go-distance godist.o main.o -lm
clean:
-rm go-distance godist.o main.o

View File

@ -1,4 +1,5 @@
#include <math.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
@ -10,7 +11,8 @@ void print_terms();
void add_link(char*, char*);
struct node* get_bp();
struct node* get_term(char *);
float calc_ic(struct node *, unsigned int);
void calc_ic(struct node *, unsigned int);
struct node *common_subsumer(struct node *, struct node *);
/* initialisation */
@ -19,6 +21,7 @@ int godist_init() {
hcreate(MAX_NODES);
term_array_size = 0;
link_count = 0;
struct node *n;
/* Read ontology terms from file */
printf("Reading GO terms from go-terms.txt...");
@ -51,13 +54,24 @@ int godist_init() {
fclose(tree_fd);
printf(" %d edges\n", link_count);
printf("Calculating accumulated evidence...");
fflush(stdout);
for (i=0; i<term_array_size; i++) {
clear_flags(get_bp());
accumulate_evidence(term_array[i]);
printf(".");
}
printf("\n");
evidence = 0xff;
total_ann = 0;
n = get_bp();
for (i=0; i<12; i++)
if (evidence & 1<<i)
total_ann += n->acc_evidence[i];
printf("Using %d annotations.\n", total_ann);
print_term(get_bp());
/*
print_term(get_term("GO:0040007"));
print_term(get_term("GO:0007275"));
print_term(get_term("GO:0007582"));
@ -69,9 +83,14 @@ int godist_init() {
print_term(get_term("GO:0009987"));
print_term(get_term("GO:0050896"));
print_term(get_term("GO:0050789"));
calc_ic(get_bp(), 0xffff);
*/
printf("Calculation information content...");
fflush(stdout);
calculate_ics(0xffff);
printf("\n");
/* calc_ic(get_bp(), 0xffff);*/
/* find_multi_parented();*/
common_subsumer(get_term("GO:0000003"), get_term("GO:0000004"));
}
void godist_exit() {
@ -232,12 +251,39 @@ void find_multi_parented() {
}
}
float calc_ic(struct node *n, unsigned int evidence) {
/*
 * Run calc_ic() on every entry of term_array, in index order, passing the
 * same evidence bitmask to each call.
 */
void calculate_ics(unsigned int evidence) {
    int idx = 0;

    while (idx < term_array_size) {
        calc_ic(term_array[idx], evidence);
        idx++;
    }
}
/*
 * Compute the information content of term n and store it in n->ic.
 *
 * evidence is a bitmask: for each of the 12 low bits that is set, the
 * matching slot of n->acc_evidence is added to the term's annotation count.
 * The stored value is -log(count / total_ann).  Both the count and the
 * resulting value are also printed to stdout.
 *
 * NOTE(review): a count of zero or a total_ann of zero yields a non-finite
 * result here -- confirm whether callers expect that.
 */
void calc_ic(struct node *n, unsigned int evidence) {
    float ann = 0.0;
    int bit = 0;

    /* Sum the accumulated evidence for every selected evidence code. */
    while (bit < 12) {
        if (evidence & (1 << bit)) {
            ann += (float) n->acc_evidence[bit];
        }
        bit++;
    }

    printf("%f", ann);
    n->ic = -log(ann / total_ann);
    printf("%f\n", n->ic);
}
/*
 * Append n and, recursively, every node reachable through its parent links
 * to anc[], advancing *ancc past each entry that is stored.
 *
 * A node that can be reached along several parent paths is stored once per
 * path, so the list may contain duplicates.  The caller in this file passes
 * arrays of MAX_NODES entries; collection stops once that many slots are in
 * use so the array is never overrun.  A NULL node is ignored.
 *
 * Defined ahead of common_subsumer() so the call there sees a declaration
 * with the correct (void) return type.
 */
void add_ancestors(int *ancc, struct node *anc[], struct node *n) {
    int i;

    if (n == NULL || *ancc >= MAX_NODES)
        return;

    anc[(*ancc)++] = n;
    for (i = 0; i < n->parentc; i++)
        add_ancestors(ancc, anc, n->parents[i]);
}

/*
 * Collect the ancestor lists of n1 and n2 and print how many entries each
 * list holds.
 *
 * The selection of the shared ancestor itself is not implemented yet, so
 * NULL is always returned; this gives callers a defined value to test
 * instead of an indeterminate pointer.
 */
struct node *common_subsumer(struct node *n1, struct node *n2) {
    struct node *anc1[MAX_NODES];
    struct node *anc2[MAX_NODES];
    int ancc1 = 0, ancc2 = 0;

    add_ancestors(&ancc1, anc1, n1);
    add_ancestors(&ancc2, anc2, n2);
    printf("Ancestors: %d %d\n", ancc1, ancc2);

    return NULL;
}
/*
 * Resnik measure between two terms.
 *
 * Not implemented yet: both arguments are ignored and 0.0 is returned, so
 * that a caller reading the result gets a defined value rather than an
 * indeterminate one.
 */
float resnik(struct node *n1, struct node *n2) {
    (void) n1;
    (void) n2;
    return 0.0f;
}

View File

@ -48,6 +48,8 @@ struct node {
/* Table of pointers to ontology terms, with room for MAX_NODES entries. */
struct node* term_array[MAX_NODES];
/* Upper bound used when looping over term_array. */
long term_array_size;
/* Reported as the number of edges after the tree file has been read. */
int link_count;
/* Sum of the accumulated evidence of the node returned by get_bp(), taken
 * over the evidence codes selected in `evidence`. */
int total_ann;
int evidence; /* bitvector with one bit per evidence code */
/* Ontology initialisation functions. */
int godist_init();
@ -58,6 +60,10 @@ void accumulate_evidence(struct node*);
/* Distance metric functions */
float resnik_distance(char *term1, char *term2);
float fussimeg_distance(char *term1, char *term2);
void calc_ic(struct node *n, unsigned int evidence);
void clear_flags(struct node *n);
void print_term(struct node *n);
#endif