go-distance script will soon calculate Resnik distances.
This commit is contained in:
parent
a5c3a1f154
commit
28f0f53e8a
@ -5,7 +5,7 @@ godist.o: godist.c godist.h
|
||||
gcc -ggdb -c godist.c
|
||||
|
||||
go-distance: godist.o main.o
|
||||
gcc -ggdb -o go-distance godist.o main.o
|
||||
gcc -ggdb -o go-distance godist.o main.o -lm
|
||||
|
||||
clean:
|
||||
-rm go-distance godist.o main.o
|
||||
|
@ -1,4 +1,5 @@
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
@ -10,7 +11,8 @@ void print_terms();
|
||||
void add_link(char*, char*);
|
||||
struct node* get_bp();
|
||||
struct node* get_term(char *);
|
||||
float calc_ic(struct node *, unsigned int);
|
||||
void calc_ic(struct node *, unsigned int);
|
||||
struct node *common_subsumer(struct node *, struct node *);
|
||||
|
||||
|
||||
/* initialisation */
|
||||
@ -19,6 +21,7 @@ int godist_init() {
|
||||
hcreate(MAX_NODES);
|
||||
term_array_size = 0;
|
||||
link_count = 0;
|
||||
struct node *n;
|
||||
|
||||
/* Read ontology terms from file */
|
||||
printf("Reading GO terms from go-terms.txt...");
|
||||
@ -51,13 +54,24 @@ int godist_init() {
|
||||
fclose(tree_fd);
|
||||
printf(" %d edges\n", link_count);
|
||||
|
||||
printf("Calculating accumulated evidence...");
|
||||
fflush(stdout);
|
||||
for (i=0; i<term_array_size; i++) {
|
||||
clear_flags(get_bp());
|
||||
accumulate_evidence(term_array[i]);
|
||||
printf(".");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
evidence = 0xff;
|
||||
total_ann = 0;
|
||||
n = get_bp();
|
||||
for (i=0; i<12; i++)
|
||||
if (evidence & 1<<i)
|
||||
total_ann += n->acc_evidence[i];
|
||||
printf("Using %d annotations.\n", total_ann);
|
||||
|
||||
print_term(get_bp());
|
||||
|
||||
/*
|
||||
print_term(get_term("GO:0040007"));
|
||||
print_term(get_term("GO:0007275"));
|
||||
print_term(get_term("GO:0007582"));
|
||||
@ -69,9 +83,14 @@ int godist_init() {
|
||||
print_term(get_term("GO:0009987"));
|
||||
print_term(get_term("GO:0050896"));
|
||||
print_term(get_term("GO:0050789"));
|
||||
|
||||
calc_ic(get_bp(), 0xffff);
|
||||
*/
|
||||
printf("Calculation information content...");
|
||||
fflush(stdout);
|
||||
calculate_ics(0xffff);
|
||||
printf("\n");
|
||||
/* calc_ic(get_bp(), 0xffff);*/
|
||||
/* find_multi_parented();*/
|
||||
common_subsumer(get_term("GO:0000003"), get_term("GO:0000004"));
|
||||
}
|
||||
|
||||
void godist_exit() {
|
||||
@ -232,12 +251,39 @@ void find_multi_parented() {
|
||||
}
|
||||
}
|
||||
|
||||
float calc_ic(struct node *n, unsigned int evidence) {
|
||||
void calculate_ics(unsigned int evidence) {
|
||||
int i;
|
||||
for (i=0; i<term_array_size; i++)
|
||||
calc_ic(term_array[i], evidence);
|
||||
}
|
||||
|
||||
void calc_ic(struct node *n, unsigned int evidence) {
|
||||
int i;
|
||||
float ann=0.0;
|
||||
for (i=0; i<12; i++)
|
||||
if (evidence & 1<<i)
|
||||
ann += (float) n->acc_evidence[i];
|
||||
printf("%f", ann);
|
||||
n->ic = -log(ann/total_ann);
|
||||
printf("%f\n", n->ic);
|
||||
}
|
||||
|
||||
struct node *common_subsumer(struct node *n1, struct node *n2) {
|
||||
struct node *anc1[MAX_NODES];
|
||||
struct node *anc2[MAX_NODES];
|
||||
int ancc1=0, ancc2=0;
|
||||
|
||||
add_ancestors(&ancc1, anc1, n1);
|
||||
add_ancestors(&ancc2, anc2, n2);
|
||||
printf("Ancestors: %d %d\n", ancc1, ancc2);
|
||||
}
|
||||
|
||||
void add_ancestors(int *ancc, struct node *anc[], struct node *n) {
|
||||
int i=0;
|
||||
anc[(*ancc)++] = n;
|
||||
for (i=0; i<n->parentc; i++)
|
||||
add_ancestors(ancc, anc, n->parents[i]);
|
||||
}
|
||||
|
||||
float resnik(struct node *n1, struct node *n2) {
|
||||
}
|
||||
|
||||
|
@ -48,6 +48,8 @@ struct node {
|
||||
/* NOTE(review): the objects below are defined (not declared extern) in what
 * appears to be a header; confirm it is included by only one translation
 * unit, otherwise each includer gets its own definition. */
/* Table of term nodes; entries 0 .. term_array_size-1 are the ones the
 * code iterates over. */
struct node* term_array[MAX_NODES];
|
||||
/* Number of entries of term_array in use (loop bound when walking it). */
long term_array_size;
|
||||
/* Edge counter: reset in godist_init() and reported there as the number
 * of edges read. */
int link_count;
|
||||
/* Total annotation count: godist_init() sets it to the sum of the root
 * term's acc_evidence[] counters selected by `evidence`; calc_ic() uses it
 * as the denominator of the information-content ratio. */
int total_ann;
|
||||
int evidence; /* bitvector with one bit per evidence code */
|
||||
|
||||
/* Ontology initialisation functions. */
|
||||
int godist_init();
|
||||
@ -58,6 +60,10 @@ void accumulate_evidence(struct node*);
|
||||
/* Distance metric functions */
|
||||
float resnik_distance(char *term1, char *term2);
|
||||
float fussimeg_distance(char *term1, char *term2);
|
||||
void calc_ic(struct node *n, unsigned int evidence);
|
||||
|
||||
void clear_flags(struct node *n);
|
||||
void print_term(struct node *n);
|
||||
|
||||
#endif
|
||||
|
||||
|
Reference in New Issue
Block a user