From cc911f28df88f45ba14ae3bf7edc1eb56e212183 Mon Sep 17 00:00:00 2001 From: Fredrik Robertsen Date: Sun, 1 Feb 2026 18:33:07 +0100 Subject: [PATCH] it works! --- src/common.odin | 37 +-- src/feature_selection.odin | 140 +++++++++ src/ga.odin | 185 +++++++++++ src/knapsack.odin | 55 ++++ src/linreg.odin | 194 ++---------- src/main.odin | 610 ++----------------------------------- 6 files changed, 436 insertions(+), 785 deletions(-) create mode 100644 src/feature_selection.odin create mode 100644 src/ga.odin create mode 100644 src/knapsack.odin diff --git a/src/common.odin b/src/common.odin index b107fa1..72a9345 100644 --- a/src/common.odin +++ b/src/common.odin @@ -2,39 +2,24 @@ package main import "core:container/bit_array" -// Knapsack -OUTPUT_FILE :: "output/data.csv" -DATA_FILE :: "res/knapPI_12_500_1000_82.csv" -NUMBER_OF_ITEMS :: 500 -CAPACITY :: 280785 - -Item :: struct { - profit, weight: int, -} - -// Feature selection -DATASET_FILE :: "res/dataset.csv" -NUMBER_OF_FEATURES :: 100 -DATASET_ROWS :: 1994 - -Dataset_Record :: struct { - features: [NUMBER_OF_FEATURES]f64, - target: f64, -} -Dataset :: #soa[DATASET_ROWS]Dataset_Record - -// GA Chromosome :: ^bit_array.Bit_Array Population :: [POPULATION_SIZE]Chromosome + POPULATION_SIZE :: 100 GENERATIONS :: 100 TOURNAMENT_SIZE :: 3 CROSSOVER_RATE :: 0.8 MUTATION_RATE :: 0.01 RANDOM_SEED :: u64(42) +OUTPUT_FILE :: "output/data.csv" -// stats -Data :: struct { - best, worst: int, - mean: f32, +Problem :: struct { + name: string, + chromosome_size: int, + fitness_proc: proc(_: Chromosome) -> f64, + maximize: bool, // true = higher better, false = lower better +} + +Stats :: struct { + best, mean, worst: f64, } diff --git a/src/feature_selection.odin b/src/feature_selection.odin new file mode 100644 index 0000000..888caf4 --- /dev/null +++ b/src/feature_selection.odin @@ -0,0 +1,140 @@ +package main + +import "core:container/bit_array" +import "core:encoding/csv" +import "core:fmt" +import "core:math" +import "core:os" +import "core:strconv" + +NUMBER_OF_FEATURES :: 100 +DATASET_ROWS :: 1994 +DATASET_FILE :: "res/dataset.csv" + +Dataset_Record :: struct { + features: [NUMBER_OF_FEATURES]f64, + target: f64, +} +Dataset :: #soa[DATASET_ROWS]Dataset_Record + +Feature_State :: struct { + dataset: Dataset, +} + +feature_state: Feature_State + +load_feature_data :: proc() -> bool { + file_data := os.read_entire_file(DATASET_FILE) or_return + defer delete(file_data) + + r: csv.Reader + csv.reader_init_with_string(&r, string(file_data)) + defer csv.reader_destroy(&r) + + r.trim_leading_space = true + r.reuse_record = true + + idx := 0 + for { + record, err := csv.read(&r) + if err != nil {break} + if idx >= DATASET_ROWS {break} + + for i in 0 ..< NUMBER_OF_FEATURES { + feature_state.dataset[idx].features[i] = strconv.parse_f64(record[i]) or_return + } + feature_state.dataset[idx].target = strconv.parse_f64(record[NUMBER_OF_FEATURES]) or_return + + idx += 1 + } + + return idx == DATASET_ROWS +} + +fitness_features :: proc(chrom: Chromosome) -> f64 { + // Extract selected features + X, y := get_selected_features(feature_state.dataset, chrom) + if X == nil { + return math.F64_MAX + } + defer { + for row in X {delete(row)} + delete(X) + delete(y) + } + + // Train/test split + X_train, X_test, y_train, y_test := train_test_split(X, y, 0.2, RANDOM_SEED) + defer { + for row in X_train {delete(row)} + for row in X_test {delete(row)} + delete(X_train) + delete(X_test) + delete(y_train) + delete(y_test) + } + + // Train and evaluate + beta := train_linear_regression(X_train, y_train) + defer delete(beta) + + predictions := predict(X_test, beta) + defer delete(predictions) + + return rmse(predictions, y_test) +} + +get_selected_features :: proc(dataset: Dataset, chrom: Chromosome) -> ([][]f64, []f64) { + n_rows := len(dataset) + + // Count selected features + selected_count := 0 + for i in 0 ..< NUMBER_OF_FEATURES { + if bit_array.get(chrom, i) { + selected_count += 1 + } + } + + if selected_count == 0 { + return nil, nil + } + + // Allocate + X := make([][]f64, n_rows) + y := make([]f64, n_rows) + + // Extract + for i in 0 ..< n_rows { + X[i] = make([]f64, selected_count) + col_idx := 0 + for j in 0 ..< NUMBER_OF_FEATURES { + if bit_array.get(chrom, j) { + X[i][col_idx] = dataset[i].features[j] + col_idx += 1 + } + } + y[i] = dataset[i].target + } + + return X, y +} + +run_baseline :: proc() -> f64 { + all_features := bit_array.create(NUMBER_OF_FEATURES) + defer bit_array.destroy(all_features) + + for i in 0 ..< NUMBER_OF_FEATURES { + bit_array.set(all_features, i, true) + } + + return fitness_features(all_features) +} + +feature_selection_problem :: proc() -> Problem { + return Problem { + name = "Feature Selection", + chromosome_size = NUMBER_OF_FEATURES, + fitness_proc = fitness_features, + maximize = false, + } +} diff --git a/src/ga.odin b/src/ga.odin new file mode 100644 index 0000000..8da4b18 --- /dev/null +++ b/src/ga.odin @@ -0,0 +1,185 @@ +package main + +import "core:container/bit_array" +import "core:encoding/csv" +import "core:fmt" +import "core:math" +import "core:math/rand" +import "core:os" + +run_ga :: proc(problem: Problem) { + fmt.printfln("=== Running GA: %s ===", problem.name) + + population := generate_population(problem.chromosome_size) + defer destroy_population(&population) + + generation_stats := make([dynamic]Stats, 0, GENERATIONS) + defer delete(generation_stats) + + for gen in 0 ..< GENERATIONS { + fitnesses := evaluate_population(&population, problem.fitness_proc) + stats := compute_stats(fitnesses[:], problem.maximize) + append(&generation_stats, stats) + + fmt.printfln( + "Gen %d: Best=%.4f Mean=%.4f Worst=%.4f", + gen, + stats.best, + stats.mean, + stats.worst, + ) + + offspring := create_offspring(&population, fitnesses[:], problem.maximize) + destroy_population(&population) + population = offspring + } + + write_results(OUTPUT_FILE, generation_stats[:]) + fmt.printfln("Results written to %s\n", OUTPUT_FILE) +} + +generate_population :: proc(size: int) -> Population { + pop: Population + for i in 0 ..< POPULATION_SIZE { + pop[i] = bit_array.create(size) + for j in 0 ..< size { + bit_array.set(pop[i], j, rand.int_max(2) == 1) + } + } + return pop +} + +destroy_population :: proc(pop: ^Population) { + for chrom in pop { + bit_array.destroy(chrom) + } +} + +evaluate_population :: proc( + pop: ^Population, + fitness_proc: proc(_: Chromosome) -> f64, +) -> [POPULATION_SIZE]f64 { + fitnesses: [POPULATION_SIZE]f64 + for chrom, i in pop { + fitnesses[i] = fitness_proc(chrom) + } + return fitnesses +} + +compute_stats :: proc(fitnesses: []f64, maximize: bool) -> Stats { + best := maximize ? -math.F64_MAX : math.F64_MAX + worst := maximize ? math.F64_MAX : -math.F64_MAX + sum := 0.0 + + for f in fitnesses { + if maximize { + best = max(best, f) + worst = min(worst, f) + } else { + best = min(best, f) + worst = max(worst, f) + } + sum += f + } + + return {best, sum / f64(len(fitnesses)), worst} +} + +tournament_selection :: proc(pop: ^Population, fitnesses: []f64, maximize: bool) -> Chromosome { + best_idx := rand.int_max(POPULATION_SIZE) + best_fitness := fitnesses[best_idx] + + for _ in 1 ..< TOURNAMENT_SIZE { + idx := rand.int_max(POPULATION_SIZE) + is_better := maximize ? fitnesses[idx] > best_fitness : fitnesses[idx] < best_fitness + + if is_better { + best_idx = idx + best_fitness = fitnesses[idx] + } + } + + return pop[best_idx] +} + +two_point_crossover :: proc(parent1, parent2: Chromosome) -> (Chromosome, Chromosome) { + size := bit_array.len(parent1) + point1 := rand.int_max(size) + point2 := rand.int_max(size) + if point1 > point2 { + point1, point2 = point2, point1 + } + + child1 := bit_array.create(size) + child2 := bit_array.create(size) + + for i in 0 ..< size { + in_swap := i >= point1 && i < point2 + if in_swap { + bit_array.set(child1, i, bit_array.get(parent2, i)) + bit_array.set(child2, i, bit_array.get(parent1, i)) + } else { + bit_array.set(child1, i, bit_array.get(parent1, i)) + bit_array.set(child2, i, bit_array.get(parent2, i)) + } + } + + return child1, child2 +} + +bit_flip_mutation :: proc(chrom: Chromosome) { + for i in 0 ..< bit_array.len(chrom) { + if rand.float32() < f32(MUTATION_RATE) { + bit_array.set(chrom, i, !bit_array.get(chrom, i)) + } + } +} + +create_offspring :: proc(pop: ^Population, fitnesses: []f64, maximize: bool) -> Population { + offspring: Population + + for i := 0; i < POPULATION_SIZE; i += 2 { + parent1 := tournament_selection(pop, fitnesses, maximize) + parent2 := tournament_selection(pop, fitnesses, maximize) + + child1, child2 := two_point_crossover(parent1, parent2) + + bit_flip_mutation(child1) + if i + 1 < POPULATION_SIZE { + bit_flip_mutation(child2) + offspring[i + 1] = child2 + } else { + bit_array.destroy(child2) + } + + offspring[i] = child1 + } + + return offspring +} + +write_results :: proc(filename: string, stats: []Stats) -> bool { + handle, err := os.open(filename, os.O_CREATE | os.O_WRONLY | os.O_TRUNC, 0o644) + if err != os.ERROR_NONE {return false} + defer os.close(handle) + + w: csv.Writer + csv.writer_init(&w, os.stream_from_handle(handle)) + + csv.write(&w, []string{"Generation", "Best", "Mean", "Worst"}) + + for stat, gen in stats { + csv.write( + &w, + []string { + fmt.tprintf("%d", gen), + fmt.tprintf("%.6f", stat.best), + fmt.tprintf("%.6f", stat.mean), + fmt.tprintf("%.6f", stat.worst), + }, + ) + } + + csv.writer_flush(&w) + return true +} diff --git a/src/knapsack.odin b/src/knapsack.odin new file mode 100644 index 0000000..5a4fb1b --- /dev/null +++ b/src/knapsack.odin @@ -0,0 +1,55 @@ +package main + +import "core:container/bit_array" +import "core:os" +import "core:strconv" +import "core:strings" + +NUMBER_OF_ITEMS :: 500 +CAPACITY :: 280785 +DATA_FILE :: "res/knapPI_12_500_1000_82.csv" + +Item :: struct { + profit, weight: int, +} + +Knapsack_State :: struct { + items: [NUMBER_OF_ITEMS]Item, +} + +knapsack_state: Knapsack_State + +load_knapsack_data :: proc() -> bool { + data := string(os.read_entire_file(DATA_FILE) or_return) + defer delete(data) + + for line, idx in strings.split_lines(strings.trim_space(data))[1:] { + s := strings.split(line, ",") + defer delete(s) + knapsack_state.items[idx] = { + strconv.parse_int(s[1]) or_return, + strconv.parse_int(s[2]) or_return, + } + } + return true +} + +fitness_knapsack :: proc(chrom: Chromosome) -> f64 { + tot_profit, tot_weight := 0, 0 + for idx in 0 ..< NUMBER_OF_ITEMS { + if !bit_array.get(chrom, idx) {continue} + tot_profit += knapsack_state.items[idx].profit + tot_weight += knapsack_state.items[idx].weight + } + penalty := 500 * max(tot_weight - CAPACITY, 0) + return f64(tot_profit - penalty) +} + +knapsack_problem :: proc() -> Problem { + return Problem { + name = "Knapsack", + chromosome_size = NUMBER_OF_ITEMS, + fitness_proc = fitness_knapsack, + maximize = true, + } +} diff --git a/src/linreg.odin b/src/linreg.odin index 0fc00fe..8e29115 100644 --- a/src/linreg.odin +++ b/src/linreg.odin @@ -1,14 +1,12 @@ package main -import "core:container/bit_array" import "core:math" import "core:math/rand" -// Solves Ax = b using Gaussian elimination with partial pivoting +// Gaussian elimination solver solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 { n := len(A) - // Create augmented matrix [A|b] aug := make([][]f64, n) for i in 0 ..< n { aug[i] = make([]f64, n + 1) @@ -22,7 +20,6 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 { // Forward elimination with partial pivoting for col in 0 ..< n { - // Find pivot (largest absolute value in column) max_row := col for row in col + 1 ..< n { if math.abs(aug[row][col]) > math.abs(aug[max_row][col]) { @@ -30,19 +27,15 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 { } } - // Swap rows if max_row != col { aug[col], aug[max_row] = aug[max_row], aug[col] } - // Check for singular matrix if math.abs(aug[col][col]) < 1e-10 { - // Matrix is singular, return zero vector x := make([]f64, n) return x } - // Eliminate column entries below pivot for row in col + 1 ..< n { factor := aug[row][col] / aug[col][col] for j in col ..< n + 1 { @@ -54,7 +47,7 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 { // Back substitution x := make([]f64, n) for i in 0 ..< n { - row := n - 1 - i // Process from bottom to top + row := n - 1 - i x[row] = aug[row][n] for j in row + 1 ..< n { x[row] -= aug[row][j] * x[j] @@ -65,12 +58,12 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 { return x } -// Linear regression using normal equation: β = (X^T X)^-1 X^T y +// Linear regression train_linear_regression :: proc(X: [][]f64, y: []f64) -> []f64 { n := len(X) m := len(X[0]) - // Compute X^T X + // X^T X XtX := make([][]f64, m) for i in 0 ..< m { XtX[i] = make([]f64, m) @@ -87,7 +80,7 @@ train_linear_regression :: proc(X: [][]f64, y: []f64) -> []f64 { delete(XtX) } - // Compute X^T y + // X^T y Xty := make([]f64, m) defer delete(Xty) for i in 0 ..< m { @@ -98,9 +91,7 @@ train_linear_regression :: proc(X: [][]f64, y: []f64) -> []f64 { Xty[i] = sum } - // Solve (X^T X) β = X^T y using Gaussian elimination - beta := solve_linear_system(XtX, Xty) - return beta + return solve_linear_system(XtX, Xty) } predict :: proc(X: [][]f64, beta: []f64) -> []f64 { @@ -124,44 +115,45 @@ rmse :: proc(predictions: []f64, actual: []f64) -> f64 { return math.sqrt(sum / f64(len(predictions))) } +// Train/test split train_test_split :: proc( X: [][]f64, y: []f64, test_size: f64 = 0.2, random_seed: u64 = 0, ) -> ( - X_train, X_test: [][]f64, - y_train, y_test: []f64, + [][]f64, + [][]f64, + []f64, + []f64, ) { n := len(X) - test_count := int(f64(n) * test_size) - train_count := n - test_count - if n == 0 || len(X[0]) == 0 { return nil, nil, nil, nil } n_features := len(X[0]) + test_count := int(f64(n) * test_size) + train_count := n - test_count - // Create shuffled indices + // Shuffle indices indices := make([]int, n) defer delete(indices) for i in 0 ..< n { indices[i] = i } - // Shuffle rng := rand.create(random_seed) context.random_generator = rand.default_random_generator(&rng) rand.shuffle(indices[:]) - // Allocate splits - X_train = make([][]f64, train_count) - X_test = make([][]f64, test_count) - y_train = make([]f64, train_count) - y_test = make([]f64, test_count) + // Allocate + X_train := make([][]f64, train_count) + X_test := make([][]f64, test_count) + y_train := make([]f64, train_count) + y_test := make([]f64, test_count) - // Copy training data (DEEP COPY) + // Copy training data for i in 0 ..< train_count { idx := indices[i] X_train[i] = make([]f64, n_features) @@ -169,7 +161,7 @@ train_test_split :: proc( y_train[i] = y[idx] } - // Copy test data (DEEP COPY) + // Copy test data for i in 0 ..< test_count { idx := indices[train_count + i] X_test[i] = make([]f64, n_features) @@ -177,147 +169,5 @@ train_test_split :: proc( y_test[i] = y[idx] } - return -} - - -// Extract columns based on bit_array chromosome -get_columns :: proc(X: [][]f64, chrom: ^bit_array.Bit_Array) -> [][]f64 { - n_rows := len(X) - n_cols := bit_array.len(chrom) - - // Count selected features - selected_count := 0 - for i in 0 ..< n_cols { - if bit_array.get(chrom, i) { - selected_count += 1 - } - } - - if selected_count == 0 { - return nil - } - - // Create subset with only selected columns - subset := make([][]f64, n_rows) - for i in 0 ..< n_rows { - subset[i] = make([]f64, selected_count) - col_idx := 0 - for j in 0 ..< n_cols { - if bit_array.get(chrom, j) { - subset[i][col_idx] = X[i][j] - col_idx += 1 - } - } - } - - return subset -} - -// Fitness function for feature selection -get_fitness :: proc( - X: [][]f64, - y: []f64, - chrom: ^bit_array.Bit_Array, - random_seed: u64 = 0, -) -> f64 { - X_selected := get_columns(X, chrom) - if X_selected == nil { - return math.F64_MAX - } - defer { - for row in X_selected {delete(row)} - delete(X_selected) - } - - // Split data - X_train, X_test, y_train, y_test := train_test_split(X_selected, y, 0.2, random_seed) - defer { - delete(X_train) - delete(X_test) - delete(y_train) - delete(y_test) - } - - // Train model - beta := train_linear_regression(X_train, y_train) - defer delete(beta) - - // Predict on test set - predictions := predict(X_test, beta) - defer delete(predictions) - - // Return RMSE - return rmse(predictions, y_test) -} - -// Extract selected features from dataset based on chromosome -get_selected_features :: proc(dataset: Dataset, chrom: Chromosome) -> (X: [][]f64, y: []f64) { - n_rows := len(dataset) - n_features := bit_array.len(chrom) - - // Count selected features - selected_count := 0 - for i in 0 ..< n_features { - if bit_array.get(chrom, i) { - selected_count += 1 - } - } - - if selected_count == 0 { - return nil, nil - } - - // Allocate - X = make([][]f64, n_rows) - y = make([]f64, n_rows) - - // Extract - for i in 0 ..< n_rows { - X[i] = make([]f64, selected_count) - col_idx := 0 - for j in 0 ..< n_features { - if bit_array.get(chrom, j) { - X[i][col_idx] = dataset[i].features[j] - col_idx += 1 - } - } - y[i] = dataset[i].target - } - - return X, y -} - - -// Fitness for feature selection (returns RMSE) -fitness_feature_selection :: proc( - dataset: Dataset, - chrom: Chromosome, - random_seed: u64 = 0, -) -> f64 { - X, y := get_selected_features(dataset, chrom) - if X == nil { - return math.F64_MAX - } - defer { - for row in X {delete(row)} - delete(X) - delete(y) - } - - X_train, X_test, y_train, y_test := train_test_split(X, y, 0.2, random_seed) - defer { - delete(X_train) - delete(X_test) - delete(y_train) - delete(y_test) - } - - beta := train_linear_regression(X_train, y_train) - defer delete(beta) - - predictions := predict(X_test, beta) - defer delete(predictions) - - return rmse(predictions, y_test) + return X_train, X_test, y_train, y_test } diff --git a/src/main.odin b/src/main.odin index 5ba22f2..54c6d5d 100644 --- a/src/main.odin +++ b/src/main.odin @@ -1,595 +1,31 @@ package main -import "core:container/bit_array" -import "core:encoding/csv" import "core:fmt" -import "core:math" -import "core:math/rand" -import "core:os" -import "core:slice" -import "core:strconv" -import "core:strings" - -dataset: Dataset -items: [NUMBER_OF_ITEMS]Item -stats: [GENERATIONS]Data - -read_data :: proc(file: string) -> (res: [NUMBER_OF_ITEMS]Item, ok := true) { - data := string(os.read_entire_file(file) or_return) - defer delete(data) - - for line, idx in strings.split_lines(strings.trim_space(data))[1:] { - s := strings.split(line, ",") - res[idx] = {strconv.parse_int(s[1]) or_return, strconv.parse_int(s[2]) or_return} - } - return -} - -load_dataset :: proc(filename: string) -> (data: Dataset, ok := true) { - file_data := os.read_entire_file(filename) or_return - defer delete(file_data) - - r: csv.Reader - csv.reader_init_with_string(&r, string(file_data)) - defer csv.reader_destroy(&r) - - r.trim_leading_space = true - r.reuse_record = true - - idx := 0 - for { - record, err := csv.read(&r) - if err != nil {break} - if idx >= DATASET_ROWS {break} - - // Parse features (columns 0-99) - for i in 0 ..< NUMBER_OF_FEATURES { - data[idx].features[i] = strconv.parse_f64(record[i]) or_return - } - - // Parse target (column 100) - data[idx].target = strconv.parse_f64(record[NUMBER_OF_FEATURES]) or_return - - idx += 1 - } - - return data, idx == DATASET_ROWS -} - - -write_results :: proc(filename: string, stats: []Data) -> bool { - handle, err := os.open(filename, os.O_CREATE | os.O_WRONLY | os.O_TRUNC, 0o644) - if err != os.ERROR_NONE {return false} - defer os.close(handle) - - w: csv.Writer - csv.writer_init(&w, os.stream_from_handle(handle)) - - csv.write(&w, []string{"generation", "best", "worst", "mean"}) - - for stat, gen in stats { - csv.write( - &w, - []string { - fmt.tprintf("%d", gen), - fmt.tprintf("%d", stat.best), - fmt.tprintf("%d", stat.worst), - fmt.tprintf("%f", stat.mean), - }, - ) - } - - csv.writer_flush(&w) - return true -} - - -fitness :: proc(chrom: Chromosome) -> int { - tot_profit, tot_weight := 0, 0 - for idx in 0 ..< bit_array.len(chrom) { - if !bit_array.get(chrom, idx) {continue} - tot_profit += items[idx].profit - tot_weight += items[idx].weight - } - return tot_profit - 500 * max(tot_weight - CAPACITY, 0) -} - -fitness_rmse :: proc(chrom: Chromosome) -> f64 { - return fitness_feature_selection(dataset, chrom, RANDOM_SEED) -} - -create_random_chromosome :: proc(size: int = NUMBER_OF_ITEMS) -> Chromosome { - chrom := bit_array.create(size) - for i in 0 ..< size { - bit_array.set(chrom, i, rand.int_max(2) == 1) - } - return chrom -} - -copy_chromosome :: proc(src: Chromosome) -> Chromosome { - dest := bit_array.create(NUMBER_OF_ITEMS) - for i in 0 ..< NUMBER_OF_ITEMS { - bit_array.set(dest, i, bit_array.get(src, i)) - } - return dest -} - -generate_population :: proc() -> Population { - pop: Population - for i in 0 ..< POPULATION_SIZE { - pop[i] = create_random_chromosome() - } - return pop -} - -generate_population_features :: proc() -> Population { - pop: Population - for i in 0 ..< POPULATION_SIZE { - pop[i] = create_random_chromosome(NUMBER_OF_FEATURES) - } - return pop -} - -destroy_population :: proc(pop: ^Population) { - for chrom in pop { - bit_array.destroy(chrom) - } -} - -evaluate_population :: proc(pop: ^Population) -> [POPULATION_SIZE]int { - fitnesses: [POPULATION_SIZE]int - for chrom, i in pop { - fitnesses[i] = fitness(chrom) - } - return fitnesses -} - -evaluate_population_rmse :: proc(pop: ^Population) -> [POPULATION_SIZE]f64 { - fitnesses: [POPULATION_SIZE]f64 - for chrom, i in pop { - fitnesses[i] = fitness_rmse(chrom) - } - return fitnesses -} - -tournament_selection :: proc( - pop: ^Population, - fitnesses: []int, - k := TOURNAMENT_SIZE, -) -> Chromosome { - best_idx := rand.int_max(POPULATION_SIZE) - best_fitness := fitnesses[best_idx] - - for _ in 1 ..< k { - idx := rand.int_max(POPULATION_SIZE) - if fitnesses[idx] > best_fitness { - best_idx = idx - best_fitness = fitnesses[idx] - } - } - - return pop[best_idx] -} - -tournament_selection_rmse :: proc(pop: ^Population, fitnesses: []f64) -> Chromosome { - best_idx := rand.int_max(POPULATION_SIZE) - best_fitness := fitnesses[best_idx] - - for _ in 1 ..< TOURNAMENT_SIZE { - idx := rand.int_max(POPULATION_SIZE) - if fitnesses[idx] < best_fitness { // Lower is better - best_idx = idx - best_fitness = fitnesses[idx] - } - } - - return pop[best_idx] -} - - -roulette_selection :: proc(pop: ^Population, fitnesses: []int) -> Chromosome { - total_fitness := 0 - for f in fitnesses { - total_fitness += max(f, 0) - } - - if total_fitness == 0 { - return pop[rand.int_max(POPULATION_SIZE)] - } - - spin := rand.float32() * f32(total_fitness) - running_sum := 0 - - for fitness, idx in fitnesses { - running_sum += max(fitness, 0) - if f32(running_sum) >= spin { - return pop[idx] - } - } - - return pop[POPULATION_SIZE - 1] -} - -single_point_crossover :: proc(parent1, parent2: Chromosome) -> (child1, child2: Chromosome) { - point := rand.int_max(NUMBER_OF_ITEMS) - child1 = bit_array.create(NUMBER_OF_ITEMS) - child2 = bit_array.create(NUMBER_OF_ITEMS) - - for i in 0 ..< NUMBER_OF_ITEMS { - if i < point { - bit_array.set(child1, i, bit_array.get(parent1, i)) - bit_array.set(child2, i, bit_array.get(parent2, i)) - } else { - bit_array.set(child1, i, bit_array.get(parent2, i)) - bit_array.set(child2, i, bit_array.get(parent1, i)) - } - } - - return -} - -two_point_crossover :: proc(parent1, parent2: Chromosome) -> (child1, child2: Chromosome) { - point1 := rand.int_max(NUMBER_OF_ITEMS) - point2 := rand.int_max(NUMBER_OF_ITEMS) - if point1 > point2 { - point1, point2 = point2, point1 - } - - child1 = bit_array.create(NUMBER_OF_ITEMS) - child2 = bit_array.create(NUMBER_OF_ITEMS) - - for i in 0 ..< NUMBER_OF_ITEMS { - in_swap := i >= point1 && i < point2 - if in_swap { - bit_array.set(child1, i, bit_array.get(parent2, i)) - bit_array.set(child2, i, bit_array.get(parent1, i)) - } else { - bit_array.set(child1, i, bit_array.get(parent1, i)) - bit_array.set(child2, i, bit_array.get(parent2, i)) - } - } - - return -} - -uniform_crossover :: proc( - parent1, parent2: Chromosome, - prob := f32(0.5), -) -> ( - child1, child2: Chromosome, -) { - child1 = bit_array.create(NUMBER_OF_ITEMS) - child2 = bit_array.create(NUMBER_OF_ITEMS) - - for i in 0 ..< NUMBER_OF_ITEMS { - if rand.float32() < prob { - bit_array.set(child1, i, bit_array.get(parent1, i)) - bit_array.set(child2, i, bit_array.get(parent2, i)) - } else { - bit_array.set(child1, i, bit_array.get(parent2, i)) - bit_array.set(child2, i, bit_array.get(parent1, i)) - } - } - - return -} - -bit_flip_mutation :: proc(chrom: Chromosome, mutation_rate := MUTATION_RATE) { - for i in 0 ..< NUMBER_OF_ITEMS { - if rand.float32() < f32(mutation_rate) { - bit_array.set(chrom, i, !bit_array.get(chrom, i)) - } - } -} - -swap_mutation :: proc(chrom: Chromosome) { - idx1 := rand.int_max(NUMBER_OF_ITEMS) - idx2 := rand.int_max(NUMBER_OF_ITEMS) - bit1 := bit_array.get(chrom, idx1) - bit2 := bit_array.get(chrom, idx2) - bit_array.set(chrom, idx1, bit2) - bit_array.set(chrom, idx2, bit1) -} - -inversion_mutation :: proc(chrom: Chromosome) { - point1 := rand.int_max(NUMBER_OF_ITEMS) - point2 := rand.int_max(NUMBER_OF_ITEMS) - if point1 > point2 { - point1, point2 = point2, point1 - } - - for i in 0 ..< (point2 - point1) / 2 { - idx1 := point1 + i - idx2 := point2 - i - 1 - bit1 := bit_array.get(chrom, idx1) - bit2 := bit_array.get(chrom, idx2) - bit_array.set(chrom, idx1, bit2) - bit_array.set(chrom, idx2, bit1) - } -} - -create_offspring :: proc(population: ^Population, fitnesses: []int) -> Population { - offspring: Population - - for i := 0; i < POPULATION_SIZE; i += 2 { - parent1 := tournament_selection(population, fitnesses) - parent2 := tournament_selection(population, fitnesses) - - child1, child2: Chromosome - if rand.float32() < CROSSOVER_RATE { - child1, child2 = single_point_crossover(parent1, parent2) - } else { - child1 = copy_chromosome(parent1) - child2 = copy_chromosome(parent2) - } - - bit_flip_mutation(child1) - bit_flip_mutation(child2) - - offspring[i] = child1 - if i + 1 < POPULATION_SIZE { - offspring[i + 1] = child2 - } else { - bit_array.destroy(child2) - } - } - - return offspring -} - -elitism_survivor_selection :: proc( - parents: ^Population, - offspring: ^Population, - parent_fitnesses: []int, - offspring_fitnesses: []int, -) -> Population { - Index_Fitness :: struct { - idx: int, - fitness: int, - is_parent: bool, - } - - combined := make([]Index_Fitness, POPULATION_SIZE * 2) - defer delete(combined) - - for i in 0 ..< POPULATION_SIZE { - combined[i] = {i, parent_fitnesses[i], true} - combined[POPULATION_SIZE + i] = {i, offspring_fitnesses[i], false} - } - - slice.sort_by(combined[:], proc(a, b: Index_Fitness) -> bool { - return a.fitness > b.fitness - }) - - survivors: Population - for i in 0 ..< POPULATION_SIZE { - entry := combined[i] - source := parents[entry.idx] if entry.is_parent else offspring[entry.idx] - survivors[i] = copy_chromosome(source) - } - - return survivors -} - -compute_stats :: proc(fitnesses: []int) -> Data { - best, worst := math.min(int), math.max(int) - sum := 0 - for f in fitnesses { - best = max(best, f) - worst = min(worst, f) - sum += f - } - return {best, worst, f32(sum) / f32(len(fitnesses))} -} - -compute_stats_rmse :: proc(fitnesses: []f64) -> [3]f64 { - best := math.F64_MAX - worst := -math.F64_MAX - sum := 0.0 - - for f in fitnesses { - best = min(best, f) // Lower is better - worst = max(worst, f) // Higher is worse - sum += f - } - - mean := sum / f64(len(fitnesses)) - return {best, mean, worst} -} - -run_ga :: proc() { - population := generate_population() - defer destroy_population(&population) - - best_fitness := math.min(int) - best_generation := 0 - best_chromosome: Chromosome - - for gen in 0 ..< GENERATIONS { - fitnesses := evaluate_population(&population) - stats[gen] = compute_stats(fitnesses[:]) - - for f, i in fitnesses { - if f <= best_fitness {continue} - best_fitness = f - best_generation = gen - bit_array.destroy(best_chromosome) - best_chromosome = copy_chromosome(population[i]) - tot_profit, tot_weight := 0, 0 - for idx in 0 ..< bit_array.len(best_chromosome) { - if !bit_array.get(best_chromosome, idx) {continue} - tot_profit += items[idx].profit - tot_weight += items[idx].weight - } - fmt.printfln( - "Gen %d: Fitness=%d, Profit=%d, Weight=%d/%d", - gen, - f, - tot_profit, - tot_weight, - CAPACITY, - ) - } - - offspring := create_offspring(&population, fitnesses[:]) - offspring_fitnesses := evaluate_population(&offspring) - - new_population := elitism_survivor_selection( - &population, - &offspring, - fitnesses[:], - offspring_fitnesses[:], - ) - - destroy_population(&population) - destroy_population(&offspring) - - population = new_population - } - - fmt.printfln("\nFinal Best: Fitness=%d (Generation %d)", best_fitness, best_generation) - fmt.println("this solution is the following bit-string:") - for i in 0 ..< best_chromosome.length { - b := bit_array.get(best_chromosome, i) - fmt.print(i32(b)) - } - fmt.println() - write_results(OUTPUT_FILE, stats[:]) - fmt.println("successfully wrote data to", OUTPUT_FILE) -} - -run_baseline :: proc() -> f64 { - all_features := bit_array.create(NUMBER_OF_FEATURES) - defer bit_array.destroy(all_features) - - // Select all features - for i in 0 ..< NUMBER_OF_FEATURES { - bit_array.set(all_features, i, true) - } - - return fitness_feature_selection(dataset, all_features, RANDOM_SEED) -} - -create_offspring_rmse :: proc(pop: ^Population, fitnesses: []f64) -> Population { - offspring: Population - - for i := 0; i < POPULATION_SIZE; i += 2 { - parent1 := tournament_selection_rmse(pop, fitnesses) - parent2 := tournament_selection_rmse(pop, fitnesses) - - child1, child2 := two_point_crossover(parent1, parent2) - - swap_mutation(child1) - if i + 1 < POPULATION_SIZE { - swap_mutation(child2) - } - - offspring[i] = child1 - if i + 1 < POPULATION_SIZE { - offspring[i + 1] = child2 - } else { - bit_array.destroy(child2) - } - } - - return offspring -} - -write_results_rmse :: proc(filename: string, stats: [][3]f64) -> bool { - handle, err := os.open(filename, os.O_CREATE | os.O_WRONLY | os.O_TRUNC, 0o644) - if err != os.ERROR_NONE {return false} - defer os.close(handle) - - w: csv.Writer - csv.writer_init(&w, os.stream_from_handle(handle)) - - csv.write(&w, []string{"Generation", "Best", "Mean", "Worst"}) - - for stat, gen in stats { - csv.write( - &w, - []string { - fmt.tprintf("%d", gen), - fmt.tprintf("%.6f", stat[0]), - fmt.tprintf("%.6f", stat[1]), - fmt.tprintf("%.6f", stat[2]), - }, - ) - } - - csv.writer_flush(&w) - return true -} - -run_ga_feature_selection :: proc() { - population := generate_population_features() - defer destroy_population(&population) - - generation_stats := make([dynamic][3]f64, 0, GENERATIONS) - defer delete(generation_stats) - - for gen in 0 ..< GENERATIONS { - fitnesses := evaluate_population_rmse(&population) - stats := compute_stats_rmse(fitnesses[:]) - append(&generation_stats, stats) - - fmt.printfln("Gen %d: Best=%.4f Mean=%.4f Worst=%.4f", gen, stats[0], stats[1], stats[2]) - - // Create offspring - offspring := create_offspring_rmse(&population, fitnesses[:]) - defer destroy_population(&offspring) - - // Replace population - destroy_population(&population) - population = offspring - } - - // Write results - write_results_rmse(OUTPUT_FILE, generation_stats[:]) - - // Final best solution - final_fitnesses := evaluate_population_rmse(&population) - best_idx := 0 - best_rmse := final_fitnesses[0] - for f, i in final_fitnesses { - if f < best_rmse { - best_rmse = f - best_idx = i - } - } - - // Count selected features - selected_count := 0 - for i in 0 ..< NUMBER_OF_FEATURES { - if bit_array.get(population[best_idx], i) { - selected_count += 1 - } - } - - fmt.printfln("\nBest solution: %d features selected, RMSE=%.4f", selected_count, best_rmse) -} main :: proc() { - // Load knapsack data - knapsack_data, ok := read_data(DATA_FILE) - if !ok { - fmt.eprintln("Failed to load knapsack data") - return + // Choose problem + problem_type := "feature_selection" // or "knapsack" + + problem: Problem + switch problem_type { + case "knapsack": + if !load_knapsack_data() { + fmt.eprintln("Failed to load knapsack data") + return + } + problem = knapsack_problem() + + case "feature_selection": + if !load_feature_data() { + fmt.eprintln("Failed to load feature data") + return + } + problem = feature_selection_problem() + + fmt.println("=== Baseline (All Features) ===") + baseline := run_baseline() + fmt.printfln("RMSE: %.4f\n", baseline) } - items = knapsack_data - // Load feature selection dataset - feature_data, dataset_ok := load_dataset(DATASET_FILE) - if !dataset_ok { - fmt.eprintln("Failed to load dataset from:", DATASET_FILE) - return - } - dataset = feature_data - - fmt.println("=== Baseline (All Features) ===") - baseline_rmse := run_baseline() - fmt.printfln("RMSE with all features: %.4f\n", baseline_rmse) - - fmt.println("=== GA Feature Selection ===") - run_ga_feature_selection() + run_ga(problem) }