holy what is going on

This commit is contained in:
2026-02-01 13:56:30 +01:00
parent 6beb7a6311
commit b44807ff31
5 changed files with 344 additions and 46 deletions

View File

Can't render this file because it is too large.

40
src/common.odin Normal file
View File

@@ -0,0 +1,40 @@
package main
import "core:container/bit_array"
// Knapsack
// CSV file the GA runs write their per-generation statistics to.
OUTPUT_FILE :: "output/data.csv"
// Knapsack instance read by read_data at startup.
DATA_FILE :: "res/knapPI_12_500_1000_82.csv"
// Length of the global item array and of the array returned by read_data.
NUMBER_OF_ITEMS :: 500
// Weight limit: the knapsack fitness subtracts 500 per unit of weight above it.
CAPACITY :: 280785
// One knapsack item.
Item :: struct {
profit, weight: int,
}
// Feature selection
// CSV file read by load_dataset at startup.
DATASET_FILE :: "res/dataset.csv"
// Feature columns per record; load_dataset reads columns 0..NUMBER_OF_FEATURES-1
// as features and column NUMBER_OF_FEATURES as the target.
NUMBER_OF_FEATURES :: 100
// Number of records load_dataset must parse before it reports success.
DATASET_ROWS :: 1994
// One dataset row: the feature vector and the regression target.
Dataset_Record :: struct {
features: [NUMBER_OF_FEATURES]f64,
target: f64,
}
// Whole dataset as a fixed-size #soa array of records.
Dataset :: #soa[DATASET_ROWS]Dataset_Record
// GA
// A candidate solution: pointer to a bit array with one bit per item or feature.
Chromosome :: ^bit_array.Bit_Array
// Fixed-size array of chromosomes (POPULATION_SIZE is declared just below).
Population :: [POPULATION_SIZE]Chromosome
POPULATION_SIZE :: 100
// Number of generations the GA loops run.
GENERATIONS :: 100
// Number of candidates drawn per tournament selection.
TOURNAMENT_SIZE :: 3
// NOTE(review): presumably the per-pair crossover probability and the per-bit
// mutation probability; their uses are not visible here -- confirm.
CROSSOVER_RATE :: 0.8
MUTATION_RATE :: 0.01
// Seed handed to fitness_feature_selection for the train/test shuffle.
RANDOM_SEED :: u64(42)
// stats
// Per-generation summary of the integer (knapsack) fitness values.
Data :: struct {
best, worst: int,
mean: f32,
}

View File

@@ -1,4 +1,4 @@
package utils
package main
import "core:container/bit_array"
import "core:math"
@@ -137,6 +137,12 @@ train_test_split :: proc(
test_count := int(f64(n) * test_size)
train_count := n - test_count
if n == 0 || len(X[0]) == 0 {
return nil, nil, nil, nil
}
n_features := len(X[0])
// Create shuffled indices
indices := make([]int, n)
defer delete(indices)
@@ -144,9 +150,10 @@ train_test_split :: proc(
indices[i] = i
}
// Shuffle using seed
// Shuffle
rng := rand.create(random_seed)
rand.shuffle(indices[:], rand.default_random_generator(&rng))
context.random_generator = rand.default_random_generator(&rng)
rand.shuffle(indices[:])
// Allocate splits
X_train = make([][]f64, train_count)
@@ -154,23 +161,26 @@ train_test_split :: proc(
y_train = make([]f64, train_count)
y_test = make([]f64, test_count)
// Copy training data
// Copy training data (DEEP COPY)
for i in 0 ..< train_count {
idx := indices[i]
X_train[i] = X[idx]
X_train[i] = make([]f64, n_features)
copy(X_train[i], X[idx])
y_train[i] = y[idx]
}
// Copy test data
// Copy test data (DEEP COPY)
for i in 0 ..< test_count {
idx := indices[train_count + i]
X_test[i] = X[idx]
X_test[i] = make([]f64, n_features)
copy(X_test[i], X[idx])
y_test[i] = y[idx]
}
return
}
// Extract columns based on bit_array chromosome
get_columns :: proc(X: [][]f64, chrom: ^bit_array.Bit_Array) -> [][]f64 {
n_rows := len(X)
@@ -240,3 +250,74 @@ get_fitness :: proc(
// Return RMSE
return rmse(predictions, y_test)
}
// Builds a dense feature matrix that contains only the columns whose bit is
// set in `chrom`, together with the matching target vector.
//
// Returns (nil, nil) when no feature is selected. Otherwise the caller owns
// every row of X, X itself and y, and is responsible for deleting them.
get_selected_features :: proc(dataset: Dataset, chrom: Chromosome) -> (X: [][]f64, y: []f64) {
	n_rows := len(dataset)
	// Never index past the per-record feature array, even if the chromosome
	// was created with more bits than there are features.
	n_features := min(bit_array.len(chrom), NUMBER_OF_FEATURES)

	// Resolve the selected column indices once instead of re-scanning the
	// chromosome for every dataset row.
	selected: [NUMBER_OF_FEATURES]int
	selected_count := 0
	for j in 0 ..< n_features {
		if bit_array.get(chrom, j) {
			selected[selected_count] = j
			selected_count += 1
		}
	}

	if selected_count == 0 {
		return nil, nil
	}

	// Allocate
	X = make([][]f64, n_rows)
	y = make([]f64, n_rows)

	// Extract
	for i in 0 ..< n_rows {
		row := make([]f64, selected_count)
		for src_col, dst_col in selected[:selected_count] {
			row[dst_col] = dataset[i].features[src_col]
		}
		X[i] = row
		y[i] = dataset[i].target
	}

	return X, y
}
// Fitness for feature selection (returns RMSE, lower is better).
//
// Extracts the columns selected by `chrom`, splits the rows 80/20 into train
// and test sets using `random_seed`, fits a linear regression on the training
// part and returns the RMSE of its predictions on the test part.
// Returns math.F64_MAX when the chromosome selects no feature.
fitness_feature_selection :: proc(
	dataset: Dataset,
	chrom: Chromosome,
	random_seed: u64 = 0,
) -> f64 {
	X, y := get_selected_features(dataset, chrom)
	if X == nil {
		return math.F64_MAX
	}
	defer {
		for row in X {delete(row)}
		delete(X)
		delete(y)
	}

	X_train, X_test, y_train, y_test := train_test_split(X, y, 0.2, random_seed)
	defer {
		// train_test_split deep-copies every row, so the rows are separate
		// allocations and must be released before the outer slices.
		for row in X_train {delete(row)}
		for row in X_test {delete(row)}
		delete(X_train)
		delete(X_test)
		delete(y_train)
		delete(y_test)
	}

	beta := train_linear_regression(X_train, y_train)
	defer delete(beta)

	predictions := predict(X_test, beta)
	defer delete(predictions)

	return rmse(predictions, y_test)
}

View File

@@ -1,4 +1,4 @@
package utils
package main
import "core:fmt"
import "core:math"

View File

@@ -10,30 +10,8 @@ import "core:slice"
import "core:strconv"
import "core:strings"
OUTPUT_FILE :: "output/data.csv"
DATA_FILE :: "res/knapPI_12_500_1000_82.csv"
NUMBER_OF_ITEMS :: 500
CAPACITY :: 280785
POPULATION_SIZE :: 100
GENERATIONS :: 100
TOURNAMENT_SIZE :: 3
CROSSOVER_RATE :: 0.8
MUTATION_RATE :: 0.01
Item :: struct {
profit, weight: int,
}
Chromosome :: ^bit_array.Bit_Array
Population :: [POPULATION_SIZE]Chromosome
dataset: Dataset
items: [NUMBER_OF_ITEMS]Item
Data :: struct {
best, worst: int,
mean: f32,
}
stats: [GENERATIONS]Data
read_data :: proc(file: string) -> (res: [NUMBER_OF_ITEMS]Item, ok := true) {
@@ -47,6 +25,38 @@ read_data :: proc(file: string) -> (res: [NUMBER_OF_ITEMS]Item, ok := true) {
return
}
// Reads the feature-selection dataset from the CSV file `filename`.
//
// Each record must provide at least NUMBER_OF_FEATURES + 1 columns: the
// feature values followed by the target. Parsing stops after DATASET_ROWS
// records. `ok` is false when the file cannot be read, a record is too short,
// a value fails to parse as f64, or fewer than DATASET_ROWS records were read.
load_dataset :: proc(filename: string) -> (data: Dataset, ok := true) {
	file_data := os.read_entire_file(filename) or_return
	defer delete(file_data)

	r: csv.Reader
	csv.reader_init_with_string(&r, string(file_data))
	defer csv.reader_destroy(&r)
	r.trim_leading_space = true
	r.reuse_record = true

	idx := 0
	for idx < DATASET_ROWS {
		record, err := csv.read(&r)
		if err != nil {break}

		// A short row would otherwise trigger a bounds-check panic below.
		if len(record) <= NUMBER_OF_FEATURES {
			return data, false
		}

		// Parse features (columns 0 .. NUMBER_OF_FEATURES-1)
		for i in 0 ..< NUMBER_OF_FEATURES {
			data[idx].features[i] = strconv.parse_f64(record[i]) or_return
		}

		// Parse target (column NUMBER_OF_FEATURES)
		data[idx].target = strconv.parse_f64(record[NUMBER_OF_FEATURES]) or_return

		idx += 1
	}

	return data, idx == DATASET_ROWS
}
write_results :: proc(filename: string, stats: []Data) -> bool {
handle, err := os.open(filename, os.O_CREATE | os.O_WRONLY | os.O_TRUNC, 0o644)
if err != os.ERROR_NONE {return false}
@@ -84,9 +94,13 @@ fitness :: proc(chrom: Chromosome) -> int {
return tot_profit - 500 * max(tot_weight - CAPACITY, 0)
}
create_random_chromosome :: proc() -> Chromosome {
chrom := bit_array.create(NUMBER_OF_ITEMS)
for i in 0 ..< NUMBER_OF_ITEMS {
// RMSE fitness of `chrom` on the global dataset, using the fixed RANDOM_SEED
// for the train/test split.
fitness_rmse :: proc(chrom: Chromosome) -> f64 {
	score := fitness_feature_selection(dataset, chrom, RANDOM_SEED)
	return score
}
create_random_chromosome :: proc(size: int = NUMBER_OF_ITEMS) -> Chromosome {
chrom := bit_array.create(size)
for i in 0 ..< size {
bit_array.set(chrom, i, rand.int_max(2) == 1)
}
return chrom
@@ -108,6 +122,14 @@ generate_population :: proc() -> Population {
return pop
}
// Creates a population in which every chromosome is random and has
// NUMBER_OF_FEATURES bits.
generate_population_features :: proc() -> Population {
	result: Population
	for slot := 0; slot < POPULATION_SIZE; slot += 1 {
		result[slot] = create_random_chromosome(NUMBER_OF_FEATURES)
	}
	return result
}
destroy_population :: proc(pop: ^Population) {
for chrom in pop {
bit_array.destroy(chrom)
@@ -122,6 +144,14 @@ evaluate_population :: proc(pop: ^Population) -> [POPULATION_SIZE]int {
return fitnesses
}
// Computes the RMSE fitness of every chromosome; entry i of the result
// belongs to pop[i].
evaluate_population_rmse :: proc(pop: ^Population) -> [POPULATION_SIZE]f64 {
	scores: [POPULATION_SIZE]f64
	for i := 0; i < POPULATION_SIZE; i += 1 {
		scores[i] = fitness_rmse(pop[i])
	}
	return scores
}
tournament_selection :: proc(
pop: ^Population,
fitnesses: []int,
@@ -141,6 +171,22 @@ tournament_selection :: proc(
return pop[best_idx]
}
// Tournament selection for a minimisation objective: draws TOURNAMENT_SIZE
// random indices and returns the chromosome with the lowest fitness among
// them. On ties the earlier draw is kept.
tournament_selection_rmse :: proc(pop: ^Population, fitnesses: []f64) -> Chromosome {
	winner := rand.int_max(POPULATION_SIZE)

	for round := 1; round < TOURNAMENT_SIZE; round += 1 {
		challenger := rand.int_max(POPULATION_SIZE)
		// Lower is better
		if fitnesses[challenger] < fitnesses[winner] {
			winner = challenger
		}
	}

	return pop[winner]
}
roulette_selection :: proc(pop: ^Population, fitnesses: []int) -> Chromosome {
total_fitness := 0
for f in fitnesses {
@@ -336,6 +382,21 @@ compute_stats :: proc(fitnesses: []int) -> Data {
return {best, worst, f32(sum) / f32(len(fitnesses))}
}
// Summarises RMSE fitness values as {best, mean, worst}, where best is the
// smallest value and worst the largest.
compute_stats_rmse :: proc(fitnesses: []f64) -> [3]f64 {
	lowest := math.F64_MAX
	highest := -math.F64_MAX
	total := 0.0

	for i in 0 ..< len(fitnesses) {
		value := fitnesses[i]
		lowest = min(lowest, value) // Lower is better
		highest = max(highest, value) // Higher is worse
		total += value
	}

	return {lowest, total / f64(len(fitnesses)), highest}
}
run_ga :: proc() {
population := generate_population()
defer destroy_population(&population)
@@ -397,22 +458,138 @@ run_ga :: proc() {
fmt.println("successfully wrote data to", OUTPUT_FILE)
}
// Baseline for comparison: RMSE obtained when every feature is selected.
run_baseline :: proc() -> f64 {
	mask := bit_array.create(NUMBER_OF_FEATURES)
	defer bit_array.destroy(mask)

	// Turn on every feature bit
	for bit := 0; bit < NUMBER_OF_FEATURES; bit += 1 {
		bit_array.set(mask, bit, true)
	}

	baseline := fitness_feature_selection(dataset, mask, RANDOM_SEED)
	return baseline
}
// Produces the next generation: parents are picked pairwise by tournament
// selection, recombined with two-point crossover and then mutated. If the
// population size is odd, the surplus second child of the last pair is
// destroyed instead of stored.
create_offspring_rmse :: proc(pop: ^Population, fitnesses: []f64) -> Population {
	next_gen: Population

	for slot := 0; slot < POPULATION_SIZE; slot += 2 {
		mother := tournament_selection_rmse(pop, fitnesses)
		father := tournament_selection_rmse(pop, fitnesses)
		first, second := two_point_crossover(mother, father)

		swap_mutation(first)
		next_gen[slot] = first

		has_room_for_second := slot + 1 < POPULATION_SIZE
		if has_room_for_second {
			swap_mutation(second)
			next_gen[slot + 1] = second
		} else {
			bit_array.destroy(second)
		}
	}

	return next_gen
}
// Writes the per-generation RMSE statistics to `filename` as CSV with the
// columns Generation, Best, Mean, Worst. The file is created or truncated.
// Returns false only when the file cannot be opened.
write_results_rmse :: proc(filename: string, stats: [][3]f64) -> bool {
	handle, err := os.open(filename, os.O_CREATE | os.O_WRONLY | os.O_TRUNC, 0o644)
	if err != os.ERROR_NONE {
		return false
	}
	defer os.close(handle)

	w: csv.Writer
	csv.writer_init(&w, os.stream_from_handle(handle))

	header := []string{"Generation", "Best", "Mean", "Worst"}
	csv.write(&w, header)

	for gen in 0 ..< len(stats) {
		entry := stats[gen]
		row := []string {
			fmt.tprintf("%d", gen),
			fmt.tprintf("%.6f", entry[0]),
			fmt.tprintf("%.6f", entry[1]),
			fmt.tprintf("%.6f", entry[2]),
		}
		csv.write(&w, row)
	}

	csv.writer_flush(&w)
	return true
}
// Runs the genetic algorithm for feature selection.
//
// For GENERATIONS generations it evaluates the population (RMSE, lower is
// better), records {best, mean, worst}, prints them, and replaces the
// population with its offspring. Afterwards it writes the recorded statistics
// to OUTPUT_FILE and prints the best chromosome of the final population.
run_ga_feature_selection :: proc() {
	population := generate_population_features()
	defer destroy_population(&population)

	generation_stats := make([dynamic][3]f64, 0, GENERATIONS)
	defer delete(generation_stats)

	for gen in 0 ..< GENERATIONS {
		fitnesses := evaluate_population_rmse(&population)
		stats := compute_stats_rmse(fitnesses[:])
		append(&generation_stats, stats)

		fmt.printfln("Gen %d: Best=%.4f Mean=%.4f Worst=%.4f", gen, stats[0], stats[1], stats[2])

		// Create offspring
		offspring := create_offspring_rmse(&population, fitnesses[:])

		// Replace population. Ownership of the offspring chromosomes moves to
		// `population`, so they must NOT be destroyed here: a `defer` in this
		// loop body would run at the end of every iteration and free the
		// chromosomes that the next generation (and the final cleanup) uses.
		destroy_population(&population)
		population = offspring
	}

	// Write results
	if !write_results_rmse(OUTPUT_FILE, generation_stats[:]) {
		fmt.eprintln("Failed to write results to", OUTPUT_FILE)
	}

	// Final best solution
	final_fitnesses := evaluate_population_rmse(&population)
	best_idx := 0
	best_rmse := final_fitnesses[0]
	for f, i in final_fitnesses {
		if f < best_rmse {
			best_rmse = f
			best_idx = i
		}
	}

	// Count selected features
	selected_count := 0
	for i in 0 ..< NUMBER_OF_FEATURES {
		if bit_array.get(population[best_idx], i) {
			selected_count += 1
		}
	}

	fmt.printfln("\nBest solution: %d features selected, RMSE=%.4f", selected_count, best_rmse)
}
main :: proc() {
data, ok := read_data(DATA_FILE)
// Load knapsack data
knapsack_data, ok := read_data(DATA_FILE)
if !ok {
fmt.eprintln("Failed to read data from", DATA_FILE)
fmt.eprintln("Failed to load knapsack data")
return
}
items = data
items = knapsack_data
fmt.println("Running Genetic Algorithm for Binary Knapsack Problem")
fmt.printfln(
"Items: %d, Capacity: %d, Population: %d, Generations: %d\n",
NUMBER_OF_ITEMS,
CAPACITY,
POPULATION_SIZE,
GENERATIONS,
)
// Load feature selection dataset
feature_data, dataset_ok := load_dataset(DATASET_FILE)
if !dataset_ok {
fmt.eprintln("Failed to load dataset from:", DATASET_FILE)
return
}
dataset = feature_data
run_ga()
fmt.println("=== Baseline (All Features) ===")
baseline_rmse := run_baseline()
fmt.printfln("RMSE with all features: %.4f\n", baseline_rmse)
fmt.println("=== GA Feature Selection ===")
run_ga_feature_selection()
}