From 6c008810ade15f5ef8deca2e875c2d5cf1e20fc2 Mon Sep 17 00:00:00 2001
From: Fredrik Robertsen <fredrikr79@pm.me>
Date: Fri, 6 Feb 2026 09:17:14 +0100
Subject: [PATCH] memoize linear regression

down to 0.7 sec runtime
---
 src/feature_selection.odin | 59 ++++++++++++++++++++++++++++++-------
 src/linreg.odin            | 52 +++++++++++++--------------------
 src/linreg_test.odin       | 60 --------------------------------------
 src/main.odin              |  2 ++
 4 files changed, 72 insertions(+), 101 deletions(-)

diff --git a/src/feature_selection.odin b/src/feature_selection.odin
index 79cdf6a..b867de8 100644
--- a/src/feature_selection.odin
+++ b/src/feature_selection.odin
@@ -26,6 +26,8 @@ Feature_State :: struct {
 	y_train: []f64,
 	y_test: []f64,
 	selected_indices_buffer: [NUMBER_OF_FEATURES]int,
+	XtX_cache: [][]f64,
+    Xty_cache: []f64,
 }
 
 feature_state: Feature_State
@@ -55,7 +57,6 @@ load_feature_data :: proc() -> bool {
 		idx += 1
 	}
 
-	split_dataset()
 	return idx == DATASET_ROWS
 }
 
@@ -97,22 +98,60 @@ split_dataset :: proc() {
     }
 }
 
+precompute :: proc() {
+    n := len(feature_state.X_train)
+    m := NUMBER_OF_FEATURES
+
+    feature_state.XtX_cache = make([][]f64, m)
+    for i in 0 ..< m {
+        feature_state.XtX_cache[i] = make([]f64, m)
+        for j in 0 ..< m {
+            sum := 0.0
+            for k in 0 ..< n {
+                sum += feature_state.X_train[k][i] * feature_state.X_train[k][j]
+            }
+            feature_state.XtX_cache[i][j] = sum
+        }
+    }
+
+    feature_state.Xty_cache = make([]f64, m)
+    for i in 0 ..< m {
+        sum := 0.0
+        for k in 0 ..< n {
+            sum += feature_state.X_train[k][i] * feature_state.y_train[k]
+        }
+        feature_state.Xty_cache[i] = sum
+    }
+}
+
 fitness_features :: proc(chrom: Chromosome) -> f64 {
-    temp_arena: runtime.Arena
-    defer runtime.arena_destroy(&temp_arena)
-    temp_allocator := runtime.arena_allocator(&temp_arena)
+    // Get selected feature indices
+    selected := make([dynamic]int, 0, NUMBER_OF_FEATURES)
+    defer delete(selected)
 
-    context.allocator = temp_allocator
+    for i in 0 ..< bit_array.len(chrom) {
+        if bit_array.get(chrom, i) {
+            append(&selected, i)
+        }
+    }
 
-    X_train_selected := select_features(feature_state.X_train, chrom)
-    X_test_selected := select_features(feature_state.X_test, chrom)
-
-    if X_train_selected == nil {
+    if len(selected) == 0 {
         return math.F64_MAX
     }
 
-    beta := train_linear_regression(X_train_selected, feature_state.y_train)
+    // Train using cached matrices - MUCH faster
+    beta := train_linear_regression_cached(selected[:], feature_state.y_train)
+    defer delete(beta)
+
+    // Still need to select test features for prediction
+    X_test_selected := select_features(feature_state.X_test, chrom)
+    defer {
+        for row in X_test_selected {delete(row)}
+        delete(X_test_selected)
+    }
+
     predictions := predict(X_test_selected, beta)
+    defer delete(predictions)
 
     return rmse(predictions, feature_state.y_test)
 }
diff --git a/src/linreg.odin b/src/linreg.odin
index 6697227..0c66bee 100644
--- a/src/linreg.odin
+++ b/src/linreg.odin
@@ -59,40 +59,30 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 {
 	return x
 }
 
-// Linear regression
-train_linear_regression :: proc(X: [][]f64, y: []f64) -> []f64 {
-	n := len(X)
-	m := len(X[0])
+train_linear_regression_cached :: proc(selected_features: []int, y: []f64) -> []f64 {
+    m := len(selected_features)
 
-	// X^T X
-	XtX := make([][]f64, m)
-	for i in 0 ..< m {
-		XtX[i] = make([]f64, m)
-		for j in 0 ..< m {
-			sum := 0.0
-			for k in 0 ..< n {
-				sum += X[k][i] * X[k][j]
-			}
-			XtX[i][j] = sum
-		}
-	}
-	defer {
-		for row in XtX {delete(row)}
-		delete(XtX)
-	}
+    // Extract submatrix from cache
+    XtX := make([][]f64, m)
+    defer {
+        for row in XtX {delete(row)}
+        delete(XtX)
+    }
 
-	// X^T y
-	Xty := make([]f64, m)
-	defer delete(Xty)
-	for i in 0 ..< m {
-		sum := 0.0
-		for k in 0 ..< n {
-			sum += X[k][i] * y[k]
-		}
-		Xty[i] = sum
-	}
+    for i in 0 ..< m {
+        XtX[i] = make([]f64, m)
+        for j in 0 ..< m {
+            XtX[i][j] = feature_state.XtX_cache[selected_features[i]][selected_features[j]]
+        }
+    }
 
-	return solve_linear_system(XtX, Xty)
+    Xty := make([]f64, m)
+    defer delete(Xty)
+    for i in 0 ..< m {
+        Xty[i] = feature_state.Xty_cache[selected_features[i]]
+    }
+
+    return solve_linear_system(XtX, Xty)
 }
 
 predict :: proc(X: [][]f64, beta: []f64) -> []f64 {
diff --git a/src/linreg_test.odin b/src/linreg_test.odin
index f8bd090..4e76ea4 100644
--- a/src/linreg_test.odin
+++ b/src/linreg_test.odin
@@ -93,42 +93,6 @@ test_solve_larger_system :: proc(t: ^testing.T) {
 	}
 }
 
-@(test)
-test_train_linear_regression :: proc(t: ^testing.T) {
-	X := [][]f64{{1.0}, {2.0}, {3.0}, {4.0}, {5.0}}
-	y := []f64{5.0, 7.0, 9.0, 11.0, 13.0}
-
-	beta := train_linear_regression(X, y)
-	defer delete(beta)
-
-	fmt.printfln("beta = %v", beta)
-
-	// For y = 2x + 3 with no intercept term:
-	// Best fit through origin: minimize Σ(y - βx)²
-	// β = Σ(xy) / Σ(x²) = (1*5 + 2*7 + 3*9 + 4*11 + 5*13) / (1 + 4 + 9 + 16 + 25)
-	//   = (5 + 14 + 27 + 44 + 65) / 55 = 155 / 55 ≈ 2.818
-	testing.expect(t, math.abs(beta[0] - 2.818) < 0.01, "slope should be ~2.818")
-}
-
-@(test)
-test_train_with_intercept :: proc(t: ^testing.T) {
-	// Dataset with intercept column: y = 2x + 3
-	X := [][]f64 {
-		{1.0, 1.0}, // [intercept, x]
-		{1.0, 2.0},
-		{1.0, 3.0},
-		{1.0, 4.0},
-		{1.0, 5.0},
-	}
-	y := []f64{5.0, 7.0, 9.0, 11.0, 13.0}
-
-	beta := train_linear_regression(X, y)
-	defer delete(beta)
-
-	testing.expect(t, math.abs(beta[0] - 3.0) < 1e-6, "intercept should be 3.0")
-	testing.expect(t, math.abs(beta[1] - 2.0) < 1e-6, "slope should be 2.0")
-}
-
 @(test)
 test_predict :: proc(t: ^testing.T) {
 	X := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}}
@@ -163,27 +127,3 @@ test_rmse_perfect_fit :: proc(t: ^testing.T) {
 
 	testing.expect(t, error < 1e-10, "RMSE should be 0 for perfect fit")
 }
-
-@(test)
-test_full_pipeline :: proc(t: ^testing.T) {
-	// Train on y = 3x + 2
-	X_train := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}, {1.0, 4.0}}
-	y_train := []f64{5.0, 8.0, 11.0, 14.0}
-
-	// Test data
-	X_test := [][]f64{{1.0, 5.0}, {1.0, 6.0}}
-	y_test := []f64{17.0, 20.0}
-
-	// Train
-	beta := train_linear_regression(X_train, y_train)
-	defer delete(beta)
-
-	// Predict
-	predictions := predict(X_test, beta)
-	defer delete(predictions)
-
-	// Evaluate
-	error := rmse(predictions, y_test)
-
-	testing.expect(t, error < 1e-6, "Should have near-zero error on linear data")
-}
diff --git a/src/main.odin b/src/main.odin
index 42362ca..017ea5d 100644
--- a/src/main.odin
+++ b/src/main.odin
@@ -25,6 +25,8 @@ main :: proc() {
 			fmt.eprintln("Failed to load feature data")
 			return
 		}
+		split_dataset()
+		precompute()
 		problem = feature_selection_problem()
 
 		fmt.println("=== Baseline (All Features) ===")