From 6c008810ade15f5ef8deca2e875c2d5cf1e20fc2 Mon Sep 17 00:00:00 2001 From: Fredrik Robertsen Date: Fri, 6 Feb 2026 09:17:14 +0100 Subject: [PATCH] memoize linear regression down to 0.7 sec runtime --- src/feature_selection.odin | 59 ++++++++++++++++++++++++++++++------- src/linreg.odin | 52 +++++++++++++-------------------- src/linreg_test.odin | 60 -------------------------------------- src/main.odin | 2 ++ 4 files changed, 72 insertions(+), 101 deletions(-) diff --git a/src/feature_selection.odin b/src/feature_selection.odin index 79cdf6a..b867de8 100644 --- a/src/feature_selection.odin +++ b/src/feature_selection.odin @@ -26,6 +26,8 @@ Feature_State :: struct { y_train: []f64, y_test: []f64, selected_indices_buffer: [NUMBER_OF_FEATURES]int, + XtX_cache: [][]f64, + Xty_cache: []f64, } feature_state: Feature_State @@ -55,7 +57,6 @@ load_feature_data :: proc() -> bool { idx += 1 } - split_dataset() return idx == DATASET_ROWS } @@ -97,22 +98,60 @@ split_dataset :: proc() { } } +precompute :: proc() { + n := len(feature_state.X_train) + m := NUMBER_OF_FEATURES + + feature_state.XtX_cache = make([][]f64, m) + for i in 0 ..< m { + feature_state.XtX_cache[i] = make([]f64, m) + for j in 0 ..< m { + sum := 0.0 + for k in 0 ..< n { + sum += feature_state.X_train[k][i] * feature_state.X_train[k][j] + } + feature_state.XtX_cache[i][j] = sum + } + } + + feature_state.Xty_cache = make([]f64, m) + for i in 0 ..< m { + sum := 0.0 + for k in 0 ..< n { + sum += feature_state.X_train[k][i] * feature_state.y_train[k] + } + feature_state.Xty_cache[i] = sum + } +} + fitness_features :: proc(chrom: Chromosome) -> f64 { - temp_arena: runtime.Arena - defer runtime.arena_destroy(&temp_arena) - temp_allocator := runtime.arena_allocator(&temp_arena) + // Get selected feature indices + selected := make([dynamic]int, 0, NUMBER_OF_FEATURES) + defer delete(selected) - context.allocator = temp_allocator + for i in 0 ..< bit_array.len(chrom) { + if bit_array.get(chrom, i) { + append(&selected, i) + } + } - X_train_selected := select_features(feature_state.X_train, chrom) - X_test_selected := select_features(feature_state.X_test, chrom) - - if X_train_selected == nil { + if len(selected) == 0 { return math.F64_MAX } - beta := train_linear_regression(X_train_selected, feature_state.y_train) + // Train using cached matrices - MUCH faster + beta := train_linear_regression_cached(selected[:], feature_state.y_train) + defer delete(beta) + + // Still need to select test features for prediction + X_test_selected := select_features(feature_state.X_test, chrom) + defer { + for row in X_test_selected {delete(row)} + delete(X_test_selected) + } + predictions := predict(X_test_selected, beta) + defer delete(predictions) return rmse(predictions, feature_state.y_test) } diff --git a/src/linreg.odin b/src/linreg.odin index 6697227..0c66bee 100644 --- a/src/linreg.odin +++ b/src/linreg.odin @@ -59,40 +59,30 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 { return x } -// Linear regression -train_linear_regression :: proc(X: [][]f64, y: []f64) -> []f64 { - n := len(X) - m := len(X[0]) +train_linear_regression_cached :: proc(selected_features: []int, y: []f64) -> []f64 { + m := len(selected_features) - // X^T X - XtX := make([][]f64, m) - for i in 0 ..< m { - XtX[i] = make([]f64, m) - for j in 0 ..< m { - sum := 0.0 - for k in 0 ..< n { - sum += X[k][i] * X[k][j] - } - XtX[i][j] = sum - } - } - defer { - for row in XtX {delete(row)} - delete(XtX) - } + // Extract submatrix from cache + XtX := make([][]f64, m) + defer { + for row in XtX {delete(row)} + delete(XtX) + } - // X^T y - Xty := make([]f64, m) - defer delete(Xty) - for i in 0 ..< m { - sum := 0.0 - for k in 0 ..< n { - sum += X[k][i] * y[k] - } - Xty[i] = sum - } + for i in 0 ..< m { + XtX[i] = make([]f64, m) + for j in 0 ..< m { + XtX[i][j] = feature_state.XtX_cache[selected_features[i]][selected_features[j]] + } + } - return solve_linear_system(XtX, Xty) + Xty := make([]f64, m) + defer delete(Xty) + for i in 0 ..< m { + Xty[i] = feature_state.Xty_cache[selected_features[i]] + } + + return solve_linear_system(XtX, Xty) } predict :: proc(X: [][]f64, beta: []f64) -> []f64 { diff --git a/src/linreg_test.odin b/src/linreg_test.odin index f8bd090..4e76ea4 100644 --- a/src/linreg_test.odin +++ b/src/linreg_test.odin @@ -93,42 +93,6 @@ test_solve_larger_system :: proc(t: ^testing.T) { } } -@(test) -test_train_linear_regression :: proc(t: ^testing.T) { - X := [][]f64{{1.0}, {2.0}, {3.0}, {4.0}, {5.0}} - y := []f64{5.0, 7.0, 9.0, 11.0, 13.0} - - beta := train_linear_regression(X, y) - defer delete(beta) - - fmt.printfln("beta = %v", beta) - - // For y = 2x + 3 with no intercept term: - // Best fit through origin: minimize Σ(y - βx)² - // β = Σ(xy) / Σ(x²) = (1*5 + 2*7 + 3*9 + 4*11 + 5*13) / (1 + 4 + 9 + 16 + 25) - // = (5 + 14 + 27 + 44 + 65) / 55 = 155 / 55 ≈ 2.818 - testing.expect(t, math.abs(beta[0] - 2.818) < 0.01, "slope should be ~2.818") -} - -@(test) -test_train_with_intercept :: proc(t: ^testing.T) { - // Dataset with intercept column: y = 2x + 3 - X := [][]f64 { - {1.0, 1.0}, // [intercept, x] - {1.0, 2.0}, - {1.0, 3.0}, - {1.0, 4.0}, - {1.0, 5.0}, - } - y := []f64{5.0, 7.0, 9.0, 11.0, 13.0} - - beta := train_linear_regression(X, y) - defer delete(beta) - - testing.expect(t, math.abs(beta[0] - 3.0) < 1e-6, "intercept should be 3.0") - testing.expect(t, math.abs(beta[1] - 2.0) < 1e-6, "slope should be 2.0") -} - @(test) test_predict :: proc(t: ^testing.T) { X := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}} @@ -163,27 +127,3 @@ test_rmse_perfect_fit :: proc(t: ^testing.T) { testing.expect(t, error < 1e-10, "RMSE should be 0 for perfect fit") } - -@(test) -test_full_pipeline :: proc(t: ^testing.T) { - // Train on y = 3x + 2 - X_train := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}, {1.0, 4.0}} - y_train := []f64{5.0, 8.0, 11.0, 14.0} - - // Test data - X_test := [][]f64{{1.0, 5.0}, {1.0, 6.0}} - y_test := []f64{17.0, 20.0} - - // Train - beta := train_linear_regression(X_train, y_train) - defer delete(beta) - - // Predict - predictions := predict(X_test, beta) - defer delete(predictions) - - // Evaluate - error := rmse(predictions, y_test) - - testing.expect(t, error < 1e-6, "Should have near-zero error on linear data") -} diff --git a/src/main.odin b/src/main.odin index 42362ca..017ea5d 100644 --- a/src/main.odin +++ b/src/main.odin @@ -25,6 +25,8 @@ main :: proc() { fmt.eprintln("Failed to load feature data") return } + split_dataset() + precompute() problem = feature_selection_problem() fmt.println("=== Baseline (All Features) ===")