memoize linear regression
down to 0.7 sec runtime
This commit is contained in:
@@ -26,6 +26,8 @@ Feature_State :: struct {
|
||||
y_train: []f64,
|
||||
y_test: []f64,
|
||||
selected_indices_buffer: [NUMBER_OF_FEATURES]int,
|
||||
XtX_cache: [][]f64,
|
||||
Xty_cache: []f64,
|
||||
}
|
||||
|
||||
feature_state: Feature_State
|
||||
@@ -55,7 +57,6 @@ load_feature_data :: proc() -> bool {
|
||||
idx += 1
|
||||
}
|
||||
|
||||
split_dataset()
|
||||
return idx == DATASET_ROWS
|
||||
}
|
||||
|
||||
@@ -97,22 +98,60 @@ split_dataset :: proc() {
|
||||
}
|
||||
}
|
||||
|
||||
// precompute fills feature_state.XtX_cache and feature_state.Xty_cache from
// the training split, so that a later fit can look the products up instead of
// summing over every training row again.
//
//   XtX_cache[a][b] = sum over rows of X_train[row][a] * X_train[row][b]
//   Xty_cache[a]    = sum over rows of X_train[row][a] * y_train[row]
//
// Both caches are sized by NUMBER_OF_FEATURES. They are created with `make`
// and nothing in this procedure frees them.
precompute :: proc() {
	feature_count := NUMBER_OF_FEATURES

	// Feature-by-feature products: one freshly allocated row per feature.
	feature_state.XtX_cache = make([][]f64, feature_count)
	for a in 0 ..< feature_count {
		products := make([]f64, feature_count)
		feature_state.XtX_cache[a] = products
		for b in 0 ..< feature_count {
			acc := 0.0
			for sample in feature_state.X_train {
				acc += sample[a] * sample[b]
			}
			products[b] = acc
		}
	}

	// Feature-by-target products.
	feature_state.Xty_cache = make([]f64, feature_count)
	for a in 0 ..< feature_count {
		acc := 0.0
		for sample, row in feature_state.X_train {
			acc += sample[a] * feature_state.y_train[row]
		}
		feature_state.Xty_cache[a] = acc
	}
}
|
||||
|
||||
fitness_features :: proc(chrom: Chromosome) -> f64 {
|
||||
temp_arena: runtime.Arena
|
||||
defer runtime.arena_destroy(&temp_arena)
|
||||
temp_allocator := runtime.arena_allocator(&temp_arena)
|
||||
// Get selected feature indices
|
||||
selected := make([dynamic]int, 0, NUMBER_OF_FEATURES)
|
||||
defer delete(selected)
|
||||
|
||||
context.allocator = temp_allocator
|
||||
for i in 0 ..< bit_array.len(chrom) {
|
||||
if bit_array.get(chrom, i) {
|
||||
append(&selected, i)
|
||||
}
|
||||
}
|
||||
|
||||
X_train_selected := select_features(feature_state.X_train, chrom)
|
||||
X_test_selected := select_features(feature_state.X_test, chrom)
|
||||
|
||||
if X_train_selected == nil {
|
||||
if len(selected) == 0 {
|
||||
return math.F64_MAX
|
||||
}
|
||||
|
||||
beta := train_linear_regression(X_train_selected, feature_state.y_train)
|
||||
// Train using cached matrices - MUCH faster
|
||||
beta := train_linear_regression_cached(selected[:], feature_state.y_train)
|
||||
defer delete(beta)
|
||||
|
||||
// Still need to select test features for prediction
|
||||
X_test_selected := select_features(feature_state.X_test, chrom)
|
||||
defer {
|
||||
for row in X_test_selected {delete(row)}
|
||||
delete(X_test_selected)
|
||||
}
|
||||
|
||||
predictions := predict(X_test_selected, beta)
|
||||
defer delete(predictions)
|
||||
|
||||
return rmse(predictions, feature_state.y_test)
|
||||
}
|
||||
|
||||
@@ -59,40 +59,30 @@ solve_linear_system :: proc(A: [][]f64, b: []f64) -> []f64 {
|
||||
return x
|
||||
}
|
||||
|
||||
// Linear regression
|
||||
train_linear_regression :: proc(X: [][]f64, y: []f64) -> []f64 {
|
||||
n := len(X)
|
||||
m := len(X[0])
|
||||
train_linear_regression_cached :: proc(selected_features: []int, y: []f64) -> []f64 {
|
||||
m := len(selected_features)
|
||||
|
||||
// X^T X
|
||||
XtX := make([][]f64, m)
|
||||
for i in 0 ..< m {
|
||||
XtX[i] = make([]f64, m)
|
||||
for j in 0 ..< m {
|
||||
sum := 0.0
|
||||
for k in 0 ..< n {
|
||||
sum += X[k][i] * X[k][j]
|
||||
}
|
||||
XtX[i][j] = sum
|
||||
}
|
||||
}
|
||||
defer {
|
||||
for row in XtX {delete(row)}
|
||||
delete(XtX)
|
||||
}
|
||||
// Extract submatrix from cache
|
||||
XtX := make([][]f64, m)
|
||||
defer {
|
||||
for row in XtX {delete(row)}
|
||||
delete(XtX)
|
||||
}
|
||||
|
||||
// X^T y
|
||||
Xty := make([]f64, m)
|
||||
defer delete(Xty)
|
||||
for i in 0 ..< m {
|
||||
sum := 0.0
|
||||
for k in 0 ..< n {
|
||||
sum += X[k][i] * y[k]
|
||||
}
|
||||
Xty[i] = sum
|
||||
}
|
||||
for i in 0 ..< m {
|
||||
XtX[i] = make([]f64, m)
|
||||
for j in 0 ..< m {
|
||||
XtX[i][j] = feature_state.XtX_cache[selected_features[i]][selected_features[j]]
|
||||
}
|
||||
}
|
||||
|
||||
return solve_linear_system(XtX, Xty)
|
||||
Xty := make([]f64, m)
|
||||
defer delete(Xty)
|
||||
for i in 0 ..< m {
|
||||
Xty[i] = feature_state.Xty_cache[selected_features[i]]
|
||||
}
|
||||
|
||||
return solve_linear_system(XtX, Xty)
|
||||
}
|
||||
|
||||
predict :: proc(X: [][]f64, beta: []f64) -> []f64 {
|
||||
|
||||
@@ -93,42 +93,6 @@ test_solve_larger_system :: proc(t: ^testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Fits a single feature column (no intercept column) against targets that
// follow y = 2x + 3 and checks the resulting slope.
@(test)
test_train_linear_regression :: proc(t: ^testing.T) {
	features := [][]f64{{1.0}, {2.0}, {3.0}, {4.0}, {5.0}}
	targets := []f64{5.0, 7.0, 9.0, 11.0, 13.0}

	coefficients := train_linear_regression(features, targets)
	defer delete(coefficients)

	fmt.printfln("beta = %v", coefficients)

	// With no intercept term the fit is forced through the origin, so the
	// slope that minimizes Σ(y - βx)² is
	//   β = Σ(xy) / Σ(x²)
	//     = (1*5 + 2*7 + 3*9 + 4*11 + 5*13) / (1 + 4 + 9 + 16 + 25)
	//     = (5 + 14 + 27 + 44 + 65) / 55
	//     = 155 / 55 ≈ 2.818
	slope_error := math.abs(coefficients[0] - 2.818)
	testing.expect(t, slope_error < 0.01, "slope should be ~2.818")
}
|
||||
|
||||
// Fits y = 2x + 3 using a design matrix whose first column is the constant 1,
// and checks that both coefficients are recovered.
@(test)
test_train_with_intercept :: proc(t: ^testing.T) {
	// Each row is [intercept, x].
	design := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}, {1.0, 4.0}, {1.0, 5.0}}
	targets := []f64{5.0, 7.0, 9.0, 11.0, 13.0}

	coefficients := train_linear_regression(design, targets)
	defer delete(coefficients)

	// coefficients[0] pairs with the constant column, coefficients[1] with x.
	intercept_error := math.abs(coefficients[0] - 3.0)
	testing.expect(t, intercept_error < 1e-6, "intercept should be 3.0")

	slope_error := math.abs(coefficients[1] - 2.0)
	testing.expect(t, slope_error < 1e-6, "slope should be 2.0")
}
|
||||
|
||||
@(test)
|
||||
test_predict :: proc(t: ^testing.T) {
|
||||
X := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}}
|
||||
@@ -163,27 +127,3 @@ test_rmse_perfect_fit :: proc(t: ^testing.T) {
|
||||
|
||||
testing.expect(t, error < 1e-10, "RMSE should be 0 for perfect fit")
|
||||
}
|
||||
|
||||
// End-to-end check: train on points from y = 3x + 2, predict two held-out
// points from the same line, and expect the RMSE to be near zero.
@(test)
test_full_pipeline :: proc(t: ^testing.T) {
	// Training split; each row is [1, x].
	train_rows := [][]f64{{1.0, 1.0}, {1.0, 2.0}, {1.0, 3.0}, {1.0, 4.0}}
	train_targets := []f64{5.0, 8.0, 11.0, 14.0}

	// Held-out split taken from the same line.
	test_rows := [][]f64{{1.0, 5.0}, {1.0, 6.0}}
	test_targets := []f64{17.0, 20.0}

	// Fit.
	coefficients := train_linear_regression(train_rows, train_targets)
	defer delete(coefficients)

	// Predict on the held-out rows.
	predicted := predict(test_rows, coefficients)
	defer delete(predicted)

	// Score.
	score := rmse(predicted, test_targets)
	testing.expect(t, score < 1e-6, "Should have near-zero error on linear data")
}
|
||||
|
||||
@@ -25,6 +25,8 @@ main :: proc() {
|
||||
fmt.eprintln("Failed to load feature data")
|
||||
return
|
||||
}
|
||||
split_dataset()
|
||||
precompute()
|
||||
problem = feature_selection_problem()
|
||||
|
||||
fmt.println("=== Baseline (All Features) ===")
|
||||
|
||||
Reference in New Issue
Block a user