diff --git a/exercise5/handout_pthreads/wave_2d_pthread.c b/exercise5/handout_pthreads/wave_2d_pthread.c
index 044e3f3..4154c8f 100644
--- a/exercise5/handout_pthreads/wave_2d_pthread.c
+++ b/exercise5/handout_pthreads/wave_2d_pthread.c
@@ -85,10 +85,15 @@ void domain_finalize(void) {
 // Integration formula
 void time_step(int_t thread_id) {
     // BEGIN: T3
-    for (int_t i = 0; i < N; i += 1)
+    // Each thread updates one contiguous band of whole rows, replacing the
+    // earlier split where every thread visited every row and took each
+    // n_threads-th column. Multiplying by N before dividing spreads the
+    // N % n_threads leftover rows across threads (band sizes differ by at
+    // most one); thread 0 starts at row 0 and thread n_threads - 1 ends at N.
+    int_t start_row = (thread_id * N) / n_threads;
+    int_t end_row = ((thread_id + 1) * N) / n_threads;
+    for (int_t i = start_row; i < end_row; i++)
         for (int_t j = 0; j < N; j++)
-            if (j % n_threads == thread_id)
-                U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j));
+            U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j));
     // END: T3
 }
 