diff --git a/exercise5/handout_openmp/.gitignore b/exercise5/handout_openmp/.gitignore new file mode 100644 index 0000000..1050c9f --- /dev/null +++ b/exercise5/handout_openmp/.gitignore @@ -0,0 +1,5 @@ +data/ +data_sequential/ +images/ +parallel +sequential diff --git a/exercise5/handout_openmp/Makefile b/exercise5/handout_openmp/Makefile new file mode 100644 index 0000000..aac2022 --- /dev/null +++ b/exercise5/handout_openmp/Makefile @@ -0,0 +1,33 @@ +CC=gcc +CFLAGS+= -std=c99 -fopenmp +LDLIBS+= -lm +SEQUENTIAL_SRC_FILES=wave_2d_sequential.c +PARALLEL_SRC_FILES=wave_2d_workshare.c +BARRIER_SRC_FILES=wave_2d_barrier.c +IMAGES=$(shell find data -type f | sed s/\\.dat/.png/g | sed s/data/images/g ) +.PHONY: all clean dirs plot movie +all: dirs ${SEQUENTIAL_SRC_FILES} ${PARALLEL_SRC_FILES} ${BARRIER_SRC_FILES} +dirs: + mkdir -p data images +sequential: ${SEQUENTIAL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +parallel: ${PARALLEL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +barrier: ${BARRIER_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +plot: ${IMAGES} +images/%.png: data/%.dat + ./plot_image.sh $< +movie: ${IMAGES} + ffmpeg -y -an -i images/%5d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 -r 12 wave.mp4 +check: dirs sequential parallel + mkdir -p data_sequential + ./sequential + cp -rf ./data/* ./data_sequential + ./parallel + ./compare.sh + rm -rf data_sequential +clean: + -rm -fr ${TARGETS} data images wave.mp4 + -rm sequential + -rm parallel diff --git a/exercise5/handout_openmp/compare.sh b/exercise5/handout_openmp/compare.sh new file mode 100755 index 0000000..5cd69a7 --- /dev/null +++ b/exercise5/handout_openmp/compare.sh @@ -0,0 +1,52 @@ +#! /usr/bin/env bash + + +GREEN=$(tput setaf 2) +RED=$(tput setaf 1) +RESET=$(tput sgr0) + +DIR1="data" +DIR2="data_sequential" + +if [ ! -d "$DIR1" ]; then + echo "Directory $DIR1 does not exist." + exit 1 +fi + +if [ ! -d "$DIR2" ]; then + echo "Directory $DIR2 does not exist." + exit 1 +fi + +found_difference=1 +for file in "$DIR1"/*.dat; do + # Extract the file name (basename) + filename=$(basename "$file") + + # Check if the corresponding file exists in DIR2 + if [ -f "$DIR2/$filename" ]; then + # Compare the two files using diff + diff_output=$(diff "$file" "$DIR2/$filename") + + if [ -n "$diff_output" ]; then + echo "$diff_output" + found_difference=0 + fi + else + echo "File $filename does not exist in $DIR2" + fi +done + +if [ $found_difference -eq 1 ]; then + echo + echo + echo "${GREEN}The sequential and parallel version produced mathcing output!${RESET}" + echo + echo +else + echo + echo + echo "${RED}There were mismatches between the sequential and the parallel output.${RESET}" + echo + echo +fi diff --git a/exercise5/handout_openmp/plot_image.sh b/exercise5/handout_openmp/plot_image.sh new file mode 100755 index 0000000..e36e6af --- /dev/null +++ b/exercise5/handout_openmp/plot_image.sh @@ -0,0 +1,10 @@ +#! /usr/bin/env bash +SIZE=1024 +DATAFILE=$1 +IMAGEFILE=`echo $1 | sed s/dat$/png/ | sed s/data/images/` +cat < +#include +#include +#include +#include + +#include + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +const int_t + N = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + h = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; +#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1] +#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1] +#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1] + +// Function definitions follow below main +void domain_initialize ( void ); +void domain_save ( int_t step ); +void domain_finalize ( void ); +void main_loop ( void ); +void time_step ( int_t thread_id ); +void boundary_condition ( int_t thread_id ); + + +// Main time integration loop +int +main () +{ + // Set up the initial state of the domain + domain_initialize(); + + double t_start, t_end; + t_start = omp_get_wtime(); + + #pragma omp parallel + main_loop(); + + t_end = omp_get_wtime(); + printf ( "%lf seconds elapsed with %d threads\n", + t_end-t_start, + omp_get_max_threads() + ); + + // Clean up and shut down + domain_finalize(); + exit ( EXIT_SUCCESS ); +} + + +void +main_loop ( void ) +{ + int_t thread_id = omp_get_thread_num(); + + // Go through each time step + for ( int_t iteration=0; iteration<=max_iteration; iteration++ ) + { + // Master thread saves the state of the computation + #pragma omp master + if ( (iteration % snapshot_freq)==0 ) + { + domain_save ( iteration / snapshot_freq ); + printf ( "Iteration %ld out of %ld\n", iteration, max_iteration ); + } + + // Make sure all saving is finished before we begin + #pragma omp barrier + + // Run the time step in parallel + boundary_condition ( thread_id ); + #pragma omp barrier + time_step ( thread_id ); + + #pragma omp barrier + + // Master thread rotates the time step buffers + #pragma omp master + { + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; + } + } +} + + +// Integration formula +void +time_step ( int_t thread_id ) +{ + int_t n_threads = omp_get_num_threads(); + for ( int_t i=thread_id; i +#include +#include +#include +#include +#include + + +// Convert 'struct timeval' into seconds in double prec. floating point +#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec) + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +int_t + N = 1024, + M = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + dx = 1.0, + dy = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1] +#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1] +#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1] + + +// Rotate the time step buffers. +void move_buffer_window ( void ) +{ + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + + +// Save the present time step in a numbered file under 'data/' +void domain_save ( int_t step ) +{ + char filename[256]; + sprintf ( filename, "data/%.5ld.dat", step ); + FILE *out = fopen ( filename, "wb" ); + for ( int_t i=0; i +#include +#include +#include +#include + +// TASK: T6 +// Include the OpenMP library +// BEGIN: T6 +; +// END: T6 + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +const int_t + N = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + h = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i, j) buffers[0][((i) + 1) * (N + 2) + (j) + 1] +#define U(i, j) buffers[1][((i) + 1) * (N + 2) + (j) + 1] +#define U_nxt(i, j) buffers[2][((i) + 1) * (N + 2) + (j) + 1] + +// Rotate the time step buffers. +void move_buffer_window(void) { + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + +// Set up our three buffers, and fill two with an initial perturbation +void domain_initialize(void) { + buffers[0] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[1] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[2] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + + for (int_t i = 0; i < N; i++) { + for (int_t j = 0; j < N; j++) { + real_t delta = sqrt(((i - N / 2) * (i - N / 2) + (j - N / 2) * (j - N / 2)) / (real_t)N); + U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta); + } + } + + // Set the time step for 2D case + dt = (h * h) / (4.0 * c * c); +} + +// Get rid of all the memory allocations +void domain_finalize(void) { + free(buffers[0]); + free(buffers[1]); + free(buffers[2]); +} + +// TASK: T7 +// Integration formula +void time_step(void) { + // BEGIN: T7 + for (int_t i = 0; i < N; i++) + for (int_t j = 0; j < N; j++) + U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j)); + // END: T7 +} + +// Neumann (reflective) boundary condition +void boundary_condition(void) { + for (int_t i = 0; i < N; i++) { + U(i, -1) = U(i, 1); + U(i, N) = U(i, N - 2); + } + for (int_t j = 0; j < N; j++) { + U(-1, j) = U(1, j); + U(N, j) = U(N - 2, j); + } +} + +// Save the present time step in a numbered file under 'data/' +void domain_save(int_t step) { + char filename[256]; + sprintf(filename, "data/%.5ld.dat", step); + FILE *out = fopen(filename, "wb"); + for (int_t i = 0; i < N; i++) + fwrite(&U(i, 0), sizeof(real_t), N, out); + fclose(out); +} + +// Main time integration. +void simulate(void) { + // Go through each time step + for (int_t iteration = 0; iteration <= max_iteration; iteration++) { + if ((iteration % snapshot_freq) == 0) { + domain_save(iteration / snapshot_freq); + } + + // Derive step t+1 from steps t and t-1 + boundary_condition(); + time_step(); + + // Rotate the time step buffers + move_buffer_window(); + } +} + +int main(void) { + // Set up the initial state of the domain + domain_initialize(); + + double t_start, t_end; + t_start = omp_get_wtime(); + // Go through each time step + simulate(); + t_end = omp_get_wtime(); + printf("%lf seconds elapsed with %d threads\n", + t_end - t_start, + omp_get_max_threads()); + + // Clean up and shut down + domain_finalize(); + exit(EXIT_SUCCESS); +} diff --git a/exercise5/handout_pthreads/.gitignore b/exercise5/handout_pthreads/.gitignore new file mode 100644 index 0000000..1050c9f --- /dev/null +++ b/exercise5/handout_pthreads/.gitignore @@ -0,0 +1,5 @@ +data/ +data_sequential/ +images/ +parallel +sequential diff --git a/exercise5/handout_pthreads/Makefile b/exercise5/handout_pthreads/Makefile new file mode 100644 index 0000000..58add22 --- /dev/null +++ b/exercise5/handout_pthreads/Makefile @@ -0,0 +1,36 @@ +CC=gcc +CFLAGS+= -std=c99 -pthread +LDLIBS+= -lm +SEQUENTIAL_SRC_FILES=wave_2d_sequential.c +PARALLEL_SRC_FILES=wave_2d_pthread.c +IMAGES=$(shell find data -type f | sed s/\\.dat/.png/g | sed s/data/images/g ) +.PHONY: all clean dirs plot movie +all: dirs ${SEQUENTIAL_SRC_FILES} ${PARALLEL_SRC_FILES} +dirs: + mkdir -p data images +sequential: ${SEQUENTIAL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +parallel: ${PARALLEL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +plot: ${IMAGES} +images/%.png: data/%.dat + ./plot_image.sh $< +movie: ${IMAGES} + ffmpeg -y -an -i images/%5d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 -r 12 wave.mp4 +check: dirs sequential parallel + mkdir -p data_sequential + ./sequential + cp -rf ./data/* ./data_sequential + ./parallel 1 + ./compare.sh + rm ./data/* + ./parallel 4 + ./compare.sh + rm ./data/* + ./parallel 13 + ./compare.sh + rm -rf data_sequential +clean: + -rm -fr ${TARGETS} data images wave.mp4 + -rm sequential + -rm parallel diff --git a/exercise5/handout_pthreads/compare.sh b/exercise5/handout_pthreads/compare.sh new file mode 100755 index 0000000..5cd69a7 --- /dev/null +++ b/exercise5/handout_pthreads/compare.sh @@ -0,0 +1,52 @@ +#! /usr/bin/env bash + + +GREEN=$(tput setaf 2) +RED=$(tput setaf 1) +RESET=$(tput sgr0) + +DIR1="data" +DIR2="data_sequential" + +if [ ! -d "$DIR1" ]; then + echo "Directory $DIR1 does not exist." + exit 1 +fi + +if [ ! -d "$DIR2" ]; then + echo "Directory $DIR2 does not exist." + exit 1 +fi + +found_difference=1 +for file in "$DIR1"/*.dat; do + # Extract the file name (basename) + filename=$(basename "$file") + + # Check if the corresponding file exists in DIR2 + if [ -f "$DIR2/$filename" ]; then + # Compare the two files using diff + diff_output=$(diff "$file" "$DIR2/$filename") + + if [ -n "$diff_output" ]; then + echo "$diff_output" + found_difference=0 + fi + else + echo "File $filename does not exist in $DIR2" + fi +done + +if [ $found_difference -eq 1 ]; then + echo + echo + echo "${GREEN}The sequential and parallel version produced mathcing output!${RESET}" + echo + echo +else + echo + echo + echo "${RED}There were mismatches between the sequential and the parallel output.${RESET}" + echo + echo +fi diff --git a/exercise5/handout_pthreads/plot_image.sh b/exercise5/handout_pthreads/plot_image.sh new file mode 100755 index 0000000..e36e6af --- /dev/null +++ b/exercise5/handout_pthreads/plot_image.sh @@ -0,0 +1,10 @@ +#! /usr/bin/env bash +SIZE=1024 +DATAFILE=$1 +IMAGEFILE=`echo $1 | sed s/dat$/png/ | sed s/data/images/` +cat < +#include +#include +#include +#include +#include +#include + +// TASK: T1a +// Include the pthreads library +// BEGIN: T1a +; +// END: T1a + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// TASK: T1b +// Pthread management +// BEGIN: T1b +int_t n_threads = 1; +// END: T1b + +// Performance measurement +struct timeval t_start, t_end; +#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec) + +// Simulation parameters: size, step count, and how often to save the state +const int_t + N = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + h = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i, j) buffers[0][((i) + 1) * (N + 2) + (j) + 1] +#define U(i, j) buffers[1][((i) + 1) * (N + 2) + (j) + 1] +#define U_nxt(i, j) buffers[2][((i) + 1) * (N + 2) + (j) + 1] + +// Rotate the time step buffers. +void move_buffer_window(void) { + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + +// Set up our three buffers, and fill two with an initial perturbation +void domain_initialize(void) { + buffers[0] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[1] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[2] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + + for (int_t i = 0; i < N; i++) { + for (int_t j = 0; j < N; j++) { + real_t delta = sqrt(((i - N / 2) * (i - N / 2) + (j - N / 2) * (j - N / 2)) / (real_t)N); + U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta); + } + } + + // Set the time step + dt = (h * h) / (4.0 * c * c); +} + +// Get rid of all the memory allocations +void domain_finalize(void) { + free(buffers[0]); + free(buffers[1]); + free(buffers[2]); +} + +// TASK: T3 +// Integration formula +void time_step(int_t thread_id) { + // BEGIN: T3 + for (int_t i = 0; i < N; i += 1) + for (int_t j = 0; j < N; j++) + U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j)); + // END: T3 +} + +// TASK: T4 +// Neumann (reflective) boundary condition +void boundary_condition(int_t thread_id) { + // BEGIN: T4 + for (int_t i = 0; i < N; i += 1) { + U(i, -1) = U(i, 1); + U(i, N) = U(i, N - 2); + } + for (int_t j = 0; j < N; j += 1) { + U(-1, j) = U(1, j); + U(N, j) = U(N - 2, j); + } + // END: T4 +} + +// Save the present time step in a numbered file under 'data/' +void domain_save(int_t step) { + char filename[256]; + sprintf(filename, "data/%.5ld.dat", step); + FILE *out = fopen(filename, "wb"); + for (int_t i = 0; i < N; i++) + fwrite(&U(i, 0), sizeof(real_t), N, out); + fclose(out); +} + +// TASK: T5 +// Main loop +void *simulate(void *id) { + // BEGIN: T5 + // Go through each time step + for (int_t iteration = 0; iteration <= max_iteration; iteration++) { + if ((iteration % snapshot_freq) == 0) { + domain_save(iteration / snapshot_freq); + } + + // Derive step t+1 from steps t and t-1 + boundary_condition(0); + time_step(0); + + // Rotate the time step buffers + move_buffer_window(); + } + // END: T5 +} + +// Main time integration loop +int main(int argc, char **argv) { + // Number of threads is an optional argument, sanity check its value + if (argc > 1) { + n_threads = strtol(argv[1], NULL, 10); + if (errno == EINVAL) + fprintf(stderr, "'%s' is not a valid thread count\n", argv[1]); + if (n_threads < 1) { + fprintf(stderr, "Number of threads must be >0\n"); + exit(EXIT_FAILURE); + } + } + + // TASK: T1c + // Initialise pthreads + // BEGIN: T1c + ; + // END: T1b + + // Set up the initial state of the domain + domain_initialize(); + + // Time the execution + gettimeofday(&t_start, NULL); + + // TASK: T2 + // Run the integration loop + // BEGIN: T2 + simulate(NULL); + // END: T2 + + // Report how long we spent in the integration stage + gettimeofday(&t_end, NULL); + printf("%lf seconds elapsed with %ld threads\n", + WALLTIME(t_end) - WALLTIME(t_start), + n_threads); + + // Clean up and shut down + domain_finalize(); + + // TASK: T1d + // Finalise pthreads + // BEGIN: T1d + ; + // END: T1d + + exit(EXIT_SUCCESS); +} diff --git a/exercise5/handout_pthreads/wave_2d_sequential.c b/exercise5/handout_pthreads/wave_2d_sequential.c new file mode 100644 index 0000000..eca6a82 --- /dev/null +++ b/exercise5/handout_pthreads/wave_2d_sequential.c @@ -0,0 +1,169 @@ +#define _XOPEN_SOURCE 600 +#include +#include +#include +#include +#include +#include + + +// Convert 'struct timeval' into seconds in double prec. floating point +#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec) + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +int_t + N = 1024, + M = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + dx = 1.0, + dy = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1] +#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1] +#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1] + + +// Rotate the time step buffers. +void move_buffer_window ( void ) +{ + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + + +// Save the present time step in a numbered file under 'data/' +void domain_save ( int_t step ) +{ + char filename[256]; + sprintf ( filename, "data/%.5ld.dat", step ); + FILE *out = fopen ( filename, "wb" ); + for ( int_t i=0; i