From 2deec88da02f6664ef52ca81c92922c32668bab8 Mon Sep 17 00:00:00 2001 From: fredrikr79 Date: Mon, 20 Oct 2025 18:17:16 +0200 Subject: [PATCH] ex5: init --- exercise5/handout_openmp/.gitignore | 5 + exercise5/handout_openmp/Makefile | 33 ++++ exercise5/handout_openmp/compare.sh | 52 +++++ exercise5/handout_openmp/plot_image.sh | 10 + exercise5/handout_openmp/wave_2d_barrier.c | 180 +++++++++++++++++ exercise5/handout_openmp/wave_2d_sequential.c | 169 ++++++++++++++++ exercise5/handout_openmp/wave_2d_workshare.c | 136 +++++++++++++ exercise5/handout_pthreads/.gitignore | 5 + exercise5/handout_pthreads/Makefile | 36 ++++ exercise5/handout_pthreads/compare.sh | 52 +++++ exercise5/handout_pthreads/plot_image.sh | 10 + exercise5/handout_pthreads/sequential | Bin 0 -> 16752 bytes exercise5/handout_pthreads/wave_2d_pthread.c | 185 ++++++++++++++++++ .../handout_pthreads/wave_2d_sequential.c | 169 ++++++++++++++++ 14 files changed, 1042 insertions(+) create mode 100644 exercise5/handout_openmp/.gitignore create mode 100644 exercise5/handout_openmp/Makefile create mode 100755 exercise5/handout_openmp/compare.sh create mode 100755 exercise5/handout_openmp/plot_image.sh create mode 100644 exercise5/handout_openmp/wave_2d_barrier.c create mode 100644 exercise5/handout_openmp/wave_2d_sequential.c create mode 100644 exercise5/handout_openmp/wave_2d_workshare.c create mode 100644 exercise5/handout_pthreads/.gitignore create mode 100644 exercise5/handout_pthreads/Makefile create mode 100755 exercise5/handout_pthreads/compare.sh create mode 100755 exercise5/handout_pthreads/plot_image.sh create mode 100755 exercise5/handout_pthreads/sequential create mode 100644 exercise5/handout_pthreads/wave_2d_pthread.c create mode 100644 exercise5/handout_pthreads/wave_2d_sequential.c diff --git a/exercise5/handout_openmp/.gitignore b/exercise5/handout_openmp/.gitignore new file mode 100644 index 0000000..1050c9f --- /dev/null +++ b/exercise5/handout_openmp/.gitignore @@ -0,0 +1,5 @@ +data/ +data_sequential/ +images/ +parallel +sequential diff --git a/exercise5/handout_openmp/Makefile b/exercise5/handout_openmp/Makefile new file mode 100644 index 0000000..aac2022 --- /dev/null +++ b/exercise5/handout_openmp/Makefile @@ -0,0 +1,33 @@ +CC=gcc +CFLAGS+= -std=c99 -fopenmp +LDLIBS+= -lm +SEQUENTIAL_SRC_FILES=wave_2d_sequential.c +PARALLEL_SRC_FILES=wave_2d_workshare.c +BARRIER_SRC_FILES=wave_2d_barrier.c +IMAGES=$(shell find data -type f | sed s/\\.dat/.png/g | sed s/data/images/g ) +.PHONY: all clean dirs plot movie +all: dirs ${SEQUENTIAL_SRC_FILES} ${PARALLEL_SRC_FILES} ${BARRIER_SRC_FILES} +dirs: + mkdir -p data images +sequential: ${SEQUENTIAL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +parallel: ${PARALLEL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +barrier: ${BARRIER_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +plot: ${IMAGES} +images/%.png: data/%.dat + ./plot_image.sh $< +movie: ${IMAGES} + ffmpeg -y -an -i images/%5d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 -r 12 wave.mp4 +check: dirs sequential parallel + mkdir -p data_sequential + ./sequential + cp -rf ./data/* ./data_sequential + ./parallel + ./compare.sh + rm -rf data_sequential +clean: + -rm -fr ${TARGETS} data images wave.mp4 + -rm sequential + -rm parallel diff --git a/exercise5/handout_openmp/compare.sh b/exercise5/handout_openmp/compare.sh new file mode 100755 index 0000000..5cd69a7 --- /dev/null +++ b/exercise5/handout_openmp/compare.sh @@ -0,0 +1,52 @@ +#! /usr/bin/env bash + + +GREEN=$(tput setaf 2) +RED=$(tput setaf 1) +RESET=$(tput sgr0) + +DIR1="data" +DIR2="data_sequential" + +if [ ! -d "$DIR1" ]; then + echo "Directory $DIR1 does not exist." + exit 1 +fi + +if [ ! -d "$DIR2" ]; then + echo "Directory $DIR2 does not exist." + exit 1 +fi + +found_difference=1 +for file in "$DIR1"/*.dat; do + # Extract the file name (basename) + filename=$(basename "$file") + + # Check if the corresponding file exists in DIR2 + if [ -f "$DIR2/$filename" ]; then + # Compare the two files using diff + diff_output=$(diff "$file" "$DIR2/$filename") + + if [ -n "$diff_output" ]; then + echo "$diff_output" + found_difference=0 + fi + else + echo "File $filename does not exist in $DIR2" + fi +done + +if [ $found_difference -eq 1 ]; then + echo + echo + echo "${GREEN}The sequential and parallel version produced mathcing output!${RESET}" + echo + echo +else + echo + echo + echo "${RED}There were mismatches between the sequential and the parallel output.${RESET}" + echo + echo +fi diff --git a/exercise5/handout_openmp/plot_image.sh b/exercise5/handout_openmp/plot_image.sh new file mode 100755 index 0000000..e36e6af --- /dev/null +++ b/exercise5/handout_openmp/plot_image.sh @@ -0,0 +1,10 @@ +#! /usr/bin/env bash +SIZE=1024 +DATAFILE=$1 +IMAGEFILE=`echo $1 | sed s/dat$/png/ | sed s/data/images/` +cat < +#include +#include +#include +#include + +#include + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +const int_t + N = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + h = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; +#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1] +#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1] +#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1] + +// Function definitions follow below main +void domain_initialize ( void ); +void domain_save ( int_t step ); +void domain_finalize ( void ); +void main_loop ( void ); +void time_step ( int_t thread_id ); +void boundary_condition ( int_t thread_id ); + + +// Main time integration loop +int +main () +{ + // Set up the initial state of the domain + domain_initialize(); + + double t_start, t_end; + t_start = omp_get_wtime(); + + #pragma omp parallel + main_loop(); + + t_end = omp_get_wtime(); + printf ( "%lf seconds elapsed with %d threads\n", + t_end-t_start, + omp_get_max_threads() + ); + + // Clean up and shut down + domain_finalize(); + exit ( EXIT_SUCCESS ); +} + + +void +main_loop ( void ) +{ + int_t thread_id = omp_get_thread_num(); + + // Go through each time step + for ( int_t iteration=0; iteration<=max_iteration; iteration++ ) + { + // Master thread saves the state of the computation + #pragma omp master + if ( (iteration % snapshot_freq)==0 ) + { + domain_save ( iteration / snapshot_freq ); + printf ( "Iteration %ld out of %ld\n", iteration, max_iteration ); + } + + // Make sure all saving is finished before we begin + #pragma omp barrier + + // Run the time step in parallel + boundary_condition ( thread_id ); + #pragma omp barrier + time_step ( thread_id ); + + #pragma omp barrier + + // Master thread rotates the time step buffers + #pragma omp master + { + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; + } + } +} + + +// Integration formula +void +time_step ( int_t thread_id ) +{ + int_t n_threads = omp_get_num_threads(); + for ( int_t i=thread_id; i +#include +#include +#include +#include +#include + + +// Convert 'struct timeval' into seconds in double prec. floating point +#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec) + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +int_t + N = 1024, + M = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + dx = 1.0, + dy = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1] +#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1] +#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1] + + +// Rotate the time step buffers. +void move_buffer_window ( void ) +{ + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + + +// Save the present time step in a numbered file under 'data/' +void domain_save ( int_t step ) +{ + char filename[256]; + sprintf ( filename, "data/%.5ld.dat", step ); + FILE *out = fopen ( filename, "wb" ); + for ( int_t i=0; i +#include +#include +#include +#include + +// TASK: T6 +// Include the OpenMP library +// BEGIN: T6 +; +// END: T6 + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +const int_t + N = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + h = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i, j) buffers[0][((i) + 1) * (N + 2) + (j) + 1] +#define U(i, j) buffers[1][((i) + 1) * (N + 2) + (j) + 1] +#define U_nxt(i, j) buffers[2][((i) + 1) * (N + 2) + (j) + 1] + +// Rotate the time step buffers. +void move_buffer_window(void) { + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + +// Set up our three buffers, and fill two with an initial perturbation +void domain_initialize(void) { + buffers[0] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[1] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[2] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + + for (int_t i = 0; i < N; i++) { + for (int_t j = 0; j < N; j++) { + real_t delta = sqrt(((i - N / 2) * (i - N / 2) + (j - N / 2) * (j - N / 2)) / (real_t)N); + U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta); + } + } + + // Set the time step for 2D case + dt = (h * h) / (4.0 * c * c); +} + +// Get rid of all the memory allocations +void domain_finalize(void) { + free(buffers[0]); + free(buffers[1]); + free(buffers[2]); +} + +// TASK: T7 +// Integration formula +void time_step(void) { + // BEGIN: T7 + for (int_t i = 0; i < N; i++) + for (int_t j = 0; j < N; j++) + U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j)); + // END: T7 +} + +// Neumann (reflective) boundary condition +void boundary_condition(void) { + for (int_t i = 0; i < N; i++) { + U(i, -1) = U(i, 1); + U(i, N) = U(i, N - 2); + } + for (int_t j = 0; j < N; j++) { + U(-1, j) = U(1, j); + U(N, j) = U(N - 2, j); + } +} + +// Save the present time step in a numbered file under 'data/' +void domain_save(int_t step) { + char filename[256]; + sprintf(filename, "data/%.5ld.dat", step); + FILE *out = fopen(filename, "wb"); + for (int_t i = 0; i < N; i++) + fwrite(&U(i, 0), sizeof(real_t), N, out); + fclose(out); +} + +// Main time integration. +void simulate(void) { + // Go through each time step + for (int_t iteration = 0; iteration <= max_iteration; iteration++) { + if ((iteration % snapshot_freq) == 0) { + domain_save(iteration / snapshot_freq); + } + + // Derive step t+1 from steps t and t-1 + boundary_condition(); + time_step(); + + // Rotate the time step buffers + move_buffer_window(); + } +} + +int main(void) { + // Set up the initial state of the domain + domain_initialize(); + + double t_start, t_end; + t_start = omp_get_wtime(); + // Go through each time step + simulate(); + t_end = omp_get_wtime(); + printf("%lf seconds elapsed with %d threads\n", + t_end - t_start, + omp_get_max_threads()); + + // Clean up and shut down + domain_finalize(); + exit(EXIT_SUCCESS); +} diff --git a/exercise5/handout_pthreads/.gitignore b/exercise5/handout_pthreads/.gitignore new file mode 100644 index 0000000..1050c9f --- /dev/null +++ b/exercise5/handout_pthreads/.gitignore @@ -0,0 +1,5 @@ +data/ +data_sequential/ +images/ +parallel +sequential diff --git a/exercise5/handout_pthreads/Makefile b/exercise5/handout_pthreads/Makefile new file mode 100644 index 0000000..58add22 --- /dev/null +++ b/exercise5/handout_pthreads/Makefile @@ -0,0 +1,36 @@ +CC=gcc +CFLAGS+= -std=c99 -pthread +LDLIBS+= -lm +SEQUENTIAL_SRC_FILES=wave_2d_sequential.c +PARALLEL_SRC_FILES=wave_2d_pthread.c +IMAGES=$(shell find data -type f | sed s/\\.dat/.png/g | sed s/data/images/g ) +.PHONY: all clean dirs plot movie +all: dirs ${SEQUENTIAL_SRC_FILES} ${PARALLEL_SRC_FILES} +dirs: + mkdir -p data images +sequential: ${SEQUENTIAL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +parallel: ${PARALLEL_SRC_FILES} + $(CC) $^ $(CFLAGS) -o $@ $(LDLIBS) +plot: ${IMAGES} +images/%.png: data/%.dat + ./plot_image.sh $< +movie: ${IMAGES} + ffmpeg -y -an -i images/%5d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 -r 12 wave.mp4 +check: dirs sequential parallel + mkdir -p data_sequential + ./sequential + cp -rf ./data/* ./data_sequential + ./parallel 1 + ./compare.sh + rm ./data/* + ./parallel 4 + ./compare.sh + rm ./data/* + ./parallel 13 + ./compare.sh + rm -rf data_sequential +clean: + -rm -fr ${TARGETS} data images wave.mp4 + -rm sequential + -rm parallel diff --git a/exercise5/handout_pthreads/compare.sh b/exercise5/handout_pthreads/compare.sh new file mode 100755 index 0000000..5cd69a7 --- /dev/null +++ b/exercise5/handout_pthreads/compare.sh @@ -0,0 +1,52 @@ +#! /usr/bin/env bash + + +GREEN=$(tput setaf 2) +RED=$(tput setaf 1) +RESET=$(tput sgr0) + +DIR1="data" +DIR2="data_sequential" + +if [ ! -d "$DIR1" ]; then + echo "Directory $DIR1 does not exist." + exit 1 +fi + +if [ ! -d "$DIR2" ]; then + echo "Directory $DIR2 does not exist." + exit 1 +fi + +found_difference=1 +for file in "$DIR1"/*.dat; do + # Extract the file name (basename) + filename=$(basename "$file") + + # Check if the corresponding file exists in DIR2 + if [ -f "$DIR2/$filename" ]; then + # Compare the two files using diff + diff_output=$(diff "$file" "$DIR2/$filename") + + if [ -n "$diff_output" ]; then + echo "$diff_output" + found_difference=0 + fi + else + echo "File $filename does not exist in $DIR2" + fi +done + +if [ $found_difference -eq 1 ]; then + echo + echo + echo "${GREEN}The sequential and parallel version produced mathcing output!${RESET}" + echo + echo +else + echo + echo + echo "${RED}There were mismatches between the sequential and the parallel output.${RESET}" + echo + echo +fi diff --git a/exercise5/handout_pthreads/plot_image.sh b/exercise5/handout_pthreads/plot_image.sh new file mode 100755 index 0000000..e36e6af --- /dev/null +++ b/exercise5/handout_pthreads/plot_image.sh @@ -0,0 +1,10 @@ +#! /usr/bin/env bash +SIZE=1024 +DATAFILE=$1 +IMAGEFILE=`echo $1 | sed s/dat$/png/ | sed s/data/images/` +cat <JfjWMqH=CI&kO5O0B?16T+`GB6lefw^G9fx&`-m%)KSmO+Mrje&uIg@J(q zrp^J%g3&jaz*-n!GzWyszzo$V0b(#PFi0>%On}kBP<1dG@ zpccSr1_6jFkUmxr6Ut{0fa-(NnNWQ&8d)FMJ_V3w1_lNNs6Hrt0_r~)jjRt8HWs=N zeHOM5eK7h5RG$Xa|1fVwg6Z$pd)+XG?-KP^ci$G!llz6Pkl4bX6e(J3Iq85kH~G{_E+ zP~g*&6i~Q;*u-F1GzUTL!xay(c!SZPbOJJ8KQFUFzqlm7C{;fvH_tq$yfUSrs4z1r zB|AN%+`K$JInT(jG9xFa%FtLhJts3MS=UI<#6Z`~Og|?xNk1n=HzzZ%v_iMS!c5o9 zM6Wns&j@5H132CDGcYjlGB7Z3F)%RjL4?61%x+MU07Wy*@9Yea_yVP0P#ADBFfed4 zFfi~iFo5$aNFNA;4Iz>+XJB9u!lE`4hj=v(aY-EFjyS|Y?u14?itR}_)NA1o_roD> zgG0Othqwq1aZvif7Cs?3)HB4#r|0J9#TS<(7L~-uGo)4&FccRSl`zDEgp#x4lQXj8 z(-Jdt7~J zpA2G|F}VA9IyuK1=^0rfn0lsgwy~ZGoQ+@^n=t5S6hkZ>K7#G z=a-fgl$I2OXi(y}qA2Si>rTrqH?b(oFwZQ@Da*;tC@{$~Hpr+nN-s+{Fvv>J%+pOz zPS!Ov(KFUF&;{qZOi(IfVqj)qVqjrlVqj!oVqj%p0z)1t1 z<}oweg7P8eF~QCIU=JyYLHQVz$3bR-@DC($kQ_)1geO47L1h3a?|{TWcm|Rwyxc{}()(k8m6Y8}#2aN}qw@ziNy=0|URj1H*q+ z5I+MX_wvF2|NsB12I(^}WPq9`FE4=kULZcGY4h>`nC}GQgPJHWH-Pz8AU>#R@^S%~ zZv^6lnj|kLfcaV=KBx)wvH{Fj0`Wmjla~cxz7&WLYLdK60P}@Fd{9&5WdNAZ1>%F6 zATJ%jd?pYd)bx01@c;jRki(I2s7L2hkLEW9DIVP{su~Oo3?7}XKMW7N*uuoX;L%%q z!SLG)%m4rXzu3YC=07m}_CoXj|Nm+H?+>K$zd!kjUqIB5pMl|%2fvo$~@kbm4Yw+OL^1bniKk`%>|NBcI`OpiW_#;kz;*UA_;{Ct> z|6jafW?(S<|AP7d|Ns2*450WieCyeHG0vm&kw^2B4*?;rh6fB!dK^E_An3sGUsPL< zf#FM<9>07G*jpg=hY{QfI}98g7(8r2WQj1iQ45M<&ix>P{}0k898Nuqd^O$NgQhL;ZW%eyczgnD-V z@aW~0_Vm9AkJJ;S0}h zagXN1jGoqSMLWhg#yZA1#>av@XL!=^o8f;~!vlw3bp84NA7LLT$Ps>Pe&gYB z`~b+U9^C>S#}9xU>C>&@(_6&k(HXkIx3`wbmw)|t&x6mHeLBCti2w8dzfb3PkIuue zVC|H6;RO-&>^uS$^yy6Ccwqu^=<$Of?|O8HZt&=I6!2)h#NYRYfq}uRm*p=gF?w{m zuJB|$;=y>>xAiT5-%SPvhBW^7-_rQs|99L6${vOXKpEiECw>9eGu#XepFH`s{(j;Y zES>R*Kk~yT{>bB>_yucsfJ7F2;umy1z%LIn;DD>)e~`WdpZH^37kuK6Jn)G>&UFSz z#fb^fjQJL1)^U)6126?UKJiDmo=D@@dr@BVi9hhbCw>9mIFPOUktaU!M;?4}@b~}! zpPcyxUH))#xGDJsOEUl z4`B#+bjSWU`LglX|Nj#}>Jjk^i(62sb-|3!oqciq|Pq&AFN3S!JPp`~TkZU|Uk9u^v9`Niu?$deO^Wak!kMFlV zduu-+(wArR5k`;Z+7k?QRbIU&#-6=CvJ9ROzr&-Q@tOzYMX3Ki@eBAcaeQ**7ZCWw zFW|z)@rggeg%L!uFs1Qp+yrr%1U~Uc-uc8Iaq^QRe+)=Df8?c4{4s}N1?MM6enA&T z4v*#o93Ga(OILezyB_dmyyv5N+n4dK5957L#^XMgsTcTLK=}q7AH6nLL1RH4-5wksj6Xd(PZ*v=Dl#X0;umlb z@Mu2B_=#VT0p!CAKR)pbx?JG!VEo}>`LXoAN9(1!ELh0*dNFx4zhm^^U;ojg+X3RC zUK>A;UY-jc2OqH@84N4mKt>8EKn-X4@QGiLQRJ1<)26AQ%E`2 z{D#Bx_)!T^F$F5LDttYU9}#eHU@*M>S{V_)FJ6NR!{)<`FDCu{|Nq6@U;qDmTE;Te z$a{3&@R;fO-A2Wu^-}3gkIv&S7XJGG|MhujJcC*c;E;zDPya!!2T<7#PTKI+1SpM- z;?WQo4S~@R7!85Z5Eu=Cfer!Cd`L=SNus{0o@q{sUP@vKLpf*`B_zKjF-IXaC$XS7 zHAMk5Gi9ZqnvN^&`Ii|L?%az+mw8|NjI=28M=j|Nr+eGB6~3|NnmhNS=X#0W{ACasy*k z5Cda{0HZVyJI4e@$UKk?0|Ud15C8wGffTr~3xL{tpt+$fAO8Ob4NQQ9RTvl;To@P_ zKD>jtfq_rJjZeahpSzr+fx%wNTFY1kJcAC>o5R4skn!>Ve`%0L1_lPuumZ>*B_IF) ze+CkB0>>}T#{>1FL?OB zMW|(NV+1Mo289{O?ROX$7w2K5U-LNIwy`x(S9fUdLt`tN@}h<^ae2er#Vd=FNLeV_?g z5dQ&G9@ft8fO-tn_W;R*+WjC}6ht6lP}>~Dt$?oE0!_Pt_zF<{u>KfmVizPX4k8#B z7#6TY?1zaLK>09#fcg+1rLcYi%zinLAatE9i23_J#D7rh82&-|AJ`z~|A+En?)m`b zUx3Pk%%CNPt>;F!!wee#ZcsW5N~b~TGAP{!rKdsZWl(w>ls*QfuR-Z&P#WDnbUrLz z7JwEhgCt=I2THp;J6kDexch}_DnM3Vg4E!_pzvp5_=qv@4vJ4k1|bIYb>^Tox7gIH zgZq$}=75$}GcoWmJb><3fH}p4fq?-eE&$D+FmZRNI4s}7#ACsERDuCC?+p?Ixf?`- z)$~z~Vd%51{sf$2b@m80JI8 z4?x`yD{nV}#gPnvGEam3#mC?P-Om6EhjUQ(qlepVusJ*o4WbbLg2!$c7#LoI!x?HQ z67>sYK9e{D2U_^EGl9aN556x2noSwRn6SsMCRn{V187YE$at7LwV6QUKN1WHXy$|V znFulQFkFDm7af@EOk ztOBd&VYna+i5Kt~90LQx98 zypp0yhP0x@+|>A#(%jrihWL0SUVLU=W(kVYns<9xg!)@$NqUPL4kD{%&r;t|9Ruj!r(V@!(Au4)8q@dZw6r zEa0-nCJcTIDfys%7xBf3WvL8FrDv!pmL zv7k63za&1bD7BCQG_w$2T#{M<+4BNd3Gz^GVnuvrNor9dI5?r94oU`@5D$U&&7f$2 zY=J=$ijNQRb%v&llnREF67a4ZR3#uIK^t}uqL!e@LF~POOMu+ZpjTX(TauW>pjTW{ z1fervtjxTU)S?0gz4W|Ny^O@-3Egk&U$kuY{jWnN-#W-^0bdVUE5XeKC;K`*Jem_aW&KQ}iuuLN9N zP)dO6QBd0o*6xPw_XL$sAhTinJwa>`2B`(nFbrx3g6bfcepr8|0NUXQfGUL5gRpWG z##d%wVEFd`e?Ck zKzd>A98(6!9zd9WSbxg{_zyRvI!1TlVD;J<0HdsFh=6;wu z812Qt0PYLH_^|%a2dI9~JQc`ZkaIw4(ET3?ZpVV!)*w|346y!L0kp#g>z~2whv|p; zKN+gu38Ed|-&+9H59=R-^ot?&=V1DC!R=Yl4lSqwuzn3_a)^O}0hGR=GGGd14wwNY zKr5F)VG8vwg922)0#rX}ei5n^OhJXg`^7*4#pw1!+eHk5AXx^6NT@#`Jj8xm7$4NW zM7AH+PK|+j5Z3R9sYiEzJ=A_s+4VWX7$9jErXSXigYEYQwYNcT1?4{w4bulphYz9Q57Q6p*Hu6}5FH>*NEoIcMuYlc zpeTarhxbdM85c^yO@Qfx3A};&5vCv3o;QGMgiYTyoNz=6sEkXbPIL$enHC$#*9YC}+F5T+5#V-PNA$vBh+p+MOjO$k`S R$P!F3{DM$$66S80000Yn`)>dM literal 0 HcmV?d00001 diff --git a/exercise5/handout_pthreads/wave_2d_pthread.c b/exercise5/handout_pthreads/wave_2d_pthread.c new file mode 100644 index 0000000..3843790 --- /dev/null +++ b/exercise5/handout_pthreads/wave_2d_pthread.c @@ -0,0 +1,185 @@ +#define _XOPEN_SOURCE 600 +#include +#include +#include +#include +#include +#include +#include + +// TASK: T1a +// Include the pthreads library +// BEGIN: T1a +; +// END: T1a + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// TASK: T1b +// Pthread management +// BEGIN: T1b +int_t n_threads = 1; +// END: T1b + +// Performance measurement +struct timeval t_start, t_end; +#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec) + +// Simulation parameters: size, step count, and how often to save the state +const int_t + N = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + h = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i, j) buffers[0][((i) + 1) * (N + 2) + (j) + 1] +#define U(i, j) buffers[1][((i) + 1) * (N + 2) + (j) + 1] +#define U_nxt(i, j) buffers[2][((i) + 1) * (N + 2) + (j) + 1] + +// Rotate the time step buffers. +void move_buffer_window(void) { + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + +// Set up our three buffers, and fill two with an initial perturbation +void domain_initialize(void) { + buffers[0] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[1] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + buffers[2] = malloc((N + 2) * (N + 2) * sizeof(real_t)); + + for (int_t i = 0; i < N; i++) { + for (int_t j = 0; j < N; j++) { + real_t delta = sqrt(((i - N / 2) * (i - N / 2) + (j - N / 2) * (j - N / 2)) / (real_t)N); + U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta); + } + } + + // Set the time step + dt = (h * h) / (4.0 * c * c); +} + +// Get rid of all the memory allocations +void domain_finalize(void) { + free(buffers[0]); + free(buffers[1]); + free(buffers[2]); +} + +// TASK: T3 +// Integration formula +void time_step(int_t thread_id) { + // BEGIN: T3 + for (int_t i = 0; i < N; i += 1) + for (int_t j = 0; j < N; j++) + U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j)); + // END: T3 +} + +// TASK: T4 +// Neumann (reflective) boundary condition +void boundary_condition(int_t thread_id) { + // BEGIN: T4 + for (int_t i = 0; i < N; i += 1) { + U(i, -1) = U(i, 1); + U(i, N) = U(i, N - 2); + } + for (int_t j = 0; j < N; j += 1) { + U(-1, j) = U(1, j); + U(N, j) = U(N - 2, j); + } + // END: T4 +} + +// Save the present time step in a numbered file under 'data/' +void domain_save(int_t step) { + char filename[256]; + sprintf(filename, "data/%.5ld.dat", step); + FILE *out = fopen(filename, "wb"); + for (int_t i = 0; i < N; i++) + fwrite(&U(i, 0), sizeof(real_t), N, out); + fclose(out); +} + +// TASK: T5 +// Main loop +void *simulate(void *id) { + // BEGIN: T5 + // Go through each time step + for (int_t iteration = 0; iteration <= max_iteration; iteration++) { + if ((iteration % snapshot_freq) == 0) { + domain_save(iteration / snapshot_freq); + } + + // Derive step t+1 from steps t and t-1 + boundary_condition(0); + time_step(0); + + // Rotate the time step buffers + move_buffer_window(); + } + // END: T5 +} + +// Main time integration loop +int main(int argc, char **argv) { + // Number of threads is an optional argument, sanity check its value + if (argc > 1) { + n_threads = strtol(argv[1], NULL, 10); + if (errno == EINVAL) + fprintf(stderr, "'%s' is not a valid thread count\n", argv[1]); + if (n_threads < 1) { + fprintf(stderr, "Number of threads must be >0\n"); + exit(EXIT_FAILURE); + } + } + + // TASK: T1c + // Initialise pthreads + // BEGIN: T1c + ; + // END: T1b + + // Set up the initial state of the domain + domain_initialize(); + + // Time the execution + gettimeofday(&t_start, NULL); + + // TASK: T2 + // Run the integration loop + // BEGIN: T2 + simulate(NULL); + // END: T2 + + // Report how long we spent in the integration stage + gettimeofday(&t_end, NULL); + printf("%lf seconds elapsed with %ld threads\n", + WALLTIME(t_end) - WALLTIME(t_start), + n_threads); + + // Clean up and shut down + domain_finalize(); + + // TASK: T1d + // Finalise pthreads + // BEGIN: T1d + ; + // END: T1d + + exit(EXIT_SUCCESS); +} diff --git a/exercise5/handout_pthreads/wave_2d_sequential.c b/exercise5/handout_pthreads/wave_2d_sequential.c new file mode 100644 index 0000000..eca6a82 --- /dev/null +++ b/exercise5/handout_pthreads/wave_2d_sequential.c @@ -0,0 +1,169 @@ +#define _XOPEN_SOURCE 600 +#include +#include +#include +#include +#include +#include + + +// Convert 'struct timeval' into seconds in double prec. floating point +#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec) + +// Option to change numerical precision +typedef int64_t int_t; +typedef double real_t; + +// Simulation parameters: size, step count, and how often to save the state +int_t + N = 1024, + M = 1024, + max_iteration = 4000, + snapshot_freq = 20; + +// Wave equation parameters, time step is derived from the space step +const real_t + c = 1.0, + dx = 1.0, + dy = 1.0; +real_t + dt; + +// Buffers for three time steps, indexed with 2 ghost points for the boundary +real_t + *buffers[3] = { NULL, NULL, NULL }; + +#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1] +#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1] +#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1] + + +// Rotate the time step buffers. +void move_buffer_window ( void ) +{ + real_t *temp = buffers[0]; + buffers[0] = buffers[1]; + buffers[1] = buffers[2]; + buffers[2] = temp; +} + + +// Save the present time step in a numbered file under 'data/' +void domain_save ( int_t step ) +{ + char filename[256]; + sprintf ( filename, "data/%.5ld.dat", step ); + FILE *out = fopen ( filename, "wb" ); + for ( int_t i=0; i