ex5: init
This commit is contained in:
5
exercise5/handout_openmp/.gitignore
vendored
Normal file
5
exercise5/handout_openmp/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
data/
|
||||
data_sequential/
|
||||
images/
|
||||
parallel
|
||||
sequential
|
||||
33
exercise5/handout_openmp/Makefile
Normal file
33
exercise5/handout_openmp/Makefile
Normal file
@@ -0,0 +1,33 @@
|
||||
CC=gcc
|
||||
CFLAGS+= -std=c99 -fopenmp
|
||||
LDLIBS+= -lm
|
||||
SEQUENTIAL_SRC_FILES=wave_2d_sequential.c
|
||||
PARALLEL_SRC_FILES=wave_2d_workshare.c
|
||||
BARRIER_SRC_FILES=wave_2d_barrier.c
|
||||
IMAGES=$(shell find data -type f | sed s/\\.dat/.png/g | sed s/data/images/g )
|
||||
.PHONY: all clean dirs plot movie
|
||||
all: dirs ${SEQUENTIAL_SRC_FILES} ${PARALLEL_SRC_FILES} ${BARRIER_SRC_FILES}
|
||||
dirs:
|
||||
mkdir -p data images
|
||||
sequential: ${SEQUENTIAL_SRC_FILES}
|
||||
$(CC) $^ $(CFLAGS) -o $@ $(LDLIBS)
|
||||
parallel: ${PARALLEL_SRC_FILES}
|
||||
$(CC) $^ $(CFLAGS) -o $@ $(LDLIBS)
|
||||
barrier: ${BARRIER_SRC_FILES}
|
||||
$(CC) $^ $(CFLAGS) -o $@ $(LDLIBS)
|
||||
plot: ${IMAGES}
|
||||
images/%.png: data/%.dat
|
||||
./plot_image.sh $<
|
||||
movie: ${IMAGES}
|
||||
ffmpeg -y -an -i images/%5d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 -r 12 wave.mp4
|
||||
check: dirs sequential parallel
|
||||
mkdir -p data_sequential
|
||||
./sequential
|
||||
cp -rf ./data/* ./data_sequential
|
||||
./parallel
|
||||
./compare.sh
|
||||
rm -rf data_sequential
|
||||
clean:
|
||||
-rm -fr ${TARGETS} data images wave.mp4
|
||||
-rm sequential
|
||||
-rm parallel
|
||||
52
exercise5/handout_openmp/compare.sh
Executable file
52
exercise5/handout_openmp/compare.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
|
||||
GREEN=$(tput setaf 2)
|
||||
RED=$(tput setaf 1)
|
||||
RESET=$(tput sgr0)
|
||||
|
||||
DIR1="data"
|
||||
DIR2="data_sequential"
|
||||
|
||||
if [ ! -d "$DIR1" ]; then
|
||||
echo "Directory $DIR1 does not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$DIR2" ]; then
|
||||
echo "Directory $DIR2 does not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
found_difference=1
|
||||
for file in "$DIR1"/*.dat; do
|
||||
# Extract the file name (basename)
|
||||
filename=$(basename "$file")
|
||||
|
||||
# Check if the corresponding file exists in DIR2
|
||||
if [ -f "$DIR2/$filename" ]; then
|
||||
# Compare the two files using diff
|
||||
diff_output=$(diff "$file" "$DIR2/$filename")
|
||||
|
||||
if [ -n "$diff_output" ]; then
|
||||
echo "$diff_output"
|
||||
found_difference=0
|
||||
fi
|
||||
else
|
||||
echo "File $filename does not exist in $DIR2"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $found_difference -eq 1 ]; then
|
||||
echo
|
||||
echo
|
||||
echo "${GREEN}The sequential and parallel version produced mathcing output!${RESET}"
|
||||
echo
|
||||
echo
|
||||
else
|
||||
echo
|
||||
echo
|
||||
echo "${RED}There were mismatches between the sequential and the parallel output.${RESET}"
|
||||
echo
|
||||
echo
|
||||
fi
|
||||
10
exercise5/handout_openmp/plot_image.sh
Executable file
10
exercise5/handout_openmp/plot_image.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#! /usr/bin/env bash
|
||||
SIZE=1024
|
||||
DATAFILE=$1
|
||||
IMAGEFILE=`echo $1 | sed s/dat$/png/ | sed s/data/images/`
|
||||
cat <<END_OF_SCRIPT | gnuplot -
|
||||
set term png
|
||||
set output "$IMAGEFILE"
|
||||
set zrange[-1:1]
|
||||
splot "$DATAFILE" binary array=${SIZE}x${SIZE} format='%double' with pm3d
|
||||
END_OF_SCRIPT
|
||||
180
exercise5/handout_openmp/wave_2d_barrier.c
Normal file
180
exercise5/handout_openmp/wave_2d_barrier.c
Normal file
@@ -0,0 +1,180 @@
|
||||
#define _XOPEN_SOURCE 600
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
// Option to change numerical precision
|
||||
typedef int64_t int_t;
|
||||
typedef double real_t;
|
||||
|
||||
// Simulation parameters: size, step count, and how often to save the state
|
||||
const int_t
|
||||
N = 1024,
|
||||
max_iteration = 4000,
|
||||
snapshot_freq = 20;
|
||||
|
||||
// Wave equation parameters, time step is derived from the space step
|
||||
const real_t
|
||||
c = 1.0,
|
||||
h = 1.0;
|
||||
real_t
|
||||
dt;
|
||||
|
||||
// Buffers for three time steps, indexed with 2 ghost points for the boundary
|
||||
real_t
|
||||
*buffers[3] = { NULL, NULL, NULL };
|
||||
#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1]
|
||||
#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1]
|
||||
#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1]
|
||||
|
||||
// Function definitions follow below main
|
||||
void domain_initialize ( void );
|
||||
void domain_save ( int_t step );
|
||||
void domain_finalize ( void );
|
||||
void main_loop ( void );
|
||||
void time_step ( int_t thread_id );
|
||||
void boundary_condition ( int_t thread_id );
|
||||
|
||||
|
||||
// Main time integration loop
|
||||
int
|
||||
main ()
|
||||
{
|
||||
// Set up the initial state of the domain
|
||||
domain_initialize();
|
||||
|
||||
double t_start, t_end;
|
||||
t_start = omp_get_wtime();
|
||||
|
||||
#pragma omp parallel
|
||||
main_loop();
|
||||
|
||||
t_end = omp_get_wtime();
|
||||
printf ( "%lf seconds elapsed with %d threads\n",
|
||||
t_end-t_start,
|
||||
omp_get_max_threads()
|
||||
);
|
||||
|
||||
// Clean up and shut down
|
||||
domain_finalize();
|
||||
exit ( EXIT_SUCCESS );
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
main_loop ( void )
|
||||
{
|
||||
int_t thread_id = omp_get_thread_num();
|
||||
|
||||
// Go through each time step
|
||||
for ( int_t iteration=0; iteration<=max_iteration; iteration++ )
|
||||
{
|
||||
// Master thread saves the state of the computation
|
||||
#pragma omp master
|
||||
if ( (iteration % snapshot_freq)==0 )
|
||||
{
|
||||
domain_save ( iteration / snapshot_freq );
|
||||
printf ( "Iteration %ld out of %ld\n", iteration, max_iteration );
|
||||
}
|
||||
|
||||
// Make sure all saving is finished before we begin
|
||||
#pragma omp barrier
|
||||
|
||||
// Run the time step in parallel
|
||||
boundary_condition ( thread_id );
|
||||
#pragma omp barrier
|
||||
time_step ( thread_id );
|
||||
|
||||
#pragma omp barrier
|
||||
|
||||
// Master thread rotates the time step buffers
|
||||
#pragma omp master
|
||||
{
|
||||
real_t *temp = buffers[0];
|
||||
buffers[0] = buffers[1];
|
||||
buffers[1] = buffers[2];
|
||||
buffers[2] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Integration formula
|
||||
void
|
||||
time_step ( int_t thread_id )
|
||||
{
|
||||
int_t n_threads = omp_get_num_threads();
|
||||
for ( int_t i=thread_id; i<N; i+=n_threads )
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
U_nxt(i,j) = -U_prv(i,j) + 2.0*U(i,j)
|
||||
+ (dt*dt*c*c)/(h*h) * (
|
||||
U(i-1,j)+U(i+1,j)+U(i,j-1)+U(i,j+1)-4.0*U(i,j)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// Neumann (reflective) boundary condition
|
||||
void
|
||||
boundary_condition ( int_t thread_id )
|
||||
{
|
||||
int_t n_threads = omp_get_num_threads();
|
||||
for ( int_t i=thread_id; i<N; i+=n_threads )
|
||||
{
|
||||
U(i,-1) = U(i,1);
|
||||
U(i,N) = U(i,N-2);
|
||||
}
|
||||
for ( int_t j=thread_id; j<N; j+=n_threads )
|
||||
{
|
||||
U(-1,j) = U(1,j);
|
||||
U(N,j) = U(N-2,j);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Set up our three buffers, and fill two with an initial perturbation
|
||||
void
|
||||
domain_initialize ( void )
|
||||
{
|
||||
buffers[0] = malloc ( (N+2)*(N+2)*sizeof(real_t) );
|
||||
buffers[1] = malloc ( (N+2)*(N+2)*sizeof(real_t) );
|
||||
buffers[2] = malloc ( (N+2)*(N+2)*sizeof(real_t) );
|
||||
|
||||
for ( int_t i=0; i<N; i++ )
|
||||
{
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
real_t delta = sqrt ( ((i-N/2)*(i-N/2)+(j-N/2)*(j-N/2))/(real_t)N );
|
||||
U_prv(i,j) = U(i,j) = exp ( -4.0*delta*delta );
|
||||
}
|
||||
}
|
||||
|
||||
// Set the time step for 2D case
|
||||
dt = (h*h) / (4.0*c*c);
|
||||
}
|
||||
|
||||
|
||||
// Save the present time step in a numbered file under 'data/'
|
||||
void
|
||||
domain_save ( int_t step )
|
||||
{
|
||||
char filename[256];
|
||||
sprintf ( filename, "data/%.5ld.dat", step );
|
||||
FILE *out = fopen ( filename, "wb" );
|
||||
for ( int_t i=0; i<N; i++ )
|
||||
fwrite ( &U(i,0), sizeof(real_t), N, out );
|
||||
fclose ( out );
|
||||
}
|
||||
|
||||
|
||||
// Get rid of all the memory allocations
|
||||
void
|
||||
domain_finalize ( void )
|
||||
{
|
||||
free ( buffers[0] );
|
||||
free ( buffers[1] );
|
||||
free ( buffers[2] );
|
||||
}
|
||||
169
exercise5/handout_openmp/wave_2d_sequential.c
Normal file
169
exercise5/handout_openmp/wave_2d_sequential.c
Normal file
@@ -0,0 +1,169 @@
|
||||
#define _XOPEN_SOURCE 600
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
|
||||
// Convert 'struct timeval' into seconds in double prec. floating point
|
||||
#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec)
|
||||
|
||||
// Option to change numerical precision
|
||||
typedef int64_t int_t;
|
||||
typedef double real_t;
|
||||
|
||||
// Simulation parameters: size, step count, and how often to save the state
|
||||
int_t
|
||||
N = 1024,
|
||||
M = 1024,
|
||||
max_iteration = 4000,
|
||||
snapshot_freq = 20;
|
||||
|
||||
// Wave equation parameters, time step is derived from the space step
|
||||
const real_t
|
||||
c = 1.0,
|
||||
dx = 1.0,
|
||||
dy = 1.0;
|
||||
real_t
|
||||
dt;
|
||||
|
||||
// Buffers for three time steps, indexed with 2 ghost points for the boundary
|
||||
real_t
|
||||
*buffers[3] = { NULL, NULL, NULL };
|
||||
|
||||
#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1]
|
||||
#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1]
|
||||
#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1]
|
||||
|
||||
|
||||
// Rotate the time step buffers.
|
||||
void move_buffer_window ( void )
|
||||
{
|
||||
real_t *temp = buffers[0];
|
||||
buffers[0] = buffers[1];
|
||||
buffers[1] = buffers[2];
|
||||
buffers[2] = temp;
|
||||
}
|
||||
|
||||
|
||||
// Save the present time step in a numbered file under 'data/'
|
||||
void domain_save ( int_t step )
|
||||
{
|
||||
char filename[256];
|
||||
sprintf ( filename, "data/%.5ld.dat", step );
|
||||
FILE *out = fopen ( filename, "wb" );
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
fwrite ( &U(i,0), sizeof(real_t), N, out );
|
||||
}
|
||||
fclose ( out );
|
||||
}
|
||||
|
||||
|
||||
// Set up our three buffers, and fill two with an initial perturbation
|
||||
void
|
||||
domain_initialize ( void )
|
||||
{
|
||||
buffers[0] = malloc ( (M+2)*(N+2)*sizeof(real_t) );
|
||||
buffers[1] = malloc ( (M+2)*(N+2)*sizeof(real_t) );
|
||||
buffers[2] = malloc ( (M+2)*(N+2)*sizeof(real_t) );
|
||||
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
// Calculate delta (radial distance) adjusted for M x N grid
|
||||
real_t delta = sqrt ( ((i - M/2.0) * (i - M/2.0)) / (real_t)M +
|
||||
((j - N/2.0) * (j - N/2.0)) / (real_t)N );
|
||||
U_prv(i,j) = U(i,j) = exp ( -4.0*delta*delta );
|
||||
}
|
||||
}
|
||||
|
||||
// Set the time step for 2D case
|
||||
dt = dx*dy / (4.0*c*c);
|
||||
}
|
||||
|
||||
|
||||
// Get rid of all the memory allocations
|
||||
void domain_finalize ( void )
|
||||
{
|
||||
free ( buffers[0] );
|
||||
free ( buffers[1] );
|
||||
free ( buffers[2] );
|
||||
}
|
||||
|
||||
|
||||
// Integration formula (Eq. 9 from the pdf document)
|
||||
void time_step ( void )
|
||||
{
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
U_nxt(i,j) = -U_prv(i,j) + 2.0*U(i,j)
|
||||
+ (dt*dt*c*c)/(dx*dy) * (
|
||||
U(i-1,j)+U(i+1,j)+U(i,j-1)+U(i,j+1)-4.0*U(i,j)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Neumann (reflective) boundary condition
|
||||
void boundary_condition ( void )
|
||||
{
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
U(i,-1) = U(i,1);
|
||||
U(i,N) = U(i,N-2);
|
||||
}
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
U(-1,j) = U(1,j);
|
||||
U(M,j) = U(M-2,j);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Main time integration.
|
||||
void simulate( void )
|
||||
{
|
||||
// Go through each time step
|
||||
for ( int_t iteration=0; iteration<=max_iteration; iteration++ )
|
||||
{
|
||||
if ( (iteration % snapshot_freq)==0 )
|
||||
{
|
||||
domain_save ( iteration / snapshot_freq );
|
||||
}
|
||||
|
||||
// Derive step t+1 from steps t and t-1
|
||||
boundary_condition();
|
||||
time_step();
|
||||
|
||||
// Rotate the time step buffers
|
||||
move_buffer_window();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main ( int argc, char **argv )
|
||||
{
|
||||
// Set up the initial state of the domain
|
||||
domain_initialize();
|
||||
|
||||
struct timeval t_start, t_end;
|
||||
|
||||
gettimeofday ( &t_start, NULL );
|
||||
simulate();
|
||||
gettimeofday ( &t_end, NULL );
|
||||
|
||||
printf ( "Total elapsed time: %lf seconds\n",
|
||||
WALLTIME(t_end) - WALLTIME(t_start)
|
||||
);
|
||||
|
||||
// Clean up and shut down
|
||||
domain_finalize();
|
||||
exit ( EXIT_SUCCESS );
|
||||
}
|
||||
136
exercise5/handout_openmp/wave_2d_workshare.c
Normal file
136
exercise5/handout_openmp/wave_2d_workshare.c
Normal file
@@ -0,0 +1,136 @@
|
||||
#define _XOPEN_SOURCE 600
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// TASK: T6
|
||||
// Include the OpenMP library
|
||||
// BEGIN: T6
|
||||
;
|
||||
// END: T6
|
||||
|
||||
// Option to change numerical precision
|
||||
typedef int64_t int_t;
|
||||
typedef double real_t;
|
||||
|
||||
// Simulation parameters: size, step count, and how often to save the state
|
||||
const int_t
|
||||
N = 1024,
|
||||
max_iteration = 4000,
|
||||
snapshot_freq = 20;
|
||||
|
||||
// Wave equation parameters, time step is derived from the space step
|
||||
const real_t
|
||||
c = 1.0,
|
||||
h = 1.0;
|
||||
real_t
|
||||
dt;
|
||||
|
||||
// Buffers for three time steps, indexed with 2 ghost points for the boundary
|
||||
real_t
|
||||
*buffers[3] = { NULL, NULL, NULL };
|
||||
|
||||
#define U_prv(i, j) buffers[0][((i) + 1) * (N + 2) + (j) + 1]
|
||||
#define U(i, j) buffers[1][((i) + 1) * (N + 2) + (j) + 1]
|
||||
#define U_nxt(i, j) buffers[2][((i) + 1) * (N + 2) + (j) + 1]
|
||||
|
||||
// Rotate the time step buffers.
|
||||
void move_buffer_window(void) {
|
||||
real_t *temp = buffers[0];
|
||||
buffers[0] = buffers[1];
|
||||
buffers[1] = buffers[2];
|
||||
buffers[2] = temp;
|
||||
}
|
||||
|
||||
// Set up our three buffers, and fill two with an initial perturbation
|
||||
void domain_initialize(void) {
|
||||
buffers[0] = malloc((N + 2) * (N + 2) * sizeof(real_t));
|
||||
buffers[1] = malloc((N + 2) * (N + 2) * sizeof(real_t));
|
||||
buffers[2] = malloc((N + 2) * (N + 2) * sizeof(real_t));
|
||||
|
||||
for (int_t i = 0; i < N; i++) {
|
||||
for (int_t j = 0; j < N; j++) {
|
||||
real_t delta = sqrt(((i - N / 2) * (i - N / 2) + (j - N / 2) * (j - N / 2)) / (real_t)N);
|
||||
U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta);
|
||||
}
|
||||
}
|
||||
|
||||
// Set the time step for 2D case
|
||||
dt = (h * h) / (4.0 * c * c);
|
||||
}
|
||||
|
||||
// Get rid of all the memory allocations
|
||||
void domain_finalize(void) {
|
||||
free(buffers[0]);
|
||||
free(buffers[1]);
|
||||
free(buffers[2]);
|
||||
}
|
||||
|
||||
// TASK: T7
|
||||
// Integration formula
|
||||
void time_step(void) {
|
||||
// BEGIN: T7
|
||||
for (int_t i = 0; i < N; i++)
|
||||
for (int_t j = 0; j < N; j++)
|
||||
U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j));
|
||||
// END: T7
|
||||
}
|
||||
|
||||
// Neumann (reflective) boundary condition
|
||||
void boundary_condition(void) {
|
||||
for (int_t i = 0; i < N; i++) {
|
||||
U(i, -1) = U(i, 1);
|
||||
U(i, N) = U(i, N - 2);
|
||||
}
|
||||
for (int_t j = 0; j < N; j++) {
|
||||
U(-1, j) = U(1, j);
|
||||
U(N, j) = U(N - 2, j);
|
||||
}
|
||||
}
|
||||
|
||||
// Save the present time step in a numbered file under 'data/'
|
||||
void domain_save(int_t step) {
|
||||
char filename[256];
|
||||
sprintf(filename, "data/%.5ld.dat", step);
|
||||
FILE *out = fopen(filename, "wb");
|
||||
for (int_t i = 0; i < N; i++)
|
||||
fwrite(&U(i, 0), sizeof(real_t), N, out);
|
||||
fclose(out);
|
||||
}
|
||||
|
||||
// Main time integration.
|
||||
void simulate(void) {
|
||||
// Go through each time step
|
||||
for (int_t iteration = 0; iteration <= max_iteration; iteration++) {
|
||||
if ((iteration % snapshot_freq) == 0) {
|
||||
domain_save(iteration / snapshot_freq);
|
||||
}
|
||||
|
||||
// Derive step t+1 from steps t and t-1
|
||||
boundary_condition();
|
||||
time_step();
|
||||
|
||||
// Rotate the time step buffers
|
||||
move_buffer_window();
|
||||
}
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
// Set up the initial state of the domain
|
||||
domain_initialize();
|
||||
|
||||
double t_start, t_end;
|
||||
t_start = omp_get_wtime();
|
||||
// Go through each time step
|
||||
simulate();
|
||||
t_end = omp_get_wtime();
|
||||
printf("%lf seconds elapsed with %d threads\n",
|
||||
t_end - t_start,
|
||||
omp_get_max_threads());
|
||||
|
||||
// Clean up and shut down
|
||||
domain_finalize();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
5
exercise5/handout_pthreads/.gitignore
vendored
Normal file
5
exercise5/handout_pthreads/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
data/
|
||||
data_sequential/
|
||||
images/
|
||||
parallel
|
||||
sequential
|
||||
36
exercise5/handout_pthreads/Makefile
Normal file
36
exercise5/handout_pthreads/Makefile
Normal file
@@ -0,0 +1,36 @@
|
||||
CC=gcc
|
||||
CFLAGS+= -std=c99 -pthread
|
||||
LDLIBS+= -lm
|
||||
SEQUENTIAL_SRC_FILES=wave_2d_sequential.c
|
||||
PARALLEL_SRC_FILES=wave_2d_pthread.c
|
||||
IMAGES=$(shell find data -type f | sed s/\\.dat/.png/g | sed s/data/images/g )
|
||||
.PHONY: all clean dirs plot movie
|
||||
all: dirs ${SEQUENTIAL_SRC_FILES} ${PARALLEL_SRC_FILES}
|
||||
dirs:
|
||||
mkdir -p data images
|
||||
sequential: ${SEQUENTIAL_SRC_FILES}
|
||||
$(CC) $^ $(CFLAGS) -o $@ $(LDLIBS)
|
||||
parallel: ${PARALLEL_SRC_FILES}
|
||||
$(CC) $^ $(CFLAGS) -o $@ $(LDLIBS)
|
||||
plot: ${IMAGES}
|
||||
images/%.png: data/%.dat
|
||||
./plot_image.sh $<
|
||||
movie: ${IMAGES}
|
||||
ffmpeg -y -an -i images/%5d.png -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 -r 12 wave.mp4
|
||||
check: dirs sequential parallel
|
||||
mkdir -p data_sequential
|
||||
./sequential
|
||||
cp -rf ./data/* ./data_sequential
|
||||
./parallel 1
|
||||
./compare.sh
|
||||
rm ./data/*
|
||||
./parallel 4
|
||||
./compare.sh
|
||||
rm ./data/*
|
||||
./parallel 13
|
||||
./compare.sh
|
||||
rm -rf data_sequential
|
||||
clean:
|
||||
-rm -fr ${TARGETS} data images wave.mp4
|
||||
-rm sequential
|
||||
-rm parallel
|
||||
52
exercise5/handout_pthreads/compare.sh
Executable file
52
exercise5/handout_pthreads/compare.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#! /usr/bin/env bash
|
||||
|
||||
|
||||
GREEN=$(tput setaf 2)
|
||||
RED=$(tput setaf 1)
|
||||
RESET=$(tput sgr0)
|
||||
|
||||
DIR1="data"
|
||||
DIR2="data_sequential"
|
||||
|
||||
if [ ! -d "$DIR1" ]; then
|
||||
echo "Directory $DIR1 does not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "$DIR2" ]; then
|
||||
echo "Directory $DIR2 does not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
found_difference=1
|
||||
for file in "$DIR1"/*.dat; do
|
||||
# Extract the file name (basename)
|
||||
filename=$(basename "$file")
|
||||
|
||||
# Check if the corresponding file exists in DIR2
|
||||
if [ -f "$DIR2/$filename" ]; then
|
||||
# Compare the two files using diff
|
||||
diff_output=$(diff "$file" "$DIR2/$filename")
|
||||
|
||||
if [ -n "$diff_output" ]; then
|
||||
echo "$diff_output"
|
||||
found_difference=0
|
||||
fi
|
||||
else
|
||||
echo "File $filename does not exist in $DIR2"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $found_difference -eq 1 ]; then
|
||||
echo
|
||||
echo
|
||||
echo "${GREEN}The sequential and parallel version produced mathcing output!${RESET}"
|
||||
echo
|
||||
echo
|
||||
else
|
||||
echo
|
||||
echo
|
||||
echo "${RED}There were mismatches between the sequential and the parallel output.${RESET}"
|
||||
echo
|
||||
echo
|
||||
fi
|
||||
10
exercise5/handout_pthreads/plot_image.sh
Executable file
10
exercise5/handout_pthreads/plot_image.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#! /usr/bin/env bash
|
||||
SIZE=1024
|
||||
DATAFILE=$1
|
||||
IMAGEFILE=`echo $1 | sed s/dat$/png/ | sed s/data/images/`
|
||||
cat <<END_OF_SCRIPT | gnuplot -
|
||||
set term png
|
||||
set output "$IMAGEFILE"
|
||||
set zrange[-1:1]
|
||||
splot "$DATAFILE" binary array=${SIZE}x${SIZE} format='%double' with pm3d
|
||||
END_OF_SCRIPT
|
||||
BIN
exercise5/handout_pthreads/sequential
Executable file
BIN
exercise5/handout_pthreads/sequential
Executable file
Binary file not shown.
185
exercise5/handout_pthreads/wave_2d_pthread.c
Normal file
185
exercise5/handout_pthreads/wave_2d_pthread.c
Normal file
@@ -0,0 +1,185 @@
|
||||
#define _XOPEN_SOURCE 600
|
||||
#include <errno.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
// TASK: T1a
|
||||
// Include the pthreads library
|
||||
// BEGIN: T1a
|
||||
;
|
||||
// END: T1a
|
||||
|
||||
// Option to change numerical precision
|
||||
typedef int64_t int_t;
|
||||
typedef double real_t;
|
||||
|
||||
// TASK: T1b
|
||||
// Pthread management
|
||||
// BEGIN: T1b
|
||||
int_t n_threads = 1;
|
||||
// END: T1b
|
||||
|
||||
// Performance measurement
|
||||
struct timeval t_start, t_end;
|
||||
#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec)
|
||||
|
||||
// Simulation parameters: size, step count, and how often to save the state
|
||||
const int_t
|
||||
N = 1024,
|
||||
max_iteration = 4000,
|
||||
snapshot_freq = 20;
|
||||
|
||||
// Wave equation parameters, time step is derived from the space step
|
||||
const real_t
|
||||
c = 1.0,
|
||||
h = 1.0;
|
||||
real_t
|
||||
dt;
|
||||
|
||||
// Buffers for three time steps, indexed with 2 ghost points for the boundary
|
||||
real_t
|
||||
*buffers[3] = { NULL, NULL, NULL };
|
||||
|
||||
#define U_prv(i, j) buffers[0][((i) + 1) * (N + 2) + (j) + 1]
|
||||
#define U(i, j) buffers[1][((i) + 1) * (N + 2) + (j) + 1]
|
||||
#define U_nxt(i, j) buffers[2][((i) + 1) * (N + 2) + (j) + 1]
|
||||
|
||||
// Rotate the time step buffers.
|
||||
void move_buffer_window(void) {
|
||||
real_t *temp = buffers[0];
|
||||
buffers[0] = buffers[1];
|
||||
buffers[1] = buffers[2];
|
||||
buffers[2] = temp;
|
||||
}
|
||||
|
||||
// Set up our three buffers, and fill two with an initial perturbation
|
||||
void domain_initialize(void) {
|
||||
buffers[0] = malloc((N + 2) * (N + 2) * sizeof(real_t));
|
||||
buffers[1] = malloc((N + 2) * (N + 2) * sizeof(real_t));
|
||||
buffers[2] = malloc((N + 2) * (N + 2) * sizeof(real_t));
|
||||
|
||||
for (int_t i = 0; i < N; i++) {
|
||||
for (int_t j = 0; j < N; j++) {
|
||||
real_t delta = sqrt(((i - N / 2) * (i - N / 2) + (j - N / 2) * (j - N / 2)) / (real_t)N);
|
||||
U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta);
|
||||
}
|
||||
}
|
||||
|
||||
// Set the time step
|
||||
dt = (h * h) / (4.0 * c * c);
|
||||
}
|
||||
|
||||
// Get rid of all the memory allocations
|
||||
void domain_finalize(void) {
|
||||
free(buffers[0]);
|
||||
free(buffers[1]);
|
||||
free(buffers[2]);
|
||||
}
|
||||
|
||||
// TASK: T3
|
||||
// Integration formula
|
||||
void time_step(int_t thread_id) {
|
||||
// BEGIN: T3
|
||||
for (int_t i = 0; i < N; i += 1)
|
||||
for (int_t j = 0; j < N; j++)
|
||||
U_nxt(i, j) = -U_prv(i, j) + 2.0 * U(i, j) + (dt * dt * c * c) / (h * h) * (U(i - 1, j) + U(i + 1, j) + U(i, j - 1) + U(i, j + 1) - 4.0 * U(i, j));
|
||||
// END: T3
|
||||
}
|
||||
|
||||
// TASK: T4
|
||||
// Neumann (reflective) boundary condition
|
||||
void boundary_condition(int_t thread_id) {
|
||||
// BEGIN: T4
|
||||
for (int_t i = 0; i < N; i += 1) {
|
||||
U(i, -1) = U(i, 1);
|
||||
U(i, N) = U(i, N - 2);
|
||||
}
|
||||
for (int_t j = 0; j < N; j += 1) {
|
||||
U(-1, j) = U(1, j);
|
||||
U(N, j) = U(N - 2, j);
|
||||
}
|
||||
// END: T4
|
||||
}
|
||||
|
||||
// Save the present time step in a numbered file under 'data/'
|
||||
void domain_save(int_t step) {
|
||||
char filename[256];
|
||||
sprintf(filename, "data/%.5ld.dat", step);
|
||||
FILE *out = fopen(filename, "wb");
|
||||
for (int_t i = 0; i < N; i++)
|
||||
fwrite(&U(i, 0), sizeof(real_t), N, out);
|
||||
fclose(out);
|
||||
}
|
||||
|
||||
// TASK: T5
|
||||
// Main loop
|
||||
void *simulate(void *id) {
|
||||
// BEGIN: T5
|
||||
// Go through each time step
|
||||
for (int_t iteration = 0; iteration <= max_iteration; iteration++) {
|
||||
if ((iteration % snapshot_freq) == 0) {
|
||||
domain_save(iteration / snapshot_freq);
|
||||
}
|
||||
|
||||
// Derive step t+1 from steps t and t-1
|
||||
boundary_condition(0);
|
||||
time_step(0);
|
||||
|
||||
// Rotate the time step buffers
|
||||
move_buffer_window();
|
||||
}
|
||||
// END: T5
|
||||
}
|
||||
|
||||
// Main time integration loop
|
||||
int main(int argc, char **argv) {
|
||||
// Number of threads is an optional argument, sanity check its value
|
||||
if (argc > 1) {
|
||||
n_threads = strtol(argv[1], NULL, 10);
|
||||
if (errno == EINVAL)
|
||||
fprintf(stderr, "'%s' is not a valid thread count\n", argv[1]);
|
||||
if (n_threads < 1) {
|
||||
fprintf(stderr, "Number of threads must be >0\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
// TASK: T1c
|
||||
// Initialise pthreads
|
||||
// BEGIN: T1c
|
||||
;
|
||||
// END: T1b
|
||||
|
||||
// Set up the initial state of the domain
|
||||
domain_initialize();
|
||||
|
||||
// Time the execution
|
||||
gettimeofday(&t_start, NULL);
|
||||
|
||||
// TASK: T2
|
||||
// Run the integration loop
|
||||
// BEGIN: T2
|
||||
simulate(NULL);
|
||||
// END: T2
|
||||
|
||||
// Report how long we spent in the integration stage
|
||||
gettimeofday(&t_end, NULL);
|
||||
printf("%lf seconds elapsed with %ld threads\n",
|
||||
WALLTIME(t_end) - WALLTIME(t_start),
|
||||
n_threads);
|
||||
|
||||
// Clean up and shut down
|
||||
domain_finalize();
|
||||
|
||||
// TASK: T1d
|
||||
// Finalise pthreads
|
||||
// BEGIN: T1d
|
||||
;
|
||||
// END: T1d
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
169
exercise5/handout_pthreads/wave_2d_sequential.c
Normal file
169
exercise5/handout_pthreads/wave_2d_sequential.c
Normal file
@@ -0,0 +1,169 @@
|
||||
#define _XOPEN_SOURCE 600
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
|
||||
// Convert 'struct timeval' into seconds in double prec. floating point
|
||||
#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec)
|
||||
|
||||
// Option to change numerical precision
|
||||
typedef int64_t int_t;
|
||||
typedef double real_t;
|
||||
|
||||
// Simulation parameters: size, step count, and how often to save the state
|
||||
int_t
|
||||
N = 1024,
|
||||
M = 1024,
|
||||
max_iteration = 4000,
|
||||
snapshot_freq = 20;
|
||||
|
||||
// Wave equation parameters, time step is derived from the space step
|
||||
const real_t
|
||||
c = 1.0,
|
||||
dx = 1.0,
|
||||
dy = 1.0;
|
||||
real_t
|
||||
dt;
|
||||
|
||||
// Buffers for three time steps, indexed with 2 ghost points for the boundary
|
||||
real_t
|
||||
*buffers[3] = { NULL, NULL, NULL };
|
||||
|
||||
#define U_prv(i,j) buffers[0][((i)+1)*(N+2)+(j)+1]
|
||||
#define U(i,j) buffers[1][((i)+1)*(N+2)+(j)+1]
|
||||
#define U_nxt(i,j) buffers[2][((i)+1)*(N+2)+(j)+1]
|
||||
|
||||
|
||||
// Rotate the time step buffers.
|
||||
void move_buffer_window ( void )
|
||||
{
|
||||
real_t *temp = buffers[0];
|
||||
buffers[0] = buffers[1];
|
||||
buffers[1] = buffers[2];
|
||||
buffers[2] = temp;
|
||||
}
|
||||
|
||||
|
||||
// Save the present time step in a numbered file under 'data/'
|
||||
void domain_save ( int_t step )
|
||||
{
|
||||
char filename[256];
|
||||
sprintf ( filename, "data/%.5ld.dat", step );
|
||||
FILE *out = fopen ( filename, "wb" );
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
fwrite ( &U(i,0), sizeof(real_t), N, out );
|
||||
}
|
||||
fclose ( out );
|
||||
}
|
||||
|
||||
|
||||
// Set up our three buffers, and fill two with an initial perturbation
|
||||
void
|
||||
domain_initialize ( void )
|
||||
{
|
||||
buffers[0] = malloc ( (M+2)*(N+2)*sizeof(real_t) );
|
||||
buffers[1] = malloc ( (M+2)*(N+2)*sizeof(real_t) );
|
||||
buffers[2] = malloc ( (M+2)*(N+2)*sizeof(real_t) );
|
||||
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
// Calculate delta (radial distance) adjusted for M x N grid
|
||||
real_t delta = sqrt ( ((i - M/2.0) * (i - M/2.0)) / (real_t)M +
|
||||
((j - N/2.0) * (j - N/2.0)) / (real_t)N );
|
||||
U_prv(i,j) = U(i,j) = exp ( -4.0*delta*delta );
|
||||
}
|
||||
}
|
||||
|
||||
// Set the time step for 2D case
|
||||
dt = dx*dy / (4.0*c*c);
|
||||
}
|
||||
|
||||
|
||||
// Get rid of all the memory allocations
|
||||
void domain_finalize ( void )
|
||||
{
|
||||
free ( buffers[0] );
|
||||
free ( buffers[1] );
|
||||
free ( buffers[2] );
|
||||
}
|
||||
|
||||
|
||||
// Integration formula (Eq. 9 from the pdf document)
|
||||
void time_step ( void )
|
||||
{
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
U_nxt(i,j) = -U_prv(i,j) + 2.0*U(i,j)
|
||||
+ (dt*dt*c*c)/(dx*dy) * (
|
||||
U(i-1,j)+U(i+1,j)+U(i,j-1)+U(i,j+1)-4.0*U(i,j)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Neumann (reflective) boundary condition
|
||||
void boundary_condition ( void )
|
||||
{
|
||||
for ( int_t i=0; i<M; i++ )
|
||||
{
|
||||
U(i,-1) = U(i,1);
|
||||
U(i,N) = U(i,N-2);
|
||||
}
|
||||
for ( int_t j=0; j<N; j++ )
|
||||
{
|
||||
U(-1,j) = U(1,j);
|
||||
U(M,j) = U(M-2,j);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Main time integration.
|
||||
void simulate( void )
|
||||
{
|
||||
// Go through each time step
|
||||
for ( int_t iteration=0; iteration<=max_iteration; iteration++ )
|
||||
{
|
||||
if ( (iteration % snapshot_freq)==0 )
|
||||
{
|
||||
domain_save ( iteration / snapshot_freq );
|
||||
}
|
||||
|
||||
// Derive step t+1 from steps t and t-1
|
||||
boundary_condition();
|
||||
time_step();
|
||||
|
||||
// Rotate the time step buffers
|
||||
move_buffer_window();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main ( int argc, char **argv )
|
||||
{
|
||||
// Set up the initial state of the domain
|
||||
domain_initialize();
|
||||
|
||||
struct timeval t_start, t_end;
|
||||
|
||||
gettimeofday ( &t_start, NULL );
|
||||
simulate();
|
||||
gettimeofday ( &t_end, NULL );
|
||||
|
||||
printf ( "Total elapsed time: %lf seconds\n",
|
||||
WALLTIME(t_end) - WALLTIME(t_start)
|
||||
);
|
||||
|
||||
// Clean up and shut down
|
||||
domain_finalize();
|
||||
exit ( EXIT_SUCCESS );
|
||||
}
|
||||
Reference in New Issue
Block a user