#define _XOPEN_SOURCE 600

#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>

#include "argument_utils.h"

// TASK: T1a
// Include the MPI header file
// BEGIN: T1a
#include <mpi.h>
// END: T1a

// Convert 'struct timeval' into seconds in double prec. floating point
#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec)

// Option to change numerical precision
typedef int64_t int_t;
typedef double real_t;

// Buffers for three time steps, indexed with 2 ghost points for the boundary
real_t *buffers[3] = { NULL, NULL, NULL };

// TASK: T1b
// Declare variables each MPI process will need
// BEGIN: T1b
#define U_prv(i, j) buffers[0][((i) + 1) * (local_N + 2) + (j) + 1]
#define U(i, j)     buffers[1][((i) + 1) * (local_N + 2) + (j) + 1]
#define U_nxt(i, j) buffers[2][((i) + 1) * (local_N + 2) + (j) + 1]
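// The +1 offsets map the logical index range (-1 .. local_M) x (-1 .. local_N)
// onto a (local_M + 2) x (local_N + 2) allocation, so each local tile carries
// one layer of ghost cells on every side for halo exchange and boundary values.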

int world_size;
int world_rank;

#define ROOT 0

// Local (per-process) subdomain size
int local_M, local_N;

// 2D Cartesian process grid
MPI_Comm cartesian_comm;
int coordinates[2], dims[2] = { 0 };
int cartesian_rank;

// Ranks of the neighbouring processes (MPI_PROC_NULL outside the grid)
int up, down, left, right;
int up_left, up_right, down_left, down_right;
// END: T1b

// Simulation parameters: size, step count, and how often to save the state
int_t
    M = 256,    // rows
    N = 256,    // cols
    max_iteration = 4000,
    snapshot_freq = 20;

// Wave equation parameters, time step is derived from the space step
const real_t
    c = 1.0,
    dx = 1.0,
    dy = 1.0;
real_t dt;

// Rotate the time step buffers.
void move_buffer_window(void) {
    real_t *temp = buffers[0];
    buffers[0] = buffers[1];
    buffers[1] = buffers[2];
    buffers[2] = temp;
}
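
// Each rank writes its own rows of the global M x N grid into a shared
// per-snapshot file, using MPI_File_write_at with offsets computed from its
// Cartesian coordinates, so the file ends up in plain row-major order.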
// TASK: T8
void domain_save(int_t step) {
    // BEGIN: T8
    char filename[256];

    // Ensure output directory exists (ignore error if it already exists)
    if (world_rank == ROOT) {
        if (mkdir("data", 0755) != 0 && errno != EEXIST) {
            perror("mkdir data");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    snprintf(filename, sizeof(filename), "data/%05" PRId64 ".dat", step);

    MPI_File fh;
    int rc = MPI_File_open(cartesian_comm, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (rc != MPI_SUCCESS) {
        // MPI calls do not set errno, so report the failure directly
        fprintf(stderr, "Failed to open '%s' for writing.\n", filename);
        exit(EXIT_FAILURE);
    }

    MPI_Status status;
    for (int_t i = 0; i < local_M; i++) {
        MPI_Offset offset = ((coordinates[0] * local_M + i) * N // global row position
                             + coordinates[1] * local_N) *      // global column position
                            sizeof(real_t);
        rc = MPI_File_write_at(fh, offset, &U(i, 0), local_N, MPI_DOUBLE, &status);
        if (rc != MPI_SUCCESS) {
            fprintf(stderr, "Failed to write row %" PRId64 " to '%s'.\n", i, filename);
            MPI_File_close(&fh);
            exit(EXIT_FAILURE);
        }
    }

    MPI_File_close(&fh);
    // END: T8
}
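
// Physical (non-periodic) edges have no neighbour to exchange with, so the
// ghost layer is filled by mirroring the interior across the boundary,
// which gives a reflective (Neumann, zero normal derivative) condition.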
// TASK: T7
// Neumann (reflective) boundary condition
void boundary_condition(void) {
    // BEGIN: T7
    if (up == MPI_PROC_NULL) {
        // note: -1 to handle corners automatically
        for (int_t j = -1; j < local_N + 1; j++) {
            U(-1, j) = U(1, j);
        }
    }
    if (down == MPI_PROC_NULL) {
        for (int_t j = -1; j < local_N + 1; j++) {
            U(local_M, j) = U(local_M - 2, j);
        }
    }
    if (left == MPI_PROC_NULL) {
        for (int_t i = -1; i < local_M + 1; i++) {
            U(i, -1) = U(i, 1);
        }
    }
    if (right == MPI_PROC_NULL) {
        for (int_t i = -1; i < local_M + 1; i++) {
            U(i, local_N) = U(i, local_N - 2);
        }
    }
    // END: T7
}

// TASK: T4
// Set up our three buffers, and fill two with an initial perturbation
// and set the time step.
void domain_initialize(void) {
    // BEGIN: T4
    buffers[0] = malloc((local_M + 2) * (local_N + 2) * sizeof(real_t));
    buffers[1] = malloc((local_M + 2) * (local_N + 2) * sizeof(real_t));
    buffers[2] = malloc((local_M + 2) * (local_N + 2) * sizeof(real_t));

    if (!buffers[0] || !buffers[1] || !buffers[2]) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    // Initialize the interior (physical cells) with a Gaussian bump centred
    // in the global domain; the offsets translate local to global indices.
    int rows_offset = coordinates[0] * local_M;
    int cols_offset = coordinates[1] * local_N;
    for (int_t i = 0; i < local_M; i++) {
        for (int_t j = 0; j < local_N; j++) {
            real_t dx_i = (i + rows_offset - M / 2.0);
            real_t dy_j = (j + cols_offset - N / 2.0);
            real_t delta = sqrt((dx_i * dx_i) / (real_t)M + (dy_j * dy_j) / (real_t)N);
            U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta);
        }
    }

    // Set the time step (CFL) with a safety factor
    dt = 1.0 / (c * sqrt(1.0 / (dx * dx) + 1.0 / (dy * dy)));
    dt *= 0.9; // safety factor

    // Fill ghost cells once so they are valid before the first step
    boundary_condition();
    // END: T4
}

// Get rid of all the memory allocations
void domain_finalize(void) {
    free(buffers[0]);
    free(buffers[1]);
    free(buffers[2]);
}
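
// One explicit time step of the discretized wave equation: u_next is obtained
// from the current and previous fields using central differences in x and y,
// plus a diagonal cross term that also reads the corner neighbours.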
// TASK: T5
// Integration formula
void time_step(void) {
    // BEGIN: T5
    double coef_x = (c * dt) * (c * dt) / (dx * dx);
    double coef_y = (c * dt) * (c * dt) / (dy * dy);
    double coef_xy = (c * dt) * (c * dt) / (6.0 * dx * dy);

    // Only iterate over physical domain cells i = 0..local_M-1, j = 0..local_N-1
    for (int i = 0; i < local_M; i++) {
        for (int j = 0; j < local_N; j++) {
            U_nxt(i, j) = 2.0 * U(i, j) - U_prv(i, j)
                        + coef_x * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j))
                        + coef_y * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1))
                        + coef_xy * (U(i + 1, j + 1) + U(i + 1, j - 1)
                                   + U(i - 1, j + 1) + U(i - 1, j - 1)
                                   - 4.0 * U(i, j));
        }
    }
    // END: T5
}
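
// The cross term in the stencil reads the diagonal neighbours, so the corner
// ghost cells must also be kept up to date. Look up the ranks of the four
// diagonal neighbours (MPI_PROC_NULL where the process grid ends).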
void get_corner_procs(void) {
    int neighbor_coords[2];

    neighbor_coords[0] = coordinates[0] - 1;
    neighbor_coords[1] = coordinates[1] - 1;
    if (neighbor_coords[0] >= 0 && neighbor_coords[1] >= 0) {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &up_left);
    } else {
        up_left = MPI_PROC_NULL;
    }

    neighbor_coords[0] = coordinates[0] - 1;
    neighbor_coords[1] = coordinates[1] + 1;
    if (neighbor_coords[0] >= 0 && neighbor_coords[1] < dims[1]) {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &up_right);
    } else {
        up_right = MPI_PROC_NULL;
    }

    neighbor_coords[0] = coordinates[0] + 1;
    neighbor_coords[1] = coordinates[1] - 1;
    if (neighbor_coords[0] < dims[0] && neighbor_coords[1] >= 0) {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &down_left);
    } else {
        down_left = MPI_PROC_NULL;
    }

    neighbor_coords[0] = coordinates[0] + 1;
    neighbor_coords[1] = coordinates[1] + 1;
    if (neighbor_coords[0] < dims[0] && neighbor_coords[1] < dims[1]) {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &down_right);
    } else {
        down_right = MPI_PROC_NULL;
    }
}
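
// Halo exchange: swap ghost rows with the vertical neighbours, ghost columns
// with the horizontal neighbours (via a strided MPI vector type), and finally
// the four single-value corner ghost cells with the diagonal neighbours.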
// TASK: T6
// Communicate the border between processes.
void border_exchange(void) {
    // BEGIN: T6

    // rows
    MPI_Sendrecv(&U(0, 0), local_N, MPI_DOUBLE, up, 0,
                 &U(local_M, 0), local_N, MPI_DOUBLE, down, 0,
                 cartesian_comm, MPI_STATUS_IGNORE);

    MPI_Sendrecv(&U(local_M - 1, 0), local_N, MPI_DOUBLE, down, 1,
                 &U(-1, 0), local_N, MPI_DOUBLE, up, 1,
                 cartesian_comm, MPI_STATUS_IGNORE);

    // columns
    MPI_Datatype column_type;
    MPI_Type_vector(local_M, 1, local_N + 2, MPI_DOUBLE, &column_type);
    MPI_Type_commit(&column_type);

    MPI_Sendrecv(&U(0, 0), 1, column_type, left, 2,
                 &U(0, local_N), 1, column_type, right, 2,
                 cartesian_comm, MPI_STATUS_IGNORE);

    MPI_Sendrecv(&U(0, local_N - 1), 1, column_type, right, 3,
                 &U(0, -1), 1, column_type, left, 3,
                 cartesian_comm, MPI_STATUS_IGNORE);

    MPI_Type_free(&column_type);

    // corners
    get_corner_procs();

    MPI_Sendrecv(&U(0, 0), 1, MPI_DOUBLE, up_left, 4,
                 &U(local_M, local_N), 1, MPI_DOUBLE, down_right, 4,
                 cartesian_comm, MPI_STATUS_IGNORE);

    MPI_Sendrecv(&U(0, local_N - 1), 1, MPI_DOUBLE, up_right, 5,
                 &U(local_M, -1), 1, MPI_DOUBLE, down_left, 5,
                 cartesian_comm, MPI_STATUS_IGNORE);

    MPI_Sendrecv(&U(local_M - 1, 0), 1, MPI_DOUBLE, down_left, 6,
                 &U(-1, local_N), 1, MPI_DOUBLE, up_right, 6,
                 cartesian_comm, MPI_STATUS_IGNORE);

    MPI_Sendrecv(&U(local_M - 1, local_N - 1), 1, MPI_DOUBLE, down_right, 7,
                 &U(-1, -1), 1, MPI_DOUBLE, up_left, 7,
                 cartesian_comm, MPI_STATUS_IGNORE);
    // END: T6
}

// Main time integration.
void simulate(void) {
    // Go through each time step
    for (int_t iteration = 0; iteration <= max_iteration; iteration++) {
        if ((iteration % snapshot_freq) == 0) {
            domain_save(iteration / snapshot_freq);
        }

        // Derive step t+1 from steps t and t-1
        border_exchange();
        boundary_condition();
        time_step();

        // Rotate the time step buffers
        move_buffer_window();
    }
}

int main(int argc, char **argv) {
    // TASK: T1c
    // Initialise MPI
    // BEGIN: T1c
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    // END: T1c

    // TASK: T3
    // Distribute the user arguments to all the processes
    // BEGIN: T3
    // Create an MPI datatype for the OPTIONS struct (four int64_t fields) for easy broadcast
    MPI_Datatype options_type;
    MPI_Type_contiguous(4, MPI_INT64_T, &options_type);
    MPI_Type_commit(&options_type);

    OPTIONS options;
    if (world_rank == ROOT) {
        OPTIONS *parsed = parse_args(argc, argv);
        if (!parsed) {
            fprintf(stderr, "Argument parsing failed\n");
            // Abort all ranks: the others would otherwise block in the broadcast below
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        options = *parsed;
        free(parsed);
    }

    MPI_Bcast(&options, 1, options_type, ROOT, MPI_COMM_WORLD);
    MPI_Type_free(&options_type);

    // read options
    M = options.M;
    N = options.N;
    max_iteration = options.max_iteration;
    snapshot_freq = options.snapshot_frequency;

    // grid dimensions
    MPI_Dims_create(world_size, 2, dims);

    // local tile size (assumes M and N divide evenly by the grid dimensions)
    local_M = M / dims[0];
    local_N = N / dims[1];

    // create cartesian communicator
    int periodicity[2] = { 0, 0 };
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periodicity, 1, &cartesian_comm);

    // fetch cartesian coords and neighbour ranks
    MPI_Comm_rank(cartesian_comm, &cartesian_rank);
    MPI_Cart_coords(cartesian_comm, cartesian_rank, 2, coordinates);
    MPI_Cart_shift(cartesian_comm, 0, 1, &up, &down);
    MPI_Cart_shift(cartesian_comm, 1, 1, &left, &right);
    // END: T3

    // Set up the initial state of the domain
    domain_initialize();

    struct timeval t_start, t_end;

    // TASK: T2
    // Time your code
    // BEGIN: T2
    if (world_rank == ROOT)
        gettimeofday(&t_start, NULL);

    simulate();
    // MPI_Barrier(MPI_COMM_WORLD); // TODO

    if (world_rank == ROOT) {
        gettimeofday(&t_end, NULL);

        printf("Total elapsed time: %lf seconds\n",
               WALLTIME(t_end) - WALLTIME(t_start));
    }
    // END: T2

    // Clean up and shut down
    domain_finalize();

    // TASK: T1d
    // Finalise MPI
    // BEGIN: T1d
    MPI_Comm_free(&cartesian_comm);
    MPI_Finalize();
    // END: T1d

    exit(EXIT_SUCCESS);
}