#define _XOPEN_SOURCE 600

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include "argument_utils.h"

// TASK: T1a
// Include the MPI header file
// BEGIN: T1a
#include <mpi.h>
// END: T1a

// Convert 'struct timeval' into seconds in double prec. floating point
#define WALLTIME(t) ((double)(t).tv_sec + 1e-6 * (double)(t).tv_usec)

// Option to change numerical precision
typedef int64_t int_t;
typedef double real_t;

// Buffers for three time steps, indexed with 2 ghost points for the boundary
real_t *buffers[3] = { NULL, NULL, NULL };

// TASK: T1b
// Declare variables each MPI process will need
// BEGIN: T1b
// Local buffers are (local_M + 2) x (local_N + 2); the +1 offsets skip the
// ghost frame so that U(0, 0) is the first physical cell.
#define U_prv(i, j) buffers[0][((i) + 1) * (local_N + 2) + (j) + 1]
#define U(i, j)     buffers[1][((i) + 1) * (local_N + 2) + (j) + 1]
#define U_nxt(i, j) buffers[2][((i) + 1) * (local_N + 2) + (j) + 1]

int world_size;
int world_rank;
#define ROOT 0

int local_M, local_N;

MPI_Comm cartesian_comm;
int coordinates[2], dims[2] = { 0 };
int cartesian_rank;
int up, down, left, right;
int up_left, up_right, down_left, down_right;
// END: T1b

// Simulation parameters: size, step count, and how often to save the state
int_t M = 256,              // rows
      N = 256,              // cols
      max_iteration = 4000,
      snapshot_freq = 20;

// Wave equation parameters, time step is derived from the space step
const real_t c = 1.0, dx = 1.0, dy = 1.0;
real_t dt;

// Rotate the time step buffers.
void move_buffer_window(void)
{
    real_t *temp = buffers[0];
    buffers[0] = buffers[1];
    buffers[1] = buffers[2];
    buffers[2] = temp;
}

// TASK: T8
void domain_save(int_t step)
{
// BEGIN: T8
    char filename[256];

    // Ensure output directory exists (ignore error if it already exists)
    if (world_rank == ROOT)
    {
        if (mkdir("data", 0755) != 0 && errno != EEXIST)
        {
            perror("mkdir data");
            exit(EXIT_FAILURE);
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);

    snprintf(filename, sizeof(filename), "data/%05" PRId64 ".dat", step);

    MPI_File fh;
    int rc = MPI_File_open(cartesian_comm, filename,
                           MPI_MODE_CREATE | MPI_MODE_WRONLY,
                           MPI_INFO_NULL, &fh);
    if (rc != MPI_SUCCESS)
    {
        // MPI errors do not set errno, so report the failure directly
        fprintf(stderr, "Failed to open '%s' for writing.\n", filename);
        exit(EXIT_FAILURE);
    }

    MPI_Status status;
    for (int_t i = 0; i < local_M; i++)
    {
        // Place each local row at its global (row, column) position in the file
        MPI_Offset offset = ((coordinates[0] * local_M + i) * N
                             + coordinates[1] * local_N) * sizeof(real_t);
        rc = MPI_File_write_at(fh, offset, &U(i, 0), local_N,
                               MPI_DOUBLE, &status);
        if (rc != MPI_SUCCESS)
        {
            fprintf(stderr, "Failed to write to '%s'.\n", filename);
            MPI_File_close(&fh);
            exit(EXIT_FAILURE);
        }
    }

    MPI_File_close(&fh);
// END: T8
}

// TASK: T7
// Neumann (reflective) boundary condition
void boundary_condition(void)
{
// BEGIN: T7
    if (up == MPI_PROC_NULL)
    {
        // note: -1 to handle corners automatically
        for (int_t j = -1; j < local_N + 1; j++)
        {
            U(-1, j) = U(1, j);
        }
    }
    if (down == MPI_PROC_NULL)
    {
        for (int_t j = -1; j < local_N + 1; j++)
        {
            U(local_M, j) = U(local_M - 2, j);
        }
    }
    if (left == MPI_PROC_NULL)
    {
        for (int_t i = -1; i < local_M + 1; i++)
        {
            U(i, -1) = U(i, 1);
        }
    }
    if (right == MPI_PROC_NULL)
    {
        for (int_t i = -1; i < local_M + 1; i++)
        {
            U(i, local_N) = U(i, local_N - 2);
        }
    }
// END: T7
}
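/*
 * Alternative to domain_save() above: a minimal sketch of collective I/O
 * using a subarray datatype and a single MPI_File_write_all, instead of
 * one MPI_File_write_at per row. It is not called anywhere in this file,
 * and it assumes real_t stays double, that M and N divide evenly by the
 * process grid, and that the data/ directory already exists (domain_save
 * creates it on first use).
 */
void domain_save_collective(int_t step)
{
    char filename[256];
    snprintf(filename, sizeof(filename), "data/%05" PRId64 ".dat", step);

    // This rank's local_M x local_N block inside the global M x N grid
    int global_sizes[2] = { (int)M, (int)N };
    int local_sizes[2] = { local_M, local_N };
    int starts[2] = { coordinates[0] * local_M, coordinates[1] * local_N };
    MPI_Datatype file_block;
    MPI_Type_create_subarray(2, global_sizes, local_sizes, starts,
                             MPI_ORDER_C, MPI_DOUBLE, &file_block);
    MPI_Type_commit(&file_block);

    // The interior of the padded local buffer, skipping the ghost frame
    int padded_sizes[2] = { local_M + 2, local_N + 2 };
    int interior_starts[2] = { 1, 1 };
    MPI_Datatype mem_block;
    MPI_Type_create_subarray(2, padded_sizes, local_sizes, interior_starts,
                             MPI_ORDER_C, MPI_DOUBLE, &mem_block);
    MPI_Type_commit(&mem_block);

    MPI_File fh;
    MPI_File_open(cartesian_comm, filename,
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_DOUBLE, file_block, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, buffers[1], 1, mem_block, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Type_free(&mem_block);
    MPI_Type_free(&file_block);
}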
// TASK: T4
// Set up our three buffers, fill two with an initial perturbation,
// and set the time step.
void domain_initialize(void)
{
// BEGIN: T4
    buffers[0] = malloc((local_M + 2) * (local_N + 2) * sizeof(real_t));
    buffers[1] = malloc((local_M + 2) * (local_N + 2) * sizeof(real_t));
    buffers[2] = malloc((local_M + 2) * (local_N + 2) * sizeof(real_t));
    if (!buffers[0] || !buffers[1] || !buffers[2])
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    // Initialize the interior (physical cells) from the global coordinates
    int rows_offset = coordinates[0] * local_M;
    int cols_offset = coordinates[1] * local_N;
    for (int_t i = 0; i < local_M; i++)
    {
        for (int_t j = 0; j < local_N; j++)
        {
            real_t dx_i = (i + rows_offset - M / 2.0);
            real_t dy_j = (j + cols_offset - N / 2.0);
            real_t delta = sqrt((dx_i * dx_i) / (real_t)M
                                + (dy_j * dy_j) / (real_t)N);
            U_prv(i, j) = U(i, j) = exp(-4.0 * delta * delta);
        }
    }

    // Set the time step from the CFL condition, with a safety factor
    dt = 1.0 / (c * sqrt(1.0 / (dx * dx) + 1.0 / (dy * dy)));
    dt *= 0.9; // safety factor

    // Fill ghost cells once so they are valid before the first step
    boundary_condition();
// END: T4
}

// Get rid of all the memory allocations
void domain_finalize(void)
{
    free(buffers[0]);
    free(buffers[1]);
    free(buffers[2]);
}

// TASK: T5
// Integration formula
void time_step(void)
{
// BEGIN: T5
    double coef_x = (c * dt) * (c * dt) / (dx * dx);
    double coef_y = (c * dt) * (c * dt) / (dy * dy);
    double coef_xy = (c * dt) * (c * dt) / (6.0 * dx * dy);

    // Only iterate over physical domain cells i = 0..local_M-1, j = 0..local_N-1
    for (int i = 0; i < local_M; i++)
    {
        for (int j = 0; j < local_N; j++)
        {
            U_nxt(i, j) = 2.0 * U(i, j) - U_prv(i, j)
                        + coef_x * (U(i + 1, j) - 2.0 * U(i, j) + U(i - 1, j))
                        + coef_y * (U(i, j + 1) - 2.0 * U(i, j) + U(i, j - 1))
                        + coef_xy * (U(i + 1, j + 1) + U(i + 1, j - 1)
                                     + U(i - 1, j + 1) + U(i - 1, j - 1)
                                     - 4.0 * U(i, j));
        }
    }
// END: T5
}

// Find the ranks of the four diagonal neighbors (MPI_PROC_NULL off the grid)
void get_corner_procs(void)
{
    int neighbor_coords[2];

    neighbor_coords[0] = coordinates[0] - 1;
    neighbor_coords[1] = coordinates[1] - 1;
    if (neighbor_coords[0] >= 0 && neighbor_coords[1] >= 0)
    {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &up_left);
    }
    else
    {
        up_left = MPI_PROC_NULL;
    }

    neighbor_coords[0] = coordinates[0] - 1;
    neighbor_coords[1] = coordinates[1] + 1;
    if (neighbor_coords[0] >= 0 && neighbor_coords[1] < dims[1])
    {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &up_right);
    }
    else
    {
        up_right = MPI_PROC_NULL;
    }

    neighbor_coords[0] = coordinates[0] + 1;
    neighbor_coords[1] = coordinates[1] - 1;
    if (neighbor_coords[0] < dims[0] && neighbor_coords[1] >= 0)
    {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &down_left);
    }
    else
    {
        down_left = MPI_PROC_NULL;
    }

    neighbor_coords[0] = coordinates[0] + 1;
    neighbor_coords[1] = coordinates[1] + 1;
    if (neighbor_coords[0] < dims[0] && neighbor_coords[1] < dims[1])
    {
        MPI_Cart_rank(cartesian_comm, neighbor_coords, &down_right);
    }
    else
    {
        down_right = MPI_PROC_NULL;
    }
}
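/*
 * Minimal sketch of a nonblocking variant of the row exchange performed in
 * border_exchange() below, assuming the same buffer layout and message tags.
 * Posting both receives before the sends and waiting once lets the two
 * directions proceed concurrently; MPI_PROC_NULL neighbors are no-ops.
 * Covers rows only, and is not called anywhere in this file.
 */
void border_exchange_rows_nonblocking(void)
{
    MPI_Request reqs[4];

    // Receive halo rows from the vertical neighbors
    MPI_Irecv(&U(-1, 0), local_N, MPI_DOUBLE, up, 1, cartesian_comm, &reqs[0]);
    MPI_Irecv(&U(local_M, 0), local_N, MPI_DOUBLE, down, 0, cartesian_comm, &reqs[1]);

    // Send our first and last physical rows to them
    MPI_Isend(&U(0, 0), local_N, MPI_DOUBLE, up, 0, cartesian_comm, &reqs[2]);
    MPI_Isend(&U(local_M - 1, 0), local_N, MPI_DOUBLE, down, 1, cartesian_comm, &reqs[3]);

    MPI_Waitall(4, reqs, MPI_STATUSES_IGNORE);
}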
// TASK: T6
// Communicate the border between processes.
void border_exchange(void)
{
// BEGIN: T6
    // rows
    MPI_Sendrecv(&U(0, 0), local_N, MPI_DOUBLE, up, 0,
                 &U(local_M, 0), local_N, MPI_DOUBLE, down, 0,
                 cartesian_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&U(local_M - 1, 0), local_N, MPI_DOUBLE, down, 1,
                 &U(-1, 0), local_N, MPI_DOUBLE, up, 1,
                 cartesian_comm, MPI_STATUS_IGNORE);

    // columns: one element per row, strided by the padded row width
    MPI_Datatype column_type;
    MPI_Type_vector(local_M, 1, local_N + 2, MPI_DOUBLE, &column_type);
    MPI_Type_commit(&column_type);
    MPI_Sendrecv(&U(0, 0), 1, column_type, left, 2,
                 &U(0, local_N), 1, column_type, right, 2,
                 cartesian_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&U(0, local_N - 1), 1, column_type, right, 3,
                 &U(0, -1), 1, column_type, left, 3,
                 cartesian_comm, MPI_STATUS_IGNORE);
    MPI_Type_free(&column_type);

    // corners (single cells, needed by the diagonal stencil term)
    get_corner_procs();
    MPI_Sendrecv(&U(0, 0), 1, MPI_DOUBLE, up_left, 4,
                 &U(local_M, local_N), 1, MPI_DOUBLE, down_right, 4,
                 cartesian_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&U(0, local_N - 1), 1, MPI_DOUBLE, up_right, 5,
                 &U(local_M, -1), 1, MPI_DOUBLE, down_left, 5,
                 cartesian_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&U(local_M - 1, 0), 1, MPI_DOUBLE, down_left, 6,
                 &U(-1, local_N), 1, MPI_DOUBLE, up_right, 6,
                 cartesian_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&U(local_M - 1, local_N - 1), 1, MPI_DOUBLE, down_right, 7,
                 &U(-1, -1), 1, MPI_DOUBLE, up_left, 7,
                 cartesian_comm, MPI_STATUS_IGNORE);
// END: T6
}

// Main time integration.
void simulate(void)
{
    // Go through each time step
    for (int_t iteration = 0; iteration <= max_iteration; iteration++)
    {
        if ((iteration % snapshot_freq) == 0)
        {
            domain_save(iteration / snapshot_freq);
        }

        // Derive step t+1 from steps t and t-1
        border_exchange();
        boundary_condition();
        time_step();

        // Rotate the time step buffers
        move_buffer_window();
    }
}
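/*
 * Sketch of an MPI-native timing alternative to the gettimeofday approach
 * used in main() below: each rank times its own run with MPI_Wtime, and a
 * reduction reports the slowest rank's elapsed time at ROOT. Not wired into
 * main(); shown only for comparison.
 */
void timed_simulate(void)
{
    MPI_Barrier(MPI_COMM_WORLD); // start everyone together
    double t0 = MPI_Wtime();

    simulate();

    double elapsed = MPI_Wtime() - t0;
    double slowest;
    MPI_Reduce(&elapsed, &slowest, 1, MPI_DOUBLE, MPI_MAX, ROOT, MPI_COMM_WORLD);
    if (world_rank == ROOT)
    {
        printf("Total elapsed time: %lf seconds\n", slowest);
    }
}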
int main(int argc, char **argv)
{
    // TASK: T1c
    // Initialise MPI
    // BEGIN: T1c
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    // END: T1c

    // TASK: T3
    // Distribute the user arguments to all the processes
    // BEGIN: T3
    // Create an options type for easy broadcast
    // (assumes OPTIONS holds four consecutive int64_t fields)
    MPI_Datatype options_type;
    MPI_Type_contiguous(4, MPI_INT64_T, &options_type);
    MPI_Type_commit(&options_type);

    OPTIONS options;
    if (world_rank == ROOT)
    {
        OPTIONS *parsed = parse_args(argc, argv);
        if (!parsed)
        {
            fprintf(stderr, "Argument parsing failed\n");
            exit(EXIT_FAILURE);
        }
        options = *parsed;
        free(parsed);
    }
    MPI_Bcast(&options, 1, options_type, ROOT, MPI_COMM_WORLD);
    MPI_Type_free(&options_type);

    // Read options
    M = options.M;
    N = options.N;
    max_iteration = options.max_iteration;
    snapshot_freq = options.snapshot_frequency;

    // Grid dimensions (assumes M and N divide evenly by the process grid)
    MPI_Dims_create(world_size, 2, dims);
    local_M = M / dims[0];
    local_N = N / dims[1];

    // Create the cartesian communicator
    int periodicity[2] = { 0, 0 };
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periodicity, 1, &cartesian_comm);

    // Fetch cartesian coordinates and neighbor ranks
    MPI_Comm_rank(cartesian_comm, &cartesian_rank);
    MPI_Cart_coords(cartesian_comm, cartesian_rank, 2, coordinates);
    MPI_Cart_shift(cartesian_comm, 0, 1, &up, &down);
    MPI_Cart_shift(cartesian_comm, 1, 1, &left, &right);
    // END: T3

    // Set up the initial state of the domain
    domain_initialize();

    struct timeval t_start, t_end;

    // TASK: T2
    // Time your code
    // BEGIN: T2
    if (world_rank == ROOT)
    {
        gettimeofday(&t_start, NULL);
    }

    simulate();

    // Wait for all ranks so ROOT measures the slowest process, not just itself
    MPI_Barrier(MPI_COMM_WORLD);
    if (world_rank == ROOT)
    {
        gettimeofday(&t_end, NULL);
        printf("Total elapsed time: %lf seconds\n",
               WALLTIME(t_end) - WALLTIME(t_start));
    }
    // END: T2

    // Clean up and shut down
    domain_finalize();

    // TASK: T1d
    // Finalise MPI
    // BEGIN: T1d
    MPI_Comm_free(&cartesian_comm);
    MPI_Finalize();
    // END: T1d

    exit(EXIT_SUCCESS);
}
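/*
 * Example build and run, with hypothetical file and option names; the actual
 * flags depend on argument_utils.c, which is not shown here:
 *
 *   mpicc -O2 -std=c99 wave_2d_parallel.c argument_utils.c -lm -o wave_2d_parallel
 *   mpirun -np 4 ./wave_2d_parallel
 *
 * Each snapshot lands in data/ as raw doubles in global row-major order,
 * so any rank count that evenly divides M and N produces identical files.
 */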