Files
TDT4200/exercise6/mandel.cu
2025-10-25 15:36:24 +02:00

205 lines
5.9 KiB
Plaintext

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Problem size: image width (XSIZE) and height (YSIZE) in pixels */
#define XSIZE 2560
#define YSIZE 2048
/* Divide the problem into blocks of BLOCKX x BLOCKY threads */
/* NOTE(review): not referenced by the host code visible here; presumably
   meant for the kernel launch configuration - confirm when it is added. */
#define BLOCKY 8
#define BLOCKX 8
/* Upper bound on the per-pixel iteration count; a pixel that reaches this
   count is treated as never diverging. */
#define MAXITER 255
/* Real-axis coordinates of the left and right image edges */
double xleft = -2.01;
double xright = 1;
/* Imaginary-axis coordinates of the top and bottom image edges; computed in
   main() from ycenter and step. */
double yupper, ylower;
/* Imaginary-axis coordinate the image is centred on */
double ycenter = 1e-6;
/* Distance in the complex plane between two neighbouring pixels; computed in
   main() as (xright - xleft) / XSIZE. */
double step;
/* Iteration counts written by host_calculate(), one entry per pixel */
int host_pixel[XSIZE * YSIZE];
/* Iteration counts that main() compares against host_pixel and turns into the
   output image; the SUBTASK4 section is where they are meant to be filled. */
int device_pixel[XSIZE * YSIZE];
/* Complex number stored as separate real and imaginary parts */
typedef struct {
double real, imag;
} my_complex_t;
/* Flat index of pixel (column i, row j) in a row-major XSIZE-wide array */
#define PIXEL(i, j) ((i) + (j) * XSIZE)
/********** SUBTASK1: Create kernel device_calculate *************************/
// Insert code here
/********** SUBTASK1 END *****************************************************/
/*
 * CPU reference computation.
 *
 * For every pixel (col, row) the corresponding point c of the complex plane
 * is derived from xleft, yupper and step, and z <- z*z + c is iterated
 * starting from z = c.  The number of iterations performed before |z|^2
 * reaches 4.0 (capped at MAXITER) is stored in host_pixel.
 */
void host_calculate() {
    for (int row = 0; row < YSIZE; row++) {
        for (int col = 0; col < XSIZE; col++) {
            my_complex_t c, z;
            c.real = (xleft + step * col);
            c.imag = (yupper - step * row);
            z = c;

            /* Stop either when the orbit escapes or when the cap is hit;
               in the latter case count ends up equal to MAXITER. */
            int count;
            for (count = 0; count < MAXITER; count++) {
                if (!(z.real * z.real + z.imag * z.imag < 4.0))
                    break;
                const double next_real = z.real * z.real - z.imag * z.imag + c.real;
                const double next_imag = 2.0 * z.real * z.imag + c.imag;
                z.real = next_real;
                z.imag = next_imag;
            }
            host_pixel[PIXEL(col, row)] = count;
        }
    }
}
typedef unsigned char uchar;

/* Store a 32-bit value at dst[0..3] in little-endian byte order. */
static void put_le32(uchar *dst, unsigned int value) {
    dst[0] = (uchar)(value & 255);
    dst[1] = (uchar)((value >> 8) & 255);
    dst[2] = (uchar)((value >> 16) & 255);
    dst[3] = (uchar)((value >> 24) & 255);
}

/*
 * Save a 24-bit uncompressed BMP file.
 *
 *   name   - path of the file to create
 *   buffer - x * y * 3 bytes of tightly packed pixel data, already in BMP
 *            row order (upside-down: the bottom image row comes first)
 *   x, y   - image width and height in pixels
 *
 * Each row in the file is padded to a multiple of four bytes as the BMP
 * format requires; when x * 3 is already a multiple of four the output is
 * byte-for-byte the packed buffer.  On any failure a message is printed and
 * the function returns without reporting an error to the caller.
 */
void savebmp(const char *name, uchar *buffer, int x, int y) {
    if (name == NULL || buffer == NULL || x <= 0 || y <= 0) {
        printf("Error writing image to disk.\n");
        return;
    }
    FILE *f = fopen(name, "wb");
    if (!f) {
        printf("Error writing image to disk.\n");
        return;
    }

    const size_t rowbytes = (size_t)x * 3;
    const size_t padding = (4 - rowbytes % 4) % 4;
    const size_t databytes = (rowbytes + padding) * (size_t)y;

    /* 14-byte file header followed by a 40-byte info header */
    uchar header[54] = { 0 };
    header[0] = 'B';
    header[1] = 'M';
    put_le32(header + 2, (unsigned int)(databytes + 54)); /* total file size */
    put_le32(header + 10, 54);                            /* offset of pixel data */
    put_le32(header + 14, 40);                            /* info header size */
    put_le32(header + 18, (unsigned int)x);               /* width */
    put_le32(header + 22, (unsigned int)y);               /* height */
    header[26] = 1;                                       /* colour planes */
    header[28] = 24;                                      /* bits per pixel */

    int ok = fwrite(header, 1, 54, f) == 54;
    if (ok && padding == 0) {
        /* Rows are already aligned: write the whole image in one go. */
        ok = fwrite(buffer, 1, databytes, f) == databytes;
    } else if (ok) {
        const uchar zeros[3] = { 0, 0, 0 };
        for (int row = 0; ok && row < y; row++) {
            ok = fwrite(buffer + (size_t)row * rowbytes, 1, rowbytes, f) == rowbytes
                 && fwrite(zeros, 1, padding, f) == padding;
        }
    }
    if (fclose(f) != 0)
        ok = 0;
    if (!ok)
        printf("Error writing image to disk.\n");
}
/*
 * Map an iteration count to a colour and store it in the three bytes at p.
 *
 * A count equal to MAXITER leaves the three bytes untouched.  All channel
 * values are reduced modulo 256 when stored; in particular, for counts
 * 152..159 the expression 255 - (iter - 24) * 2 is negative and therefore
 * wraps around to a large value.
 */
void fancycolour(uchar *p, int iter) {
    if (iter == MAXITER)
        return;

    if (iter < 8) {
        p[0] = (uchar)(128 + iter * 16);
        p[2] = 0;
        p[1] = 0;
        return;
    }

    if (iter < 24) {
        const uchar shade = (uchar)((iter - 8) * 16);
        p[0] = 255;
        p[2] = shade;
        p[1] = shade;
        return;
    }

    if (iter < 160) {
        const uchar fade = (uchar)(255 - (iter - 24) * 2);
        p[1] = fade;
        p[0] = fade;
        p[2] = 255;
        return;
    }

    const int ramp = (iter - 160) * 2;
    p[1] = (uchar)ramp;
    p[0] = (uchar)ramp;
    p[2] = (uchar)(255 - ramp);
}
/*
 * Wall-clock time from gettimeofday(), expressed in seconds as a double
 * (whole seconds plus the microsecond field scaled by 1e-6).
 */
double walltime(void) {
    struct timeval now;
    gettimeofday(&now, NULL);
    const double micros = 1e-6 * now.tv_usec;
    return (now.tv_sec + micros);
}
/*
 * Program entry point.
 *
 * Usage: MANDEL n   (n != 0 writes the image to mandel1.bmp, n == 0 does not)
 *
 * Steps:
 *   1. select CUDA device 0 and print its compute capability,
 *   2. compute the image on the host (timed),
 *   3. run the SUBTASK2-5 sections for the device computation (timed),
 *   4. compare host_pixel with device_pixel, tolerating a difference of 1,
 *   5. print the timings and optionally write the image built from
 *      device_pixel.
 *
 * Returns 0 on success (including when only the usage text is printed) and
 * 1 when the CUDA device cannot be set up or the image buffer cannot be
 * allocated.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        puts("Usage: MANDEL n");
        puts("n decides whether image should be written to disk (1=yes, 0=no)");
        return 0;
    }
    double start;
    double hosttime = 0;
    double devicetime = 0;
    double memtime = 0;

    /* Do not print device properties that were never filled in: stop on the
       first CUDA runtime error instead of ignoring it. */
    cudaDeviceProp prop;
    cudaError_t err = cudaSetDevice(0);
    if (err == cudaSuccess)
        err = cudaGetDeviceProperties(&prop, 0);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    printf("Device compute capability: %d.%d\n", prop.major, prop.minor);

    /* Calculate the range in the y-axis such that we preserve the
       aspect ratio */
    step = (xright - xleft) / XSIZE;
    yupper = ycenter + (step * YSIZE) / 2;
    ylower = ycenter - (step * YSIZE) / 2;

    /* Host calculates image */
    start = walltime();
    host_calculate();
    hosttime += walltime() - start;

    /********** SUBTASK2: Set up device memory *******************************/
    // Insert code here
    /********** SUBTASK2 END *************************************************/
    start = walltime();
    /********** SUBTASK3: Execute the kernel on the device *******************/
    // Insert code here
    /********** SUBTASK3 END *************************************************/
    devicetime += walltime() - start;
    start = walltime();
    /********** SUBTASK4: Transfer the result from device to device_pixel[][]*/
    // Insert code here
    /********** SUBTASK4 END *************************************************/
    memtime += walltime() - start;
    /********** SUBTASK5: Free the device memory also ************************/
    // Insert code here
    /********** SUBTASK5 END *************************************************/

    int errors = 0;
    /* check if result is correct */
    for (int i = 0; i < XSIZE; i++) {
        for (int j = 0; j < YSIZE; j++) {
            int diff = host_pixel[PIXEL(i, j)] - device_pixel[PIXEL(i, j)];
            if (diff < 0)
                diff = -diff;
            /* allow +-1 difference */
            if (diff > 1) {
                if (errors < 10)
                    printf("Error on pixel %d %d: expected %d, found %d\n",
                           i, j, host_pixel[PIXEL(i, j)], device_pixel[PIXEL(i, j)]);
                else if (errors == 10)
                    puts("...");
                errors++;
            }
        }
    }
    if (errors > 0)
        printf("Found %d errors.\n", errors);
    else
        puts("Device calculations are correct.");
    printf("\n");
    printf("Host time: %7.3f ms\n", hosttime * 1e3);
    printf("Device calculation: %7.3f ms\n", devicetime * 1e3);
    printf("Copy result: %7.3f ms\n", memtime * 1e3);

    if (strtol(argv[1], NULL, 10) != 0) {
        /* create nice image from iteration counts. take care to create it upside
           down (bmp format) */
        unsigned char *buffer =
            (unsigned char *)calloc((size_t)XSIZE * YSIZE * 3, 1);
        if (buffer == NULL) {
            fprintf(stderr, "Out of memory while allocating the image buffer.\n");
            return 1;
        }
        /* Rows outermost so that both arrays are walked contiguously. */
        for (int j = 0; j < YSIZE; j++) {
            for (int i = 0; i < XSIZE; i++) {
                size_t offset = ((size_t)(YSIZE - j - 1) * XSIZE + i) * 3;
                fancycolour(buffer + offset, device_pixel[PIXEL(i, j)]);
            }
        }
        /* write image to disk; a writable array is used for the name because
           savebmp is declared with a non-const name parameter and a string
           literal does not convert to char * in C++11 */
        char filename[] = "mandel1.bmp";
        savebmp(filename, buffer, XSIZE, YSIZE);
        free(buffer);
    }
    return 0;
}