ex6: finish

This commit is contained in:
2025-10-25 18:07:49 +02:00
parent 919e9ce992
commit 42cf2f9592

View File

@@ -1,3 +1,6 @@
#include <cuda_device_runtime_api.h>
#include <cuda_runtime.h>
#include <driver_types.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -8,8 +11,8 @@
#define YSIZE 2048
/* Divide the problem into blocks of BLOCKX x BLOCKY threads */
#define BLOCKY 8
#define BLOCKX 8
#define BLOCKY 16
#define BLOCKX 16
#define MAXITER 255
@@ -30,7 +33,28 @@ typedef struct {
/********** SUBTASK1: Create kernel device_calculate *************************/
// Insert code here
/*
 * Kernel: computes the iteration count for one pixel per thread and stores it
 * in gpu_buf.
 *
 * Expected launch layout: a 2D grid of 2D blocks in which each thread handles
 * a single pixel, so a block corresponds to a rectangular region of the
 * output image. The thread's global x index is used as i (offset along the
 * real axis from xleft) and its global y index as j (offset along the
 * imaginary axis, downwards from yupper).
 *
 * Parameters:
 *   step    - distance in the complex plane between two neighbouring pixels
 *   xleft   - real part of the point at i == 0
 *   yupper  - imaginary part of the point at j == 0
 *   gpu_buf - output buffer, written at PIXEL(i, j); the host allocates it
 *             with room for XSIZE * YSIZE ints
 *
 * No shared memory is used and no inter-thread synchronisation is required.
 */
__global__ void device_calculate(double step, double xleft, double yupper, int *gpu_buf) {
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    int j = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image must not write: this keeps the
    // kernel safe when the launch grid is rounded up or the block size does
    // not divide the image dimensions evenly.
    if (i >= XSIZE || j >= YSIZE)
        return;

    // same as inner-most part of host_calculate
    my_complex_t c, z, temp;
    int iter = 0;

    // Point in the complex plane that this pixel represents.
    c.real = (xleft + step * i);
    c.imag = (yupper - step * j);
    z = c;

    // Iterate z <- z*z + c while |z|^2 < 4, giving up after MAXITER steps.
    while (z.real * z.real + z.imag * z.imag < 4.0) {
        temp.real = z.real * z.real - z.imag * z.imag + c.real;
        temp.imag = 2.0 * z.real * z.imag + c.imag;
        z = temp;
        if (++iter == MAXITER)
            break;
    }

    gpu_buf[PIXEL(i, j)] = iter;
}
/********** SUBTASK1 END *****************************************************/
@@ -130,14 +154,17 @@ int main(int argc, char **argv) {
/********** SUBTASK2: Set up device memory *******************************/
// Insert code here
int *gpu_buf;
cudaMalloc(&gpu_buf, sizeof(int) * XSIZE * YSIZE);
/********** SUBTASK2 END *************************************************/
start = walltime();
/********** SUBTASK3: Execute the kernel on the device *******************/
// Insert code here
dim3 grid(XSIZE / BLOCKX, YSIZE / BLOCKY);
dim3 block(BLOCKX, BLOCKY);
device_calculate<<<grid, block>>>(step, xleft, yupper, gpu_buf);
/********** SUBTASK3 END *************************************************/
@@ -147,7 +174,7 @@ int main(int argc, char **argv) {
/********** SUBTASK4: Transfer the result from device to device_pixel[][]*/
// Insert code here
cudaMemcpy(device_pixel, gpu_buf, sizeof(int) * XSIZE * YSIZE, cudaMemcpyDeviceToHost);
/********** SUBTASK4 END *************************************************/
@@ -155,7 +182,7 @@ int main(int argc, char **argv) {
/********** SUBTASK5: Free the device memory also ************************/
// Insert code here
cudaFree(gpu_buf);
/********** SUBTASK5 END *************************************************/