ex6: finish
This commit is contained in:
@@ -1,3 +1,6 @@
|
||||
#include <cuda_device_runtime_api.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <driver_types.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -8,8 +11,8 @@
|
||||
#define YSIZE 2048
|
||||
|
||||
/* Divide the problem into blocks of BLOCKX x BLOCKY threads */
|
||||
#define BLOCKY 8
|
||||
#define BLOCKX 8
|
||||
#define BLOCKY 16
|
||||
#define BLOCKX 16
|
||||
|
||||
#define MAXITER 255
|
||||
|
||||
@@ -30,7 +33,28 @@ typedef struct {
|
||||
|
||||
/********** SUBTASK1: Create kernel device_calculate *************************/
|
||||
|
||||
// Insert code here
|
||||
/*
 * device_calculate: GPU counterpart of the inner-most part of host_calculate.
 * Each thread computes the escape-iteration count of one pixel and stores it
 * in gpu_buf.
 *
 * Launch layout: a 2D grid of 2D blocks. The x dimension of the launch maps
 * to pixel index i, the y dimension to pixel index j, so a block corresponds
 * to a rectangular region of the output image. Threads that fall outside the
 * XSIZE x YSIZE image (possible when the grid over-covers the image) exit
 * without writing.
 *
 * Parameters:
 *   step    - distance between two neighbouring pixels in the complex plane
 *   xleft   - real coordinate used for pixel column i == 0
 *   yupper  - imaginary coordinate used for pixel row j == 0
 *   gpu_buf - device buffer receiving one int per pixel, addressed through
 *             the PIXEL(i, j) macro defined elsewhere in this file
 *             (assumed to cover XSIZE * YSIZE ints, matching the allocation
 *             in main -- confirm if PIXEL changes)
 *
 * No shared memory is used and no synchronisation is required.
 */
__global__ void device_calculate(double step, double xleft, double yupper, int *gpu_buf) {
    // Global pixel coordinates of this thread.
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    int j = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard against launches whose grid is larger than the image; without
    // this, surplus threads would write past the end of gpu_buf.
    if (i >= XSIZE || j >= YSIZE)
        return;

    my_complex_t c, z, temp;
    int iter = 0;

    // Point in the complex plane represented by pixel (i, j).
    c.real = (xleft + step * i);
    c.imag = (yupper - step * j);
    z = c;

    // Iterate z = z^2 + c until |z|^2 reaches 4.0 or MAXITER iterations
    // have been performed, whichever comes first.
    while (z.real * z.real + z.imag * z.imag < 4.0) {
        temp.real = z.real * z.real - z.imag * z.imag + c.real;
        temp.imag = 2.0 * z.real * z.imag + c.imag;
        z = temp;
        if (++iter == MAXITER)
            break;
    }

    gpu_buf[PIXEL(i, j)] = iter;
}
|
||||
|
||||
/********** SUBTASK1 END *****************************************************/
|
||||
|
||||
@@ -130,14 +154,17 @@ int main(int argc, char **argv) {
|
||||
|
||||
/********** SUBTASK2: Set up device memory *******************************/
|
||||
|
||||
// Insert code here
|
||||
// Device-side result buffer: one int per pixel of the XSIZE x YSIZE image.
int *gpu_buf = NULL;
// cudaMalloc reports failure only through its return value; stop here rather
// than launching the kernel on an invalid pointer.
if (cudaMalloc(&gpu_buf, sizeof(int) * XSIZE * YSIZE) != cudaSuccess) {
    fprintf(stderr, "cudaMalloc failed for the device pixel buffer\n");
    exit(EXIT_FAILURE);
}
|
||||
|
||||
/********** SUBTASK2 END *************************************************/
|
||||
|
||||
start = walltime();
|
||||
/********** SUBTASK3: Execute the kernel on the device *******************/
|
||||
|
||||
// Insert code here
|
||||
// One thread per pixel: BLOCKX x BLOCKY threads per block, and enough blocks
// to tile the image. The integer division is exact only when XSIZE and YSIZE
// are multiples of BLOCKX and BLOCKY respectively.
dim3 grid(XSIZE / BLOCKX, YSIZE / BLOCKY);
dim3 block(BLOCKX, BLOCKY);
device_calculate<<<grid, block>>>(step, xleft, yupper, gpu_buf);
// A kernel launch returns no status itself; an invalid launch configuration
// is only visible through cudaGetLastError().
{
    cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess) {
        fprintf(stderr, "device_calculate launch failed: %s\n",
                cudaGetErrorString(launch_status));
        exit(EXIT_FAILURE);
    }
}
|
||||
|
||||
/********** SUBTASK3 END *************************************************/
|
||||
|
||||
@@ -147,7 +174,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
/********** SUBTASK4: Transfer the result from device to device_pixel[][]*/
|
||||
|
||||
// Insert code here
|
||||
// Blocking copy of the per-pixel results back to the host. Errors raised
// while the kernel was executing are reported by this call as well, so its
// status must not be ignored.
{
    cudaError_t copy_status = cudaMemcpy(device_pixel, gpu_buf,
                                         sizeof(int) * XSIZE * YSIZE,
                                         cudaMemcpyDeviceToHost);
    if (copy_status != cudaSuccess) {
        fprintf(stderr, "copying result from device failed: %s\n",
                cudaGetErrorString(copy_status));
        exit(EXIT_FAILURE);
    }
}
|
||||
|
||||
/********** SUBTASK4 END *************************************************/
|
||||
|
||||
@@ -155,7 +182,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
/********** SUBTASK5: Free the device memory also ************************/
|
||||
|
||||
// Insert code here
|
||||
cudaFree(gpu_buf);
|
||||
|
||||
/********** SUBTASK5 END *************************************************/
|
||||
|
||||
|
||||
Reference in New Issue
Block a user