Skip to content
This repository has been archived by the owner on Jun 27, 2022. It is now read-only.

Intro to parallel Programming #24

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
28 changes: 21 additions & 7 deletions Problem Sets/Problem Set 1/student_func.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
// Color to Greyscale Conversion

//A common way to represent color images is known as RGBA - the color
//is specified by how much Red, Grean and Blue is in it.
//The 'A' stands for Alpha and is used for transparency, it will be
//is specified by how much Red, Green, and Blue is in it.
//The 'A' stands for Alpha and is used for transparency; it will be
//ignored in this homework.

//Each channel Red, Blue, Green and Alpha is represented by one byte.
//Each channel Red, Blue, Green, and Alpha is represented by one byte.
//Since we are using one byte for each color there are 256 different
//possible values for each color. This means we use 4 bytes per pixel.

Expand All @@ -32,6 +32,7 @@
//so that the entire image is processed.

#include "utils.h"
#include <stdio.h>

__global__
void rgba_to_greyscale(const uchar4* const rgbaImage,
Expand All @@ -48,19 +49,32 @@ void rgba_to_greyscale(const uchar4* const rgbaImage,
//Note: We will be ignoring the alpha channel for this conversion

//First create a mapping from the 2D block and grid locations
//to an absolute 2D location in the image, then use that to
//to an absolute 2D location in the image, they use that to
//calculate a 1D offset
int y = threadIdx.y+ blockIdx.y* blockDim.y;
int x = threadIdx.x+ blockIdx.x* blockDim.x;
if (y < numCols && x < numRows) {
int index = numRows*y +x;
uchar4 color = rgbaImage[index];
unsigned char grey = (unsigned char)(0.299f*color.x+ 0.587f*color.y + 0.114f*color.z);
greyImage[index] = grey;
}
}

void your_rgba_to_greyscale(const uchar4 * const h_rgbaImage, uchar4 * const d_rgbaImage,
unsigned char* const d_greyImage, size_t numRows, size_t numCols)
{
//You must fill in the correct sizes for the blockSize and gridSize
//currently only one block with one thread is being launched
const dim3 blockSize(1, 1, 1); //TODO
const dim3 gridSize( 1, 1, 1); //TODO

int blockWidth = 32;

const dim3 blockSize(blockWidth, blockWidth, 1);
int blocksX = numRows/blockWidth+1;
int blocksY = numCols/blockWidth+1; //TODO
const dim3 gridSize( blocksX, blocksY, 1); //TODO
rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage, numRows, numCols);

cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

}

95 changes: 77 additions & 18 deletions Problem Sets/Problem Set 2/student_func.cu
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <stdio.h>
// Homework 2
// Image Blurring
//
Expand Down Expand Up @@ -102,6 +103,7 @@

#include "utils.h"

#include <stdio.h>
__global__
void gaussian_blur(const unsigned char* const inputChannel,
unsigned char* const outputChannel,
Expand All @@ -117,11 +119,33 @@ void gaussian_blur(const unsigned char* const inputChannel,
// the image. You'll want code that performs the following check before accessing
// GPU memory:
//
// if ( absolute_image_position_x >= numCols ||
// absolute_image_position_y >= numRows )
// {
// return;
// }


const int2 p = make_int2( blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y);
const int m = p.y * numCols + p.x;

if(p.x >= numCols || p.y >= numRows)
return;

float color = 0.0f;

for(int f_y = 0; f_y < filterWidth; f_y++) {
for(int f_x = 0; f_x < filterWidth; f_x++) {

int c_x = p.x + f_x - filterWidth/2;
int c_y = p.y + f_y - filterWidth/2;
c_x = min(max(c_x, 0), numCols - 1);
c_y = min(max(c_y, 0), numRows - 1);
float filter_value = filter[f_y*filterWidth + f_x];
color += filter_value*static_cast<float>(inputChannel[c_y*numCols + c_x]);

}
}

outputChannel[m] = color;



// NOTE: If a thread's absolute position 2D position is within the image, but some of
// its neighbors are outside the image, then you will need to be extra careful. Instead
Expand All @@ -147,11 +171,16 @@ void separateChannels(const uchar4* const inputImageRGBA,
// the image. You'll want code that performs the following check before accessing
// GPU memory:
//
// if ( absolute_image_position_x >= numCols ||
// absolute_image_position_y >= numRows )
// {
// return;
// }

const int2 p = make_int2( blockIdx.x * blockDim.x + threadIdx.x,
blockIdx.y * blockDim.y + threadIdx.y);
const int m = p.y * numCols + p.x;

if(p.x >= numCols || p.y >= numRows)
return;
redChannel[m] = inputImageRGBA[m].x;
greenChannel[m] = inputImageRGBA[m].y;
blueChannel[m] = inputImageRGBA[m].z;
}

//This kernel takes in three color channels and recombines them
Expand Down Expand Up @@ -205,11 +234,12 @@ void allocateMemoryAndCopyToGPU(const size_t numRowsImage, const size_t numColsI
//be sure to use checkCudaErrors like the above examples to
//be able to tell if anything goes wrong
//IMPORTANT: Notice that we pass a pointer to a pointer to cudaMalloc

checkCudaErrors(cudaMalloc(&d_filter, sizeof( float) * filterWidth * filterWidth));
//TODO:
//Copy the filter on the host (h_filter) to the memory you just allocated
//on the GPU. cudaMemcpy(dst, src, numBytes, cudaMemcpyHostToDevice);
//Remember to use checkCudaErrors!
checkCudaErrors(cudaMemcpy(d_filter, h_filter, sizeof(float) * filterWidth * filterWidth, cudaMemcpyHostToDevice));

}

Expand All @@ -221,21 +251,50 @@ void your_gaussian_blur(const uchar4 * const h_inputImageRGBA, uchar4 * const d_
const int filterWidth)
{
//TODO: Set reasonable block size (i.e., number of threads per block)
const dim3 blockSize;
const dim3 blockSize(32, 32);

//TODO:
//Compute correct grid size (i.e., number of blocks per kernel launch)
//from the image size and and block size.
const dim3 gridSize;
const dim3 gridSize(numCols/blockSize.x + 1, numRows/blockSize.y + 1);


//TODO: Launch a kernel for separating the RGBA image into different color channels

// Call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after
// launching your kernel to make sure that you didn't make any mistakes.
separateChannels<<<gridSize, blockSize>>>(d_inputImageRGBA,
numRows,
numCols,
d_red,
d_green,
d_blue);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());


//TODO: Call your convolution kernel here 3 times, once for each color channel.

gaussian_blur<<<gridSize, blockSize>>>(
d_red,
d_redBlurred,
numRows,
numCols,
d_filter,
filterWidth);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

gaussian_blur<<<gridSize, blockSize>>>(
d_blue,
d_blueBlurred,
numRows,
numCols,
d_filter,
filterWidth);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

gaussian_blur<<<gridSize, blockSize>>>(
d_green,
d_greenBlurred,
numRows,
numCols,
d_filter,
filterWidth);
// Again, call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after
// launching your kernel to make sure that you didn't make any mistakes.
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
Expand All @@ -251,7 +310,6 @@ void your_gaussian_blur(const uchar4 * const h_inputImageRGBA, uchar4 * const d_
numRows,
numCols);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());

}


Expand All @@ -262,3 +320,4 @@ void cleanup() {
checkCudaErrors(cudaFree(d_green));
checkCudaErrors(cudaFree(d_blue));
}

Loading