udacity · trongnghiavp07 · Sep 1, 2014 · Sep 1, 2014 · Sep 1, 2014 · Sep 1, 2014
diff --git a/Problem Sets/Problem Set 1/student_func.cu b/Problem Sets/Problem Set 1/student_func.cu
@@ -2,11 +2,11 @@
 // Color to Greyscale Conversion
 
 //A common way to represent color images is known as RGBA - the color
-//is specified by how much Red, Grean and Blue is in it.
-//The 'A' stands for Alpha and is used for transparency, it will be
+//is specified by how much Red, Green, and Blue is in it.
+//The 'A' stands for Alpha and is used for transparency; it will be
 //ignored in this homework.
 
-//Each channel Red, Blue, Green and Alpha is represented by one byte.
+//Each channel Red, Blue, Green, and Alpha is represented by one byte.
 //Since we are using one byte for each color there are 256 different
 //possible values for each color.  This means we use 4 bytes per pixel.
 
@@ -32,6 +32,7 @@
 //so that the entire image is processed.
 
 #include "utils.h"
+#include <stdio.h>
 
 __global__
 void rgba_to_greyscale(const uchar4* const rgbaImage,
@@ -48,19 +49,32 @@ void rgba_to_greyscale(const uchar4* const rgbaImage,
   //Note: We will be ignoring the alpha channel for this conversion
 
   //First create a mapping from the 2D block and grid locations
-  //to an absolute 2D location in the image, then use that to
+  //to an absolute 2D location in the image, then use that to
   //calculate a 1D offset
+  int y = threadIdx.y+ blockIdx.y* blockDim.y;
+  int x = threadIdx.x+ blockIdx.x* blockDim.x;
+  if (y < numCols && x < numRows) {
+  	int index = numRows*y +x;
+  uchar4 color = rgbaImage[index];
+  unsigned char grey = (unsigned char)(0.299f*color.x+ 0.587f*color.y + 0.114f*color.z);
+  greyImage[index] = grey;
+  }
 }
 
 void your_rgba_to_greyscale(const uchar4 * const h_rgbaImage, uchar4 * const d_rgbaImage,
                             unsigned char* const d_greyImage, size_t numRows, size_t numCols)
 {
   //You must fill in the correct sizes for the blockSize and gridSize
   //currently only one block with one thread is being launched
-  const dim3 blockSize(1, 1, 1);  //TODO
-  const dim3 gridSize( 1, 1, 1);  //TODO
+
+  int   blockWidth = 32;
+
+  const dim3 blockSize(blockWidth, blockWidth, 1);
+  int   blocksX = numRows/blockWidth+1;
+  int   blocksY = numCols/blockWidth+1; //TODO
+  const dim3 gridSize( blocksX, blocksY, 1);  //TODO
   rgba_to_greyscale<<<gridSize, blockSize>>>(d_rgbaImage, d_greyImage, numRows, numCols);
 
   cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
-
 }
+
diff --git a/Problem Sets/Problem Set 2/student_func.cu b/Problem Sets/Problem Set 2/student_func.cu
@@ -1,3 +1,4 @@
+#include <stdio.h>
 // Homework 2
 // Image Blurring
 //
@@ -102,6 +103,7 @@
 
 #include "utils.h"
 
+#include <stdio.h>
 __global__
 void gaussian_blur(const unsigned char* const inputChannel,
                    unsigned char* const outputChannel,
@@ -117,11 +119,33 @@ void gaussian_blur(const unsigned char* const inputChannel,
   // the image. You'll want code that performs the following check before accessing
   // GPU memory:
   //
-  // if ( absolute_image_position_x >= numCols ||
-  //      absolute_image_position_y >= numRows )
-  // {
-  //     return;
-  // }
+
+
+    const int2 p = make_int2( blockIdx.x * blockDim.x + threadIdx.x,
+                               blockIdx.y * blockDim.y + threadIdx.y);
+    const int m = p.y * numCols + p.x;
+
+    if(p.x >= numCols || p.y >= numRows)
+         return;
+
+    float color = 0.0f;
+
+    for(int f_y = 0; f_y < filterWidth; f_y++) {
+        for(int f_x = 0; f_x < filterWidth; f_x++) {
+
+            int c_x = p.x + f_x - filterWidth/2;
+            int c_y = p.y + f_y - filterWidth/2;
+            c_x = min(max(c_x, 0), numCols - 1);
+            c_y = min(max(c_y, 0), numRows - 1);
+            float filter_value = filter[f_y*filterWidth + f_x];
+            color += filter_value*static_cast<float>(inputChannel[c_y*numCols + c_x]);
+
+        }
+    }
+
+    outputChannel[m] = color;
+
+
 
   // NOTE: If a thread's absolute position 2D position is within the image, but some of
   // its neighbors are outside the image, then you will need to be extra careful. Instead
@@ -147,11 +171,16 @@ void separateChannels(const uchar4* const inputImageRGBA,
   // the image. You'll want code that performs the following check before accessing
   // GPU memory:
   //
-  // if ( absolute_image_position_x >= numCols ||
-  //      absolute_image_position_y >= numRows )
-  // {
-  //     return;
-  // }
+
+  const int2 p = make_int2( blockIdx.x * blockDim.x + threadIdx.x,
+                             blockIdx.y * blockDim.y + threadIdx.y);
+  const int m = p.y * numCols + p.x;
+
+  if(p.x >= numCols || p.y >= numRows)
+      return;
+  redChannel[m]   = inputImageRGBA[m].x;
+  greenChannel[m] = inputImageRGBA[m].y;
+  blueChannel[m]  = inputImageRGBA[m].z;
 }
 
 //This kernel takes in three color channels and recombines them
@@ -205,11 +234,12 @@ void allocateMemoryAndCopyToGPU(const size_t numRowsImage, const size_t numColsI
   //be sure to use checkCudaErrors like the above examples to
   //be able to tell if anything goes wrong
   //IMPORTANT: Notice that we pass a pointer to a pointer to cudaMalloc
-
+  checkCudaErrors(cudaMalloc(&d_filter, sizeof( float) * filterWidth * filterWidth));
   //TODO:
   //Copy the filter on the host (h_filter) to the memory you just allocated
   //on the GPU.  cudaMemcpy(dst, src, numBytes, cudaMemcpyHostToDevice);
   //Remember to use checkCudaErrors!
+  checkCudaErrors(cudaMemcpy(d_filter, h_filter, sizeof(float) * filterWidth * filterWidth, cudaMemcpyHostToDevice));
 
 }
 
@@ -221,21 +251,50 @@ void your_gaussian_blur(const uchar4 * const h_inputImageRGBA, uchar4 * const d_
                         const int filterWidth)
 {
   //TODO: Set reasonable block size (i.e., number of threads per block)
-  const dim3 blockSize;
+  const dim3 blockSize(32, 32);
 
   //TODO:
   //Compute correct grid size (i.e., number of blocks per kernel launch)
   //from the image size and and block size.
-  const dim3 gridSize;
+  const dim3 gridSize(numCols/blockSize.x + 1, numRows/blockSize.y + 1);
+
 
   //TODO: Launch a kernel for separating the RGBA image into different color channels
-
-  // Call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after
-  // launching your kernel to make sure that you didn't make any mistakes.
+  separateChannels<<<gridSize, blockSize>>>(d_inputImageRGBA,
+                                            numRows,
+                                            numCols,
+                                            d_red,
+                                            d_green,
+                                            d_blue);
   cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
+
 
   //TODO: Call your convolution kernel here 3 times, once for each color channel.
-
+  gaussian_blur<<<gridSize, blockSize>>>(
+      d_red,
+      d_redBlurred,
+      numRows,
+      numCols,
+      d_filter,
+      filterWidth);
+  cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
+
+  gaussian_blur<<<gridSize, blockSize>>>(
+      d_blue,
+      d_blueBlurred,
+      numRows,
+      numCols,
+      d_filter,
+      filterWidth);
+  cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
+
+  gaussian_blur<<<gridSize, blockSize>>>(
+      d_green,
+      d_greenBlurred,
+      numRows,
+      numCols,
+      d_filter,
+      filterWidth);
   // Again, call cudaDeviceSynchronize(), then call checkCudaErrors() immediately after
   // launching your kernel to make sure that you didn't make any mistakes.
   cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
@@ -251,7 +310,6 @@ void your_gaussian_blur(const uchar4 * const h_inputImageRGBA, uchar4 * const d_
                                              numRows,
                                              numCols);
   cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
-
 }
 
 
@@ -262,3 +320,4 @@ void cleanup() {
   checkCudaErrors(cudaFree(d_green));
   checkCudaErrors(cudaFree(d_blue));
 }
+