diff --git a/core/src/main/scala/org/apache/spark/cuda/CUDACodeGenerator.scala b/core/src/main/scala/org/apache/spark/cuda/CUDACodeGenerator.scala index e1466a408a..550e697f3d 100644 --- a/core/src/main/scala/org/apache/spark/cuda/CUDACodeGenerator.scala +++ b/core/src/main/scala/org/apache/spark/cuda/CUDACodeGenerator.scala @@ -31,7 +31,7 @@ import org.apache.xbean.asm5._ private class mapLambdaExpressionExtractor extends ClassVisitor(ASM5) { var isValid = false - var methodName:String = null + var methodName: String = null var exprtype: Int = 0 var exprop: Int = 0 var exprconst : Long = 0 @@ -40,31 +40,31 @@ private class mapLambdaExpressionExtractor extends ClassVisitor(ASM5) { sig: String, exceptions: Array[String]): MethodVisitor = { val idxmc = name.indexOfSlice("$mc") val idxsp = name.indexOfSlice("$sp") - if (idxmc + (3 + 2) == idxsp) { // find "$mc??$sp" + if (idxmc + (3 + 2) == idxsp) { // find "$mc??$sp" isValid = true methodName = name new MethodVisitor(ASM5) { override def visitIntInsn(op: Int, value: Int) { - if ((op == SIPUSH) || (op == BIPUSH)) { + if ((op == SIPUSH) || (op == BIPUSH)) { exprconst = value } else { isValid = false } - } + } override def visitLdcInsn(cst: java.lang.Object) { cst match { - case _: java.lang.Integer => + case _: java.lang.Integer => exprconst = cst.asInstanceOf[Int] - case _: java.lang.Long => + case _: java.lang.Long => exprconst = cst.asInstanceOf[Long] - case _: java.lang.Float => + case _: java.lang.Float => exprconst = java.lang.Float.floatToIntBits(cst.asInstanceOf[Float]) - case _: java.lang.Double => + case _: java.lang.Double => exprconst = java.lang.Double.doubleToLongBits(cst.asInstanceOf[Double]) case _ => isValid = false } - } + } override def visitInsn(op: Int) { op match { case ICONST_M1 => exprconst = -1 @@ -102,10 +102,10 @@ private class mapLambdaExpressionExtractor extends ClassVisitor(ASM5) { case _ => isValid = false } - } + } override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) = { isValid = false - } + } override def visitIincInsn(varidx: Int, increment: Int) = { isValid = false } @@ -118,15 +118,14 @@ private class mapLambdaExpressionExtractor extends ClassVisitor(ASM5) { override def visitMethodInsn(op: Int, owner: String, name: String, desc: String, itf: Boolean) = { isValid = false - } + } override def visitMultiANewArrayInsn(desc: String, dims: Int) = { isValid = false - } + } override def visitTableSwitchInsn(min: Int, max: Int, dflt: Label, labels: Label*) = { isValid = false - } + } } - } else { new MethodVisitor(ASM5) {} } @@ -141,7 +140,7 @@ private class reduceLambdaExpressionExtractor extends ClassVisitor(ASM5) { sig: String, exceptions: Array[String]): MethodVisitor = { val idxmc = name.indexOfSlice("$mc") val idxsp = name.indexOfSlice("$sp") - if (idxmc + (3 + 3) == idxsp) { // find "$mc???$sp" + if (idxmc + (3 + 3) == idxsp) { // find "$mc???$sp" isValid = true new MethodVisitor(ASM5) { override def visitInsn(op: Int) { @@ -171,16 +170,16 @@ private class reduceLambdaExpressionExtractor extends ClassVisitor(ASM5) { case _ => isValid = false } - } + } override def visitIntInsn(op: Int, value: Int) { isValid = false - } + } override def visitLdcInsn(cst: java.lang.Object) { isValid = false - } + } override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) = { isValid = false - } + } override def visitIincInsn(varidx: Int, increment: Int) = { isValid = false } @@ -193,15 +192,14 @@ private class reduceLambdaExpressionExtractor extends ClassVisitor(ASM5) { 
override def visitMethodInsn(op: Int, owner: String, name: String, desc: String, itf: Boolean) = { isValid = false - } + } override def visitMultiANewArrayInsn(desc: String, dims: Int) = { isValid = false - } + } override def visitTableSwitchInsn(min: Int, max: Int, dflt: Label, labels: Label*) = { isValid = false - } + } } - } else { new MethodVisitor(ASM5) {} } @@ -228,17 +226,18 @@ object CUDACodeGenerator { def generateForMap[U: ClassTag, T: ClassTag](f: T => U): Option[CUDAFunction] = { if (!isGPUCodeGen) { return None } - //val declaredFields = f.getClass.getDeclaredFields - //val declaredMethods = f.getClass.getDeclaredMethods - //println(" + declared fields: " + declaredFields.size) - //declaredFields.foreach { f => println(" " + f) } - //println(" + declared methods: " + declaredMethods.size) - //declaredMethods.foreach { m => println(" " + m) } + // val declaredFields = f.getClass.getDeclaredFields + // val declaredMethods = f.getClass.getDeclaredMethods + // println(" + declared fields: " + declaredFields.size) + // declaredFields.foreach { f => println(" " + f) } + // println(" + declared methods: " + declaredMethods.size) + // declaredMethods.foreach { m => println(" " + m) } val e = new mapLambdaExpressionExtractor val cls = f.getClass classReader(cls).accept(e, 0) - val fullName = "_map_"+(f.getClass.getName+"."+e.methodName).replace(".", "_").replace("$", "_") + val fullName = + "_map_" + (f.getClass.getName+"."+e.methodName).replace(".", "_").replace("$", "_") if (!e.isValid) { return None } val ptxType = e.exprtype match { @@ -259,7 +258,7 @@ object CUDACodeGenerator { } val ptxOpRound = e.exprtype match { case FLOAD | DLOAD => "rz." - case _ => "" + case _ => "" } val ptxOp = e.exprop match { case IADD => "add" @@ -268,7 +267,7 @@ object CUDACodeGenerator { ( if (e.exprtype == ILOAD || e.exprtype == LLOAD) "mul.lo" else "mul" ) case IDIV => "div" case IREM => "rem" - case 0 => "add" + case 0 => "add" } val ptxConst = e.exprtype match { case ILOAD => e.exprconst.toString @@ -324,189 +323,188 @@ BB_RET: Some(cudaFunc) } - val ptxIntReduce=""" + val ptxIntReduce = """ .version 4.2 .target sm_30 .address_size 64 .visible .entry _intReduce( - .param .u64 _intReduce_param_0, - .param .u64 _intReduce_param_1, - .param .u64 _intReduce_param_2 + .param .u64 _intReduce_param_0, + .param .u64 _intReduce_param_1, + .param .u64 _intReduce_param_2 ) { - .reg .pred %p<4>; - .reg .b32 %r<39>; - .reg .b64 %rd<13>; - + .reg .pred %p<4>; + .reg .b32 %r<39>; + .reg .b64 %rd<13>; - ld.param.u64 %rd6, [_intReduce_param_0]; - ld.param.u64 %rd7, [_intReduce_param_1]; - ld.param.u64 %rd8, [_intReduce_param_2]; + ld.param.u64 %rd6, [_intReduce_param_0]; + ld.param.u64 %rd7, [_intReduce_param_1]; + ld.param.u64 %rd8, [_intReduce_param_2]; - cvta.to.global.u64 %rd11, %rd7; + cvta.to.global.u64 %rd11, %rd7; st.global.u32 [%rd11], 0; - mov.u32 %r1, %ntid.x; - mov.u32 %r9, %ctaid.x; - mov.u32 %r2, %tid.x; - mad.lo.s32 %r10, %r1, %r9, %r2; - cvt.u64.u32 %rd12, %r10; - mov.u32 %r38, 0; - setp.ge.s64 %p1, %rd12, %rd8; - @%p1 bra BB_3; - - cvta.to.global.u64 %rd2, %rd6; - mov.u32 %r12, %nctaid.x; - mul.lo.s32 %r13, %r12, %r1; - cvt.u64.u32 %rd3, %r13; - mov.u32 %r38, 0; + mov.u32 %r1, %ntid.x; + mov.u32 %r9, %ctaid.x; + mov.u32 %r2, %tid.x; + mad.lo.s32 %r10, %r1, %r9, %r2; + cvt.u64.u32 %rd12, %r10; + mov.u32 %r38, 0; + setp.ge.s64 %p1, %rd12, %rd8; + @%p1 bra BB_3; + + cvta.to.global.u64 %rd2, %rd6; + mov.u32 %r12, %nctaid.x; + mul.lo.s32 %r13, %r12, %r1; + cvt.u64.u32 %rd3, %r13; + mov.u32 %r38, 0; 
BB_2: - shl.b64 %rd9, %rd12, 2; - add.s64 %rd10, %rd2, %rd9; - ld.global.u32 %r14, [%rd10]; - add.s32 %r38, %r14, %r38; - add.s64 %rd12, %rd3, %rd12; - setp.lt.s64 %p2, %rd12, %rd8; - @%p2 bra BB_2; + shl.b64 %rd9, %rd12, 2; + add.s64 %rd10, %rd2, %rd9; + ld.global.u32 %r14, [%rd10]; + add.s32 %r38, %r14, %r38; + add.s64 %rd12, %rd3, %rd12; + setp.lt.s64 %p2, %rd12, %rd8; + @%p2 bra BB_2; BB_3: - mov.u32 %r17, 16; - mov.u32 %r34, 31; - shfl.down.b32 %r15, %r38, %r17, %r34; - add.s32 %r20, %r15, %r38; - mov.u32 %r21, 8; - shfl.down.b32 %r19, %r20, %r21, %r34; - add.s32 %r24, %r19, %r20; - mov.u32 %r25, 4; - shfl.down.b32 %r23, %r24, %r25, %r34; - add.s32 %r28, %r23, %r24; - mov.u32 %r29, 2; - shfl.down.b32 %r27, %r28, %r29, %r34; - add.s32 %r32, %r27, %r28; - mov.u32 %r33, 1; - shfl.down.b32 %r31, %r32, %r33, %r34; - and.b32 %r35, %r2, 31; - setp.ne.s32 %p3, %r35, 0; - @%p3 bra BB_5; - - cvta.to.global.u64 %rd11, %rd7; - add.s32 %r36, %r31, %r32; - atom.global.add.u32 %r37, [%rd11], %r36; + mov.u32 %r17, 16; + mov.u32 %r34, 31; + shfl.down.b32 %r15, %r38, %r17, %r34; + add.s32 %r20, %r15, %r38; + mov.u32 %r21, 8; + shfl.down.b32 %r19, %r20, %r21, %r34; + add.s32 %r24, %r19, %r20; + mov.u32 %r25, 4; + shfl.down.b32 %r23, %r24, %r25, %r34; + add.s32 %r28, %r23, %r24; + mov.u32 %r29, 2; + shfl.down.b32 %r27, %r28, %r29, %r34; + add.s32 %r32, %r27, %r28; + mov.u32 %r33, 1; + shfl.down.b32 %r31, %r32, %r33, %r34; + and.b32 %r35, %r2, 31; + setp.ne.s32 %p3, %r35, 0; + @%p3 bra BB_5; + + cvta.to.global.u64 %rd11, %rd7; + add.s32 %r36, %r31, %r32; + atom.global.add.u32 %r37, [%rd11], %r36; BB_5: - ret; + ret; } """ -val ptxDoubleReduce=""" +val ptxDoubleReduce = """ .version 4.2 .target sm_30 .address_size 64 .visible .entry _doubleReduce( - .param .u64 _doubleReduce_param_0, - .param .u64 _doubleReduce_param_1, - .param .u64 _doubleReduce_param_2 + .param .u64 _doubleReduce_param_0, + .param .u64 _doubleReduce_param_1, + .param .u64 _doubleReduce_param_2 ) { - .reg .pred %p<5>; - .reg .b32 %r<68>; - .reg .f64 %fd<21>; - .reg .b64 %rd<18>; - - ld.param.u64 %rd10, [_doubleReduce_param_0]; - ld.param.u64 %rd11, [_doubleReduce_param_1]; - ld.param.u64 %rd12, [_doubleReduce_param_2]; - mov.u32 %r1, %ntid.x; - mov.u32 %r3, %ctaid.x; - mov.u32 %r2, %tid.x; - mad.lo.s32 %r4, %r1, %r3, %r2; - cvt.u64.u32 %rd16, %r4; - mov.f64 %fd20, 0d0000000000000000; - setp.ge.s64 %p1, %rd16, %rd12; - @%p1 bra BB_3; - - cvta.to.global.u64 %rd2, %rd10; - mov.u32 %r5, %nctaid.x; - mul.lo.s32 %r6, %r5, %r1; - cvt.u64.u32 %rd3, %r6; - mov.f64 %fd20, 0d0000000000000000; + .reg .pred %p<5>; + .reg .b32 %r<68>; + .reg .f64 %fd<21>; + .reg .b64 %rd<18>; + + ld.param.u64 %rd10, [_doubleReduce_param_0]; + ld.param.u64 %rd11, [_doubleReduce_param_1]; + ld.param.u64 %rd12, [_doubleReduce_param_2]; + mov.u32 %r1, %ntid.x; + mov.u32 %r3, %ctaid.x; + mov.u32 %r2, %tid.x; + mad.lo.s32 %r4, %r1, %r3, %r2; + cvt.u64.u32 %rd16, %r4; + mov.f64 %fd20, 0d0000000000000000; + setp.ge.s64 %p1, %rd16, %rd12; + @%p1 bra BB_3; + + cvta.to.global.u64 %rd2, %rd10; + mov.u32 %r5, %nctaid.x; + mul.lo.s32 %r6, %r5, %r1; + cvt.u64.u32 %rd3, %r6; + mov.f64 %fd20, 0d0000000000000000; BB_2: - shl.b64 %rd13, %rd16, 3; - add.s64 %rd14, %rd2, %rd13; - ld.global.f64 %fd7, [%rd14]; - add.f64 %fd20, %fd20, %fd7; - add.s64 %rd16, %rd3, %rd16; - setp.lt.s64 %p2, %rd16, %rd12; - @%p2 bra BB_2; + shl.b64 %rd13, %rd16, 3; + add.s64 %rd14, %rd2, %rd13; + ld.global.f64 %fd7, [%rd14]; + add.f64 %fd20, %fd20, %fd7; + add.s64 %rd16, %rd3, %rd16; + setp.lt.s64 %p2, %rd16, 
%rd12; + @%p2 bra BB_2; BB_3: -// mov.f64 %fd4, %fd20; - - mov.b64 {%r7,%r8}, %fd20; - mov.u32 %r15, 16; - mov.u32 %r64, 31; - shfl.down.b32 %r9, %r7, %r15, %r64; - shfl.down.b32 %r13, %r8, %r15, %r64; - mov.b64 %fd9, {%r9,%r13}; - add.f64 %fd10, %fd20, %fd9; - mov.b64 {%r19,%r20}, %fd10; - mov.u32 %r27, 8; - shfl.down.b32 %r21, %r19, %r27, %r64; - shfl.down.b32 %r25, %r20, %r27, %r64; - mov.b64 %fd11, {%r21,%r25}; - add.f64 %fd12, %fd10, %fd11; - mov.b64 {%r31,%r32}, %fd12; - mov.u32 %r39, 4; - shfl.down.b32 %r33, %r31, %r39, %r64; - shfl.down.b32 %r37, %r32, %r39, %r64; - mov.b64 %fd13, {%r33,%r37}; - add.f64 %fd14, %fd12, %fd13; - mov.b64 {%r43,%r44}, %fd14; - mov.u32 %r51, 2; - shfl.down.b32 %r45, %r43, %r51, %r64; - shfl.down.b32 %r49, %r44, %r51, %r64; - mov.b64 %fd15, {%r45,%r49}; - add.f64 %fd16, %fd14, %fd15; - mov.b64 {%r55,%r56}, %fd16; - mov.u32 %r63, 1; - shfl.down.b32 %r57, %r55, %r63, %r64; - shfl.down.b32 %r61, %r56, %r63, %r64; - mov.b64 %fd17, {%r57,%r61}; - add.f64 %fd4, %fd16, %fd17; - and.b32 %r67, %r2, 31; - setp.ne.s32 %p3, %r67, 0; - @%p3 bra BB_6; - - cvta.to.global.u64 %rd6, %rd11; - ld.global.u64 %rd17, [%rd6]; +// mov.f64 %fd4, %fd20; + + mov.b64 {%r7,%r8}, %fd20; + mov.u32 %r15, 16; + mov.u32 %r64, 31; + shfl.down.b32 %r9, %r7, %r15, %r64; + shfl.down.b32 %r13, %r8, %r15, %r64; + mov.b64 %fd9, {%r9,%r13}; + add.f64 %fd10, %fd20, %fd9; + mov.b64 {%r19,%r20}, %fd10; + mov.u32 %r27, 8; + shfl.down.b32 %r21, %r19, %r27, %r64; + shfl.down.b32 %r25, %r20, %r27, %r64; + mov.b64 %fd11, {%r21,%r25}; + add.f64 %fd12, %fd10, %fd11; + mov.b64 {%r31,%r32}, %fd12; + mov.u32 %r39, 4; + shfl.down.b32 %r33, %r31, %r39, %r64; + shfl.down.b32 %r37, %r32, %r39, %r64; + mov.b64 %fd13, {%r33,%r37}; + add.f64 %fd14, %fd12, %fd13; + mov.b64 {%r43,%r44}, %fd14; + mov.u32 %r51, 2; + shfl.down.b32 %r45, %r43, %r51, %r64; + shfl.down.b32 %r49, %r44, %r51, %r64; + mov.b64 %fd15, {%r45,%r49}; + add.f64 %fd16, %fd14, %fd15; + mov.b64 {%r55,%r56}, %fd16; + mov.u32 %r63, 1; + shfl.down.b32 %r57, %r55, %r63, %r64; + shfl.down.b32 %r61, %r56, %r63, %r64; + mov.b64 %fd17, {%r57,%r61}; + add.f64 %fd4, %fd16, %fd17; + and.b32 %r67, %r2, 31; + setp.ne.s32 %p3, %r67, 0; + @%p3 bra BB_6; + + cvta.to.global.u64 %rd6, %rd11; + ld.global.u64 %rd17, [%rd6]; BB_5: - mov.u64 %rd8, %rd17; - mov.b64 %fd18, %rd8; - add.f64 %fd19, %fd4, %fd18; - mov.b64 %rd15, %fd19; - atom.global.cas.b64 %rd17, [%rd6], %rd8, %rd15; - setp.ne.s64 %p4, %rd8, %rd17; - @%p4 bra BB_5; + mov.u64 %rd8, %rd17; + mov.b64 %fd18, %rd8; + add.f64 %fd19, %fd4, %fd18; + mov.b64 %rd15, %fd19; + atom.global.cas.b64 %rd17, [%rd6], %rd8, %rd15; + setp.ne.s64 %p4, %rd8, %rd17; + @%p4 bra BB_5; BB_6: - ret; + ret; } """ def generateForReduce[T: ClassTag](f: (T, T) => T): Option[CUDAFunction] = { if (!isGPUCodeGen) { return None } - //val declaredFields = f.getClass.getDeclaredFields - //val declaredMethods = f.getClass.getDeclaredMethods - //println(" + declared fields: " + declaredFields.size) - //declaredFields.foreach { f => println(" " + f) } - //println(" + declared methods: " + declaredMethods.size) - //declaredMethods.foreach { m => println(" " + m) } + // val declaredFields = f.getClass.getDeclaredFields + // val declaredMethods = f.getClass.getDeclaredMethods + // println(" + declared fields: " + declaredFields.size) + // declaredFields.foreach { f => println(" " + f) } + // println(" + declared methods: " + declaredMethods.size) + // declaredMethods.foreach { m => println(" " + m) } val e = new reduceLambdaExpressionExtractor val cls = 
f.getClass diff --git a/core/src/main/scala/org/apache/spark/cuda/CUDAManager.scala b/core/src/main/scala/org/apache/spark/cuda/CUDAManager.scala index 657d0ee818..8c4ff2c373 100644 --- a/core/src/main/scala/org/apache/spark/cuda/CUDAManager.scala +++ b/core/src/main/scala/org/apache/spark/cuda/CUDAManager.scala @@ -233,7 +233,7 @@ class CUDAManager { JCuda.cudaStreamSynchronize(getStream(devIx)) } - private[spark] def moduleGetFunction(resource:Any, kernelSignature: String): CUfunction = { + private[spark] def moduleGetFunction(resource: Any, kernelSignature: String): CUfunction = { val module = resource match { case url: URL => cachedLoadModule(Left(url)) case (name: String, ptx: String) => cachedLoadModule(Right(name, ptx)) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkGPULR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkGPULR.scala index e9d1c66b03..8829d30b7d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkGPULR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkGPULR.scala @@ -87,7 +87,7 @@ object SparkGPULR { Array("this"), ptxURL)) val threads = 1024 - val blocks = min((N + threads- 1) / threads, 1024) + val blocks = min((N + threads- 1) / threads, 1024) val dimensions = (size: Long, stage: Int) => stage match { case 0 => (blocks, threads) } @@ -109,14 +109,14 @@ object SparkGPULR { // Initialize w to a random value var w = Array.fill(D){2 * rand.nextDouble - 1} printf("numSlices=%d, N=%d, D=%d, ITERATIONS=%d\n", numSlices, N, D, ITERATIONS) - //println("Initial w: " + w) + // println("Initial w: " + w) val now = System.nanoTime for (i <- 1 to ITERATIONS) { println("On iteration " + i) val wbc = sc.broadcast(w) val gradient = pointsColumnCached.mapExtFunc((p: DataPoint) => - dmulvs(p.x, (1 / (1 + exp(-p.y * (ddotvv(wbc.value, p.x)))) - 1) * p.y), + dmulvs(p.x, (1 / (1 + exp(-p.y * (ddotvv(wbc.value, p.x)))) - 1) * p.y), mapFunction.value, outputArraySizes = Array(D), inputFreeVariables = Array(wbc.value) ).reduceExtFunc((x: Array[Double], y: Array[Double]) => daddvv(x, y), @@ -128,7 +128,7 @@ object SparkGPULR { pointsColumnCached.unCacheGpu() - //println("Final w: " + w) + // println("Final w: " + w) sc.stop() } diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkGPUPi.scala b/examples/src/main/scala/org/apache/spark/examples/SparkGPUPi.scala index 1eb02a73c5..6facebb161 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkGPUPi.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkGPUPi.scala @@ -60,7 +60,7 @@ object SparkGPUPi { .mapExtFunc( (i : Int) => { val x = random * 2 - 1 val y = random * 2 - 1 - if (x * x + y * y < 1) 1 else 0 } , + if (x * x + y * y < 1) 1 else 0 }, mapFunction) .reduceExtFunc((x: Int, y: Int) => x + y, reduceFunction) println("Pi is roughly " + 4.0 * count / n) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala index 0398d5e60f..750f552e44 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala @@ -74,11 +74,11 @@ object SparkLR { val skelton = sc.parallelize((1 to N), numSlices) val points = skelton.map(i => generateData(i, N, D, R)).cache() points.count() - + // Initialize w to a random value var w = DenseVector.fill(D){2 * rand.nextDouble - 1} printf("numSlices=%d, N=%d, D=%d, ITERATIONS=%d\n", numSlices, N, D, ITERATIONS) - //println("Initial w: " + w) 
+ // println("Initial w: " + w) val now = System.nanoTime for (i <- 1 to ITERATIONS) { @@ -92,7 +92,7 @@ object SparkLR { val ms = (System.nanoTime - now) / 1000000 println("Elapsed time: %d ms".format(ms)) - //println("Final w: " + w) + // println("Final w: " + w) sc.stop() } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/memory/HeapMemoryAllocatorSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/memory/HeapMemoryAllocatorSuite.java index 63a457ccc5..7f2f1db889 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/memory/HeapMemoryAllocatorSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/memory/HeapMemoryAllocatorSuite.java @@ -26,12 +26,9 @@ public class HeapMemoryAllocatorSuite { @BeforeClass public static void setUp() { // normally it's set by CUDAManager - /* - @@@CUDA jcuda.driver.JCudaDriver.setExceptionsEnabled(true); jcuda.driver.JCudaDriver.cuInit(0); jcuda.runtime.JCuda.cudaSetDevice(0); - */ } @Test