Fix various issues with 16b data type handling (pytorch#161)

* tab->spc
* quantize
* remove extraneous conversion
yanbing-j · Jul 17, 2024 · 33928a8 · 33928a8
1 parent 423f939
commit 33928a8
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 6 deletions.
diff --git a/.github/workflows/compile-bf16.yml b/.github/workflows/compile-bf16.yml
@@ -102,7 +102,7 @@ jobs:
             echo "******************************************"
             echo "******** INT4 group-wise quantized *******"
             echo "******************************************"
-  	    if [ ${DTYPE} == float16 ]; then
+              if [ ${DTYPE} == float16 ]; then
               DTYPE=bfloat16
             fi
 
@@ -115,6 +115,6 @@ jobs:
             cat ./output_aoti
 
             echo "tests complete for ${DTYPE}"
-  	  done
-	  
-	  echo "tests complete for all dtypes!"
+            done
+          
+          echo "tests complete for all dtypes!"
diff --git a/quantize.py b/quantize.py
@@ -756,12 +756,12 @@ def __init__(
         # MKG: torch.float
         self.register_buffer(
             "scales_and_zeros",
-            torch.empty((in_features // groupsize, out_features, 2), dtype=torch.float)
+            torch.empty((in_features // groupsize, out_features, 2), dtype=get_precision())
         )
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
         # MKG torch.float
-        input = input.to(torch.float)
+        # input = input.to(torch.float)
         if self.padding:
             import torch.nn.functional as F
             input = F.pad(input, pad=(0, self.in_features - self.origin_in_features))