@@ -30,6 +30,7 @@
     _INNER_TENSOR_NAMES_FOR_SHARDING,
     NF4Tensor,
     linear_nf4,
+    nf4_weight_only,
     to_nf4,
 )
 
@@ -281,6 +282,32 @@ def test_empty_like(self, input_size: Union[Tuple[int], int]):
         self.assertEqual(new_tensor.get_device(), -1)  # that it's on CPU
         self.assertEqual(new_tensor.size(), nf4_tensor.size())
 
+    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    @parametrize("compile", [False, True])
+    def test_quantize_api(self, compile):
+        nf4_linear = nn.Linear(512, 512, device="cuda")
+        torchao.quantize_(nf4_linear, nf4_weight_only())
+        assert isinstance(nf4_linear.weight, NF4Tensor)
+
+        ref_linear = copy.deepcopy(nf4_linear)
+        ref_linear.weight.data = ref_linear.weight.get_original_weight()  # dequantize
+
+        if compile:
+            nf4_linear.compile()
+            ref_linear.compile()
+
+        nf4_x = torch.randn(2, 512, device="cuda").requires_grad_()
+        ref_x = nf4_x.detach().clone().requires_grad_()
+
+        nf4_out = nf4_linear(nf4_x)
+        ref_out = ref_linear(ref_x)
+        self.assertEqual(nf4_out, ref_out)
+
+        grad_out = torch.randn(2, 512, device="cuda")
+        nf4_out.backward(grad_out)
+        ref_out.backward(grad_out)
+        self.assertEqual(nf4_x.grad, ref_x.grad)
+
 
 class TestFSDPOps(TestCase):
     @parametrize("input_size", [512 * 512, (512 * 512,), (512, 512)])
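For context, a minimal standalone sketch of the API this test exercises, not part of the PR itself. It assumes the symbols are importable from torchao.dtypes.nf4tensor, matching the import block extended above; the model and input shapes here are illustrative only.

import torch
import torch.nn as nn

import torchao
from torchao.dtypes.nf4tensor import NF4Tensor, nf4_weight_only

# Build an ordinary linear layer, then quantize its weight to NF4 in place
# via the quantize_ entry point, exactly as the test does.
model = nn.Linear(512, 512, device="cuda")
torchao.quantize_(model, nf4_weight_only())
assert isinstance(model.weight, NF4Tensor)

# The quantized module still supports forward and backward; the test above
# checks both against a reference layer holding the dequantized weight.
x = torch.randn(2, 512, device="cuda", requires_grad=True)
out = model(x)
out.sum().backward()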