From 31890838ff85135e3574a4e0a5952d9146c27f3f Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Wed, 19 Feb 2025 20:13:50 +0800 Subject: [PATCH 1/3] support float8 dtype. --- paddlenlp/utils/safetensors.py | 20 ++-- requirements.txt | 1 + tests/transformers/test_safetensors.py | 127 ++++++++++++++++++++++--- 3 files changed, 129 insertions(+), 19 deletions(-) diff --git a/paddlenlp/utils/safetensors.py b/paddlenlp/utils/safetensors.py index 54256023db7d..884896547290 100644 --- a/paddlenlp/utils/safetensors.py +++ b/paddlenlp/utils/safetensors.py @@ -17,6 +17,7 @@ import mmap from collections import OrderedDict +import ml_dtypes import numpy as np __all__ = [ @@ -24,6 +25,10 @@ "fast_load_file", ] +np.bfloat16 = ml_dtypes.bfloat16 +np.float8_e5m2 = ml_dtypes.float8_e5m2 +np.float8_e4m3fn = ml_dtypes.float8_e4m3fn + MAX_HEADER_SIZE = 100 * 1000 * 1000 @@ -49,8 +54,8 @@ "BOOL": np.bool_, "U8": np.uint8, "I8": np.int8, - "F8_E5M2": 1, # no fp8 - "F8_E4M3": 1, # no fp8 + "F8_E5M2": np.float8_e5m2, # fp8 supported via ml_dtypes + "F8_E4M3": np.float8_e4m3fn, # fp8 supported via ml_dtypes "I16": np.int16, "U16": np.uint16, "I32": np.int32, @@ -58,7 +63,7 @@ "I64": np.int64, "U64": np.uint64, "F16": np.float16, - "BF16": 2, # no bf16 + "BF16": np.bfloat16, # bf16 supported via ml_dtypes "F32": np.float32, "F64": np.float64, } @@ -238,9 +243,12 @@ def __getitem__(self, index): return tensor.reshape(target_shape) def get(self, *args, **kwargs): - tensor = np.empty(shape=self.shape, dtype=self.dtype) - self.bufferfile.seek(self.start_offset) - self.bufferfile.readinto(memoryview(tensor)) + # tensor = np.empty(shape=self.shape, dtype=self.dtype) + # self.bufferfile.seek(self.start_offset) + # self.bufferfile.readinto(memoryview(tensor)) + nbytes = np.prod(self.shape) * np.dtype(self.dtype).itemsize + buffer = self.bufferfile.read(nbytes) + tensor = np.frombuffer(buffer, dtype=self.dtype).reshape(self.shape) return tensor @property diff --git a/requirements.txt b/requirements.txt index 0c191b7e426b..cc3965ebabe1 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -27,4 +27,5 @@ regex numpy<=1.26.4 tiktoken tokenizers>=0.21,<0.22 +ml_dtypes omegaconf diff --git a/tests/transformers/test_safetensors.py b/tests/transformers/test_safetensors.py index 14116703c6d2..6a9d578bedeb 100644 --- a/tests/transformers/test_safetensors.py +++ b/tests/transformers/test_safetensors.py @@ -17,17 +17,119 @@ import unittest import numpy as np +import paddle from safetensors.numpy import load_file, save_file from paddlenlp.utils.safetensors import fast_load_file, fast_safe_open from ..testing_utils import skip_platform +paddle.set_device("cpu") + + +def enhanced_to_tensor(tensor): + if tensor.dtype == np.bfloat16: + return paddle.to_tensor(tensor.view(np.uint16)) + if tensor.dtype == np.float8_e5m2: + t = paddle.to_tensor(tensor.view(np.int8)) + new_t = paddle.empty(t.shape, dtype=paddle.float8_e5m2) + new_t.get_tensor()._share_data_with(t.get_tensor()) + return new_t + if tensor.dtype == np.float8_e4m3fn: + t = paddle.to_tensor(tensor.view(np.int8)) + new_t = paddle.empty(t.shape, dtype=paddle.float8_e4m3fn) + new_t.get_tensor()._share_data_with(t.get_tensor()) + return new_t + # return paddle.to_tensor(tensor.view(np.int8), dtype=paddle.float8_e4m3fn) + raise ValueError() + return paddle.to_tensor(tensor) + + +class EextendDtypeNumpySafe(unittest.TestCase): + def setUp(self): + super().setUp() + self.weight_map = {} + self.tensors = [ + ([10, 1, 10], "float32"), + ([1, 1, 10], "float32"), + ([1, 1, 1, 10], "float32"), + ([10, 10], "float32"), + ([8], "float16"), + ([5, 5, 5], "int32"), + ] + + def get_target_dtype(self, dtype="float32"): + count = 0 + weight_map = {} + for shape, _ in self.tensors: + weight_map[f"weight_{count}"] = (np.random.random(shape) * 100).astype(dtype) + count += 1 + return weight_map + + def get_paddle_target_dtype(self, dtype="float32"): + weight_map = self.get_target_dtype(dtype) + for k, v in list(weight_map.items()): + weight_map[k] = enhanced_to_tensor(v) + return 
weight_map + + @skip_platform("win32", "cygwin") + def test_save_load_file_paddle(self): + with tempfile.TemporaryDirectory() as tmpdirname: + for dtype in ["bfloat16", "float8_e5m2", "float8_e4m3fn"]: + weight_map = self.get_paddle_target_dtype(dtype) + path = os.path.join(tmpdirname, "test.safetensors") + shard = {} + for k in list(weight_map.keys()): + if isinstance(weight_map[k], paddle.Tensor): + shard[k] = weight_map[k].cpu().numpy() + else: + shard[k] = weight_map[k] + + save_file(shard, path, metadata={"format": "np"}) + sf_load = load_file(path) + fs_sf_load = fast_load_file(path) + + for k, v in self.weight_map.items(): + paddle.allclose(v, enhanced_to_tensor(sf_load[k])) + paddle.allclose(v, enhanced_to_tensor(fs_sf_load[k])) + + @skip_platform("win32", "cygwin") + def test_save_load_file(self): + with tempfile.TemporaryDirectory() as tmpdirname: + for dtype in ["bfloat16", "float8_e4m3fn", "float8_e5m2"]: + weight_map = self.get_target_dtype(dtype) + path = os.path.join(tmpdirname, "test.safetensors") + save_file(weight_map, path, metadata={"format": "np"}) + sf_load = load_file(path) + fs_sf_load = fast_load_file(path) + for k, v in self.weight_map.items(): + np.testing.assert_equal(v, sf_load[k]) + np.testing.assert_equal(v, fs_sf_load[k]) + + @skip_platform("win32", "cygwin") + def test_dtype_safe_open(self): + with tempfile.TemporaryDirectory() as tmpdirname: + for dtype in ["float32", "int32", "bfloat16", "float8_e4m3fn", "float8_e5m2"]: + weight_map = self.get_target_dtype(dtype) + path = os.path.join(tmpdirname, "test.safetensors") + save_file(weight_map, path, metadata={"format": "np"}) + + with fast_safe_open(path, framework="np") as f: + for key in f.keys(): + safe_slice = f.get_slice(key) + # np.testing.assert_equal(self.weight_map[key][2:1, ...], safe_slice[2:1, ...]) + np.testing.assert_equal(weight_map[key][0, ...], safe_slice[0, ...]) + np.testing.assert_equal(weight_map[key][0:1, ...], safe_slice[0:1, ...]) + 
np.testing.assert_equal(weight_map[key][..., 2:], safe_slice[..., 2:]) + np.testing.assert_equal(weight_map[key][..., 1], safe_slice[..., 1]) + np.testing.assert_equal(weight_map[key][:2, ...], safe_slice[:2, ...]) + np.testing.assert_equal(weight_map[key][..., :4], safe_slice[..., :4]) + class FastSafetensors(unittest.TestCase): def setUp(self): super().setUp() - self.weigth_map = {} + self.weight_map = {} tensors = [ ([10, 1, 10], "float32"), ([1, 1, 10], "float32"), @@ -38,18 +140,17 @@ def setUp(self): ] count = 0 for shape, dtype in tensors: - self.weigth_map[f"weight_{count}"] = (np.random.random(shape) * 100).astype(dtype) + self.weight_map[f"weight_{count}"] = (np.random.random(shape) * 100).astype(dtype) count += 1 - print(self.weigth_map) @skip_platform("win32", "cygwin") def test_load_file(self): with tempfile.TemporaryDirectory() as tmpdirname: path = os.path.join(tmpdirname, "test.safetensors") - save_file(self.weigth_map, path, metadata={"format": "np"}) + save_file(self.weight_map, path, metadata={"format": "np"}) sf_load = load_file(path) fs_sf_load = fast_load_file(path) - for k, v in self.weigth_map.items(): + for k, v in self.weight_map.items(): np.testing.assert_equal(v, sf_load[k]) np.testing.assert_equal(v, fs_sf_load[k]) @@ -57,15 +158,15 @@ def test_load_file(self): def test_safe_open(self): with tempfile.TemporaryDirectory() as tmpdirname: path = os.path.join(tmpdirname, "test.safetensors") - save_file(self.weigth_map, path, metadata={"format": "np"}) + save_file(self.weight_map, path, metadata={"format": "np"}) with fast_safe_open(path, framework="np") as f: for key in f.keys(): safe_slice = f.get_slice(key) - # np.testing.assert_equal(self.weigth_map[key][2:1, ...], safe_slice[2:1, ...]) - np.testing.assert_equal(self.weigth_map[key][0, ...], safe_slice[0, ...]) - np.testing.assert_equal(self.weigth_map[key][0:1, ...], safe_slice[0:1, ...]) - np.testing.assert_equal(self.weigth_map[key][..., 2:], safe_slice[..., 2:]) - 
np.testing.assert_equal(self.weigth_map[key][..., 1], safe_slice[..., 1]) - np.testing.assert_equal(self.weigth_map[key][:2, ...], safe_slice[:2, ...]) - np.testing.assert_equal(self.weigth_map[key][..., :4], safe_slice[..., :4]) + # np.testing.assert_equal(self.weight_map[key][2:1, ...], safe_slice[2:1, ...]) + np.testing.assert_equal(self.weight_map[key][0, ...], safe_slice[0, ...]) + np.testing.assert_equal(self.weight_map[key][0:1, ...], safe_slice[0:1, ...]) + np.testing.assert_equal(self.weight_map[key][..., 2:], safe_slice[..., 2:]) + np.testing.assert_equal(self.weight_map[key][..., 1], safe_slice[..., 1]) + np.testing.assert_equal(self.weight_map[key][:2, ...], safe_slice[:2, ...]) + np.testing.assert_equal(self.weight_map[key][..., :4], safe_slice[..., :4]) From c17aa5ffe0fd2ac3e7a13389121f5b054536d7e2 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Wed, 19 Feb 2025 20:23:55 +0800 Subject: [PATCH 2/3] fix --- tests/transformers/test_safetensors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/transformers/test_safetensors.py b/tests/transformers/test_safetensors.py index 6a9d578bedeb..92f80d064f69 100644 --- a/tests/transformers/test_safetensors.py +++ b/tests/transformers/test_safetensors.py @@ -41,7 +41,6 @@ def enhanced_to_tensor(tensor): new_t.get_tensor()._share_data_with(t.get_tensor()) return new_t # return paddle.to_tensor(tensor.view(np.int8), dtype=paddle.float8_e4m3fn) - raise ValueError() return paddle.to_tensor(tensor) From f97937b6c4450de08f579d9337a9591329094bc5 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Fri, 21 Feb 2025 11:24:16 +0800 Subject: [PATCH 3/3] fix for empty shape. 
--- paddlenlp/utils/safetensors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddlenlp/utils/safetensors.py b/paddlenlp/utils/safetensors.py index 884896547290..ef6291af4215 100644 --- a/paddlenlp/utils/safetensors.py +++ b/paddlenlp/utils/safetensors.py @@ -246,7 +246,8 @@ def get(self, *args, **kwargs): # tensor = np.empty(shape=self.shape, dtype=self.dtype) # self.bufferfile.seek(self.start_offset) # self.bufferfile.readinto(memoryview(tensor)) - nbytes = np.prod(self.shape) * np.dtype(self.dtype).itemsize + # cast to int: np.prod of an empty shape [] returns a NumPy float (1.0), and read() needs an int + nbytes = int(np.prod(self.shape)) * np.dtype(self.dtype).itemsize buffer = self.bufferfile.read(nbytes) tensor = np.frombuffer(buffer, dtype=self.dtype).reshape(self.shape) return tensor