Move test data out of source file (#45)
li-plus authored Jul 9, 2023
1 parent 19c0f31 commit 12f6865
Showing 7 changed files with 290 additions and 649 deletions.
727 changes: 188 additions & 539 deletions chatglm_test.cpp

Large diffs are not rendered by default.

Binary file added tests/data/glm2_block.data
Binary file added tests/data/glm_block.data
Binary file added tests/data/layer_norm.data
Binary file added tests/data/linear.data
Binary file added tests/data/rms_norm.data
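The .data fixtures above are raw tensor dumps with no header or metadata: each make_data_* function in tests/test_convert.py below simply concatenates float32 arrays via numpy's tofile(). As an illustration only, here is a minimal sketch of reading linear.data back; the float32 dtype and the w, b, x, y order follow the tofile() calls in make_data_linear below, and the reshape sizes are taken from that generator rather than from the C++ test:

import numpy as np

# Read the flat float32 stream and slice it back into the tensors written by
# make_data_linear(): w (16, 32), b (16,), x (2, 32), y = x @ w.T + b -> (2, 16).
raw = np.fromfile("tests/data/linear.data", dtype=np.float32)
w, raw = raw[: 16 * 32].reshape(16, 32), raw[16 * 32 :]
b, raw = raw[:16], raw[16:]
x, raw = raw[: 2 * 32].reshape(2, 32), raw[2 * 32 :]
y = raw.reshape(2, 16)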
212 changes: 102 additions & 110 deletions tests/test_convert.py
@@ -5,6 +5,8 @@
import torch.nn.functional as F
from convert import quantize_q4_0, quantize_q4_1, quantize_q5_0, quantize_q5_1, quantize_q8_0

HERE = Path(__file__).resolve().parent

# generated by:
# torch.manual_seed(0)
# weight = torch.randn(2, 128)
@@ -167,11 +169,11 @@ def test_quantize_q5_1():


CHATGLM_MODEL_PATH = Path(
"~/.cache/huggingface/hub/models--THUDM--chatglm-6b/snapshots/a70fe6b0a3cf1675b3aec07e3b7bb7a8ce73c6ae"
"~/.cache/huggingface/hub/models--THUDM--chatglm-6b/snapshots/294cb13118a1e08ad8449ca542624a5c6aecc401"
).expanduser()

CHATGLM2_MODEL_PATH = Path(
"~/.cache/huggingface/hub/models--THUDM--chatglm2-6b/snapshots/fc442f7e7cf3ac073433cef0f301b4744c25edb6"
"~/.cache/huggingface/hub/models--THUDM--chatglm2-6b/snapshots/0ecfe0b857efd00836a4851b3dd2ed04bd4b197f"
).expanduser()


@@ -185,47 +187,58 @@ def make_data_embedding():


def make_data_linear():
w = torch.randn(8, 3)
b = torch.randn(8)
x = torch.randn(2, 3)
w = torch.randn(16, 32)
b = torch.randn(16)
x = torch.randn(2, 32)
y = F.linear(x, w, b)

print("w", w.flatten())
print("b", b.flatten())
print("x", x.flatten())
print("y", y.flatten())
with open(HERE / "data/linear.data", "wb") as f:
w.numpy().tofile(f)
b.numpy().tofile(f)
x.numpy().tofile(f)
y.numpy().tofile(f)


def make_data_layernorm():
w = torch.randn(9)
b = torch.randn(9)
x = torch.randn(2, 9)
y = F.layer_norm(x, [9], w, b)
print("w", w.flatten())
print("b", b.flatten())
print("x", x.flatten())
print("y", y.flatten())
w = torch.randn(64)
b = torch.randn(64)
x = torch.randn(3, 64)
y = F.layer_norm(x, [64], w, b)

with open(HERE / "data/layer_norm.data", "wb") as f:
w.numpy().tofile(f)
b.numpy().tofile(f)
x.numpy().tofile(f)
y.numpy().tofile(f)


def make_data_rms_norm():
from modeling_chatglm import RMSNorm

m = RMSNorm(7, eps=1e-6).eval()
m = RMSNorm(64, eps=1e-6).eval()
m.weight.data.uniform_()

x = torch.randn(2, 7)
x = torch.randn(3, 64)
with torch.no_grad():
y = m(x)
print("weight", m.weight.data.flatten())
print("x", x.flatten())
print("y", y.flatten())

with open(HERE / "data/rms_norm.data", "wb") as f:
m.weight.data.numpy().tofile(f)
x.numpy().tofile(f)
y.numpy().tofile(f)


def make_data_glm_self_attention():
from modeling_chatglm import SelfAttention
def make_data_glm_block():
from modeling_chatglm import GLMBlock

m = SelfAttention(16, 2, layer_id=3, empty_init=False).float().eval()
x = torch.randn(4, 1, 16) # [seqlen, bs, hidden]
m = (
GLMBlock(
hidden_size=32, num_attention_heads=8, layernorm_epsilon=1e-5, layer_id=3, num_layers=28, empty_init=False
)
.float()
.eval()
)
x1 = torch.randn(4, 1, 32) # [seqlen, bs, hidden]
position_ids = torch.tensor([[[0, 1, 2, 2], [0, 0, 0, 1]]])
attention_mask = torch.tensor(
[
@@ -237,23 +250,15 @@ def make_data_glm_self_attention():
[0, 0, 0, 0],
]
]
]
).bool()
y, layer_past = m(
x,
position_ids=position_ids,
attention_mask=attention_mask,
layer_id=m.layer_id,
use_cache=True,
],
dtype=torch.bool,
)
y1, layer_past = m(
x1, position_ids=position_ids, attention_mask=attention_mask, layer_id=m.layer_id, use_cache=True
)
print("x", x.flatten())
print("query_key_value.weight", m.query_key_value.weight.flatten())
print("query_key_value.bias", m.query_key_value.bias.flatten())
print("dense.weight", m.dense.weight.flatten())
print("dense.bias", m.dense.bias.flatten())
print("y", y.flatten())

x2 = torch.randn(1, 1, 16)
# cross attention
x2 = torch.randn(1, 1, 32)
position_ids = torch.tensor([[[2], [2]]])
attention_mask = torch.zeros(1, 1, dtype=torch.bool)
y2, layer_past = m(
@@ -264,10 +269,8 @@
layer_past=layer_past,
use_cache=True,
)
print("x2", x2.flatten())
print("y2", y2.flatten())

x3 = torch.randn(1, 1, 16)
x3 = torch.randn(1, 1, 32)
position_ids = torch.tensor([[[2], [3]]])
attention_mask = torch.zeros(1, 1, dtype=torch.bool)
y3, layer_past = m(
@@ -278,47 +281,29 @@ def make_data_glm2_block():
layer_past=layer_past,
use_cache=True,
)
print("x3", x3.flatten())
print("y3", y3.flatten())

print(m)

def make_data_glm_block():
from modeling_chatglm import GLMBlock

m = (
GLMBlock(hidden_size=8, num_attention_heads=2, layernorm_epsilon=1e-5, layer_id=3, empty_init=False)
.float()
.eval()
)
x = torch.randn(4, 1, 8) # [seqlen, bs, hidden]
position_ids = torch.tensor([[[0, 1, 2, 2], [0, 0, 0, 1]]])
attention_mask = torch.tensor(
[
[
[
[0, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 0, 0],
]
]
]
).bool()
(y,) = m(x, position_ids=position_ids, attention_mask=attention_mask, layer_id=m.layer_id)
print("x", x.flatten())
print("input_layernorm.weight", m.input_layernorm.weight.data.flatten())
print("input_layernorm.bias", m.input_layernorm.bias.data.flatten())
print("query_key_value.weight", m.attention.query_key_value.weight.data.flatten())
print("query_key_value.bias", m.attention.query_key_value.bias.data.flatten())
print("dense.weight", m.attention.dense.weight.data.flatten())
print("dense.bias", m.attention.dense.bias.data.flatten())
print("post_attention_layernorm.weight", m.post_attention_layernorm.weight.data.flatten())
print("post_attention_layernorm.bias", m.post_attention_layernorm.bias.data.flatten())
print("dense_h_to_4h.weight", m.mlp.dense_h_to_4h.weight.data.flatten())
print("dense_h_to_4h.bias", m.mlp.dense_h_to_4h.bias.data.flatten())
print("dense_4h_to_h.weight", m.mlp.dense_4h_to_h.weight.data.flatten())
print("dense_4h_to_h.bias", m.mlp.dense_4h_to_h.bias.data.flatten())
print("y", y.flatten())
with open(HERE / "data/glm_block.data", "wb") as f:
m.input_layernorm.weight.data.numpy().tofile(f)
m.input_layernorm.bias.data.numpy().tofile(f)
m.attention.query_key_value.weight.data.numpy().tofile(f)
m.attention.query_key_value.bias.data.numpy().tofile(f)
m.attention.dense.weight.data.numpy().tofile(f)
m.attention.dense.bias.data.numpy().tofile(f)
m.post_attention_layernorm.weight.data.numpy().tofile(f)
m.post_attention_layernorm.bias.data.numpy().tofile(f)
m.mlp.dense_h_to_4h.weight.data.numpy().tofile(f)
m.mlp.dense_h_to_4h.bias.data.numpy().tofile(f)
m.mlp.dense_4h_to_h.weight.data.numpy().tofile(f)
m.mlp.dense_4h_to_h.bias.data.numpy().tofile(f)

x1.numpy().tofile(f)
y1.data.numpy().tofile(f)
x2.numpy().tofile(f)
y2.data.numpy().tofile(f)
x3.numpy().tofile(f)
y3.data.numpy().tofile(f)


def make_data_glm2_block():
@@ -327,8 +312,8 @@ def make_data_glm2_block():

config = AutoConfig.from_pretrained(CHATGLM2_MODEL_PATH, trust_remote_code=True)
config.layernorm_epsilon = 1e-6
config.hidden_size = 16
config.num_attention_heads = 4
config.hidden_size = 32
config.num_attention_heads = 8
config.multi_query_group_num = 2
config.ffn_hidden_size = 6
config.kv_channels = config.hidden_size // config.num_attention_heads
@@ -343,45 +328,52 @@ def make_data_glm2_block():
rotary_pos_emb = rotary_pos_emb_module(8)[None, :seq_length].transpose(0, 1).contiguous()

# self attention
x = torch.randn(seq_length, 1, config.hidden_size)
x1 = torch.randn(seq_length, 1, config.hidden_size)
with torch.no_grad():
y, kv_cache = m(x, attention_mask=None, rotary_pos_emb=rotary_pos_emb)

print(m)

print("input_layernorm.weight", m.input_layernorm.weight.data.flatten())
print("attn.qkv.weight", m.self_attention.query_key_value.weight.data.flatten())
print("attn.qkv.bias", m.self_attention.query_key_value.bias.data.flatten())
print("attn.dense.weight", m.self_attention.dense.weight.data.flatten())
print("post_attention_layernorm.weight", m.post_attention_layernorm.weight.data.flatten())
print("mlp.dense_h_to_4h.weight", m.mlp.dense_h_to_4h.weight.data.flatten())
print("mlp.dense_4h_to_h.weight", m.mlp.dense_4h_to_h.weight.data.flatten())

print("x", x.flatten())
print("y", y.flatten())
y1, kv_cache = m(x1, attention_mask=None, rotary_pos_emb=rotary_pos_emb)

# cross attention
position_ids = torch.tensor([[seq_length]])
rotary_pos_emb = rotary_pos_emb_module(8)[position_ids].transpose(0, 1).contiguous()
x = torch.randn(1, 1, config.hidden_size)
x2 = torch.randn(1, 1, config.hidden_size)
with torch.no_grad():
y, kv_cache = m(x, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)
print("x2", x.flatten())
print("y2", y.flatten())
y2, kv_cache = m(x2, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)

# cross attention
position_ids = torch.tensor([[seq_length + 1]])
rotary_pos_emb = rotary_pos_emb_module(8)[position_ids].transpose(0, 1).contiguous()
x = torch.randn(1, 1, config.hidden_size)
x3 = torch.randn(1, 1, config.hidden_size)
with torch.no_grad():
y, kv_cache = m(x, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)
print("x3", x.flatten())
print("y3", y.flatten())
y3, kv_cache = m(x3, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)

print(m)

with open(HERE / "data/glm2_block.data", "wb") as f:
m.input_layernorm.weight.data.numpy().tofile(f)
m.self_attention.query_key_value.weight.data.numpy().tofile(f)
m.self_attention.query_key_value.bias.data.numpy().tofile(f)
m.self_attention.dense.weight.data.numpy().tofile(f)
m.post_attention_layernorm.weight.data.numpy().tofile(f)
m.mlp.dense_h_to_4h.weight.data.numpy().tofile(f)
m.mlp.dense_4h_to_h.weight.data.numpy().tofile(f)

x1.numpy().tofile(f)
y1.numpy().tofile(f)
x2.numpy().tofile(f)
y2.numpy().tofile(f)
x3.numpy().tofile(f)
y3.numpy().tofile(f)


def main():
sys.path.append(str(CHATGLM_MODEL_PATH))
sys.path.append(str(CHATGLM2_MODEL_PATH))
torch.manual_seed(0)
make_data_glm2_block()
(HERE / "data").mkdir(parents=True, exist_ok=True)
# make_data_linear()
make_data_layernorm()
# make_data_rms_norm()
# make_data_glm_block()
# make_data_glm2_block()


if __name__ == "__main__":
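
In this commit main() seeds torch, creates tests/data/, and enables only make_data_layernorm(), leaving the other generators commented out. A hedged sketch of regenerating every fixture in one run is below; note that torch.manual_seed(0) is called once, so the exact contents of each .data file depend on which generators run and in what order (the checked-in files correspond to whichever calls were enabled when they were produced):

import sys
import torch

# Assumes this runs inside tests/test_convert.py (or with its globals imported),
# and that the Hugging Face snapshots pointed to by CHATGLM_MODEL_PATH and
# CHATGLM2_MODEL_PATH exist locally, since modeling_chatglm is imported from them.
sys.path.append(str(CHATGLM_MODEL_PATH))
sys.path.append(str(CHATGLM2_MODEL_PATH))
torch.manual_seed(0)
(HERE / "data").mkdir(parents=True, exist_ok=True)
for make in (make_data_linear, make_data_layernorm, make_data_rms_norm,
             make_data_glm_block, make_data_glm2_block):
    make()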
