Move test data out of source file (#45)
li-plus authored Jul 9, 2023
1 parent 19c0f31 commit 12f6865
Showing 7 changed files with 290 additions and 649 deletions.
727 changes: 188 additions & 539 deletions chatglm_test.cpp

Large diffs are not rendered by default.

Binary file added tests/data/glm2_block.data
Binary file added tests/data/glm_block.data
Binary file added tests/data/layer_norm.data
Binary file added tests/data/linear.data
Binary file added tests/data/rms_norm.data
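The .data fixtures above are raw tensor dumps with no header or metadata: each make_data_* function in tests/test_convert.py below simply concatenates float32 arrays via numpy's tofile(). As an illustration only, here is a minimal sketch of reading linear.data back; the float32 dtype and the w, b, x, y order follow the tofile() calls in make_data_linear below, and the reshape sizes are taken from that generator rather than from the C++ test:

import numpy as np

# Read the flat float32 stream and slice it back into the tensors written by
# make_data_linear(): w (16, 32), b (16,), x (2, 32), y = x @ w.T + b -> (2, 16).
raw = np.fromfile("tests/data/linear.data", dtype=np.float32)
w, raw = raw[: 16 * 32].reshape(16, 32), raw[16 * 32 :]
b, raw = raw[:16], raw[16:]
x, raw = raw[: 2 * 32].reshape(2, 32), raw[2 * 32 :]
y = raw.reshape(2, 16)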
212 changes: 102 additions & 110 deletions tests/test_convert.py
@@ -5,6 +5,8 @@
import torch.nn.functional as F
from convert import quantize_q4_0, quantize_q4_1, quantize_q5_0, quantize_q5_1, quantize_q8_0

HERE = Path(__file__).resolve().parent

# generated by:
# torch.manual_seed(0)
# weight = torch.randn(2, 128)
@@ -167,11 +169,11 @@ def test_quantize_q5_1():


CHATGLM_MODEL_PATH = Path(
"~/.cache/huggingface/hub/models--THUDM--chatglm-6b/snapshots/a70fe6b0a3cf1675b3aec07e3b7bb7a8ce73c6ae"
"~/.cache/huggingface/hub/models--THUDM--chatglm-6b/snapshots/294cb13118a1e08ad8449ca542624a5c6aecc401"
).expanduser()

CHATGLM2_MODEL_PATH = Path(
"~/.cache/huggingface/hub/models--THUDM--chatglm2-6b/snapshots/fc442f7e7cf3ac073433cef0f301b4744c25edb6"
"~/.cache/huggingface/hub/models--THUDM--chatglm2-6b/snapshots/0ecfe0b857efd00836a4851b3dd2ed04bd4b197f"
).expanduser()


@@ -185,47 +187,58 @@ def make_data_embedding():


def make_data_linear():
w = torch.randn(8, 3)
b = torch.randn(8)
x = torch.randn(2, 3)
w = torch.randn(16, 32)
b = torch.randn(16)
x = torch.randn(2, 32)
y = F.linear(x, w, b)

print("w", w.flatten())
print("b", b.flatten())
print("x", x.flatten())
print("y", y.flatten())
with open(HERE / "data/linear.data", "wb") as f:
w.numpy().tofile(f)
b.numpy().tofile(f)
x.numpy().tofile(f)
y.numpy().tofile(f)


def make_data_layernorm():
w = torch.randn(9)
b = torch.randn(9)
x = torch.randn(2, 9)
y = F.layer_norm(x, [9], w, b)
print("w", w.flatten())
print("b", b.flatten())
print("x", x.flatten())
print("y", y.flatten())
w = torch.randn(64)
b = torch.randn(64)
x = torch.randn(3, 64)
y = F.layer_norm(x, [64], w, b)

with open(HERE / "data/layer_norm.data", "wb") as f:
w.numpy().tofile(f)
b.numpy().tofile(f)
x.numpy().tofile(f)
y.numpy().tofile(f)


def make_data_rms_norm():
from modeling_chatglm import RMSNorm

m = RMSNorm(7, eps=1e-6).eval()
m = RMSNorm(64, eps=1e-6).eval()
m.weight.data.uniform_()

x = torch.randn(2, 7)
x = torch.randn(3, 64)
with torch.no_grad():
y = m(x)
print("weight", m.weight.data.flatten())
print("x", x.flatten())
print("y", y.flatten())

with open(HERE / "data/rms_norm.data", "wb") as f:
m.weight.data.numpy().tofile(f)
x.numpy().tofile(f)
y.numpy().tofile(f)


def make_data_glm_self_attention():
from modeling_chatglm import SelfAttention
def make_data_glm_block():
from modeling_chatglm import GLMBlock

m = SelfAttention(16, 2, layer_id=3, empty_init=False).float().eval()
x = torch.randn(4, 1, 16) # [seqlen, bs, hidden]
m = (
GLMBlock(
hidden_size=32, num_attention_heads=8, layernorm_epsilon=1e-5, layer_id=3, num_layers=28, empty_init=False
)
.float()
.eval()
)
x1 = torch.randn(4, 1, 32) # [seqlen, bs, hidden]
position_ids = torch.tensor([[[0, 1, 2, 2], [0, 0, 0, 1]]])
attention_mask = torch.tensor(
[
@@ -237,23 +250,15 @@ def make_data_glm_self_attention():
[0, 0, 0, 0],
]
]
]
).bool()
y, layer_past = m(
x,
position_ids=position_ids,
attention_mask=attention_mask,
layer_id=m.layer_id,
use_cache=True,
],
dtype=torch.bool,
)
y1, layer_past = m(
x1, position_ids=position_ids, attention_mask=attention_mask, layer_id=m.layer_id, use_cache=True
)
print("x", x.flatten())
print("query_key_value.weight", m.query_key_value.weight.flatten())
print("query_key_value.bias", m.query_key_value.bias.flatten())
print("dense.weight", m.dense.weight.flatten())
print("dense.bias", m.dense.bias.flatten())
print("y", y.flatten())

x2 = torch.randn(1, 1, 16)
# cross attention
x2 = torch.randn(1, 1, 32)
position_ids = torch.tensor([[[2], [2]]])
attention_mask = torch.zeros(1, 1, dtype=torch.bool)
y2, layer_past = m(
@@ -264,10 +269,8 @@
layer_past=layer_past,
use_cache=True,
)
print("x2", x2.flatten())
print("y2", y2.flatten())

x3 = torch.randn(1, 1, 16)
x3 = torch.randn(1, 1, 32)
position_ids = torch.tensor([[[2], [3]]])
attention_mask = torch.zeros(1, 1, dtype=torch.bool)
y3, layer_past = m(
@@ -278,47 +281,29 @@ def make_data_glm2_block():
layer_past=layer_past,
use_cache=True,
)
print("x3", x3.flatten())
print("y3", y3.flatten())

print(m)

def make_data_glm_block():
from modeling_chatglm import GLMBlock

m = (
GLMBlock(hidden_size=8, num_attention_heads=2, layernorm_epsilon=1e-5, layer_id=3, empty_init=False)
.float()
.eval()
)
x = torch.randn(4, 1, 8) # [seqlen, bs, hidden]
position_ids = torch.tensor([[[0, 1, 2, 2], [0, 0, 0, 1]]])
attention_mask = torch.tensor(
[
[
[
[0, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 0, 1],
[0, 0, 0, 0],
]
]
]
).bool()
(y,) = m(x, position_ids=position_ids, attention_mask=attention_mask, layer_id=m.layer_id)
print("x", x.flatten())
print("input_layernorm.weight", m.input_layernorm.weight.data.flatten())
print("input_layernorm.bias", m.input_layernorm.bias.data.flatten())
print("query_key_value.weight", m.attention.query_key_value.weight.data.flatten())
print("query_key_value.bias", m.attention.query_key_value.bias.data.flatten())
print("dense.weight", m.attention.dense.weight.data.flatten())
print("dense.bias", m.attention.dense.bias.data.flatten())
print("post_attention_layernorm.weight", m.post_attention_layernorm.weight.data.flatten())
print("post_attention_layernorm.bias", m.post_attention_layernorm.bias.data.flatten())
print("dense_h_to_4h.weight", m.mlp.dense_h_to_4h.weight.data.flatten())
print("dense_h_to_4h.bias", m.mlp.dense_h_to_4h.bias.data.flatten())
print("dense_4h_to_h.weight", m.mlp.dense_4h_to_h.weight.data.flatten())
print("dense_4h_to_h.bias", m.mlp.dense_4h_to_h.bias.data.flatten())
print("y", y.flatten())
with open(HERE / "data/glm_block.data", "wb") as f:
m.input_layernorm.weight.data.numpy().tofile(f)
m.input_layernorm.bias.data.numpy().tofile(f)
m.attention.query_key_value.weight.data.numpy().tofile(f)
m.attention.query_key_value.bias.data.numpy().tofile(f)
m.attention.dense.weight.data.numpy().tofile(f)
m.attention.dense.bias.data.numpy().tofile(f)
m.post_attention_layernorm.weight.data.numpy().tofile(f)
m.post_attention_layernorm.bias.data.numpy().tofile(f)
m.mlp.dense_h_to_4h.weight.data.numpy().tofile(f)
m.mlp.dense_h_to_4h.bias.data.numpy().tofile(f)
m.mlp.dense_4h_to_h.weight.data.numpy().tofile(f)
m.mlp.dense_4h_to_h.bias.data.numpy().tofile(f)

x1.numpy().tofile(f)
y1.data.numpy().tofile(f)
x2.numpy().tofile(f)
y2.data.numpy().tofile(f)
x3.numpy().tofile(f)
y3.data.numpy().tofile(f)


def make_data_glm2_block():
@@ -327,8 +312,8 @@ def make_data_glm2_block():

config = AutoConfig.from_pretrained(CHATGLM2_MODEL_PATH, trust_remote_code=True)
config.layernorm_epsilon = 1e-6
config.hidden_size = 16
config.num_attention_heads = 4
config.hidden_size = 32
config.num_attention_heads = 8
config.multi_query_group_num = 2
config.ffn_hidden_size = 6
config.kv_channels = config.hidden_size // config.num_attention_heads
@@ -343,45 +328,52 @@ def make_data_glm2_block():
rotary_pos_emb = rotary_pos_emb_module(8)[None, :seq_length].transpose(0, 1).contiguous()

# self attention
x = torch.randn(seq_length, 1, config.hidden_size)
x1 = torch.randn(seq_length, 1, config.hidden_size)
with torch.no_grad():
y, kv_cache = m(x, attention_mask=None, rotary_pos_emb=rotary_pos_emb)

print(m)

print("input_layernorm.weight", m.input_layernorm.weight.data.flatten())
print("attn.qkv.weight", m.self_attention.query_key_value.weight.data.flatten())
print("attn.qkv.bias", m.self_attention.query_key_value.bias.data.flatten())
print("attn.dense.weight", m.self_attention.dense.weight.data.flatten())
print("post_attention_layernorm.weight", m.post_attention_layernorm.weight.data.flatten())
print("mlp.dense_h_to_4h.weight", m.mlp.dense_h_to_4h.weight.data.flatten())
print("mlp.dense_4h_to_h.weight", m.mlp.dense_4h_to_h.weight.data.flatten())

print("x", x.flatten())
print("y", y.flatten())
y1, kv_cache = m(x1, attention_mask=None, rotary_pos_emb=rotary_pos_emb)

# cross attention
position_ids = torch.tensor([[seq_length]])
rotary_pos_emb = rotary_pos_emb_module(8)[position_ids].transpose(0, 1).contiguous()
x = torch.randn(1, 1, config.hidden_size)
x2 = torch.randn(1, 1, config.hidden_size)
with torch.no_grad():
y, kv_cache = m(x, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)
print("x2", x.flatten())
print("y2", y.flatten())
y2, kv_cache = m(x2, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)

# cross attention
position_ids = torch.tensor([[seq_length + 1]])
rotary_pos_emb = rotary_pos_emb_module(8)[position_ids].transpose(0, 1).contiguous()
x = torch.randn(1, 1, config.hidden_size)
x3 = torch.randn(1, 1, config.hidden_size)
with torch.no_grad():
y, kv_cache = m(x, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)
print("x3", x.flatten())
print("y3", y.flatten())
y3, kv_cache = m(x3, attention_mask=None, rotary_pos_emb=rotary_pos_emb, kv_cache=kv_cache)

print(m)

with open(HERE / "data/glm2_block.data", "wb") as f:
m.input_layernorm.weight.data.numpy().tofile(f)
m.self_attention.query_key_value.weight.data.numpy().tofile(f)
m.self_attention.query_key_value.bias.data.numpy().tofile(f)
m.self_attention.dense.weight.data.numpy().tofile(f)
m.post_attention_layernorm.weight.data.numpy().tofile(f)
m.mlp.dense_h_to_4h.weight.data.numpy().tofile(f)
m.mlp.dense_4h_to_h.weight.data.numpy().tofile(f)

x1.numpy().tofile(f)
y1.numpy().tofile(f)
x2.numpy().tofile(f)
y2.numpy().tofile(f)
x3.numpy().tofile(f)
y3.numpy().tofile(f)


def main():
sys.path.append(str(CHATGLM_MODEL_PATH))
sys.path.append(str(CHATGLM2_MODEL_PATH))
torch.manual_seed(0)
make_data_glm2_block()
(HERE / "data").mkdir(parents=True, exist_ok=True)
# make_data_linear()
make_data_layernorm()
# make_data_rms_norm()
# make_data_glm_block()
# make_data_glm2_block()


if __name__ == "__main__":
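
In this commit main() seeds torch, creates tests/data/, and enables only make_data_layernorm(), leaving the other generators commented out. A hedged sketch of regenerating every fixture in one run is below; note that torch.manual_seed(0) is called once, so the exact contents of each .data file depend on which generators run and in what order (the checked-in files correspond to whichever calls were enabled when they were produced):

import sys
import torch

# Assumes this runs inside tests/test_convert.py (or with its globals imported),
# and that the Hugging Face snapshots pointed to by CHATGLM_MODEL_PATH and
# CHATGLM2_MODEL_PATH exist locally, since modeling_chatglm is imported from them.
sys.path.append(str(CHATGLM_MODEL_PATH))
sys.path.append(str(CHATGLM2_MODEL_PATH))
torch.manual_seed(0)
(HERE / "data").mkdir(parents=True, exist_ok=True)
for make in (make_data_linear, make_data_layernorm, make_data_rms_norm,
             make_data_glm_block, make_data_glm2_block):
    make()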
