Environment: NVIDIA RTX 3060 (laptop GPU), CUDA 12.1
$ python build.py --model llama2/Llama-2-7b-chat-hf --target cuda --quantization q4f16_1 --artifact-path "./dist" --use-cache 0
Contents of benchmark.py:
from mlc_chat import ChatModule
cm = ChatModule(model="./dist/Llama-2-7b-chat-hf-q4f16_1/params/", lib_path="./dist/Llama-2-7b-chat-hf-q4f16_1/Llama-2-7b-chat-hf-q4f16_1-cuda.so")
output = cm.benchmark_generate("What's the meaning of life?", generate_length=256)
print(f"Generated text:\n{output}\n")
print(f"Statistics: {cm.stats()}")
$ python benchmark.py
Traceback (most recent call last):
  File "/home/kiwi/llm/mlc-llm/benchmark.py", line 9, in <module>
    cm = ChatModule(model="./dist/Llama-2-7b-chat-hf-q4f16_1/params/", lib_path="./dist/Llama-2-7b-chat-hf-q4f16_1/Llama-2-7b-chat-hf-q4f16_1-cuda.so",device="vulkan")
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kiwi/miniconda3/envs/mlc-py11/lib/python3.11/site-packages/mlc_chat/chat_module.py", line 603, in __init__
    self._reload(self.lib_path, self.model_path, user_chat_config_json_str)
  File "/home/kiwi/miniconda3/envs/mlc-py11/lib/python3.11/site-packages/mlc_chat/chat_module.py", line 784, in _reload
    self._reload_func(lib, model_path, app_config_json)
  File "tvm/_ffi/_cython/./packed_func.pxi", line 332, in tvm._ffi._cy3.core.PackedFuncBase.__call__
  File "tvm/_ffi/_cython/./packed_func.pxi", line 263, in tvm._ffi._cy3.core.FuncCall
  File "tvm/_ffi/_cython/./packed_func.pxi", line 252, in tvm._ffi._cy3.core.FuncCall3
  File "tvm/_ffi/_cython/./base.pxi", line 182, in tvm._ffi._cy3.core.CHECK_CALL
  File "/home/kiwi/miniconda3/envs/mlc-py11/lib/python3.11/site-packages/tvm/_ffi/base.py", line 476, in raise_last_ffi_error
    raise py_err
  File "/workspace/mlc-llm/cpp/llm_chat.cc", line 1183, in mlc::llm::LLMChatModule::GetFunction(tvm::runtime::String const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#1}::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
  File "/workspace/mlc-llm/cpp/llm_chat.cc", line 468, in mlc::llm::LLMChat::Reload(tvm::runtime::String, tvm::runtime::String, tvm::runtime::String)
  File "/workspace/mlc-llm/cpp/llm_chat.cc", line 448, in mlc::llm::LLMChat::LoadJSONOverride(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)
  File "/workspace/mlc-llm/cpp/llm_chat.cc", line 373, in mlc::llm::LLMChat::LoadJSONOverride(picojson::value const&, bool)
tvm._ffi.base.TVMError: Traceback (most recent call last):
  3: mlc::llm::LLMChatModule::GetFunction(tvm::runtime::String const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#1}::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
        at /workspace/mlc-llm/cpp/llm_chat.cc:1183
  2: mlc::llm::LLMChat::Reload(tvm::runtime::String, tvm::runtime::String, tvm::runtime::String)
        at /workspace/mlc-llm/cpp/llm_chat.cc:468
  1: mlc::llm::LLMChat::LoadJSONOverride(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)
        at /workspace/mlc-llm/cpp/llm_chat.cc:448
  0: mlc::llm::LLMChat::LoadJSONOverride(picojson::value const&, bool)
        at /workspace/mlc-llm/cpp/llm_chat.cc:373
  File "/workspace/mlc-llm/cpp/llm_chat.cc", line 373
TVMError: Check failed: (partial_update) is false: Key "vocab_size" not found.
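For context, the failure happens while the runtime parses the chat config JSON generated by build.py. A minimal sketch to inspect that config (assuming the default artifact layout, where the file would be `dist/Llama-2-7b-chat-hf-q4f16_1/params/mlc-chat-config.json`) could look like this:

```python
import json
from pathlib import Path

# Hypothetical sanity check (path assumes the default --artifact-path layout):
# see whether the generated chat config actually contains the "vocab_size"
# key that the runtime is complaining about.
config_path = Path("dist/Llama-2-7b-chat-hf-q4f16_1/params/mlc-chat-config.json")
config = json.loads(config_path.read_text())
print("vocab_size present:", "vocab_size" in config)
```

If the key is absent there, one possible explanation (just an assumption based on where the check fails) is that the installed mlc_chat wheel expects a newer config schema than the build.py that produced the artifact.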