You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I tried the latest commit of main (b694522) on a V100 GPU using CUDA 12.2 and 12.4,
and I got the following error:
'repo_revision': 'main', 'file_path': 'tokenizer.json', 'downloaded': 9085657, 'downloaded_this_session': 0, 'total': 9085657, 'speed': 0, 'eta': 0.0, 'status': 'complete'} [...] {'total': 54598, 'speed': 0, 'eta': 0.0, 'status': 'complete'}}, 'status': 'complete'}
Removing download task for Shard(model_id='unsloth/Llama-3.2-1B-Instruct', start_layer=0, end_layer=15, n_layers=16): True
Error processing prompt: Nvrtc Error 5, NVRTC_ERROR_INVALID_OPTION
nvrtc: error: invalid value for --gpu-architecture (-arch)
Traceback (most recent call last):
File "/dev/shm/exo/exo/main.py", line 192, in run_model_cli
await node.process_prompt(shard, prompt, request_id=request_id)
File "/dev/shm/exo/exo/orchestration/standard_node.py", line 170, in process_prompt
resp = await self._process_prompt(base_shard, prompt, request_id, inference_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dev/shm/exo/exo/orchestration/standard_node.py", line 203, in _process_prompt
result = await self.inference_engine.infer_prompt(request_id, shard, prompt, inference_state=inference_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dev/shm/exo/exo/inference/inference_engine.py", line 28, in infer_prompt
tokens = await self.encode(shard, prompt)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dev/shm/exo/exo/inference/tinygrad/inference.py", line 76, in encode
await self.ensure_shard(shard)
File "/dev/shm/exo/exo/inference/tinygrad/inference.py", line 100, in ensure_shard
model_shard = await loop.run_in_executor(self.executor, build_transformer, model_path, shard, parameters)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dev/shm/exo/exo/inference/tinygrad/inference.py", line 45, in build_transformer
model = Transformer(**MODEL_PARAMS[model_size]["args"], linear=linear, max_context=8192, jit=True, shard=shard)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dev/shm/exo/exo/inference/tinygrad/models/llama.py", line 198, in __init__
self.freqs_cis = precompute_freqs_cis(dim // n_heads, self.max_context*2, rope_theta, rope_scaling=rope_scaling).contiguous()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/dev/shm/exo/exo/inference/tinygrad/models/llama.py", line 17, in precompute_freqs_cis
freqs[:dim // 4] *= low_freq_factor
~~~~~^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/tensor.py", line 3500, in _wrapper
ret = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/tensor.py", line 1125, in __setitem__
res = self.realize()._getitem(indices, v)
^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/tensor.py", line 3475, in _wrapper
if _METADATA.get() is not None: return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/tensor.py", line 213, in realize
run_schedule(*self.schedule_with_vars(*lst), do_update_stats=do_update_stats)
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 222, in run_schedule
for ei in lower_schedule(schedule):
^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 215, in lower_schedule
raise e
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 209, in lower_schedule
try: yield lower_schedule_item(si)
^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 193, in lower_schedule_item
runner = get_runner(si.outputs[0].device, si.ast)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 162, in get_runner
method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, dname=dname))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/engine/realize.py", line 84, in __init__
self.lib:bytes = precompiled if precompiled is not None else Device[p.dname].compiler.compile_cached(p.src)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/device.py", line 183, in compile_cached
lib = self.compile(src)
^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/runtime/support/compiler_cuda.py", line 60, in compile
def compile(self, src:str) -> bytes: return self._compile_program(src, nvrtc.nvrtcGetPTX, nvrtc.nvrtcGetPTXSize)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/runtime/support/compiler_cuda.py", line 56, in _compile_program
nvrtc_check(nvrtc.nvrtcCompileProgram(prog, len(self.compile_options), to_char_p_p([o.encode() for o in self.compile_options])), prog)
File "/apps/sw/miniconda/envs/exo-4c98108/lib/python3.12/site-packages/tinygrad/runtime/support/compiler_cuda.py", line 16, in nvrtc_check
raise CompileError(f"Nvrtc Error {status}, {ctypes.string_at(nvrtc.nvrtcGetErrorString(status)).decode()}\n{err_log}")
tinygrad.device.CompileError: Nvrtc Error 5, NVRTC_ERROR_INVALID_OPTION
nvrtc: error: invalid value for --gpu-architecture (-arch)
Received exit signal SIGTERM...
Thank you for using exo.
I think this is tinygrad-related, but I am not sure. This can probably be reproduced and tested on Colab with a T4, since it is a similar architecture to the V100.
The text was updated successfully, but these errors were encountered:
Hi,
I tried the latest commit of main (b694522) on a V100 GPU using CUDA 12.2 and 12.4,
and I got the following error:
I think this is tinygrad-related, but I am not sure. This can probably be reproduced and tested on Colab with a T4, since it is a similar architecture to the V100.
The text was updated successfully, but these errors were encountered: