diff --git a/pyproject.toml b/pyproject.toml index 5176f1b6d..c501d930e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ readme = "README.md" license = {text = "Apache 2"} dependencies = [ "fms-model-optimizer[fp8]>=0.8.0", - "ibm-fms>=1.7.0,<2.0", + "ibm-fms>=1.8.0,<2.0", # NB: use strict < with the next patch version to not exclude versions with # build metadata suffixes "vllm>=0.18.0,<0.18.1", @@ -75,6 +75,10 @@ override-dependencies = [ # requests is used by many dependencies, make sure it's patched for CVEs "requests>=2.32.4", + + # temporarily bypass vllm 0.18.0 compressed-tensors==0.13.0 requirement + # TODO: remove once vllm pins correct version bounds + "compressed-tensors==0.14.0.1", ] # This ensures that we build with the empty backend for vLLM extra-build-variables = { vllm = { VLLM_TARGET_DEVICE = "empty" } } diff --git a/uv.lock b/uv.lock index eed4dead4..3b6a578b5 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'win32'", @@ -16,6 +16,7 @@ resolution-markers = [ [manifest] overrides = [ + { name = "compressed-tensors", specifier = "==0.14.0.1" }, { name = "intel-extension-for-pytorch", marker = "sys_platform == 'never'" }, { name = "llvmlite", marker = "platform_machine not in 's390x, ppc64le'", specifier = "==0.44.0" }, { name = "opencv-python-headless", specifier = "==4.12.0.88" }, @@ -256,6 +257,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, ] +[[package]] +name = "auto-round" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "accelerate" }, + { name = "datasets" }, + { name = "numpy" }, + { name = "py-cpuinfo" }, + { name = "threadpoolctl" }, + { name = "torch", marker = "sys_platform == 'never'" }, + { name = "tqdm" }, + { name = "transformers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/36/cba4fc9acb678c3ec2b0c8ba24e964e9720819fa1f0f4d9674ad2388447f/auto_round-0.10.2.tar.gz", hash = "sha256:f00a5fd484363d9ab9f5628f46ae70fd880b5fa91fcd15d55a556b8e36d39e24", size = 431371, upload-time = "2026-02-24T17:44:36.899Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/8d/3899ad057f9b4df481a2f32c15d32dca9366842a0eac51914fbe6e0179d8/auto_round-0.10.2-py3-none-any.whl", hash = "sha256:42172962aede60d7f05d0632ce49d4b05ac78a0620cd6d1a65b28c9f625e4175", size = 563635, upload-time = "2026-02-24T17:44:35.298Z" }, +] + [[package]] name = "blake3" version = "1.0.8" @@ -585,7 +605,7 @@ wheels = [ [[package]] name = "compressed-tensors" -version = "0.13.0" +version = "0.14.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "loguru" }, @@ -593,9 +613,9 @@ dependencies = [ { name = "torch", marker = "sys_platform == 'never'" }, { name = "transformers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/65/88dd1c58fb9d0ded51b5c86471b937a1525f91fad2211a6f051dc1ea822d/compressed_tensors-0.13.0.tar.gz", hash = "sha256:23893824d3498ea3f1a829f14a8fa85f9a5e76a34c711a038b8d7c619ca9a67c", size = 200995, upload-time = "2025-12-16T16:03:55.397Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/f1/4c9b01ceaf82ad58ad00919223e09b8e74d4073a2ba8e3ab2f97521ef65c/compressed_tensors-0.14.0.1.tar.gz", hash = "sha256:5ad3841184b6f5020e06059b2463191c5c57a144bb97cab9159978d8118839b1", size = 226393, upload-time = "2026-03-11T17:04:35.57Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0b/b5/61ac2563c62490922b603c09113a083fd74af3630ec3931e769484d6dcb5/compressed_tensors-0.13.0-py3-none-any.whl", hash = "sha256:3518799c9baf034eb642efb551db6b0537b8713d45a64fe4def26f7f8d6cabec", size = 192620, upload-time = "2025-12-16T16:03:53.041Z" }, + { url = "https://files.pythonhosted.org/packages/0a/26/16a13993ecf4fdc9c39d63b3a6daabafd32a452cf68b81aa9eb3b8170913/compressed_tensors-0.14.0.1-py3-none-any.whl", hash = "sha256:46c4940a3a779d3d97108c294bfcd9acf4bd0491f7c6737c320f0e815ec732e4", size = 196454, upload-time = "2026-03-11T17:04:33.2Z" }, ] [[package]] @@ -1291,13 +1311,14 @@ wheels = [ [[package]] name = "ibm-fms" -version = "1.7.0" +version = "1.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "torch", marker = "sys_platform == 'never'" }, + { name = "transformers" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/74/2f/368d34b2eec3216691f97b095f65d71b07a0aebe0209794d07293bc8904c/ibm_fms-1.7.0-py3-none-any.whl", hash = "sha256:afd2f1a05a22bbfcd538d2fb211bc03be018c16f5ca5badf42c5fd62456f20d3", size = 212199, upload-time = "2026-02-16T20:40:54.943Z" }, + { url = "https://files.pythonhosted.org/packages/43/07/dc4f3b51e9ec46e1b87c0f1f1dca6875cbf3127b6942708edefca7d40949/ibm_fms-1.8.0-py3-none-any.whl", hash = "sha256:f931b9ee99891370d74293cf152b64cf4055749beb7078247cd39bbd54db0f7f", size = 225169, upload-time = "2026-03-20T14:20:21.334Z" }, ] [[package]] @@ -1594,10 +1615,11 @@ wheels = [ [[package]] name = "llmcompressor" -version = "0.9.0.2" +version = "0.10.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, + { name = "auto-round" }, { name = "compressed-tensors" }, { name = "datasets" }, { name = "loguru" }, @@ -1610,9 +1632,9 @@ dependencies = [ { name = "tqdm" }, { name = "transformers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/a6/4043dc3aa7dca3be5e8b54052a7e0e6cc778d5af11499e296b1f97940ad2/llmcompressor-0.9.0.2.tar.gz", hash = "sha256:4f1114afaf570c8757e4a9cde2d2d6a83f13ad4d29283119748139e514e8106c", size = 1161125, upload-time = "2026-02-13T22:48:20.939Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/d7/bdb05249463271e46ed4dee18e2a9f491e37ea42aefffd2022927a732cab/llmcompressor-0.10.0.1.tar.gz", hash = "sha256:f8915064ab47f14245e01f1d99012420c7ddf77b56612d368522afb959893a65", size = 1932302, upload-time = "2026-03-13T14:34:27.467Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/82/a0830c29c869993565aac0a237dce2ee634adc2a67b30f5e5500e9a46242/llmcompressor-0.9.0.2-py3-none-any.whl", hash = "sha256:3fbe56196cc89845962d836f4e0d0b10b630f7900b52ed7b508fba2b10076f61", size = 282143, upload-time = "2026-02-13T22:48:17.659Z" }, + { url = "https://files.pythonhosted.org/packages/0e/c9/19eabb878ea886bbc464bdde693576d9f330adc49b04be59d664744269a4/llmcompressor-0.10.0.1-py3-none-any.whl", hash = "sha256:e2930edaa5ed8f2b96eeb9928bc1caa1dbc961981acd66836cc6cc0d099d76bc", size = 295488, upload-time = "2026-03-13T14:34:24.323Z" }, ] [[package]] @@ -4242,7 +4264,7 @@ wheels = [ [[package]] name = "transformers" -version = "4.57.3" +version = "4.57.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -4256,9 +4278,9 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dd/70/d42a739e8dfde3d92bb2fff5819cbf331fe9657323221e79415cd5eb65ee/transformers-4.57.3.tar.gz", hash = "sha256:df4945029aaddd7c09eec5cad851f30662f8bd1746721b34cc031d70c65afebc", size = 10139680, upload-time = "2025-11-25T15:51:30.139Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/6b/2f416568b3c4c91c96e5a365d164f8a4a4a88030aa8ab4644181fdadce97/transformers-4.57.3-py3-none-any.whl", hash = "sha256:c77d353a4851b1880191603d36acb313411d3577f6e2897814f333841f7003f4", size = 11993463, upload-time = "2025-11-25T15:51:26.493Z" }, + { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" }, ] [[package]] @@ -4385,7 +4407,7 @@ wheels = [ [[package]] name = "vllm" version = "0.18.0" -source = { git = "https://github.com/vllm-project/vllm?rev=v0.18.0#89138b21cc246ae944c741d5c399c148e2b770ab" } +source = { git = "https://github.com/vllm-project/vllm?rev=v0.18.0#bcf2be96120005e9aea171927f85055a6a5c0cf6" } dependencies = [ { name = "aiohttp" }, { name = "anthropic" }, @@ -4474,7 +4496,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "fms-model-optimizer", extras = ["fp8"], specifier = ">=0.8.0" }, - { name = "ibm-fms", specifier = ">=1.7.0,<2.0" }, + { name = "ibm-fms", specifier = ">=1.8.0,<2.0" }, { name = "vllm", git = "https://github.com/vllm-project/vllm?rev=v0.18.0" }, ]