pytorch
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/test_model.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/apple/coreml/TARGETS‎
Lines changed: 24 additions & 5 deletions b/‎backends/apple/coreml/TARGETS‎
Lines changed: 24 additions & 5 deletions
diff --git a/‎backends/apple/coreml/compiler/torch_ops.py‎
Lines changed: 0 additions & 1 deletion b/‎backends/apple/coreml/compiler/torch_ops.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎backends/apple/coreml/recipes/coreml_recipe_provider.py‎
Lines changed: 21 additions & 3 deletions b/‎backends/apple/coreml/recipes/coreml_recipe_provider.py‎
Lines changed: 21 additions & 3 deletions
diff --git a/‎backends/apple/coreml/test/test_coreml_recipes.py‎
Lines changed: 0 additions & 25 deletions b/‎backends/apple/coreml/test/test_coreml_recipes.py‎
Lines changed: 0 additions & 25 deletions
diff --git a/‎backends/apple/coreml/test/test_torch_ops.py‎
Lines changed: 30 additions & 2 deletions b/‎backends/apple/coreml/test/test_torch_ops.py‎
Lines changed: 30 additions & 2 deletions
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 3 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 3 additions & 0 deletions
@@ -97,7 +97,7 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
+  if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
       # Install requirements for export_llama
       bash examples/models/llama/install_requirements.sh
       # Test export_llm script: python3 -m extension.llm.export.export_llm.
 
@@ -176,7 +176,7 @@ jobs:
           - model: phi_4_mini
             backend: portable
             runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5
+          - model: qwen2_5_1_5b
             backend: portable
             runner: linux.arm64.2xlarge
           - model: llama3_2_vision_encoder
@@ -823,10 +823,10 @@ jobs:
           --tsv_path ${TSV_PATH}
         echo "::endgroup::"
 
-  test-huggingface-transformers-coreml:
+  test-huggingface-transformers-macos:
     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
     if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-coreml
+    name: test-huggingface-transformers-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     permissions:
       id-token: write
@@ -844,10 +844,10 @@ jobs:
           # phi4-mini|xnnpack|--quantize,
           # smollm2-135m|xnnpack|--quantize,
           # smollm3-3b|xnnpack|--quantize,
+          # qwen3-1.7b|xnnpack|--quantize,
           # CoreML.
           llama3.2-1b|coreml_fp32_gpu|--quantize,
           qwen3-0.6b|coreml_fp32_gpu|--quantize,
-          qwen3-1.7b|xnnpack|--quantize,
           smollm2-135m|coreml_fp32_gpu|--quantize,
           olmo-1b|coreml_fp32_gpu|--quantize,
           bert|coreml_fp32_gpu|--quantize,
 
@@ -52,7 +52,7 @@ To get started you can:
 
 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).
 
 ## Feedback and Engagement
 
 
@@ -61,16 +61,21 @@ runtime.python_library(
 )
 
 runtime.python_library(
-    name = "recipes",
-    srcs = glob([
-        "recipes/*.py",
-    ]),
+    name = "coreml_recipes",
+    srcs = [
+        "recipes/__init__.py",
+        "recipes/coreml_recipe_provider.py"
+    ],
     visibility = [
         "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
     ],
     deps = [
         "fbsource//third-party/pypi/coremltools:coremltools",
+        ":coreml_recipe_types",
         ":backend",
+        ":partitioner",
+        ":quantizer",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
     ],
 )
 
+runtime.python_library(
+    name = "coreml_recipe_types",
+    srcs = [
+        "recipes/coreml_recipe_types.py",
+    ],
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
+    ],
+    deps = [
+        "//executorch/export:recipe",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
-        ":recipes",
+        ":coreml_recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
         "fbsource//third-party/pypi/scikit-learn:scikit-learn",
 
@@ -175,7 +175,6 @@ def dequantize_affine(context, node):
         int_data.astype(quantized_np_dtype),
         zero_point,
         scale,
-        axis=-1,
         name=node.name,
     )
     context.add(output, node.name)
 
@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
 
+import logging
 from typing import Any, Optional, Sequence
 
 import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
         unexpected = set(kwargs.keys()) - expected_keys
         if unexpected:
-            raise ValueError(
-                f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+            logging.warning(
+                f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
+                f"Expected parameters: {list(expected_keys)}"
             )
 
         self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
     def _get_expected_keys(self, recipe_type: RecipeType) -> set:
         """Get expected parameter keys for a recipe type"""
-        common_keys = {"minimum_deployment_target", "compute_unit"}
+        common_keys = {
+            "minimum_deployment_target",
+            "compute_unit",
+            "skip_ops_for_coreml_delegation",
+            "lower_full_graph",
+            "take_over_constant_data",
+        }
 
         if recipe_type in [
             CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
         if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
             take_over_mutable_buffer = False
 
+        # Extract additional partitioner parameters
+        skip_ops_for_coreml_delegation = kwargs.get(
+            "skip_ops_for_coreml_delegation", None
+        )
+        lower_full_graph = kwargs.get("lower_full_graph", False)
+        take_over_constant_data = kwargs.get("take_over_constant_data", True)
+
         partitioner = CoreMLPartitioner(
             compile_specs=compile_specs,
             take_over_mutable_buffer=take_over_mutable_buffer,
+            skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
+            lower_full_graph=lower_full_graph,
+            take_over_constant_data=take_over_constant_data,
         )
 
         edge_compile_config = EdgeCompileConfig(
 
@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
             )
         self.assertIn("must be positive", str(cm.exception))
 
-        # Test unexpected parameter
-        with self.assertRaises(ValueError) as cm:
-            self.provider.create_recipe(
-                CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
-                group_size=32,  # group_size not valid for per-channel
-            )
-        self.assertIn("unexpected parameters", str(cm.exception))
-
     def test_int8_weight_only_per_channel(self):
         """Test INT8 weight-only per-channel quantization"""
         model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
         self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
         self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
 
-    def test_pt2e_recipes_parameter_rejection(self):
-        """Test that PT2E recipes reject TorchAO-specific parameters"""
-        # PT2E recipes should reject TorchAO-specific parameters
-        pt2e_recipes = [
-            CoreMLRecipeType.PT2E_INT8_STATIC,
-            CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
-        ]
-        torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
-
-        for recipe_type in pt2e_recipes:
-            for param in torchao_params:
-                with self.subTest(recipe=recipe_type.value, param=param):
-                    kwargs = {param: "dummy_value"}
-                    with self.assertRaises(ValueError) as cm:
-                        self.provider.create_recipe(recipe_type, **kwargs)
-                    self.assertIn("unexpected parameters", str(cm.exception).lower())
-
     def test_filter_fn_comprehensive(self):
         """Comprehensive test for filter_fn parameter functionality"""
 
 
@@ -27,9 +27,9 @@
 class TestTorchOps(unittest.TestCase):
     edge_compile_config = executorch.exir.EdgeCompileConfig()
 
-    def _coreml_partitioner(self):
+    def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
         compile_specs = CoreMLBackend.generate_compile_specs(
-            minimum_deployment_target=ct.target.iOS18
+            minimum_deployment_target=minimum_deployment_target
         )
         return CoreMLPartitioner(compile_specs=compile_specs)
 
@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
 
+    def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+        )
+        ep = torch.export.export(model, example_inputs)
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[
+                self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
+            ],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
     def test_dequantize_codebook_linear_per_grouped_col(self):
         model, example_inputs = self._get_test_model()
         quantize_(
@@ -298,6 +325,7 @@ def forward(self, x):
     test_runner.test_dequantize_affine_c4w_embedding()
     test_runner.test_dequantize_affine_c4w_linear()
     test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+    test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
     test_runner.test_dequantize_codebook_linear_per_grouped_col()
     test_runner.test_dequantize_codebook_linear_per_grouped_row()
     test_runner.test_dequantize_codebook_embedding_per_grouped_col()
 
@@ -37,6 +37,7 @@
 from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass  # noqa
 from .decompose_cumsum_pass import DecomposeCumsumPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
+from .decompose_div_tensor_mode import DecomposeDivTensorModePass  # noqa
 from .decompose_elu_pass import DecomposeEluPass  # noqa
 from .decompose_embedding_pass import DecomposeEmbeddingPass  # noqa  # noqa
 from .decompose_expm1_pass import DecomposeExpm1Pass  # noqa
 
@@ -42,6 +42,7 @@
     DecomposeCosineSimilarityPass,
     DecomposeCumsumPass,
     DecomposeDivPass,
+    DecomposeDivTensorModePass,
     DecomposeEluPass,
     DecomposeEmbeddingPass,
     DecomposeExpm1Pass,
@@ -211,6 +212,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
             DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
         )
         self.add_pass(DecomposeNotEqualPass())
+        self.add_pass(DecomposeDivTensorModePass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeSoftmaxPass())
         self.add_pass(DecomposeGeluPass())
@@ -289,6 +291,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeNotEqualPass())
         self.add_pass(DecomposeCosineSimilarityPass())
         self.add_pass(DecomposeGluPass())
+        self.add_pass(DecomposeDivTensorModePass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeLinearVectorNormPass())
Original file line number	Diff line number	Diff line change
`@@ -175,7 +175,6 @@ def dequantize_affine(context, node):`
`175`	`175`	`int_data.astype(quantized_np_dtype),`
`176`	`176`	`zero_point,`
`177`	`177`	`scale,`
`178`		`- axis=-1,`
`179`	`178`	`name=node.name,`
`180`	`179`	`)`
`181`	`180`	`context.add(output, node.name)`