@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use the base env, not the one set up by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETDump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"
 
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet; for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 are not available in the released version
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"
 
-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass the matrix variable as an environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using the Python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer
 
         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
             prompt='Simply put, the theory of relativity states that',
             max_seq_len=64
         )
         print(generated_text)
         "
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
+
         echo "::endgroup::"
 
 
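Note on the export change: the removed Python lines exported on the fly inside from_pretrained(model_id, recipe='xnnpack'); the workflow now exports once with optimum-cli (XNNPACK recipe, custom SDPA, --qlinear) and loads the resulting directory. A minimal sketch of the two paths, using only the optimum-executorch API already shown in this diff; MODEL_ID and OUTPUT_DIR are the environment variables the script sets:

# Sketch of the load paths before and after this change. Uses the
# optimum-executorch API exactly as it appears in the diff above.
import os

from optimum.executorch import ExecuTorchModelForCausalLM
from transformers import AutoTokenizer

model_id = os.environ["MODEL_ID"]

# Before: export happened implicitly inside from_pretrained.
# model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")

# After: load the model.pte written by `optimum-cli export executorch`.
model = ExecuTorchModelForCausalLM.from_pretrained(os.environ["OUTPUT_DIR"])

print(
    model.text_generation(
        tokenizer=AutoTokenizer.from_pretrained(model_id),
        prompt="Simply put, the theory of relativity states that",
        max_seq_len=64,
    )
)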
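
The new -DEXECUTORCH_BUILD_DEVTOOLS=ON and -DEXECUTORCH_ENABLE_EVENT_TRACER=ON flags are what let executor_runner emit an ETDump. A sketch of reproducing the profiling run locally, assuming the same cmake-out build tree and ${OUTPUT_DIR} layout as the workflow; the wrapper and the assertion are illustrative, not part of the CI job:

# Sketch: rerun the ETDump collection step outside CI. The binary path
# and flags are the ones the workflow uses; everything else is assumed.
import os
import subprocess

out_dir = os.environ["OUTPUT_DIR"]
etdump_path = os.path.join(out_dir, "etdump.etdp")

subprocess.run(
    [
        "./cmake-out/executor_runner",
        f"--model_path={os.path.join(out_dir, 'model.pte')}",
        f"--etdump_path={etdump_path}",
    ],
    check=True,
)
# A runner built without the event tracer would not produce this file.
assert os.path.getsize(etdump_path) > 0, "ETDump missing or empty"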
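
The TSV uploaded as a profiling artifact comes from inspector_cli; the same per-operator timings can also be pulled programmatically. A sketch assuming the devtools Inspector Python API that ships with the ExecuTorch build above:

# Sketch: inspect the ETDump in Python instead of via inspector_cli.
# Assumes executorch.devtools.Inspector from the build above; the path
# mirrors the workflow's ${OUTPUT_DIR}/etdump.etdp.
import os

from executorch.devtools import Inspector

inspector = Inspector(
    etdump_path=os.path.join(os.environ["OUTPUT_DIR"], "etdump.etdp")
)

# Prints per-event timings, the same data inspector_cli writes to TSV.
inspector.print_data_tabular()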