Here are some resources about pruning strategies for LLM inference
tag: Minitron
| NIPS24
| Nvidia
paper link: here
code link: here
modelhub link: here
citation:
@misc{muralidharan2024compactlanguagemodelspruning,
  author        = {Saurav Muralidharan and Sharath Turuvekere Sreenivas and Raviraj Joshi and Marcin Chochowski and Mostofa Patwary and Mohammad Shoeybi and Bryan Catanzaro and Jan Kautz and Pavlo Molchanov},
  title         = {Compact Language Models via Pruning and Knowledge Distillation},
  year          = {2024},
  eprint        = {2407.14679},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2407.14679},
}
tag: Wanda
| ICLR24
| Meta
| CMU
paper link: here
code link: here
citation:
@misc{sun2023simple,
  author        = {Mingjie Sun and Zhuang Liu and Anna Bair and J. Zico Kolter},
  title         = {A Simple and Effective Pruning Approach for Large Language Models},
  year          = {2023},
  eprint        = {2306.11695},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
tag: LLM-Pruner
| NIPS23
| NUS
paper link: here
code link: here
citation:
% Braces around LLM-Pruner keep its capitalisation under styles that sentence-case titles.
@misc{ma2023llmpruner,
  author        = {Xinyin Ma and Gongfan Fang and Xinchao Wang},
  title         = {{LLM-Pruner}: On the Structural Pruning of Large Language Models},
  year          = {2023},
  eprint        = {2305.11627},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}
tag: SCOP
| NIPS20
| Noah’s Ark Lab
| Peking University
paper link: here
code link: here
citation:
% primaryClass holds only the arXiv category code; braces around SCOP keep the
% acronym uppercase under styles that sentence-case titles.
@misc{tang2021scop,
  author        = {Yehui Tang and Yunhe Wang and Yixing Xu and Dacheng Tao and Chunjing Xu and Chao Xu and Chang Xu},
  title         = {{SCOP}: Scientific Control for Reliable Neural Network Pruning},
  year          = {2021},
  eprint        = {2010.10732},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
tag: MHA
| Multi-head Attention
| NIPS19
| CMU
paper link: here
code link: here
citation:
% The venue is a conference proceedings series, so it is recorded as the
% booktitle of an @inproceedings entry rather than as a journal.
@inproceedings{michel2019sixteen,
  author    = {Michel, Paul and Levy, Omer and Neubig, Graham},
  title     = {Are Sixteen Heads Really Better than One?},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {32},
  year      = {2019},
}