
Pruning Strategies for LLM Inference

Here are some resources about pruning strategies for LLM inference.

Compact Language Models via Pruning and Knowledge Distillation

tag: Minitron | NIPS24 | Nvidia

paper link: here

code link: here

modelhub link: here

citation:

@misc{muralidharan2024compactlanguagemodelspruning,
      title={Compact Language Models via Pruning and Knowledge Distillation}, 
      author={Saurav Muralidharan and Sharath Turuvekere Sreenivas and Raviraj Joshi and Marcin Chochowski and Mostofa Patwary and Mohammad Shoeybi and Bryan Catanzaro and Jan Kautz and Pavlo Molchanov},
      year={2024},
      eprint={2407.14679},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2407.14679}, 
}
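
Minitron's recipe is one-shot structured pruning (width: neurons, heads, embedding channels; depth: layers) guided by activation-based importance, followed by knowledge distillation from the original model. Below is a minimal sketch of the activation-based width-importance idea, assuming pre-collected calibration activations; the paper's exact aggregation choices and the distillation loop are omitted.

```python
import torch

def neuron_importance(activations: torch.Tensor) -> torch.Tensor:
    """Activation-based importance for MLP width pruning: aggregate each hidden
    unit's response magnitude over a small calibration set (no gradients needed).
    activations: (num_tokens, hidden_dim), collected at the MLP intermediate layer."""
    return activations.abs().mean(dim=0)

def keep_indices(activations: torch.Tensor, keep: int) -> torch.Tensor:
    """Indices of the `keep` most important neurons; the pruned model is then
    distilled from the original model to recover accuracy."""
    return neuron_importance(activations).topk(keep).indices
```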

A Simple and Effective Pruning Approach for Large Language Models

tag: Wanda | ICLR24 | Meta | CMU

paper link: here

code link: here

citation:

@misc{sun2023simple,
      title={A Simple and Effective Pruning Approach for Large Language Models}, 
      author={Mingjie Sun and Zhuang Liu and Anna Bair and J. Zico Kolter},
      year={2023},
      eprint={2306.11695},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2306.11695}
}
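
Wanda scores each weight by its magnitude times the L2 norm of the corresponding input activation, comparing scores within each output row, with no retraining or weight update. A minimal sketch of that scoring rule follows; the tensor shapes and the `wanda_prune` helper are illustrative, not the paper's code.

```python
import torch

def wanda_scores(weight: torch.Tensor, calib_inputs: torch.Tensor) -> torch.Tensor:
    """Wanda score: |W_ij| * ||X_j||_2, where X_j is input feature j over a
    calibration batch. weight: (out, in); calib_inputs: (num_tokens, in)."""
    return weight.abs() * calib_inputs.norm(p=2, dim=0)

def wanda_prune(weight, calib_inputs, sparsity=0.5):
    """Zero the lowest-scoring weights within each output row
    (Wanda's per-output comparison group)."""
    scores = wanda_scores(weight, calib_inputs)
    k = int(weight.shape[1] * sparsity)
    drop = scores.topk(k, dim=1, largest=False).indices
    mask = torch.ones_like(weight, dtype=torch.bool).scatter_(1, drop, False)
    return weight * mask
```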

LLM-Pruner: On the Structural Pruning of Large Language Models

tag: LLM-Pruner | NIPS23 | NUS

paper link: here

code link: here

citation:

@misc{ma2023llmpruner,
      title={LLM-Pruner: On the Structural Pruning of Large Language Models}, 
      author={Xinyin Ma and Gongfan Fang and Xinchao Wang},
      year={2023},
      eprint={2305.11627},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2305.11627}
}
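
LLM-Pruner removes coupled structural groups (e.g. an attention head together with every projection that depends on it), ranked by a gradient-based first-order Taylor estimate of the loss change, then recovers quality with a short LoRA fine-tune. A rough sketch of the Taylor importance score, assuming gradients from one backward pass on calibration data; the dependency-graph construction and LoRA recovery stage are omitted.

```python
import torch

def taylor_importance(param: torch.Tensor) -> torch.Tensor:
    """First-order Taylor estimate of the loss change from removing a weight:
    |w * dL/dw|. Requires grads from a backward pass on calibration data."""
    assert param.grad is not None, "run loss.backward() on calibration data first"
    return (param * param.grad).abs()

def group_importance(params, structural_dims):
    """Score a coupled group by summing per-weight importance over every member
    tensor, reduced to one score per structural slice (e.g. per head)."""
    total = 0.0
    for p, d in zip(params, structural_dims):
        # move the structural dim first, collapse the rest, sum per slice
        per_slice = taylor_importance(p).movedim(d, 0).reshape(p.shape[d], -1).sum(dim=1)
        total = total + per_slice
    return total  # shape: (num_structures,)
```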

SCOP: Scientific Control for Reliable Neural Network Pruning

tag: SCOP | NIPS20 | Noah’s Ark Lab | Peking University

paper link: here

code link: here

citation:

@misc{tang2021scop,
      title={SCOP: Scientific Control for Reliable Neural Network Pruning}, 
      author={Yehui Tang and Yunhe Wang and Yixing Xu and Dacheng Tao and Chunjing Xu and Chao Xu and Chang Xu},
      year={2021},
      eprint={2010.10732},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2010.10732}
}
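
SCOP frames pruning as a controlled experiment: synthetic "knockoff" inputs act as a control group, and a filter whose response to knockoffs rivals its response to real data carries little input-specific information, so it can be pruned. Below is a loose sketch of that comparison only; the paper's actual method learns joint scaling factors over real and knockoff features rather than thresholding raw responses.

```python
import torch

def redundancy_score(real_feats: torch.Tensor, knockoff_feats: torch.Tensor) -> torch.Tensor:
    """Per-channel redundancy: fraction of a filter's average response magnitude
    attributable to knockoff (control) inputs. Shapes: (batch, channels, H, W)."""
    real = real_feats.abs().mean(dim=(0, 2, 3))
    fake = knockoff_feats.abs().mean(dim=(0, 2, 3))
    return fake / (real + fake + 1e-8)  # ~0.5 or above => likely redundant

def prune_candidates(real_feats, knockoff_feats, threshold=0.5):
    """Channels whose knockoff response is on par with their real response."""
    return (redundancy_score(real_feats, knockoff_feats) >= threshold).nonzero().flatten()
```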

Are sixteen heads really better than one?

tag: MHA | Multi-head Attention | NIPS19 | CMU

paper link: here

code link: here

citation:

@article{michel2019sixteen,
  title={Are sixteen heads really better than one?},
  author={Michel, Paul and Levy, Omer and Neubig, Graham},
  journal={Advances in Neural Information Processing Systems},
  volume={32},
  year={2019}
}
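
Michel et al. estimate head importance by attaching a gate ξ_h (fixed at 1) to each head's output and accumulating |∂L/∂ξ_h| over data: heads the loss barely notices can be ablated with little quality loss. A minimal sketch, assuming the model's forward pass multiplies each head's output by the corresponding entry of a hypothetical `head_gates` tensor:

```python
import torch

def head_importance(loss: torch.Tensor, head_gates: torch.Tensor) -> torch.Tensor:
    """Importance proxy from Michel et al.: |dL/d(gate)| for a gate of ones
    multiplied into each head's output. head_gates: (num_layers, num_heads),
    requires_grad=True, used inside the model's forward pass."""
    (grads,) = torch.autograd.grad(loss, head_gates, retain_graph=True)
    return grads.abs()  # accumulate over batches, then prune the lowest-scoring heads
```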