Here are some resources about decoding strategies for LLM inference
tag: Consensus Game
| Equilibrium Search
| ICLR24
| MIT
paper link: here
citation:
@misc{jacob2023consensus,
  title         = {The Consensus Game: Language Model Generation via Equilibrium Search},
  author        = {Jacob, Athul Paul and Shen, Yikang and Farina, Gabriele and Andreas, Jacob},
  year          = {2023},
  eprint        = {2310.09139},
  archivePrefix = {arXiv},
  primaryClass  = {cs.GT},
  url           = {https://arxiv.org/abs/2310.09139},
}
tag: DoLa
| Contrastive Decoding
| ICLR24
| Microsoft
paper link: here
code link: here
citation:
@misc{chuang2024doladecodingcontrastinglayers,
  title         = {{DoLa}: Decoding by Contrasting Layers Improves Factuality in Large Language Models},
  author        = {Chuang, Yung-Sung and Xie, Yujia and Luo, Hongyin and Kim, Yoon and Glass, James and He, Pengcheng},
  year          = {2024},
  eprint        = {2309.03883},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2309.03883},
}
tag: Speculative Sampling
| Google DeepMind
paper link: here
citation:
@misc{chen2023acceleratinglargelanguagemodel,
  title         = {Accelerating Large Language Model Decoding with Speculative Sampling},
  author        = {Charlie Chen and Sebastian Borgeaud and Geoffrey Irving and Jean-Baptiste Lespiau and Laurent Sifre and John Jumper},
  year          = {2023},
  eprint        = {2302.01318},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2302.01318},
}
tag: Speculative Decoding
| ICML23
| Google
paper link: here
code link: here
citation:
@misc{leviathan2023fastinferencetransformersspeculative,
  title         = {Fast Inference from {Transformers} via Speculative Decoding},
  author        = {Leviathan, Yaniv and Kalman, Matan and Matias, Yossi},
  year          = {2023},
  eprint        = {2211.17192},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2211.17192},
}
tag: Contrastive Decoding
| ACL23
| Stanford University
paper link: here
code link: here
citation:
@misc{li2023contrastive,
  title         = {Contrastive Decoding: Open-ended Text Generation as Optimization},
  author        = {Li, Xiang Lisa and Holtzman, Ari and Fried, Daniel and Liang, Percy and Eisner, Jason and Hashimoto, Tatsunori and Zettlemoyer, Luke and Lewis, Mike},
  year          = {2023},
  eprint        = {2210.15097},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2210.15097},
}
tag: Nucleus Sampling
| ICLR20
| Allen AI
| University of Washington
paper link: here
code link: here
citation:
@misc{holtzman2020curious,
  title         = {The Curious Case of Neural Text Degeneration},
  author        = {Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin},
  year          = {2020},
  eprint        = {1904.09751},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/1904.09751},
}