Here are some resources on mixed-precision strategies, especially low-precision training for LLMs. Note that many of the methods here are shared with those in the quantization section.
tag: eXmY
| Google
paper link: here
citation:
@misc{agrawal2024exmydatatypetechnique,
title={eXmY: A Data Type and Technique for Arbitrary Bit Precision Quantization},
author={Aditya Agrawal and Matthew Hedlund and Blake Hechtman},
year={2024},
eprint={2405.13938},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2405.13938},
}
tag: Optimized SR
| Stochastic Rounding
paper link: here
citation:
@misc{ali2024stochasticroundingenabledlowprecisionfloatingpoint,
title={A Stochastic Rounding-Enabled Low-Precision Floating-Point MAC for DNN Training},
author={Sami Ben Ali and Silviu-Ioan Filip and Olivier Sentieys},
year={2024},
eprint={2404.14010},
archivePrefix={arXiv},
primaryClass={cs.AR},
url={https://arxiv.org/abs/2404.14010},
}
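Stochastic rounding keeps low-precision updates unbiased: instead of always rounding to the nearest representable value, it rounds up with probability proportional to the distance to the lower grid point, so tiny gradient contributions survive in expectation. A minimal software emulation (rounding to a uniform grid, not the paper's FP MAC hardware; the step size `eps` is an arbitrary choice for illustration):

```python
import torch

def stochastic_round(x: torch.Tensor, eps: float = 2 ** -8) -> torch.Tensor:
    """Round x to a uniform grid of step `eps`, rounding up with probability
    equal to the distance to the lower grid point (unbiased in expectation)."""
    scaled = x / eps
    lower = torch.floor(scaled)
    prob_up = scaled - lower                       # in [0, 1)
    round_up = torch.rand_like(prob_up) < prob_up  # Bernoulli draw
    return (lower + round_up.float()) * eps

x = torch.randn(4, 4) * 0.01
print(stochastic_round(x))
```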
tag: FP8-LM
| FP8 Optimizer
| Microsoft
paper link: here
code link: here
citation:
@misc{peng2023fp8lm,
title={FP8-LM: Training FP8 Large Language Models},
author={Houwen Peng and Kan Wu and Yixuan Wei and Guoshuai Zhao and Yuxiang Yang and Ze Liu and Yifan Xiong and Ziyue Yang and Bolin Ni and Jingcheng Hu and Ruihang Li and Miaosen Zhang and Chen Li and Jia Ning and Ruizhe Wang and Zheng Zhang and Shuguang Liu and Joe Chau and Han Hu and Peng Cheng},
year={2023},
eprint={2310.18313},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2310.18313},
}
tag: Microscaling
| MX
| MXFP8
| MXFP6
| MXFP4
| Microsoft
paper link: here
spec link: here
citation:
@misc{rouhani2023microscalingdataformatsdeep,
title={Microscaling Data Formats for Deep Learning},
author={Bita Darvish Rouhani and Ritchie Zhao and Ankit More and Mathew Hall and Alireza Khodamoradi and Summer Deng and Dhruv Choudhary and Marius Cornea and Eric Dellinger and Kristof Denolf and Stosic Dusan and Venmugil Elango and Maximilian Golub and Alexander Heinecke and Phil James-Roxby and Dharmesh Jani and Gaurav Kolhe and Martin Langhammer and Ada Li and Levi Melnick and Maral Mesmakhosroshahi and Andres Rodriguez and Michael Schulte and Rasoul Shafipour and Lei Shao and Michael Siu and Pradeep Dubey and Paulius Micikevicius and Maxim Naumov and Colin Verrilli and Ralph Wittig and Doug Burger and Eric Chung},
year={2023},
eprint={2310.10537},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2310.10537},
}
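The MX formats (MXFP8/MXFP6/MXFP4) let each block of 32 elements share a single power-of-two scale, with the elements stored in a narrow float type. A rough emulation, assuming a PyTorch version that exposes `torch.float8_e4m3fn`; the scale-selection rule here (ceil of log2 of block absmax over the element max) is a simplification of the spec:

```python
import torch

def mx_quantize(x: torch.Tensor, block: int = 32) -> torch.Tensor:
    """Emulate an MX-style block format: every `block` consecutive values
    share one power-of-two scale derived from the block absmax, and the
    scaled values are cast to FP8 (E4M3). Assumes numel divisible by `block`."""
    flat = x.flatten().view(-1, block)
    amax = flat.abs().amax(dim=1, keepdim=True).clamp(min=1e-30)
    elem_max = torch.finfo(torch.float8_e4m3fn).max            # 448 for E4M3
    scale = torch.exp2(torch.ceil(torch.log2(amax / elem_max)))
    q = (flat / scale).to(torch.float8_e4m3fn).to(torch.float32) * scale
    return q.view_as(x)

x = torch.randn(2, 64)
print((x - mx_quantize(x)).abs().max())   # small block-relative error
```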
tag: FP8 Scaling Bias
| Graphcore Research
paper link: here
citation:
@misc{perez2023training,
title={Training and inference of large language models using 8-bit floating point},
author={Sergio P. Perez and Yan Zhang and James Briggs and Charlie Blake and Josh Levy-Kramer and Paul Balanca and Carlo Luschi and Stephen Barlow and Andrew William Fitzgibbon},
year={2023},
eprint={2309.17224},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2309.17224},
}
tag: SwitchBlock
| StableAdamW
| NIPS23
| Meta
| Allen AI
| LAION
paper link: here
citation:
@misc{wortsman2023stablelowprecisiontraininglargescale,
title={Stable and low-precision training for large-scale vision-language models},
author={Mitchell Wortsman and Tim Dettmers and Luke Zettlemoyer and Ari Morcos and Ali Farhadi and Ludwig Schmidt},
year={2023},
eprint={2304.13013},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2304.13013},
}
tag: Unit Scaling
| SNR Analysis
| Graphcore Research
paper link: here
citation:
@misc{blake2023unitscalingoutoftheboxlowprecision,
title={Unit Scaling: Out-of-the-Box Low-Precision Training},
author={Charlie Blake and Douglas Orr and Carlo Luschi},
year={2023},
eprint={2303.11257},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2303.11257},
}
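Unit scaling inserts fixed per-op scaling factors (chosen from fan-in / fan-out / batch size, not learned or measured) so that activations, weight gradients and input gradients all start out near unit variance, which is what makes out-of-the-box FP16/FP8 casts safe. A toy unit-scaled matmul in the spirit of the paper (not the authors' library); note that the forward and backward scales are deliberately decoupled:

```python
import torch

class ScaledMatmul(torch.autograd.Function):
    """Toy unit-scaled matmul: output divided by sqrt(fan_in), input grad by
    sqrt(fan_out), weight grad by sqrt(batch). The backward scales are NOT the
    exact chain-rule scales of the forward; they differ only by constant
    factors, which is the unit-scaling trick."""
    @staticmethod
    def forward(ctx, x, w):
        ctx.save_for_backward(x, w)
        return (x @ w) / w.shape[0] ** 0.5

    @staticmethod
    def backward(ctx, g):
        x, w = ctx.saved_tensors
        grad_x = (g @ w.t()) / w.shape[1] ** 0.5   # 1/sqrt(fan_out)
        grad_w = (x.t() @ g) / x.shape[0] ** 0.5   # 1/sqrt(batch)
        return grad_x, grad_w

x = torch.randn(1024, 256, requires_grad=True)
w = torch.randn(256, 256, requires_grad=True)
y = ScaledMatmul.apply(x, w)
y.backward(torch.randn_like(y))
print(y.std(), x.grad.std(), w.grad.std())   # all roughly 1.0
```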
tag: TE FP8
| Transformer-Engine
| Nvidia
blog link: here
docs link: here
code link: here
citation:
@misc{NVIDIA2023TransformerEngine,
title={NVIDIA Transformer Engine: Accelerating PyTorch Training Workloads with FP8 (TE)},
author={Chaim Rand and {NVIDIA}},
howpublished = {\url{https://github.com/NVIDIA/TransformerEngine}},
year={2023},
}
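A minimal usage sketch of Transformer Engine's FP8 path, following the pattern in NVIDIA's docs: wrap TE modules in `fp8_autocast` with a delayed-scaling recipe (E4M3 forward tensors, E5M2 gradients). Exact recipe arguments vary across TE versions, and actual FP8 execution needs Hopper/Ada-class hardware:

```python
import torch
import transformer_engine.pytorch as te
from transformer_engine.common import recipe

# Delayed-scaling recipe: HYBRID = E4M3 for forward tensors, E5M2 for gradients
fp8_recipe = recipe.DelayedScaling(margin=0, fp8_format=recipe.Format.HYBRID)

model = te.Linear(4096, 4096, bias=True).cuda()
x = torch.randn(32, 4096, device="cuda")   # sizes chosen to satisfy FP8 GEMM alignment

with te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe):
    y = model(x)
y.sum().backward()
```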
tag: Mixed Precision
| Nvidia
docs link: here
citation:
@manual{nvidia2023mixed,
title = {Train With Mixed Precision},
author= {{NVIDIA Corporation}},
month = {February},
year = {2023},
howpublished = {\url{https://docs.nvidia.com/deeplearning/performance/mixed-precision-training/index.html}}
}
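The recipe in this guide maps directly onto PyTorch AMP: run the forward pass under `autocast`, and let `GradScaler` handle dynamic loss scaling so small FP16 gradients don't underflow. A minimal training-loop sketch (toy model and loss purely for illustration):

```python
import torch

model = torch.nn.Linear(1024, 1024).cuda()
opt = torch.optim.SGD(model.parameters(), lr=1e-3)
scaler = torch.cuda.amp.GradScaler()          # dynamic loss scaling for FP16

for _ in range(10):
    x = torch.randn(32, 1024, device="cuda")
    opt.zero_grad(set_to_none=True)
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = model(x).pow(2).mean()         # matmuls run in FP16 under autocast
    scaler.scale(loss).backward()             # scale loss so small grads survive FP16
    scaler.step(opt)                          # unscales grads; skips step on inf/NaN
    scaler.update()                           # grows/shrinks the scale dynamically
```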
tag: FP8 Quantization
| Qualcomm AI Research
paper link: here
code link: here
citation:
@misc{kuzmin2024fp8,
title={FP8 Quantization: The Power of the Exponent},
author={Andrey Kuzmin and Mart Van Baalen and Yuwei Ren and Markus Nagel and Jorn Peters and Tijmen Blankevoort},
year={2024},
eprint={2208.09225},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2208.09225},
}
tag: FP8 Optimizer
| Blockwise Quantization
| Dynamic Tree Quantization
| Meta
| University of Washington
paper link: here
citation:
@misc{dettmers20228bit,
title={8-bit Optimizers via Block-wise Quantization},
author={Tim Dettmers and Mike Lewis and Sam Shleifer and Luke Zettlemoyer},
year={2022},
eprint={2110.02861},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2110.02861},
}
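The key trick here is blockwise quantization of the optimizer states: each block gets its own absmax scale, so a single outlier only degrades precision inside its own block. A simplified linear-int8 sketch (the actual library pairs blockwise scaling with a non-linear dynamic-tree code and ships ready-made optimizers such as `bnb.optim.Adam8bit`):

```python
import torch

def blockwise_quantize(x: torch.Tensor, block: int = 2048):
    """Blockwise absmax quantization to int8: each block of `block` values
    gets its own scale. Assumes numel divisible by `block`."""
    flat = x.flatten().view(-1, block)
    scale = flat.abs().amax(dim=1, keepdim=True).clamp(min=1e-12) / 127.0
    q = (flat / scale).round().clamp(-127, 127).to(torch.int8)
    return q, scale

def blockwise_dequantize(q, scale, shape):
    return (q.float() * scale).view(shape)

state = torch.randn(4, 4096)   # e.g. an Adam second-moment tensor
q, s = blockwise_quantize(state)
print((state - blockwise_dequantize(q, s, state.shape)).abs().max())
```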
tag: HFP8
| Hybrid FP8
| NIPS19
| IBM Research
paper link: here
citation:
@inproceedings{NEURIPS2019_65fc9fb4,
author = {Sun, Xiao and Choi, Jungwook and Chen, Chia-Yu and Wang, Naigang and Venkataramani, Swagath and Srinivasan, Vijayalakshmi (Viji) and Cui, Xiaodong and Zhang, Wei and Gopalakrishnan, Kailash},
booktitle = {Advances in Neural Information Processing Systems},
editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett},
pages = {},
publisher = {Curran Associates, Inc.},
title = {Hybrid 8-bit Floating Point (HFP8) Training and Inference for Deep Neural Networks},
url = {https://proceedings.neurips.cc/paper_files/paper/2019/file/65fc9fb4897a89789352e211ca2d398f-Paper.pdf},
volume = {32},
year = {2019}
}
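HFP8's point is that one FP8 format does not fit both passes: the forward pass wants mantissa precision (1-4-3) while gradients want dynamic range (1-5-2). Recent PyTorch exposes closely related element types, which makes the trade-off easy to inspect (torch's `float8_e4m3fn` uses a slightly different bias / special-value convention than the paper's 1-4-3 format):

```python
import torch

# Forward tensors: E4M3 (more mantissa, less range); gradients: E5M2 (more range).
print(torch.finfo(torch.float8_e4m3fn).max)   # 448.0
print(torch.finfo(torch.float8_e5m2).max)     # 57344.0

fwd = torch.randn(4).to(torch.float8_e4m3fn)
bwd = (torch.randn(4) * 1e3).to(torch.float8_e5m2)   # fits thanks to E5M2's range
print(fwd.float(), bwd.float())
```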
tag: BF16
| Google
blog link: here
citation:
@misc{shibo2019bfloat16,
author = {Shibo Wang and Pankaj Kanwar},
title = {bfloat16: The secret to high performance on Cloud TPUs},
year = {2019},
howpublished = {\url{https://cloud.google.com/blog/products/ai-machine-learning/bfloat16-the-secret-to-high-performance-on-cloud-tpus}}
}
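BF16's appeal is that it keeps FP32's 8 exponent bits (so no loss scaling is needed) and only gives up mantissa bits. A two-line comparison in PyTorch:

```python
import torch

# Same dynamic range as FP32, far beyond FP16's 65504 ceiling
print(torch.finfo(torch.bfloat16).max, torch.finfo(torch.float16).max)  # ~3.39e38 vs 65504.0
print(torch.tensor(1e30).to(torch.bfloat16),   # representable in BF16
      torch.tensor(1e30).to(torch.float16))    # overflows to inf in FP16
```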
tag: FP8-E5M2
| Chunk-based Accumulation
| Stochastic Rounding
| IBM Research
paper link: here
citation:
@misc{wang2018training,
title={Training Deep Neural Networks with 8-bit Floating Point Numbers},
author={Naigang Wang and Jungwook Choi and Daniel Brand and Chia-Yu Chen and Kailash Gopalakrishnan},
year={2018},
eprint={1812.08011},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/1812.08011},
}
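Chunk-based accumulation attacks "swamping": once a low-precision running sum grows large, small addends are rounded away. Summing in chunks keeps every accumulator close in magnitude to its addends. A small FP16 demonstration of the effect (the paper applies the same idea to FP8 GEMM accumulation in hardware):

```python
import torch

def fp16_running_sum(x):
    """Single FP16 accumulator: once the partial sum is large,
    small addends get swamped (rounded away)."""
    acc = torch.tensor(0.0, dtype=torch.float16)
    for v in x:
        acc = acc + v
    return acc

def fp16_chunked_sum(x, chunk=64):
    """Chunk-based accumulation: sum each chunk with a local FP16 accumulator,
    then combine the few, similarly sized chunk partials."""
    partials = [fp16_running_sum(x[i:i + chunk]) for i in range(0, len(x), chunk)]
    return fp16_running_sum(torch.stack(partials))

x = torch.full((4096,), 0.25, dtype=torch.float16)
print(fp16_running_sum(x), fp16_chunked_sum(x))   # exact answer is 1024
```

With these values the naive running sum stalls at 512 (each further 0.25 addend rounds away), while the chunked version recovers the exact 1024.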
tag: FP16
| Loss Scaling
| ICLR18
| Baidu
| Nvidia
paper link: here
citation:
@misc{micikevicius2018mixed,
title={Mixed Precision Training},
author={Paulius Micikevicius and Sharan Narang and Jonah Alben and Gregory Diamos and Erich Elsen and David Garcia and Boris Ginsburg and Michael Houston and Oleksii Kuchaiev and Ganesh Venkatesh and Hao Wu},
year={2018},
eprint={1710.03740},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/1710.03740},
}
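The paper's recipe has three parts: FP16 storage for weights, activations and gradients; an FP32 master copy of the weights for the update; and loss scaling to keep small gradients above FP16's underflow threshold. A minimal static-loss-scale sketch of that loop (modern code would use the AMP utilities shown earlier; the scale of 1024 and the toy loss are arbitrary illustrative choices):

```python
import torch

model = torch.nn.Linear(1024, 1024).cuda().half()          # FP16 weights and activations
master = [p.detach().float() for p in model.parameters()]  # FP32 master copy of the weights
opt = torch.optim.SGD(master, lr=1e-3)
loss_scale = 1024.0   # static scale; lifts small gradients above FP16 underflow

for _ in range(10):
    x = torch.randn(32, 1024, device="cuda", dtype=torch.float16)
    loss = model(x).float().pow(2).mean()
    (loss * loss_scale).backward()                 # backward pass runs in FP16
    for p, m in zip(model.parameters(), master):
        m.grad = p.grad.float() / loss_scale       # unscale into FP32 master gradients
        p.grad = None
    opt.step()                                     # weight update in FP32
    opt.zero_grad(set_to_none=True)
    with torch.no_grad():                          # copy updated master weights back to FP16
        for p, m in zip(model.parameters(), master):
            p.copy_(m.half())
```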