Here are some resources about Device Placement and Memory Management for LLM Training. Note that some strategies here can be utilized in inference as well.
tag: FlexMoE | Dynamic Device Placement | ACM MOD23 | Peking University | CMU | Microsoft
paper link: here
citation:
@article{nie2023flexmoe,
title={FlexMoE: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement},
author={Nie, Xiaonan and Miao, Xupeng and Wang, Zilong and Yang, Zichao and Xue, Jilong and Ma, Lingxiao and Cao, Gang and Cui, Bin},
journal={Proceedings of the ACM on Management of Data},
volume={1},
number={1},
pages={1--19},
year={2023},
publisher={ACM New York, NY, USA}
}
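As its title suggests, FlexMoE's key idea is to adapt the expert-to-GPU mapping during training as the token routing distribution shifts, rather than fixing expert placement up front. The snippet below is a minimal, hypothetical sketch of that general idea only; the load window, threshold, and replication heuristic are illustrative assumptions, not the paper's actual algorithm. Experts that receive a disproportionate share of tokens gain replicas on the least-loaded devices.

```python
# Hypothetical sketch of load-driven expert replication for MoE training.
# Not FlexMoE's actual algorithm: a toy heuristic that adds replicas of
# overloaded experts on the least-loaded devices.
from collections import defaultdict

def rebalance(expert_load, placement, num_devices, threshold=2.0):
    """expert_load: tokens routed to each expert over the last window.
    placement: expert_id -> list of device ids currently holding a replica.
    Returns the updated placement."""
    # Approximate per-device load: each replica serves an equal share.
    device_load = defaultdict(float)
    for e, devs in placement.items():
        for d in devs:
            device_load[d] += expert_load[e] / len(devs)
    avg = sum(expert_load.values()) / max(len(expert_load), 1)
    for e, load in sorted(expert_load.items(), key=lambda kv: -kv[1]):
        per_replica = load / len(placement[e])
        if per_replica > threshold * avg:
            # Add a replica on the least-loaded device that lacks this expert.
            candidates = [d for d in range(num_devices) if d not in placement[e]]
            if candidates:
                target = min(candidates, key=lambda d: device_load[d])
                placement[e].append(target)
                device_load[target] += load / len(placement[e])
    return placement

if __name__ == "__main__":
    load = {0: 900, 1: 50, 2: 30, 3: 20}       # heavily skewed routing
    place = {0: [0], 1: [1], 2: [2], 3: [3]}   # one expert per GPU initially
    print(rebalance(load, place, num_devices=4))  # expert 0 gains a replica
```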
tag: Pipeline Parallelism | NeurIPS20 | Microsoft
paper link: here
citation:
@misc{tarnawski2020efficient,
title={Efficient Algorithms for Device Placement of DNN Graph Operators},
author={Jakub Tarnawski and Amar Phanishayee and Nikhil R. Devanur and Divya Mahajan and Fanny Nina Paravecino},
year={2020},
eprint={2006.16423},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
tag: MLSys20 ReCoML Workshop | Google
paper link: here
citation:
@misc{pisarchyk2020efficientmemorymanagementdeep,
title={Efficient Memory Management for Deep Neural Net Inference},
author={Yury Pisarchyk and Juhyun Lee},
year={2020},
eprint={2001.03288},
archivePrefix={arXiv},
primaryClass={cs.LG},
url={https://arxiv.org/abs/2001.03288},
}
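This line of work plans ahead of time how the intermediate tensors of a network with known usage intervals can share a single pre-allocated arena. The sketch below illustrates a simple greedy-by-size offset assignment in that spirit; it is an illustrative version under assumed inputs, not the paper's exact strategy or implementation.

```python
# Illustrative greedy-by-size offset planner: intermediate tensors with known
# usage intervals share one pre-allocated arena; tensors whose lifetimes
# overlap must not overlap in memory. Not the paper's exact implementation.

def plan_offsets(tensors):
    """tensors: list of (name, size_bytes, first_use, last_use).
    Returns ({name: offset}, required arena size)."""
    placed = []    # (offset, size, first_use, last_use, name)
    offsets = {}
    # Place the largest tensors first; smaller ones fill the remaining gaps.
    for name, size, start, end in sorted(tensors, key=lambda t: -t[1]):
        offset = 0
        # Walk already-placed tensors in offset order and slide the candidate
        # past each one that overlaps it both in time and in memory.
        for p_off, p_size, p_start, p_end, _ in sorted(placed):
            overlaps_in_time = not (end < p_start or p_end < start)
            if overlaps_in_time and offset < p_off + p_size and p_off < offset + size:
                offset = p_off + p_size
        offsets[name] = offset
        placed.append((offset, size, start, end, name))
    arena_size = max((off + sz for off, sz, *_ in placed), default=0)
    return offsets, arena_size

if __name__ == "__main__":
    ts = [("act1", 1024, 0, 2), ("act2", 2048, 1, 3), ("act3", 1024, 3, 4)]
    print(plan_offsets(ts))   # act3 reuses act1's bytes: arena stays at 3072
```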
tag: Profile-Guided | IBM Research
paper link: here
citation:
@misc{sekiyama2018profileguidedmemoryoptimizationdeep,
title={Profile-guided memory optimization for deep neural networks},
author={Taro Sekiyama and Takashi Imamichi and Haruki Imai and Rudy Raymond},
year={2018},
eprint={1804.10001},
archivePrefix={arXiv},
primaryClass={cs.DC},
url={https://arxiv.org/abs/1804.10001},
}
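The other ingredient in profile-guided memory optimization is the profile itself: a recording run that captures each intermediate buffer's size and first/last use, which an offline planner (such as the offset planner sketched above) can then optimize over. The class below is a purely hypothetical sketch of such a profiling pass; all names are illustrative and none of this is taken from the paper.

```python
# Hypothetical profiling pass: run the model once and record, for every
# intermediate buffer, its size and the first/last step that touches it.
# The resulting (name, size, first_use, last_use) records are the input an
# offline memory planner would optimize over. All names are illustrative.

class MemoryProfiler:
    def __init__(self):
        self.records = {}   # name -> [size_bytes, first_use, last_use]
        self.step = 0

    def touch(self, name, size_bytes):
        """Call whenever an operator reads or writes the named buffer."""
        if name not in self.records:
            self.records[name] = [size_bytes, self.step, self.step]
        else:
            self.records[name][2] = self.step   # extend the observed lifetime

    def next_op(self):
        self.step += 1

    def lifetimes(self):
        return [(n, s, a, b) for n, (s, a, b) in self.records.items()]

if __name__ == "__main__":
    prof = MemoryProfiler()
    prof.touch("conv1_out", 4 * 224 * 224); prof.next_op()
    prof.touch("conv1_out", 4 * 224 * 224)     # consumed by the next operator
    prof.touch("conv2_out", 4 * 112 * 112); prof.next_op()
    prof.touch("conv2_out", 4 * 112 * 112)
    print(prof.lifetimes())
```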
tag: Mosaic | ACM MICRO17 | CMU
paper link: here
code link: here
citation:
@inproceedings{ausavarungnirun2017mosaic,
author = {Ausavarungnirun, Rachata and Landgraf, Joshua and Miller, Vance and Ghose, Saugata and Gandhi, Jayneel and Rossbach, Christopher J. and Mutlu, Onur},
title = {Mosaic: a GPU memory manager with application-transparent support for multiple page sizes},
year = {2017},
isbn = {9781450349529},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3123939.3123975},
doi = {10.1145/3123939.3123975},
pages = {136--150},
numpages = {15},
keywords = {GPGPU applications, address translation, demand paging, graphics processing units, large pages, virtual memory management},
location = {Cambridge, Massachusetts},
series = {MICRO-50 '17}
}