megatron-MLB单机多机测试
环境:
anaconda
bash Anaconda3-2023.03-Linux-x86_64.sh
cd anaconda3/bin/
. ./activate
conda create -n liqy python=3.9.18
conda activate liqy
cd /opt/support/
bash cuda_11.7.0_515.43.04_linux.run
vim ~/.bashrc
# 在 ~/.bashrc 末尾添加以下两行, 保存后执行 source ~/.bashrc 使其生效:
export PATH=/usr/local/cuda-11.7/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda-11.7/lib64:$LD_LIBRARY_PATH
conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 pytorch-cuda=11.7 -c pytorch -c nvidia
pip install nvidia-nccl-cu11==2.14.3 -i https://pypi.mirrors.ustc.edu.cn/simple
pip install transformers==4.31.0 -i https://pypi.mirrors.ustc.edu.cn/simple
pip install SentencePiece -i https://pypi.mirrors.ustc.edu.cn/simple
pip install packaging -i https://pypi.mirrors.ustc.edu.cn/simple
pip install six
pip install six -i https://pypi.mirrors.ustc.edu.cn/simple
wget -O apex-23.07.tar.gz https://codeload.github.com/NVIDIA/apex/tar.gz/refs/tags/23.07
tar -zxvf apex-23.07.tar.gz
cd apex-23.07/
pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
tar -zxvf gputest.tgz
cd gputest/
pip install Ninja -i https://pypi.mirrors.ustc.edu.cn/simple
cd LEO_debug/
bash scripts/debug.sh
vim scripts/debug.sh
bash scripts/debug.sh