Spaces:
Build error
Build error
#!/bin/bash
#SBATCH --job-name=tr_test-s3-download-and-convert-checkpoints
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --time=3:00:00
#SBATCH --partition=production-cluster
#SBATCH --output=/fsx/m4/experiments/local_experiment_dir/s3_async_temporary_checkpoint_folder/logs/%x-%j.out

# Runs m4/scripts/s3_checkpoint_download_convert_upload.py for one experiment
# and a list of optimizer-step numbers.
#
# Usage:
#   sbatch <this-script>     # submitted as a SLURM job
#   bash   <this-script>     # run directly
#
# Edit the values between "EDIT ME START" and "EDIT ME END" before running.

set -e

# ----------------- Auto-Workdir -----------------
# Locate this script on disk so the repository root can be derived from it.
# The variable must be quoted: an unquoted empty value collapses the test to
# `[ -n ]`, which is always true and would make the else branch unreachable.
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
  # Running under SLURM: check the original location through scontrol and
  # $SLURM_JOB_ID (the "Command=" field of the job record).
  SCRIPT_PATH=$(scontrol show job "${SLURM_JOB_ID}" | awk -F= '/Command=/{print $2}')
else
  # Otherwise: started with bash. Get the real location.
  SCRIPT_PATH=$(realpath "$0")
fi

# awk's exit status hides a failing scontrol, so check the result explicitly.
if [[ -z "${SCRIPT_PATH}" ]]; then
  echo "ERROR: could not determine the location of this script" >&2
  exit 1
fi

SCRIPT_DIR=$(dirname "${SCRIPT_PATH}")
# The repository root is two directories above the script's directory.
# `&&` (instead of `;`) makes a failed cd abort the script under `set -e`
# rather than silently reporting the current directory as the repo root.
M4_REPO_PATH=$(builtin cd "${SCRIPT_DIR}/../.." && pwd)
# --------------------------------------------------

### EDIT ME START ###
CONDA_ENV_NAME=shared-m4
EXPERIMENT_NAME=tr_194_laion_cm4_mix
opt_step_num_list=(
  "1000"
  "2000"
)
### EDIT ME END ###

echo "START TIME: $(date)"

# Set up the user environment, then activate the requested conda env on top
# of the base env.
source /fsx/m4/start-m4-user
conda activate base
conda activate "${CONDA_ENV_NAME}"

pushd "${M4_REPO_PATH}"

# WORKING_DIR is not assigned anywhere in this script.
# NOTE(review): it may be exported by /fsx/m4/start-m4-user -- confirm.
# If it is unset, fall back to the repo root instead of prepending an empty
# PYTHONPATH entry (which Python treats as the current directory, i.e. the
# repo root after the pushd above).
export PYTHONPATH="${WORKING_DIR:-${M4_REPO_PATH}}:${PYTHONPATH:-}"

echo "running checkpoint download, convert, upload for opt-steps: ${opt_step_num_list[*]} of experiment: ${EXPERIMENT_NAME}"

# Positional arguments: experiment name, each opt-step number, repo path.
python "${M4_REPO_PATH}/m4/scripts/s3_checkpoint_download_convert_upload.py" \
  "${EXPERIMENT_NAME}" "${opt_step_num_list[@]}" "${M4_REPO_PATH}"

echo "END TIME: $(date)"