Instructions for Ubuntu
Instructions for Ubuntu
Update
apt-get update
Install software
apt-get install -y git
apt-get install -y libcurl4-openssl-dev
apt-get install -y hdf5-tools
apt-get install -y rsync
apt-get install -y make
apt-get install -y gcc
apt-get install -y libblas-dev
apt-get install -y python3.7 python3-pip
ln -nsf /usr/bin/python3.7 /usr/bin/python
Anaconda Environment
conda create --name Atac python=3.7
conda activate Atac
Clone the libxsmm repository and set library path
cd /home/
git clone https://github.com/libxsmm/libxsmm.git
cd /home/libxsmm
git checkout b3da2b1bed9d27f9d6bae91a683f8cf76fe299b5
make -j # Use AVX=2 for AVX2 and AVX=3 for AVX512
cd /home/
export LD_LIBRARY_PATH=/home/libxsmm/lib/
Clone atacworks repo
git clone --branch v0.2.0 https://github.com/clara-parabricks/AtacWorks.git
Clone the OpenOmics version
git clone https://github.com/IntelLabs/Trans-Omics-Acceleration-Library.git
Apply patch
cd /home/AtacWorks/
git apply /home/Trans-Omics-Acceleration-Library/applications/ATAC-Seq/AtacWorks_cpu_optimization_patch.patch
Install python packages
python3.7 -m pip install -r requirements-base.txt
python3.7 -m pip install torch torchvision torchaudio
python3.7 -m pip install -r requirements-macs2.txt
(Optional) Install torch-ccl
# Install torch-ccl
# git clone --branch v1.1.0 https://github.com/intel/torch-ccl.git && cd torch-ccl
# git submodule sync
# git submodule update --init --recursive
# python3.7 setup.py install
Install 1D convolution module
cd /home/libxsmm/samples/deeplearning/conv1dopti_layer/Conv1dOpti-extension/
python setup.py install
Install AtacWorks from its folder and set the path
cd /home/AtacWorks/
python3.7 -m pip install .
atacworks=/home/AtacWorks/
Download data to train
wget https://atacworks-paper.s3.us-east-2.amazonaws.com/dsc_atac_blood_cell_denoising_experiments/50_cells/train_data/noisy_data/dsc.1.Mono.50.cutsites.smoothed.200.bw
wget https://atacworks-paper.s3.us-east-2.amazonaws.com/dsc_atac_blood_cell_denoising_experiments/50_cells/train_data/clean_data/dsc.Mono.2400.cutsites.smoothed.200.bw
wget https://atacworks-paper.s3.us-east-2.amazonaws.com/dsc_atac_blood_cell_denoising_experiments/50_cells/train_data/clean_data/dsc.Mono.2400.cutsites.smoothed.200.3.narrowPeak
Download file conversion binaries and set path
rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /home/
rsync -aP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bigWigToBedGraph /home/
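export PATH="$PATH:/home/" # set the path for the bedGraphToBigWig binaries in the current shell
echo 'export PATH="$PATH:/home/"' >> /home/.bashrc # persist the setting for future shells (export itself prints nothing to redirect)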
Data preprocessing
python $atacworks/scripts/peak2bw.py \
--input dsc.Mono.2400.cutsites.smoothed.200.3.narrowPeak \
--sizes $atacworks/data/reference/hg19.chrom.sizes \
--out_dir ./ \
--skip 1
python $atacworks/scripts/get_intervals.py \
--sizes $atacworks/data/reference/hg19.auto.sizes \
--intervalsize 50000 \
--out_dir ./ \
--val chr20 \
--holdout chr10
python $atacworks/scripts/bw2h5.py \
--noisybw dsc.1.Mono.50.cutsites.smoothed.200.bw \
--cleanbw dsc.Mono.2400.cutsites.smoothed.200.bw \
--cleanpeakbw dsc.Mono.2400.cutsites.smoothed.200.3.narrowPeak.bw \
--intervals training_intervals.bed \
--out_dir ./ \
--prefix Mono.50.2400.train \
--pad 5000 \
--nonzero
python $atacworks/scripts/bw2h5.py \
--noisybw dsc.1.Mono.50.cutsites.smoothed.200.bw \
--cleanbw dsc.Mono.2400.cutsites.smoothed.200.bw \
--cleanpeakbw dsc.Mono.2400.cutsites.smoothed.200.3.narrowPeak.bw \
--intervals val_intervals.bed \
--out_dir ./ \
--prefix Mono.50.2400.val \
--pad 5000
Set affinity and threads
export KMP_AFFINITY=compact,1,0,granularity=fine
export LD_PRELOAD=/home/libtcmalloc.so # Copy these files in the /home folder first
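export LD_PRELOAD=/home/libjemalloc.so # Note: this overrides the LD_PRELOAD value set on the previous line, so only libjemalloc.so ends up preloaded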
export OMP_NUM_THREADS=31 # (Available cores (N) - 1)
Training run (Single Socket)
# In numactl command, "-C 1-31" is for running on cores 1 to 31.
# General case for an N core machine is "-C 1-(N-1)".
# Keep the batch size in configs/train_config.yaml at a multiple of (N-1) for optimum performance
numactl --membind 0 -C 1-31 python $atacworks/scripts/main.py train \
--config configs/train_config.yaml \
--config_mparams configs/model_structure.yaml \
--files_train $atacworks/Mono.50.2400.train.h5 \
--val_files $atacworks/Mono.50.2400.val.h5
Alternative for machines without NUMA: replace the numactl prefix with taskset, i.e. "taskset -c 1-31 python …"
Training run (Multiple Sockets/Nodes)
export OMP_NUM_THREADS=30 # (Available cores (N) - 2)
# 1. change line 23 in configs/train_config.yaml with the following
# dist-backend: 'gloo'
# 2. change line 22 in configs/train_config.yaml with the following
# dist-backend: 'gloo'
# 3. Keep the batch size (bs) in configs/train_config.yaml at a multiple of (N-2) for optimum performance.
# The batch size gets multiplied by the number of sockets. Hence, if bs = 30 and the number of sockets = 16, then the effective batch size = 30*16 = 480.
# 4. Comment out the following lines (79, 80) in AtacWorks/claragenomics/dl4atac/utils.py and reinstall AtacWorks using the "pip install ." command.
# if (os.path.islink(latest_symlink)):
# os.remove(latest_symlink)
# 5. Run the following Slurm batch script that uses MPI commands.
sbatch Batchfile_CPU.slurm