Singularity

Setup

The SLURM scripts for groups 1-4 are generated from this file such that,

  • allocation of CPUs based on the lowest common multiple of 32 and 56 as whole numbers of 448 SL3 cclake nodes.
  • the same CPU count is set both in SLURM (`--cpus-per-task`) and in the Singularity call (`--threads`).
# Generate one SLURM script per variant group: cclake-1.sb, cclake-2.sb, ...
# Each is a copy of cclake.md from the shebang line of the script template
# to the end of the file, minus the final line (presumably a closing code
# fence -- confirm), with the placeholder token replaced by the group name.
export groups=(exon_CADD exon_reg exon_severe reg_Only)
# Loop bounds follow the array length so adding a group needs no other change.
for (( group = 1; group <= ${#groups[@]}; group++ ))
do
  awk '/usr\/bin\/bash/,0' cclake.md | \
  sed -e '$d' \
      -e "s/group_placeholder/${groups[group - 1]}/" > "cclake-${group}.sb"
done

Somehow the number of message lines is smaller (relative to the number of chromosomes) than in the counterpart run without Singularity.

SLURM scripts

#!/usr/bin/bash

# Scheduler directives: an array of 349 tasks (IDs 20-368), 56 CPUs per task
# and a 12-hour time limit. Standard output and error go to separate files
# named with the array job ID (%A) and the array task ID (%a).
#SBATCH --job-name=_rva
#SBATCH --account=PETERS-SL3-CPU
#SBATCH --partition=cclake-himem
#SBATCH --cpus-per-task=56
#SBATCH --array=20-368
#SBATCH --time=12:00:00
#SBATCH --output=/rds/user/jhz22/hpc-work/work/_rva_%A_%a.o
#SBATCH --error=/rds/user/jhz22/hpc-work/work/_rva_%A_%a.e
#SBATCH --export ALL

# Scratch directory; also handed to singularity as --workdir further down.
# HPC_WORK is not set in this script -- presumably it is inherited from the
# submitting environment (confirm).
export TMPDIR=${HPC_WORK}/work
# Project root: holds the container image, the inputs under work/ and the
# results under rva/.
export SEQ=~/COVID-19/SCALLOP-Seq
# Cohort label, passed as --cohort-name and used in the output file names.
export COHORT=INTERVAL

# Run the container's `step2` command for the current phenotype, chromosome
# and variant group.
#
# Globals (read): SEQ, TMPDIR, COHORT, weswgs, pheno, chr, group, group_file,
#                 SLURM_CPUS_PER_TASK (optional; thread count, defaults to 56)
# Outputs:        files with prefix
#                 ${SEQ}/rva/${weswgs}-${pheno}/${COHORT}-${weswgs}-${pheno}-${group}-${chr}
# Returns:        non-zero if the output directory cannot be created,
#                 otherwise the exit status of singularity.
singularity_exec()
{
  local out_dir="${SEQ}/rva/${weswgs}-${pheno}"
  # Follow the scheduler's CPU allocation when available so --threads cannot
  # drift from --cpus-per-task; 56 is the value used in the job header.
  local threads="${SLURM_CPUS_PER_TASK:-56}"

  # mkdir -p is already a no-op for an existing directory.
  mkdir -p -- "${out_dir}" || return

  # All expansions are quoted so paths containing whitespace stay one argument.
  singularity exec --env "SEQ=${SEQ}" \
                   --env "COHORT=${COHORT}" \
                   --env "weswgs=${weswgs}" \
                   --env "chr=${chr}" \
                   --env "group_file=${group_file}" \
                   --env "pheno=${pheno}" \
                   --bind "${SEQ}" --workdir "${TMPDIR}" --containall \
              "${SEQ}/burden_testing_latest.sif" step2 \
                   --cohort-name "${COHORT}" \
                   --GDS "${SEQ}/work/${weswgs}-${chr}.gds" \
                   --group-file "${SEQ}/${group_file}" \
                   --matrix-prefix "${SEQ}/work/${weswgs}" \
                   --matrix-type GCTA \
                   --pheno "${SEQ}/work/${weswgs}/${pheno}-lr.pheno" \
                   --out "${out_dir}/${COHORT}-${weswgs}-${pheno}-${group}-${chr}" \
                   --threads "${threads}"
}

# The token in the array below is replaced by a concrete variant-group name
# when the per-group scripts are generated from this template.
export groups=(group_placeholder)
for weswgs in wgs
do
  export weswgs

  # Candidate phenotype files in sorted glob order; globbing avoids parsing
  # `ls` output and copes with unusual file names.
  pheno_files=("${SEQ}/work/${weswgs}"/*-lr.pheno)
  # An unmatched glob is left as the literal pattern; treat that as "none".
  [[ -e "${pheno_files[0]}" ]] || pheno_files=()

  # The array task ID is the 1-based position of the phenotype to analyse.
  # Stop early when it selects nothing, rather than running every chromosome
  # with an empty phenotype name.
  task_id="${SLURM_ARRAY_TASK_ID:-}"
  if [[ ! "${task_id}" =~ ^[0-9]+$ ]] ||
     (( 10#${task_id} < 1 || 10#${task_id} > ${#pheno_files[@]} )); then
    printf 'No phenotype for array task "%s": %d *-lr.pheno file(s) in %s\n' \
      "${task_id}" "${#pheno_files[@]}" "${SEQ}/work/${weswgs}" >&2
    exit 1
  fi
  task_index=$(( 10#${task_id} - 1 ))

  # Strip the directory and the -lr.pheno suffix to get the phenotype name.
  pheno="${pheno_files[task_index]##*/}"
  export pheno="${pheno%-lr.pheno}"

  for chrs in {1..22}
  do
    export chr="chr${chrs}"
    for group in "${groups[@]}"
    do
      export group_file="${group}-${chrs}.groupfile.txt"
      # Progress line: one per phenotype/chromosome/group combination.
      printf '%s %s %s %s\n' "${pheno}" "${weswgs}" "${chr}" "${group}"
      singularity_exec
    done
  done
done