Build a bash script that loops over variables and submits SGE jobs

This function builds a bash script that loops over a set of variables with pre-specified values. For each combination of values, the bash script creates an internal bash script that then gets submitted as an SGE job.

job_loop(
  loops,
  name,
  create_shell = FALSE,
  queue = "shared",
  memory = "10G",
  cores = 1L,
  email = "e",
  logdir = "logs",
  filesize = "100G",
  task_num = NULL,
  tc = 20
)

Arguments

loops: A named list where each element is a character vector. The names of loops specify the variables used for the loops and the contents specify the options to loop through for each variable.
name: A character(1) vector with the name of the script. Any spaces will be replaced by underscores.
create_shell: A logical(1) vector specifying whether to create a shell file for the script.
queue: A character(1) vector with the name of the SGE queue. Check how busy a given queue is by running qpic -q queuename.
memory: The amount of memory per core to request in SGE syntax. You can check how much a current job is utilizing using the qmem JHPCE command. For more detail on the memory options, check https://jhpce.jhu.edu/knowledge-base/how-to/#MemSpec.
cores: The number of cores to request. Note that the total memory your job will request is cores multiplied by memory.
email: The email reporting option for the job, passed to the SGE -m option. For more information check https://jhpce.jhu.edu/knowledge-base/how-to/#Email.
logdir: The directory for the SGE log files relative to the current working directory.
filesize: The maximum file size in SGE format.
task_num: The number of tasks for your job, which will make it into an array job. If NULL this is ignored.
tc: If task_num is specified, this option controls the maximum number of tasks that run concurrently (the SGE -tc option).

Value

A character vector with the script contents. If create_shell is TRUE, it also creates the actual script file in the current working directory.

Author

Leonardo Collado-Torres

Examples


job_loop(
    loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
    name = "bsp2_test"
)
#> #!/bin/bash
#> 
#> ## Usage:
#> # sh bsp2_test.sh
#> 
#> ## Create the logs directory
#> mkdir -p logs
#> 
#> for region in DLPFC HIPPO; do
#>     for feature in gene exon tx jxn; do
#> 
#>     ## Internal script name
#>     SHORT="bsp2_test_${region}_${feature}"
#> 
#>     # Construct shell file
#>     echo "Creating script bsp2_test_${region}_${feature}"
#>     cat > .${SHORT}.sh <<EOF
#> #!/bin/bash
#> #$ -cwd
#> #$ -l mem_free=10G,h_vmem=10G,h_fsize=100G
#> #$ -N ${SHORT}
#> #$ -o logs/${SHORT}.txt
#> #$ -e logs/${SHORT}.txt
#> #$ -m e
#> 
#> echo "**** Job starts ****"
#> date
#> 
#> echo "**** JHPCE info ****"
#> echo "User: \${USER}"
#> echo "Job id: \${JOB_ID}"
#> echo "Job name: \${JOB_NAME}"
#> echo "Hostname: \${HOSTNAME}"
#> echo "Task id: \${SGE_TASK_ID}"
#> 
#> ## Load the R module (absent since the JHPCE upgrade to CentOS v7)
#> module load conda_R
#> 
#> ## List current modules for reproducibility
#> module list
#> 
#> ## Edit with your job command
#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
#> 
#> echo "**** Job ends ****"
#> date
#> 
#> ## This script was made using sgejobs version 0.99.2
#> ## available from http://research.libd.org/sgejobs/
#> 
#> 
#> EOF
#> 
#>     call="qsub .${SHORT}.sh"
#>     echo $call
#>     $call
#>     done
#> done
#> 

job_loop(
    loops = list(region = c("DLPFC", "HIPPO"), feature = c("gene", "exon", "tx", "jxn")),
    cores = 5,
    task_num = 10,
    name = "bsp2_test_array"
)
#> #!/bin/bash
#> 
#> ## Usage:
#> # sh bsp2_test_array.sh
#> 
#> ## Create the logs directory
#> mkdir -p logs
#> 
#> for region in DLPFC HIPPO; do
#>     for feature in gene exon tx jxn; do
#> 
#>     ## Internal script name
#>     SHORT="bsp2_test_array_${region}_${feature}"
#> 
#>     # Construct shell file
#>     echo "Creating script bsp2_test_array_${region}_${feature}"
#>     cat > .${SHORT}.sh <<EOF
#> #!/bin/bash
#> #$ -cwd
#> #$ -l mem_free=10G,h_vmem=10G,h_fsize=100G
#> #$ -pe local 5
#> #$ -N ${SHORT}
#> #$ -o logs/${SHORT}.\$TASK_ID.txt
#> #$ -e logs/${SHORT}.\$TASK_ID.txt
#> #$ -m e
#> #$ -t 1-10
#> #$ -tc 20
#> 
#> echo "**** Job starts ****"
#> date
#> 
#> echo "**** JHPCE info ****"
#> echo "User: \${USER}"
#> echo "Job id: \${JOB_ID}"
#> echo "Job name: \${JOB_NAME}"
#> echo "Hostname: \${HOSTNAME}"
#> echo "Task id: \${SGE_TASK_ID}"
#> 
#> ## Load the R module (absent since the JHPCE upgrade to CentOS v7)
#> module load conda_R
#> 
#> ## List current modules for reproducibility
#> module list
#> 
#> ## Edit with your job command
#> Rscript -e "options(width = 120); print('${region}'); print('${feature}'); sessioninfo::session_info()"
#> 
#> echo "**** Job ends ****"
#> date
#> 
#> ## This script was made using sgejobs version 0.99.2
#> ## available from http://research.libd.org/sgejobs/
#> 
#> 
#> EOF
#> 
#>     call="qsub .${SHORT}.sh"
#>     echo $call
#>     $call
#>     done
#> done
#>