Given a set of SGE job IDs, these functions extract the accounting data from SGE using qacct and then parse it into a data.frame with the summary information that SGE recorded about the performance of those jobs.

accounting(
  job_ids,
  accounting_files = "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting",
  tz = "EST"
)

accounting_read(
  job_ids,
  accounting_files = "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting"
)

accounting_parse(accounting_info, tz = "EST")

Arguments

job_ids

A character vector of SGE job IDs to inspect.

accounting_files

A character vector with the paths to the SGE accounting files.

tz

Time zone for your SGE cluster. Defaults to EST for JHPCE.

accounting_info

A named list where each element contains the output from qacct for a given job and the names of the list are the SGE job IDs. This can be produced using accounting_read().

Value

accounting_read: A named list with the qacct output for each of the SGE job IDs; the names of the list are the SGE job IDs.

accounting_parse and accounting: A data.frame with the parsed SGE accounting information.

References

For the ss function used by accounting_parse():

Leonardo Collado-Torres, Andrew E. Jaffe and Emily E. Burke (2019). jaffelab: Commonly used functions by the Jaffe lab. R package version 0.99.27. https://github.com/LieberInstitute/jaffelab

Author

Leonardo Collado-Torres

Examples


## Requires JHPCE to run
accounting_file <- "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting_20191007_0300.txt"
if (file.exists(accounting_file)) {
    head(
        accounting(
            c("92500", "77672"),
            accounting_file
        )
    )
}
## Example for a single job
acc_info <- list("92500" = readLines(
    system.file("extdata", "accounting", "92500.txt",
        package = "sgejobs"
    )
))
acc_info
#> $`92500`
#>  [1] "=============================================================="               
#>  [2] "qname        shared.q"                                                        
#>  [3] "hostname     compute-089.cm.cluster"                                          
#>  [4] "group        lieber_jaffe"                                                    
#>  [5] "owner        lcollado"                                                        
#>  [6] "project      NONE"                                                            
#>  [7] "department   defaultdepartment"                                               
#>  [8] "jobname      delete_bsp2"                                                     
#>  [9] "jobnumber    92500"                                                           
#> [10] "taskid       undefined"                                                       
#> [11] "account      sge"                                                             
#> [12] "priority     0"                                                               
#> [13] "qsub_time    Sat Sep  7 14:40:00 2019"                                        
#> [14] "start_time   Sat Sep  7 14:40:16 2019"                                        
#> [15] "end_time     Sat Sep  7 14:40:59 2019"                                        
#> [16] "granted_pe   NONE"                                                            
#> [17] "slots        1"                                                               
#> [18] "failed       0"                                                               
#> [19] "exit_status  0"                                                               
#> [20] "ru_wallclock 43s"                                                             
#> [21] "ru_utime     0.745s"                                                          
#> [22] "ru_stime     0.814s"                                                          
#> [23] "ru_maxrss    7.004KB"                                                         
#> [24] "ru_ixrss     0.000B"                                                          
#> [25] "ru_ismrss    0.000B"                                                          
#> [26] "ru_idrss     0.000B"                                                          
#> [27] "ru_isrss     0.000B"                                                          
#> [28] "ru_minflt    53903"                                                           
#> [29] "ru_majflt    0"                                                               
#> [30] "ru_nswap     0"                                                               
#> [31] "ru_inblock   64"                                                              
#> [32] "ru_oublock   184"                                                             
#> [33] "ru_msgsnd    0"                                                               
#> [34] "ru_msgrcv    0"                                                               
#> [35] "ru_nsignals  0"                                                               
#> [36] "ru_nvcsw     8837"                                                            
#> [37] "ru_nivcsw    107"                                                             
#> [38] "cpu          1.559s"                                                          
#> [39] "mem          607.126KBs"                                                      
#> [40] "io           779.092KB"                                                       
#> [41] "iow          0.000s"                                                          
#> [42] "maxvmem      5.867MB"                                                         
#> [43] "arid         undefined"                                                       
#> [44] "ar_sub_time  undefined"                                                       
#> [45] "category     -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=10G,mem_free=10G"
#> 

## Requires JHPCE access
accounting_file <- "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting_20191007_0300.txt"
if (file.exists(accounting_file)) {
    acc_info_jhpce <- accounting_read("92500", accounting_file)
    identical(acc_info_jhpce, acc_info)
}

## The example file has been subset to just the first two tasks
acc_info_array <- list("77672" = readLines(
    system.file("extdata", "accounting", "77672.txt",
        package = "sgejobs"
    )
))

## Requires JHPCE access
## Example for an array job
if (file.exists(accounting_file)) {
    acc_info_jhpce_array <- accounting_read("77672", accounting_file)
}

## Requires JHPCE access
accounting_file <- "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting_20191007_0300.txt"
if (file.exists(accounting_file)) {
    accounting_info_jhpce <- accounting_read(
        c("92500", "77672"),
        accounting_file
    )
}

## Here we use the data included in the package to avoid depending on JHPCE.
## Note that the packaged data for job 77672 has been subset to the first two tasks.
accounting_info <- list(
    "92500" = readLines(system.file("extdata", "accounting", "92500.txt",
        package = "sgejobs"
    )),
    "77672" = readLines(system.file("extdata", "accounting", "77672.txt",
        package = "sgejobs"
    ))
)

## Here we parse the data from `qacct` into a data.frame
res <- accounting_parse(accounting_info)
#> 2023-05-07 07:12:12.944529 processing job 92500
#> 2023-05-07 07:12:13.036876 processing job 77672
#> Note: the column 'mem' is now in bytes / second.
res
#>   input_id account ar_sub_time      arid
#> 1  77672.1     sge   undefined undefined
#> 2  77672.2     sge   undefined undefined
#> 3  92500.0     sge   undefined undefined
#>                                                                     category
#> 1 -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=3G,mem_free=3G -pe local 4
#> 2 -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=3G,mem_free=3G -pe local 4
#> 3           -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=10G,mem_free=10G
#>        cpu        department            end_time exit_status failed granted_pe
#> 1 747.887s defaultdepartment 2019-09-04 11:55:14           0      0      local
#> 2 878.682s defaultdepartment 2019-09-04 12:00:26           0      0      local
#> 3   1.559s defaultdepartment 2019-09-07 14:40:59           0      0       NONE
#>          group               hostname          io    iow
#> 1 lieber_jaffe compute-051.cm.cluster 41198000000 0.000s
#> 2 lieber_jaffe compute-051.cm.cluster 35579000000 0.000s
#> 3 lieber_jaffe compute-089.cm.cluster      779092 0.000s
#>                                 jobname jobnumber   maxvmem         mem
#> 1 compute_aucs_duplicatesRemoved_v0.4.0     77672 953344000 6.93543e+11
#> 2 compute_aucs_duplicatesRemoved_v0.4.0     77672 953348000 6.31653e+11
#> 3                           delete_bsp2     92500   5867000 6.07126e+05
#>      owner priority project    qname           qsub_time ru_idrss ru_inblock
#> 1 lcollado        0    NONE shared.q 2019-09-04 11:45:54        0   33755664
#> 2 lcollado        0    NONE shared.q 2019-09-04 11:45:54        0   40880344
#> 3 lcollado        0    NONE shared.q 2019-09-07 14:40:00        0         64
#>   ru_ismrss ru_isrss ru_ixrss ru_majflt ru_maxrss ru_minflt ru_msgrcv ru_msgsnd
#> 1         0        0        0         1    953348    124265         0         0
#> 2         0        0        0         0    953348    132866         0         0
#> 3         0        0        0         0      7004     53903         0         0
#>   ru_nivcsw ru_nsignals ru_nswap ru_nvcsw ru_oublock ru_stime ru_utime
#> 1     44094           0        0  3237249        192 135.616s 612.272s
#> 2     63844           0        0  3572098        192 155.540s 723.142s
#> 3       107           0        0     8837        184   0.814s   0.745s
#>   ru_wallclock slots          start_time    taskid
#> 1         526s     4 2019-09-04 11:46:28         1
#> 2         832s     4 2019-09-04 11:46:34         2
#> 3          43s     1 2019-09-07 14:40:16 undefined

## Check the maximum memory use
as.numeric(res$maxvmem)
#> [1] 953344000 953348000   5867000

## And the absolute maximum
pryr:::show_bytes(max(res$maxvmem))
#> 953 MB