For a given set of SGE job IDs, these functions extract the accounting data
from SGE using `qacct` and then parse it to produce a data.frame with the
summary information recorded by SGE on the performance of these SGE jobs.
accounting(
job_ids,
accounting_files = "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting",
tz = "EST"
)
accounting_read(
job_ids,
accounting_files = "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting"
)
accounting_parse(accounting_info, tz = "EST")
job_ids: A character vector of SGE job IDs to inspect.
accounting_files: A character vector with the paths to the SGE accounting files.
tz: Time zone for your SGE cluster. Defaults to EST for JHPCE.
accounting_info: A named list where each element contains the output from
`qacct` for a given job and the names of the list are the SGE job IDs.
This can be produced using accounting_read().
accounting_read(): a list with the `qacct` output from SGE for each of the
SGE job IDs.
accounting_parse() and accounting(): a data.frame with the SGE accounting
information parsed.
## Requires JHPCE to run
## Read and parse the accounting information for two jobs in a single call
## and show the first rows. The file.exists() guard skips this step on
## machines where the JHPCE accounting file is not available.
accounting_file <- "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting_20191007_0300.txt"
if (file.exists(accounting_file)) {
head(
accounting(
c("92500", "77672"),
accounting_file
)
)
}
## Example for a single job
## Load the `qacct` output for job 92500 from the example file shipped with
## the sgejobs package and store it in a list named by the job ID.
acc_info <- list("92500" = readLines(
system.file("extdata", "accounting", "92500.txt",
package = "sgejobs"
)
))
## Print the raw lines that were read for this job
acc_info
#> $`92500`
#> [1] "=============================================================="
#> [2] "qname shared.q"
#> [3] "hostname compute-089.cm.cluster"
#> [4] "group lieber_jaffe"
#> [5] "owner lcollado"
#> [6] "project NONE"
#> [7] "department defaultdepartment"
#> [8] "jobname delete_bsp2"
#> [9] "jobnumber 92500"
#> [10] "taskid undefined"
#> [11] "account sge"
#> [12] "priority 0"
#> [13] "qsub_time Sat Sep 7 14:40:00 2019"
#> [14] "start_time Sat Sep 7 14:40:16 2019"
#> [15] "end_time Sat Sep 7 14:40:59 2019"
#> [16] "granted_pe NONE"
#> [17] "slots 1"
#> [18] "failed 0"
#> [19] "exit_status 0"
#> [20] "ru_wallclock 43s"
#> [21] "ru_utime 0.745s"
#> [22] "ru_stime 0.814s"
#> [23] "ru_maxrss 7.004KB"
#> [24] "ru_ixrss 0.000B"
#> [25] "ru_ismrss 0.000B"
#> [26] "ru_idrss 0.000B"
#> [27] "ru_isrss 0.000B"
#> [28] "ru_minflt 53903"
#> [29] "ru_majflt 0"
#> [30] "ru_nswap 0"
#> [31] "ru_inblock 64"
#> [32] "ru_oublock 184"
#> [33] "ru_msgsnd 0"
#> [34] "ru_msgrcv 0"
#> [35] "ru_nsignals 0"
#> [36] "ru_nvcsw 8837"
#> [37] "ru_nivcsw 107"
#> [38] "cpu 1.559s"
#> [39] "mem 607.126KBs"
#> [40] "io 779.092KB"
#> [41] "iow 0.000s"
#> [42] "maxvmem 5.867MB"
#> [43] "arid undefined"
#> [44] "ar_sub_time undefined"
#> [45] "category -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=10G,mem_free=10G"
#>
## Requires JHPCE access
## Read the same job directly from the cluster accounting file and check
## that the result matches the copy included in the package.
accounting_file <- "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting_20191007_0300.txt"
if (file.exists(accounting_file)) {
acc_info_jhpce <- accounting_read("92500", accounting_file)
identical(acc_info_jhpce, acc_info)
}
## Example for an array job
## The example file has been subset to just the first two tasks
acc_info_array <- list("77672" = readLines(
system.file("extdata", "accounting", "77672.txt",
package = "sgejobs"
)
))
## Requires JHPCE access
## Reuses the `accounting_file` path defined earlier in these examples.
if (file.exists(accounting_file)) {
acc_info_jhpce_array <- accounting_read("77672", accounting_file)
}
## Requires JHPCE access
## Read the raw `qacct` output for both jobs at once from the cluster
## accounting file. This step is skipped when the file does not exist.
accounting_file <- "/cm/shared/apps/sge/sge-8.1.9/default/common/accounting_20191007_0300.txt"
if (file.exists(accounting_file)) {
accounting_info_jhpce <- accounting_read(
c("92500", "77672"),
accounting_file
)
}
## Here we use the data included in the package to avoid depending on JHPCE.
## Note that the data for job 77672 has been subset to its first two tasks.
## The list is named by job ID, which is the input format that
## accounting_parse() expects.
accounting_info <- list(
"92500" = readLines(system.file("extdata", "accounting", "92500.txt",
package = "sgejobs"
)),
"77672" = readLines(system.file("extdata", "accounting", "77672.txt",
package = "sgejobs"
))
)
## Here we parse the data from `qacct` into a data.frame
## (`tz` is left at its default value)
res <- accounting_parse(accounting_info)
#> 2023-05-07 07:12:12.944529 processing job 92500
#> 2023-05-07 07:12:13.036876 processing job 77672
#> Note: the column 'mem' is now in bytes / second.
## Print the parsed accounting data.frame
res
#> input_id account ar_sub_time arid
#> 1 77672.1 sge undefined undefined
#> 2 77672.2 sge undefined undefined
#> 3 92500.0 sge undefined undefined
#> category
#> 1 -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=3G,mem_free=3G -pe local 4
#> 2 -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=3G,mem_free=3G -pe local 4
#> 3 -u lcollado -l h_fsize=100G,h_stack=512M,h_vmem=10G,mem_free=10G
#> cpu department end_time exit_status failed granted_pe
#> 1 747.887s defaultdepartment 2019-09-04 11:55:14 0 0 local
#> 2 878.682s defaultdepartment 2019-09-04 12:00:26 0 0 local
#> 3 1.559s defaultdepartment 2019-09-07 14:40:59 0 0 NONE
#> group hostname io iow
#> 1 lieber_jaffe compute-051.cm.cluster 41198000000 0.000s
#> 2 lieber_jaffe compute-051.cm.cluster 35579000000 0.000s
#> 3 lieber_jaffe compute-089.cm.cluster 779092 0.000s
#> jobname jobnumber maxvmem mem
#> 1 compute_aucs_duplicatesRemoved_v0.4.0 77672 953344000 6.93543e+11
#> 2 compute_aucs_duplicatesRemoved_v0.4.0 77672 953348000 6.31653e+11
#> 3 delete_bsp2 92500 5867000 6.07126e+05
#> owner priority project qname qsub_time ru_idrss ru_inblock
#> 1 lcollado 0 NONE shared.q 2019-09-04 11:45:54 0 33755664
#> 2 lcollado 0 NONE shared.q 2019-09-04 11:45:54 0 40880344
#> 3 lcollado 0 NONE shared.q 2019-09-07 14:40:00 0 64
#> ru_ismrss ru_isrss ru_ixrss ru_majflt ru_maxrss ru_minflt ru_msgrcv ru_msgsnd
#> 1 0 0 0 1 953348 124265 0 0
#> 2 0 0 0 0 953348 132866 0 0
#> 3 0 0 0 0 7004 53903 0 0
#> ru_nivcsw ru_nsignals ru_nswap ru_nvcsw ru_oublock ru_stime ru_utime
#> 1 44094 0 0 3237249 192 135.616s 612.272s
#> 2 63844 0 0 3572098 192 155.540s 723.142s
#> 3 107 0 0 8837 184 0.814s 0.745s
#> ru_wallclock slots start_time taskid
#> 1 526s 4 2019-09-04 11:46:28 1
#> 2 832s 4 2019-09-04 11:46:34 2
#> 3 43s 1 2019-09-07 14:40:16 undefined
## Check the maximum memory use of each job / task; the parsed values are
## in bytes (e.g. "5.867MB" in the raw output becomes 5867000)
as.numeric(res$maxvmem)
#> [1] 953344000 953348000 5867000
## And the absolute maximum, formatted as a human-readable size
## NOTE(review): `:::` suggests show_bytes() is not exported by pryr -- confirm
pryr:::show_bytes(max(res$maxvmem))
#> 953 MB