# Load the parallel back-ends. library() (unlike require()) stops with an
# error when a package is missing — the right behavior for a script that
# cannot proceed without these dependencies.
library(parallel)
library(doSNOW)
library(foreach)
library(future)
library(future.apply)
# Spin up 5 local PSOCK worker processes and tell the future framework to
# dispatch futures onto them.
nodes <- 5L
cl <- future::makeClusterPSOCK(nodes)
# NOTE(review): persistent=TRUE reuses worker state across futures; this
# argument is deprecated in newer versions of the future package — confirm
# against the installed version.
plan(cluster, workers=cl, persistent=TRUE)
# Identity function that simulates an expensive job: inputs 5 through 8
# each block for 3 seconds before returning; all other inputs return
# immediately.
foo <- function(i) {
  heavy <- i %in% 5:8
  if (heavy) {
    Sys.sleep(3L)
  }
  i
}
x <- 1:20
# Time the statically-chunked dispatch: future_lapply pre-assigns elements
# to workers in fixed chunks before any of them start running, so a slow
# chunk cannot be redistributed to idle workers.
system.time(ans <- future_lapply(x, function(i) {
  stamp <- sprintf("[%s] [%5.0f] ans=%2.0f",
                   as.character(Sys.time()), Sys.getpid(), i)
  cat(stamp, sep = "\n")
  foo(i)
}))
# [2019-04-18 13:17:08] [17620] ans= 1
# [2019-04-18 13:17:08] [17620] ans= 2
# [2019-04-18 13:17:08] [17620] ans= 3
# [2019-04-18 13:17:08] [17620] ans= 4
# [2019-04-18 13:17:08] [16016] ans= 5 # <~~ predefined chunk node = wait until previous one completes
# [2019-04-18 13:17:11] [16016] ans= 6 # <~~ predefined chunk node = wait until previous one completes
# [2019-04-18 13:17:14] [16016] ans= 7 # <~~ predefined chunk node = wait until previous one completes
# [2019-04-18 13:17:17] [16016] ans= 8 # <~~ predefined chunk node = wait until previous one completes
# [2019-04-18 13:17:08] [ 3992] ans= 9
# [2019-04-18 13:17:08] [ 3992] ans=10
# [2019-04-18 13:17:08] [ 3992] ans=11
# [2019-04-18 13:17:08] [ 3992] ans=12
# [2019-04-18 13:17:08] [ 3836] ans=13
# [2019-04-18 13:17:08] [ 3836] ans=14
# [2019-04-18 13:17:08] [ 3836] ans=15
# [2019-04-18 13:17:08] [ 3836] ans=16
# [2019-04-18 13:17:08] [ 8856] ans=17
# [2019-04-18 13:17:08] [ 8856] ans=18
# [2019-04-18 13:17:08] [ 8856] ans=19
# [2019-04-18 13:17:08] [ 8856] ans=20
# user system elapsed
# 0.03 0.02 12.14 # <~~ 4x more time
# Register the same cluster with doSNOW so foreach/%dopar% load-balances:
# each worker pulls the next task as soon as it becomes free, instead of
# receiving a fixed chunk up front.
registerDoSNOW(cl)
# Fixes vs. the original paste: the system.time(...) call was never closed
# (the output transcript had been pasted inside the braces, breaking the
# parse), the trace message was built but never printed, and the loop
# returned the message string instead of the computed value — making `ans`
# inconsistent with the future_lapply run above.
system.time(ans <- foreach(i = x) %dopar% {
  msg <- sprintf("[%s] [%5.0f] ans=%2.0f",
                 as.character(Sys.time()), Sys.getpid(), i)
  cat(msg, sep = "\n")  # print the trace, mirroring the future_lapply run
  foo(i)                # return the value so `ans` matches the first run
})
# [2019-04-18 13:17:20] [17620] ans= 1
# [2019-04-18 13:17:20] [16016] ans= 2
# [2019-04-18 13:17:20] [ 3992] ans= 3
# [2019-04-18 13:17:20] [ 3836] ans= 4
# [2019-04-18 13:17:20] [ 8856] ans= 5 # <~~ runs on next available free node
# [2019-04-18 13:17:20] [17620] ans= 6 # <~~
# [2019-04-18 13:17:20] [16016] ans= 7 # <~~
# [2019-04-18 13:17:20] [ 3992] ans= 8 # <~~
# [2019-04-18 13:17:20] [ 3836] ans= 9
# [2019-04-18 13:17:20] [ 3836] ans=10
# [2019-04-18 13:17:20] [ 3836] ans=11
# [2019-04-18 13:17:20] [ 3836] ans=12
# [2019-04-18 13:17:20] [ 3836] ans=13
# [2019-04-18 13:17:20] [ 3836] ans=14
# [2019-04-18 13:17:20] [ 3836] ans=15
# [2019-04-18 13:17:20] [ 3836] ans=16
# [2019-04-18 13:17:20] [ 3836] ans=17
# [2019-04-18 13:17:20] [ 3836] ans=18
# [2019-04-18 13:17:20] [ 3836] ans=19
# [2019-04-18 13:17:20] [ 3836] ans=20
# user system elapsed
# 0.01 0.00 3.07 # <~~ roughly 4x shorter runtime
The point is, even if things are random (I understand there's ordering="random"), there can be chunks that get stuck due to a big job, when other nodes are potentially free. I think it's much more efficient to look for free nodes and assign jobs on the fly than to determine chunk sizes / entries upfront.
What do you think?
The point is, even if things are random (I understand there's ordering="random"), there can be chunks that get stuck due to a big job, when other nodes are potentially free. I think it's much more efficient to look for free nodes and assign jobs on the fly than to determine chunk sizes / entries upfront. What do you think?