diff --git a/test/error_path_intentionally_fail.jl b/test/error_path_intentionally_fail.jl
new file mode 100644
index 0000000..97b81e3
--- /dev/null
+++ b/test/error_path_intentionally_fail.jl
@@ -0,0 +1,45 @@
+# Error path: a fake `srun` that fails immediately.
+#
+# A stub `srun` that prints to stdout and stderr and then exits with status 1 is
+# placed first on PATH; the test asserts that `Distributed.addprocs` throws.
+mktempdir() do tmpdir
+    fake_bindir = joinpath(tmpdir, "bin")
+    fake_srun = joinpath(fake_bindir, "srun")
+    mkpath(fake_bindir)
+    open(fake_srun, "w") do io
+        println(io, "#!/usr/bin/env bash")
+        println(io, "set -euf -o pipefail")
+        # println(io, "set -x")
+        println(io, "echo [stdout] fake-srun: INTENTIONALLY ERROR-ING")
+        println(io, "echo [stderr] fake-srun: INTENTIONALLY ERROR-ING >&2")
+        println(io, "exit 1")
+    end
+    chmod(fake_srun, 0o700) # chmod +x
+
+    # PATH entries are separated by ';' on Windows and ':' elsewhere.
+    path_separator = Sys.iswindows() ? ';' : ':'
+    new_env = Dict{String, String}()
+    new_env["SLURM_NTASKS"] = "8"
+    new_env["SLURM_JOB_ID"] = "1234"
+    if haskey(ENV, "PATH")
+        # Prepend, so that the fake `srun` is found before any other one.
+        new_env["PATH"] = fake_bindir * path_separator * ENV["PATH"]
+    else
+        new_env["PATH"] = fake_bindir
+    end
+
+    @info "with old PATH" Sys.which("srun")
+    withenv(new_env...) do
+        @info "with new PATH" Sys.which("srun")
+
+        # The expected exception type depends on the Julia version.
+        if Base.VERSION >= v"1.2-"
+            T_expected = TaskFailedException
+        else
+            T_expected = Base.IOError
+        end
+
+        mgr = SlurmClusterManager.SlurmManager()
+        @test_throws T_expected Distributed.addprocs(mgr)
+    end
+end
diff --git a/test/error_path_manager_timeout.jl b/test/error_path_manager_timeout.jl
new file mode 100644
index 0000000..333db2f
--- /dev/null
+++ b/test/error_path_manager_timeout.jl
@@ -0,0 +1,62 @@
+# Error path: the manager's launch timeout.
+#
+# A stub `srun` placed first on PATH sleeps for 15 seconds without writing to
+# stdout, while the manager is created with `launch_timeout = 2.0`. The test
+# asserts that `Distributed.addprocs` throws and that the unwrapped exception
+# is `ErrorException("launch_timeout exceeded")`.
+mktempdir() do tmpdir
+    fake_bindir = joinpath(tmpdir, "bin")
+    fake_srun = joinpath(fake_bindir, "srun")
+    mkpath(fake_bindir)
+    open(fake_srun, "w") do io
+        println(io, "#!/usr/bin/env bash")
+        println(io, "set -euf -o pipefail")
+        # println(io, "set -x")
+
+        # we only print this to stderr; don't print to stdout, or we won't hit the desired error path
+        # (we'll hit a different error path instead, not the one we want to test)
+        println(io, "echo [stderr] fake-srun: sleeping for 15 seconds... >&2")
+
+        # Sleep for 15 seconds:
+        println(io, "sleep 15")
+
+        println(io, "echo [stdout] fake-srun: INTENTIONALLY ERROR-ING")
+        println(io, "echo [stderr] fake-srun: INTENTIONALLY ERROR-ING >&2")
+        println(io, "exit 1")
+    end
+    chmod(fake_srun, 0o700) # chmod +x
+
+    # PATH entries are separated by ';' on Windows and ':' elsewhere.
+    path_separator = Sys.iswindows() ? ';' : ':'
+    new_env = Dict{String, String}()
+    new_env["SLURM_NTASKS"] = "8"
+    new_env["SLURM_JOB_ID"] = "1234"
+    if haskey(ENV, "PATH")
+        # Prepend, so that the fake `srun` is found before any other one.
+        new_env["PATH"] = fake_bindir * path_separator * ENV["PATH"]
+    else
+        new_env["PATH"] = fake_bindir
+    end
+
+    @info "with old PATH" Sys.which("srun")
+    withenv(new_env...) do
+        @info "with new PATH" Sys.which("srun")
+
+        # Only the outer exception type depends on the Julia version.
+        if Base.VERSION >= v"1.2-"
+            expected_outer_ex_T = TaskFailedException
+        else
+            expected_outer_ex_T = ErrorException
+        end
+        expected_inner_ex_INSTANCE = ErrorException("launch_timeout exceeded")
+
+        mgr = SlurmClusterManager.SlurmManager(; launch_timeout = 2.0)
+        test_result = @test_throws expected_outer_ex_T Distributed.addprocs(mgr)
+
+        cfg = ConfigForTestingTaskFailedException(;
+            expected_outer_ex_T=expected_outer_ex_T,
+            expected_inner_ex_INSTANCE=expected_inner_ex_INSTANCE,
+        )
+        test_task_failed_exception(test_result, cfg)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index bda4f85..59604e5 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -6,7 +6,7 @@ import Distributed
 import Test
 
 # Bring some names into scope, just for convenience:
-using Test: @testset, @test, @test_logs
+using Test: @testset, @test, @test_throws, @test_logs, @test_skip, @test_broken
 
 const original_JULIA_DEBUG = strip(get(ENV, "JULIA_DEBUG", ""))
 if isempty(original_JULIA_DEBUG)
@@ -73,3 +73,14 @@ end # testset "SlurmClusterManager.jl"
         )
     end
 end
+
+include("util.jl")
+
+@testset "Test some unhappy paths (error paths)" begin
+    @testset "intentionally fail" begin
+        include("error_path_intentionally_fail.jl")
+    end
+    @testset "manager's launch timeout" begin
+        include("error_path_manager_timeout.jl")
+    end
+end
diff --git a/test/util.jl b/test/util.jl
new file mode 100644
index 0000000..f5a583b
--- /dev/null
+++ b/test/util.jl
@@ -0,0 +1,58 @@
+# Helpers shared by the error-path tests.
+
+"""
+    extract_test_result_value(test_result::Test.Pass)
+
+Return the `value` field of a passing test result.
+"""
+extract_test_result_value(test_result::Test.Pass) = test_result.value
+
+"""
+    recursively_unwrap_ex(ex)
+
+Follow `TaskFailedException`s down to the exception of the innermost failed task
+and return it. Any other value is returned unchanged, so an unexpected exception
+type reaches the caller's `@test`s instead of raising a `MethodError` here.
+"""
+recursively_unwrap_ex(ex) = ex
+
+@static if Base.VERSION >= v"1.2-"
+    function recursively_unwrap_ex(outer_ex::TaskFailedException)
+        return recursively_unwrap_ex(outer_ex.task.exception)
+    end
+end
+
+"""
+    ConfigForTestingTaskFailedException(; expected_outer_ex_T, expected_inner_ex_INSTANCE)
+
+Expectations checked by `test_task_failed_exception`: the type of the exception that
+was thrown, and the exception expected once it has been unwrapped.
+"""
+Base.@kwdef struct ConfigForTestingTaskFailedException
+    expected_outer_ex_T
+    expected_inner_ex_INSTANCE
+end
+
+"""
+    test_task_failed_exception(test_result, cfg::ConfigForTestingTaskFailedException)
+
+Check the value held by a passing `@test_throws` result against `cfg`, both as
+thrown and after `recursively_unwrap_ex`. Always returns `nothing`.
+"""
+function test_task_failed_exception(test_result::Test.Pass, cfg::ConfigForTestingTaskFailedException)
+    observed_outer_ex = extract_test_result_value(test_result)
+    @test observed_outer_ex isa cfg.expected_outer_ex_T
+
+    observed_inner_ex = recursively_unwrap_ex(observed_outer_ex)
+    @test observed_inner_ex isa typeof(cfg.expected_inner_ex_INSTANCE)
+    @test observed_inner_ex == cfg.expected_inner_ex_INSTANCE
+
+    return nothing
+end
+
+# `@test_throws` did not pass and `Test` has already recorded that, so there is
+# no exception to inspect; bail out instead of failing with a `MethodError`.
+function test_task_failed_exception(test_result::Test.Result, cfg::ConfigForTestingTaskFailedException)
+    @warn "Skipping the inner-exception checks: `@test_throws` did not pass" test_result
+    return nothing
+end