Squashed commit of the following:
commit b06b11cbb5d09c6d091551e39767cd3316f88376
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Oct 5 11:57:37 2021 -0300

    Fixing failing unit test

commit fe2a43a19b2a16a9aedd9e95e71e672935ecaeb1
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Oct 5 11:54:01 2021 -0300

    Adding endpoints and updating documentation

commit 46e0deeefb8b06291ae5e2d6b8ec4749c5b0ea6f
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Oct 5 11:49:43 2021 -0300

    Completing unit tests and relevant fixes

commit e0569f370624844feee6aae4708bfe683f4156cf
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 4 17:30:59 2021 -0300

    Adding in gcc tsan for debug builds to help with race conditions, fixing
    many of those, and fixing really crummy assumption about how futures
    worked that will speed up task execution by a ton.

commit c748a4f592e1ada5546908be5281d04f4749539d
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 4 10:14:43 2021 -0300

    Checkpointing work that seems to have resolved the race condition

commit 7a79f2943e0d50545d976a28b4b379340a90dded
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Wed Sep 29 09:27:07 2021 -0300

    Completing the rough-in for DAG killing / pausing / resuming

commit 4cf8d81d5f6fcf4a7dd83d8fca3e23f153aa8acb
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:53:50 2021 -0300

    Adding dagrunner unit tests, adding a resetRunning method to resume

commit 54e2c1f9f5e7d5b339d71be024e0e390c4d2bf61
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:45:57 2021 -0300

    Refactoring runDAG into DAGRunner

commit 682be7a11e2fae850e1bc3e207628d2335768c2b
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:34:43 2021 -0300

    Adding DAGRunner class to replace Utilities::runDAG, making Slurm
    cancellation rc agree with SIGKILL

commit 4171b3a6998791abfc71b04f8de1ae93c4f90a78
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:14:17 2021 -0300

    Adding unit tests for stopping jobs to slurm

commit dc0b1ff26a5d98471164132d35bb8a552cc75ff8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:04:15 2021 -0300

    Adding in stop method for task executors

commit e752b44f55113be54392bcbb5c3d6f251d673cfa
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:32:06 2021 -0300

    Adding additional tests for loggers

commit f0773d5a84a422738fc17c9277a2b735a21a3d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:29:21 2021 -0300

    Unit tests pass

commit 993ff2810de2d53dc6a59ab53d620fecf152d4a0
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:24:34 2021 -0300

    Adding handling for new routes, still need to add tests for new routes

commit 676623b14e45759872a2dbcbc98f6a744e022a71
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:12:43 2021 -0300

    Adding handling for new routes, still need to add tests for new routes

commit b9edb6ba291eb064f4c459a308ea6912fba9fa02
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Sep 27 11:59:14 2021 -0300

    Defining new endpoints, fixing dag resumption code, adding PAUSED state,
    refactoring DAGSpec and adding deserializer
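The series nets out to replacing Utilities::runDAG with a resumable DAGRunner
class, keyed by the DAGRunID the logger hands back, plus REST routes for
killing and resuming a run. A minimal sketch of the new lifecycle as the tests
below exercise it (headers and types are the ones introduced in this diff; the
one-task JSON is illustrative, not from the commit):

#include "daggy/DAGRunner.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"

#include <sstream>

int main()
{
  daggy::executors::task::ForkingTaskExecutor ex(10); // up to 10 forked tasks
  std::stringstream ss;
  daggy::loggers::dag_run::OStreamLogger logger(ss);

  // DAGSpec now carries tag, tasks, and taskConfig together.
  daggy::DAGSpec dagSpec;
  dagSpec.tasks = daggy::expandTaskSet(
      daggy::tasksFromJSON(
          R"({"A": {"job": {"command": ["/usr/bin/touch", "demo_a"]}}})"),
      ex);

  auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
  auto runID = logger.startDAGRun(dagSpec); // was startDAGRun(name, tasks)

  daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
  auto endDAG = runner.run();

  if (!endDAG.allVisited()) { // e.g. a task errored
    runner.resetRunning();    // re-queue unfinished work on the same run
    endDAG = runner.run();    // and resume where it left off
  }
}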
@@ -2,6 +2,7 @@ project(tests)
 add_executable(tests main.cpp
   # unit tests
   unit_dag.cpp
+  unit_dagrunner.cpp
   unit_dagrun_loggers.cpp
   unit_executor_forkingexecutor.cpp
   unit_executor_slurmexecutor.cpp
@@ -14,4 +15,4 @@ add_executable(tests main.cpp
   # Performance checks
   perf_dag.cpp
 )
-target_link_libraries(tests libdaggy stdc++fs Catch2::Catch2)
+target_link_libraries(tests libdaggy stdc++fs Catch2::Catch2 curl)

@@ -2,11 +2,10 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <sstream>

 #include "daggy/loggers/dag_run/OStreamLogger.hpp"

 namespace fs = std::filesystem;

 using namespace daggy;
 using namespace daggy::loggers::dag_run;

@@ -20,28 +19,68 @@ const TaskSet SAMPLE_TASKS{
     {"work_c",
      Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};

-inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name,
+inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &tag,
                                const TaskSet &tasks)
 {
-  auto runID = logger.startDAGRun(name, tasks);
-  auto dagRun = logger.getDAGRun(runID);
+  auto runID = logger.startDAGRun(DAGSpec{.tag = tag, .tasks = tasks});

-  REQUIRE(dagRun.tasks == tasks);
+  // Verify run shows up in the list
+  {
+    auto runs = logger.queryDAGRuns();
+    REQUIRE(!runs.empty());
+    auto it = std::find_if(runs.begin(), runs.end(),
+                           [runID](const auto &r) { return r.runID == runID; });
+    REQUIRE(it != runs.end());
+    REQUIRE(it->tag == tag);
+    REQUIRE(it->runState == +RunState::QUEUED);
+  }

-  REQUIRE(dagRun.taskRunStates.size() == tasks.size());
-  auto nonQueuedTask =
-      std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
-                   [](const auto &a) { return a.second != +RunState::QUEUED; });
-  REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
+  // Verify states
+  {
+    REQUIRE(logger.getDAGRunState(runID) == +RunState::QUEUED);
+    for (const auto &[k, _] : tasks) {
+      REQUIRE(logger.getTaskState(runID, k) == +RunState::QUEUED);
+    }
+  }
+
+  // Verify integrity of run
+  {
+    auto dagRun = logger.getDAGRun(runID);
+
+    REQUIRE(dagRun.dagSpec.tag == tag);
+    REQUIRE(dagRun.dagSpec.tasks == tasks);
+
+    REQUIRE(dagRun.taskRunStates.size() == tasks.size());
+    auto nonQueuedTask = std::find_if(
+        dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
+        [](const auto &a) { return a.second != +RunState::QUEUED; });
+    REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
+    REQUIRE(dagRun.dagStateChanges.size() == 1);
+    REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);
+  }
+
+  // Update DAG state and ensure that it's updated;
+  {
+    logger.updateDAGRunState(runID, RunState::RUNNING);
+    auto dagRun = logger.getDAGRun(runID);
+    REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::RUNNING);
+  }
+
+  // Update a task state
+  {
+    for (const auto &[k, v] : tasks)
+      logger.updateTaskState(runID, k, RunState::RUNNING);
+    auto dagRun = logger.getDAGRun(runID);
+    for (const auto &[k, v] : tasks) {
+      REQUIRE(dagRun.taskRunStates.at(k) == +RunState::RUNNING);
+    }
+  }

-  REQUIRE(dagRun.dagStateChanges.size() == 1);
-  REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);
   return runID;
 }

 TEST_CASE("ostream_logger", "[ostream_logger]")
 {
   // cleanup();
   std::stringstream ss;
   daggy::loggers::dag_run::OStreamLogger logger(ss);

@@ -49,6 +88,4 @@ TEST_CASE("ostream_logger", "[ostream_logger]")
   {
     testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
   }
-
-  // cleanup();
 }

tests/unit_dagrunner.cpp (new file, 256 lines)
@@ -0,0 +1,256 @@
+#include <catch2/catch.hpp>
+#include <filesystem>
+#include <fstream>
+
+#include "daggy/DAGRunner.hpp"
+#include "daggy/executors/task/ForkingTaskExecutor.hpp"
+#include "daggy/executors/task/NoopTaskExecutor.hpp"
+#include "daggy/loggers/dag_run/OStreamLogger.hpp"
+
+namespace fs = std::filesystem;
+
+TEST_CASE("dagrunner", "[dagrunner_order_preservation]")
+{
+  daggy::executors::task::NoopTaskExecutor ex;
+  std::stringstream ss;
+  daggy::loggers::dag_run::OStreamLogger logger(ss);
+
+  daggy::TimePoint globalStartTime = daggy::Clock::now();
+
+  daggy::DAGSpec dagSpec;
+
+  std::string testParams{
+      R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
+  dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
+
+  std::string taskJSON = R"({
+  "A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
+  "B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
+  "C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
+  "D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
+  "E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
+})";
+
+  dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex,
+                                dagSpec.taskConfig.variables);
+
+  REQUIRE(dagSpec.tasks.size() == 20);
+
+  auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
+  auto runID = logger.startDAGRun(dagSpec);
+
+  daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
+
+  auto endDAG = runner.run();
+
+  REQUIRE(endDAG.allVisited());
+
+  // Ensure the run order
+  auto rec = logger.getDAGRun(runID);
+
+  daggy::TimePoint globalStopTime = daggy::Clock::now();
+  std::array<daggy::TimePoint, 5> minTimes;
+  minTimes.fill(globalStartTime);
+  std::array<daggy::TimePoint, 5> maxTimes;
+  maxTimes.fill(globalStopTime);
+
+  for (const auto &[k, v] : rec.taskAttempts) {
+    size_t idx = k[0] - 65;
+    auto &startTime = minTimes[idx];
+    auto &stopTime = maxTimes[idx];
+    startTime = std::max(startTime, v.front().startTime);
+    stopTime = std::min(stopTime, v.back().stopTime);
+  }
+
+  for (size_t i = 0; i < 5; ++i) {
+    for (size_t j = i + 1; j < 4; ++j) {
+      REQUIRE(maxTimes[i] < minTimes[j]);
+    }
+  }
+}
+
+TEST_CASE("DAGRunner simple execution", "[dagrunner_simple]")
+{
+  daggy::executors::task::ForkingTaskExecutor ex(10);
+  std::stringstream ss;
+  daggy::loggers::dag_run::OStreamLogger logger(ss);
+
+  daggy::DAGSpec dagSpec;
+
+  SECTION("Simple execution")
+  {
+    std::string prefix = (fs::current_path() / "asdlk").string();
+    std::unordered_map<std::string, std::string> files{
+        {"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
+    std::string taskJSON =
+        R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
+        R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
+        files.at("B") +
+        R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
+        files.at("C") + R"("]}}})";
+    dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
+    auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
+    auto runID = logger.startDAGRun(dagSpec);
+    daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
+    auto endDAG = runner.run();
+    REQUIRE(endDAG.allVisited());
+
+    for (const auto &[_, file] : files) {
+      REQUIRE(fs::exists(file));
+      fs::remove(file);
+    }
+
+    // Get the DAG Run Attempts
+    auto record = logger.getDAGRun(runID);
+    for (const auto &[_, attempts] : record.taskAttempts) {
+      REQUIRE(attempts.size() == 1);
+      REQUIRE(attempts.front().rc == 0);
+    }
+  }
+}
+
+TEST_CASE("DAG Runner Restart old DAG", "[dagrunner_restart]")
+{
+  daggy::executors::task::ForkingTaskExecutor ex(10);
+  std::stringstream ss;
+  daggy::loggers::dag_run::OStreamLogger logger(ss);
+  daggy::DAGSpec dagSpec;
+
+  SECTION("Recovery from Error")
+  {
+    auto cleanup = []() {
+      // Cleanup
+      std::vector<fs::path> paths{"rec_error_A", "noexist"};
+      for (const auto &pth : paths) {
+        if (fs::exists(pth))
+          fs::remove_all(pth);
+      }
+    };
+
+    cleanup();
+
+    std::string goodPrefix = "rec_error_";
+    std::string badPrefix = "noexist/rec_error_";
+    std::string taskJSON =
+        R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
+        R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
+        badPrefix +
+        R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
+        badPrefix + R"(C"]}}})";
+    dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
+    auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
+
+    auto runID = logger.startDAGRun(dagSpec);
+
+    daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
+    auto tryDAG = runner.run();
+
+    REQUIRE(!tryDAG.allVisited());
+
+    // Create the missing dir, then continue to run the DAG
+    fs::create_directory("noexist");
+    runner.resetRunning();
+    auto endDAG = runner.run();
+
+    REQUIRE(endDAG.allVisited());
+
+    // Get the DAG Run Attempts
+    auto record = logger.getDAGRun(runID);
+    REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
+    REQUIRE(record.taskAttempts["B_0"].size() ==
+            2); // B errored and had to be retried
+    REQUIRE(record.taskAttempts["C_0"].size() ==
+            1); // C wasn't run because B errored
+
+    cleanup();
+  }
+}
+
+TEST_CASE("DAG Runner Generator Tasks", "[dagrunner_generator]")
+{
+  daggy::executors::task::ForkingTaskExecutor ex(10);
+  std::stringstream ss;
+  daggy::loggers::dag_run::OStreamLogger logger(ss);
+  daggy::DAGSpec dagSpec;
+
+  SECTION("Generator tasks")
+  {
+    std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
+    dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
+
+    std::string generatorOutput =
+        R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
+    fs::path ofn = fs::current_path() / "generator_test_output.json";
+    std::ofstream ofh{ofn};
+    ofh << generatorOutput << std::endl;
+    ofh.close();
+
+    daggy::TimePoint globalStartTime = daggy::Clock::now();
+    std::stringstream jsonTasks;
+    jsonTasks
+        << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
+        << std::quoted(ofn.string())
+        << R"(]}, "children": ["C"], "isGenerator": true},)"
+        << R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
+
+    dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str());
+    REQUIRE(dagSpec.tasks.size() == 2);
+    REQUIRE(dagSpec.tasks["A"].children ==
+            std::unordered_set<std::string>{"C"});
+    dagSpec.tasks =
+        daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables);
+    REQUIRE(dagSpec.tasks.size() == 2);
+    REQUIRE(dagSpec.tasks["A_0"].children ==
+            std::unordered_set<std::string>{"C"});
+    auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
+    REQUIRE(dag.size() == 2);
+
+    auto runID = logger.startDAGRun(dagSpec);
+    daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
+    auto finalDAG = runner.run();
+
+    REQUIRE(finalDAG.allVisited());
+    REQUIRE(finalDAG.size() == 4);
+
+    // Check the logger
+    auto record = logger.getDAGRun(runID);
+
+    REQUIRE(record.dagSpec.tasks.size() == 4);
+    REQUIRE(record.taskRunStates.size() == 4);
+    for (const auto &[taskName, attempts] : record.taskAttempts) {
+      REQUIRE(attempts.size() == 1);
+      REQUIRE(attempts.back().rc == 0);
+    }
+
+    // Ensure that children were updated properly
+    REQUIRE(record.dagSpec.tasks["A_0"].children ==
+            std::unordered_set<std::string>{"B_0", "B_1", "C"});
+    REQUIRE(record.dagSpec.tasks["B_0"].children ==
+            std::unordered_set<std::string>{"C"});
+    REQUIRE(record.dagSpec.tasks["B_1"].children ==
+            std::unordered_set<std::string>{"C"});
+    REQUIRE(record.dagSpec.tasks["C_0"].children.empty());
+
+    // Ensure they were run in the right order
+    // All A's get run before B's, which run before C's
+    daggy::TimePoint globalStopTime = daggy::Clock::now();
+    std::array<daggy::TimePoint, 3> minTimes;
+    minTimes.fill(globalStartTime);
+    std::array<daggy::TimePoint, 3> maxTimes;
+    maxTimes.fill(globalStopTime);
+
+    for (const auto &[k, v] : record.taskAttempts) {
+      size_t idx = k[0] - 65;
+      auto &startTime = minTimes[idx];
+      auto &stopTime = maxTimes[idx];
+      startTime = std::max(startTime, v.front().startTime);
+      stopTime = std::min(stopTime, v.back().stopTime);
+    }
+
+    for (size_t i = 0; i < 3; ++i) {
+      for (size_t j = i + 1; j < 2; ++j) {
+        REQUIRE(maxTimes[i] < minTimes[j]);
+      }
+    }
+  }
+}

@@ -1,6 +1,7 @@
 #include <catch2/catch.hpp>
 #include <filesystem>
 #include <iostream>
+#include <thread>

 #include "daggy/Serialization.hpp"
 #include "daggy/Utilities.hpp"
@@ -18,7 +19,7 @@ TEST_CASE("forking_executor", "[forking_executor]")

   REQUIRE(ex.validateTaskParameters(task.job));

-  auto recFuture = ex.execute("command", task);
+  auto recFuture = ex.execute(0, "command", task);
   auto rec = recFuture.get();

   REQUIRE(rec.rc == 0);
@@ -32,7 +33,7 @@ TEST_CASE("forking_executor", "[forking_executor]")
           .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
                     "/usr/bin/expr", "1", "+", "+"}}}};

-  auto recFuture = ex.execute("command", task);
+  auto recFuture = ex.execute(0, "command", task);
   auto rec = recFuture.get();

   REQUIRE(rec.rc == 2);
@@ -40,6 +41,28 @@ TEST_CASE("forking_executor", "[forking_executor]")
   REQUIRE(rec.outputLog.empty());
 }

+SECTION("Killing a long task")
+{
+  daggy::Task task{
+      .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
+                "/usr/bin/sleep", "30"}}}};
+
+  auto start = daggy::Clock::now();
+  auto recFuture = ex.execute(0, "command", task);
+  std::this_thread::sleep_for(1s);
+  ex.stop(0, "command");
+  auto rec = recFuture.get();
+  auto stop = daggy::Clock::now();
+
+  REQUIRE(rec.rc == 9);
+  REQUIRE(rec.errorLog.empty());
+  REQUIRE(rec.outputLog.empty());
+  REQUIRE(rec.executorLog == "Killed");
+  REQUIRE(
+      std::chrono::duration_cast<std::chrono::seconds>(stop - start).count() <
+      20);
+}
+
 SECTION("Large Output")
 {
   const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
@@ -54,7 +77,7 @@ TEST_CASE("forking_executor", "[forking_executor]")
           .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
                     "/usr/bin/cat", bigFile}}}};

-  auto recFuture = ex.execute("command", task);
+  auto recFuture = ex.execute(0, "command", task);
   auto rec = recFuture.get();

   REQUIRE(rec.rc == 0);

@@ -34,7 +34,7 @@ TEST_CASE("slurm_execution", "[slurm_executor]")

   REQUIRE(ex.validateTaskParameters(task.job));

-  auto recFuture = ex.execute("command", task);
+  auto recFuture = ex.execute(0, "command", task);
   auto rec = recFuture.get();

   REQUIRE(rec.rc == 0);
@@ -49,7 +49,7 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
                     "/usr/bin/expr", "1", "+", "+"}}}};
   task.job.merge(defaultJobValues);

-  auto recFuture = ex.execute("command", task);
+  auto recFuture = ex.execute(0, "command", task);
   auto rec = recFuture.get();

   REQUIRE(rec.rc != 0);
@@ -57,6 +57,23 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
   REQUIRE(rec.outputLog.empty());
 }

+SECTION("Killing a long task")
+{
+  daggy::Task task{
+      .job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
+                "/usr/bin/sleep", "30"}}}};
+  task.job.merge(defaultJobValues);
+
+  auto recFuture = ex.execute(0, "command", task);
+  ex.stop(0, "command");
+  auto rec = recFuture.get();
+
+  REQUIRE(rec.rc == 9);
+  REQUIRE(rec.errorLog.empty());
+  REQUIRE(rec.outputLog.empty());
+  REQUIRE(rec.executorLog == "Job cancelled by user.\n");
+}
+
 SECTION("Large Output")
 {
   const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
@@ -72,7 +89,7 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
                     "/usr/bin/cat", bigFile}}}};
   task.job.merge(defaultJobValues);

-  auto recFuture = ex.execute("command", task);
+  auto recFuture = ex.execute(0, "command", task);
   auto rec = recFuture.get();

   REQUIRE(rec.rc == 0);

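Common to both executor test files above: TaskExecutor::execute() now takes
the DAG run ID as its first argument, and executors expose a stop() keyed on
the same (runID, task name) pair, with Slurm cancellation reporting the same
rc as a SIGKILL. A condensed sketch of the pattern (identifiers as in this
diff; a fragment, not a standalone program):

daggy::executors::task::ForkingTaskExecutor ex(10);
daggy::Task task{
    .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
              "/usr/bin/sleep", "30"}}}};

auto recFuture = ex.execute(0, "command", task); // run 0, task "command"
std::this_thread::sleep_for(std::chrono::seconds(1)); // let the child start
ex.stop(0, "command");      // kills the process; the future still completes
auto rec = recFuture.get(); // rec.rc == 9, i.e. terminated by SIGKILL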
@@ -1,51 +1,131 @@
-#include <pistache/client.h>
+#include <curl/curl.h>
 #include <rapidjson/document.h>
+#include <sys/stat.h>

 #include <catch2/catch.hpp>
 #include <daggy/Serialization.hpp>
 #include <daggy/Server.hpp>
 #include <daggy/executors/task/ForkingTaskExecutor.hpp>
 #include <daggy/executors/task/NoopTaskExecutor.hpp>
 #include <daggy/loggers/dag_run/OStreamLogger.hpp>
 #include <filesystem>
 #include <iostream>
 #include <thread>

 namespace rj = rapidjson;

-Pistache::Http::Response REQUEST(const std::string &url,
-                                 const std::string &payload = "")
-{
-  Pistache::Http::Experimental::Client client;
-  client.init();
-  Pistache::Http::Response response;
-  auto reqSpec = (payload.empty() ? client.get(url) : client.post(url));
-  reqSpec.timeout(std::chrono::seconds(2));
-  if (!payload.empty()) {
-    reqSpec.body(payload);
-  }
-  auto request = reqSpec.send();
-  bool ok = false, error = false;
-  std::string msg;
-  request.then(
-      [&](Pistache::Http::Response rsp) {
-        ok = true;
-        response = std::move(rsp);
-      },
-      [&](std::exception_ptr ptr) {
-        error = true;
-        try {
-          std::rethrow_exception(std::move(ptr));
-        }
-        catch (std::exception &e) {
-          msg = e.what();
-        }
-      });
-
-  Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
-  barrier.wait_for(std::chrono::seconds(2));
-  client.shutdown();
-  if (error) {
-    throw std::runtime_error(msg);
+using namespace daggy;
+
+#ifdef DEBUG_HTTP
+static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
+                    void *userp)
+{
+  const char *text;
+  (void)handle; /* prevent compiler warning */
+  (void)userp;
+
+  switch (type) {
+  case CURLINFO_TEXT:
+    fprintf(stderr, "== Info: %s", data);
+  default: /* in case a new one is introduced to shock us */
+    return 0;
+
+  case CURLINFO_HEADER_OUT:
+    text = "=> Send header";
+    break;
+  case CURLINFO_DATA_OUT:
+    text = "=> Send data";
+    break;
+  case CURLINFO_SSL_DATA_OUT:
+    text = "=> Send SSL data";
+    break;
+  case CURLINFO_HEADER_IN:
+    text = "<= Recv header";
+    break;
+  case CURLINFO_DATA_IN:
+    text = "<= Recv data";
+    break;
+  case CURLINFO_SSL_DATA_IN:
+    text = "<= Recv SSL data";
+    break;
+  }
+
+  std::cerr << "\n================== " << text
+            << " ==================" << std::endl
+            << data << std::endl;
+  return 0;
+}
+#endif
+
+enum HTTPCode : long
+{
+  Ok = 200,
+  Not_Found = 404
+};
+
+struct HTTPResponse
+{
+  HTTPCode code;
+  std::string body;
+};
+
+uint curlWriter(char *in, uint size, uint nmemb, std::stringstream *out)
+{
+  uint r;
+  r = size * nmemb;
+  out->write(in, r);
+  return r;
+}
+
+HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
+                     const std::string &method = "GET")
+{
+  HTTPResponse response;
+
+  CURL *curl;
+  CURLcode res;
+  struct curl_slist *headers = NULL;
+
+  curl_global_init(CURL_GLOBAL_ALL);
+
+  curl = curl_easy_init();
+  if (curl) {
+    std::stringstream buffer;
+
+#ifdef DEBUG_HTTP
+    curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
+    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
+#endif
+
+    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
+
+    if (!payload.empty()) {
+      curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, payload.size());
+      curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
+      headers = curl_slist_append(headers, "Content-Type: Application/Json");
+    }
+    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
+    headers = curl_slist_append(headers, "Expect:");
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+
+    res = curl_easy_perform(curl);
+
+    if (res != CURLE_OK) {
+      curl_easy_cleanup(curl);
+      throw std::runtime_error(std::string{"CURL Failed: "} +
+                               curl_easy_strerror(res));
+    }
+    // Read the response code before releasing the handle.
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response.code);
+    response.body = buffer.str();
+    curl_easy_cleanup(curl);
+  }
+
+  curl_global_cleanup();
+
+  return response;
+}

@@ -68,19 +148,19 @@ TEST_CASE("rest_endpoint", "[server_basic]")
   SECTION("Ready Endpoint")
   {
     auto response = REQUEST(baseURL + "/ready");
-    REQUIRE(response.code() == Pistache::Http::Code::Ok);
+    REQUIRE(response.code == HTTPCode::Ok);
   }

   SECTION("Querying a non-existent dagrunid should fail")
   {
     auto response = REQUEST(baseURL + "/v1/dagrun/100");
-    REQUIRE(response.code() != Pistache::Http::Code::Ok);
+    REQUIRE(response.code != HTTPCode::Ok);
   }

   SECTION("Simple DAGRun Submission")
   {
     std::string dagRun = R"({
-  "name": "unit_server",
+  "tag": "unit_server",
   "parameters": { "FILE": [ "A", "B" ] },
   "tasks": {
     "touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
@@ -90,14 +170,16 @@ TEST_CASE("rest_endpoint", "[server_basic]")
   }
 })";

+    auto dagSpec = daggy::dagFromJSON(dagRun);
+
     // Submit, and get the runID
     daggy::DAGRunID runID = 0;
     {
-      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun);
-      REQUIRE(response.code() == Pistache::Http::Code::Ok);
+      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
+      REQUIRE(response.code == HTTPCode::Ok);

       rj::Document doc;
-      daggy::checkRJParse(doc.Parse(response.body().c_str()));
+      daggy::checkRJParse(doc.Parse(response.body.c_str()));
       REQUIRE(doc.IsObject());
       REQUIRE(doc.HasMember("runID"));

@@ -106,11 +188,11 @@ TEST_CASE("rest_endpoint", "[server_basic]")

     // Ensure our runID shows up in the list of running DAGs
     {
-      auto response = REQUEST(baseURL + "/v1/dagrun/");
-      REQUIRE(response.code() == Pistache::Http::Code::Ok);
+      auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
+      REQUIRE(response.code == HTTPCode::Ok);

       rj::Document doc;
-      daggy::checkRJParse(doc.Parse(response.body().c_str()));
+      daggy::checkRJParse(doc.Parse(response.body.c_str()));
       REQUIRE(doc.IsArray());
       REQUIRE(doc.Size() >= 1);

@@ -120,10 +202,10 @@ TEST_CASE("rest_endpoint", "[server_basic]")
       for (size_t i = 0; i < runs.Size(); ++i) {
         const auto &run = runs[i];
         REQUIRE(run.IsObject());
-        REQUIRE(run.HasMember("name"));
+        REQUIRE(run.HasMember("tag"));
         REQUIRE(run.HasMember("runID"));

-        std::string runName = run["name"].GetString();
+        std::string runName = run["tag"].GetString();
         if (runName == "unit_server") {
           REQUIRE(run["runID"].GetUint64() == runID);
           found = true;
@@ -133,13 +215,28 @@ TEST_CASE("rest_endpoint", "[server_basic]")
       REQUIRE(found);
     }

+    // Ensure we can get one of our tasks
+    {
+      auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
+                              "/task/cat_0");
+      REQUIRE(response.code == HTTPCode::Ok);
+
+      rj::Document doc;
+      daggy::checkRJParse(doc.Parse(response.body.c_str()));
+
+      REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
+      auto task = daggy::taskFromJSON("cat", doc);
+
+      REQUIRE(task == dagSpec.tasks.at("cat"));
+    }
+
     // Wait until our DAG is complete
     bool complete = true;
     for (auto i = 0; i < 10; ++i) {
       auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
-      REQUIRE(response.code() == Pistache::Http::Code::Ok);
+      REQUIRE(response.code == HTTPCode::Ok);
       rj::Document doc;
-      daggy::checkRJParse(doc.Parse(response.body().c_str()));
+      daggy::checkRJParse(doc.Parse(response.body.c_str()));
       REQUIRE(doc.IsObject());

       REQUIRE(doc.HasMember("taskStates"));
@@ -173,6 +270,113 @@ TEST_CASE("rest_endpoint", "[server_basic]")
       fs::remove(pth);
     }
   }
 }

TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
{
|
||||
std::stringstream ss;
|
||||
daggy::executors::task::ForkingTaskExecutor executor(10);
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
Pistache::Address listenSpec("localhost", Pistache::Port(0));
|
||||
|
||||
const size_t nDAGRunners = 10, nWebThreads = 10;
|
||||
|
||||
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
server.init(nWebThreads);
|
||||
server.start();
|
||||
|
||||
const std::string host = "localhost:";
|
||||
const std::string baseURL = host + std::to_string(server.getPort());
|
||||
|
||||
SECTION("Cancel / Resume DAGRun")
|
||||
{
|
||||
std::string dagRunJSON = R"({
|
||||
"tag": "unit_server",
|
||||
"tasks": {
|
||||
"touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
|
||||
"sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
|
||||
"touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
|
||||
}
|
||||
})";
|
||||
|
||||
auto dagSpec = daggy::dagFromJSON(dagRunJSON);
|
||||
|
||||
// Submit, and get the runID
|
||||
daggy::DAGRunID runID;
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
REQUIRE(doc.IsObject());
|
||||
REQUIRE(doc.HasMember("runID"));
|
||||
|
||||
runID = doc["runID"].GetUint64();
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(1s);
|
||||
|
||||
// Stop the current run
|
||||
{
|
||||
auto response = REQUEST(
|
||||
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
|
||||
"PATCH");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
|
||||
}
|
||||
|
||||
// Verify that the run still exists
|
||||
{
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
|
||||
+daggy::RunState::COMPLETED);
|
||||
REQUIRE(fs::exists("resume_touch_a"));
|
||||
|
||||
REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
|
||||
+daggy::RunState::ERRORED);
|
||||
REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Set the errored task state
|
||||
{
|
||||
auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
|
||||
"/task/sleep_B_0/state/QUEUED";
|
||||
auto response = REQUEST(url, "", "PATCH");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
|
||||
+daggy::RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Resume
|
||||
{
|
||||
struct stat s;
|
||||
|
||||
lstat("resume_touch_A", &s);
|
||||
auto preMTime = s.st_mtim.tv_sec;
|
||||
|
||||
auto response = REQUEST(
|
||||
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
|
||||
"PATCH");
|
||||
|
||||
// Wait for run to complete
|
||||
std::this_thread::sleep_for(5s);
|
||||
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);
|
||||
|
||||
REQUIRE(fs::exists("resume_touch_c"));
|
||||
REQUIRE(fs::exists("resume_touch_a"));
|
||||
|
||||
for (const auto &[taskName, task] : dagSpec.tasks) {
|
||||
REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
|
||||
+daggy::RunState::COMPLETED);
|
||||
}
|
||||
|
||||
// Ensure "touch_A" wasn't run again
|
||||
lstat("resume_touch_A", &s);
|
||||
auto postMTime = s.st_mtim.tv_sec;
|
||||
REQUIRE(preMTime == postMTime);
|
||||
}
|
||||
}
|
||||
|
||||
server.shutdown();
|
||||
}
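Taken together, the server tests exercise the new REST surface defined in this
series. A condensed view of the routes, reusing the REQUEST(url, payload,
method) helper from the test file above (the runID of 42 and the task names
are illustrative):

auto submitted = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST"); // submit a run
auto runs      = REQUEST(baseURL + "/v1/dagruns?all=1");               // list DAG runs
auto run       = REQUEST(baseURL + "/v1/dagrun/42");                   // states for one run
auto task      = REQUEST(baseURL + "/v1/dagrun/42/task/cat_0");        // one task's spec
auto killed    = REQUEST(baseURL + "/v1/dagrun/42/state/KILLED",
                         "", "PATCH");                                 // cancel the run
auto requeued  = REQUEST(baseURL + "/v1/dagrun/42/task/sleep_B_0/state/QUEUED",
                         "", "PATCH");                                 // re-queue a task
auto resumed   = REQUEST(baseURL + "/v1/dagrun/42/state/QUEUED",
                         "", "PATCH");                                 // resume the run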
@@ -8,11 +8,6 @@

 #include "daggy/Serialization.hpp"
 #include "daggy/Utilities.hpp"
-#include "daggy/executors/task/ForkingTaskExecutor.hpp"
-#include "daggy/executors/task/NoopTaskExecutor.hpp"
-#include "daggy/loggers/dag_run/OStreamLogger.hpp"
-
-namespace fs = std::filesystem;

 TEST_CASE("string_utilities", "[utilities_string]")
 {
@@ -59,234 +54,3 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
     REQUIRE(result.size() == 4);
   }
 }
-
-TEST_CASE("dag_runner_order", "[dagrun_order]")
-{
-  daggy::executors::task::NoopTaskExecutor ex;
-  std::stringstream ss;
-  daggy::loggers::dag_run::OStreamLogger logger(ss);
-
-  daggy::TimePoint globalStartTime = daggy::Clock::now();
-
-  std::string testParams{
-      R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
-  auto params = daggy::configFromJSON(testParams);
-
-  std::string taskJSON = R"({
-  "A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
-  "B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
-  "C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
-  "D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
-  "E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
-})";
-
-  auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex, params);
-
-  REQUIRE(tasks.size() == 20);
-
-  auto dag = daggy::buildDAGFromTasks(tasks);
-  auto runID = logger.startDAGRun("test_run", tasks);
-  auto endDAG = daggy::runDAG(runID, ex, logger, dag);
-
-  REQUIRE(endDAG.allVisited());
-
-  // Ensure the run order
-  auto rec = logger.getDAGRun(runID);
-
-  daggy::TimePoint globalStopTime = daggy::Clock::now();
-  std::array<daggy::TimePoint, 5> minTimes;
-  minTimes.fill(globalStartTime);
-  std::array<daggy::TimePoint, 5> maxTimes;
-  maxTimes.fill(globalStopTime);
-
-  for (const auto &[k, v] : rec.taskAttempts) {
-    size_t idx = k[0] - 65;
-    auto &startTime = minTimes[idx];
-    auto &stopTime = maxTimes[idx];
-    startTime = std::max(startTime, v.front().startTime);
-    stopTime = std::min(stopTime, v.back().stopTime);
-  }
-
-  for (size_t i = 0; i < 5; ++i) {
-    for (size_t j = i + 1; j < 4; ++j) {
-      REQUIRE(maxTimes[i] < minTimes[j]);
-    }
-  }
-}
-
-TEST_CASE("dag_runner", "[utilities_dag_runner]")
-{
-  daggy::executors::task::ForkingTaskExecutor ex(10);
-  std::stringstream ss;
-  daggy::loggers::dag_run::OStreamLogger logger(ss);
-
-  SECTION("Simple execution")
-  {
-    std::string prefix = (fs::current_path() / "asdlk").string();
-    std::unordered_map<std::string, std::string> files{
-        {"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
-    std::string taskJSON =
-        R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
-        R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
-        files.at("B") +
-        R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
-        files.at("C") + R"("]}}})";
-    auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
-    auto dag = daggy::buildDAGFromTasks(tasks);
-    auto runID = logger.startDAGRun("test_run", tasks);
-    auto endDAG = daggy::runDAG(runID, ex, logger, dag);
-
-    REQUIRE(endDAG.allVisited());
-
-    for (const auto &[_, file] : files) {
-      REQUIRE(fs::exists(file));
-      fs::remove(file);
-    }
-
-    // Get the DAG Run Attempts
-    auto record = logger.getDAGRun(runID);
-    for (const auto &[_, attempts] : record.taskAttempts) {
-      REQUIRE(attempts.size() == 1);
-      REQUIRE(attempts.front().rc == 0);
-    }
-  }
-}
-
-TEST_CASE("runDAG_recovery", "[runDAG]")
-{
-  daggy::executors::task::ForkingTaskExecutor ex(10);
-  std::stringstream ss;
-  daggy::loggers::dag_run::OStreamLogger logger(ss);
-
-  SECTION("Recovery from Error")
-  {
-    auto cleanup = []() {
-      // Cleanup
-      std::vector<fs::path> paths{"rec_error_A", "noexist"};
-      for (const auto &pth : paths) {
-        if (fs::exists(pth))
-          fs::remove_all(pth);
-      }
-    };
-
-    cleanup();
-
-    std::string goodPrefix = "rec_error_";
-    std::string badPrefix = "noexist/rec_error_";
-    std::string taskJSON =
-        R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
-        R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
-        badPrefix +
-        R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
-        badPrefix + R"(C"]}}})";
-    auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
-    auto dag = daggy::buildDAGFromTasks(tasks);
-
-    auto runID = logger.startDAGRun("test_run", tasks);
-
-    auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
-
-    REQUIRE(!tryDAG.allVisited());
-
-    // Create the missing dir, then continue to run the DAG
-    fs::create_directory("noexist");
-    tryDAG.resetRunning();
-    auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
-
-    REQUIRE(endDAG.allVisited());
-
-    // Get the DAG Run Attempts
-    auto record = logger.getDAGRun(runID);
-    REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
-    REQUIRE(record.taskAttempts["B_0"].size() ==
-            2); // B errored and had to be retried
-    REQUIRE(record.taskAttempts["C_0"].size() ==
-            1); // C wasn't run because B errored
-
-    cleanup();
-  }
-}
-
-TEST_CASE("runDAG_generator", "[runDAG_generator]")
-{
-  daggy::executors::task::ForkingTaskExecutor ex(10);
-  std::stringstream ss;
-  daggy::loggers::dag_run::OStreamLogger logger(ss);
-
-  SECTION("Generator tasks")
-  {
-    std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
-    auto params = daggy::configFromJSON(testParams);
-
-    std::string generatorOutput =
-        R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
-    fs::path ofn = fs::current_path() / "generator_test_output.json";
-    std::ofstream ofh{ofn};
-    ofh << generatorOutput << std::endl;
-    ofh.close();
-
-    daggy::TimePoint globalStartTime = daggy::Clock::now();
-    std::stringstream jsonTasks;
-    jsonTasks
-        << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
-        << std::quoted(ofn.string())
-        << R"(]}, "children": ["C"], "isGenerator": true},)"
-        << R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
-
-    auto baseTasks = daggy::tasksFromJSON(jsonTasks.str());
-    REQUIRE(baseTasks.size() == 2);
-    REQUIRE(baseTasks["A"].children == std::unordered_set<std::string>{"C"});
-    auto tasks = daggy::expandTaskSet(baseTasks, ex, params);
-    REQUIRE(tasks.size() == 2);
-    REQUIRE(tasks["A_0"].children == std::unordered_set<std::string>{"C"});
-    auto dag = daggy::buildDAGFromTasks(tasks);
-    REQUIRE(dag.size() == 2);
-
-    auto runID = logger.startDAGRun("generator_run", tasks);
-    auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
-
-    REQUIRE(finalDAG.allVisited());
-    REQUIRE(finalDAG.size() == 4);
-
-    // Check the logger
-    auto record = logger.getDAGRun(runID);
-
-    REQUIRE(record.tasks.size() == 4);
-    REQUIRE(record.taskRunStates.size() == 4);
-    for (const auto &[taskName, attempts] : record.taskAttempts) {
-      REQUIRE(attempts.size() == 1);
-      REQUIRE(attempts.back().rc == 0);
-    }
-
-    // Ensure that children were updated properly
-    REQUIRE(record.tasks["A_0"].children ==
-            std::unordered_set<std::string>{"B_0", "B_1", "C"});
-    REQUIRE(record.tasks["B_0"].children ==
-            std::unordered_set<std::string>{"C"});
-    REQUIRE(record.tasks["B_1"].children ==
-            std::unordered_set<std::string>{"C"});
-    REQUIRE(record.tasks["C_0"].children.empty());
-
-    // Ensure they were run in the right order
-    // All A's get run before B's, which run before C's
-    daggy::TimePoint globalStopTime = daggy::Clock::now();
-    std::array<daggy::TimePoint, 3> minTimes;
-    minTimes.fill(globalStartTime);
-    std::array<daggy::TimePoint, 3> maxTimes;
-    maxTimes.fill(globalStopTime);
-
-    for (const auto &[k, v] : record.taskAttempts) {
-      size_t idx = k[0] - 65;
-      auto &startTime = minTimes[idx];
-      auto &stopTime = maxTimes[idx];
-      startTime = std::max(startTime, v.front().startTime);
-      stopTime = std::min(stopTime, v.back().stopTime);
-    }
-
-    for (size_t i = 0; i < 3; ++i) {
-      for (size_t j = i + 1; j < 2; ++j) {
-        REQUIRE(maxTimes[i] < minTimes[j]);
-      }
-    }
-  }
-}