Adding support for isGenerator tasks

- Changing how DAGs are represented, both in code and in how they are defined
  in JSON (see the before/after sketch after this list).
- Removing the std::vector<Task> representation in favour of a map that
  enforces unique task names.
- Tasks now carry both a name (generated) and a definedName (a rough Task
  sketch follows the commit metadata below).
- Adding support for loggers to record tasks added after a DAGRun has been
  initialized.
Ian Roddis
2021-08-30 22:05:37 -03:00
parent dd6159dda8
commit 2c00001e0b
22 changed files with 672 additions and 396 deletions
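The name/definedName split implies a Task record roughly like the sketch
below. Only name, definedName, command, children, and isGenerator come from
the commit message and the test JSON; the member types and layout are
assumptions, not the definition used in this commit.

    #include <string>
    #include <unordered_set>
    #include <vector>

    // Sketch only: member types are assumed from the JSON keys in the tests.
    struct Task {
        std::string name;                         // unique key, possibly generated (e.g. "B_0")
        std::string definedName;                  // name as written in the task JSON (e.g. "B")
        std::vector<std::string> command;         // argv-style command to run
        std::unordered_set<std::string> children; // names of downstream tasks
        bool isGenerator = false;                 // stdout is parsed as further task definitions
    };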

@@ -62,20 +62,20 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
SECTION("Simple execution") {
std::string prefix = "asdlk_";
std::string taskJSON = R"([{"name": "A", "command": ["/usr/bin/touch", ")"
+ prefix + R"(A"], "children": ["C"]}, {"name": "B", "command": ["/usr/bin/touch", ")"
+ prefix + R"(B"], "children": ["C"]}, {"name": "C", "command": ["/usr/bin/touch", ")"
+ prefix + R"(C"]}])";
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
- auto endDAG = daggy::runDAG(runID, tasks, ex, logger, dag);
+ auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
std::vector<std::string> letters{"A", "B", "C"};
- for (const auto &letter : letters) {
+ for (const auto &letter: letters) {
fs::path file{prefix + letter};
REQUIRE(fs::exists(file));
fs::remove(file);
@@ -83,7 +83,7 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
- for (const auto &attempts : record.taskAttempts) {
+ for (const auto &[_, attempts]: record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
@@ -93,45 +93,80 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
- for (const auto &pth : paths) {
+ for (const auto &pth: paths) {
if (fs::exists(pth)) fs::remove_all(pth);
}
};
cleanup();
// daggy::loggers::dag_run::OStreamLogger logger(std::cout);
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON = R"([{"name": "A", "command": ["/usr/bin/touch", ")"
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
+ goodPrefix +
R"(A"], "children": ["C"]}, {"name": "B", "command": ["/usr/bin/touch", ")"
+ badPrefix + R"(B"], "children": ["C"]}, {"name": "C", "command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}])";
R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
- auto tryDAG = daggy::runDAG(runID, tasks, ex, logger, dag);
+ auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
tryDAG.resetRunning();
- auto endDAG = daggy::runDAG(runID, tasks, ex, logger, tryDAG);
+ auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
- REQUIRE(record.taskAttempts[0].size() == 1); // A ran fine
- REQUIRE(record.taskAttempts[1].size() == 2); // B errored and had to be retried
- REQUIRE(record.taskAttempts[2].size() == 1); // C wasn't run because B errored
+ REQUIRE(record.taskAttempts["A"].size() == 1); // A ran fine
+ REQUIRE(record.taskAttempts["B"].size() == 2); // B errored and had to be retried
+ REQUIRE(record.taskAttempts["C"].size() == 1); // C wasn't run because B errored
cleanup();
}
SECTION("Generator tasks") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::parametersFromJSON(testParams);
std::string generatorOutput = R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})";
std::stringstream jsonTasks;
jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput)
<< R"(], "children": ["C"], "isGenerator": true},)"
<< R"("C": { "command": [ "/usr/bin/echo", "hello!"] } })";
auto tasks = daggy::tasksFromJSON(jsonTasks.str());
auto dag = daggy::buildDAGFromTasks(tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun("generator_run", tasks);
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto & [taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C"].children.empty());
}
}
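
A note on the new "Generator tasks" section: task A is marked "isGenerator":
true and simply echoes a task-definition document for B templated on {{DATE}}.
With the two DATE values supplied via params, the run finishes with four tasks
(A, B_0, B_1, C) and A's children become {"B_0", "B_1", "C"}, as the REQUIREs
check. The intermediate expansion presumably looks like this (the suffixing
and parameter substitution are inferred from the assertions, not captured from
the code):

    Generator output echoed by A:
        {"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}}

    Expanded over DATE in ["2021-05-06", "2021-05-07"]:
        {"B_0": {"command": ["/usr/bin/echo", "2021-05-06"], "children": ["C"]},
         "B_1": {"command": ["/usr/bin/echo", "2021-05-07"], "children": ["C"]}}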