Adding support for isGenerator tasks

- Changing how the DAG is represented, both in code and in how DAGs are
  defined in JSON (see the sketch of the new task JSON below).
- Removing the std::vector<Task> representation in favour of a map that
  enforces unique task names.
- Tasks now have a name (generated) and a definedName.
- Adding support for loggers to add tasks after a DAGRun has been
  initialized.
Ian Roddis
2021-08-30 22:05:37 -03:00
parent dd6159dda8
commit 2c00001e0b
22 changed files with 672 additions and 396 deletions
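
A rough sketch of the new map-keyed task JSON and the entry points it feeds, pieced together from the tests below. The include path is a guess (the header declaring tasksFromJSON and buildDAGFromTasks is not visible in this diff), the main() wrapper is only for illustration, and the executor/logger wiring needed to actually run the DAG is left as comments.

#include <cassert>
#include <iomanip>
#include <sstream>
#include <string>
// Hypothetical include path: the header that declares tasksFromJSON and
// buildDAGFromTasks is not shown in this diff.
#include "daggy/Serialization.hpp"
int main() {
    // Tasks are now a JSON object keyed by task name rather than an array of
    // {"name": ...} objects. A task flagged with "isGenerator": true prints
    // further task definitions on stdout; those tasks are spliced into the
    // DAG (and the DAGRun) while it executes.
    std::string generatorOutput =
        R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})";
    std::stringstream jsonTasks;
    jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput)
              << R"(], "children": ["C"], "isGenerator": true },)"
              << R"( "C": { "command": [ "/usr/bin/echo", "hello!" ] } })";
    auto tasks = daggy::tasksFromJSON(jsonTasks.str());  // map keyed by task name
    auto dag = daggy::buildDAGFromTasks(tasks);          // generated "B" tasks not expanded yet
    assert(dag.size() == 2);
    // Running the DAG also needs an executor and a DAGRunLogger, wired up as
    // in the runner test below:
    //   auto runID = logger.startDAGRun("generator_run", tasks);
    //   auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
    return 0;
}

Once run, the generated tasks appear in the DAGRun record under generated names ("B_0", "B_1"), with the generator's children updated to include them, as the "Generator tasks" test asserts.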

View File

@@ -5,14 +5,16 @@
#include <catch2/catch.hpp>
TEST_CASE("DAG Construction Tests", "[dag]") {
daggy::DAG dag;
daggy::DAG<size_t, size_t> dag;
REQUIRE(dag.size() == 0);
REQUIRE(dag.empty());
REQUIRE_NOTHROW(dag.addVertex());
REQUIRE_NOTHROW(dag.addVertex(0, 0));
for (int i = 1; i < 10; ++i) {
dag.addVertex();
dag.addVertex(i, i);
REQUIRE(dag.hasVertex(i));
REQUIRE(dag.getVertex(i).data == i);
dag.addEdge(i - 1, i);
}
@@ -26,9 +28,6 @@ TEST_CASE("DAG Construction Tests", "[dag]") {
SECTION("addEdge Bounds Checking") {
REQUIRE_THROWS(dag.addEdge(20, 0));
REQUIRE_THROWS(dag.addEdge(0, 20));
}SECTION("dropEdge Bounds Checking") {
REQUIRE_THROWS(dag.dropEdge(20, 0));
REQUIRE_THROWS(dag.dropEdge(0, 20));
}SECTION("hasPath Bounds Checking") {
REQUIRE_THROWS(dag.hasPath(20, 0));
REQUIRE_THROWS(dag.hasPath(0, 20));
@@ -36,11 +35,11 @@ TEST_CASE("DAG Construction Tests", "[dag]") {
}
TEST_CASE("DAG Traversal Tests", "[dag]") {
daggy::DAG dag;
daggy::DAG<size_t, size_t> dag;
const int N_VERTICES = 10;
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(); }
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(i, i); }
/*
0 ---------------------\
@@ -61,24 +60,30 @@ TEST_CASE("DAG Traversal Tests", "[dag]") {
{7, 9}
};
for (auto const[from, to] : edges) {
for (auto const[from, to]: edges) {
dag.addEdge(from, to);
}
SECTION("Baisc Traversal") {
SECTION("Basic Traversal") {
dag.reset();
std::vector<int> visitOrder(N_VERTICES);
size_t i = 0;
while (!dag.allVisited()) {
const auto &v = dag.visitNext().value();
dag.completeVisit(v);
visitOrder[v] = i;
dag.completeVisit(v.key);
visitOrder[v.key] = i;
++i;
}
// Ensure visit order is preserved
for (auto const[from, to] : edges) {
for (auto const[from, to]: edges) {
REQUIRE(visitOrder[from] <= visitOrder[to]);
}
}
SECTION("Iteration") {
size_t nVisited = 0;
dag.forEach([&](const daggy::Vertex<size_t, size_t> &) { ++nVisited; });
REQUIRE(nVisited == dag.size());
}
}

View File

@@ -0,0 +1,67 @@
#include <iostream>
#include <filesystem>
#include <fstream>
#include <catch2/catch.hpp>
#include "daggy/loggers/dag_run/FileSystemLogger.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
using namespace daggy;
using namespace daggy::loggers::dag_run;
const TaskList SAMPLE_TASKS{
{"work_a", Task{.command{"/bin/echo", "a"}, .children{"c"}}},
{"work_b", Task{.command{"/bin/echo", "b"}, .children{"c"}}},
{"work_c", Task{.command{"/bin/echo", "c"}}}
};
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskList &tasks) {
auto runID = logger.startDAGRun(name, tasks);
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.tasks == tasks);
REQUIRE(dagRun.taskRunStates.size() == tasks.size());
auto nonQueuedTask = std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
[](const auto &a) { return a.second != +RunState::QUEUED; });
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
REQUIRE(dagRun.dagStateChanges.size() == 1);
REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);
return runID;
}
/*
TEST_CASE("Filesystem Logger", "[filesystem_logger]") {
const fs::path logRoot{"fs_logger_unit"};
auto cleanup = [&]() {
if (fs::exists(logRoot)) {
fs::remove_all(logRoot);
}
};
//cleanup();
daggy::loggers::dag_run::FileSystemLogger logger(logRoot);
SECTION("DAGRun Starts") {
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
// cleanup();
}
*/
TEST_CASE("ostream Logger", "[ostream_logger]") {
//cleanup();
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("DAGRun Starts") {
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
// cleanup();
}

View File

@@ -29,7 +29,7 @@ TEST_CASE("Deserialize Parameters", "[deserialize_parameters]") {
TEST_CASE("Task Deserialization", "[deserialize_task]") {
SECTION("Build with no expansion") {
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["C"]}, {"name": "B", "command": ["/bin/echo", "B"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
std::string testTasks = R"({ "A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
auto tasks = daggy::tasksFromJSON(testTasks);
REQUIRE(tasks.size() == 3);
}
@@ -37,7 +37,7 @@ TEST_CASE("Task Deserialization", "[deserialize_task]") {
SECTION("Build with expansion") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::parametersFromJSON(testParams);
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["B"]}, {"name": "B", "command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["B"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
auto tasks = daggy::tasksFromJSON(testTasks, params);
REQUIRE(tasks.size() == 4);
}
@@ -45,7 +45,7 @@ TEST_CASE("Task Deserialization", "[deserialize_task]") {
SECTION("Build with expansion using parents instead of children") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::parametersFromJSON(testParams);
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"]}, {"name": "B", "command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "parents": ["A"]},{"name": "C", "command": ["/bin/echo", "C"], "parents": ["A"]}])";
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "parents": ["A"]}, "C": {"command": ["/bin/echo", "C"], "parents": ["A"]}})";
auto tasks = daggy::tasksFromJSON(testTasks, params);
REQUIRE(tasks.size() == 4);
}
@@ -53,21 +53,16 @@ TEST_CASE("Task Deserialization", "[deserialize_task]") {
TEST_CASE("Task Serialization", "[serialize_tasks]") {
SECTION("Build with no expansion") {
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["C"]}, {"name": "B", "command": ["/bin/echo", "B"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
auto tasks = daggy::tasksFromJSON(testTasks);
std::unordered_map<std::string, size_t> taskMap;
for (size_t i = 0; i < tasks.size(); ++i) {
taskMap[tasks[i].name] = i;
}
auto genJSON = daggy::tasksToJSON(tasks);
auto regenTasks = daggy::tasksFromJSON(genJSON);
REQUIRE(regenTasks.size() == tasks.size());
for (const auto &task : regenTasks) {
const auto &other = tasks[taskMap[task.name]];
for (const auto &[name, task]: regenTasks) {
const auto &other = tasks[name];
REQUIRE(task == other);
}
}

View File

@@ -74,16 +74,12 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
std::string dagRun = R"({
"name": "unit_server",
"taskParameters": { "FILE": [ "A", "B" ] },
"tasks": [
{ "name": "touch",
"command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]
},
{
"name": "cat",
"command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ],
"tasks": {
"touch": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ] },
"cat": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ],
"parents": [ "touch" ]
}
]
}
})";
@@ -160,7 +156,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
REQUIRE(complete);
std::this_thread::sleep_for(std::chrono::seconds(2));
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
for (const auto &pth: std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
REQUIRE(fs::exists(pth));
fs::remove(pth);
}

View File

@@ -62,20 +62,20 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
SECTION("Simple execution") {
std::string prefix = "asdlk_";
std::string taskJSON = R"([{"name": "A", "command": ["/usr/bin/touch", ")"
+ prefix + R"(A"], "children": ["C"]}, {"name": "B", "command": ["/usr/bin/touch", ")"
+ prefix + R"(B"], "children": ["C"]}, {"name": "C", "command": ["/usr/bin/touch", ")"
+ prefix + R"(C"]}])";
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, tasks, ex, logger, dag);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
std::vector<std::string> letters{"A", "B", "C"};
for (const auto &letter : letters) {
for (const auto &letter: letters) {
fs::path file{prefix + letter};
REQUIRE(fs::exists(file));
fs::remove(file);
@@ -83,7 +83,7 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto &attempts : record.taskAttempts) {
for (const auto &[_, attempts]: record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
@@ -93,45 +93,80 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth : paths) {
for (const auto &pth: paths) {
if (fs::exists(pth)) fs::remove_all(pth);
}
};
cleanup();
// daggy::loggers::dag_run::OStreamLogger logger(std::cout);
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON = R"([{"name": "A", "command": ["/usr/bin/touch", ")"
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
+ goodPrefix +
R"(A"], "children": ["C"]}, {"name": "B", "command": ["/usr/bin/touch", ")"
+ badPrefix + R"(B"], "children": ["C"]}, {"name": "C", "command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}])";
R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto tryDAG = daggy::runDAG(runID, tasks, ex, logger, dag);
auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
tryDAG.resetRunning();
auto endDAG = daggy::runDAG(runID, tasks, ex, logger, tryDAG);
auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts[0].size() == 1); // A ran fine
REQUIRE(record.taskAttempts[1].size() == 2); // B errored and had to be retried
REQUIRE(record.taskAttempts[2].size() == 1); // C wasn't run because B errored
REQUIRE(record.taskAttempts["A"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B"].size() == 2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C"].size() == 1); // C wasn't run because B errored
cleanup();
}
SECTION("Generator tasks") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::parametersFromJSON(testParams);
std::string generatorOutput = R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})";
std::stringstream jsonTasks;
jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput)
<< R"(], "children": ["C"], "isGenerator": true},)"
<< R"("C": { "command": [ "/usr/bin/echo", "hello!"] } })";
auto tasks = daggy::tasksFromJSON(jsonTasks.str());
auto dag = daggy::buildDAGFromTasks(tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun("generator_run", tasks);
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto & [taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C"].children.empty());
}
}