Adding support for isGenerator tasks
- Changing how the DAG is represented, both in code and in how DAGs are defined in JSON. - Removing the std::vector<Task> representation in favour of a map that enforces unique task names. - Tasks now have a name (generated) and a definedName. - Adding support to loggers for adding tasks after a DAGRun has been initialized.
This commit is contained in:
@@ -5,14 +5,16 @@
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
TEST_CASE("DAG Construction Tests", "[dag]") {
|
||||
daggy::DAG dag;
|
||||
daggy::DAG<size_t, size_t> dag;
|
||||
|
||||
REQUIRE(dag.size() == 0);
|
||||
REQUIRE(dag.empty());
|
||||
|
||||
REQUIRE_NOTHROW(dag.addVertex());
|
||||
REQUIRE_NOTHROW(dag.addVertex(0, 0));
|
||||
for (int i = 1; i < 10; ++i) {
|
||||
dag.addVertex();
|
||||
dag.addVertex(i, i);
|
||||
REQUIRE(dag.hasVertex(i));
|
||||
REQUIRE(dag.getVertex(i).data == i);
|
||||
dag.addEdge(i - 1, i);
|
||||
}
|
||||
|
||||
@@ -26,9 +28,6 @@ TEST_CASE("DAG Construction Tests", "[dag]") {
|
||||
SECTION("addEdge Bounds Checking") {
|
||||
REQUIRE_THROWS(dag.addEdge(20, 0));
|
||||
REQUIRE_THROWS(dag.addEdge(0, 20));
|
||||
}SECTION("dropEdge Bounds Checking") {
|
||||
REQUIRE_THROWS(dag.dropEdge(20, 0));
|
||||
REQUIRE_THROWS(dag.dropEdge(0, 20));
|
||||
}SECTION("hasPath Bounds Checking") {
|
||||
REQUIRE_THROWS(dag.hasPath(20, 0));
|
||||
REQUIRE_THROWS(dag.hasPath(0, 20));
|
||||
@@ -36,11 +35,11 @@ TEST_CASE("DAG Construction Tests", "[dag]") {
|
||||
}
|
||||
|
||||
TEST_CASE("DAG Traversal Tests", "[dag]") {
|
||||
daggy::DAG dag;
|
||||
daggy::DAG<size_t, size_t> dag;
|
||||
|
||||
const int N_VERTICES = 10;
|
||||
|
||||
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(); }
|
||||
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(i, i); }
|
||||
|
||||
/*
|
||||
0 ---------------------\
|
||||
@@ -61,24 +60,30 @@ TEST_CASE("DAG Traversal Tests", "[dag]") {
|
||||
{7, 9}
|
||||
};
|
||||
|
||||
for (auto const[from, to] : edges) {
|
||||
for (auto const[from, to]: edges) {
|
||||
dag.addEdge(from, to);
|
||||
}
|
||||
|
||||
SECTION("Baisc Traversal") {
|
||||
SECTION("Basic Traversal") {
|
||||
dag.reset();
|
||||
std::vector<int> visitOrder(N_VERTICES);
|
||||
size_t i = 0;
|
||||
while (!dag.allVisited()) {
|
||||
const auto &v = dag.visitNext().value();
|
||||
dag.completeVisit(v);
|
||||
visitOrder[v] = i;
|
||||
dag.completeVisit(v.key);
|
||||
visitOrder[v.key] = i;
|
||||
++i;
|
||||
}
|
||||
|
||||
// Ensure visit order is preserved
|
||||
for (auto const[from, to] : edges) {
|
||||
for (auto const[from, to]: edges) {
|
||||
REQUIRE(visitOrder[from] <= visitOrder[to]);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Iteration") {
|
||||
size_t nVisited = 0;
|
||||
dag.forEach([&](const daggy::Vertex<size_t, size_t> &) { ++nVisited; });
|
||||
REQUIRE(nVisited == dag.size());
|
||||
}
|
||||
}
|
||||
|
||||
67
tests/unit_dagrun_loggers.cpp
Normal file
67
tests/unit_dagrun_loggers.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
#include "daggy/loggers/dag_run/FileSystemLogger.hpp"
|
||||
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
using namespace daggy;
|
||||
using namespace daggy::loggers::dag_run;
|
||||
|
||||
// Fixture shared by the logger tests in this file: three tasks, each keyed by
// a name and running `/bin/echo` with a single letter argument.
// "work_a" and "work_b" both list "c" as a child; "work_c" has no children.
// NOTE(review): no entry is keyed "c" -- the children presumably mean
// "work_c". The logger tests below only compare the stored tasks against this
// fixture, so the mismatch is not exercised here; confirm before reusing this
// fixture to build a DAG.
const TaskList SAMPLE_TASKS{
|
||||
{"work_a", Task{.command{"/bin/echo", "a"}, .children{"c"}}},
|
||||
{"work_b", Task{.command{"/bin/echo", "b"}, .children{"c"}}},
|
||||
{"work_c", Task{.command{"/bin/echo", "c"}}}
|
||||
};
|
||||
|
||||
// Starts a DAG run on `logger` and checks the freshly recorded state.
//
// Parameters:
//   logger - the DAGRunLogger implementation under test.
//   name   - name passed to startDAGRun for the new run.
//   tasks  - task set passed to startDAGRun.
//
// Checks, via Catch2 REQUIRE, on the record returned by getDAGRun:
//   - the stored tasks compare equal to `tasks`;
//   - there is one task run state per task, and all of them are QUEUED;
//   - exactly one DAG state change was recorded, and its new state is QUEUED.
//
// Returns the run ID produced by startDAGRun.
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskList &tasks) {
|
||||
auto runID = logger.startDAGRun(name, tasks);
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
|
||||
REQUIRE(dagRun.tasks == tasks);
|
||||
|
||||
REQUIRE(dagRun.taskRunStates.size() == tasks.size());
|
||||
// Search for any task whose state is not QUEUED; finding none means all are queued.
// NOTE(review): the unary `+` on RunState::QUEUED looks like an enum-wrapper
// conversion idiom -- confirm against the RunState declaration.
auto nonQueuedTask = std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
|
||||
[](const auto &a) { return a.second != +RunState::QUEUED; });
|
||||
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
|
||||
|
||||
REQUIRE(dagRun.dagStateChanges.size() == 1);
|
||||
REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);
|
||||
return runID;
|
||||
}
|
||||
|
||||
/*
|
||||
TEST_CASE("Filesystem Logger", "[filesystem_logger]") {
|
||||
const fs::path logRoot{"fs_logger_unit"};
|
||||
auto cleanup = [&]() {
|
||||
if (fs::exists(logRoot)) {
|
||||
fs::remove_all(logRoot);
|
||||
}
|
||||
};
|
||||
|
||||
//cleanup();
|
||||
daggy::loggers::dag_run::FileSystemLogger logger(logRoot);
|
||||
|
||||
SECTION("DAGRun Starts") {
|
||||
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
|
||||
}
|
||||
|
||||
// cleanup();
|
||||
}
|
||||
*/
|
||||
|
||||
// Exercises OStreamLogger with an in-memory std::stringstream as its output
// stream, running the shared testDAGRunInit checks against SAMPLE_TASKS.
// The commented-out cleanup() calls below are leftovers: the only cleanup
// helper in this file is a lambda inside the disabled Filesystem Logger test,
// so nothing named cleanup is in scope here.
TEST_CASE("ostream Logger", "[ostream_logger]") {
|
||||
//cleanup();
|
||||
std::stringstream ss;
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
|
||||
SECTION("DAGRun Starts") {
|
||||
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
|
||||
}
|
||||
|
||||
// cleanup();
|
||||
}
|
||||
@@ -29,7 +29,7 @@ TEST_CASE("Deserialize Parameters", "[deserialize_parameters]") {
|
||||
|
||||
TEST_CASE("Task Deserialization", "[deserialize_task]") {
|
||||
SECTION("Build with no expansion") {
|
||||
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["C"]}, {"name": "B", "command": ["/bin/echo", "B"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
|
||||
std::string testTasks = R"({ "A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(testTasks);
|
||||
REQUIRE(tasks.size() == 3);
|
||||
}
|
||||
@@ -37,7 +37,7 @@ TEST_CASE("Task Deserialization", "[deserialize_task]") {
|
||||
SECTION("Build with expansion") {
|
||||
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::parametersFromJSON(testParams);
|
||||
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["B"]}, {"name": "B", "command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
|
||||
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["B"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(testTasks, params);
|
||||
REQUIRE(tasks.size() == 4);
|
||||
}
|
||||
@@ -45,7 +45,7 @@ TEST_CASE("Task Deserialization", "[deserialize_task]") {
|
||||
SECTION("Build with expansion using parents instead of children") {
|
||||
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::parametersFromJSON(testParams);
|
||||
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"]}, {"name": "B", "command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "parents": ["A"]},{"name": "C", "command": ["/bin/echo", "C"], "parents": ["A"]}])";
|
||||
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "parents": ["A"]}, "C": {"command": ["/bin/echo", "C"], "parents": ["A"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(testTasks, params);
|
||||
REQUIRE(tasks.size() == 4);
|
||||
}
|
||||
@@ -53,21 +53,16 @@ TEST_CASE("Task Deserialization", "[deserialize_task]") {
|
||||
|
||||
TEST_CASE("Task Serialization", "[serialize_tasks]") {
|
||||
SECTION("Build with no expansion") {
|
||||
std::string testTasks = R"([{"name": "A", "command": ["/bin/echo", "A"], "children": ["C"]}, {"name": "B", "command": ["/bin/echo", "B"], "children": ["C"]},{"name": "C", "command": ["/bin/echo", "C"]}])";
|
||||
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(testTasks);
|
||||
|
||||
std::unordered_map<std::string, size_t> taskMap;
|
||||
for (size_t i = 0; i < tasks.size(); ++i) {
|
||||
taskMap[tasks[i].name] = i;
|
||||
}
|
||||
|
||||
auto genJSON = daggy::tasksToJSON(tasks);
|
||||
auto regenTasks = daggy::tasksFromJSON(genJSON);
|
||||
|
||||
REQUIRE(regenTasks.size() == tasks.size());
|
||||
|
||||
for (const auto &task : regenTasks) {
|
||||
const auto &other = tasks[taskMap[task.name]];
|
||||
for (const auto &[name, task]: regenTasks) {
|
||||
const auto &other = tasks[name];
|
||||
REQUIRE(task == other);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,16 +74,12 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
|
||||
std::string dagRun = R"({
|
||||
"name": "unit_server",
|
||||
"taskParameters": { "FILE": [ "A", "B" ] },
|
||||
"tasks": [
|
||||
{ "name": "touch",
|
||||
"command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]
|
||||
},
|
||||
{
|
||||
"name": "cat",
|
||||
"command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ],
|
||||
"tasks": {
|
||||
"touch": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ] },
|
||||
"cat": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ],
|
||||
"parents": [ "touch" ]
|
||||
}
|
||||
]
|
||||
}
|
||||
})";
|
||||
|
||||
|
||||
@@ -160,7 +156,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
|
||||
REQUIRE(complete);
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(2));
|
||||
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
|
||||
for (const auto &pth: std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
|
||||
REQUIRE(fs::exists(pth));
|
||||
fs::remove(pth);
|
||||
}
|
||||
|
||||
@@ -62,20 +62,20 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
|
||||
|
||||
SECTION("Simple execution") {
|
||||
std::string prefix = "asdlk_";
|
||||
std::string taskJSON = R"([{"name": "A", "command": ["/usr/bin/touch", ")"
|
||||
+ prefix + R"(A"], "children": ["C"]}, {"name": "B", "command": ["/usr/bin/touch", ")"
|
||||
+ prefix + R"(B"], "children": ["C"]}, {"name": "C", "command": ["/usr/bin/touch", ")"
|
||||
+ prefix + R"(C"]}])";
|
||||
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
|
||||
+ prefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
|
||||
+ prefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
|
||||
+ prefix + R"(C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(taskJSON);
|
||||
auto dag = daggy::buildDAGFromTasks(tasks);
|
||||
|
||||
auto runID = logger.startDAGRun("test_run", tasks);
|
||||
auto endDAG = daggy::runDAG(runID, tasks, ex, logger, dag);
|
||||
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
|
||||
|
||||
REQUIRE(endDAG.allVisited());
|
||||
|
||||
std::vector<std::string> letters{"A", "B", "C"};
|
||||
for (const auto &letter : letters) {
|
||||
for (const auto &letter: letters) {
|
||||
fs::path file{prefix + letter};
|
||||
REQUIRE(fs::exists(file));
|
||||
fs::remove(file);
|
||||
@@ -83,7 +83,7 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
|
||||
|
||||
// Get the DAG Run Attempts
|
||||
auto record = logger.getDAGRun(runID);
|
||||
for (const auto &attempts : record.taskAttempts) {
|
||||
for (const auto &[_, attempts]: record.taskAttempts) {
|
||||
REQUIRE(attempts.size() == 1);
|
||||
REQUIRE(attempts.front().rc == 0);
|
||||
}
|
||||
@@ -93,45 +93,80 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
|
||||
auto cleanup = []() {
|
||||
// Cleanup
|
||||
std::vector<fs::path> paths{"rec_error_A", "noexist"};
|
||||
for (const auto &pth : paths) {
|
||||
for (const auto &pth: paths) {
|
||||
if (fs::exists(pth)) fs::remove_all(pth);
|
||||
}
|
||||
};
|
||||
|
||||
cleanup();
|
||||
|
||||
|
||||
// daggy::loggers::dag_run::OStreamLogger logger(std::cout);
|
||||
|
||||
std::string goodPrefix = "rec_error_";
|
||||
std::string badPrefix = "noexist/rec_error_";
|
||||
std::string taskJSON = R"([{"name": "A", "command": ["/usr/bin/touch", ")"
|
||||
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
|
||||
+ goodPrefix +
|
||||
R"(A"], "children": ["C"]}, {"name": "B", "command": ["/usr/bin/touch", ")"
|
||||
+ badPrefix + R"(B"], "children": ["C"]}, {"name": "C", "command": ["/usr/bin/touch", ")"
|
||||
+ badPrefix + R"(C"]}])";
|
||||
R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
|
||||
+ badPrefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
|
||||
+ badPrefix + R"(C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(taskJSON);
|
||||
auto dag = daggy::buildDAGFromTasks(tasks);
|
||||
|
||||
auto runID = logger.startDAGRun("test_run", tasks);
|
||||
|
||||
auto tryDAG = daggy::runDAG(runID, tasks, ex, logger, dag);
|
||||
auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
|
||||
|
||||
REQUIRE(!tryDAG.allVisited());
|
||||
|
||||
// Create the missing dir, then continue to run the DAG
|
||||
fs::create_directory("noexist");
|
||||
tryDAG.resetRunning();
|
||||
auto endDAG = daggy::runDAG(runID, tasks, ex, logger, tryDAG);
|
||||
auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
|
||||
|
||||
REQUIRE(endDAG.allVisited());
|
||||
|
||||
// Get the DAG Run Attempts
|
||||
auto record = logger.getDAGRun(runID);
|
||||
REQUIRE(record.taskAttempts[0].size() == 1); // A ran fine
|
||||
REQUIRE(record.taskAttempts[1].size() == 2); // B errored and had to be retried
|
||||
REQUIRE(record.taskAttempts[2].size() == 1); // C wasn't run because B errored
|
||||
REQUIRE(record.taskAttempts["A"].size() == 1); // A ran fine
|
||||
REQUIRE(record.taskAttempts["B"].size() == 2); // B errored and had to be retried
|
||||
REQUIRE(record.taskAttempts["C"].size() == 1); // C wasn't run because B errored
|
||||
|
||||
cleanup();
|
||||
}
|
||||
|
||||
SECTION("Generator tasks") {
|
||||
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
|
||||
auto params = daggy::parametersFromJSON(testParams);
|
||||
|
||||
std::string generatorOutput = R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})";
|
||||
std::stringstream jsonTasks;
|
||||
|
||||
jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput)
|
||||
<< R"(], "children": ["C"], "isGenerator": true},)"
|
||||
<< R"("C": { "command": [ "/usr/bin/echo", "hello!"] } })";
|
||||
|
||||
auto tasks = daggy::tasksFromJSON(jsonTasks.str());
|
||||
auto dag = daggy::buildDAGFromTasks(tasks);
|
||||
REQUIRE(dag.size() == 2);
|
||||
|
||||
auto runID = logger.startDAGRun("generator_run", tasks);
|
||||
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
|
||||
|
||||
REQUIRE(finalDAG.size() == 4);
|
||||
|
||||
// Check the logger
|
||||
auto record = logger.getDAGRun(runID);
|
||||
|
||||
REQUIRE(record.tasks.size() == 4);
|
||||
REQUIRE(record.taskRunStates.size() == 4);
|
||||
for (const auto & [taskName, attempts] : record.taskAttempts) {
|
||||
REQUIRE(attempts.size() == 1);
|
||||
REQUIRE(attempts.back().rc == 0);
|
||||
}
|
||||
|
||||
// Ensure that children were updated properly
|
||||
REQUIRE(record.tasks["A"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
|
||||
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
|
||||
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
|
||||
REQUIRE(record.tasks["C"].children.empty());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user