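// Unit tests for daggy::DAGRunner, covering run-order preservation,
// simple end-to-end execution, restarting a failed run, and generator
// tasks.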
#include <catch2/catch.hpp>

#include <algorithm>
#include <array>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "daggy/DAGRunner.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
#include "daggy/loggers/dag_run/RedisLogger.hpp"

namespace fs = std::filesystem;

TEST_CASE("dagrunner", "[dagrunner_order_preservation]")
|
|
{
|
|
daggy::executors::task::NoopTaskExecutor ex;
|
|
std::stringstream ss;
|
|
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
|
|
|
daggy::TimePoint globalStartTime = daggy::Clock::now();
|
|
|
|
daggy::DAGSpec dagSpec;
|
|
|
|
std::string testParams{
|
|
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
|
|
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
|
|
|
|
std::string taskJSON = R"({
|
|
"A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
|
|
"B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
|
|
"C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
|
|
"D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
|
|
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
|
|
})";
|
|
|
|
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex,
|
|
dagSpec.taskConfig.variables);
|
|
|
|
REQUIRE(dagSpec.tasks.size() == 20);
|
|
|
|
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
|
auto runID = logger.startDAGRun(dagSpec);
|
|
|
|
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
|
|
|
auto endDAG = runner.run();
|
|
|
|
REQUIRE(endDAG.allVisited());
|
|
|
|
// Ensure the run order
|
|
auto rec = logger.getDAGRun(runID);
|
|
|
|
daggy::TimePoint globalStopTime = daggy::Clock::now();
|
|
std::array<daggy::TimePoint, 5> minTimes;
|
|
minTimes.fill(globalStartTime);
|
|
std::array<daggy::TimePoint, 5> maxTimes;
|
|
maxTimes.fill(globalStopTime);
|
|
|
|
for (const auto &[k, v] : rec.taskAttempts) {
|
|
size_t idx = k[0] - 65;
|
|
auto &startTime = minTimes[idx];
|
|
auto &stopTime = maxTimes[idx];
|
|
startTime = std::max(startTime, v.front().startTime);
|
|
stopTime = std::min(stopTime, v.back().stopTime);
|
|
}
|
|
|
|
for (size_t i = 0; i < 5; ++i) {
|
|
for (size_t j = i + 1; j < 4; ++j) {
|
|
REQUIRE(maxTimes[i] < minTimes[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
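// Runs a three-task fan-in (A and B both feed C) with the forking
// executor and verifies that every task touched its file and was logged
// as a single successful attempt.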
TEST_CASE("DAGRunner simple execution", "[dagrunner_simple]")
|
|
{
|
|
daggy::executors::task::ForkingTaskExecutor ex(10);
|
|
std::stringstream ss;
|
|
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
|
|
|
daggy::DAGSpec dagSpec;
|
|
|
|
SECTION("Simple execution")
|
|
{
|
|
std::string prefix = (fs::current_path() / "asdlk").string();
|
|
std::unordered_map<std::string, std::string> files{
|
|
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
|
|
std::string taskJSON =
|
|
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
|
|
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
|
|
files.at("B") +
|
|
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
|
|
files.at("C") + R"("]}}})";
|
|
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
|
|
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
|
auto runID = logger.startDAGRun(dagSpec);
|
|
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
|
auto endDAG = runner.run();
|
|
REQUIRE(endDAG.allVisited());
|
|
|
|
for (const auto &[_, file] : files) {
|
|
REQUIRE(fs::exists(file));
|
|
fs::remove(file);
|
|
}
|
|
|
|
// Get the DAG Run Attempts
|
|
auto record = logger.getDAGRun(runID);
|
|
for (const auto &[_, attempts] : record.taskAttempts) {
|
|
REQUIRE(attempts.size() == 1);
|
|
REQUIRE(attempts.front().rc == 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
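// Simulates a partial failure: B and C write beneath a directory that
// does not exist, so the first run cannot finish; once the directory is
// created, the same runner is restarted and completes the DAG.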
TEST_CASE("DAG Runner Restart old DAG", "[dagrunner_restart]")
|
|
{
|
|
daggy::executors::task::ForkingTaskExecutor ex(10);
|
|
std::stringstream ss;
|
|
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
|
daggy::DAGSpec dagSpec;
|
|
|
|
SECTION("Recovery from Error")
|
|
{
|
|
auto cleanup = []() {
|
|
// Cleanup
|
|
std::vector<fs::path> paths{"rec_error_A", "noexist"};
|
|
for (const auto &pth : paths) {
|
|
if (fs::exists(pth))
|
|
fs::remove_all(pth);
|
|
}
|
|
};
|
|
|
|
cleanup();
|
|
|
|
std::string goodPrefix = "rec_error_";
|
|
std::string badPrefix = "noexist/rec_error_";
|
|
std::string taskJSON =
|
|
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
|
|
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
|
|
badPrefix +
|
|
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
|
|
badPrefix + R"(C"]}}})";
|
|
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
|
|
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
|
|
|
auto runID = logger.startDAGRun(dagSpec);
|
|
|
|
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
|
auto tryDAG = runner.run();
|
|
|
|
REQUIRE(!tryDAG.allVisited());
|
|
|
|
// Create the missing dir, then continue to run the DAG
|
|
fs::create_directory("noexist");
|
|
runner.resetRunning();
|
|
auto endDAG = runner.run();
|
|
|
|
REQUIRE(endDAG.allVisited());
|
|
|
|
// Get the DAG Run Attempts
|
|
auto record = logger.getDAGRun(runID);
|
|
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
|
|
REQUIRE(record.taskAttempts["B_0"].size() ==
|
|
2); // B errored and had to be retried
|
|
REQUIRE(record.taskAttempts["C_0"].size() ==
|
|
1); // C wasn't run because B errored
|
|
|
|
cleanup();
|
|
}
|
|
}
|
|
|
|
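// A generator task's output is parsed as new task definitions: A cats a
// JSON file defining B, which is expanded over DATE and spliced into the
// running DAG between A and C.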
TEST_CASE("DAG Runner Generator Tasks", "[dagrunner_generator]")
|
|
{
|
|
daggy::executors::task::ForkingTaskExecutor ex(10);
|
|
std::stringstream ss;
|
|
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
|
daggy::DAGSpec dagSpec;
|
|
|
|
SECTION("Generator tasks")
|
|
{
|
|
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
|
|
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
|
|
|
|
std::string generatorOutput =
|
|
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
|
|
fs::path ofn = fs::current_path() / "generator_test_output.json";
|
|
std::ofstream ofh{ofn};
|
|
ofh << generatorOutput << std::endl;
|
|
ofh.close();
|
|
|
|
daggy::TimePoint globalStartTime = daggy::Clock::now();
|
|
std::stringstream jsonTasks;
|
|
jsonTasks
|
|
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
|
|
<< std::quoted(ofn.string())
|
|
<< R"(]}, "children": ["C"], "isGenerator": true},)"
|
|
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
|
|
|
|
dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str());
|
|
REQUIRE(dagSpec.tasks.size() == 2);
|
|
REQUIRE(dagSpec.tasks["A"].children ==
|
|
std::unordered_set<std::string>{"C"});
|
|
dagSpec.tasks =
|
|
daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables);
|
|
REQUIRE(dagSpec.tasks.size() == 2);
|
|
REQUIRE(dagSpec.tasks["A_0"].children ==
|
|
std::unordered_set<std::string>{"C"});
|
|
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
|
REQUIRE(dag.size() == 2);
|
|
|
|
auto runID = logger.startDAGRun(dagSpec);
|
|
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
|
auto finalDAG = runner.run();
|
|
|
|
REQUIRE(finalDAG.allVisited());
|
|
REQUIRE(finalDAG.size() == 4);
|
|
|
|
// Check the logger
|
|
auto record = logger.getDAGRun(runID);
|
|
|
|
REQUIRE(record.dagSpec.tasks.size() == 4);
|
|
REQUIRE(record.taskRunStates.size() == 4);
|
|
for (const auto &[taskName, attempts] : record.taskAttempts) {
|
|
REQUIRE(attempts.size() == 1);
|
|
REQUIRE(attempts.back().rc == 0);
|
|
}
|
|
|
|
// Ensure that children were updated properly
|
|
REQUIRE(record.dagSpec.tasks["A_0"].children ==
|
|
std::unordered_set<std::string>{"B_0", "B_1", "C"});
|
|
REQUIRE(record.dagSpec.tasks["B_0"].children ==
|
|
std::unordered_set<std::string>{"C"});
|
|
REQUIRE(record.dagSpec.tasks["B_1"].children ==
|
|
std::unordered_set<std::string>{"C"});
|
|
REQUIRE(record.dagSpec.tasks["C_0"].children.empty());
|
|
|
|
// Ensure they were run in the right order
|
|
// All A's get run before B's, which run before C's
|
|
daggy::TimePoint globalStopTime = daggy::Clock::now();
|
|
std::array<daggy::TimePoint, 3> minTimes;
|
|
minTimes.fill(globalStartTime);
|
|
std::array<daggy::TimePoint, 3> maxTimes;
|
|
maxTimes.fill(globalStopTime);
|
|
|
|
for (const auto &[k, v] : record.taskAttempts) {
|
|
size_t idx = k[0] - 65;
|
|
auto &startTime = minTimes[idx];
|
|
auto &stopTime = maxTimes[idx];
|
|
startTime = std::max(startTime, v.front().startTime);
|
|
stopTime = std::min(stopTime, v.back().stopTime);
|
|
}
|
|
|
|
for (size_t i = 0; i < 3; ++i) {
|
|
for (size_t j = i + 1; j < 2; ++j) {
|
|
REQUIRE(maxTimes[i] < minTimes[j]);
|
|
}
|
|
}
|
|
}
|
|
}
|