#include #include #include #include "daggy/DAGRunner.hpp" #include "daggy/executors/task/ForkingTaskExecutor.hpp" #include "daggy/executors/task/NoopTaskExecutor.hpp" #include "daggy/loggers/dag_run/OStreamLogger.hpp" namespace fs = std::filesystem; TEST_CASE("dagrunner", "[dagrunner][dagrunner_order_preservation]") { daggy::executors::task::NoopTaskExecutor ex; std::stringstream ss; daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::TimePoint globalStartTime = daggy::Clock::now(); daggy::DAGSpec dagSpec; std::string testParams{ R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"}; dagSpec.taskConfig.variables = daggy::configFromJSON(testParams); std::string taskJSON = R"({ "A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]}, "B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]}, "C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]}, "D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]}, "E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}} })"; dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex, dagSpec.taskConfig.variables); REQUIRE(dagSpec.tasks.size() == 20); auto dag = daggy::buildDAGFromTasks(dagSpec.tasks); auto runID = logger.startDAGRun(dagSpec); daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig); auto endDAG = runner.run(); REQUIRE(endDAG.allVisited()); // Ensure the run order auto rec = logger.getDAGRun(runID); daggy::TimePoint globalStopTime = daggy::Clock::now(); std::array minTimes; minTimes.fill(globalStartTime); std::array maxTimes; maxTimes.fill(globalStopTime); for (const auto &[k, v] : rec.taskAttempts) { size_t idx = k[0] - 65; auto &startTime = minTimes[idx]; auto &stopTime = maxTimes[idx]; startTime = std::max(startTime, v.front().startTime); stopTime = std::min(stopTime, v.back().stopTime); } for (size_t i = 0; i < 5; ++i) { for (size_t j = i + 1; j < 4; ++j) { REQUIRE(maxTimes[i] < minTimes[j]); } } } TEST_CASE("DAGRunner simple execution", "[dagrunner][dagrunner_simple]") { daggy::executors::task::ForkingTaskExecutor ex(10); std::stringstream ss; daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::DAGSpec dagSpec; SECTION("Simple execution") { std::string prefix = (fs::current_path() / "asdlk").string(); std::unordered_map files{ {"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}}; std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") + R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" + files.at("B") + R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" + files.at("C") + R"("]}}})"; dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex); auto dag = daggy::buildDAGFromTasks(dagSpec.tasks); auto runID = logger.startDAGRun(dagSpec); daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig); auto endDAG = runner.run(); REQUIRE(endDAG.allVisited()); for (const auto &[_, file] : files) { REQUIRE(fs::exists(file)); fs::remove(file); } // Get the DAG Run Attempts auto record = logger.getDAGRun(runID); for (const auto &[_, attempts] : record.taskAttempts) { REQUIRE(attempts.size() == 1); REQUIRE(attempts.front().rc == 0); } } } TEST_CASE("DAG Runner Restart old DAG", "[dagrunner][dagrunner_restart]") { daggy::executors::task::ForkingTaskExecutor ex(10); std::stringstream ss; daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::DAGSpec dagSpec; SECTION("Recovery from Error") { auto cleanup = []() { // Cleanup std::vector paths{"rec_error_A", "noexist"}; for (const auto &pth : paths) { if (fs::exists(pth)) fs::remove_all(pth); } }; cleanup(); std::string goodPrefix = "rec_error_"; std::string badPrefix = "noexist/rec_error_"; std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix + R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" + badPrefix + R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" + badPrefix + R"(C"]}}})"; dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex); auto dag = daggy::buildDAGFromTasks(dagSpec.tasks); auto runID = logger.startDAGRun(dagSpec); daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig); auto tryDAG = runner.run(); REQUIRE(!tryDAG.allVisited()); // Create the missing dir, then continue to run the DAG fs::create_directory("noexist"); runner.resetRunning(); auto endDAG = runner.run(); REQUIRE(endDAG.allVisited()); // Get the DAG Run Attempts auto record = logger.getDAGRun(runID); REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine REQUIRE(record.taskAttempts["B_0"].size() == 2); // B errored and had to be retried REQUIRE(record.taskAttempts["C_0"].size() == 1); // C wasn't run because B errored cleanup(); } } TEST_CASE("DAG Runner Generator Tasks", "[dagrunner][dagrunner_generator]") { daggy::executors::task::ForkingTaskExecutor ex(10); std::stringstream ss; daggy::loggers::dag_run::OStreamLogger logger(ss); daggy::DAGSpec dagSpec; SECTION("Generator tasks") { std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"}; dagSpec.taskConfig.variables = daggy::configFromJSON(testParams); std::string generatorOutput = R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})"; fs::path ofn = fs::current_path() / "generator_test_output.json"; std::ofstream ofh{ofn}; ofh << generatorOutput << std::endl; ofh.close(); daggy::TimePoint globalStartTime = daggy::Clock::now(); std::stringstream jsonTasks; jsonTasks << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )" << std::quoted(ofn.string()) << R"(]}, "children": ["C"], "isGenerator": true},)" << R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })"; dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str()); REQUIRE(dagSpec.tasks.size() == 2); REQUIRE(dagSpec.tasks["A"].children == std::unordered_set{"C"}); dagSpec.tasks = daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables); REQUIRE(dagSpec.tasks.size() == 2); REQUIRE(dagSpec.tasks["A_0"].children == std::unordered_set{"C"}); auto dag = daggy::buildDAGFromTasks(dagSpec.tasks); REQUIRE(dag.size() == 2); auto runID = logger.startDAGRun(dagSpec); daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig); auto finalDAG = runner.run(); REQUIRE(finalDAG.allVisited()); REQUIRE(finalDAG.size() == 4); // Check the logger auto record = logger.getDAGRun(runID); REQUIRE(record.dagSpec.tasks.size() == 4); REQUIRE(record.taskRunStates.size() == 4); for (const auto &[taskName, attempts] : record.taskAttempts) { REQUIRE(attempts.size() == 1); REQUIRE(attempts.back().rc == 0); } // Ensure that children were updated properly REQUIRE(record.dagSpec.tasks["A_0"].children == std::unordered_set{"B_0", "B_1", "C"}); REQUIRE(record.dagSpec.tasks["B_0"].children == std::unordered_set{"C"}); REQUIRE(record.dagSpec.tasks["B_1"].children == std::unordered_set{"C"}); REQUIRE(record.dagSpec.tasks["C_0"].children.empty()); // Ensure they were run in the right order // All A's get run before B's, which run before C's daggy::TimePoint globalStopTime = daggy::Clock::now(); std::array minTimes; minTimes.fill(globalStartTime); std::array maxTimes; maxTimes.fill(globalStopTime); for (const auto &[k, v] : record.taskAttempts) { size_t idx = k[0] - 65; auto &startTime = minTimes[idx]; auto &stopTime = maxTimes[idx]; startTime = std::max(startTime, v.front().startTime); stopTime = std::min(stopTime, v.back().stopTime); } for (size_t i = 0; i < 3; ++i) { for (size_t j = i + 1; j < 2; ++j) { REQUIRE(maxTimes[i] < minTimes[j]); } } } }