#include #include #include #include #include #include #include #include "daggy/Utilities.hpp" #include "daggy/Serialization.hpp" #include "daggy/executors/task/ForkingTaskExecutor.hpp" #include "daggy/loggers/dag_run/OStreamLogger.hpp" namespace fs = std::filesystem; TEST_CASE("String Utilities", "[utilities_string]") { std::string test = "/this/is/{{A}}/test/{{A}}"; auto res = daggy::globalSub(test, "{{A}}", "hello"); REQUIRE(res == "/this/is/hello/test/hello"); } TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") { SECTION("Basic expansion") { std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"}; auto params = daggy::parametersFromJSON(testParams); std::vector cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"}; auto allCommands = daggy::expandCommands(cmd, params); REQUIRE(allCommands.size() == 6); } SECTION("Skip over unused parameters") { std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"}; auto params = daggy::parametersFromJSON(testParams); std::vector cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"}; auto allCommands = daggy::expandCommands(cmd, params); // TYPE isn't used, so it's just |DATE| * |SOURCE| REQUIRE(allCommands.size() == 2); } SECTION("Expand within a command part") { std::string testParams{ R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"}; auto params = daggy::parametersFromJSON(testParams); std::vector cmd{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"}; auto result = daggy::expandCommands(cmd, params); // TYPE isn't used, so it's just |DATE| * |SOURCE| REQUIRE(result.size() == 4); } } TEST_CASE("DAG Runner", "[utilities_dag_runner]") { daggy::executors::task::ForkingTaskExecutor ex(10); std::stringstream ss; daggy::loggers::dag_run::OStreamLogger logger(ss); SECTION("Simple execution") { std::string prefix = "asdlk_"; std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")" + prefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")" + prefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")" + prefix + R"(C"]}})"; auto tasks = daggy::tasksFromJSON(taskJSON); auto dag = daggy::buildDAGFromTasks(tasks); auto runID = logger.startDAGRun("test_run", tasks); auto endDAG = daggy::runDAG(runID, ex, logger, dag); REQUIRE(endDAG.allVisited()); std::vector letters{"A", "B", "C"}; for (const auto &letter: letters) { fs::path file{prefix + letter}; REQUIRE(fs::exists(file)); fs::remove(file); } // Get the DAG Run Attempts auto record = logger.getDAGRun(runID); for (const auto &[_, attempts]: record.taskAttempts) { REQUIRE(attempts.size() == 1); REQUIRE(attempts.front().rc == 0); } } SECTION("Recovery from Error") { auto cleanup = []() { // Cleanup std::vector paths{"rec_error_A", "noexist"}; for (const auto &pth: paths) { if (fs::exists(pth)) fs::remove_all(pth); } }; cleanup(); std::string goodPrefix = "rec_error_"; std::string badPrefix = "noexist/rec_error_"; std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")" + goodPrefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")" + badPrefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")" + badPrefix + R"(C"]}})"; auto tasks = daggy::tasksFromJSON(taskJSON); auto dag = daggy::buildDAGFromTasks(tasks); auto runID = logger.startDAGRun("test_run", tasks); auto tryDAG = daggy::runDAG(runID, ex, logger, dag); REQUIRE(!tryDAG.allVisited()); // Create the missing dir, then continue to run the DAG fs::create_directory("noexist"); tryDAG.resetRunning(); auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG); REQUIRE(endDAG.allVisited()); // Get the DAG Run Attempts auto record = logger.getDAGRun(runID); REQUIRE(record.taskAttempts["A"].size() == 1); // A ran fine REQUIRE(record.taskAttempts["B"].size() == 2); // B errored and had to be retried REQUIRE(record.taskAttempts["C"].size() == 1); // C wasn't run because B errored cleanup(); } SECTION("Generator tasks") { std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"}; auto params = daggy::parametersFromJSON(testParams); std::string generatorOutput = R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})"; std::stringstream jsonTasks; jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput) << R"(], "children": ["C"], "isGenerator": true},)" << R"("C": { "command": [ "/usr/bin/echo", "hello!"] } })"; auto tasks = daggy::tasksFromJSON(jsonTasks.str()); auto dag = daggy::buildDAGFromTasks(tasks); REQUIRE(dag.size() == 2); auto runID = logger.startDAGRun("generator_run", tasks); auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params); REQUIRE(finalDAG.size() == 4); // Check the logger auto record = logger.getDAGRun(runID); REQUIRE(record.tasks.size() == 4); REQUIRE(record.taskRunStates.size() == 4); for (const auto & [taskName, attempts] : record.taskAttempts) { REQUIRE(attempts.size() == 1); REQUIRE(attempts.back().rc == 0); } // Ensure that children were updated properly REQUIRE(record.tasks["A"].children == std::unordered_set{"B_0", "B_1", "C"}); REQUIRE(record.tasks["B_0"].children == std::unordered_set{"C"}); REQUIRE(record.tasks["B_1"].children == std::unordered_set{"C"}); REQUIRE(record.tasks["C"].children.empty()); } }