Massive re-org to allow per-executor job specification formats and executor-specific task validation and expansion.

A few different renames to try to keep things more consistent.
This commit is contained in:
Ian Roddis
2021-09-03 09:10:38 -03:00
parent e746f8c163
commit d15580f47f
22 changed files with 509 additions and 300 deletions

View File

@@ -12,13 +12,13 @@ namespace fs = std::filesystem;
using namespace daggy;
using namespace daggy::loggers::dag_run;
// Shared fixture: three tasks keyed "work_a", "work_b" and "work_c", each
// running /bin/echo with a single-letter argument.
// NOTE(review): SAMPLE_TASKS is declared twice below (as TaskList using
// `.command`, then as TaskSet using `.job`), and the first declaration has no
// closing `};` of its own. This looks like the before/after pair of one change
// with the diff markers stripped; confirm against the real file.
// NOTE(review): work_a and work_b list their child as "c", while the third
// entry is keyed "work_c". Confirm whether that name mismatch is intentional.
const TaskList SAMPLE_TASKS{
{"work_a", Task{.command{"/bin/echo", "a"}, .children{"c"}}},
{"work_b", Task{.command{"/bin/echo", "b"}, .children{"c"}}},
{"work_c", Task{.command{"/bin/echo", "c"}}}
const TaskSet SAMPLE_TASKS{
{"work_a", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}}, .children{"c"}}},
{"work_b", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}}, .children{"c"}}},
{"work_c", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}
};
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskList &tasks) {
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskSet &tasks) {
auto runID = logger.startDAGRun(name, tasks);
auto dagRun = logger.getDAGRun(runID);

View File

@@ -2,6 +2,8 @@
#include <filesystem>
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include <catch2/catch.hpp>
@@ -9,9 +11,12 @@ TEST_CASE("Basic Execution", "[forking_executor]") {
daggy::executors::task::ForkingTaskExecutor ex(10);
SECTION("Simple Run") {
daggy::Task task{.command{"/usr/bin/echo", "abc", "123"}};
daggy::Task task{.job{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/echo", "abc", "123"}}}};
auto rec = ex.runCommand(task);
REQUIRE(ex.validateTaskParameters(task.job));
auto rec = ex.execute(task);
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
@@ -19,9 +24,10 @@ TEST_CASE("Basic Execution", "[forking_executor]") {
}
SECTION("Error Run") {
daggy::Task task{.command{"/usr/bin/expr", "1", "+", "+"}};
daggy::Task task{.job{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/expr", "1", "+", "+"}}}};
auto rec = ex.runCommand(task);
auto rec = ex.execute(task);
REQUIRE(rec.rc == 2);
REQUIRE(rec.errorLog.size() >= 20);
@@ -33,16 +39,45 @@ TEST_CASE("Basic Execution", "[forking_executor]") {
"/usr/share/dict/linux.words", "/usr/share/dict/cracklib-small", "/etc/ssh/moduli"
};
for (const auto &bigFile : BIG_FILES) {
for (const auto &bigFile: BIG_FILES) {
if (!std::filesystem::exists(bigFile)) continue;
daggy::Task task{.command{"/usr/bin/cat", bigFile}};
daggy::Task task{.job{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/cat", bigFile}}}};
auto rec = ex.runCommand(task);
auto rec = ex.execute(task);
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
REQUIRE(rec.errorLog.empty());
}
}
}
// One templated task whose command references {{DATE}}: with two DATE values
// supplied, the expanded task set is expected to hold two entries.
SECTION("Parameter Expansion") {
    std::string dateParamsJSON = R"({"DATE": ["2021-05-06", "2021-05-07" ]})";
    std::string templatedTaskJSON =
        R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";

    auto dateParams = daggy::configFromJSON(dateParamsJSON);
    auto templatedTasks = daggy::tasksFromJSON(templatedTaskJSON);

    auto expanded = daggy::expandTaskSet(templatedTasks, ex, dateParams);
    REQUIRE(expanded.size() == 2);
}
// Tasks A -> B -> C linked through "children"; only B references template
// variables. The assertion expects four tasks after expansion
// (presumably A, two copies of B for the two DATE values, and C).
SECTION("Build with expansion") {
    std::string paramsJSON = R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})";
    std::string tasksJSON =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";

    auto expansionParams = daggy::configFromJSON(paramsJSON);
    auto parsedTasks = daggy::tasksFromJSON(tasksJSON);
    auto expandedTasks = daggy::expandTaskSet(parsedTasks, ex, expansionParams);

    REQUIRE(expandedTasks.size() == 4);
}
// Same shape as the children-based expansion test, but the edges are declared
// from the other side: B and C each name A in "parents". Four tasks are
// expected after expansion.
SECTION("Build with expansion using parents instead of children") {
    std::string paramsJSON = R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})";
    std::string tasksJSON =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";

    auto expansionParams = daggy::configFromJSON(paramsJSON);
    auto parsedTasks = daggy::tasksFromJSON(tasksJSON);
    auto expandedTasks = daggy::expandTaskSet(parsedTasks, ex, expansionParams);

    REQUIRE(expandedTasks.size() == 4);
}
}

View File

@@ -11,49 +11,68 @@ namespace fs = std::filesystem;
// Exercises the JSON parameter parser with one accepted document and three
// documents that must be rejected with an exception.
// NOTE(review): every parser call appears twice (parametersFromJSON, then
// configFromJSON). This looks like the before/after pair of a rename with the
// diff markers stripped; confirm which call the real file contains.
TEST_CASE("Deserialize Parameters", "[deserialize_parameters]") {
// A string-array value and a plain string value are both accepted; the array
// is held as std::vector<std::string> and the scalar as std::string.
SECTION("Basic Parse") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::parametersFromJSON(testParams);
auto params = daggy::configFromJSON(testParams);
REQUIRE(params.size() == 2);
REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
// The document below is missing its closing brace, so parsing must throw.
}SECTION("Invalid JSON") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
REQUIRE_THROWS(daggy::parametersFromJSON(testParams));
REQUIRE_THROWS(daggy::configFromJSON(testParams));
// The second key is the bare number 6 instead of a quoted string.
}SECTION("Non-string Keys") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
REQUIRE_THROWS(daggy::parametersFromJSON(testParams));
REQUIRE_THROWS(daggy::configFromJSON(testParams));
// "SOURCE" maps to a nested object, which is neither a string nor an array.
}SECTION("Non-array/Non-string values") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
REQUIRE_THROWS(daggy::parametersFromJSON(testParams));
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}
}
TEST_CASE("Task Deserialization", "[deserialize_task]") {
// Parses three tasks (A and B each list C as a child) that contain no template
// variables and expects exactly three entries back.
// NOTE(review): testTasks is declared twice: first as a one-line document with
// "command" directly on each task, then as a multi-line document with
// "command" nested under "job". This looks like the before/after pair of one
// change with the diff markers stripped; confirm which form the real file uses.
SECTION("Build with no expansion") {
std::string testTasks = R"({ "A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
std::string testTasks = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
"children": ["C"]
},
"B": {
"job": {"command": ["/bin/echo", "B"]},
"children": ["C"]
},
"C": {
"job": {"command": ["/bin/echo", "C"]}
}
})";
auto tasks = daggy::tasksFromJSON(testTasks);
REQUIRE(tasks.size() == 3);
}
SECTION("Build with expansion") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::parametersFromJSON(testParams);
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["B"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
auto tasks = daggy::tasksFromJSON(testTasks, params);
REQUIRE(tasks.size() == 4);
}
SECTION("Build with expansion using parents instead of children") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::parametersFromJSON(testParams);
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"]}, "B": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"], "parents": ["A"]}, "C": {"command": ["/bin/echo", "C"], "parents": ["A"]}})";
auto tasks = daggy::tasksFromJSON(testTasks, params);
REQUIRE(tasks.size() == 4);
// Job defaults passed to tasksFromJSON are expected to show up in every task's
// job, except where the task's own job already sets that key: task B declares
// "memory" itself and must keep "1G" rather than the default "300M".
SECTION("Build with job defaults") {
    daggy::ConfigValues defaults{{"runtime", "60"},
                                 {"memory", "300M"}};
    std::string taskJSON = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
"children": ["B"]
},
"B": {
"job": {
"command": ["/bin/echo", "C"],
"memory": "1G"
}
}
})";
    auto parsed = daggy::tasksFromJSON(taskJSON, defaults);
    REQUIRE(parsed.size() == 2);

    // Reads one job setting of one task as a string.
    auto jobString = [&parsed](const std::string &taskName, const std::string &key) {
        return std::get<std::string>(parsed[taskName].job[key]);
    };

    REQUIRE(jobString("A", "runtime") == "60");
    REQUIRE(jobString("A", "memory") == "300M");
    REQUIRE(jobString("B", "runtime") == "60");
    REQUIRE(jobString("B", "memory") == "1G");
}
}
TEST_CASE("Task Serialization", "[serialize_tasks]") {
SECTION("Build with no expansion") {
std::string testTasks = R"({"A": {"command": ["/bin/echo", "A"], "children": ["C"]}, "B": {"command": ["/bin/echo", "B"], "children": ["C"]}, "C": {"command": ["/bin/echo", "C"]}})";
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::tasksFromJSON(testTasks);
auto genJSON = daggy::tasksToJSON(tasks);

View File

@@ -6,9 +6,10 @@
#include <pistache/client.h>
#include <rapidjson/document.h>
#include "daggy/Server.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
#include <daggy/Server.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
namespace rj = rapidjson;
@@ -73,10 +74,10 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
SECTION("Simple DAGRun Submission") {
std::string dagRun = R"({
"name": "unit_server",
"taskParameters": { "FILE": [ "A", "B" ] },
"parameters": { "FILE": [ "A", "B" ] },
"tasks": {
"touch": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ] },
"cat": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ],
"touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
"cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
"parents": [ "touch" ]
}
}
@@ -90,8 +91,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
REQUIRE(response.code() == Pistache::Http::Code::Ok);
rj::Document doc;
rj::ParseResult parseResult = doc.Parse(response.body().c_str());
REQUIRE(parseResult);
daggy::checkRJParse(doc.Parse(response.body().c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
@@ -104,8 +104,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
REQUIRE(response.code() == Pistache::Http::Code::Ok);
rj::Document doc;
rj::ParseResult parseResult = doc.Parse(response.body().c_str());
REQUIRE(parseResult);
daggy::checkRJParse(doc.Parse(response.body().c_str()));
REQUIRE(doc.IsArray());
REQUIRE(doc.Size() >= 1);
@@ -134,8 +133,7 @@ TEST_CASE("Server Basic Endpoints", "[server_basic]") {
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
REQUIRE(response.code() == Pistache::Http::Code::Ok);
rj::Document doc;
rj::ParseResult parseResult = doc.Parse(response.body().c_str());
REQUIRE(parseResult);
daggy::checkRJParse(doc.Parse(response.body().c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("taskStates"));

View File

@@ -24,18 +24,18 @@ TEST_CASE("String Utilities", "[utilities_string]") {
TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
// The command references all three parameters, so the expected number of
// generated commands is |DATE| * |SOURCE| * |TYPE| = 2 * 1 * 3 = 6.
// NOTE(review): `params` and `allCommands` are each declared twice
// (parametersFromJSON/expandCommands, then configFromJSON/interpolateValues).
// This looks like the before/after pair of a rename with the diff markers
// stripped; confirm which calls the real file contains.
SECTION("Basic expansion") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::parametersFromJSON(testParams);
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"};
auto allCommands = daggy::expandCommands(cmd, params);
auto allCommands = daggy::interpolateValues(cmd, params);
REQUIRE(allCommands.size() == 6);
}
SECTION("Skip over unused parameters") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::parametersFromJSON(testParams);
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
auto allCommands = daggy::expandCommands(cmd, params);
auto allCommands = daggy::interpolateValues(cmd, params);
// TYPE isn't used, so it's just |DATE| * |SOURCE|
REQUIRE(allCommands.size() == 2);
@@ -44,9 +44,9 @@ TEST_CASE("Parameter Expansion", "[utilities_parameter_expansion]") {
SECTION("Expand within a command part") {
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
auto params = daggy::parametersFromJSON(testParams);
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"};
auto result = daggy::expandCommands(cmd, params);
auto result = daggy::interpolateValues(cmd, params);
// TYPE isn't used, so it's just |DATE| * |SOURCE|
REQUIRE(result.size() == 4);
@@ -62,11 +62,11 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
SECTION("Simple execution") {
std::string prefix = "asdlk_";
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+ prefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
@@ -102,12 +102,13 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON = R"({"A": {"command": ["/usr/bin/touch", ")"
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+ goodPrefix +
R"(A"], "children": ["C"]}, "B": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(B"], "children": ["C"]}, "C": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+ badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
@@ -134,22 +135,31 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
SECTION("Generator tasks") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::parametersFromJSON(testParams);
auto params = daggy::configFromJSON(testParams);
std::string generatorOutput = R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
ofh.close();
std::string generatorOutput = R"({"B": {"command": ["/usr/bin/echo", "{{DATE}}"], "children": ["C"]}})";
std::stringstream jsonTasks;
jsonTasks << R"({ "A": { "command": [ "/usr/bin/echo", )" << std::quoted(generatorOutput)
<< R"(], "children": ["C"], "isGenerator": true},)"
<< R"("C": { "command": [ "/usr/bin/echo", "hello!"] } })";
jsonTasks << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )" << std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
auto tasks = daggy::tasksFromJSON(jsonTasks.str());
auto baseTasks = daggy::tasksFromJSON(jsonTasks.str());
REQUIRE(baseTasks.size() == 2);
auto tasks = daggy::expandTaskSet(baseTasks, ex, params);
REQUIRE(tasks.size() == 2);
auto dag = daggy::buildDAGFromTasks(tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun("generator_run", tasks);
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
REQUIRE(finalDAG.allVisited());
REQUIRE(finalDAG.size() == 4);
// Check the logger
@@ -157,16 +167,15 @@ TEST_CASE("DAG Runner", "[utilities_dag_runner]") {
REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto & [taskName, attempts] : record.taskAttempts) {
for (const auto &[taskName, attempts]: record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["A"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C"].children.empty());
}
}