Adding a No-op task executor for testing

Fixing the DFS implementation of DAG validation to be much faster
Adding additional tests to ensure the run order of expanded tasks is preserved
Adding additional compile-time checks and resolving issues that came up as a result
Ian Roddis
2021-09-20 19:05:56 -03:00
parent 2daaa83d82
commit 39d5ae08be
14 changed files with 187 additions and 113 deletions
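
The "much faster" DFS validation mentioned in the commit message lives outside the test file shown below. For reference, the usual linear-time way to validate a DAG is a three-colour depth-first search over a name-to-children map, sketched here as a standalone illustration; Graph, visit, and isAcyclic are hypothetical names, not daggy's actual API:

#include <string>
#include <unordered_map>
#include <unordered_set>

// Sketch only: generic three-colour DFS cycle check, not daggy's validation code.
using Graph = std::unordered_map<std::string, std::unordered_set<std::string>>;
enum class Colour { White, Grey, Black };

static bool visit(const Graph& g, const std::string& node,
                  std::unordered_map<std::string, Colour>& colour) {
    Colour state = colour[node];               // default-inserts White on first visit
    if (state == Colour::Grey)  return false;  // back edge: the graph has a cycle
    if (state == Colour::Black) return true;   // subtree already validated, skip it
    colour[node] = Colour::Grey;
    if (auto it = g.find(node); it != g.end()) {
        for (const auto& child : it->second) {
            if (!visit(g, child, colour)) return false;
        }
    }
    colour[node] = Colour::Black;
    return true;
}

// True when the task graph is acyclic. Black nodes are never re-explored, so the
// whole validation is O(nodes + edges) even when many tasks share children.
inline bool isAcyclic(const Graph& g) {
    std::unordered_map<std::string, Colour> colour;
    for (const auto& entry : g) {
        if (!visit(g, entry.first, colour)) return false;
    }
    return true;
}

Marking fully explored nodes Black and skipping them on later visits is what avoids the repeated re-traversal that makes a naive per-node DFS quadratic on graphs with shared children.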

@@ -1,4 +1,5 @@
#include <iostream>
#include <chrono>
#include <filesystem>
#include <fstream>
@@ -12,6 +13,7 @@
#include "daggy/Utilities.hpp"
#include "daggy/Serialization.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
@@ -56,6 +58,56 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") {
}
}
TEST_CASE("dag_runner_order", "[dagrun_order]") {
daggy::executors::task::NoopTaskExecutor ex;
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::TimePoint startTime = daggy::Clock::now();
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({
"A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
"B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
"C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
"D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex, params);
REQUIRE(tasks.size() == 20); // five tasks expanded over four DATE values
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
// Verify the run order implied by the DAG (A before B before C before D before E)
auto rec = logger.getDAGRun(runID);
daggy::TimePoint stopTime = daggy::Clock::now();
// For each letter, record the latest start and the earliest stop across its
// expanded per-DATE instances
std::array<daggy::TimePoint, 5> latestStart; latestStart.fill(startTime);
std::array<daggy::TimePoint, 5> earliestStop; earliestStop.fill(stopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 'A'; // expanded task names begin with the defining letter
latestStart[idx] = std::max(latestStart[idx], v.front().startTime);
earliestStop[idx] = std::min(earliestStop[idx], v.back().stopTime);
}
// Every earlier letter must have an instance that stopped before the last
// instance of each later letter started
for (size_t i = 0; i < 5; ++i) {
for (size_t j = i + 1; j < 5; ++j) {
REQUIRE(earliestStop[i] < latestStart[j]);
}
}
}
TEST_CASE("dag_runner", "[utilities_dag_runner]") {
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
@@ -181,55 +233,3 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
REQUIRE(record.tasks["C_0"].children.empty());
}
}
TEST_CASE("dag_runner_stress", "[utilities_dag_runner_stress]") {
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Stress-test") {
static std::random_device dev;
static std::mt19937 rng(dev());
std::uniform_int_distribution<size_t> nDepDist(0, 10);
const size_t N_NODES = 100;
daggy::TaskSet tasks;
std::vector<fs::path> fileNames;
std::vector<std::string> taskNames;
for (size_t i = 0; i < N_NODES; ++i) {
std::string taskName = std::to_string(i);
std::unordered_set<std::string> deps;
// Children are only ever higher-numbered tasks, which keeps the random graph acyclic.
// The last task gets no children; otherwise depDist would be constructed with an empty range.
if (i + 1 < N_NODES) {
std::uniform_int_distribution<size_t> depDist(i+1, N_NODES-1);
size_t nChildren = nDepDist(rng);
for (size_t c = 0; c < nChildren; ++c) {
deps.insert(std::to_string(depDist(rng)));
}
}
tasks.emplace(taskName, daggy::Task{
.definedName = taskName,
.job = { { "command", std::vector<std::string>{"/usr/bin/echo", taskName}}},
.children = deps
});
}
auto dag = daggy::buildDAGFromTasks(tasks);
/**
auto runID = logger.startDAGRun("test_run", tasks);
auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(tryDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto & [k, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
}
*/
}
}
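
The NoopTaskExecutor exercised by the dag_runner_order test above is declared in daggy/executors/task/NoopTaskExecutor.hpp, which is outside this file's diff. A minimal standalone sketch of the idea, with a hypothetical interface rather than daggy's actual task-executor base class, could look like:

#include <string>
#include <vector>

// Hypothetical illustration only; the real NoopTaskExecutor derives from
// daggy's task-executor interface, which is not shown in this diff.
struct FakeResult {
    bool success = true;
    std::string output;
};

class NoopExecutorSketch {
public:
    // "Run" a command by immediately reporting success, so tests can exercise
    // task expansion, DAG scheduling, and run logging without forking processes.
    FakeResult run(const std::vector<std::string>& /*command*/) const {
        return FakeResult{};
    }
};

Because nothing is actually executed, the run-order assertions above measure only the scheduler's ordering and the logger's timestamps, not the cost of forking real processes.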