Adding clang-format and reformatting all source code

Ian Roddis
2021-09-21 09:41:11 -03:00
parent 39d5ae08be
commit 288ce28d29
36 changed files with 3355 additions and 2802 deletions
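
The .clang-format file added by this commit is among the 36 changed files but is not shown in this view. Judging from the reformatted output (sorted includes, wrapping at roughly 80 columns, two-space indentation, braces on their own line for functions and test macros but not for control statements), a minimal sketch of a configuration that would produce this style follows; the exact option values are assumptions, not the committed file:

    # .clang-format (sketch; the actual committed file is not shown in this diff)
    BasedOnStyle: LLVM          # LLVM defaults: 2-space indent, 80-column limit
    IndentWidth: 2
    ColumnLimit: 80
    SortIncludes: true          # explains the reordered #include blocks below
    BreakBeforeBraces: Custom
    BraceWrapping:
      AfterFunction: true           # TEST_CASE/function braces on their own line
      AfterControlStatement: false  # for/if/while keep the brace on the same line

With such a file at the repository root, one way to apply it everywhere is `clang-format -i $(git ls-files '*.cpp' '*.hpp')`.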

View File

@@ -1,9 +1,9 @@
#include <catch2/catch.hpp>
#include <iostream>

#include "daggy/DAG.hpp"

TEST_CASE("General tests", "[general]")
{
  REQUIRE(1 == 1);
}

View File

@@ -6,8 +6,9 @@
#include <catch2/catch.hpp>

TEST_CASE("Sanity tests", "[sanity]")
{
  REQUIRE(1 == 1);
}
// compile and run

View File

@@ -1,90 +1,87 @@
#include <catch2/catch.hpp>
#include <iostream>

#include "daggy/DAG.hpp"

TEST_CASE("dag_construction", "[dag]")
{
  daggy::DAG<size_t, size_t> dag;

  REQUIRE(dag.size() == 0);
  REQUIRE(dag.empty());

  REQUIRE_NOTHROW(dag.addVertex(0, 0));
  for (size_t i = 1; i < 10; ++i) {
    dag.addVertex(i, i);
    REQUIRE(dag.hasVertex(i));
    REQUIRE(dag.getVertex(i).data == i);
    dag.addEdge(i - 1, i);
  }

  REQUIRE(dag.size() == 10);
  REQUIRE(!dag.empty());

  // Cannot add an edge that would result in a cycle
  dag.addEdge(9, 5);
  REQUIRE(!dag.isValid());

  // Bounds checking
  SECTION("addEdge Bounds Checking")
  {
    REQUIRE_THROWS(dag.addEdge(20, 0));
    REQUIRE_THROWS(dag.addEdge(0, 20));
  }
}

TEST_CASE("dag_traversal", "[dag]")
{
  daggy::DAG<size_t, size_t> dag;

  const int N_VERTICES = 10;

  for (int i = 0; i < N_VERTICES; ++i) {
    dag.addVertex(i, i);
  }

  /*
     0 ---------------------\
     1 ---------- \          \        /-----> 8
     2 ---- 3 ---- > 5 -------> 6 -----> 7
     4 -------------------------------/ \-----> 9
   */
  std::vector<std::pair<int, int>> edges{{0, 6}, {1, 5}, {5, 6}, {6, 7}, {2, 3},
                                         {3, 5}, {4, 7}, {7, 8}, {7, 9}};

  for (auto const [from, to] : edges) {
    dag.addEdge(from, to);
  }

  SECTION("Basic Traversal")
  {
    dag.reset();
    std::vector<int> visitOrder(N_VERTICES);
    size_t i = 0;
    while (!dag.allVisited()) {
      const auto v = dag.visitNext().value();
      dag.completeVisit(v.first);
      visitOrder[v.first] = i;
      ++i;
    }

    // Ensure visit order is preserved
    for (auto const [from, to] : edges) {
      REQUIRE(visitOrder[from] <= visitOrder[to]);
    }
  }

  SECTION("Iteration")
  {
    size_t nVisited = 0;
    dag.forEach([&](auto &k) {
      (void)k;
      ++nVisited;
    });
    REQUIRE(nVisited == dag.size());
  }
}

View File

@@ -1,8 +1,7 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>

#include "daggy/loggers/dag_run/FileSystemLogger.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
@@ -13,25 +12,32 @@ using namespace daggy;
using namespace daggy::loggers::dag_run;

const TaskSet SAMPLE_TASKS{
    {"work_a",
     Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}},
          .children{"c"}}},
    {"work_b",
     Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}},
          .children{"c"}}},
    {"work_c",
     Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};

inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name,
                               const TaskSet &tasks)
{
  auto runID = logger.startDAGRun(name, tasks);
  auto dagRun = logger.getDAGRun(runID);

  REQUIRE(dagRun.tasks == tasks);

  REQUIRE(dagRun.taskRunStates.size() == tasks.size());
  auto nonQueuedTask =
      std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
                   [](const auto &a) { return a.second != +RunState::QUEUED; });
  REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());

  REQUIRE(dagRun.dagStateChanges.size() == 1);
  REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);

  return runID;
}
/*
@@ -54,14 +60,16 @@ TEST_CASE("Filesystem Logger", "[filesystem_logger]") {
}
*/
TEST_CASE("ostream_logger", "[ostream_logger]") {
//cleanup();
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
TEST_CASE("ostream_logger", "[ostream_logger]")
{
// cleanup();
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("DAGRun Starts") {
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
SECTION("DAGRun Starts")
{
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
// cleanup();
// cleanup();
}

View File

@@ -1,86 +1,103 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <iostream>

#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"

TEST_CASE("forking_executor", "[forking_executor]")
{
  daggy::executors::task::ForkingTaskExecutor ex(10);

  SECTION("Simple Run")
  {
    daggy::Task task{
        .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
                             "/usr/bin/echo", "abc", "123"}}}};

    REQUIRE(ex.validateTaskParameters(task.job));

    auto recFuture = ex.execute("command", task);
    auto rec = recFuture.get();

    REQUIRE(rec.rc == 0);
    REQUIRE(rec.outputLog.size() >= 6);
    REQUIRE(rec.errorLog.empty());
  }

  SECTION("Error Run")
  {
    daggy::Task task{
        .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
                             "/usr/bin/expr", "1", "+", "+"}}}};

    auto recFuture = ex.execute("command", task);
    auto rec = recFuture.get();

    REQUIRE(rec.rc == 2);
    REQUIRE(rec.errorLog.size() >= 20);
    REQUIRE(rec.outputLog.empty());
  }

  SECTION("Large Output")
  {
    const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
                                             "/usr/share/dict/cracklib-small",
                                             "/etc/ssh/moduli"};

    for (const auto &bigFile : BIG_FILES) {
      if (!std::filesystem::exists(bigFile))
        continue;

      daggy::Task task{
          .job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
                               "/usr/bin/cat", bigFile}}}};

      auto recFuture = ex.execute("command", task);
      auto rec = recFuture.get();

      REQUIRE(rec.rc == 0);
      REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
      REQUIRE(rec.errorLog.empty());
    }
  }

  SECTION("Parameter Expansion")
  {
    std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
    auto params = daggy::configFromJSON(testParams);

    std::string taskJSON =
        R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
    auto tasks = daggy::tasksFromJSON(taskJSON);

    auto result = daggy::expandTaskSet(tasks, ex, params);
    REQUIRE(result.size() == 2);
  }

  SECTION("Build with expansion")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
    auto params = daggy::configFromJSON(testParams);
    std::string testTasks =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
    auto tasks =
        daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
    REQUIRE(tasks.size() == 4);
  }

  SECTION("Build with expansion using parents instead of children")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
    auto params = daggy::configFromJSON(testParams);
    std::string testTasks =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
    auto tasks =
        daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
    REQUIRE(tasks.size() == 4);
  }
}

View File

@@ -1,111 +1,124 @@
#include <sys/types.h>
#include <unistd.h>

#include <catch2/catch.hpp>
#include <filesystem>
#include <iostream>

#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/SlurmTaskExecutor.hpp"

namespace fs = std::filesystem;

#ifdef DAGGY_ENABLE_SLURM

TEST_CASE("slurm_execution", "[slurm_executor]")
{
  daggy::executors::task::SlurmTaskExecutor ex;

  daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
                                       {"minMemoryMB", "100"},
                                       {"minTmpDiskMB", "0"},
                                       {"priority", "1"},
                                       {"timeLimitSeconds", "200"},
                                       {"userID", std::to_string(getuid())},
                                       {"workDir", fs::current_path().string()},
                                       {"tmpDir", fs::current_path().string()}};

  SECTION("Simple Run")
  {
    daggy::Task task{.job{
        {"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}}}};
    task.job.merge(defaultJobValues);

    REQUIRE(ex.validateTaskParameters(task.job));

    auto recFuture = ex.execute("command", task);
    auto rec = recFuture.get();

    REQUIRE(rec.rc == 0);
    REQUIRE(rec.outputLog.size() >= 6);
    REQUIRE(rec.errorLog.empty());
  }

  SECTION("Error Run")
  {
    daggy::Task task{
        .job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
                             "/usr/bin/expr", "1", "+", "+"}}}};
    task.job.merge(defaultJobValues);

    auto recFuture = ex.execute("command", task);
    auto rec = recFuture.get();

    REQUIRE(rec.rc != 0);
    REQUIRE(rec.errorLog.size() >= 20);
    REQUIRE(rec.outputLog.empty());
  }

  SECTION("Large Output")
  {
    const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
                                             "/usr/share/dict/cracklib-small",
                                             "/etc/ssh/moduli"};

    for (const auto &bigFile : BIG_FILES) {
      if (!std::filesystem::exists(bigFile))
        continue;

      daggy::Task task{
          .job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
                               "/usr/bin/cat", bigFile}}}};
      task.job.merge(defaultJobValues);

      auto recFuture = ex.execute("command", task);
      auto rec = recFuture.get();

      REQUIRE(rec.rc == 0);
      REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
      REQUIRE(rec.errorLog.empty());
      break;
    }
  }

  SECTION("Parameter Expansion")
  {
    std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
    auto params = daggy::configFromJSON(testParams);

    std::string taskJSON =
        R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
    auto tasks = daggy::tasksFromJSON(taskJSON, defaultJobValues);

    auto result = daggy::expandTaskSet(tasks, ex, params);
    REQUIRE(result.size() == 2);
  }

  SECTION("Build with expansion")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
    auto params = daggy::configFromJSON(testParams);
    std::string testTasks =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
    auto tasks = daggy::expandTaskSet(
        daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
    REQUIRE(tasks.size() == 4);
  }

  SECTION("Build with expansion using parents instead of children")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
    auto params = daggy::configFromJSON(testParams);
    std::string testTasks =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
    auto tasks = daggy::expandTaskSet(
        daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
    REQUIRE(tasks.size() == 4);
  }
}
#endif

View File

@@ -1,35 +1,48 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>

#include "daggy/Serialization.hpp"

namespace fs = std::filesystem;

TEST_CASE("parameter_deserialization", "[deserialize_parameters]")
{
  SECTION("Basic Parse")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
    auto params = daggy::configFromJSON(testParams);
    REQUIRE(params.size() == 2);
    REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
    REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
  }
  SECTION("Invalid JSON")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
    REQUIRE_THROWS(daggy::configFromJSON(testParams));
  }
  SECTION("Non-string Keys")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
    REQUIRE_THROWS(daggy::configFromJSON(testParams));
  }
  SECTION("Non-array/Non-string values")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
    REQUIRE_THROWS(daggy::configFromJSON(testParams));
  }
}

TEST_CASE("task_deserialization", "[deserialize_task]")
{
  SECTION("Build with no expansion")
  {
    std::string testTasks = R"({
  "A": {
    "job": { "command": ["/bin/echo", "A"] },
    "children": ["C"]
@@ -42,12 +55,13 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
"job": {"command": ["/bin/echo", "C"]}
}
})";
auto tasks = daggy::tasksFromJSON(testTasks);
REQUIRE(tasks.size() == 3);
}
auto tasks = daggy::tasksFromJSON(testTasks);
REQUIRE(tasks.size() == 3);
}
SECTION("Build with job defaults") {
std::string testTasks = R"({
SECTION("Build with job defaults")
{
std::string testTasks = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
"children": ["B"]
@@ -59,30 +73,32 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
    }
  }
})";
    daggy::ConfigValues jobDefaults{{"runtime", "60"}, {"memory", "300M"}};
    auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults);
    REQUIRE(tasks.size() == 2);
    REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60");
    REQUIRE(std::get<std::string>(tasks["A"].job["memory"]) == "300M");
    REQUIRE(std::get<std::string>(tasks["B"].job["runtime"]) == "60");
    REQUIRE(std::get<std::string>(tasks["B"].job["memory"]) == "1G");
  }
}

TEST_CASE("task_serialization", "[serialize_tasks]")
{
  SECTION("Build with no expansion")
  {
    std::string testTasks =
        R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
    auto tasks = daggy::tasksFromJSON(testTasks);

    auto genJSON = daggy::tasksToJSON(tasks);
    auto regenTasks = daggy::tasksFromJSON(genJSON);

    REQUIRE(regenTasks.size() == tasks.size());

    for (const auto &[name, task] : regenTasks) {
      const auto &other = tasks[name];
      REQUIRE(task == other);
    }
  }
}

View File

@@ -1,83 +1,85 @@
#include <catch2/catch.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <pistache/client.h>
#include <rapidjson/document.h>

namespace rj = rapidjson;

Pistache::Http::Response REQUEST(std::string url, std::string payload = "")
{
  Pistache::Http::Experimental::Client client;
  client.init();

  Pistache::Http::Response response;
  auto reqSpec = (payload.empty() ? client.get(url) : client.post(url));
  reqSpec.timeout(std::chrono::seconds(2));
  if (!payload.empty()) {
    reqSpec.body(payload);
  }
  auto request = reqSpec.send();

  bool ok = false, error = false;
  std::string msg;
  request.then(
      [&](Pistache::Http::Response rsp) {
        ok = true;
        response = rsp;
      },
      [&](std::exception_ptr ptr) {
        error = true;
        try {
          std::rethrow_exception(ptr);
        }
        catch (std::exception &e) {
          msg = e.what();
        }
      });

  Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
  barrier.wait_for(std::chrono::seconds(2));
  client.shutdown();

  if (error) {
    throw std::runtime_error(msg);
  }
  return response;
}

TEST_CASE("rest_endpoint", "[server_basic]")
{
  std::stringstream ss;
  daggy::executors::task::ForkingTaskExecutor executor(10);
  daggy::loggers::dag_run::OStreamLogger logger(ss);
  Pistache::Address listenSpec("localhost", Pistache::Port(0));

  const size_t nDAGRunners = 10, nWebThreads = 10;

  daggy::Server server(listenSpec, logger, executor, nDAGRunners);
  server.init(nWebThreads);
  server.start();

  const std::string host = "localhost:";
  const std::string baseURL = host + std::to_string(server.getPort());

  SECTION("Ready Endpoint")
  {
    auto response = REQUEST(baseURL + "/ready");
    REQUIRE(response.code() == Pistache::Http::Code::Ok);
  }

  SECTION("Querying a non-existent dagrunid should fail ")
  {
    auto response = REQUEST(baseURL + "/v1/dagrun/100");
    REQUIRE(response.code() != Pistache::Http::Code::Ok);
  }

  SECTION("Simple DAGRun Submission")
  {
    std::string dagRun = R"({
  "name": "unit_server",
  "parameters": { "FILE": [ "A", "B" ] },
  "tasks": {
@@ -88,87 +90,89 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
  }
})";

    // Submit, and get the runID
    daggy::DAGRunID runID = 0;
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun);
      REQUIRE(response.code() == Pistache::Http::Code::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body().c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("runID"));

      runID = doc["runID"].GetUint64();
    }

    // Ensure our runID shows up in the list of running DAGs
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/");
      REQUIRE(response.code() == Pistache::Http::Code::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body().c_str()));
      REQUIRE(doc.IsArray());
      REQUIRE(doc.Size() >= 1);

      // Ensure that our DAG is in the list and matches our given DAGRunID
      bool found = false;
      const auto &runs = doc.GetArray();
      for (size_t i = 0; i < runs.Size(); ++i) {
        const auto &run = runs[i];
        REQUIRE(run.IsObject());
        REQUIRE(run.HasMember("name"));
        REQUIRE(run.HasMember("runID"));
        std::string runName = run["name"].GetString();
        if (runName == "unit_server") {
          REQUIRE(run["runID"].GetUint64() == runID);
          found = true;
          break;
        }
      }
      REQUIRE(found);
    }

    // Wait until our DAG is complete
    bool complete = true;
    for (auto i = 0; i < 10; ++i) {
      auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
      REQUIRE(response.code() == Pistache::Http::Code::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body().c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("taskStates"));

      const auto &taskStates = doc["taskStates"].GetObject();
      size_t nStates = 0;
      for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
           ++it) {
        nStates++;
      }
      REQUIRE(nStates == 3);

      complete = true;
      for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
           ++it) {
        std::string state = it->value.GetString();
        if (state != "COMPLETED") {
          complete = false;
          break;
        }
      }
      if (complete)
        break;

      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    REQUIRE(complete);

    std::this_thread::sleep_for(std::chrono::seconds(2));
    for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
      REQUIRE(fs::exists(pth));
      fs::remove(pth);
    }
  }

  server.shutdown();
}

View File

@@ -1,41 +1,45 @@
#include <catch2/catch.hpp>
#include <future>
#include <iostream>

#include "daggy/ThreadPool.hpp"

using namespace daggy;

TEST_CASE("threadpool", "[threadpool]")
{
  std::atomic<uint32_t> cnt(0);
  ThreadPool tp(10);

  std::vector<std::future<uint32_t>> rets;

  SECTION("Adding large tasks queues with return values")
  {
    auto tq = std::make_shared<daggy::TaskQueue>();
    std::vector<std::future<uint32_t>> res;
    for (size_t i = 0; i < 100; ++i)
      res.emplace_back(std::move(tq->addTask([&cnt]() {
        cnt++;
        return cnt.load();
      })));
    tp.addTasks(tq);
    for (auto &r : res)
      r.get();
    REQUIRE(cnt == 100);
  }

  SECTION("Slow runs")
  {
    std::vector<std::future<void>> res;
    using namespace std::chrono_literals;
    for (size_t i = 0; i < 100; ++i)
      res.push_back(tp.addTask([&cnt]() {
        std::this_thread::sleep_for(20ms);
        cnt++;
        return;
      }));
    for (auto &r : res)
      r.get();
    REQUIRE(cnt == 100);
  }
}

View File

@@ -1,74 +1,79 @@
#include <algorithm>
#include <catch2/catch.hpp>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <random>

#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"

namespace fs = std::filesystem;

TEST_CASE("string_utilities", "[utilities_string]")
{
  std::string test = "/this/is/{{A}}/test/{{A}}";
  auto res = daggy::globalSub(test, "{{A}}", "hello");
  REQUIRE(res == "/this/is/hello/test/hello");
}

TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
{
  SECTION("Basic expansion")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
    auto params = daggy::configFromJSON(testParams);
    std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}",
                                 "{{TYPE}}"};
    auto allCommands = daggy::interpolateValues(cmd, params);
    REQUIRE(allCommands.size() == 6);
  }
  SECTION("Skip over unused parameters")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
    auto params = daggy::configFromJSON(testParams);
    std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
    auto allCommands = daggy::interpolateValues(cmd, params);
    // TYPE isn't used, so it's just |DATE| * |SOURCE|
    REQUIRE(allCommands.size() == 2);
  }
  SECTION("Expand within a command part")
  {
    std::string testParams{
        R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
    auto params = daggy::configFromJSON(testParams);
    std::vector<std::string> cmd{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"};
    auto result = daggy::interpolateValues(cmd, params);
    // TYPE isn't used, so it's just |DATE| * |SOURCE|
    REQUIRE(result.size() == 4);
  }
}

TEST_CASE("dag_runner_order", "[dagrun_order]")
{
  daggy::executors::task::NoopTaskExecutor ex;
  std::stringstream ss;
  daggy::loggers::dag_run::OStreamLogger logger(ss);
  daggy::TimePoint startTime = daggy::Clock::now();

  std::string testParams{
      R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
  auto params = daggy::configFromJSON(testParams);

  std::string taskJSON = R"({
  "A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
  "B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
  "C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
@@ -76,160 +81,175 @@ TEST_CASE("dag_runner_order", "[dagrun_order]") {
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex, params);
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex, params);
REQUIRE(tasks.size() == 20);
REQUIRE(tasks.size() == 20);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
// Ensure the run order
auto rec = logger.getDAGRun(runID);
daggy::TimePoint stopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes;
minTimes.fill(startTime);
std::array<daggy::TimePoint, 5> maxTimes;
maxTimes.fill(stopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 65;
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
for (size_t i = 0; i < 5; ++i) {
for (size_t j = i + 1; j < 4; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
TEST_CASE("dag_runner", "[utilities_dag_runner]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Simple execution")
{
std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("B") +
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("C") + R"("]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
// Ensure the run order
auto rec = logger.getDAGRun(runID);
daggy::TimePoint stopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes; minTimes.fill(startTime);
std::array<daggy::TimePoint, 5> maxTimes; maxTimes.fill(stopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 65;
auto & startTime = minTimes[idx];
auto & stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
for (const auto &[_, file] : files) {
REQUIRE(fs::exists(file));
fs::remove(file);
}
for (size_t i = 0; i < 5; ++i) {
for (size_t j = i+1; j < 4; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
TEST_CASE("dag_runner", "[utilities_dag_runner]") {
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Simple execution") {
std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"},
{"B", prefix + "_B"},
{"C", prefix + "_C"}};
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+ files.at("A") + R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+ files.at("B") + R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+ files.at("C") + R"("]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
for (const auto &[_, file] : files) {
REQUIRE(fs::exists(file));
fs::remove(file);
}
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto &[_, attempts]: record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
}
SECTION("Recovery from Error") {
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth: paths) {
if (fs::exists(pth)) fs::remove_all(pth);
}
};
cleanup();
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+ goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+ badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
tryDAG.resetRunning();
auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() == 2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() == 1); // C wasn't run because B errored
cleanup();
}
SECTION("Generator tasks") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string generatorOutput = R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
ofh.close();
std::stringstream jsonTasks;
jsonTasks << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )" << std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
auto baseTasks = daggy::tasksFromJSON(jsonTasks.str());
REQUIRE(baseTasks.size() == 2);
auto tasks = daggy::expandTaskSet(baseTasks, ex, params);
REQUIRE(tasks.size() == 2);
auto dag = daggy::buildDAGFromTasks(tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun("generator_run", tasks);
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
REQUIRE(finalDAG.allVisited());
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto &[taskName, attempts]: record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A_0"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C_0"].children.empty());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto &[_, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
}
SECTION("Recovery from Error")
{
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth : paths) {
if (fs::exists(pth))
fs::remove_all(pth);
}
};
cleanup();
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
tryDAG.resetRunning();
auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() ==
2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() ==
1); // C wasn't run because B errored
cleanup();
}
SECTION("Generator tasks")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string generatorOutput =
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
ofh.close();
std::stringstream jsonTasks;
jsonTasks
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
<< std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
auto baseTasks = daggy::tasksFromJSON(jsonTasks.str());
REQUIRE(baseTasks.size() == 2);
auto tasks = daggy::expandTaskSet(baseTasks, ex, params);
REQUIRE(tasks.size() == 2);
auto dag = daggy::buildDAGFromTasks(tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun("generator_run", tasks);
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
REQUIRE(finalDAG.allVisited());
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto &[taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A_0"].children ==
std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C_0"].children.empty());
}
}