Large re-organization to split daggyd away from the core libdaggy.

This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
Author: Ian Roddis
Date:   2021-10-18 16:28:40 -03:00
Parent: 612bc8af8a
Commit: 470a6f2bb7
59 changed files with 586 additions and 52 deletions

@@ -0,0 +1,20 @@
project(libdaggy_tests)
add_executable(${PROJECT_NAME} main.cpp
# unit tests
unit_dag.cpp
unit_dagrunner.cpp
unit_dagrun_loggers.cpp
unit_executor_forkingexecutor.cpp
unit_executor_slurmexecutor.cpp
unit_serialization.cpp
unit_threadpool.cpp
unit_utilities.cpp
# integration tests
int_basic.cpp
# Performance checks
perf_dag.cpp
)
target_link_libraries(${PROJECT_NAME} libdaggy stdc++fs Catch2::Catch2)
add_test(${PROJECT_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME})

@@ -0,0 +1,7 @@
#include <catch2/catch.hpp>
#include <iostream>
TEST_CASE("General tests", "[general]")
{
REQUIRE(1 == 1);
}

libdaggy/tests/main.cpp (new file)
@@ -0,0 +1,15 @@
#include <iostream>
#include "daggy/DAG.hpp"
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
TEST_CASE("Sanity tests", "[sanity]")
{
REQUIRE(1 == 1);
}
// compile and run (assumes the Catch2 single header is on the include path):
// g++ -std=c++17 -o test main.cpp && ./test

@@ -0,0 +1,64 @@
#ifdef CATCH_CONFIG_ENABLE_BENCHMARKING
#include <catch2/catch.hpp>
#include <iostream>
#include <random> // for std::random_device, std::mt19937, and the distributions below
#include <unordered_set>
#include "daggy/DAG.hpp"
inline std::string taskName(size_t i)
{
return "action_node" + std::to_string(i);
}
daggy::DAG<std::string, size_t> createDAG(size_t N_NODES, size_t MAX_CHILDREN)
{
daggy::DAG<std::string, size_t> dag;
for (size_t i = 0; i < N_NODES; ++i) {
dag.addVertex(taskName(i), i);
}
static std::random_device dev;
static std::mt19937 rng(dev());
std::uniform_int_distribution<size_t> nDepDist(1, MAX_CHILDREN);
for (size_t i = 0; i < N_NODES - 1; ++i) {
std::string parent = taskName(i);
std::uniform_int_distribution<size_t> depDist(i + 1, N_NODES - 1);
size_t nChildren = std::min(nDepDist(rng), N_NODES - i);
std::unordered_set<size_t> found;
size_t tries = 0;
while (found.size() < nChildren) {
++tries;
if (tries > nChildren * 2)
break;
auto child = depDist(rng);
if (found.count(child) > 0)
continue;
found.insert(child);
dag.addEdge(parent, taskName(child));
}
}
return dag;
}
const size_t N_NODES = 10'000;
const size_t MAX_CHILDREN = 10;
static auto DAG = createDAG(N_NODES, MAX_CHILDREN);
TEST_CASE("massive DAGs", "[dag_performance]")
{
BENCHMARK_ADVANCED("dag.reset")(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] { return DAG.reset(); });
};
BENCHMARK_ADVANCED("dag.isValid")(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] { return DAG.isValid(); });
};
}
#endif

@@ -0,0 +1,89 @@
#include <catch2/catch.hpp>
#include <iostream>
#include "daggy/DAG.hpp"
TEST_CASE("dag_construction", "[dag]")
{
daggy::DAG<size_t, size_t> dag;
REQUIRE(dag.size() == 0);
REQUIRE(dag.empty());
REQUIRE_NOTHROW(dag.addVertex(0, 0));
for (size_t i = 1; i < 10; ++i) {
dag.addVertex(i, i);
REQUIRE(dag.hasVertex(i));
REQUIRE(dag.getVertex(i).data == i);
dag.addEdge(i - 1, i);
}
REQUIRE(dag.size() == 10);
REQUIRE(!dag.empty());
// Adding an edge that creates a cycle leaves the DAG invalid
dag.addEdge(9, 5);
REQUIRE(!dag.isValid());
// Bounds checking
SECTION("addEdge Bounds Checking")
{
REQUIRE_THROWS(dag.addEdge(20, 0));
REQUIRE_THROWS(dag.addEdge(0, 20));
}
}
TEST_CASE("dag_traversal", "[dag]")
{
daggy::DAG<size_t, size_t> dag;
const int N_VERTICES = 10;
for (int i = 0; i < N_VERTICES; ++i) {
dag.addVertex(i, i);
}
/*
0 ----------------------\
1 -----------\           \          /-----> 8
2 --- 3 ----> 5 --------> 6 -----> 7
4 --------------------------------/ \-----> 9
*/
std::vector<std::pair<int, int>> edges{{0, 6}, {1, 5}, {5, 6}, {6, 7}, {2, 3},
{3, 5}, {4, 7}, {7, 8}, {7, 9}};
for (const auto &[from, to] : edges) {
dag.addEdge(from, to);
}
SECTION("Basic Traversal")
{
dag.reset();
std::vector<size_t> visitOrder(N_VERTICES);
size_t i = 0;
while (!dag.allVisited()) {
auto o = dag.visitNext();
REQUIRE(o.has_value());
const auto v = o.value();
dag.completeVisit(v.first);
visitOrder[v.first] = i;
++i;
}
// Ensure visit order is preserved
for (const auto &[from, to] : edges) {
REQUIRE(visitOrder[from] <= visitOrder[to]);
}
}
SECTION("Iteration")
{
size_t nVisited = 0;
dag.forEach([&](auto &k) {
(void)k;
++nVisited;
});
REQUIRE(nVisited == dag.size());
}
}
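
The invariant checked in "Basic Traversal" (every edge's source is visited no later than its target) is exactly what a topological order guarantees. For reference, here is a minimal standalone sketch of the same idea using Kahn's algorithm over a plain adjacency list; the names and data layout are illustrative only, not daggy's internals:

#include <cstddef>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

// Kahn's algorithm: repeatedly emit a vertex whose parents have all been
// emitted already, which is the invariant the traversal test asserts.
std::vector<size_t> topoOrder(const std::vector<std::vector<size_t>> &adj)
{
  std::vector<size_t> inDegree(adj.size(), 0);
  for (const auto &children : adj)
    for (size_t c : children)
      ++inDegree[c];
  std::queue<size_t> ready; // vertices with no unvisited parents
  for (size_t v = 0; v < adj.size(); ++v)
    if (inDegree[v] == 0)
      ready.push(v);
  std::vector<size_t> order;
  while (!ready.empty()) {
    size_t v = ready.front();
    ready.pop();
    order.push_back(v);
    for (size_t c : adj[v]) // like completeVisit(): release the children
      if (--inDegree[c] == 0)
        ready.push(c);
  }
  return order; // shorter than adj.size() iff the graph has a cycle
}

int main()
{
  // The same edges as the test's diagram
  std::vector<std::vector<size_t>> adj(10);
  const std::vector<std::pair<size_t, size_t>> edges{
      {0, 6}, {1, 5}, {5, 6}, {6, 7}, {2, 3}, {3, 5}, {4, 7}, {7, 8}, {7, 9}};
  for (const auto &[from, to] : edges)
    adj[from].push_back(to);
  for (size_t v : topoOrder(adj))
    std::cout << v << ' ';
  std::cout << '\n';
}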

@@ -0,0 +1,129 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include "daggy/Serialization.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
#include "daggy/loggers/dag_run/RedisLogger.hpp"
using namespace daggy;
using namespace daggy::loggers::dag_run;
const TaskSet SAMPLE_TASKS{
{"work_a",
Task{.definedName{"work_a"},
.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}},
.children{"c"}}},
{"work_b",
Task{.definedName{"work_b"},
.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}},
.children{"c"}}},
{"work_c",
Task{.definedName{"work_c"},
.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};
namespace {
void testDAGRunInit(DAGRunLogger &logger, const std::string &tag,
const TaskSet &tasks)
{
auto runID = logger.startDAGRun(DAGSpec{.tag = tag, .tasks = tasks});
// Verify run shows up in the list
SECTION("New run shows up in list of runs")
{
auto runs = logger.queryDAGRuns();
REQUIRE(!runs.empty());
auto it = std::find_if(runs.begin(), runs.end(), [runID](const auto &r) {
return r.runID == runID;
});
REQUIRE(it != runs.end());
REQUIRE(it->tag == tag);
REQUIRE(it->runState == +RunState::QUEUED);
}
// Verify dagSpec matches
SECTION("Can retrieve DAG Spec")
{
auto spec = logger.getDAGSpec(runID);
REQUIRE(spec.tag == tag);
REQUIRE(spec.tasks == tasks);
}
// Verify states
SECTION("DAG State matches expectations")
{
REQUIRE(logger.getDAGRunState(runID) == +RunState::QUEUED);
for (const auto &[k, _] : tasks) {
REQUIRE(logger.getTaskState(runID, k) == +RunState::QUEUED);
}
}
// Verify integrity of run
SECTION("Can retrieve the full run")
{
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.dagSpec.tag == tag);
REQUIRE(dagRun.dagSpec.tasks == tasks);
REQUIRE(dagRun.taskRunStates.size() == tasks.size());
auto nonQueuedTask = std::find_if(
dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
[](const auto &a) { return a.second != +RunState::QUEUED; });
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
REQUIRE(dagRun.dagStateChanges.size() == 1);
REQUIRE(dagRun.dagStateChanges.back().state == +RunState::QUEUED);
}
// Update DAG state and ensure that it's updated
SECTION("Can update DAG state and retrieve new state")
{
logger.updateDAGRunState(runID, RunState::RUNNING);
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.dagStateChanges.back().state == +RunState::RUNNING);
}
// Update a task state
SECTION("Can update task state and retrieve new state")
{
for (const auto &[k, v] : tasks)
logger.updateTaskState(runID, k, RunState::RUNNING);
auto dagRun = logger.getDAGRun(runID);
for (const auto &[k, v] : tasks) {
REQUIRE(dagRun.taskRunStates.at(k) == +RunState::RUNNING);
}
}
SECTION("Log task attempt and retrieve it")
{
std::string error = "long error string\nwith new\n lines";
logger.logTaskAttempt(runID, "work_a",
AttemptRecord{.rc = 2, .errorLog = error});
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.taskAttempts["work_a"].size() == 1);
REQUIRE(dagRun.taskAttempts["work_a"][0].errorLog == error);
REQUIRE(dagRun.taskAttempts["work_a"][0].rc == 2);
}
}
} // namespace
TEST_CASE("ostream_logger", "[ostream_logger]")
{
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
#ifdef DAGGY_ENABLE_REDIS
TEST_CASE("redis_logger", "[redis_logger]")
{
daggy::loggers::dag_run::RedisLogger logger;
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
#endif
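
Condensed from the sections above, the DAGRunLogger lifecycle reads linearly as follows. This is only a usage sketch built from the calls the test exercises (startDAGRun, updateDAGRunState, updateTaskState, logTaskAttempt); error handling and the query methods are left out:

#include <iostream>
#include <sstream>
#include "daggy/Serialization.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"

int main()
{
  std::stringstream ss;
  daggy::loggers::dag_run::OStreamLogger logger(ss);

  // One task, declared the same way the test's JSON fixtures are
  auto tasks = daggy::tasksFromJSON(
      R"({"work_a": {"job": {"command": ["/bin/echo", "a"]}}})");
  auto runID = logger.startDAGRun(
      daggy::DAGSpec{.tag = "lifecycle_demo", .tasks = tasks});

  // Runs and tasks start QUEUED; drive both through to completion
  logger.updateDAGRunState(runID, daggy::RunState::RUNNING);
  logger.updateTaskState(runID, "work_a", daggy::RunState::RUNNING);
  logger.logTaskAttempt(runID, "work_a", daggy::AttemptRecord{.rc = 0});
  logger.updateTaskState(runID, "work_a", daggy::RunState::COMPLETED);
  logger.updateDAGRunState(runID, daggy::RunState::COMPLETED);

  std::cout << ss.str(); // the logger's record of the whole run
}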

@@ -0,0 +1,256 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include "daggy/DAGRunner.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
TEST_CASE("dagrunner", "[dagrunner][dagrunner_order_preservation]")
{
daggy::executors::task::NoopTaskExecutor ex;
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::TimePoint globalStartTime = daggy::Clock::now();
daggy::DAGSpec dagSpec;
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
std::string taskJSON = R"({
"A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
"B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
"C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
"D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
})";
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex,
dagSpec.taskConfig.variables);
REQUIRE(dagSpec.tasks.size() == 20);
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto endDAG = runner.run();
REQUIRE(endDAG.allVisited());
// Ensure the run order
auto rec = logger.getDAGRun(runID);
daggy::TimePoint globalStopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes;
minTimes.fill(globalStartTime);
std::array<daggy::TimePoint, 5> maxTimes;
maxTimes.fill(globalStopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 'A'; // bucket attempts by task letter (A..E)
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
for (size_t i = 0; i < 5; ++i) {
for (size_t j = i + 1; j < 5; ++j) { // compare against every later letter, E included
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
TEST_CASE("DAGRunner simple execution", "[dagrunner][dagrunner_simple]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::DAGSpec dagSpec;
SECTION("Simple execution")
{
std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("B") +
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("C") + R"("]}}})";
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto endDAG = runner.run();
REQUIRE(endDAG.allVisited());
for (const auto &[_, file] : files) {
REQUIRE(fs::exists(file));
fs::remove(file);
}
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto &[_, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
}
}
TEST_CASE("DAG Runner Restart old DAG", "[dagrunner][dagrunner_restart]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::DAGSpec dagSpec;
SECTION("Recovery from Error")
{
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth : paths) {
if (fs::exists(pth))
fs::remove_all(pth);
}
};
cleanup();
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix + R"(C"]}}})";
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto tryDAG = runner.run();
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
runner.resetRunning();
auto endDAG = runner.run();
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() ==
2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() ==
1); // C ran only on the resumed pass, since B errored the first time
cleanup();
}
}
TEST_CASE("DAG Runner Generator Tasks", "[dagrunner][dagrunner_generator]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::DAGSpec dagSpec;
SECTION("Generator tasks")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
std::string generatorOutput =
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
ofh.close();
daggy::TimePoint globalStartTime = daggy::Clock::now();
std::stringstream jsonTasks;
jsonTasks
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
<< std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str());
REQUIRE(dagSpec.tasks.size() == 2);
REQUIRE(dagSpec.tasks["A"].children ==
std::unordered_set<std::string>{"C"});
dagSpec.tasks =
daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables);
REQUIRE(dagSpec.tasks.size() == 2);
REQUIRE(dagSpec.tasks["A_0"].children ==
std::unordered_set<std::string>{"C"});
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto finalDAG = runner.run();
REQUIRE(finalDAG.allVisited());
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.dagSpec.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto &[taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.dagSpec.tasks["A_0"].children ==
std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.dagSpec.tasks["B_0"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.dagSpec.tasks["B_1"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.dagSpec.tasks["C_0"].children.empty());
// Ensure they were run in the right order
// All A's get run before B's, which run before C's
daggy::TimePoint globalStopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 3> minTimes;
minTimes.fill(globalStartTime);
std::array<daggy::TimePoint, 3> maxTimes;
maxTimes.fill(globalStopTime);
for (const auto &[k, v] : record.taskAttempts) {
size_t idx = k[0] - 'A'; // bucket attempts by task letter (A..C)
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
for (size_t i = 0; i < 3; ++i) {
for (size_t j = i + 1; j < 3; ++j) { // compare against every later letter, C included
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
}

@@ -0,0 +1,181 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <thread>
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
namespace fs = std::filesystem;
using namespace std::chrono_literals; // for the 1s duration literal used below
TEST_CASE("forking_executor", "[forking_executor]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
SECTION("Simple Run")
{
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/echo", "abc", "123"}}}};
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
REQUIRE(rec.errorLog.empty());
}
SECTION("Simple Run using commandString")
{
daggy::Task task{.job{{"commandString", R"(/usr/bin/echo "abc 123")"}}};
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
REQUIRE(rec.errorLog.empty());
}
SECTION("Simple run with environment")
{
// Create the shell script
auto scriptFile = fs::current_path() / "fork_simple.sh";
if (fs::exists(scriptFile))
fs::remove_all(scriptFile);
std::ofstream ofh(scriptFile);
ofh << "#!/bin/bash\necho \"${DAGGY_TEST_VAR}\"\necho "
"\"${DAGGY_TEST_VAR2}\"\n";
ofh.close();
fs::permissions(scriptFile, fs::perms::owner_all,
fs::perm_options::replace);
std::string valOne = "funky_times";
std::string valTwo = "bleep_bloop";
daggy::Task task{.job{{"command",
daggy::executors::task::ForkingTaskExecutor::Command{
scriptFile.string()}},
{"environment", std::vector<std::string>{
"DAGGY_TEST_VAR=" + valOne,
"DAGGY_TEST_VAR2=" + valTwo}}}};
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
REQUIRE(rec.outputLog.find(valOne) != std::string::npos);
REQUIRE(rec.outputLog.find(valTwo) != std::string::npos);
REQUIRE(rec.errorLog.empty());
// if (fs::exists(scriptFile)) fs::remove_all(scriptFile);
}
SECTION("Error Run")
{
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 2);
REQUIRE(rec.errorLog.size() >= 20);
REQUIRE(rec.outputLog.empty());
}
SECTION("Killing a long task")
{
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/sleep", "30"}}}};
auto start = daggy::Clock::now();
auto recFuture = ex.execute(0, "command", task);
std::this_thread::sleep_for(1s);
ex.stop(0, "command");
auto rec = recFuture.get();
auto stop = daggy::Clock::now();
REQUIRE(rec.rc == 9);
REQUIRE(rec.errorLog.empty());
REQUIRE(rec.outputLog.empty());
REQUIRE(rec.executorLog == "Killed");
REQUIRE(
std::chrono::duration_cast<std::chrono::seconds>(stop - start).count() <
20);
}
SECTION("Large Output")
{
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
"/usr/share/dict/cracklib-small",
"/etc/ssh/moduli"};
for (const auto &bigFile : BIG_FILES) {
if (!std::filesystem::exists(bigFile))
continue;
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
REQUIRE(rec.errorLog.empty());
}
}
SECTION("Parameter Expansion")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON =
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
auto result = daggy::expandTaskSet(tasks, ex, params);
REQUIRE(result.size() == 2);
}
SECTION("Build with expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks =
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
REQUIRE(tasks.size() == 4);
}
SECTION("Build with expansion using parents instead of children")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks =
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
REQUIRE(tasks.size() == 4);
}
}
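
The behaviour these sections pin down (the exit code in rc, captured stdout in outputLog, and a killed task surfacing rc 9, i.e. SIGKILL) is the classic fork/exec-with-a-pipe pattern. A minimal standalone illustration of that technique follows; it is not ForkingTaskExecutor's actual implementation, just the shape of it, with stderr capture and kill support omitted for brevity:

#include <sys/wait.h>
#include <unistd.h>
#include <array>
#include <iostream>
#include <string>

// Run a command, capture its stdout, and report its exit status: the
// contract the AttemptRecord fields (rc, outputLog) reflect above.
int runAndCapture(const char *path, char *const argv[], std::string &output)
{
  int fds[2];
  if (pipe(fds) != 0)
    return -1;
  pid_t pid = fork();
  if (pid == 0) { // child: wire stdout into the pipe, then exec
    dup2(fds[1], STDOUT_FILENO);
    close(fds[0]);
    close(fds[1]);
    execv(path, argv);
    _exit(127); // exec failed
  }
  close(fds[1]); // parent: read until the child closes its end
  std::array<char, 4096> buf;
  ssize_t n;
  while ((n = read(fds[0], buf.data(), buf.size())) > 0)
    output.append(buf.data(), static_cast<size_t>(n));
  close(fds[0]);
  int status = 0;
  waitpid(pid, &status, 0);
  if (WIFEXITED(status))
    return WEXITSTATUS(status);
  if (WIFSIGNALED(status))
    return WTERMSIG(status); // e.g. 9 after SIGKILL, as the kill test expects
  return -1;
}

int main()
{
  char *const argv[] = {const_cast<char *>("/bin/echo"),
                        const_cast<char *>("abc"), nullptr};
  std::string out;
  int rc = runAndCapture("/bin/echo", argv, out);
  std::cout << "rc=" << rc << " output=" << out;
}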

@@ -0,0 +1,211 @@
#include <unistd.h>
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/SlurmTaskExecutor.hpp"
namespace fs = std::filesystem;
#ifdef DAGGY_ENABLE_SLURM
TEST_CASE("slurm environment", "[slurm_env]")
{
daggy::executors::task::SlurmTaskExecutor ex;
daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
{"minMemoryMB", "100"},
{"minTmpDiskMB", "0"},
{"priority", "1"},
{"timeLimitSeconds", "200"},
{"userID", std::to_string(getuid())},
{"workDir", fs::current_path().string()},
{"tmpDir", fs::current_path().string()}};
}
TEST_CASE("slurm_execution", "[slurm_executor]")
{
daggy::executors::task::SlurmTaskExecutor ex;
daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
{"minMemoryMB", "100"},
{"minTmpDiskMB", "0"},
{"priority", "1"},
{"timeLimitSeconds", "200"},
{"userID", std::to_string(getuid())},
{"workDir", fs::current_path().string()},
{"tmpDir", fs::current_path().string()}};
SECTION("Simple Run")
{
daggy::Task task{.job{
{"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}}}};
task.job.merge(defaultJobValues);
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
REQUIRE(rec.errorLog.empty());
}
SECTION("Simple run with environment")
{
// Create the shell script
auto scriptFile = fs::current_path() / "slurm_simple_env.sh";
if (fs::exists(scriptFile))
fs::remove_all(scriptFile);
std::ofstream ofh(scriptFile);
ofh << "#!/bin/bash\necho \"${DAGGY_TEST_VAR}\"\necho "
"\"${DAGGY_TEST_VAR2}\"\n";
ofh.close();
fs::permissions(scriptFile, fs::perms::owner_all,
fs::perm_options::replace);
std::string valOne = "funky_times";
std::string valTwo = "bleep_bloop";
daggy::Task task{.job{{"command",
daggy::executors::task::SlurmTaskExecutor::Command{
scriptFile.string()}},
{"environment", std::vector<std::string>{
"DAGGY_TEST_VAR=" + valOne,
"DAGGY_TEST_VAR2=" + valTwo}}}};
task.job.merge(defaultJobValues);
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
REQUIRE(rec.outputLog.find(valOne) != std::string::npos);
REQUIRE(rec.outputLog.find(valTwo) != std::string::npos);
REQUIRE(rec.errorLog.empty());
if (fs::exists(scriptFile))
fs::remove_all(scriptFile);
}
SECTION("Simple Run using commandString")
{
daggy::Task task{.job{{"commandString", R"(/usr/bin/echo "abc 123")"}}};
task.job.merge(defaultJobValues);
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() >= 6);
REQUIRE(rec.errorLog.empty());
}
SECTION("Error Run")
{
daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc != 0);
REQUIRE(rec.errorLog.size() >= 20);
REQUIRE(rec.outputLog.empty());
}
SECTION("Killing a long task")
{
daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/sleep", "30"}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute(0, "command", task);
ex.stop(0, "command");
auto rec = recFuture.get();
REQUIRE(rec.rc == 9);
REQUIRE(rec.errorLog.empty());
REQUIRE(rec.outputLog.empty());
REQUIRE(rec.executorLog == "Job cancelled by user.\n");
}
SECTION("Large Output")
{
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
"/usr/share/dict/cracklib-small",
"/etc/ssh/moduli"};
for (const auto &bigFile : BIG_FILES) {
if (!std::filesystem::exists(bigFile))
continue;
daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
REQUIRE(rec.errorLog.empty());
break;
}
}
SECTION("Parameter Expansion")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON =
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON, defaultJobValues);
auto result = daggy::expandTaskSet(tasks, ex, params);
REQUIRE(result.size() == 2);
}
SECTION("Build with expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::expandTaskSet(
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
REQUIRE(tasks.size() == 4);
}
SECTION("Build with expansion using parents instead of children")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks = daggy::expandTaskSet(
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
REQUIRE(tasks.size() == 4);
}
}
#endif

@@ -0,0 +1,104 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
#include "daggy/Serialization.hpp"
namespace fs = std::filesystem;
TEST_CASE("parameter_deserialization", "[deserialize_parameters]")
{
SECTION("Basic Parse")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
REQUIRE(params.size() == 2);
REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
}
SECTION("Invalid JSON")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}
SECTION("Non-string Keys")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}
SECTION("Non-array/Non-string values")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}
}
TEST_CASE("task_deserialization", "[deserialize_task]")
{
SECTION("Build with no expansion")
{
std::string testTasks = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
"children": ["C"]
},
"B": {
"job": {"command": ["/bin/echo", "B"]},
"children": ["C"]
},
"C": {
"job": {"command": ["/bin/echo", "C"]}
}
})";
auto tasks = daggy::tasksFromJSON(testTasks);
REQUIRE(tasks.size() == 3);
}
SECTION("Build with job defaults")
{
std::string testTasks = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
"children": ["B"]
},
"B": {
"job": {
"command": ["/bin/echo", "C"],
"memory": "1G"
}
}
})";
daggy::ConfigValues jobDefaults{{"runtime", "60"}, {"memory", "300M"}};
auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults);
REQUIRE(tasks.size() == 2);
REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60");
REQUIRE(std::get<std::string>(tasks["A"].job["memory"]) == "300M");
REQUIRE(std::get<std::string>(tasks["B"].job["runtime"]) == "60");
REQUIRE(std::get<std::string>(tasks["B"].job["memory"]) == "1G");
}
}
TEST_CASE("task_serialization", "[serialize_tasks]")
{
SECTION("Build with no expansion")
{
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::tasksFromJSON(testTasks);
auto genJSON = daggy::tasksToJSON(tasks);
auto regenTasks = daggy::tasksFromJSON(genJSON);
REQUIRE(regenTasks.size() == tasks.size());
for (const auto &[name, task] : regenTasks) {
const auto &other = tasks[name];
REQUIRE(task == other);
}
}
}
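
The defaults behaviour asserted in "Build with job defaults" (task-level "memory" wins, missing keys are filled in) matches the semantics of std::map::merge, which only transfers keys absent from the destination. A self-contained illustration, assuming plain string maps for simplicity (daggy's job values are variants, but the merge rule is the same):

#include <iostream>
#include <map>
#include <string>

int main()
{
  std::map<std::string, std::string> job{{"memory", "1G"}};
  std::map<std::string, std::string> defaults{{"runtime", "60"},
                                              {"memory", "300M"}};
  // merge() moves over only the keys job does not already have, so the
  // task-level "memory" survives, exactly as the test expects.
  job.merge(defaults);
  std::cout << job["runtime"] << '\n'; // 60   (filled from the defaults)
  std::cout << job["memory"] << '\n';  // 1G   (task value kept)
}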

@@ -0,0 +1,382 @@
#include <curl/curl.h>
#include <pistache/client.h>
#include <rapidjson/document.h>
#include <sys/stat.h>
#include <catch2/catch.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/executors/task/NoopTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <filesystem>
#include <iostream>
#include <thread>
namespace rj = rapidjson;
namespace fs = std::filesystem; // fs:: is used below but was never aliased
using namespace std::chrono_literals; // for the 1s / 5s duration literals
using namespace daggy;
#ifdef DEBUG_HTTP
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
void *userp)
{
const char *text;
(void)handle; /* prevent compiler warning */
(void)userp;
switch (type) {
case CURLINFO_TEXT:
fprintf(stderr, "== Info: %s", data);
default: /* in case a new one is introduced to shock us */
return 0;
case CURLINFO_HEADER_OUT:
text = "=> Send header";
break;
case CURLINFO_DATA_OUT:
text = "=> Send data";
break;
case CURLINFO_SSL_DATA_OUT:
text = "=> Send SSL data";
break;
case CURLINFO_HEADER_IN:
text = "<= Recv header";
break;
case CURLINFO_DATA_IN:
text = "<= Recv data";
break;
case CURLINFO_SSL_DATA_IN:
text = "<= Recv SSL data";
break;
}
std::cerr << "\n================== " << text
<< " ==================" << std::endl
<< data << std::endl;
return 0;
}
#endif
enum HTTPCode : long
{
Ok = 200,
Not_Found = 404
};
struct HTTPResponse
{
HTTPCode code;
std::string body;
};
// libcurl expects a size_t (char*, size_t, size_t, void*) write callback
size_t curlWriter(char *in, size_t size, size_t nmemb, void *out)
{
size_t r = size * nmemb;
static_cast<std::stringstream *>(out)->write(in, r);
return r;
}
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
const std::string &method = "GET")
{
HTTPResponse response;
CURL *curl;
CURLcode res;
struct curl_slist *headers = NULL;
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
if (curl) {
std::stringstream buffer;
#ifdef DEBUG_HTTP
curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
#endif
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
if (!payload.empty()) {
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, payload.size());
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
headers = curl_slist_append(headers, "Content-Type: Application/Json");
}
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
headers = curl_slist_append(headers, "Expect:");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
res = curl_easy_perform(curl);
if (res != CURLE_OK) {
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
throw std::runtime_error(std::string{"CURL Failed: "} +
curl_easy_strerror(res));
}
// Read the response code before the handle is cleaned up
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response.code);
response.body = buffer.str();
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
}
curl_global_cleanup();
return response;
}
TEST_CASE("rest_endpoint", "[server_basic]")
{
std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss);
Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10, nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads);
server.start();
const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort());
SECTION("Ready Endpoint")
{
auto response = REQUEST(baseURL + "/ready");
REQUIRE(response.code == HTTPCode::Ok);
}
SECTION("Querying a non-existent dagrunid should fail ")
{
auto response = REQUEST(baseURL + "/v1/dagrun/100");
REQUIRE(response.code != HTTPCode::Ok);
}
SECTION("Simple DAGRun Submission")
{
std::string dagRun = R"({
"tag": "unit_server",
"parameters": { "FILE": [ "A", "B" ] },
"tasks": {
"touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
"cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
"parents": [ "touch" ]
}
}
})";
auto dagSpec = daggy::dagFromJSON(dagRun);
// Submit, and get the runID
daggy::DAGRunID runID = 0;
{
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
runID = doc["runID"].GetUint64();
}
// Ensure our runID shows up in the list of running DAGs
{
auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsArray());
REQUIRE(doc.Size() >= 1);
// Ensure that our DAG is in the list and matches our given DAGRunID
bool found = false;
const auto &runs = doc.GetArray();
for (size_t i = 0; i < runs.Size(); ++i) {
const auto &run = runs[i];
REQUIRE(run.IsObject());
REQUIRE(run.HasMember("tag"));
REQUIRE(run.HasMember("runID"));
std::string runName = run["tag"].GetString();
if (runName == "unit_server") {
REQUIRE(run["runID"].GetUint64() == runID);
found = true;
break;
}
}
REQUIRE(found);
}
// Ensure we can get one of our tasks
{
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
"/task/cat_0");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
auto task = daggy::taskFromJSON("cat", doc);
REQUIRE(task == dagSpec.tasks.at("cat"));
}
// Wait until our DAG is complete
bool complete = false; // set by the polling loop below
for (auto i = 0; i < 10; ++i) {
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("taskStates"));
const auto &taskStates = doc["taskStates"].GetObject();
size_t nStates = 0;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
nStates++;
}
REQUIRE(nStates == 3);
complete = true;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
std::string state = it->value.GetString();
if (state != "COMPLETED") {
complete = false;
break;
}
}
if (complete)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
REQUIRE(complete);
std::this_thread::sleep_for(std::chrono::seconds(2));
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
REQUIRE(fs::exists(pth));
fs::remove(pth);
}
}
}
TEST_CASE("Server cancels and resumes execution", "[server_resume]")
{
std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss);
Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10, nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads);
server.start();
const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort());
SECTION("Cancel / Resume DAGRun")
{
std::string dagRunJSON = R"({
"tag": "unit_server",
"tasks": {
"touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
"sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
"touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
}
})";
auto dagSpec = daggy::dagFromJSON(dagRunJSON);
// Submit, and get the runID
daggy::DAGRunID runID;
{
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
runID = doc["runID"].GetUint64();
}
std::this_thread::sleep_for(1s);
// Stop the current run
{
auto response = REQUEST(
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
"PATCH");
REQUIRE(response.code == HTTPCode::Ok);
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
}
// Verify that the run still exists
{
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
+daggy::RunState::COMPLETED);
REQUIRE(fs::exists("resume_touch_a"));
REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
+daggy::RunState::ERRORED);
REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
}
// Set the errored task state
{
auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
"/task/sleep_B_0/state/QUEUED";
auto response = REQUEST(url, "", "PATCH");
REQUIRE(response.code == HTTPCode::Ok);
REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
+daggy::RunState::QUEUED);
}
// Resume
{
struct stat s;
lstat("resume_touch_a", &s); // lowercase: the file the touch_A task creates
auto preMTime = s.st_mtim.tv_sec;
auto response = REQUEST(
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
"PATCH");
// Wait for run to complete
std::this_thread::sleep_for(5s);
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);
REQUIRE(fs::exists("resume_touch_c"));
REQUIRE(fs::exists("resume_touch_a"));
for (const auto &[taskName, task] : dagSpec.tasks) {
REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
+daggy::RunState::COMPLETED);
}
// Ensure "touch_A" wasn't run again
lstat("resume_touch_a", &s);
auto postMTime = s.st_mtim.tv_sec;
REQUIRE(preMTime == postMTime);
}
}
server.shutdown();
}

@@ -0,0 +1,45 @@
#include <catch2/catch.hpp>
#include <future>
#include <iostream>
#include "daggy/ThreadPool.hpp"
using namespace daggy;
TEST_CASE("threadpool", "[threadpool]")
{
std::atomic<uint32_t> cnt(0);
ThreadPool tp(10);
std::vector<std::future<uint32_t>> rets;
SECTION("Adding large tasks queues with return values")
{
auto tq = std::make_shared<daggy::TaskQueue>();
std::vector<std::future<uint32_t>> res;
for (size_t i = 0; i < 100; ++i)
res.emplace_back(tq->addTask([&cnt]() {
cnt++;
return cnt.load();
}));
tp.addTasks(tq);
for (auto &r : res)
r.get();
REQUIRE(cnt == 100);
}
SECTION("Slow runs")
{
std::vector<std::future<void>> res;
using namespace std::chrono_literals;
for (size_t i = 0; i < 100; ++i)
res.push_back(tp.addTask([&cnt]() {
std::this_thread::sleep_for(20ms);
cnt++;
return;
}));
for (auto &r : res)
r.get();
REQUIRE(cnt == 100);
}
}
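
For context on what the pool under test provides, here is a minimal sketch of the underlying technique: a mutex-guarded queue of packaged tasks drained by a fixed set of worker threads, with futures for the results. daggy's ThreadPool and TaskQueue will differ in detail; this only illustrates the pattern:

#include <condition_variable>
#include <functional>
#include <future>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// Minimal fixed-size pool: workers pop type-erased tasks from a shared queue.
class MiniPool
{
public:
  explicit MiniPool(size_t n)
  {
    for (size_t i = 0; i < n; ++i)
      workers_.emplace_back([this] { workerLoop(); });
  }

  ~MiniPool()
  {
    {
      std::lock_guard<std::mutex> lk(m_);
      done_ = true;
    }
    cv_.notify_all();
    for (auto &w : workers_)
      w.join();
  }

  template <typename F>
  auto addTask(F f) -> std::future<decltype(f())>
  {
    // packaged_task is move-only, so share it with the std::function wrapper
    auto task =
        std::make_shared<std::packaged_task<decltype(f())()>>(std::move(f));
    auto fut = task->get_future();
    {
      std::lock_guard<std::mutex> lk(m_);
      q_.emplace([task] { (*task)(); });
    }
    cv_.notify_one();
    return fut;
  }

private:
  void workerLoop()
  {
    for (;;) {
      std::function<void()> job;
      {
        std::unique_lock<std::mutex> lk(m_);
        cv_.wait(lk, [this] { return done_ || !q_.empty(); });
        if (done_ && q_.empty())
          return;
        job = std::move(q_.front());
        q_.pop();
      }
      job();
    }
  }

  std::mutex m_;
  std::condition_variable cv_;
  std::queue<std::function<void()>> q_;
  std::vector<std::thread> workers_;
  bool done_ = false;
};

int main()
{
  MiniPool pool(4);
  auto f = pool.addTask([] { return 41 + 1; });
  std::cout << f.get() << '\n'; // 42
}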

@@ -0,0 +1,56 @@
#include <algorithm>
#include <catch2/catch.hpp>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
TEST_CASE("string_utilities", "[utilities_string]")
{
std::string test = "/this/is/{{A}}/test/{{A}}";
auto res = daggy::globalSub(test, "{{A}}", "hello");
REQUIRE(res == "/this/is/hello/test/hello");
}
TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
{
SECTION("Basic expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}",
"{{TYPE}}"};
auto allCommands = daggy::interpolateValues(cmd, params);
REQUIRE(allCommands.size() == 6);
}
SECTION("Skip over unused parameters")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
auto allCommands = daggy::interpolateValues(cmd, params);
// TYPE isn't used, so it's just |DATE| * |SOURCE|
REQUIRE(allCommands.size() == 2);
}
SECTION("Expand within a command part")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"};
auto result = daggy::interpolateValues(cmd, params);
// TYPE isn't used, so it's just |DATE| * |SOURCE|
REQUIRE(result.size() == 4);
}
}
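
The counts asserted in this file all follow one rule: expansion is a cartesian product over only those parameters whose {{NAME}} placeholder actually occurs in the command. A standalone sketch of that rule, simplified so every parameter is a list of strings (an approximation of interpolateValues, not daggy's implementation):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Replace every occurrence of `key` in `s` with `val`.
std::string subAll(std::string s, const std::string &key, const std::string &val)
{
  for (size_t p = s.find(key); p != std::string::npos;
       p = s.find(key, p + val.size()))
    s.replace(p, key.size(), val);
  return s;
}

// Expand {{NAME}} placeholders: the result size is the product of the
// list sizes of the parameters that are actually referenced.
std::vector<std::vector<std::string>> interpolate(
    std::vector<std::vector<std::string>> cmds,
    const std::map<std::string, std::vector<std::string>> &params)
{
  for (const auto &[name, values] : params) {
    const std::string key = "{{" + name + "}}";
    std::vector<std::vector<std::string>> next;
    for (const auto &cmd : cmds) {
      bool used = false;
      for (const auto &part : cmd)
        used |= part.find(key) != std::string::npos;
      if (!used) { // unused parameters do not multiply the output
        next.push_back(cmd);
        continue;
      }
      for (const auto &v : values) {
        auto expanded = cmd;
        for (auto &part : expanded)
          part = subAll(part, key, v);
        next.push_back(expanded);
      }
    }
    cmds = std::move(next);
  }
  return cmds;
}

int main()
{
  auto out = interpolate(
      {{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"}},
      {{"DATE", {"2021-05-06", "2021-05-07"}}, {"SOURCE", {"A", "B"}}});
  std::cout << out.size() << '\n'; // 4, as in "Expand within a command part"
}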