Squashed commit of the following:

commit b06b11cbb5d09c6d091551e39767cd3316f88376
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Oct 5 11:57:37 2021 -0300

    Fixing failing unit test

commit fe2a43a19b2a16a9aedd9e95e71e672935ecaeb1
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Oct 5 11:54:01 2021 -0300

    Adding endpoints and updating documentation

commit 46e0deeefb8b06291ae5e2d6b8ec4749c5b0ea6f
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Oct 5 11:49:43 2021 -0300

    Completing unit tests and relevant fixes

commit e0569f370624844feee6aae4708bfe683f4156cf
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 4 17:30:59 2021 -0300

    Adding gcc TSan to debug builds to help with race conditions, fixing many of them, and fixing a really crummy assumption about how futures worked; that fix alone speeds up task execution by a ton.

commit c748a4f592e1ada5546908be5281d04f4749539d
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 4 10:14:43 2021 -0300

    Checkpointing work that seems to have resolved the race condition

commit 7a79f2943e0d50545d976a28b4b379340a90dded
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Wed Sep 29 09:27:07 2021 -0300

    Completing the rough-in for DAG killing / pausing / resuming

commit 4cf8d81d5f6fcf4a7dd83d8fca3e23f153aa8acb
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:53:50 2021 -0300

    Adding DAGRunner unit tests and a resetRunning method to support resuming

commit 54e2c1f9f5e7d5b339d71be024e0e390c4d2bf61
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:45:57 2021 -0300

    Refactoring runDAG into DAGRunner

commit 682be7a11e2fae850e1bc3e207628d2335768c2b
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:34:43 2021 -0300

    Adding DAGRunner class to replace Utilities::runDAG, making Slurm cancellation rc agree with SIGKILL

commit 4171b3a6998791abfc71b04f8de1ae93c4f90a78
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:14:17 2021 -0300

    Adding unit tests for stopping jobs to slurm

commit dc0b1ff26a5d98471164132d35bb8a552cc75ff8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 14:04:15 2021 -0300

    Adding in stop method for task executors

commit e752b44f55113be54392bcbb5c3d6f251d673cfa
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:32:06 2021 -0300

    Adding additional tests for loggers

commit f0773d5a84a422738fc17c9277a2b735a21a3d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:29:21 2021 -0300

    Unit tests pass

commit 993ff2810de2d53dc6a59ab53d620fecf152d4a0
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:24:34 2021 -0300

    Adding handling for new routes, still need to add tests for new routes

commit 676623b14e45759872a2dbcbc98f6a744e022a71
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Tue Sep 28 12:12:43 2021 -0300

    Adding handling for new routes, still need to add tests for new routes

commit b9edb6ba291eb064f4c459a308ea6912fba9fa02
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Sep 27 11:59:14 2021 -0300

    Defining new endpoints, fixing dag resumption code, adding PAUSED state, refactoring DAGSpec and adding deserializer
Squashed as commit 65ab439848 (parent dded91220f): 32 changed files with 1538 additions and 618 deletions.

tests/CMakeLists.txt

@@ -2,6 +2,7 @@ project(tests)
add_executable(tests main.cpp
# unit tests
unit_dag.cpp
unit_dagrunner.cpp
unit_dagrun_loggers.cpp
unit_executor_forkingexecutor.cpp
unit_executor_slurmexecutor.cpp
@@ -14,4 +15,4 @@ add_executable(tests main.cpp
# Performance checks
perf_dag.cpp
)
-target_link_libraries(tests libdaggy stdc++fs Catch2::Catch2)
+target_link_libraries(tests libdaggy stdc++fs Catch2::Catch2 curl)
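
(curl joins the test link line because the server tests below now drive the REST API through libcurl rather than Pistache's experimental client.)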

tests/unit_dagrun_loggers.cpp

@@ -2,11 +2,10 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
using namespace daggy;
using namespace daggy::loggers::dag_run;
@@ -20,28 +19,68 @@ const TaskSet SAMPLE_TASKS{
{"work_c",
Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};
-inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name,
+inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &tag,
const TaskSet &tasks)
{
-auto runID = logger.startDAGRun(name, tasks);
-auto dagRun = logger.getDAGRun(runID);
-REQUIRE(dagRun.tasks == tasks);
+auto runID = logger.startDAGRun(DAGSpec{.tag = tag, .tasks = tasks});
// Verify run shows up in the list
{
auto runs = logger.queryDAGRuns();
REQUIRE(!runs.empty());
auto it = std::find_if(runs.begin(), runs.end(),
[runID](const auto &r) { return r.runID == runID; });
REQUIRE(it != runs.end());
REQUIRE(it->tag == tag);
REQUIRE(it->runState == +RunState::QUEUED);
}
-REQUIRE(dagRun.taskRunStates.size() == tasks.size());
-auto nonQueuedTask =
-std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
-[](const auto &a) { return a.second != +RunState::QUEUED; });
-REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
// Verify states
{
REQUIRE(logger.getDAGRunState(runID) == +RunState::QUEUED);
for (const auto &[k, _] : tasks) {
REQUIRE(logger.getTaskState(runID, k) == +RunState::QUEUED);
}
}
// Verify integrity of run
{
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.dagSpec.tag == tag);
REQUIRE(dagRun.dagSpec.tasks == tasks);
REQUIRE(dagRun.taskRunStates.size() == tasks.size());
auto nonQueuedTask = std::find_if(
dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
[](const auto &a) { return a.second != +RunState::QUEUED; });
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
REQUIRE(dagRun.dagStateChanges.size() == 1);
REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);
}
// Update DAG state and ensure that it's updated
{
logger.updateDAGRunState(runID, RunState::RUNNING);
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::RUNNING);
}
// Update a task state
{
for (const auto &[k, v] : tasks)
logger.updateTaskState(runID, k, RunState::RUNNING);
auto dagRun = logger.getDAGRun(runID);
for (const auto &[k, v] : tasks) {
REQUIRE(dagRun.taskRunStates.at(k) == +RunState::RUNNING);
}
}
-REQUIRE(dagRun.dagStateChanges.size() == 1);
-REQUIRE(dagRun.dagStateChanges.back().newState == +RunState::QUEUED);
return runID;
}
TEST_CASE("ostream_logger", "[ostream_logger]")
{
-// cleanup();
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
@@ -49,6 +88,4 @@ TEST_CASE("ostream_logger", "[ostream_logger]")
{
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}
-// cleanup();
}

tests/unit_dagrunner.cpp (new file, 256 lines)

@@ -0,0 +1,256 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include "daggy/DAGRunner.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
TEST_CASE("dagrunner", "[dagrunner_order_preservation]")
{
daggy::executors::task::NoopTaskExecutor ex;
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::TimePoint globalStartTime = daggy::Clock::now();
daggy::DAGSpec dagSpec;
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
std::string taskJSON = R"({
"A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
"B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
"C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
"D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
})";
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex,
dagSpec.taskConfig.variables);
REQUIRE(dagSpec.tasks.size() == 20);
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto endDAG = runner.run();
REQUIRE(endDAG.allVisited());
// Ensure the run order
auto rec = logger.getDAGRun(runID);
daggy::TimePoint globalStopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes;
minTimes.fill(globalStartTime);
std::array<daggy::TimePoint, 5> maxTimes;
maxTimes.fill(globalStopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 'A'; // map task letter to stage index
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
// the earliest stop of an earlier letter must precede the latest start of every later one
for (size_t i = 0; i < 5; ++i) {
for (size_t j = i + 1; j < 5; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
TEST_CASE("DAGRunner simple execution", "[dagrunner_simple]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::DAGSpec dagSpec;
SECTION("Simple execution")
{
std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("B") +
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("C") + R"("]}}})";
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto endDAG = runner.run();
REQUIRE(endDAG.allVisited());
for (const auto &[_, file] : files) {
REQUIRE(fs::exists(file));
fs::remove(file);
}
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto &[_, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
}
}
TEST_CASE("DAG Runner Restart old DAG", "[dagrunner_restart]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::DAGSpec dagSpec;
SECTION("Recovery from Error")
{
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth : paths) {
if (fs::exists(pth))
fs::remove_all(pth);
}
};
cleanup();
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix + R"(C"]}}})";
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto tryDAG = runner.run();
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
runner.resetRunning();
auto endDAG = runner.run();
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() ==
2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() ==
1); // C wasn't run because B errored
cleanup();
}
}
TEST_CASE("DAG Runner Generator Tasks", "[dagrunner_generator]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::DAGSpec dagSpec;
SECTION("Generator tasks")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
std::string generatorOutput =
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
ofh.close();
daggy::TimePoint globalStartTime = daggy::Clock::now();
std::stringstream jsonTasks;
jsonTasks
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
<< std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str());
REQUIRE(dagSpec.tasks.size() == 2);
REQUIRE(dagSpec.tasks["A"].children ==
std::unordered_set<std::string>{"C"});
dagSpec.tasks =
daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables);
REQUIRE(dagSpec.tasks.size() == 2);
REQUIRE(dagSpec.tasks["A_0"].children ==
std::unordered_set<std::string>{"C"});
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun(dagSpec);
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
auto finalDAG = runner.run();
REQUIRE(finalDAG.allVisited());
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.dagSpec.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto &[taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.dagSpec.tasks["A_0"].children ==
std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.dagSpec.tasks["B_0"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.dagSpec.tasks["B_1"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.dagSpec.tasks["C_0"].children.empty());
// Ensure they were run in the right order
// All A's get run before B's, which run before C's
daggy::TimePoint globalStopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 3> minTimes;
minTimes.fill(globalStartTime);
std::array<daggy::TimePoint, 3> maxTimes;
maxTimes.fill(globalStopTime);
for (const auto &[k, v] : record.taskAttempts) {
size_t idx = k[0] - 'A'; // map task letter to stage index
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
// the earliest stop of an earlier letter must precede the latest start of every later one
for (size_t i = 0; i < 3; ++i) {
for (size_t j = i + 1; j < 3; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
}
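
Distilled from the three test cases above, the driving pattern for the new class, as a hedged sketch (single trivial task; assumes the same daggy headers this file includes):

    #include <sstream>

    #include "daggy/DAGRunner.hpp"
    #include "daggy/executors/task/ForkingTaskExecutor.hpp"
    #include "daggy/loggers/dag_run/OStreamLogger.hpp"

    int main()
    {
        daggy::executors::task::ForkingTaskExecutor ex(4);
        std::stringstream ss;
        daggy::loggers::dag_run::OStreamLogger logger(ss);

        daggy::DAGSpec dagSpec;
        dagSpec.tasks = daggy::expandTaskSet(
            daggy::tasksFromJSON(R"({"A": {"job": {"command": ["/bin/true"]}}})"),
            ex);
        auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);

        // register the run with the logger, then let the runner drive it
        auto runID = logger.startDAGRun(dagSpec);
        daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
        auto endDAG = runner.run(); // resetRunning() + run() again to resume
        return endDAG.allVisited() ? 0 : 1;
    }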

tests/unit_executor_forkingexecutor.cpp

@@ -1,6 +1,7 @@
#include <catch2/catch.hpp>
#include <filesystem>
#include <iostream>
#include <thread>
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
@@ -18,7 +19,7 @@ TEST_CASE("forking_executor", "[forking_executor]")
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute("command", task);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
@@ -32,7 +33,7 @@ TEST_CASE("forking_executor", "[forking_executor]")
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
auto recFuture = ex.execute("command", task);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 2);
@@ -40,6 +41,28 @@ TEST_CASE("forking_executor", "[forking_executor]")
REQUIRE(rec.outputLog.empty());
}
SECTION("Killing a long task")
{
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/sleep", "30"}}}};
auto start = daggy::Clock::now();
auto recFuture = ex.execute(0, "command", task);
std::this_thread::sleep_for(1s);
ex.stop(0, "command");
auto rec = recFuture.get();
auto stop = daggy::Clock::now();
REQUIRE(rec.rc == 9);
REQUIRE(rec.errorLog.empty());
REQUIRE(rec.outputLog.empty());
REQUIRE(rec.executorLog == "Killed");
REQUIRE(
std::chrono::duration_cast<std::chrono::seconds>(stop - start).count() <
20);
}
SECTION("Large Output")
{
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
@@ -54,7 +77,7 @@ TEST_CASE("forking_executor", "[forking_executor]")
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
auto recFuture = ex.execute("command", task);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);

tests/unit_executor_slurmexecutor.cpp

@@ -34,7 +34,7 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
REQUIRE(ex.validateTaskParameters(task.job));
auto recFuture = ex.execute("command", task);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
@@ -49,7 +49,7 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
"/usr/bin/expr", "1", "+", "+"}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute("command", task);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc != 0);
@@ -57,6 +57,23 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
REQUIRE(rec.outputLog.empty());
}
SECTION("Killing a long task")
{
daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/sleep", "30"}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute(0, "command", task);
ex.stop(0, "command");
auto rec = recFuture.get();
REQUIRE(rec.rc == 9);
REQUIRE(rec.errorLog.empty());
REQUIRE(rec.outputLog.empty());
REQUIRE(rec.executorLog == "Job cancelled by user.\n");
}
SECTION("Large Output")
{
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
@@ -72,7 +89,7 @@ TEST_CASE("slurm_execution", "[slurm_executor]")
"/usr/bin/cat", bigFile}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute("command", task);
auto recFuture = ex.execute(0, "command", task);
auto rec = recFuture.get();
REQUIRE(rec.rc == 0);
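
(A note on the rc == 9 assertions in this file and in the forking-executor tests above: 9 is SIGKILL's signal number, so both executors now report a killed task the same way. That is the "making Slurm cancellation rc agree with SIGKILL" item from the commit log.)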

tests/unit_server.cpp

@@ -1,51 +1,131 @@
+#include <curl/curl.h>
-#include <pistache/client.h>
#include <rapidjson/document.h>
#include <sys/stat.h>
#include <catch2/catch.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/executors/task/NoopTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <filesystem>
#include <iostream>
#include <thread>
namespace rj = rapidjson;
-Pistache::Http::Response REQUEST(const std::string &url,
-const std::string &payload = "")
-{
-Pistache::Http::Experimental::Client client;
-client.init();
-Pistache::Http::Response response;
-auto reqSpec = (payload.empty() ? client.get(url) : client.post(url));
-reqSpec.timeout(std::chrono::seconds(2));
-if (!payload.empty()) {
-reqSpec.body(payload);
-}
-auto request = reqSpec.send();
-bool ok = false, error = false;
-std::string msg;
-request.then(
-[&](Pistache::Http::Response rsp) {
-ok = true;
-response = std::move(rsp);
-},
-[&](std::exception_ptr ptr) {
-error = true;
-try {
-std::rethrow_exception(std::move(ptr));
-}
-catch (std::exception &e) {
-msg = e.what();
-}
-});
using namespace daggy;
-Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
-barrier.wait_for(std::chrono::seconds(2));
-client.shutdown();
-if (error) {
-throw std::runtime_error(msg);
#ifdef DEBUG_HTTP
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
void *userp)
{
const char *text;
(void)handle; /* prevent compiler warning */
(void)userp;
switch (type) {
case CURLINFO_TEXT:
fprintf(stderr, "== Info: %s", data);
default: /* in case a new one is introduced to shock us */
return 0;
case CURLINFO_HEADER_OUT:
text = "=> Send header";
break;
case CURLINFO_DATA_OUT:
text = "=> Send data";
break;
case CURLINFO_SSL_DATA_OUT:
text = "=> Send SSL data";
break;
case CURLINFO_HEADER_IN:
text = "<= Recv header";
break;
case CURLINFO_DATA_IN:
text = "<= Recv data";
break;
case CURLINFO_SSL_DATA_IN:
text = "<= Recv SSL data";
break;
}
std::cerr << "\n================== " << text
<< " ==================" << std::endl
<< data << std::endl;
return 0;
}
#endif
enum HTTPCode : long
{
Ok = 200,
Not_Found = 404
};
struct HTTPResponse
{
HTTPCode code;
std::string body;
};
// libcurl write callback: must consume and return size * nmemb bytes
size_t curlWriter(char *in, size_t size, size_t nmemb, std::stringstream *out)
{
size_t r = size * nmemb;
out->write(in, r);
return r;
}
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
const std::string &method = "GET")
{
HTTPResponse response;
CURL *curl;
CURLcode res;
struct curl_slist *headers = NULL;
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
if (curl) {
std::stringstream buffer;
#ifdef DEBUG_HTTP
curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
#endif
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
if (!payload.empty()) {
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, payload.size());
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
headers = curl_slist_append(headers, "Content-Type: Application/Json");
}
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
headers = curl_slist_append(headers, "Expect:");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
res = curl_easy_perform(curl);
if (res != CURLE_OK) {
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
throw std::runtime_error(std::string{"CURL Failed: "} +
curl_easy_strerror(res));
}
// read the status code before releasing the handle
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response.code);
curl_slist_free_all(headers);
curl_easy_cleanup(curl);
response.body = buffer.str();
}
curl_global_cleanup();
return response;
}
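
For orientation, the helper defaults to GET, sends the payload as the request body when one is given, and sets the verb via CURLOPT_CUSTOMREQUEST; a hedged usage sketch (URL values as used in the tests below, the runID literal is hypothetical):

    auto ready   = REQUEST(baseURL + "/ready");                          // GET
    auto created = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST"); // submit
    auto killed  = REQUEST(baseURL + "/v1/dagrun/7/state/KILLED", "", "PATCH");
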
@@ -68,19 +148,19 @@ TEST_CASE("rest_endpoint", "[server_basic]")
SECTION("Ready Endpoint")
{
auto response = REQUEST(baseURL + "/ready");
-REQUIRE(response.code() == Pistache::Http::Code::Ok);
+REQUIRE(response.code == HTTPCode::Ok);
}
SECTION("Querying a non-existent dagrunid should fail ")
{
auto response = REQUEST(baseURL + "/v1/dagrun/100");
-REQUIRE(response.code() != Pistache::Http::Code::Ok);
+REQUIRE(response.code != HTTPCode::Ok);
}
SECTION("Simple DAGRun Submission")
{
std::string dagRun = R"({
"name": "unit_server",
"tag": "unit_server",
"parameters": { "FILE": [ "A", "B" ] },
"tasks": {
"touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
@@ -90,14 +170,16 @@ TEST_CASE("rest_endpoint", "[server_basic]")
}
})";
auto dagSpec = daggy::dagFromJSON(dagRun);
// Submit, and get the runID
daggy::DAGRunID runID = 0;
{
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun);
REQUIRE(response.code() == Pistache::Http::Code::Ok);
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
-daggy::checkRJParse(doc.Parse(response.body().c_str()));
+daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
@@ -106,11 +188,11 @@ TEST_CASE("rest_endpoint", "[server_basic]")
// Ensure our runID shows up in the list of running DAGs
{
auto response = REQUEST(baseURL + "/v1/dagrun/");
REQUIRE(response.code() == Pistache::Http::Code::Ok);
auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
-daggy::checkRJParse(doc.Parse(response.body().c_str()));
+daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsArray());
REQUIRE(doc.Size() >= 1);
@@ -120,10 +202,10 @@ TEST_CASE("rest_endpoint", "[server_basic]")
for (size_t i = 0; i < runs.Size(); ++i) {
const auto &run = runs[i];
REQUIRE(run.IsObject());
REQUIRE(run.HasMember("name"));
REQUIRE(run.HasMember("tag"));
REQUIRE(run.HasMember("runID"));
std::string runName = run["name"].GetString();
std::string runName = run["tag"].GetString();
if (runName == "unit_server") {
REQUIRE(run["runID"].GetUint64() == runID);
found = true;
@@ -133,13 +215,28 @@ TEST_CASE("rest_endpoint", "[server_basic]")
REQUIRE(found);
}
// Ensure we can get one of our tasks
{
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
"/task/cat_0");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
auto task = daggy::taskFromJSON("cat", doc);
REQUIRE(task == dagSpec.tasks.at("cat"));
}
// Wait until our DAG is complete
bool complete = true;
for (auto i = 0; i < 10; ++i) {
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
-REQUIRE(response.code() == Pistache::Http::Code::Ok);
+REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
-daggy::checkRJParse(doc.Parse(response.body().c_str()));
+daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("taskStates"));
@@ -173,6 +270,113 @@ TEST_CASE("rest_endpoint", "[server_basic]")
fs::remove(pth);
}
}
}
TEST_CASE("Server cancels and resumes execution", "[server_resume]")
{
std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss);
Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10, nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads);
server.start();
const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort());
SECTION("Cancel / Resume DAGRun")
{
std::string dagRunJSON = R"({
"tag": "unit_server",
"tasks": {
"touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
"sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
"touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
}
})";
auto dagSpec = daggy::dagFromJSON(dagRunJSON);
// Submit, and get the runID
daggy::DAGRunID runID;
{
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
runID = doc["runID"].GetUint64();
}
std::this_thread::sleep_for(1s);
// Stop the current run
{
auto response = REQUEST(
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
"PATCH");
REQUIRE(response.code == HTTPCode::Ok);
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
}
// Verify that the run still exists
{
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
+daggy::RunState::COMPLETED);
REQUIRE(fs::exists("resume_touch_a"));
REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
+daggy::RunState::ERRORED);
REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
}
// Set the errored task state
{
auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
"/task/sleep_B_0/state/QUEUED";
auto response = REQUEST(url, "", "PATCH");
REQUIRE(response.code == HTTPCode::Ok);
REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
+daggy::RunState::QUEUED);
}
// Resume
{
struct stat s;
lstat("resume_touch_A", &s);
auto preMTime = s.st_mtim.tv_sec;
auto response = REQUEST(
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
"PATCH");
// Wait for run to complete
std::this_thread::sleep_for(5s);
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);
REQUIRE(fs::exists("resume_touch_c"));
REQUIRE(fs::exists("resume_touch_a"));
for (const auto &[taskName, task] : dagSpec.tasks) {
REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
+daggy::RunState::COMPLETED);
}
// Ensure "touch_A" wasn't run again
lstat("resume_touch_A", &s);
auto postMTime = s.st_mtim.tv_sec;
REQUIRE(preMTime == postMTime);
}
}
server.shutdown();
}
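
Taken together, the server tests above exercise the endpoint surface this squash adds (verbs and paths as they appear in the REQUEST calls; braces mark path parameters):

    GET   /ready                                             liveness probe
    POST  /v1/dagrun/                                        submit a run; returns {"runID": ...}
    GET   /v1/dagrun/{runID}                                 run status, including taskStates
    GET   /v1/dagruns?all=1                                  list runs (tag, runID, ...)
    GET   /v1/dagrun/{runID}/task/{taskName}                 fetch one task definition
    PATCH /v1/dagrun/{runID}/state/{STATE}                   KILLED to cancel, QUEUED to resume
    PATCH /v1/dagrun/{runID}/task/{taskName}/state/{STATE}   e.g. reset a task to QUEUED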

tests/unit_utilities.cpp

@@ -8,11 +8,6 @@
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
TEST_CASE("string_utilities", "[utilities_string]")
{
@@ -59,234 +54,3 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
REQUIRE(result.size() == 4);
}
}
TEST_CASE("dag_runner_order", "[dagrun_order]")
{
daggy::executors::task::NoopTaskExecutor ex;
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::TimePoint globalStartTime = daggy::Clock::now();
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({
"A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
"B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
"C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
"D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex, params);
REQUIRE(tasks.size() == 20);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
// Ensure the run order
auto rec = logger.getDAGRun(runID);
daggy::TimePoint globalStopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes;
minTimes.fill(globalStartTime);
std::array<daggy::TimePoint, 5> maxTimes;
maxTimes.fill(globalStopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 65;
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
for (size_t i = 0; i < 5; ++i) {
for (size_t j = i + 1; j < 4; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
TEST_CASE("dag_runner", "[utilities_dag_runner]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Simple execution")
{
std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("B") +
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("C") + R"("]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto endDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(endDAG.allVisited());
for (const auto &[_, file] : files) {
REQUIRE(fs::exists(file));
fs::remove(file);
}
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
for (const auto &[_, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.front().rc == 0);
}
}
}
TEST_CASE("runDAG_recovery", "[runDAG]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Recovery from Error")
{
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth : paths) {
if (fs::exists(pth))
fs::remove_all(pth);
}
};
cleanup();
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
auto tryDAG = daggy::runDAG(runID, ex, logger, dag);
REQUIRE(!tryDAG.allVisited());
// Create the missing dir, then continue to run the DAG
fs::create_directory("noexist");
tryDAG.resetRunning();
auto endDAG = daggy::runDAG(runID, ex, logger, tryDAG);
REQUIRE(endDAG.allVisited());
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() ==
2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() ==
1); // C wasn't run because B errored
cleanup();
}
}
TEST_CASE("runDAG_generator", "[runDAG_generator]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Generator tasks")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string generatorOutput =
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
ofh.close();
daggy::TimePoint globalStartTime = daggy::Clock::now();
std::stringstream jsonTasks;
jsonTasks
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
<< std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
auto baseTasks = daggy::tasksFromJSON(jsonTasks.str());
REQUIRE(baseTasks.size() == 2);
REQUIRE(baseTasks["A"].children == std::unordered_set<std::string>{"C"});
auto tasks = daggy::expandTaskSet(baseTasks, ex, params);
REQUIRE(tasks.size() == 2);
REQUIRE(tasks["A_0"].children == std::unordered_set<std::string>{"C"});
auto dag = daggy::buildDAGFromTasks(tasks);
REQUIRE(dag.size() == 2);
auto runID = logger.startDAGRun("generator_run", tasks);
auto finalDAG = daggy::runDAG(runID, ex, logger, dag, params);
REQUIRE(finalDAG.allVisited());
REQUIRE(finalDAG.size() == 4);
// Check the logger
auto record = logger.getDAGRun(runID);
REQUIRE(record.tasks.size() == 4);
REQUIRE(record.taskRunStates.size() == 4);
for (const auto &[taskName, attempts] : record.taskAttempts) {
REQUIRE(attempts.size() == 1);
REQUIRE(attempts.back().rc == 0);
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A_0"].children ==
std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C_0"].children.empty());
// Ensure they were run in the right order
// All A's get run before B's, which run before C's
daggy::TimePoint globalStopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 3> minTimes;
minTimes.fill(globalStartTime);
std::array<daggy::TimePoint, 3> maxTimes;
maxTimes.fill(globalStopTime);
for (const auto &[k, v] : record.taskAttempts) {
size_t idx = k[0] - 65;
auto &startTime = minTimes[idx];
auto &stopTime = maxTimes[idx];
startTime = std::max(startTime, v.front().startTime);
stopTime = std::min(stopTime, v.back().stopTime);
}
for (size_t i = 0; i < 3; ++i) {
for (size_t j = i + 1; j < 2; ++j) {
REQUIRE(maxTimes[i] < minTimes[j]);
}
}
}
}