Large re-organization to split daggyd away from the core libdaggy.

This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
This commit is contained in:
Ian Roddis
2021-10-18 16:28:40 -03:00
parent 612bc8af8a
commit 470a6f2bb7
59 changed files with 586 additions and 52 deletions

3
daggyd/CMakeLists.txt Normal file
View File

@@ -0,0 +1,3 @@
# Top-level build for the daggyd sub-project: the server library
# (libdaggyd), the daemon executable (daggyd), and the test suite.
add_subdirectory(libdaggyd)
add_subdirectory(daggyd)
add_subdirectory(tests)

127
daggyd/README.md Normal file
View File

@@ -0,0 +1,127 @@
# Daggy Daemon
`daggyd` is the REST server process that handles receiving and running DAG specs.
# Running it
```bash
daggyd # That's it, will listen on 127.0.0.1:2503 , and run with a local executor
daggyd -d # Daemonize
daggyd --config FILE # Run with a config file
```
# Config Files
```json
{
"web-threads": 50,
"dag-threads": 50,
"port": 2503,
"ip": "localhost",
"logger": {
"name": "LoggerName",
"config": {
...
}
},
"executor": {
"name": "ExecutorName",
"config": {
...
}
}
}
```
## Loggers
### OStreamLogger
OStreamLogger doesn't persist data, but can write event updates to a file or
to stdout.
The config for OStreamLogger looks like this:
```
{
...
"logger": {
"name": "OStreamLogger",
"config": {
"file": "/path/to/file"
}
}
...
}
```
If `file` is equal to `"-"`, then the logger will print events to stdout. This configuration
is the default if no logger is specified at all.
### RedisLogger
RedisLogger stores state in a [Redis](https://redis.io) instance.
The config for RedisLogger looks like this (along with default values):
```
{
...
"logger": {
"name": "RedisLogger",
"config": {
"prefix": "daggy",
"host": "localhost",
"port": 6379
}
}
...
}
```
The `prefix` attribute is used to distinguish daggy instances. All keys will be prefixed with
the value of `prefix`.
## Executors
### ForkingTaskExecutor
ForkingTaskExecutor does pretty much what the name implies: it will execute tasks by
forking on the local machine.
Its config, with default values, looks like:
```
{
...
"executor": {
"name": "ForkingTaskExecutor",
"config": {
"threads": 10
}
}
...
}
```
If no executor is specified in the config, this is the executor used.
### SlurmTaskExecutor
The SlurmTaskExecutor will execute tasks on a [slurm](https://slurm.schedmd.com) cluster. It relies
on the slurm config to manage any parallelism limits and quotas.
Its config, with default values, looks like:
```
{
...
"executor": {
"name": "SlurmTaskExecutor",
"config": { }
}
...
}
```

View File

@@ -0,0 +1,4 @@
project(daggyd)
# Single-translation-unit daemon executable.
file(GLOB SOURCES daggyd.cpp)
add_executable(${PROJECT_NAME} ${SOURCES})
# argparse: CLI parsing; libdaggyd: REST server; libdaggy: core DAG engine.
target_link_libraries(${PROJECT_NAME} argparse libdaggyd libdaggy)

258
daggyd/daggyd/daggyd.cpp Normal file
View File

@@ -0,0 +1,258 @@
#include <rapidjson/document.h>
#include <sys/stat.h>
#include <argparse.hpp>
#include <atomic>
#include <csignal>
#include <daggy/Serialization.hpp>
#include <daggyd/Server.hpp>
#include <fstream>
#include <iostream>
// Add executors here
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
// Add loggers here
#include <daggy/executors/task/TaskExecutor.hpp>
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <daggy/loggers/dag_run/RedisLogger.hpp>
namespace rj = rapidjson;
// Process-wide run flag: the main loop spins while this is true.
static std::atomic<bool> running{true};

// Signal hook installed by daemonize()/main: SIGINT and SIGTERM request a
// clean shutdown by clearing the run flag; SIGHUP and anything else are
// deliberately ignored.
void signalHandler(int signal)
{
  if (signal == SIGINT || signal == SIGTERM) {
    running = false;
  }
}
/*
 * Classic double-fork daemonization: detach from the controlling terminal,
 * become a session leader, re-fork so we can never reacquire a terminal,
 * then reset umask, chdir("/"), and close every inherited descriptor.
 * No-op if we are already re-parented to init (getppid() == 1).
 * Installs signalHandler for SIGHUP/SIGTERM/SIGINT along the way.
 */
void daemonize()
{
pid_t pid;
struct sigaction newSigAction;
sigset_t newSigSet;
/* Check if parent process id is set */
if (getppid() == 1) {
return;
}
/* Set signal mask - signals we want to block */
sigemptyset(&newSigSet);
sigaddset(&newSigSet,
SIGCHLD); /* ignore child - i.e. we don't need to wait for it */
sigaddset(&newSigSet, SIGTSTP); /* ignore Tty stop signals */
sigaddset(&newSigSet, SIGTTOU); /* ignore Tty background writes */
sigaddset(&newSigSet, SIGTTIN); /* ignore Tty background reads */
sigprocmask(SIG_BLOCK, &newSigSet,
nullptr); /* Block the above specified signals */
/* Set up a signal handler */
newSigAction.sa_handler = signalHandler;
sigemptyset(&newSigAction.sa_mask);
newSigAction.sa_flags = 0;
/* Signals to handle */
sigaction(SIGHUP, &newSigAction, nullptr); /* catch hangup signal */
sigaction(SIGTERM, &newSigAction, nullptr); /* catch term signal */
sigaction(SIGINT, &newSigAction, nullptr); /* catch interrupt signal */
// Fork once
pid = fork();
if (pid < 0) {
exit(EXIT_FAILURE);
}
if (pid > 0) {
exit(EXIT_SUCCESS);
}
/* On success: The child process becomes session leader */
if (setsid() < 0) {
std::cerr << "Unable to setsid" << std::endl;
exit(EXIT_FAILURE);
}
/* Catch, ignore and handle signals */
signal(SIGCHLD, SIG_IGN);
signal(SIGHUP, SIG_IGN);
/* Fork off for the second time*/
pid = fork();
if (pid < 0)
exit(EXIT_FAILURE);
if (pid > 0)
exit(EXIT_SUCCESS);
umask(0);
/* Change the working directory to the root directory */
/* or another appropriated directory */
auto rc = chdir("/");
(void)rc;
/* Close all open file descriptors */
for (int x = sysconf(_SC_OPEN_MAX); x >= 0; x--) {
close(x);
}
}
namespace dl = daggy::loggers::dag_run;

/**
 * Build a DAGRunLogger from the optional "logger" section of the config.
 * Supported names: "OStreamLogger" (optional "file"; "-" means stdout) and
 * "RedisLogger" ("prefix"/"host"/"port" with defaults daggy/localhost/6379).
 * Falls back to an OStreamLogger on stdout when no logger (or no "file")
 * is configured.
 *
 * @throws std::runtime_error on a malformed section, an unknown logger
 *         name, or an unopenable log file.
 */
std::unique_ptr<dl::DAGRunLogger> loggerFactory(const rj::Value &config)
{
  if (config.HasMember("logger")) {
    const auto &logConf = config["logger"];
    if (!logConf.IsObject())
      throw std::runtime_error("logger config is not an object");
    if (!logConf.HasMember("name"))
      throw std::runtime_error("logger config is missing logger name");
    if (!logConf.HasMember("config"))
      throw std::runtime_error("logger config is missing logger config");
    std::string name = logConf["name"].GetString();
    const auto &logConfig = logConf["config"];
    if (name == "OStreamLogger") {
      if (logConfig.HasMember("file")) {
        std::string fn = logConfig["file"].GetString();
        if (fn == "-")
          return std::make_unique<dl::OStreamLogger>(std::cout);
        // BUG FIX: OStreamLogger keeps a reference to the stream, so the
        // stream must outlive the logger. The previous local ofstream was
        // destroyed on return, leaving the logger with a dangling
        // reference. A function-local static lives for the whole process;
        // this factory is invoked once at startup.
        static std::ofstream ofh;
        ofh.open(fn);
        if (!ofh)
          throw std::runtime_error("Unable to open log file: " + fn);
        return std::make_unique<dl::OStreamLogger>(ofh);
      }
    }
    else if (name == "RedisLogger") {
      // Defaults match the documented config in the README.
      std::string host = "localhost";
      uint16_t port = 6379;
      std::string prefix = "daggy";
      if (logConfig.HasMember("prefix"))
        prefix = logConfig["prefix"].GetString();
      if (logConfig.HasMember("host"))
        host = logConfig["host"].GetString();
      if (logConfig.HasMember("port"))
        port = logConfig["port"].GetInt();
      return std::make_unique<dl::RedisLogger>(prefix, host, port);
    }
    else
      throw std::runtime_error("Unknown logger type: " + name);
  }
  // Default: event stream to stdout (also reached for an OStreamLogger
  // section without a "file" entry).
  return std::make_unique<dl::OStreamLogger>(std::cout);
}
namespace de = daggy::executors::task;

/**
 * Build a TaskExecutor from the optional "executor" section of the config.
 * Supported names: "ForkingTaskExecutor" (optional "threads", default 10)
 * and "SlurmTaskExecutor" (no options). Falls back to a 10-thread
 * ForkingTaskExecutor when no executor is configured.
 *
 * @throws std::runtime_error on a malformed section or unknown executor name.
 */
std::unique_ptr<de::TaskExecutor> executorFactory(const rj::Value &config)
{
  if (config.HasMember("executor")) {
    const auto &execConf = config["executor"];
    if (!execConf.IsObject())
      throw std::runtime_error("Executor config is not an object");
    if (!execConf.HasMember("name"))
      // FIX: error message previously read "execger name".
      throw std::runtime_error("Executor config is missing executor name");
    if (!execConf.HasMember("config"))
      throw std::runtime_error("Executor config is missing config");
    std::string name = execConf["name"].GetString();
    const auto &execConfig = execConf["config"];
    if (name == "ForkingTaskExecutor") {
      size_t threads = 10;
      if (execConfig.HasMember("threads"))
        threads = execConfig["threads"].GetInt64();
      return std::make_unique<de::ForkingTaskExecutor>(threads);
    }
    else if (name == "SlurmTaskExecutor") {
      return std::make_unique<de::SlurmTaskExecutor>();
    }
    else
      throw std::runtime_error("Unknown executor type: " + name);
  }
  // Default executor when the config has no "executor" section.
  return std::make_unique<de::ForkingTaskExecutor>(10);
}
/**
 * daggyd entry point: parse CLI flags, load the optional JSON config file,
 * construct the logger and executor, then serve until SIGINT/SIGTERM.
 *
 * Flags: -v/--verbose, -d/--daemon, --config FILE (optional; without it
 * the documented defaults apply: 127.0.0.1:2503, stdout logger, forking
 * executor).
 */
int main(int argc, char **argv)
{
  argparse::ArgumentParser args("Daggy");
  args.add_argument("-v", "--verbose")
      .default_value(false)
      .implicit_value(true);
  args.add_argument("-d", "--daemon").default_value(false).implicit_value(true);
  // BUG FIX: --config had no default, so args.get<std::string>("--config")
  // below threw whenever the flag was omitted — yet running with no config
  // is the documented default mode.
  args.add_argument("--config").default_value(std::string{});
  try {
    args.parse_args(argc, argv);
  }
  catch (std::exception &e) {
    std::cout << "Error: " << e.what() << std::endl;
    std::cout << args;
    exit(1);
  }
  bool verbose = args.get<bool>("--verbose");
  bool asDaemon = args.get<bool>("--daemon");
  auto configFile = args.get<std::string>("--config");
  rj::Document doc;
  // Start from an empty JSON object so the factories' HasMember() probes
  // are valid even when no config file was supplied.
  doc.SetObject();
  if (!configFile.empty()) {
    std::ifstream ifh(configFile);
    if (!ifh) {
      std::cerr << "Unable to open config file: " << configFile << std::endl;
      exit(1);
    }
    std::string config;
    std::getline(ifh, config, '\0'); // slurp the whole file
    ifh.close();
    daggy::checkRJParse(doc.Parse(config.c_str()));
  }
  // Defaults, overridable from the config file.
  std::string listenIP = "127.0.0.1";
  int listenPort = 2503;
  size_t webThreads = 50;
  size_t dagThreads = 50;
  if (doc.HasMember("ip"))
    listenIP = doc["ip"].GetString();
  if (doc.HasMember("port"))
    listenPort = doc["port"].GetInt();
  if (doc.HasMember("web-threads"))
    webThreads = doc["web-threads"].GetInt64();
  if (doc.HasMember("dag-threads"))
    dagThreads = doc["dag-threads"].GetInt64();
  if (verbose) {
    std::cout << "Server running at http://" << listenIP << ':' << listenPort
              << std::endl
              << "Max DAG Processing: " << dagThreads << std::endl
              << "Max Web Clients: " << webThreads << std::endl
              << std::endl
              << "Ctrl-C to exit" << std::endl;
  }
  if (asDaemon) {
    daemonize();
  }
  auto logger = loggerFactory(doc);
  auto executor = executorFactory(doc);
  Pistache::Address listenSpec(listenIP, listenPort);
  daggy::Server server(listenSpec, *logger, *executor, dagThreads);
  server.init(webThreads);
  server.start();
  // Idle until a signal clears the run flag; the server works on its own
  // threads.
  running = true;
  while (running) {
    std::this_thread::sleep_for(std::chrono::seconds(30));
  }
  server.shutdown();
}

View File

@@ -0,0 +1,8 @@
project(libdaggyd)
add_library(${PROJECT_NAME} STATIC)
target_include_directories(${PROJECT_NAME} PUBLIC include)
target_link_libraries(${PROJECT_NAME} libdaggy)
add_subdirectory(src)

View File

@@ -0,0 +1,67 @@
#pragma once
#include <pistache/description.h>
#include <pistache/endpoint.h>
#include <pistache/http.h>
#include <daggy/DAGRunner.hpp>
#include <daggy/ThreadPool.hpp>
#include <daggy/executors/task/TaskExecutor.hpp>
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
#include <filesystem>
// Declares a REST handler method with Pistache's standard signature.
#define DAGGY_REST_HANDLER(func) \
void func(const Pistache::Rest::Request &request, \
Pistache::Http::ResponseWriter response);
namespace fs = std::filesystem;
namespace daggy {
// REST server that receives DAG run specs over HTTP, queues them on a
// runner thread pool, and exposes query/state-change endpoints.
// Holds non-owning references to the logger and executor, which must
// outlive the server.
class Server
{
public:
// listenSpec: address/port to bind (port 0 picks an ephemeral port);
// nDAGRunners: size of the pool bounding concurrent DAG runs.
Server(const Pistache::Address &listenSpec,
loggers::dag_run::DAGRunLogger &logger,
executors::task::TaskExecutor &executor, size_t nDAGRunners);
~Server();
// Enable HTTPS with the given certificate/key; returns *this for chaining.
Server &setSSLCertificates(const fs::path &cert, const fs::path &key);
// Configure the HTTP endpoint and register routes; call before start().
void init(size_t threads = 1);
// Start serving on background threads (non-blocking).
void start();
// Port actually bound — useful when constructed with port 0.
uint16_t getPort() const;
void shutdown();
private:
void createDescription();
// Start a DAG run on the runner pool; throws if runID is already active.
void queueDAG_(DAGRunID runID, const TaskDAG &dag,
const TaskParameters &taskParameters);
DAGGY_REST_HANDLER(handleReady); // X
DAGGY_REST_HANDLER(handleQueryDAGs); // X
DAGGY_REST_HANDLER(handleRunDAG); // X
DAGGY_REST_HANDLER(handleValidateDAG); // X
DAGGY_REST_HANDLER(handleGetDAGRun); // X
DAGGY_REST_HANDLER(handleGetDAGRunState); // X
DAGGY_REST_HANDLER(handleSetDAGRunState); // X
DAGGY_REST_HANDLER(handleGetTask); // X
DAGGY_REST_HANDLER(handleGetTaskState); // X
DAGGY_REST_HANDLER(handleSetTaskState); // X
// Authentication hook; returns false if the request was rejected.
bool handleAuth(const Pistache::Rest::Request &request);
Pistache::Http::Endpoint endpoint_;
Pistache::Rest::Description desc_;
Pistache::Rest::Router router_;
loggers::dag_run::DAGRunLogger &logger_; // non-owning
executors::task::TaskExecutor &executor_; // non-owning
ThreadPool runnerPool_;
std::mutex runnerGuard_; // guards runners_
std::unordered_map<DAGRunID, std::shared_ptr<DAGRunner>> runners_;
};
} // namespace daggy

View File

@@ -0,0 +1,3 @@
target_sources(${PROJECT_NAME} PRIVATE
Server.cpp
)

View File

@@ -0,0 +1,514 @@
#include <enum.h>
#include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp>
#include <daggyd/Server.hpp>
#include <iomanip>
#include <mutex>
#include <numeric>
#include <stdexcept>
#include <thread>
#include <utility>
// Send a JSON {"message": "..."} reply with the given Pistache::Http::Code
// and return from the calling handler. msg is JSON-escaped via std::quoted.
#define REQ_RESPONSE(code, msg) \
std::stringstream ss; \
ss << R"({"message": )" << std::quoted(msg) << "}"; \
response.send(Pistache::Http::Code::code, ss.str()); \
return;
using namespace Pistache;
namespace daggy {
// Configure the HTTP endpoint (worker thread count, address reuse, 4 GiB
// request/response size limits) and build the route description.
void Server::init(size_t threads)
{
auto opts = Http::Endpoint::options()
.threads(threads)
.flags(Pistache::Tcp::Options::ReuseAddr |
Pistache::Tcp::Options::ReusePort)
.maxRequestSize(4294967296)
.maxResponseSize(4294967296);
endpoint_.init(opts);
createDescription();
}
// Stores non-owning references to logger/executor; nDAGRunners bounds how
// many DAG runs execute concurrently.
Server::Server(const Pistache::Address &listenSpec,
loggers::dag_run::DAGRunLogger &logger,
executors::task::TaskExecutor &executor, size_t nDAGRunners)
: endpoint_(listenSpec)
, desc_("Daggy API", "0.1")
, logger_(logger)
, executor_(executor)
, runnerPool_(nDAGRunners)
{
}
Server::~Server()
{
// Idempotent; safe if shutdown() was already called explicitly.
shutdown();
}
// Compile the route description into the router and serve on background
// threads (returns immediately).
void Server::start()
{
router_.initFromDescription(desc_);
endpoint_.setHandler(router_.handler());
endpoint_.serveThreaded();
}
Server &Server::setSSLCertificates(const fs::path &cert, const fs::path &key)
{
endpoint_.useSSL(cert, key);
return *this;
}
// Stop accepting HTTP traffic, then shut down the DAG runner pool.
void Server::shutdown()
{
endpoint_.shutdown();
runnerPool_.shutdown();
}
// Port actually bound — useful when constructed with port 0 (tests).
uint16_t Server::getPort() const
{
return endpoint_.getPort();
}
// Declare the REST API surface (Pistache description). Routes are bound to
// handler methods here; start() later compiles this description into the
// router. Base path is /v1; /ready is unversioned and hidden from docs.
void Server::createDescription()
{
desc_.info().license("MIT", "https://opensource.org/licenses/MIT");
auto backendErrorResponse =
desc_.response(Http::Code::Internal_Server_Error,
R"({"error": "An error occurred with the backend"})");
desc_.schemes(Rest::Scheme::Http)
.basePath("/v1")
.produces(MIME(Application, Json))
.consumes(MIME(Application, Json));
desc_.route(desc_.get("/ready"))
.bind(&Server::handleReady, this)
.response(Http::Code::Ok, "Response to the /ready call")
.hide();
auto versionPath = desc_.path("/v1");
/*
DAG Run Summaries
*/
auto dagRunsPath = versionPath.path("/dagruns");
dagRunsPath.route(desc_.get("/"))
.bind(&Server::handleQueryDAGs, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "List summaries DAGs");
/*
Individual DAG Run routes
*/
auto dagRunPath = versionPath.path("/dagrun");
dagRunPath.route(desc_.post("/"))
.bind(&Server::handleRunDAG, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Run a DAG");
dagRunPath.route(desc_.post("/validate"))
.bind(&Server::handleValidateDAG, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Validate a DAG Run Spec");
/*
Management of a specific DAG
*/
auto specificDAGRunPath = dagRunPath.path("/:runID");
specificDAGRunPath.route(desc_.get("/"))
.bind(&Server::handleGetDAGRun, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Full DAG Run");
specificDAGRunPath.route(desc_.get("/state"))
.bind(&Server::handleGetDAGRunState, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok,
"Structure of a DAG and DAG and Task run states");
specificDAGRunPath.route(desc_.patch("/state/:state"))
.bind(&Server::handleSetDAGRunState, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Change the state of a DAG");
/*
Task paths
*/
auto taskPath = specificDAGRunPath.path("/task/:taskName");
taskPath.route(desc_.get("/"))
.bind(&Server::handleGetTask, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Details of a specific task");
/*
Task State paths
*/
auto taskStatePath = taskPath.path("/state");
taskStatePath.route(desc_.get("/"))
.bind(&Server::handleGetTaskState, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Get a task state");
taskStatePath.route(desc_.patch("/:state"))
.bind(&Server::handleSetTaskState, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Set a task state");
}
// POST /v1/dagrun/ — parse a DAG spec from the request body, expand its
// task set, register the run with the logger, queue it for execution, and
// reply with the new runID as JSON.
void Server::handleRunDAG(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request))
return;
auto dagSpec = dagFromJSON(request.body());
// Expand parameterized tasks into concrete instances before building the DAG.
dagSpec.tasks =
expandTaskSet(dagSpec.tasks, executor_, dagSpec.taskConfig.variables);
// Get a run ID
DAGRunID runID = logger_.startDAGRun(dagSpec);
auto dag = buildDAGFromTasks(dagSpec.tasks);
queueDAG_(runID, dag, dagSpec.taskConfig);
response.send(Pistache::Http::Code::Ok,
R"({"runID": )" + std::to_string(runID) + "}");
}
/**
 * POST /v1/dagrun/validate — parse the submitted DAG spec and report
 * whether it is valid, without running anything.
 */
void Server::handleValidateDAG(const Pistache::Rest::Request &request,
                               Pistache::Http::ResponseWriter response)
{
  try {
    dagFromJSON(request.body());
    response.send(Pistache::Http::Code::Ok, R"({"valid": true})");
  }
  catch (std::exception &e) {
    // BUG FIX: the failure path previously answered "valid": true and
    // produced malformed JSON ({"valid": true, "error": }<msg>}).
    // Report invalid and quote the error message properly.
    std::stringstream ss;
    ss << R"({"valid": false, "error": )" << std::quoted(e.what()) << '}';
    response.send(Pistache::Http::Code::Ok, ss.str());
  }
}
/**
 * GET /v1/dagruns — list DAG run summaries as a JSON array.
 * Optional query parameters:
 *   tag — restrict to runs with the given tag
 *   all — "true"/"1" to include finished runs, not just active ones
 */
void Server::handleQueryDAGs(const Pistache::Rest::Request &request,
                             Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;
  bool all = false;
  std::string tag;
  if (request.query().has("tag")) {
    tag = request.query().get("tag").value();
  }
  // BUG FIX: "all" is a query parameter (?all=1), not a route parameter,
  // so it must be read via request.query(); the old hasParam(":all") check
  // never matched and the flag was silently ignored.
  if (request.query().has("all")) {
    auto val = request.query().get("all").value();
    if (val == "true" or val == "1") {
      all = true;
    }
  }
  auto dagRuns = logger_.queryDAGRuns(tag, all);
  // Hand-rolled JSON array of run summaries, matching the file's existing
  // serialization style.
  std::stringstream ss;
  ss << '[';
  bool first = true;
  for (const auto &run : dagRuns) {
    if (first) {
      first = false;
    }
    else {
      ss << ", ";
    }
    ss << " {"
       << R"("runID": )" << run.runID << ',' << R"("tag": )"
       << std::quoted(run.tag) << ","
       << R"("startTime": )" << std::quoted(timePointToString(run.startTime))
       << ',' << R"("lastUpdate": )"
       << std::quoted(timePointToString(run.lastUpdate)) << ','
       << R"("taskCounts": {)";
    bool firstState = true;
    for (const auto &[state, count] : run.taskStateCounts) {
      if (firstState) {
        firstState = false;
      }
      else {
        ss << ", ";
      }
      ss << std::quoted(state._to_string()) << ':' << count;
    }
    ss << '}'  // end of taskCounts
       << '}'; // end of item
  }
  ss << ']';
  response.send(Pistache::Http::Code::Ok, ss.str());
}
// GET /v1/dagrun/:runID — serialize a full DAG run: spec (tag + tasks),
// current per-task states, every attempt record, and the DAG-level state
// change history.
void Server::handleGetDAGRun(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request))
return;
if (!request.hasParam(":runID")) {
REQ_RESPONSE(Not_Found, "No runID provided in URL");
}
auto runID = request.param(":runID").as<size_t>();
auto run = logger_.getDAGRun(runID);
bool first = true;
std::stringstream ss;
ss << "{"
<< R"("runID": )" << runID << ',' << R"("tag": )"
<< std::quoted(run.dagSpec.tag) << ',' << R"("tasks": )"
<< tasksToJSON(run.dagSpec.tasks) << ',';
// task run states
ss << R"("taskStates": { )";
first = true;
for (const auto &[name, state] : run.taskRunStates) {
if (first) {
first = false;
}
else {
ss << ',';
}
ss << std::quoted(name) << ": " << std::quoted(state._to_string());
}
ss << "},";
// Attempt records
first = true;
ss << R"("taskAttempts": { )";
for (const auto &[taskName, attempts] : run.taskAttempts) {
if (first) {
first = false;
}
else {
ss << ',';
}
ss << std::quoted(taskName) << ": [";
bool firstAttempt = true;
for (const auto &attempt : attempts) {
if (firstAttempt) {
firstAttempt = false;
}
else {
ss << ',';
}
ss << '{' << R"("startTime":)"
<< std::quoted(timePointToString(attempt.startTime)) << ','
<< R"("stopTime":)"
<< std::quoted(timePointToString(attempt.stopTime)) << ','
<< R"("rc":)" << attempt.rc << ',' << R"("outputLog":)"
<< std::quoted(attempt.outputLog) << ',' << R"("errorLog":)"
<< std::quoted(attempt.errorLog) << ',' << R"("executorLog":)"
<< std::quoted(attempt.executorLog) << '}';
}
ss << ']';
}
ss << "},";
// DAG state changes
first = true;
ss << R"("dagStateChanges": [ )";
for (const auto &change : run.dagStateChanges) {
if (first) {
first = false;
}
else {
ss << ',';
}
ss << stateUpdateRecordToJSON(change);
}
ss << "]";
ss << '}';
response.send(Pistache::Http::Code::Ok, ss.str());
}
// GET /v1/dagrun/:runID/state — report the overall run state; 404 with the
// logger's error message if the run is unknown.
void Server::handleGetDAGRunState(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request))
return;
DAGRunID runID = request.param(":runID").as<DAGRunID>();
RunState state = RunState::QUEUED;
try {
state = logger_.getDAGRunState(runID);
std::stringstream ss;
ss << R"({ "runID": )" << runID << R"(, "state": )"
<< std::quoted(state._to_string()) << '}';
response.send(Pistache::Http::Code::Ok, ss.str());
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
}
}
// Create a DAGRunner for runID and hand it to the runner pool. The pool
// task removes the runner from runners_ when the run finishes. Throws if a
// run with the same ID is already active. runners_ is guarded by
// runnerGuard_; the pool lambda re-takes the lock on completion.
void Server::queueDAG_(DAGRunID runID, const TaskDAG &dag,
const TaskParameters &taskParameters)
{
std::lock_guard<std::mutex> lock(runnerGuard_);
/*
auto it = runners_.emplace(
std::piecewise_construct, std::forward_as_tuple(runID),
std::forward_as_tuple(runID, executor_, logger_, dag,
taskParameters));
*/
auto it = runners_.emplace(
runID, std::make_shared<DAGRunner>(runID, executor_, logger_, dag,
taskParameters));
if (!it.second)
throw std::runtime_error("A DAGRun with the same ID is already running");
// Shared_ptr copy keeps the runner alive inside the pool task even after
// it is erased from runners_.
auto runner = it.first->second;
runnerPool_.addTask([runner, runID, this]() {
runner->run();
std::lock_guard<std::mutex> lock(this->runnerGuard_);
this->runners_.extract(runID);
});
}
/**
 * PATCH /v1/dagrun/:runID/state/:state — transition a DAG run.
 * Active run: only PAUSED/KILLED are accepted (stops the runner and logs
 * the new state). Inactive run: only QUEUED is accepted (rebuilds the DAG
 * from the log and requeues it). Anything else answers 405.
 */
void Server::handleSetDAGRunState(const Pistache::Rest::Request &request,
                                  Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;
  // TODO handle state transition
  DAGRunID runID = request.param(":runID").as<DAGRunID>();
  RunState newState = RunState::_from_string(
      request.param(":state").as<std::string>().c_str());
  std::shared_ptr<DAGRunner> runner{nullptr};
  {
    std::lock_guard<std::mutex> lock(runnerGuard_);
    // FIX: single map lookup — the iterator from find() is reused instead
    // of calling runners_.find(runID) a second time.
    auto it = runners_.find(runID);
    if (it != runners_.end()) {
      runner = it->second;
    }
  }
  if (runner) {
    switch (newState) {
    case RunState::PAUSED:
    case RunState::KILLED: {
      runner->stop(true, true);
      logger_.updateDAGRunState(runID, newState);
      break;
    }
    default: {
      REQ_RESPONSE(Method_Not_Allowed,
                   std::string{"Cannot transition to state "} +
                       newState._to_string());
    }
    }
  }
  else {
    switch (newState) {
    case RunState::QUEUED: {
      // Rebuild the DAG from the logged spec plus prior state changes and
      // requeue it; tasks that were mid-flight are reset to runnable.
      auto dagRun = logger_.getDAGRun(runID);
      auto dag =
          buildDAGFromTasks(dagRun.dagSpec.tasks, dagRun.taskStateChanges);
      dag.resetRunning();
      queueDAG_(runID, dag, dagRun.dagSpec.taskConfig);
      break;
    }
    default:
      REQ_RESPONSE(
          Method_Not_Allowed,
          std::string{"DAG not running, cannot transition to state "} +
              newState._to_string());
    }
  }
  REQ_RESPONSE(Ok, "");
}
// GET /v1/dagrun/:runID/task/:taskName — serialize one task's definition;
// 404 with the logger's message if the run/task is unknown.
void Server::handleGetTask(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request))
return;
auto runID = request.param(":runID").as<DAGRunID>();
auto taskName = request.param(":taskName").as<std::string>();
try {
auto task = logger_.getTask(runID, taskName);
response.send(Pistache::Http::Code::Ok, taskToJSON(task));
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
}
}
// GET /v1/dagrun/:runID/task/:taskName/state — report one task's run state.
void Server::handleGetTaskState(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request))
return;
auto runID = request.param(":runID").as<DAGRunID>();
auto taskName = request.param(":taskName").as<std::string>();
try {
auto state = logger_.getTaskState(runID, taskName);
std::stringstream ss;
ss << R"({ "runID": )" << runID << R"(, "taskName": )"
<< std::quoted(taskName) << R"(, "state": )"
<< std::quoted(state._to_string()) << '}';
response.send(Pistache::Http::Code::Ok, ss.str());
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
}
}
// PATCH /v1/dagrun/:runID/task/:taskName/state/:state — record a new state
// for one task directly in the logger (does not touch a live runner).
void Server::handleSetTaskState(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request))
return;
// TODO implement handling of task state
auto runID = request.param(":runID").as<DAGRunID>();
auto taskName = request.param(":taskName").as<std::string>();
RunState state = RunState::_from_string(
request.param(":state").as<std::string>().c_str());
try {
logger_.updateTaskState(runID, taskName, state);
response.send(Pistache::Http::Code::Ok, "");
}
catch (std::exception &e) {
REQ_RESPONSE(Not_Found, e.what());
}
}
// GET /ready — liveness probe; always answers 200.
void Server::handleReady(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
response.send(Pistache::Http::Code::Ok, R"({ "msg": "Ya like DAGs?"})");
}
/*
* handleAuth will check any auth methods and handle any responses in the
* case of failed auth. If it returns false, callers should cease handling
* the response
*/
bool Server::handleAuth(const Pistache::Rest::Request &request)
{
// Stub: authentication is not implemented yet; every request is allowed.
return true;
}
} // namespace daggy

View File

@@ -0,0 +1,9 @@
project(daggyd_tests)
# One test binary; main.cpp supplies the Catch2 main().
add_executable(${PROJECT_NAME} main.cpp
# unit tests
unit_server.cpp
)
# curl is used by unit_server.cpp to exercise the REST endpoints.
target_link_libraries(${PROJECT_NAME} libdaggyd libdaggy stdc++fs Catch2::Catch2 curl)
add_test(${PROJECT_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME})

15
daggyd/tests/main.cpp Normal file
View File

@@ -0,0 +1,15 @@
#include <iostream>
#include "daggy/DAG.hpp"
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>
// Smoke test: verifies the Catch2 harness itself builds, links, and runs.
TEST_CASE("Sanity tests", "[sanity]")
{
REQUIRE(1 == 1);
}
// compile and run
// g++ -std=c++17 -o test test.cpp && ./test

View File

@@ -0,0 +1,382 @@
#include <curl/curl.h>
#include <pistache/client.h>
#include <rapidjson/document.h>
#include <sys/stat.h>
#include <catch2/catch.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/executors/task/NoopTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <daggyd/Server.hpp>
#include <filesystem>
#include <iostream>
#include <thread>
namespace rj = rapidjson;
using namespace daggy;
// Optional libcurl debug callback (enabled by defining DEBUG_HTTP): labels
// each transfer event and dumps it to stderr.
#ifdef DEBUG_HTTP
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
void *userp)
{
const char *text;
(void)handle; /* prevent compiler warning */
(void)userp;
switch (type) {
// NOTE: CURLINFO_TEXT deliberately falls through into default after
// printing, so informational lines skip the banner below.
case CURLINFO_TEXT:
fprintf(stderr, "== Info: %s", data);
default: /* in case a new one is introduced to shock us */
return 0;
case CURLINFO_HEADER_OUT:
text = "=> Send header";
break;
case CURLINFO_DATA_OUT:
text = "=> Send data";
break;
case CURLINFO_SSL_DATA_OUT:
text = "=> Send SSL data";
break;
case CURLINFO_HEADER_IN:
text = "<= Recv header";
break;
case CURLINFO_DATA_IN:
text = "<= Recv data";
break;
case CURLINFO_SSL_DATA_IN:
text = "<= Recv SSL data";
break;
}
std::cerr << "\n================== " << text
<< " ==================" << std::endl
<< data << std::endl;
return 0;
}
#endif
// HTTP status codes the tests assert on. Underlying type is long so the
// enum can receive CURLINFO_RESPONSE_CODE's value directly.
enum HTTPCode : long
{
Ok = 200,
Not_Found = 404
};
// Minimal response container returned by REQUEST().
struct HTTPResponse
{
HTTPCode code;
std::string body;
};
// libcurl write callback: append the incoming chunk to the stringstream
// passed via CURLOPT_WRITEDATA and report how many bytes were consumed.
uint curlWriter(char *in, uint size, uint nmemb, std::stringstream *out)
{
  const uint nBytes = size * nmemb;
  out->write(in, nBytes);
  return nBytes;
}
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
const std::string &method = "GET")
{
HTTPResponse response;
CURL *curl;
CURLcode res;
struct curl_slist *headers = NULL;
curl_global_init(CURL_GLOBAL_ALL);
curl = curl_easy_init();
if (curl) {
std::stringstream buffer;
#ifdef DEBUG_HTTP
curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
#endif
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
if (!payload.empty()) {
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, payload.size());
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
headers = curl_slist_append(headers, "Content-Type: Application/Json");
}
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
headers = curl_slist_append(headers, "Expect:");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
res = curl_easy_perform(curl);
if (res != CURLE_OK) {
curl_easy_cleanup(curl);
throw std::runtime_error(std::string{"CURL Failed: "} +
curl_easy_strerror(res));
}
curl_easy_cleanup(curl);
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response.code);
response.body = buffer.str();
}
curl_global_cleanup();
return response;
}
// End-to-end exercise of the REST API against a real server bound to an
// ephemeral port: /ready, unknown-run lookup, then a full submit/list/
// inspect/poll-to-completion cycle using a real ForkingTaskExecutor.
TEST_CASE("rest_endpoint", "[server_basic]")
{
std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss);
// Port 0: let the OS choose; the test reads it back via getPort().
Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10, nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads);
server.start();
const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort());
SECTION("Ready Endpoint")
{
auto response = REQUEST(baseURL + "/ready");
REQUIRE(response.code == HTTPCode::Ok);
}
SECTION("Querying a non-existent dagrunid should fail ")
{
auto response = REQUEST(baseURL + "/v1/dagrun/100");
REQUIRE(response.code != HTTPCode::Ok);
}
SECTION("Simple DAGRun Submission")
{
// Two parameterized touch tasks fan into one cat task.
std::string dagRun = R"({
"tag": "unit_server",
"parameters": { "FILE": [ "A", "B" ] },
"tasks": {
"touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
"cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
"parents": [ "touch" ]
}
}
})";
auto dagSpec = daggy::dagFromJSON(dagRun);
// Submit, and get the runID
daggy::DAGRunID runID = 0;
{
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
runID = doc["runID"].GetUint64();
}
// Ensure our runID shows up in the list of running DAGs
{
auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsArray());
REQUIRE(doc.Size() >= 1);
// Ensure that our DAG is in the list and matches our given DAGRunID
bool found = false;
const auto &runs = doc.GetArray();
for (size_t i = 0; i < runs.Size(); ++i) {
const auto &run = runs[i];
REQUIRE(run.IsObject());
REQUIRE(run.HasMember("tag"));
REQUIRE(run.HasMember("runID"));
std::string runName = run["tag"].GetString();
if (runName == "unit_server") {
REQUIRE(run["runID"].GetUint64() == runID);
found = true;
break;
}
}
REQUIRE(found);
}
// Ensure we can get one of our tasks
{
// "_0" suffix: tasks are expanded per parameter instance.
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
"/task/cat_0");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
auto task = daggy::taskFromJSON("cat", doc);
REQUIRE(task == dagSpec.tasks.at("cat"));
}
// Wait until our DAG is complete
// Poll /v1/dagrun/:id for up to ~10s until all three task states read
// COMPLETED (timing-sensitive: relies on the forking executor finishing).
bool complete = true;
for (auto i = 0; i < 10; ++i) {
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("taskStates"));
const auto &taskStates = doc["taskStates"].GetObject();
size_t nStates = 0;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
nStates++;
}
REQUIRE(nStates == 3);
complete = true;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
std::string state = it->value.GetString();
if (state != "COMPLETED") {
complete = false;
break;
}
}
if (complete)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
REQUIRE(complete);
std::this_thread::sleep_for(std::chrono::seconds(2));
// The touch tasks should have left these files behind; clean them up.
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
REQUIRE(fs::exists(pth));
fs::remove(pth);
}
}
}
// Exercises the kill/patch-state/resume flow: submit a DAG with a slow
// task, KILL it mid-run, requeue the errored task, resume via
// state/QUEUED, and verify completed work is not re-executed (mtime check).
TEST_CASE("Server cancels and resumes execution", "[server_resume]")
{
std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss);
Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10, nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads);
server.start();
const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort());
SECTION("Cancel / Resume DAGRun")
{
// touch_A finishes fast, sleep_B is killed mid-sleep, touch_C waits on both.
std::string dagRunJSON = R"({
"tag": "unit_server",
"tasks": {
"touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
"sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
"touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
}
})";
auto dagSpec = daggy::dagFromJSON(dagRunJSON);
// Submit, and get the runID
daggy::DAGRunID runID;
{
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
REQUIRE(response.code == HTTPCode::Ok);
rj::Document doc;
daggy::checkRJParse(doc.Parse(response.body.c_str()));
REQUIRE(doc.IsObject());
REQUIRE(doc.HasMember("runID"));
runID = doc["runID"].GetUint64();
}
// Give touch_A time to finish while sleep_B is still running.
std::this_thread::sleep_for(1s);
// Stop the current run
{
auto response = REQUEST(
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
"PATCH");
REQUIRE(response.code == HTTPCode::Ok);
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
}
// Verify that the run still exists
{
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
+daggy::RunState::COMPLETED);
REQUIRE(fs::exists("resume_touch_a"));
REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
+daggy::RunState::ERRORED);
REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
}
// Set the errored task state
{
auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
"/task/sleep_B_0/state/QUEUED";
auto response = REQUEST(url, "", "PATCH");
REQUIRE(response.code == HTTPCode::Ok);
REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
+daggy::RunState::QUEUED);
}
// Resume
{
struct stat s;
lstat("resume_touch_A", &s);
auto preMTime = s.st_mtim.tv_sec;
auto response = REQUEST(
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
"PATCH");
// Wait for run to complete
std::this_thread::sleep_for(5s);
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);
REQUIRE(fs::exists("resume_touch_c"));
REQUIRE(fs::exists("resume_touch_a"));
for (const auto &[taskName, task] : dagSpec.tasks) {
REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
+daggy::RunState::COMPLETED);
}
// Ensure "touch_A" wasn't run again
lstat("resume_touch_A", &s);
auto postMTime = s.st_mtim.tv_sec;
REQUIRE(preMTime == postMTime);
}
}
server.shutdown();
}