Large re-organization to split daggyd away from the core libdaggy.
This paves the way for implementing daggys and other utilities. Squashed commit of the following: commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe Author: Ian Roddis <gitlab@ie2r.com> Date: Mon Oct 18 16:25:02 2021 -0300 Adding README, stdout support for daggyd logging commit c2c237224e84a3be68aaa597ce98af1365e74a13 Author: Ian Roddis <gitlab@ie2r.com> Date: Mon Oct 18 16:10:29 2021 -0300 removing old daggyd commit cfea2baf61ca10c535801c5a391d2d525a1a2d04 Author: Ian Roddis <gitlab@ie2r.com> Date: Mon Oct 18 16:10:09 2021 -0300 Moving tests into their sub-project folders commit e41ca42069bea1db16dd76b6684a3f692fef6b15 Author: Ian Roddis <gitlab@ie2r.com> Date: Mon Oct 18 15:57:40 2021 -0300 Splitting out daggyd from libdaggy commit be97b146c1d2446f5c03cb78707e921f18c60bd8 Author: Ian Roddis <gitlab@ie2r.com> Date: Mon Oct 18 15:56:55 2021 -0300 Splitting out daggyd from libdaggy commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00 Author: Ian Roddis <gitlab@ie2r.com> Date: Mon Oct 18 15:49:47 2021 -0300 moving daggy to libdaggy
This commit is contained in:
3
daggyd/CMakeLists.txt
Normal file
3
daggyd/CMakeLists.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
# Top-level build for the daggyd tree: server library, daemon binary, tests.
add_subdirectory(libdaggyd)
add_subdirectory(daggyd)
add_subdirectory(tests)
|
||||
127
daggyd/README.md
Normal file
127
daggyd/README.md
Normal file
@@ -0,0 +1,127 @@
|
||||
# Daggy Daemon
|
||||
|
||||
`daggyd` is the REST server process that handles receiving and running DAG specs.
|
||||
|
||||
# Running it
|
||||
|
||||
```bash
|
||||
daggyd # That's it, will listen on 127.0.0.1:2503 , and run with a local executor
|
||||
daggyd -d # Daemonize
|
||||
|
||||
daggyd --config FILE # Run with a config file
|
||||
```
|
||||
|
||||
# Config Files
|
||||
|
||||
```json
|
||||
{
|
||||
"web-threads": 50,
|
||||
"dag-threads": 50,
|
||||
"port": 2503,
|
||||
"ip": "localhost",
|
||||
"logger": {
|
||||
"name": "LoggerName",
|
||||
"config": {
|
||||
...
|
||||
}
|
||||
},
|
||||
"executor": {
|
||||
"name": "ExecutorName",
|
||||
"config": {
|
||||
...
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Loggers
|
||||
|
||||
### OStreamLogger
|
||||
|
||||
OStreamLogger doesn't persist data, but can write event updates to a file or
|
||||
stdout.
|
||||
|
||||
The config for OStreamLogger looks like this:
|
||||
|
||||
```
|
||||
{
|
||||
...
|
||||
"logger": {
|
||||
"name": "OStreamLogger",
|
||||
"config": {
|
||||
"file": "/path/to/file"
|
||||
}
|
||||
}
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
If `file` is equal to `"-"`, then the logger will print events to stdout. This configuration
|
||||
is the default if no logger is specified at all.
|
||||
|
||||
### RedisLogger
|
||||
|
||||
RedisLogger stores state in a [Redis](https://redis.io) instance.
|
||||
|
||||
The config for RedisLogger looks like this (along with default values):
|
||||
|
||||
```
|
||||
{
|
||||
...
|
||||
"logger": {
|
||||
"name": "RedisLogger",
|
||||
"config": {
|
||||
"prefix": "daggy",
|
||||
"host": "localhost",
|
||||
"port": 6379
|
||||
}
|
||||
}
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
The `prefix` attribute is used to distinguish daggy instances. All keys will be prefixed with
|
||||
the value of `prefix`.
|
||||
|
||||
|
||||
## Executors
|
||||
|
||||
### ForkingTaskExecutor
|
||||
|
||||
ForkingTaskExecutor does pretty much what the name implies: it will execute tasks by
|
||||
forking on the local machine.
|
||||
|
||||
Its config with default values looks like:
|
||||
|
||||
```
|
||||
{
|
||||
...
|
||||
"executor": {
|
||||
"name": "ForkingTaskExecutor",
|
||||
"config": {
|
||||
"threads": 10
|
||||
}
|
||||
}
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
If no executor is specified in the config, this is the executor used.
|
||||
|
||||
### SlurmTaskExecutor
|
||||
|
||||
The SlurmTaskExecutor will execute tasks on a [slurm](https://slurm.schedmd.com) cluster. It relies
|
||||
on the slurm config to manage any parallelism limits and quotas.
|
||||
|
||||
Its config with default values looks like:
|
||||
|
||||
```
|
||||
{
|
||||
...
|
||||
"executor": {
|
||||
"name": "SlurmTaskExecutor",
|
||||
"config": { }
|
||||
}
|
||||
...
|
||||
}
|
||||
```
|
||||
4
daggyd/daggyd/CMakeLists.txt
Normal file
4
daggyd/daggyd/CMakeLists.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
project(daggyd)
# The daemon is a single translation unit.
file(GLOB SOURCES daggyd.cpp)
add_executable(${PROJECT_NAME} ${SOURCES})
# argparse: CLI parsing; libdaggyd: REST server; libdaggy: core DAG engine.
target_link_libraries(${PROJECT_NAME} argparse libdaggyd libdaggy)
|
||||
258
daggyd/daggyd/daggyd.cpp
Normal file
258
daggyd/daggyd/daggyd.cpp
Normal file
@@ -0,0 +1,258 @@
|
||||
#include <rapidjson/document.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <argparse.hpp>
|
||||
#include <atomic>
|
||||
#include <csignal>
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggyd/Server.hpp>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
// Add executors here
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
|
||||
|
||||
// Add loggers here
|
||||
#include <daggy/executors/task/TaskExecutor.hpp>
|
||||
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
|
||||
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
|
||||
#include <daggy/loggers/dag_run/RedisLogger.hpp>
|
||||
|
||||
namespace rj = rapidjson;
|
||||
|
||||
static std::atomic<bool> running{true};
|
||||
|
||||
void signalHandler(int signal)
|
||||
{
|
||||
switch (signal) {
|
||||
case SIGHUP:
|
||||
break;
|
||||
case SIGINT:
|
||||
case SIGTERM:
|
||||
running = false;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Detach the process from its controlling terminal using the classic
// double-fork recipe. On return the caller is running as a session-leader-
// free daemon child with cwd "/" and all inherited descriptors closed.
//
// NOTE(review): stdin/stdout/stderr are closed but not reopened on
// /dev/null, so any later writes to std::cout/std::cerr go to whatever
// descriptor gets reused next — confirm this is intended.
void daemonize()
{
  pid_t pid;

  struct sigaction newSigAction;
  sigset_t newSigSet;

  /* Check if parent process id is set — ppid 1 means we are already
     re-parented to init, i.e. already daemonized. */
  if (getppid() == 1) {
    return;
  }

  /* Set signal mask - signals we want to block */
  sigemptyset(&newSigSet);
  sigaddset(&newSigSet,
            SIGCHLD); /* ignore child - i.e. we don't need to wait for it */
  sigaddset(&newSigSet, SIGTSTP); /* ignore Tty stop signals */
  sigaddset(&newSigSet, SIGTTOU); /* ignore Tty background writes */
  sigaddset(&newSigSet, SIGTTIN); /* ignore Tty background reads */
  sigprocmask(SIG_BLOCK, &newSigSet,
              nullptr); /* Block the above specified signals */

  /* Set up a signal handler */
  newSigAction.sa_handler = signalHandler;
  sigemptyset(&newSigAction.sa_mask);
  newSigAction.sa_flags = 0;

  /* Signals to handle — these feed the global `running` flag. */
  sigaction(SIGHUP, &newSigAction, nullptr);  /* catch hangup signal */
  sigaction(SIGTERM, &newSigAction, nullptr); /* catch term signal */
  sigaction(SIGINT, &newSigAction, nullptr);  /* catch interrupt signal */

  // Fork once: the parent exits so the shell regains control.
  pid = fork();
  if (pid < 0) {
    exit(EXIT_FAILURE);
  }
  if (pid > 0) {
    exit(EXIT_SUCCESS);
  }

  /* On success: The child process becomes session leader */
  if (setsid() < 0) {
    std::cerr << "Unable to setsid" << std::endl;
    exit(EXIT_FAILURE);
  }

  /* Catch, ignore and handle signals */
  signal(SIGCHLD, SIG_IGN);
  signal(SIGHUP, SIG_IGN);

  /* Fork off for the second time — the session leader exits so the daemon
     can never reacquire a controlling terminal. */
  pid = fork();
  if (pid < 0)
    exit(EXIT_FAILURE);
  if (pid > 0)
    exit(EXIT_SUCCESS);

  umask(0);

  /* Change the working directory to the root directory */
  /* or another appropriated directory */
  auto rc = chdir("/");
  (void)rc; // best-effort; failure leaves cwd unchanged

  /* Close all open file descriptors */
  for (int x = sysconf(_SC_OPEN_MAX); x >= 0; x--) {
    close(x);
  }
}
|
||||
|
||||
namespace dl = daggy::loggers::dag_run;
|
||||
|
||||
/// Build a DAGRunLogger from the top-level JSON config.
///
/// Recognized shape: {"logger": {"name": "...", "config": {...}}}.
/// Falls back to an OStreamLogger on stdout when no logger section is
/// present, or when an OStreamLogger is requested without a "file" entry.
///
/// @throws std::runtime_error on a malformed logger section, an unknown
///         logger name, or an unopenable log file.
std::unique_ptr<dl::DAGRunLogger> loggerFactory(const rj::Value &config)
{
  if (config.HasMember("logger")) {
    const auto &logConf = config["logger"];
    if (!logConf.IsObject())
      throw std::runtime_error("logger config is not an object");
    if (!logConf.HasMember("name"))
      throw std::runtime_error("logger config is missing logger name");
    if (!logConf.HasMember("config"))
      throw std::runtime_error("logger config is missing logger config");

    std::string name = logConf["name"].GetString();
    const auto &logConfig = logConf["config"];
    if (name == "OStreamLogger") {
      if (logConfig.HasMember("file")) {
        std::string fn = logConfig["file"].GetString();
        // "-" selects stdout by convention (see README).
        if (fn == "-")
          return std::make_unique<dl::OStreamLogger>(std::cout);

        // OStreamLogger takes the stream by reference (std::ostream is not
        // copyable), so the stream must outlive the logger. The previous
        // code passed a function-local std::ofstream, which dangled the
        // moment this function returned. Give the stream static storage
        // duration instead; this factory runs once at startup.
        static std::ofstream ofh;
        ofh.open(fn);
        if (!ofh.is_open())
          throw std::runtime_error("Unable to open log file: " + fn);
        return std::make_unique<dl::OStreamLogger>(ofh);
      }
    }
    else if (name == "RedisLogger") {
      // Defaults match the README.
      std::string host = "localhost";
      uint16_t port = 6379;
      std::string prefix = "daggy";

      if (logConfig.HasMember("prefix"))
        prefix = logConfig["prefix"].GetString();
      if (logConfig.HasMember("host"))
        host = logConfig["host"].GetString();
      if (logConfig.HasMember("port"))
        port = logConfig["port"].GetInt();

      return std::make_unique<dl::RedisLogger>(prefix, host, port);
    }
    else
      throw std::runtime_error("Unknown logger type: " + name);
  }
  // Default: event log to stdout.
  return std::make_unique<dl::OStreamLogger>(std::cout);
}
|
||||
|
||||
namespace de = daggy::executors::task;
|
||||
|
||||
/// Build a TaskExecutor from the top-level JSON config.
///
/// Recognized shape: {"executor": {"name": "...", "config": {...}}}.
/// Defaults to a ForkingTaskExecutor with 10 worker threads when no
/// executor section is present.
///
/// @throws std::runtime_error on a malformed executor section or an
///         unknown executor name.
std::unique_ptr<de::TaskExecutor> executorFactory(const rj::Value &config)
{
  if (config.HasMember("executor")) {
    const auto &execConf = config["executor"];
    if (!execConf.IsObject())
      throw std::runtime_error("Executor config is not an object");
    if (!execConf.HasMember("name"))
      // (message previously read "missing execger name")
      throw std::runtime_error("Executor config is missing executor name");
    if (!execConf.HasMember("config"))
      throw std::runtime_error("Executor config is missing config");
    std::string name = execConf["name"].GetString();
    const auto &execConfig = execConf["config"];

    if (name == "ForkingTaskExecutor") {
      size_t threads = 10;
      if (execConfig.HasMember("threads"))
        threads = execConfig["threads"].GetInt64();
      return std::make_unique<de::ForkingTaskExecutor>(threads);
    }
    else if (name == "SlurmTaskExecutor") {
      // Parallelism limits are delegated to the slurm cluster config.
      return std::make_unique<de::SlurmTaskExecutor>();
    }
    else
      throw std::runtime_error("Unknown executor type: " + name);
  }

  // No executor section: default local forking executor.
  return std::make_unique<de::ForkingTaskExecutor>(10);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
argparse::ArgumentParser args("Daggy");
|
||||
|
||||
args.add_argument("-v", "--verbose")
|
||||
.default_value(false)
|
||||
.implicit_value(true);
|
||||
args.add_argument("-d", "--daemon").default_value(false).implicit_value(true);
|
||||
args.add_argument("--config");
|
||||
try {
|
||||
args.parse_args(argc, argv);
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
std::cout << "Error: " << e.what() << std::endl;
|
||||
std::cout << args;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
bool verbose = args.get<bool>("--verbose");
|
||||
bool asDaemon = args.get<bool>("--daemon");
|
||||
auto configFile = args.get<std::string>("--config");
|
||||
|
||||
std::ifstream ifh(configFile);
|
||||
std::string config;
|
||||
std::getline(ifh, config, '\0');
|
||||
ifh.close();
|
||||
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(config.c_str()));
|
||||
|
||||
std::string listenIP = "127.0.0.1";
|
||||
int listenPort = 2503;
|
||||
size_t webThreads = 50;
|
||||
size_t dagThreads = 50;
|
||||
|
||||
if (doc.HasMember("ip"))
|
||||
listenIP = doc["ip"].GetString();
|
||||
if (doc.HasMember("port"))
|
||||
listenPort = doc["port"].GetInt();
|
||||
if (doc.HasMember("web-threads"))
|
||||
webThreads = doc["web-threads"].GetInt64();
|
||||
if (doc.HasMember("dag-threads"))
|
||||
dagThreads = doc["dag-threads"].GetInt64();
|
||||
|
||||
if (verbose) {
|
||||
std::cout << "Server running at http://" << listenIP << ':' << listenPort
|
||||
<< std::endl
|
||||
<< "Max DAG Processing: " << dagThreads << std::endl
|
||||
<< "Max Web Clients: " << webThreads << std::endl
|
||||
<< std::endl
|
||||
<< "Ctrl-C to exit" << std::endl;
|
||||
}
|
||||
|
||||
if (asDaemon) {
|
||||
daemonize();
|
||||
}
|
||||
|
||||
auto logger = loggerFactory(doc);
|
||||
auto executor = executorFactory(doc);
|
||||
|
||||
Pistache::Address listenSpec(listenIP, listenPort);
|
||||
|
||||
daggy::Server server(listenSpec, *logger, *executor, dagThreads);
|
||||
server.init(webThreads);
|
||||
server.start();
|
||||
|
||||
running = true;
|
||||
while (running) {
|
||||
std::this_thread::sleep_for(std::chrono::seconds(30));
|
||||
}
|
||||
server.shutdown();
|
||||
}
|
||||
8
daggyd/libdaggyd/CMakeLists.txt
Normal file
8
daggyd/libdaggyd/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
project(libdaggyd)

# Static library holding the REST server implementation.
add_library(${PROJECT_NAME} STATIC)

# Public headers live under include/daggyd/.
target_include_directories(${PROJECT_NAME} PUBLIC include)
target_link_libraries(${PROJECT_NAME} libdaggy)

# Sources are attached to the target via target_sources() in src/.
add_subdirectory(src)
|
||||
67
daggyd/libdaggyd/include/daggyd/Server.hpp
Normal file
67
daggyd/libdaggyd/include/daggyd/Server.hpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
|
||||
#include <pistache/description.h>
|
||||
#include <pistache/endpoint.h>
|
||||
#include <pistache/http.h>
|
||||
|
||||
#include <daggy/DAGRunner.hpp>
|
||||
#include <daggy/ThreadPool.hpp>
|
||||
#include <daggy/executors/task/TaskExecutor.hpp>
|
||||
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
#define DAGGY_REST_HANDLER(func) \
|
||||
void func(const Pistache::Rest::Request &request, \
|
||||
Pistache::Http::ResponseWriter response);
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace daggy {
|
||||
/// REST server exposing the daggy DAG-run API over Pistache.
///
/// The logger and executor are held by reference and must outlive the
/// Server. Queued DAG runs execute on an internal thread pool; live
/// runners are tracked in runners_, keyed by run ID.
class Server
{
public:
  /// @param listenSpec  address/port to bind
  /// @param logger      run-state log sink (not owned)
  /// @param executor    task execution backend (not owned)
  /// @param nDAGRunners size of the DAG runner thread pool
  Server(const Pistache::Address &listenSpec,
         loggers::dag_run::DAGRunLogger &logger,
         executors::task::TaskExecutor &executor, size_t nDAGRunners);
  ~Server();

  /// Enable HTTPS with the given cert/key; call before start().
  Server &setSSLCertificates(const fs::path &cert, const fs::path &key);

  /// Configure the HTTP endpoint with `threads` worker threads.
  void init(size_t threads = 1);

  /// Begin serving (non-blocking; Pistache serves on its own threads).
  void start();

  /// Port the endpoint is bound to.
  uint16_t getPort() const;

  /// Stop the HTTP endpoint, then drain the runner pool.
  void shutdown();

private:
  /// Register all REST routes on desc_ (see Server.cpp for the mapping).
  void createDescription();
  /// Start a DAGRunner for runID on the pool; throws if already running.
  void queueDAG_(DAGRunID runID, const TaskDAG &dag,
                 const TaskParameters &taskParameters);

  // REST handlers; each expands to the standard Pistache handler signature
  // via DAGGY_REST_HANDLER.
  DAGGY_REST_HANDLER(handleReady);
  DAGGY_REST_HANDLER(handleQueryDAGs);
  DAGGY_REST_HANDLER(handleRunDAG);
  DAGGY_REST_HANDLER(handleValidateDAG);
  DAGGY_REST_HANDLER(handleGetDAGRun);
  DAGGY_REST_HANDLER(handleGetDAGRunState);
  DAGGY_REST_HANDLER(handleSetDAGRunState);
  DAGGY_REST_HANDLER(handleGetTask);
  DAGGY_REST_HANDLER(handleGetTaskState);
  DAGGY_REST_HANDLER(handleSetTaskState);

  /// Auth hook; sends the failure response itself and returns false when
  /// the caller should stop handling the request.
  bool handleAuth(const Pistache::Rest::Request &request);

  Pistache::Http::Endpoint endpoint_;
  Pistache::Rest::Description desc_;
  Pistache::Rest::Router router_;

  loggers::dag_run::DAGRunLogger &logger_;   // not owned
  executors::task::TaskExecutor &executor_;  // not owned
  ThreadPool runnerPool_;

  // Guards runners_; entries remove themselves when a run completes.
  std::mutex runnerGuard_;
  std::unordered_map<DAGRunID, std::shared_ptr<DAGRunner>> runners_;
};
|
||||
} // namespace daggy
|
||||
3
daggyd/libdaggyd/src/CMakeLists.txt
Normal file
3
daggyd/libdaggyd/src/CMakeLists.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
# Attach sources to the libdaggyd target defined in the parent listfile.
target_sources(${PROJECT_NAME} PRIVATE
  Server.cpp
)
|
||||
514
daggyd/libdaggyd/src/Server.cpp
Normal file
514
daggyd/libdaggyd/src/Server.cpp
Normal file
@@ -0,0 +1,514 @@
|
||||
#include <enum.h>
|
||||
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/Utilities.hpp>
|
||||
#include <daggyd/Server.hpp>
|
||||
#include <iomanip>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#define REQ_RESPONSE(code, msg) \
|
||||
std::stringstream ss; \
|
||||
ss << R"({"message": )" << std::quoted(msg) << "}"; \
|
||||
response.send(Pistache::Http::Code::code, ss.str()); \
|
||||
return;
|
||||
|
||||
using namespace Pistache;
|
||||
|
||||
namespace daggy {
|
||||
// Configure the HTTP endpoint: worker thread count, address/port reuse,
// and 4 GiB request/response size caps (DAG specs can be very large).
void Server::init(size_t threads)
{
  auto opts = Http::Endpoint::options()
                  .threads(threads)
                  .flags(Pistache::Tcp::Options::ReuseAddr |
                         Pistache::Tcp::Options::ReusePort)
                  .maxRequestSize(4294967296)
                  .maxResponseSize(4294967296);
  endpoint_.init(opts);
  createDescription();
}
|
||||
|
||||
// Wire the server to an externally-owned logger and executor (both must
// outlive this Server) and size the DAG runner thread pool.
Server::Server(const Pistache::Address &listenSpec,
               loggers::dag_run::DAGRunLogger &logger,
               executors::task::TaskExecutor &executor, size_t nDAGRunners)
    : endpoint_(listenSpec)
    , desc_("Daggy API", "0.1")
    , logger_(logger)
    , executor_(executor)
    , runnerPool_(nDAGRunners)
{
}
|
||||
|
||||
// Stop the endpoint and runner pool on destruction.
// NOTE(review): shutdown() may already have been called by the owner —
// confirm the endpoint and pool tolerate a second shutdown.
Server::~Server()
{
  shutdown();
}
|
||||
|
||||
// Build the router from the API description and begin serving on
// Pistache's own threads; returns immediately.
void Server::start()
{
  router_.initFromDescription(desc_);

  endpoint_.setHandler(router_.handler());
  endpoint_.serveThreaded();
}
|
||||
|
||||
// Enable HTTPS with the given certificate/key pair; call before start().
// Returns *this for chaining.
Server &Server::setSSLCertificates(const fs::path &cert, const fs::path &key)
{
  endpoint_.useSSL(cert, key);
  return *this;
}
|
||||
|
||||
// Stop accepting HTTP traffic first, then drain the DAG runner pool so
// in-flight runs are not handed new requests while winding down.
void Server::shutdown()
{
  endpoint_.shutdown();
  runnerPool_.shutdown();
}
|
||||
|
||||
// Port the endpoint is actually bound to.
uint16_t Server::getPort() const
{
  return endpoint_.getPort();
}
|
||||
|
||||
// Declare the whole REST API: each route, its handler binding, and its
// documented response. Called once from init(); start() then builds the
// router from this description.
void Server::createDescription()
{
  desc_.info().license("MIT", "https://opensource.org/licenses/MIT");

  auto backendErrorResponse =
      desc_.response(Http::Code::Internal_Server_Error,
                     R"({"error": "An error occurred with the backend"})");

  desc_.schemes(Rest::Scheme::Http)
      .basePath("/v1")
      .produces(MIME(Application, Json))
      .consumes(MIME(Application, Json));

  // Liveness probe, hidden from the generated API docs.
  desc_.route(desc_.get("/ready"))
      .bind(&Server::handleReady, this)
      .response(Http::Code::Ok, "Response to the /ready call")
      .hide();

  auto versionPath = desc_.path("/v1");

  /*
  DAG Run Summaries
  */
  auto dagRunsPath = versionPath.path("/dagruns");

  dagRunsPath.route(desc_.get("/"))
      .bind(&Server::handleQueryDAGs, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "List summaries DAGs");

  /*
  Individual DAG Run routes
  */
  auto dagRunPath = versionPath.path("/dagrun");

  dagRunPath.route(desc_.post("/"))
      .bind(&Server::handleRunDAG, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Run a DAG");

  dagRunPath.route(desc_.post("/validate"))
      .bind(&Server::handleValidateDAG, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Validate a DAG Run Spec");

  /*
  Management of a specific DAG
  */
  auto specificDAGRunPath = dagRunPath.path("/:runID");

  specificDAGRunPath.route(desc_.get("/"))
      .bind(&Server::handleGetDAGRun, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Full DAG Run");

  specificDAGRunPath.route(desc_.get("/state"))
      .bind(&Server::handleGetDAGRunState, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok,
                "Structure of a DAG and DAG and Task run states");

  specificDAGRunPath.route(desc_.patch("/state/:state"))
      .bind(&Server::handleSetDAGRunState, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Change the state of a DAG");

  /*
  Task paths
  */
  auto taskPath = specificDAGRunPath.path("/task/:taskName");
  taskPath.route(desc_.get("/"))
      .bind(&Server::handleGetTask, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Details of a specific task");

  /*
  Task State paths
  */
  auto taskStatePath = taskPath.path("/state");

  taskStatePath.route(desc_.get("/"))
      .bind(&Server::handleGetTaskState, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Get a task state");

  taskStatePath.route(desc_.patch("/:state"))
      .bind(&Server::handleSetTaskState, this)
      .produces(MIME(Application, Json))
      .response(Http::Code::Ok, "Set a task state");
}
|
||||
|
||||
// POST /v1/dagrun/ — parse a DAG spec from the request body, expand its
// task set, register the run with the logger, queue it for execution, and
// reply with the new runID.
// NOTE(review): parse/expansion errors propagate as exceptions rather than
// an HTTP error response — confirm Pistache's default handling is
// acceptable here.
void Server::handleRunDAG(const Pistache::Rest::Request &request,
                          Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  auto dagSpec = dagFromJSON(request.body());
  // Expand templated/parameterized tasks into concrete tasks.
  dagSpec.tasks =
      expandTaskSet(dagSpec.tasks, executor_, dagSpec.taskConfig.variables);

  // Get a run ID
  DAGRunID runID = logger_.startDAGRun(dagSpec);
  auto dag = buildDAGFromTasks(dagSpec.tasks);
  queueDAG_(runID, dag, dagSpec.taskConfig);

  response.send(Pistache::Http::Code::Ok,
                R"({"runID": )" + std::to_string(runID) + "}");
}
|
||||
|
||||
// POST /v1/dagrun/validate — parse the submitted DAG spec without running
// it and report whether it is valid.
//
// Fixes two defects in the original: a failed parse replied with
// `"valid": true`, and the error message was spliced into the response
// unquoted, producing invalid JSON.
void Server::handleValidateDAG(const Pistache::Rest::Request &request,
                               Pistache::Http::ResponseWriter response)
{
  try {
    dagFromJSON(request.body());
    response.send(Pistache::Http::Code::Ok, R"({"valid": true})");
  }
  catch (std::exception &e) {
    std::stringstream ss;
    ss << R"({"valid": false, "error": )" << std::quoted(e.what()) << "}";
    response.send(Pistache::Http::Code::Ok, ss.str());
  }
}
|
||||
|
||||
// GET /v1/dagruns/ — list run summaries as a JSON array. Supports
// ?tag=... to filter by tag and ?all=true|1 to include completed runs.
void Server::handleQueryDAGs(const Pistache::Rest::Request &request,
                             Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  bool all = false;
  std::string tag = "";

  if (request.query().has("tag")) {
    tag = request.query().get("tag").value();
  }

  // "all" arrives as a query parameter (?all=1). The original tested for a
  // route parameter ":all" — never present on this route — and then
  // unconditionally dereferenced the (empty) query optional.
  if (request.query().has("all")) {
    auto val = request.query().get("all").value();
    if (val == "true" or val == "1") {
      all = true;
    }
  }

  auto dagRuns = logger_.queryDAGRuns(tag, all);
  std::stringstream ss;
  ss << '[';

  bool first = true;
  for (const auto &run : dagRuns) {
    if (first) {
      first = false;
    }
    else {
      ss << ", ";
    }

    ss << " {"
       << R"("runID": )" << run.runID << ',' << R"("tag": )"
       << std::quoted(run.tag) << ","
       << R"("startTime": )" << std::quoted(timePointToString(run.startTime))
       << ',' << R"("lastUpdate": )"
       << std::quoted(timePointToString(run.lastUpdate)) << ','
       << R"("taskCounts": {)";
    // Per-state task counts, e.g. {"RUNNING": 3, "DONE": 7}.
    bool firstState = true;
    for (const auto &[state, count] : run.taskStateCounts) {
      if (firstState) {
        firstState = false;
      }
      else {
        ss << ", ";
      }
      ss << std::quoted(state._to_string()) << ':' << count;
    }
    ss << '}'  // end of taskCounts
       << '}'; // end of item
  }

  ss << ']';
  response.send(Pistache::Http::Code::Ok, ss.str());
}
|
||||
|
||||
// GET /v1/dagrun/:runID — serialize the full run record as JSON: the DAG
// spec and tasks, per-task run states, per-task attempt records, and the
// DAG-level state-change history.
void Server::handleGetDAGRun(const Pistache::Rest::Request &request,
                             Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;
  if (!request.hasParam(":runID")) {
    REQ_RESPONSE(Not_Found, "No runID provided in URL");
  }
  auto runID = request.param(":runID").as<size_t>();
  auto run = logger_.getDAGRun(runID);

  bool first = true;
  std::stringstream ss;
  ss << "{"
     << R"("runID": )" << runID << ',' << R"("tag": )"
     << std::quoted(run.dagSpec.tag) << ',' << R"("tasks": )"
     << tasksToJSON(run.dagSpec.tasks) << ',';

  // task run states
  ss << R"("taskStates": { )";
  first = true;
  for (const auto &[name, state] : run.taskRunStates) {
    if (first) {
      first = false;
    }
    else {
      ss << ',';
    }
    ss << std::quoted(name) << ": " << std::quoted(state._to_string());
  }
  ss << "},";

  // Attempt records: one array of attempt objects per task name.
  first = true;
  ss << R"("taskAttempts": { )";
  for (const auto &[taskName, attempts] : run.taskAttempts) {
    if (first) {
      first = false;
    }
    else {
      ss << ',';
    }
    ss << std::quoted(taskName) << ": [";
    bool firstAttempt = true;
    for (const auto &attempt : attempts) {
      if (firstAttempt) {
        firstAttempt = false;
      }
      else {
        ss << ',';
      }
      ss << '{' << R"("startTime":)"
         << std::quoted(timePointToString(attempt.startTime)) << ','
         << R"("stopTime":)"
         << std::quoted(timePointToString(attempt.stopTime)) << ','
         << R"("rc":)" << attempt.rc << ',' << R"("outputLog":)"
         << std::quoted(attempt.outputLog) << ',' << R"("errorLog":)"
         << std::quoted(attempt.errorLog) << ',' << R"("executorLog":)"
         << std::quoted(attempt.executorLog) << '}';
    }
    ss << ']';
  }
  ss << "},";

  // DAG state changes, in logger order.
  first = true;
  ss << R"("dagStateChanges": [ )";
  for (const auto &change : run.dagStateChanges) {
    if (first) {
      first = false;
    }
    else {
      ss << ',';
    }
    ss << stateUpdateRecordToJSON(change);
  }
  ss << "]";
  ss << '}';

  response.send(Pistache::Http::Code::Ok, ss.str());
}
|
||||
|
||||
// GET /v1/dagrun/:runID/state — report the run's current state; 404 if
// the logger has no record of runID.
void Server::handleGetDAGRunState(const Pistache::Rest::Request &request,
                                  Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  DAGRunID runID = request.param(":runID").as<DAGRunID>();
  // Placeholder value; overwritten before use on the success path.
  RunState state = RunState::QUEUED;
  try {
    state = logger_.getDAGRunState(runID);
    std::stringstream ss;
    ss << R"({ "runID": )" << runID << R"(, "state": )"
       << std::quoted(state._to_string()) << '}';
    response.send(Pistache::Http::Code::Ok, ss.str());
  }
  catch (std::exception &e) {
    REQ_RESPONSE(Not_Found, e.what());
  }
}
|
||||
|
||||
/*
 * Construct a DAGRunner for runID and hand it to the runner pool. The
 * pool task removes the runner from runners_ when the DAG finishes.
 *
 * Throws std::runtime_error if a run with this ID is already active.
 * (A stale commented-out emplace variant was removed.)
 */
void Server::queueDAG_(DAGRunID runID, const TaskDAG &dag,
                       const TaskParameters &taskParameters)
{
  std::lock_guard<std::mutex> lock(runnerGuard_);
  auto it = runners_.emplace(
      runID, std::make_shared<DAGRunner>(runID, executor_, logger_, dag,
                                         taskParameters));

  if (!it.second)
    throw std::runtime_error("A DAGRun with the same ID is already running");
  auto runner = it.first->second;
  // The lambda's shared_ptr copy keeps the runner alive even after it is
  // erased from the map below.
  runnerPool_.addTask([runner, runID, this]() {
    runner->run();
    std::lock_guard<std::mutex> lock(this->runnerGuard_);
    this->runners_.extract(runID);
  });
}
|
||||
|
||||
// PATCH /v1/dagrun/:runID/state/:state — transition a DAG run. A running
// DAG may be PAUSED or KILLED; a non-running DAG may be re-QUEUED. Any
// other transition responds 405.
void Server::handleSetDAGRunState(const Pistache::Rest::Request &request,
                                  Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  // TODO handle state transition
  DAGRunID runID = request.param(":runID").as<DAGRunID>();
  RunState newState = RunState::_from_string(
      request.param(":state").as<std::string>().c_str());

  // Look up the live runner under the lock. The original discarded the
  // find() result and called find() a second time in the condition.
  std::shared_ptr<DAGRunner> runner{nullptr};
  {
    std::lock_guard<std::mutex> lock(runnerGuard_);
    auto it = runners_.find(runID);
    if (it != runners_.end()) {
      runner = it->second;
    }
  }

  if (runner) {
    // DAG is currently executing.
    switch (newState) {
      case RunState::PAUSED:
      case RunState::KILLED: {
        runner->stop(true, true);
        logger_.updateDAGRunState(runID, newState);
        break;
      }
      default: {
        REQ_RESPONSE(Method_Not_Allowed,
                     std::string{"Cannot transition to state "} +
                         newState._to_string());
      }
    }
  }
  else {
    // DAG is not executing; only re-queueing makes sense.
    switch (newState) {
      case RunState::QUEUED: {
        auto dagRun = logger_.getDAGRun(runID);
        auto dag =
            buildDAGFromTasks(dagRun.dagSpec.tasks, dagRun.taskStateChanges);
        dag.resetRunning();
        queueDAG_(runID, dag, dagRun.dagSpec.taskConfig);
        break;
      }
      default:
        REQ_RESPONSE(
            Method_Not_Allowed,
            std::string{"DAG not running, cannot transition to state "} +
                newState._to_string());
    }
  }
  REQ_RESPONSE(Ok, "");
}
|
||||
|
||||
// GET /v1/dagrun/:runID/task/:taskName — full task record as JSON; 404 if
// the run or task is unknown.
void Server::handleGetTask(const Pistache::Rest::Request &request,
                           Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  auto runID = request.param(":runID").as<DAGRunID>();
  auto taskName = request.param(":taskName").as<std::string>();

  try {
    auto task = logger_.getTask(runID, taskName);
    response.send(Pistache::Http::Code::Ok, taskToJSON(task));
  }
  catch (std::exception &e) {
    REQ_RESPONSE(Not_Found, e.what());
  }
}
|
||||
|
||||
// GET /v1/dagrun/:runID/task/:taskName/state — the task's current run
// state; 404 if the run or task is unknown.
void Server::handleGetTaskState(const Pistache::Rest::Request &request,
                                Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  auto runID = request.param(":runID").as<DAGRunID>();
  auto taskName = request.param(":taskName").as<std::string>();

  try {
    auto state = logger_.getTaskState(runID, taskName);
    std::stringstream ss;
    ss << R"({ "runID": )" << runID << R"(, "taskName": )"
       << std::quoted(taskName) << R"(, "state": )"
       << std::quoted(state._to_string()) << '}';
    response.send(Pistache::Http::Code::Ok, ss.str());
  }
  catch (std::exception &e) {
    REQ_RESPONSE(Not_Found, e.what());
  }
}
|
||||
|
||||
// PATCH /v1/dagrun/:runID/task/:taskName/state/:state — force a task's
// state in the logger; 404 if the run or task is unknown. Note this only
// updates the log record; it does not affect a live runner.
void Server::handleSetTaskState(const Pistache::Rest::Request &request,
                                Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  // TODO implement handling of task state
  auto runID = request.param(":runID").as<DAGRunID>();
  auto taskName = request.param(":taskName").as<std::string>();
  RunState state = RunState::_from_string(
      request.param(":state").as<std::string>().c_str());

  try {
    logger_.updateTaskState(runID, taskName, state);
    response.send(Pistache::Http::Code::Ok, "");
  }
  catch (std::exception &e) {
    REQ_RESPONSE(Not_Found, e.what());
  }
}
|
||||
|
||||
// GET /ready — liveness probe; always responds 200.
void Server::handleReady(const Pistache::Rest::Request &request,
                         Pistache::Http::ResponseWriter response)
{
  response.send(Pistache::Http::Code::Ok, R"({ "msg": "Ya like DAGs?"})");
}
|
||||
|
||||
/*
 * handleAuth will check any auth methods and handle any responses in the
 * case of failed auth. If it returns false, callers should cease handling
 * the response.
 *
 * TODO: no authentication is implemented yet — every request is allowed.
 */
bool Server::handleAuth(const Pistache::Rest::Request &request)
{
  return true;
}
|
||||
} // namespace daggy
|
||||
9
daggyd/tests/CMakeLists.txt
Normal file
9
daggyd/tests/CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
# Unit tests for daggyd: built as a single Catch2 binary and registered
# with CTest.
project(daggyd_tests)

add_executable(${PROJECT_NAME} main.cpp
  # unit tests
  unit_server.cpp
)
# curl drives the REST endpoints in unit_server.cpp; stdc++fs is needed for
# <filesystem> on older GCC toolchains.
target_link_libraries(${PROJECT_NAME} libdaggyd libdaggy stdc++fs Catch2::Catch2 curl)

add_test(${PROJECT_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME})
|
||||
15
daggyd/tests/main.cpp
Normal file
15
daggyd/tests/main.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
// Catch2 test-runner entry point for the daggyd test suite.
#include <iostream>

#include "daggy/DAG.hpp"

// Have Catch2 generate main() for this binary.
#define CATCH_CONFIG_MAIN

#include <catch2/catch.hpp>

// Smoke test: confirms the Catch2 harness links and runs at all.
TEST_CASE("Sanity tests", "[sanity]")
{
  REQUIRE(1 == 1);
}

// Built as part of the daggyd_tests target (see CMakeLists.txt); run via
// ctest or by executing the daggyd_tests binary directly.
||||
382
daggyd/tests/unit_server.cpp
Normal file
382
daggyd/tests/unit_server.cpp
Normal file
@@ -0,0 +1,382 @@
|
||||
#include <curl/curl.h>
|
||||
#include <pistache/client.h>
|
||||
#include <rapidjson/document.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <daggy/executors/task/NoopTaskExecutor.hpp>
|
||||
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
|
||||
#include <daggyd/Server.hpp>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
namespace rj = rapidjson;
|
||||
|
||||
using namespace daggy;
|
||||
|
||||
#ifdef DEBUG_HTTP
|
||||
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
|
||||
void *userp)
|
||||
{
|
||||
const char *text;
|
||||
(void)handle; /* prevent compiler warning */
|
||||
(void)userp;
|
||||
|
||||
switch (type) {
|
||||
case CURLINFO_TEXT:
|
||||
fprintf(stderr, "== Info: %s", data);
|
||||
default: /* in case a new one is introduced to shock us */
|
||||
return 0;
|
||||
|
||||
case CURLINFO_HEADER_OUT:
|
||||
text = "=> Send header";
|
||||
break;
|
||||
case CURLINFO_DATA_OUT:
|
||||
text = "=> Send data";
|
||||
break;
|
||||
case CURLINFO_SSL_DATA_OUT:
|
||||
text = "=> Send SSL data";
|
||||
break;
|
||||
case CURLINFO_HEADER_IN:
|
||||
text = "<= Recv header";
|
||||
break;
|
||||
case CURLINFO_DATA_IN:
|
||||
text = "<= Recv data";
|
||||
break;
|
||||
case CURLINFO_SSL_DATA_IN:
|
||||
text = "<= Recv SSL data";
|
||||
break;
|
||||
}
|
||||
|
||||
std::cerr << "\n================== " << text
|
||||
<< " ==================" << std::endl
|
||||
<< data << std::endl;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Minimal set of HTTP status codes these tests assert against. Underlying
// type is `long` to match what CURLINFO_RESPONSE_CODE writes.
enum HTTPCode : long
{
  Ok = 200,
  Not_Found = 404
};

// Result of one curl request: final status code plus the raw body text.
struct HTTPResponse
{
  HTTPCode code;
  std::string body;
};
|
||||
|
||||
// curl write callback: appends the received chunk to the caller's
// std::stringstream and reports the number of bytes consumed.
//
// BUG FIX: the original declared this with non-standard `uint` parameters;
// CURLOPT_WRITEFUNCTION requires the exact signature
// size_t(char*, size_t, size_t, void*) — calling through a mismatched
// function type is undefined behavior and truncates sizes on LP64.
size_t curlWriter(char *in, size_t size, size_t nmemb, std::stringstream *out)
{
  const size_t total = size * nmemb;
  out->write(in, total);
  return total;  // returning less than `total` would make curl abort the transfer
}
|
||||
|
||||
// Issues a synchronous HTTP request via libcurl and returns the status code
// and response body. A non-empty `payload` is sent as the request body with
// a JSON content type. Throws std::runtime_error on transport failure.
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
                     const std::string &method = "GET")
{
  HTTPResponse response;

  CURL *curl;
  CURLcode res;
  struct curl_slist *headers = NULL;

  curl_global_init(CURL_GLOBAL_ALL);

  curl = curl_easy_init();
  if (curl) {
    std::stringstream buffer;

#ifdef DEBUG_HTTP
    curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
#endif

    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);

    if (!payload.empty()) {
      curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, payload.size());
      curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
      headers = curl_slist_append(headers, "Content-Type: Application/Json");
    }
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
    // Suppress curl's automatic "Expect: 100-continue" on POST bodies.
    headers = curl_slist_append(headers, "Expect:");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

    res = curl_easy_perform(curl);

    if (res != CURLE_OK) {
      // LEAK FIX: the header list was never freed on either path.
      curl_slist_free_all(headers);
      curl_easy_cleanup(curl);
      throw std::runtime_error(std::string{"CURL Failed: "} +
                               curl_easy_strerror(res));
    }

    // BUG FIX: read the response code BEFORE destroying the handle — the
    // original called curl_easy_getinfo() after curl_easy_cleanup(), i.e.
    // on a freed handle.
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response.code);
    response.body = buffer.str();

    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
  }

  curl_global_cleanup();

  return response;
}
|
||||
|
||||
// End-to-end test: submit a small DAG over the REST API, watch it run to
// completion, and verify the artifacts its tasks leave on disk.
TEST_CASE("rest_endpoint", "[server_basic]")
{
  std::stringstream ss;  // sink for the run logger's output
  daggy::executors::task::ForkingTaskExecutor executor(10);
  daggy::loggers::dag_run::OStreamLogger logger(ss);
  // Port 0: let the OS pick a free port; read it back via getPort().
  Pistache::Address listenSpec("localhost", Pistache::Port(0));

  const size_t nDAGRunners = 10, nWebThreads = 10;

  daggy::Server server(listenSpec, logger, executor, nDAGRunners);
  server.init(nWebThreads);
  server.start();

  const std::string host = "localhost:";
  const std::string baseURL = host + std::to_string(server.getPort());

  // NOTE(review): unlike [server_resume], this test never calls
  // server.shutdown() — it relies on Server's destructor; confirm that is
  // intentional.
  SECTION("Ready Endpoint")
  {
    auto response = REQUEST(baseURL + "/ready");
    REQUIRE(response.code == HTTPCode::Ok);
  }

  SECTION("Querying a non-existent dagrunid should fail ")
  {
    auto response = REQUEST(baseURL + "/v1/dagrun/100");
    REQUIRE(response.code != HTTPCode::Ok);
  }

  SECTION("Simple DAGRun Submission")
  {
    // Two parameterized touch tasks ("touch_0"/"touch_1" via FILE=A,B) fan
    // into a single cat task that reads both files.
    std::string dagRun = R"({
      "tag": "unit_server",
      "parameters": { "FILE": [ "A", "B" ] },
      "tasks": {
        "touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
        "cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
                 "parents": [ "touch" ]
               }
      }
    })";

    auto dagSpec = daggy::dagFromJSON(dagRun);

    // Submit, and get the runID
    daggy::DAGRunID runID = 0;
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
      REQUIRE(response.code == HTTPCode::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("runID"));

      runID = doc["runID"].GetUint64();
    }

    // Ensure our runID shows up in the list of running DAGs
    {
      auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
      REQUIRE(response.code == HTTPCode::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsArray());
      REQUIRE(doc.Size() >= 1);

      // Ensure that our DAG is in the list and matches our given DAGRunID
      bool found = false;
      const auto &runs = doc.GetArray();
      for (size_t i = 0; i < runs.Size(); ++i) {
        const auto &run = runs[i];
        REQUIRE(run.IsObject());
        REQUIRE(run.HasMember("tag"));
        REQUIRE(run.HasMember("runID"));

        std::string runName = run["tag"].GetString();
        if (runName == "unit_server") {
          REQUIRE(run["runID"].GetUint64() == runID);
          found = true;
          break;
        }
      }
      REQUIRE(found);
    }

    // Ensure we can get one of our tasks (instance 0 of "cat") and that it
    // round-trips back into a Task equal to the submitted spec.
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
                              "/task/cat_0");
      REQUIRE(response.code == HTTPCode::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));

      REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
      auto task = daggy::taskFromJSON("cat", doc);

      REQUIRE(task == dagSpec.tasks.at("cat"));
    }

    // Wait until our DAG is complete: poll up to 10 times, 1 s apart, for
    // all three task instances (touch_0, touch_1, cat_0) to be COMPLETED.
    bool complete = true;
    for (auto i = 0; i < 10; ++i) {
      auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
      REQUIRE(response.code == HTTPCode::Ok);
      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsObject());

      REQUIRE(doc.HasMember("taskStates"));
      const auto &taskStates = doc["taskStates"].GetObject();

      // Count the reported task states; the run expands to exactly three.
      size_t nStates = 0;
      for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
           ++it) {
        nStates++;
      }
      REQUIRE(nStates == 3);

      complete = true;
      for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
           ++it) {
        std::string state = it->value.GetString();
        if (state != "COMPLETED") {
          complete = false;
          break;
        }
      }
      if (complete)
        break;
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    REQUIRE(complete);

    // Give the filesystem a moment, then verify the touch tasks' artifacts
    // exist and clean them up.
    std::this_thread::sleep_for(std::chrono::seconds(2));
    for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
      REQUIRE(fs::exists(pth));
      fs::remove(pth);
    }
  }
}
|
||||
|
||||
// Exercises kill/resume of a running DAG through the REST API: the run is
// killed mid-flight, the errored task is re-queued, and the run resumes to
// completion without re-running already-completed work.
TEST_CASE("Server cancels and resumes execution", "[server_resume]")
{
  std::stringstream ss;
  daggy::executors::task::ForkingTaskExecutor executor(10);
  daggy::loggers::dag_run::OStreamLogger logger(ss);
  Pistache::Address listenSpec("localhost", Pistache::Port(0));

  const size_t nDAGRunners = 10, nWebThreads = 10;

  daggy::Server server(listenSpec, logger, executor, nDAGRunners);
  server.init(nWebThreads);
  server.start();

  const std::string host = "localhost:";
  const std::string baseURL = host + std::to_string(server.getPort());

  SECTION("Cancel / Resume DAGRun")
  {
    // touch_A and sleep_B start in parallel; touch_C waits on both.
    std::string dagRunJSON = R"({
      "tag": "unit_server",
      "tasks": {
        "touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
        "sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
        "touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
      }
    })";

    auto dagSpec = daggy::dagFromJSON(dagRunJSON);

    // Submit, and get the runID
    daggy::DAGRunID runID;
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
      REQUIRE(response.code == HTTPCode::Ok);

      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("runID"));

      runID = doc["runID"].GetUint64();
    }

    // Let touch_A finish while sleep_B (3 s) is still running.
    std::this_thread::sleep_for(1s);

    // Stop the current run
    {
      auto response = REQUEST(
          baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
          "PATCH");
      REQUIRE(response.code == HTTPCode::Ok);
      REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
    }

    // Verify that the run still exists, with the expected per-task states.
    {
      auto dagRun = logger.getDAGRun(runID);
      REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
              +daggy::RunState::COMPLETED);
      REQUIRE(fs::exists("resume_touch_a"));

      REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
              +daggy::RunState::ERRORED);
      REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
    }

    // Re-queue the errored task so the run can be resumed.
    {
      auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
                 "/task/sleep_B_0/state/QUEUED";
      auto response = REQUEST(url, "", "PATCH");
      REQUIRE(response.code == HTTPCode::Ok);
      REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
              +daggy::RunState::QUEUED);
    }

    // Resume
    {
      struct stat s;

      // BUG FIX: the original lstat'ed "resume_touch_A" (wrong case) — a
      // file that never exists — so `s` was uninitialized garbage and the
      // mtime comparison below was meaningless. Also assert lstat succeeds.
      REQUIRE(lstat("resume_touch_a", &s) == 0);
      auto preMTime = s.st_mtim.tv_sec;

      auto response = REQUEST(
          baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
          "PATCH");
      // BUG FIX: the original discarded this response without checking it.
      REQUIRE(response.code == HTTPCode::Ok);

      // Wait for run to complete
      std::this_thread::sleep_for(5s);
      REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);

      REQUIRE(fs::exists("resume_touch_c"));
      REQUIRE(fs::exists("resume_touch_a"));

      for (const auto &[taskName, task] : dagSpec.tasks) {
        REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
                +daggy::RunState::COMPLETED);
      }

      // Ensure "touch_A" wasn't run again (its file's mtime is unchanged).
      REQUIRE(lstat("resume_touch_a", &s) == 0);
      auto postMTime = s.st_mtim.tv_sec;
      REQUIRE(preMTime == postMTime);

      // Clean up artifacts (mirrors the cleanup done in [server_basic]).
      fs::remove("resume_touch_a");
      fs::remove("resume_touch_c");
    }
  }

  server.shutdown();
}
|
||||
Reference in New Issue
Block a user