Large re-organization to split daggyd away from the core libdaggy.

This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
This commit is contained in:
Ian Roddis
2021-10-18 16:28:40 -03:00
parent 612bc8af8a
commit 470a6f2bb7
59 changed files with 586 additions and 52 deletions

View File

@@ -46,6 +46,5 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
enable_testing()
add_subdirectory(daggy)
add_subdirectory(tests)
add_subdirectory(utils)
add_subdirectory(libdaggy)
add_subdirectory(daggyd)

3
daggyd/CMakeLists.txt Normal file
View File

@@ -0,0 +1,3 @@
# daggyd sub-project: a static support library, the daemon executable, and
# its tests. Order matters: libdaggyd must be added first because the daggyd
# executable and the tests link against the libdaggyd target it defines.
add_subdirectory(libdaggyd)
add_subdirectory(daggyd)
add_subdirectory(tests)

127
daggyd/README.md Normal file
View File

@@ -0,0 +1,127 @@
# Daggy Daemon
`daggyd` is the REST server process that handles receiving and running DAG specs.
# Running it
```bash
daggyd # That's it, will listen on 127.0.0.1:2503 , and run with a local executor
daggyd -d # Daemonize
daggyd --config FILE # Run with a config file
```
# Config Files
```json
{
"web-threads": 50,
"dag-threads": 50,
"port": 2503,
"ip": "localhost",
"logger": {
"name": "LoggerName",
"config": {
...
}
},
"executor": {
"name": "ExecutorName",
"config": {
...
}
}
}
```
## Loggers
### OStreamLogger
OStreamLogger doesn't persist data, but can write event updates to a file or
stdout.
The config for OStreamLogger looks like this:
```
{
...
"logger": {
"name": "OStreamLogger",
"config": {
"file": "/path/to/file"
}
}
...
}
```
If `file` is equal to `"-"`, then the logger will print events to stdout. This configuration
is the default if no logger is specified at all.
### RedisLogger
RedisLogger stores state in a [Redis](https://redis.io) instance.
The config for RedisLogger looks like this (along with default values):
```
{
...
"logger": {
"name": "RedisLogger",
"config": {
"prefix": "daggy",
"host": "localhost",
"port": 6379
}
}
...
}
```
The `prefix` attribute is used to distinguish daggy instances. All keys will be prefixed with
the value of `prefix`.
## Executors
### ForkingTaskExecutor
ForkingTaskExecutor does pretty much what the name implies: it will execute tasks by
forking on the local machine.
Its config, with default values, looks like:
```
{
...
"executor": {
"name": "ForkingTaskExecutor",
"config": {
"threads": 10
}
}
...
}
```
If no executor is specified in the config, this is the executor used.
### SlurmTaskExecutor
The SlurmTaskExecutor will execute tasks on a [slurm](https://slurm.schedmd.com) cluster. It relies
on the slurm config to manage any parallelism limits and quotas.
Its config, with default values, looks like:
```
{
...
"executor": {
"name": "SlurmTaskExecutor",
"config": { }
}
...
}
```

View File

@@ -0,0 +1,4 @@
project(daggyd)

# The daggyd executable: the REST daemon's entry point.
# List the source explicitly instead of file(GLOB): a glob of one named file
# buys nothing, silently picks up stray files, and hides additions from diffs.
add_executable(${PROJECT_NAME} daggyd.cpp)

# PRIVATE: these libraries are implementation details of the executable;
# nothing links against an executable, so no usage requirements to propagate.
target_link_libraries(${PROJECT_NAME} PRIVATE argparse libdaggyd libdaggy)

View File

@@ -5,7 +5,7 @@
#include <atomic>
#include <csignal>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggyd/Server.hpp>
#include <fstream>
#include <iostream>
@@ -14,12 +14,11 @@
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
// Add loggers here
#include <daggy/executors/task/TaskExecutor.hpp>
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <daggy/loggers/dag_run/RedisLogger.hpp>
#include "daggy/executors/task/TaskExecutor.hpp"
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
namespace rj = rapidjson;
static std::atomic<bool> running{true};
@@ -126,6 +125,10 @@ std::unique_ptr<dl::DAGRunLogger> loggerFactory(const rj::Value &config)
const auto &logConfig = logConf["config"];
if (name == "OStreamLogger") {
if (logConfig.HasMember("file")) {
std::string fn = logConfig["file"].GetString();
if (fn == "-")
return std::make_unique<dl::OStreamLogger>(std::cout);
std::ofstream ofh(logConfig["file"].GetString());
return std::make_unique<dl::OStreamLogger>(ofh);
}

View File

@@ -0,0 +1,8 @@
project(libdaggyd)
add_library(${PROJECT_NAME} STATIC)
target_include_directories(${PROJECT_NAME} PUBLIC include)
target_link_libraries(${PROJECT_NAME} libdaggy)
add_subdirectory(src)

View File

@@ -4,13 +4,12 @@
#include <pistache/endpoint.h>
#include <pistache/http.h>
#include <daggy/DAGRunner.hpp>
#include <daggy/ThreadPool.hpp>
#include <daggy/executors/task/TaskExecutor.hpp>
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
#include <filesystem>
#include "DAGRunner.hpp"
#include "ThreadPool.hpp"
#include "executors/task/TaskExecutor.hpp"
#include "loggers/dag_run/DAGRunLogger.hpp"
#define DAGGY_REST_HANDLER(func) \
void func(const Pistache::Rest::Request &request, \
Pistache::Http::ResponseWriter response);

View File

@@ -0,0 +1,3 @@
# Attach implementation files to the libdaggyd target declared in the parent
# directory's add_library() call. PRIVATE: sources are never propagated.
target_sources(${PROJECT_NAME} PRIVATE
Server.cpp
)

View File

@@ -1,8 +1,8 @@
#include <enum.h>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/Utilities.hpp>
#include <daggyd/Server.hpp>
#include <iomanip>
#include <mutex>
#include <numeric>

View File

@@ -0,0 +1,9 @@
project(daggyd_tests)

# Catch2-based test binary for the daggyd server.
add_executable(${PROJECT_NAME}
  main.cpp
  # unit tests
  unit_server.cpp
)

# PRIVATE: test-only link dependencies; nothing consumes a test binary.
target_link_libraries(${PROJECT_NAME} PRIVATE
  libdaggyd libdaggy stdc++fs Catch2::Catch2 curl)

# Prefer the NAME/COMMAND signature with the target name: CMake expands it to
# the built binary's real location on any generator, instead of hardcoding
# ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.
add_test(NAME ${PROJECT_NAME} COMMAND ${PROJECT_NAME})

View File

@@ -0,0 +1,382 @@
#include <curl/curl.h>
#include <pistache/client.h>
#include <rapidjson/document.h>
#include <sys/stat.h>
#include <catch2/catch.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/executors/task/NoopTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <daggyd/Server.hpp>
#include <filesystem>
#include <iostream>
#include <thread>
namespace rj = rapidjson;
using namespace daggy;
#ifdef DEBUG_HTTP
// libcurl debug callback (installed via CURLOPT_DEBUGFUNCTION when
// DEBUG_HTTP is defined): labels each piece of transfer traffic and dumps
// it to stderr. Return value 0 is required by the libcurl contract.
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
                    void *userp)
{
  const char *text;
  (void)handle; /* prevent compiler warning */
  (void)userp;

  switch (type) {
    case CURLINFO_TEXT:
      fprintf(stderr, "== Info: %s", data);
    // deliberate fallthrough: informational lines are printed above and,
    // like any unhandled infotype, skip the dump at the bottom
    default: /* in case a new one is introduced to shock us */
      return 0;
    case CURLINFO_HEADER_OUT:
      text = "=> Send header";
      break;
    case CURLINFO_DATA_OUT:
      text = "=> Send data";
      break;
    case CURLINFO_SSL_DATA_OUT:
      text = "=> Send SSL data";
      break;
    case CURLINFO_HEADER_IN:
      text = "<= Recv header";
      break;
    case CURLINFO_DATA_IN:
      text = "<= Recv data";
      break;
    case CURLINFO_SSL_DATA_IN:
      text = "<= Recv SSL data";
      break;
  }
  // NOTE(review): `size` is ignored and `data` is not guaranteed to be
  // NUL-terminated by libcurl, so this stream insertion can over-read —
  // tolerable for a debug-only helper, but worth confirming.
  std::cerr << "\n================== " << text
            << " ==================" << std::endl
            << data << std::endl;
  return 0;
}
#endif
// HTTP status codes the tests assert against. Backed by `long` so the value
// written by curl_easy_getinfo(CURLINFO_RESPONSE_CODE, long*) fits.
enum HTTPCode : long
{
  Ok = 200,
  Not_Found = 404
};

// Minimal response carrier returned by REQUEST().
struct HTTPResponse
{
  HTTPCode code;     // HTTP status code of the reply
  std::string body;  // raw response body
};
// libcurl write callback: appends the received chunk to a std::stringstream.
// The prototype follows the CURLOPT_WRITEFUNCTION contract,
// size_t(char*, size_t, size_t, void*): the original used the non-standard
// `uint` typedef, which is non-portable and can truncate on platforms where
// size_t is wider than unsigned int.
// Returns the number of bytes consumed (size * nmemb); returning anything
// else signals an error to libcurl.
size_t curlWriter(char *in, size_t size, size_t nmemb, std::stringstream *out)
{
  const size_t total = size * nmemb;
  out->write(in, total);
  return total;
}
// Minimal synchronous HTTP client around libcurl, used by the tests to talk
// to the daggyd REST server.
//
//   url     — full URL including host:port
//   payload — if non-empty, sent as the request body with a JSON content type
//   method  — HTTP verb, passed via CURLOPT_CUSTOMREQUEST (default "GET")
//
// Returns the status code and raw body. Throws std::runtime_error when the
// transfer itself fails (connection refused, DNS failure, ...).
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
                     const std::string &method = "GET")
{
  HTTPResponse response;
  CURL *curl;
  CURLcode res;
  struct curl_slist *headers = NULL;
  // NOTE(review): global init/cleanup per call is not thread-safe per the
  // libcurl docs; acceptable for these single-threaded tests.
  curl_global_init(CURL_GLOBAL_ALL);
  curl = curl_easy_init();
  if (curl) {
    std::stringstream buffer;
#ifdef DEBUG_HTTP
    curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
    curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
#endif
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
    if (!payload.empty()) {
      // CURLOPT_POSTFIELDSIZE is documented to take a long; cast explicitly
      // instead of pushing a size_t through the varargs interface.
      curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
                       static_cast<long>(payload.size()));
      curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
      headers = curl_slist_append(headers, "Content-Type: Application/Json");
    }
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
    headers = curl_slist_append(headers, "Expect:");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

    res = curl_easy_perform(curl);
    if (res != CURLE_OK) {
      curl_slist_free_all(headers);  // was leaked on this path
      curl_easy_cleanup(curl);
      throw std::runtime_error(std::string{"CURL Failed: "} +
                               curl_easy_strerror(res));
    }

    // BUG FIX: the original called curl_easy_getinfo AFTER
    // curl_easy_cleanup — a use-after-free of the easy handle. Read the
    // response code while the handle is still alive, then clean up.
    long code = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &code);
    response.code = static_cast<HTTPCode>(code);
    response.body = buffer.str();

    curl_slist_free_all(headers);  // was also leaked on the success path
    curl_easy_cleanup(curl);
  }
  curl_global_cleanup();
  return response;
}
// End-to-end exercise of the daggyd REST endpoints against a live server
// bound to an ephemeral port, using a local forking executor and an
// in-memory (stringstream-backed) logger.
TEST_CASE("rest_endpoint", "[server_basic]")
{
  std::stringstream ss;
  daggy::executors::task::ForkingTaskExecutor executor(10);
  daggy::loggers::dag_run::OStreamLogger logger(ss);

  // Port 0: let the OS pick a free port; the real one is read back below.
  Pistache::Address listenSpec("localhost", Pistache::Port(0));
  const size_t nDAGRunners = 10, nWebThreads = 10;
  daggy::Server server(listenSpec, logger, executor, nDAGRunners);
  server.init(nWebThreads);
  server.start();

  const std::string host = "localhost:";
  const std::string baseURL = host + std::to_string(server.getPort());

  SECTION("Ready Endpoint")
  {
    auto response = REQUEST(baseURL + "/ready");
    REQUIRE(response.code == HTTPCode::Ok);
  }

  SECTION("Querying a non-existent dagrunid should fail ")
  {
    auto response = REQUEST(baseURL + "/v1/dagrun/100");
    REQUIRE(response.code != HTTPCode::Ok);
  }

  SECTION("Simple DAGRun Submission")
  {
    std::string dagRun = R"({
"tag": "unit_server",
"parameters": { "FILE": [ "A", "B" ] },
"tasks": {
"touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
"cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
"parents": [ "touch" ]
}
}
})";
    auto dagSpec = daggy::dagFromJSON(dagRun);

    // Submit, and get the runID
    daggy::DAGRunID runID = 0;
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
      REQUIRE(response.code == HTTPCode::Ok);
      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("runID"));
      runID = doc["runID"].GetUint64();
    }

    // Ensure our runID shows up in the list of running DAGs
    {
      auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
      REQUIRE(response.code == HTTPCode::Ok);
      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsArray());
      REQUIRE(doc.Size() >= 1);
      // Ensure that our DAG is in the list and matches our given DAGRunID
      bool found = false;
      const auto &runs = doc.GetArray();
      for (size_t i = 0; i < runs.Size(); ++i) {
        const auto &run = runs[i];
        REQUIRE(run.IsObject());
        REQUIRE(run.HasMember("tag"));
        REQUIRE(run.HasMember("runID"));
        std::string runName = run["tag"].GetString();
        if (runName == "unit_server") {
          REQUIRE(run["runID"].GetUint64() == runID);
          found = true;
          break;
        }
      }
      REQUIRE(found);
    }

    // Ensure we can fetch an individual task run back from the server
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
                              "/task/cat_0");
      REQUIRE(response.code == HTTPCode::Ok);
      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
      auto task = daggy::taskFromJSON("cat", doc);
      REQUIRE(task == dagSpec.tasks.at("cat"));
    }

    // Poll (up to ~10s) until every task run reports COMPLETED
    bool complete = true;
    for (auto i = 0; i < 10; ++i) {
      auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
      REQUIRE(response.code == HTTPCode::Ok);
      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("taskStates"));
      const auto &taskStates = doc["taskStates"].GetObject();
      size_t nStates = 0;
      for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
           ++it) {
        nStates++;
      }
      // This spec is expected to expand into exactly 3 task runs
      REQUIRE(nStates == 3);
      complete = true;
      for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
           ++it) {
        std::string state = it->value.GetString();
        if (state != "COMPLETED") {
          complete = false;
          break;
        }
      }
      if (complete)
        break;
      std::this_thread::sleep_for(std::chrono::seconds(1));
    }
    REQUIRE(complete);

    // Verify the touch artifacts exist, then clean them up for reruns
    std::this_thread::sleep_for(std::chrono::seconds(2));
    for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
      REQUIRE(fs::exists(pth));
      fs::remove(pth);
    }
  }

  // CONSISTENCY FIX: shut the server down explicitly (the "[server_resume]"
  // test already does this) instead of relying on destructor ordering.
  server.shutdown();
}
// Exercises cancelling (KILLED) and resuming (QUEUED) an in-flight DAG run,
// and verifies that already-completed tasks are not executed a second time
// on resume.
TEST_CASE("Server cancels and resumes execution", "[server_resume]")
{
  std::stringstream ss;
  daggy::executors::task::ForkingTaskExecutor executor(10);
  daggy::loggers::dag_run::OStreamLogger logger(ss);
  Pistache::Address listenSpec("localhost", Pistache::Port(0));
  const size_t nDAGRunners = 10, nWebThreads = 10;
  daggy::Server server(listenSpec, logger, executor, nDAGRunners);
  server.init(nWebThreads);
  server.start();
  const std::string host = "localhost:";
  const std::string baseURL = host + std::to_string(server.getPort());

  SECTION("Cancel / Resume DAGRun")
  {
    std::string dagRunJSON = R"({
"tag": "unit_server",
"tasks": {
"touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
"sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
"touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
}
})";
    auto dagSpec = daggy::dagFromJSON(dagRunJSON);

    // Submit, and get the runID
    daggy::DAGRunID runID;
    {
      auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
      REQUIRE(response.code == HTTPCode::Ok);
      rj::Document doc;
      daggy::checkRJParse(doc.Parse(response.body.c_str()));
      REQUIRE(doc.IsObject());
      REQUIRE(doc.HasMember("runID"));
      runID = doc["runID"].GetUint64();
    }

    // Let touch_A finish while sleep_B (a 3s sleep) is still running
    std::this_thread::sleep_for(1s);

    // Stop the current run
    {
      auto response = REQUEST(
          baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
          "PATCH");
      REQUIRE(response.code == HTTPCode::Ok);
      REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
    }

    // Verify the run still exists and each task is in the expected state
    {
      auto dagRun = logger.getDAGRun(runID);
      REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
              +daggy::RunState::COMPLETED);
      REQUIRE(fs::exists("resume_touch_a"));
      REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
              +daggy::RunState::ERRORED);
      REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
    }

    // Re-queue the errored task so the run can be resumed
    {
      auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
                 "/task/sleep_B_0/state/QUEUED";
      auto response = REQUEST(url, "", "PATCH");
      REQUIRE(response.code == HTTPCode::Ok);
      REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
              +daggy::RunState::QUEUED);
    }

    // Resume
    {
      struct stat s;
      // BUG FIX: touch_A creates "resume_touch_a" (lowercase); the original
      // lstat'ed "resume_touch_A", which fails on case-sensitive filesystems
      // and leaves `s` unspecified — making the mtime comparison below
      // meaningless. Also check lstat's return value.
      REQUIRE(lstat("resume_touch_a", &s) == 0);
      auto preMTime = s.st_mtim.tv_sec;
      auto response = REQUEST(
          baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
          "PATCH");
      // The resume request itself must succeed (was silently ignored before)
      REQUIRE(response.code == HTTPCode::Ok);
      // Wait for run to complete
      std::this_thread::sleep_for(5s);
      REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);
      REQUIRE(fs::exists("resume_touch_c"));
      REQUIRE(fs::exists("resume_touch_a"));
      for (const auto &[taskName, task] : dagSpec.tasks) {
        REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
                +daggy::RunState::COMPLETED);
      }
      // Ensure "touch_A" wasn't run again: its mtime must be unchanged
      REQUIRE(lstat("resume_touch_a", &s) == 0);
      auto postMTime = s.st_mtim.tv_sec;
      REQUIRE(preMTime == postMTime);
    }

    // Clean up artifacts so repeated runs start from a clean slate
    // (mirrors the cleanup done in the rest_endpoint test)
    for (const auto &pth :
         std::vector<fs::path>{"resume_touch_a", "resume_touch_c"}) {
      if (fs::exists(pth))
        fs::remove(pth);
    }
  }
  server.shutdown();
}

View File

@@ -14,3 +14,4 @@ target_include_directories(${PROJECT_NAME} PUBLIC include)
target_link_libraries(${PROJECT_NAME} pistache pthread rapidjson better-enums)
add_subdirectory(src)
add_subdirectory(tests)

View File

@@ -1,6 +1,5 @@
target_sources(${PROJECT_NAME} PRIVATE
Serialization.cpp
Server.cpp
Utilities.cpp
DAGRunner.cpp
)

View File

@@ -0,0 +1,20 @@
project(libdaggy_tests)
add_executable(${PROJECT_NAME} main.cpp
# unit tests
unit_dag.cpp
unit_dagrunner.cpp
unit_dagrun_loggers.cpp
unit_executor_forkingexecutor.cpp
unit_executor_slurmexecutor.cpp
unit_serialization.cpp
unit_threadpool.cpp
unit_utilities.cpp
# integration tests
int_basic.cpp
# Performance checks
perf_dag.cpp
)
target_link_libraries(${PROJECT_NAME} libdaggy stdc++fs Catch2::Catch2)
add_test(${PROJECT_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME})

15
libdaggy/tests/main.cpp Normal file
View File

@@ -0,0 +1,15 @@
#include <iostream>
#include "daggy/DAG.hpp"

// CATCH_CONFIG_MAIN must be defined before including catch.hpp: it makes
// Catch2 generate main() in this (and only this) translation unit for the
// whole test binary.
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>

// Smoke test proving the test binary compiles, links, and runs at all.
TEST_CASE("Sanity tests", "[sanity]")
{
  REQUIRE(1 == 1);
}

// NOTE(review): stale standalone-build hint from before the CMake split;
// this file is now built via the libdaggy_tests target.
// g++ -std=c++17 -o test test.cpp && ./test

View File

@@ -1,33 +0,0 @@
add_executable(unit_tests main.cpp
# unit tests
unit_dag.cpp
unit_dagrunner.cpp
unit_dagrun_loggers.cpp
unit_executor_forkingexecutor.cpp
unit_executor_slurmexecutor.cpp
unit_serialization.cpp
unit_server.cpp
unit_threadpool.cpp
unit_utilities.cpp
# integration tests
int_basic.cpp
# Performance checks
perf_dag.cpp
)
target_link_libraries(unit_tests libdaggy stdc++fs Catch2::Catch2 curl)
add_executable(integration_tests main.cpp
# unit tests
int_basic.cpp
)
target_link_libraries(integration_tests libdaggy stdc++fs Catch2::Catch2 curl)
add_executable(perf_tests main.cpp
# Performance checks
perf_dag.cpp
)
target_link_libraries(perf_tests libdaggy stdc++fs Catch2::Catch2 curl)
add_test(UNIT_TESTS ${CMAKE_BINARY_DIR}/tests/unit_tests)
add_test(INT_TESTS ${CMAKE_BINARY_DIR}/tests/integration_tests)
add_test(PERF_TESTS ${CMAKE_BINARY_DIR}/tests/perf_tests)

View File

@@ -1,2 +1 @@
add_subdirectory(daggyd)
add_subdirectory(daggyc)

View File

@@ -1,4 +0,0 @@
project(daggyd)
file(GLOB SOURCES *.cpp)
add_executable(${PROJECT_NAME} ${SOURCES})
target_link_libraries(${PROJECT_NAME} pistache stdc++fs rapidjson argparse libdaggy)