Adding support for remote execution daemons.
Squashed commit of the following: commit 69d5ef7a256b86a86d46e5ae374c00fded1497ea Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 12:15:55 2021 -0400 Updating readme commit 94a9f676d0f9cc0b55cdc18c4927eaea40d82c77 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 12:05:36 2021 -0400 Fixing serialization of attempt records when querying entire dag commit 945e5f90b24abf07c9af1bc4c6bbcb33e93b8069 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 11:37:59 2021 -0400 Compiles cleanly... commit 8b23e46081d47fb80dc1a2d998fc6dc4bbf301a8 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:43:03 2021 -0400 Adding in missing source file to cmake build list commit 6d10d9791206e2bc15788beadeea580b8e43a853 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:41:43 2021 -0400 Adding new executors commit 42a2c67f4d6ae99df95d917c8621d78cd99837a1 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:27:14 2021 -0400 Fixing missing curl cmake dependency commit 394bc4c5d51ecee7bf14712f719c8bf7e97fb0fa Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:21:58 2021 -0400 Fixing missing curl cmake dependency commit dd9efc8e7e7770ea1bcbccb70a1af9cfcff0414c Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 17:15:38 2021 -0400 Checkpointing progress commit 3b3b55d6037bb96e46de6763f486f4ecb92fe6a0 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 14:21:18 2021 -0400 updating readme commit 303027c11452941b2a0c0d1b04ac5942e79efd74 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 14:17:16 2021 -0400 Namespacing daggyd Adding more error checking around deserialization of parameters Adding tests for runner agent commit c592eaeba12e2a449bae401e8c1d9ed236416d52 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 11:20:21 2021 -0400 Checkpointing work commit fb1862d1cefe2b53a98659cce3c8c73d88bf5d84 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 09:52:29 2021 -0400 Copying daggyd for daggyr template, adding in basic routes
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
#include <iostream>
|
||||
|
||||
// Add executors here
|
||||
#include <daggy/executors/task/DaggyRunnerTaskExecutor.hpp>
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
|
||||
|
||||
@@ -177,6 +178,27 @@ std::unique_ptr<de::TaskExecutor> executorFactory(const rj::Value &config)
|
||||
else if (name == "SlurmTaskExecutor") {
|
||||
return std::make_unique<de::SlurmTaskExecutor>();
|
||||
}
|
||||
else if (name == "DaggyRunnerTaskExecutor") {
  // Build an executor backed by remote runners. The config must carry a
  // non-empty "runners" array whose entries are all strings (runner URLs);
  // any other shape is reported as a std::runtime_error.
  if (!execConfig.HasMember("runners"))
    throw std::runtime_error(
        "DaggyRunnerExecutor config needs at least one remote runner");

  const auto &runners = execConfig["runners"];

  // Guard clause only: the validation must not enclose the loop below,
  // otherwise the loop and the return become unreachable after the throw.
  if (!runners.IsArray())
    throw std::runtime_error(
        "DaggyRunnerExecutor runners must be an array of urls");

  // An empty array would produce an executor with nothing to dispatch to.
  if (runners.Size() == 0)
    throw std::runtime_error(
        "DaggyRunnerExecutor config needs at least one remote runner");

  auto exe = std::make_unique<de::DaggyRunnerTaskExecutor>();

  for (size_t i = 0; i < runners.Size(); ++i) {
    if (!runners[i].IsString())
      throw std::runtime_error(
          "DaggyRunnerExecutor runners must be an array of urls");
    exe->addRunner(runners[i].GetString());
  }

  return exe;
}
|
||||
else
|
||||
throw std::runtime_error("Unknown executor type: " + name);
|
||||
}
|
||||
@@ -246,7 +268,7 @@ int main(int argc, char **argv)
|
||||
|
||||
Pistache::Address listenSpec(listenIP, listenPort);
|
||||
|
||||
daggy::Server server(listenSpec, *logger, *executor, dagThreads);
|
||||
daggy::daggyd::Server server(listenSpec, *logger, *executor, dagThreads);
|
||||
server.init(webThreads);
|
||||
server.start();
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace daggy {
|
||||
namespace daggy::daggyd {
|
||||
class Server
|
||||
{
|
||||
public:
|
||||
@@ -64,4 +64,4 @@ namespace daggy {
|
||||
std::mutex runnerGuard_;
|
||||
std::unordered_map<DAGRunID, std::shared_ptr<DAGRunner>> runners_;
|
||||
};
|
||||
} // namespace daggy
|
||||
} // namespace daggy::daggyd
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
using namespace Pistache;
|
||||
|
||||
namespace daggy {
|
||||
namespace daggy::daggyd {
|
||||
void Server::init(size_t threads)
|
||||
{
|
||||
auto opts = Http::Endpoint::options()
|
||||
@@ -305,14 +305,7 @@ namespace daggy {
|
||||
else {
|
||||
ss << ',';
|
||||
}
|
||||
ss << '{' << R"("startTime":)"
|
||||
<< std::quoted(timePointToString(attempt.startTime)) << ','
|
||||
<< R"("stopTime":)"
|
||||
<< std::quoted(timePointToString(attempt.stopTime)) << ','
|
||||
<< R"("rc":)" << attempt.rc << ',' << R"("outputLog":)"
|
||||
<< std::quoted(attempt.outputLog) << ',' << R"("errorLog":)"
|
||||
<< std::quoted(attempt.errorLog) << ',' << R"("executorLog":)"
|
||||
<< std::quoted(attempt.executorLog) << '}';
|
||||
ss << attemptRecordToJSON(attempt);
|
||||
}
|
||||
ss << ']';
|
||||
}
|
||||
@@ -511,4 +504,4 @@ namespace daggy {
|
||||
{
|
||||
return true;
|
||||
}
|
||||
} // namespace daggy
|
||||
} // namespace daggy::daggyd
|
||||
|
||||
@@ -17,118 +17,6 @@ namespace rj = rapidjson;
|
||||
|
||||
using namespace daggy;
|
||||
|
||||
#ifdef DEBUG_HTTP
|
||||
/// libcurl debug callback (CURLOPT_DEBUGFUNCTION) that dumps traffic to stderr.
///
/// @param handle Unused.
/// @param type   Kind of data being reported by libcurl.
/// @param data   Payload for this event. It is NOT NUL-terminated, so every
///               print below is bounded by @p size.
/// @param size   Number of valid bytes in @p data.
/// @param userp  Unused.
/// @return Always 0.
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
                    void *userp)
{
  (void)handle; /* prevent compiler warning */
  (void)userp;

  const char *text = nullptr;

  switch (type) {
    case CURLINFO_TEXT:
      // Informational text is printed inline without a banner.
      fprintf(stderr, "== Info: %.*s", static_cast<int>(size), data);
      return 0;

    case CURLINFO_HEADER_OUT:
      text = "=> Send header";
      break;
    case CURLINFO_DATA_OUT:
      text = "=> Send data";
      break;
    case CURLINFO_SSL_DATA_OUT:
      text = "=> Send SSL data";
      break;
    case CURLINFO_HEADER_IN:
      text = "<= Recv header";
      break;
    case CURLINFO_DATA_IN:
      text = "<= Recv data";
      break;
    case CURLINFO_SSL_DATA_IN:
      text = "<= Recv SSL data";
      break;

    default: /* in case a new one is introduced to shock us */
      return 0;
  }

  std::cerr << "\n================== " << text
            << " ==================" << std::endl;
  // write() with an explicit length: the buffer has no terminator.
  std::cerr.write(data, static_cast<std::streamsize>(size));
  std::cerr << std::endl;
  return 0;
}
|
||||
#endif
|
||||
|
||||
/// HTTP status codes the tests compare against. The underlying type is
/// `long` because REQUEST stores the value libcurl reports for
/// CURLINFO_RESPONSE_CODE, so any status value is representable.
enum HTTPCode : long
{
  Ok = 200,
  Not_Found = 404
};

/// Result of a single HTTP request: status code plus raw response body.
struct HTTPResponse
{
  /// Zero until a request has completed; never left indeterminate, so a
  /// response returned without a transfer compares unequal to HTTPCode::Ok.
  HTTPCode code{};
  std::string body;
};
|
||||
|
||||
/// libcurl write callback (CURLOPT_WRITEFUNCTION): appends the received
/// chunk to the stream passed via CURLOPT_WRITEDATA.
///
/// The signature uses size_t, as libcurl's callback contract requires; a
/// narrower integer type would mismatch the calling convention and truncate
/// the byte count.
///
/// @param in    Received bytes (not NUL-terminated).
/// @param size  Size of one element.
/// @param nmemb Number of elements in @p in.
/// @param out   Destination stream.
/// @return Number of bytes consumed (size * nmemb).
size_t curlWriter(char *in, size_t size, size_t nmemb, std::stringstream *out)
{
  const size_t total = size * nmemb;
  out->write(in, static_cast<std::streamsize>(total));
  return total;
}
|
||||
|
||||
/// Performs one blocking HTTP request through libcurl.
///
/// @param url     Target URL.
/// @param payload Request body; when non-empty it is sent with a
///                "Content-Type: Application/Json" header.
/// @param method  HTTP verb, passed to CURLOPT_CUSTOMREQUEST.
/// @return Status code and body of the response. If no curl handle could be
///         created, a value-initialised response (code 0, empty body).
/// @throws std::runtime_error when the transfer itself fails.
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
                     const std::string &method = "GET")
{
  // Value-initialise so `code` is defined even when no transfer happens.
  HTTPResponse response{};

  curl_global_init(CURL_GLOBAL_ALL);

  CURL *curl = curl_easy_init();
  if (curl == nullptr) {
    curl_global_cleanup();
    return response;
  }

  struct curl_slist *headers = nullptr;
  std::stringstream buffer;

#ifdef DEBUG_HTTP
  curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
  curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
#endif

  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);

  if (!payload.empty()) {
    // CURLOPT_POSTFIELDSIZE takes a long; pass exactly that through varargs.
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
                     static_cast<long>(payload.size()));
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
    headers = curl_slist_append(headers, "Content-Type: Application/Json");
  }
  curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
  // An empty "Expect:" header suppresses curl's "Expect: 100-continue".
  headers = curl_slist_append(headers, "Expect:");
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);

  const CURLcode res = curl_easy_perform(curl);

  // The status must be read while the handle is still alive.
  long status = 0;
  if (res == CURLE_OK) {
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
  }

  // Release every curl resource on both the success and the failure path.
  curl_slist_free_all(headers);
  curl_easy_cleanup(curl);
  curl_global_cleanup();

  if (res != CURLE_OK) {
    throw std::runtime_error(std::string{"CURL Failed: "} +
                             curl_easy_strerror(res));
  }

  response.code = static_cast<HTTPCode>(status);
  response.body = buffer.str();
  return response;
}
|
||||
|
||||
TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
{
|
||||
std::stringstream ss;
|
||||
@@ -138,7 +26,7 @@ TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
|
||||
const size_t nDAGRunners = 10, nWebThreads = 10;
|
||||
|
||||
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
daggy::daggyd::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
server.init(nWebThreads);
|
||||
server.start();
|
||||
|
||||
@@ -147,13 +35,13 @@ TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
|
||||
SECTION("Ready Endpoint")
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/ready");
|
||||
auto response = HTTP_REQUEST(baseURL + "/ready");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
}
|
||||
|
||||
SECTION("Querying a non-existent dagrunid should fail ")
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/100");
|
||||
auto response = HTTP_REQUEST(baseURL + "/v1/dagrun/100");
|
||||
REQUIRE(response.code != HTTPCode::Ok);
|
||||
}
|
||||
|
||||
@@ -175,7 +63,7 @@ TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
// Submit, and get the runID
|
||||
daggy::DAGRunID runID = 0;
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
|
||||
auto response = HTTP_REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
@@ -188,7 +76,7 @@ TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
|
||||
// Ensure our runID shows up in the list of running DAGs
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
|
||||
auto response = HTTP_REQUEST(baseURL + "/v1/dagruns?all=1");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
@@ -217,8 +105,8 @@ TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
|
||||
// Ensure we can get one of our tasks
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
|
||||
"/task/cat_0");
|
||||
auto response = HTTP_REQUEST(baseURL + "/v1/dagrun/" +
|
||||
std::to_string(runID) + "/task/cat_0");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
@@ -233,7 +121,8 @@ TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
// Wait until our DAG is complete
|
||||
bool complete = true;
|
||||
for (auto i = 0; i < 10; ++i) {
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
|
||||
auto response =
|
||||
HTTP_REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
@@ -281,7 +170,7 @@ TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
|
||||
const size_t nDAGRunners = 10, nWebThreads = 10;
|
||||
|
||||
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
daggy::daggyd::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
server.init(nWebThreads);
|
||||
server.start();
|
||||
|
||||
@@ -304,7 +193,7 @@ TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
// Submit, and get the runID
|
||||
daggy::DAGRunID runID;
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
|
||||
auto response = HTTP_REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
@@ -319,7 +208,7 @@ TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
|
||||
// Stop the current run
|
||||
{
|
||||
auto response = REQUEST(
|
||||
auto response = HTTP_REQUEST(
|
||||
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
|
||||
"PATCH");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
@@ -342,7 +231,7 @@ TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
{
|
||||
auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
|
||||
"/task/sleep_B_0/state/QUEUED";
|
||||
auto response = REQUEST(url, "", "PATCH");
|
||||
auto response = HTTP_REQUEST(url, "", "PATCH");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
|
||||
+daggy::RunState::QUEUED);
|
||||
@@ -355,7 +244,7 @@ TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
lstat("resume_touch_A", &s);
|
||||
auto preMTime = s.st_mtim.tv_sec;
|
||||
|
||||
auto response = REQUEST(
|
||||
auto response = HTTP_REQUEST(
|
||||
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
|
||||
"PATCH");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user