Adding support for remote execution daemons.
Squashed commit of the following: commit 69d5ef7a256b86a86d46e5ae374c00fded1497ea Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 12:15:55 2021 -0400 Updating readme commit 94a9f676d0f9cc0b55cdc18c4927eaea40d82c77 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 12:05:36 2021 -0400 Fixing serialization of attempt records when querying entire dag commit 945e5f90b24abf07c9af1bc4c6bbcb33e93b8069 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 11:37:59 2021 -0400 Compiles cleanly... commit 8b23e46081d47fb80dc1a2d998fc6dc4bbf301a8 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:43:03 2021 -0400 Adding in missing source file to cmake build list commit 6d10d9791206e2bc15788beadeea580b8e43a853 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:41:43 2021 -0400 Adding new executors commit 42a2c67f4d6ae99df95d917c8621d78cd99837a1 Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:27:14 2021 -0400 Fixing missing curl cmake dependency commit 394bc4c5d51ecee7bf14712f719c8bf7e97fb0fa Author: Ian Roddis <tech@kinesin.ca> Date: Thu Dec 16 10:21:58 2021 -0400 Fixing missing curl cmake dependency commit dd9efc8e7e7770ea1bcbccb70a1af9cfcff0414c Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 17:15:38 2021 -0400 Checkpointing progress commit 3b3b55d6037bb96e46de6763f486f4ecb92fe6a0 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 14:21:18 2021 -0400 updating readme commit 303027c11452941b2a0c0d1b04ac5942e79efd74 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 14:17:16 2021 -0400 Namespacing daggyd Adding more error checking around deserialization of parameters Adding tests for runner agent commit c592eaeba12e2a449bae401e8c1d9ed236416d52 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 11:20:21 2021 -0400 Checkpointing work commit fb1862d1cefe2b53a98659cce3c8c73d88bf5d84 Author: Ian Roddis <tech@kinesin.ca> Date: Wed Dec 15 09:52:29 2021 -0400 Copying daggyd for daggyr template, adding in basic routes
This commit is contained in:
8
daggyr/libdaggyr/CMakeLists.txt
Normal file
8
daggyr/libdaggyr/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
# Build script for the libdaggyr static library.
project(libdaggyr)
|
||||
|
||||
# Static library target named after the project; no sources are listed here,
# they are attached by the src/ subdirectory added at the end of this script.
add_library(${PROJECT_NAME} STATIC)
|
||||
|
||||
# The public headers live under include/ and are exported to consumers.
target_include_directories(${PROJECT_NAME} PUBLIC include)
|
||||
# Links against libdaggy and stdc++fs.
# NOTE(review): stdc++fs is presumably needed for std::filesystem on older
# GCC toolchains -- confirm against the supported compilers.
target_link_libraries(${PROJECT_NAME} libdaggy stdc++fs)
|
||||
|
||||
add_subdirectory(src)
|
||||
84
daggyr/libdaggyr/include/daggyr/Server.hpp
Normal file
84
daggyr/libdaggyr/include/daggyr/Server.hpp
Normal file
@@ -0,0 +1,84 @@
|
||||
#pragma once
|
||||
|
||||
#include <pistache/description.h>
|
||||
#include <pistache/endpoint.h>
|
||||
#include <pistache/http.h>
|
||||
|
||||
#include <daggy/DAGRunner.hpp>
|
||||
#include <daggy/ThreadPool.hpp>
|
||||
#include <daggy/executors/task/DaggyRunnerTaskExecutor.hpp>
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
// Declares a REST handler member function called `handlerName`. The handler
// receives the Pistache request by const reference and the response writer by
// value. The expansion already ends with a semicolon.
#define DAGGY_REST_HANDLER(handlerName)                       \
  void handlerName(const Pistache::Rest::Request &request,    \
                   Pistache::Http::ResponseWriter response);
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
using namespace daggy::executors::task::daggy_runner;
|
||||
|
||||
namespace daggy::daggyr {
|
||||
|
||||
class Server
|
||||
{
|
||||
public:
|
||||
Server(const Pistache::Address &listenSpec, ssize_t maxCores,
|
||||
ssize_t maxMemoryMB);
|
||||
~Server();
|
||||
|
||||
Server &setSSLCertificates(const fs::path &cert, const fs::path &key);
|
||||
|
||||
void init(size_t threads = 1);
|
||||
|
||||
void start();
|
||||
|
||||
uint16_t getPort() const;
|
||||
|
||||
void shutdown();
|
||||
|
||||
static void validateTask(const Task &task);
|
||||
|
||||
private:
|
||||
void createDescription();
|
||||
|
||||
bool handleAuth(const Pistache::Rest::Request &request);
|
||||
|
||||
DAGGY_REST_HANDLER(handleReady);
|
||||
DAGGY_REST_HANDLER(handleGetCapacity);
|
||||
DAGGY_REST_HANDLER(handleRunTask);
|
||||
DAGGY_REST_HANDLER(handleGetTask);
|
||||
DAGGY_REST_HANDLER(handleStopTask);
|
||||
DAGGY_REST_HANDLER(handleValidateTask);
|
||||
|
||||
Pistache::Http::Endpoint endpoint_;
|
||||
Pistache::Rest::Description desc_;
|
||||
Pistache::Rest::Router router_;
|
||||
|
||||
executors::task::ForkingTaskExecutor executor_;
|
||||
|
||||
struct TaskRecord
|
||||
{
|
||||
RunState state;
|
||||
AttemptRecord attempt;
|
||||
};
|
||||
|
||||
std::mutex capacityGuard_;
|
||||
Capacity maxCapacity_;
|
||||
Capacity curCapacity_;
|
||||
|
||||
std::mutex pendingGuard_;
|
||||
|
||||
struct PendingJob
|
||||
{
|
||||
std::future<AttemptRecord> fut;
|
||||
Capacity resourcesUsed;
|
||||
};
|
||||
|
||||
std::unordered_map<std::pair<DAGRunID, std::string>, PendingJob> pending_;
|
||||
|
||||
std::mutex resultsGuard_;
|
||||
std::unordered_map<std::pair<DAGRunID, std::string>, AttemptRecord>
|
||||
results_;
|
||||
};
|
||||
} // namespace daggy::daggyr
|
||||
3
daggyr/libdaggyr/src/CMakeLists.txt
Normal file
3
daggyr/libdaggyr/src/CMakeLists.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
# Attach the implementation files to the target named by PROJECT_NAME.
target_sources(${PROJECT_NAME} PRIVATE
|
||||
Server.cpp
|
||||
)
|
||||
259
daggyr/libdaggyr/src/Server.cpp
Normal file
259
daggyr/libdaggyr/src/Server.cpp
Normal file
@@ -0,0 +1,259 @@
|
||||
#include <enum.h>
|
||||
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/executors/task/DaggyRunnerTaskExecutor.hpp>
|
||||
#include <daggyr/Server.hpp>
|
||||
#include <iomanip>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
// Sends `{"message": <quoted msg>}` with status Pistache::Http::Code::<code>
// through the `response` object of the calling scope, then returns from the
// enclosing (void) function.
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement: it is safe after an unbraced `if`, and the temporary stream
// no longer leaks into (or collides with names in) the caller's scope.
// Call sites must terminate the invocation with a semicolon.
//
// NOTE: std::quoted only escapes `"` and `\`; a message containing control
// characters (e.g. a newline) is emitted verbatim.
#define REQ_RESPONSE(code, msg)                            \
  do {                                                     \
    std::stringstream ss;                                  \
    ss << R"({"message": )" << std::quoted(msg) << "}";    \
    response.send(Pistache::Http::Code::code, ss.str());   \
    return;                                                \
  } while (0)
|
||||
|
||||
using namespace Pistache;
|
||||
|
||||
namespace daggy::daggyr {
|
||||
void Server::init(size_t threads)
|
||||
{
|
||||
auto opts = Http::Endpoint::options()
|
||||
.threads(threads)
|
||||
.flags(Pistache::Tcp::Options::ReuseAddr |
|
||||
Pistache::Tcp::Options::ReusePort)
|
||||
.maxRequestSize(102400)
|
||||
.maxResponseSize(102400);
|
||||
endpoint_.init(opts);
|
||||
createDescription();
|
||||
}
|
||||
|
||||
Server::Server(const Pistache::Address &listenSpec, ssize_t maxCores,
|
||||
ssize_t maxMemoryMB)
|
||||
: endpoint_(listenSpec)
|
||||
, desc_("Daggy Runner API", "0.1")
|
||||
, executor_(maxCores)
|
||||
, maxCapacity_{maxCores, maxMemoryMB}
|
||||
, curCapacity_{maxCores, maxMemoryMB}
|
||||
{
|
||||
}
|
||||
|
||||
// Stops the endpoint when the server object goes away.
Server::~Server()
{
  this->shutdown();
}
|
||||
|
||||
void Server::start()
|
||||
{
|
||||
router_.initFromDescription(desc_);
|
||||
|
||||
endpoint_.setHandler(router_.handler());
|
||||
endpoint_.serveThreaded();
|
||||
}
|
||||
|
||||
// Hands the certificate and key paths to the endpoint's SSL setup.
// Returns this server so the call can be chained.
Server &Server::setSSLCertificates(const fs::path &cert, const fs::path &key)
{
  Server &self = *this;
  self.endpoint_.useSSL(cert, key);
  return self;
}
|
||||
|
||||
void Server::shutdown()
|
||||
{
|
||||
endpoint_.shutdown();
|
||||
}
|
||||
|
||||
uint16_t Server::getPort() const
|
||||
{
|
||||
return endpoint_.getPort();
|
||||
}
|
||||
|
||||
void Server::createDescription()
|
||||
{
|
||||
desc_.info().license("MIT", "https://opensource.org/licenses/MIT");
|
||||
|
||||
desc_.schemes(Rest::Scheme::Http)
|
||||
.basePath("/v1")
|
||||
.produces(MIME(Application, Json))
|
||||
.consumes(MIME(Application, Json));
|
||||
|
||||
desc_.route(desc_.get("/ready"))
|
||||
.bind(&Server::handleReady, this)
|
||||
.response(Http::Code::Ok, "Response to the /ready call")
|
||||
.hide();
|
||||
|
||||
auto versionPath = desc_.path("/v1");
|
||||
|
||||
versionPath.route(desc_.post("/validate"))
|
||||
.bind(&Server::handleValidateTask, this)
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok, "Validate a task");
|
||||
|
||||
versionPath.route(desc_.post("/task/:runID/:taskName"))
|
||||
.bind(&Server::handleRunTask, this)
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok, "Run a task");
|
||||
|
||||
versionPath.route(desc_.get("/task/:runID/:taskName"))
|
||||
.bind(&Server::handleGetTask, this)
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok,
|
||||
"Get the state and potentially the AttemptRecord of a task");
|
||||
|
||||
versionPath.route(desc_.del("/task/:runID/:taskName"))
|
||||
.bind(&Server::handleStopTask, this)
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok, "Stop a task");
|
||||
|
||||
versionPath.route(desc_.get("/capacity"))
|
||||
.bind(&Server::handleGetCapacity, this)
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok, "Get capacities of worker");
|
||||
}
|
||||
|
||||
// POST /validate: parses the request body as a task and checks its job
// parameters. Replies 406 with the exception text when parsing or validation
// throws, 200 otherwise.
void Server::handleValidateTask(const Pistache::Rest::Request &request,
                                Pistache::Http::ResponseWriter response)
{
  try {
    // The task name is a placeholder; only the body is being checked.
    auto candidate = taskFromJSON("sample_task", request.body());
    daggy::executors::task::daggy_runner::validateTaskParameters(
        candidate.job);
  }
  catch (std::exception &err) {
    REQ_RESPONSE(Not_Acceptable, err.what());
  }

  REQ_RESPONSE(Ok, "Task is valid");
}
|
||||
|
||||
// POST /task/:runID/:taskName: parses the task from the request body, launches
// it on the executor and records it as pending.
//
// Replies:
//   406 - the body could not be parsed or its capacity could not be derived
//   409 - the same (runID, taskName) was launched and has not been collected
//   200 - the task was launched (empty body)
//
// Capacity is deducted only after the executor accepted the task, so a
// rejected or failed launch never leaks capacity. An entry whose future has
// already been collected is replaced, which allows a task to be re-run.
void Server::handleRunTask(const Pistache::Rest::Request &request,
                           Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  const auto runID = request.param(":runID").as<DAGRunID>();
  const auto taskName = request.param(":taskName").as<std::string>();

  Capacity resourcesUsed;
  Task task;
  try {
    task = taskFromJSON(taskName, request.body());
    resourcesUsed = capacityFromTask(task);
  }
  catch (const std::exception &e) {
    REQ_RESPONSE(Not_Acceptable, e.what());
  }

  {
    std::lock_guard<std::mutex> lock(pendingGuard_);
    const auto taskID = std::make_pair(runID, taskName);

    // Launching a second copy would discard one of the two futures and leave
    // its capacity deducted forever, so refuse while one is in flight.
    const auto existing = pending_.find(taskID);
    if (existing != pending_.end() && existing->second.fut.valid()) {
      REQ_RESPONSE(Conflict, "Task is already running");
    }

    // If this throws, no bookkeeping has been touched yet.
    std::future<AttemptRecord> fut = executor_.execute(runID, taskName, task);

    {
      // Lock order (pending, then capacity) matches handleGetTask.
      std::lock_guard<std::mutex> capacityLock(capacityGuard_);
      curCapacity_.cores -= resourcesUsed.cores;
      curCapacity_.memoryMB -= resourcesUsed.memoryMB;
    }

    pending_.insert_or_assign(
        taskID,
        PendingJob{.fut = std::move(fut), .resourcesUsed = resourcesUsed});
  }

  response.send(Pistache::Http::Code::Ok, "");
}
|
||||
|
||||
// GET /task/:runID/:taskName: reports the state of a task.
//
// Replies:
//   200 {"state": "RUNNING"}                     - launched, not finished yet
//   200 {"state": "COMPLETED", "attempt": {...}} - finished; attempt attached
//   404                                          - unknown task
//
// When a pending task turns out to be finished it is removed from pending_,
// its capacity is handed back and its attempt is stored in results_
// (overwriting any earlier attempt for the same key). Leaving the entry in
// pending_ would make every later poll see a consumed future and report
// RUNNING forever.
void Server::handleGetTask(const Pistache::Rest::Request &request,
                           Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  const auto runID = request.param(":runID").as<DAGRunID>();
  const auto taskName = request.param(":taskName").as<std::string>();
  const auto taskID = std::make_pair(runID, taskName);

  std::string payload;
  bool running = false;

  {
    std::lock_guard<std::mutex> lock(pendingGuard_);
    auto it = pending_.find(taskID);
    if (it != pending_.end()) {
      PendingJob &job = it->second;

      // Brief poll: a valid future that is not ready means still running.
      if (job.fut.valid() &&
          job.fut.wait_for(1ms) != std::future_status::ready) {
        payload = R"({ "state": "RUNNING" })";
        running = true;
      }
      else {
        // Take the job out of pending_ first so the bookkeeping stays
        // consistent even if collecting the result throws.
        PendingJob done = std::move(job);
        pending_.erase(it);

        // An invalid future was already collected; nothing left to account.
        if (done.fut.valid()) {
          {
            std::lock_guard<std::mutex> capacityLock(capacityGuard_);
            curCapacity_.cores += done.resourcesUsed.cores;
            curCapacity_.memoryMB += done.resourcesUsed.memoryMB;
          }

          auto attempt = done.fut.get();
          {
            std::lock_guard<std::mutex> resultsLock(resultsGuard_);
            results_.insert_or_assign(taskID, std::move(attempt));
          }
        }
      }
    }
  }

  if (!running) {
    std::lock_guard<std::mutex> lock(resultsGuard_);
    const auto it = results_.find(taskID);
    if (it == results_.end()) {
      REQ_RESPONSE(Not_Found, "No such task");
    }

    payload = R"({ "state": "COMPLETED", "attempt": )" +
              attemptRecordToJSON(it->second) + "}";
  }

  response.send(Pistache::Http::Code::Ok, payload);
}
|
||||
|
||||
// DELETE /task/:runID/:taskName: asks the executor to stop the task and
// replies 200 with an empty message.
void Server::handleStopTask(const Pistache::Rest::Request &request,
                            Pistache::Http::ResponseWriter response)
{
  const bool authorised = handleAuth(request);
  if (!authorised) {
    return;
  }

  const auto runID = request.param(":runID").as<DAGRunID>();
  const auto taskName = request.param(":taskName").as<std::string>();
  executor_.stop(runID, taskName);

  REQ_RESPONSE(Ok, "");
}
|
||||
|
||||
// GET /capacity: replies 200 with a JSON object holding the currently
// available capacity ("current") and the configured maximum ("total").
void Server::handleGetCapacity(const Pistache::Rest::Request &request,
                               Pistache::Http::ResponseWriter response)
{
  std::string body{R"({ "current": )"};
  {
    // Serialise under the lock so the capacity is read consistently.
    std::lock_guard<std::mutex> guard(capacityGuard_);
    body += capacityToJSON(curCapacity_);
    body += R"(, "total": )";
    body += capacityToJSON(maxCapacity_);
  }
  body += "}";

  response.send(Pistache::Http::Code::Ok, body);
}
|
||||
|
||||
// GET /ready: always replies 200 with a fixed JSON message.
void Server::handleReady(const Pistache::Rest::Request &request,
                         Pistache::Http::ResponseWriter response)
{
  static constexpr const char *kReadyBody = R"({ "msg": "Ready for tasks!"})";
  response.send(Pistache::Http::Code::Ok, kReadyBody);
}
|
||||
|
||||
/*
|
||||
* handleAuth will check any auth methods and handle any responses in the
|
||||
* case of failed auth. If it returns false, callers should cease handling
|
||||
* the response
|
||||
*/
|
||||
bool Server::handleAuth(const Pistache::Rest::Request &request)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
} // namespace daggy::daggyr
|
||||
Reference in New Issue
Block a user