- Fixed a missed file descriptor close that made ForkingTaskExecutor silently die after running out of FDs
- Tightened lock scopes to prevent HTTP timeouts
- Simplified the thread pool
259 lines
7.3 KiB
C++
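
The descriptor leak called out in the first bullet lives in ForkingTaskExecutor, which is not part of the Server.cpp listing below. As a rough, hedged sketch only (the guard name and structure are assumptions, not the executor's actual code), a small RAII wrapper is one way to guarantee pipe descriptors get closed even on early-return or exception paths:

    #include <unistd.h>

    // Hypothetical scope guard: closes a file descriptor when it goes out of
    // scope, so an early return or a thrown exception can no longer leak it.
    struct FdGuard {
        int fd;
        explicit FdGuard(int fd_) : fd(fd_) {}
        ~FdGuard()
        {
            if (fd >= 0)
                ::close(fd);
        }
        FdGuard(const FdGuard &) = delete;
        FdGuard &operator=(const FdGuard &) = delete;
    };

Wrapping both ends of the child's pipe in guards like this keeps the parent process from accumulating descriptors across task runs.
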
#include <enum.h>

#include <daggy/Serialization.hpp>
#include <daggy/executors/task/DaggyRunnerTaskExecutor.hpp>
#include <daggyr/Server.hpp>

#include <chrono>
#include <iomanip>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <utility>

// Builds a JSON {"message": ...} body, sends it with the given HTTP code, and
// returns from the calling handler.
#define REQ_RESPONSE(code, msg) \
    std::stringstream ss; \
    ss << R"({"message": )" << std::quoted(msg) << "}"; \
    response.send(Pistache::Http::Code::code, ss.str()); \
    return;

using namespace Pistache;
using namespace std::chrono_literals; // for the 1ms wait in handlePollTasks

namespace daggy::daggyr {
void Server::init(size_t threads)
{
    auto opts = Http::Endpoint::options()
                    .threads(threads)
                    .flags(Pistache::Tcp::Options::ReuseAddr |
                           Pistache::Tcp::Options::ReusePort)
                    .maxRequestSize(102400)
                    .maxResponseSize(102400);
    endpoint_.init(opts);
    createDescription();
}

Server::Server(const Pistache::Address &listenSpec, ssize_t maxCores,
               ssize_t maxMemoryMB)
    : endpoint_(listenSpec)
    , desc_("Daggy Runner API", "0.1")
    , executor_(maxCores)
    , maxCapacity_{maxCores, maxMemoryMB}
    , curCapacity_{maxCores, maxMemoryMB}
{
}

Server::~Server()
{
    shutdown();
}

void Server::start()
{
    router_.initFromDescription(desc_);

    endpoint_.setHandler(router_.handler());
    endpoint_.serveThreaded();
}

Server &Server::setSSLCertificates(const fs::path &cert, const fs::path &key)
{
    endpoint_.useSSL(cert, key);
    return *this;
}

void Server::shutdown()
{
    endpoint_.shutdown();
}

uint16_t Server::getPort() const
{
    return endpoint_.getPort();
}

void Server::createDescription()
{
    desc_.info().license("MIT", "https://opensource.org/licenses/MIT");

    desc_.schemes(Rest::Scheme::Http)
        .basePath("/v1")
        .produces(MIME(Application, Json))
        .consumes(MIME(Application, Json));

    desc_.route(desc_.get("/ready"))
        .bind(&Server::handleReady, this)
        .response(Http::Code::Ok, "Response to the /ready call")
        .hide();

    auto versionPath = desc_.path("/v1");

    versionPath.route(desc_.post("/validate"))
        .bind(&Server::handleValidateTask, this)
        .produces(MIME(Application, Json))
        .response(Http::Code::Ok, "Validate a task");

    versionPath.route(desc_.post("/task/:runID/:taskName"))
        .bind(&Server::handleRunTask, this)
        .produces(MIME(Application, Json))
        .response(Http::Code::Ok, "Run a task");

    versionPath.route(desc_.get("/poll"))
        .bind(&Server::handlePollTasks, this)
        .produces(MIME(Application, Json))
        .response(
            Http::Code::Ok,
            "Poll all running tasks, getting completed attempts and state");

    versionPath.route(desc_.del("/task/:runID/:taskName"))
        .bind(&Server::handleStopTask, this)
        .produces(MIME(Application, Json))
        .response(Http::Code::Ok, "Stop a task");

    versionPath.route(desc_.get("/capacity"))
        .bind(&Server::handleGetCapacity, this)
        .produces(MIME(Application, Json))
        .response(Http::Code::Ok, "Get capacities of worker");
}

void Server::handleValidateTask(const Pistache::Rest::Request &request,
                                Pistache::Http::ResponseWriter response)
{
    try {
        auto task = taskFromJSON("sample_task", request.body());
        daggy::executors::task::daggy_runner::validateTaskParameters(task.job);
    }
    catch (std::exception &e) {
        REQ_RESPONSE(Not_Acceptable, e.what());
    }
    REQ_RESPONSE(Ok, "Task is valid");
}

void Server::handleRunTask(const Pistache::Rest::Request &request,
                           Pistache::Http::ResponseWriter response)
{
    if (!handleAuth(request))
        return;

    auto runID = request.param(":runID").as<DAGRunID>();
    auto taskName = request.param(":taskName").as<std::string>();

    Capacity resourcesUsed;
    Task task;
    try {
        task = taskFromJSON(taskName, request.body());
        resourcesUsed = capacityFromTask(task);
    }
    catch (std::exception &e) {
        REQ_RESPONSE(Not_Acceptable, e.what());
    }

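    // Reserve capacity before dispatching. Each lock below is scoped to its own
    // block so the guard is released right after the update (the tightened lock
    // scope mentioned in the commit notes).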
    {
        std::lock_guard<std::mutex> lock(capacityGuard_);
        curCapacity_.cores -= resourcesUsed.cores;
        curCapacity_.memoryMB -= resourcesUsed.memoryMB;
    }

    // Hand the task to the executor and remember its future so /poll can
    // report progress and release the reserved capacity later.
    {
        std::lock_guard<std::mutex> lock(pendingGuard_);
        pending_.push_back(
            PendingJob{.runID = runID,
                       .taskName = taskName,
                       .fut = executor_.execute(runID, taskName, task),
                       .resourcesUsed = resourcesUsed});
    }

    response.send(Pistache::Http::Code::Ok, "");
}

void Server::handlePollTasks(const Pistache::Rest::Request &request,
                             Pistache::Http::ResponseWriter response)
{
    if (!handleAuth(request))
        return;

    std::stringstream payload;
    payload << "[";
    bool first = true;

    // Walk the pending list under pendingGuard_, emitting one JSON object per
    // task.
    std::lock_guard<std::mutex> lock(pendingGuard_);
    auto it = pending_.begin();
    while (it != pending_.end()) {
        if (first) {
            first = false;
        }
        else {
            payload << ", ";
        }

        payload << R"({ "runID": )" << it->runID << R"(, "taskName": )"
                << std::quoted(it->taskName) << ", ";

        // Poll the task's future, waiting at most 1ms so the handler stays
        // responsive.
        if (it->fut.valid() and
            it->fut.wait_for(1ms) == std::future_status::ready) {
            auto attempt = it->fut.get();

            payload << R"("state": "COMPLETED", "attempt":)"
                    << attemptRecordToJSON(attempt);
            {
                std::lock_guard<std::mutex> rlock(capacityGuard_);
                curCapacity_.cores += it->resourcesUsed.cores;
                curCapacity_.memoryMB += it->resourcesUsed.memoryMB;
            }
            it = pending_.erase(it);
        }
        else {
            payload << R"("state": "PENDING")";
            ++it;
        }
        payload << "}";
    }
    payload << "]";

    response.send(Pistache::Http::Code::Ok, payload.str());
}

void Server::handleStopTask(const Pistache::Rest::Request &request,
                            Pistache::Http::ResponseWriter response)
{
    if (!handleAuth(request))
        return;

    auto runID = request.param(":runID").as<DAGRunID>();
    auto taskName = request.param(":taskName").as<std::string>();

    executor_.stop(runID, taskName);

    REQ_RESPONSE(Ok, "");
}

void Server::handleGetCapacity(const Pistache::Rest::Request &request,
                               Pistache::Http::ResponseWriter response)
{
    std::string payload;
    {
        // Copy the JSON out under the lock; send it after the guard is released.
        std::lock_guard<std::mutex> lock(capacityGuard_);
        payload = R"({ "current": )" + capacityToJSON(curCapacity_) +
                  R"(, "total": )" + capacityToJSON(maxCapacity_) + "}";
    }

    response.send(Pistache::Http::Code::Ok, payload);
}

void Server::handleReady(const Pistache::Rest::Request &request,
                         Pistache::Http::ResponseWriter response)
{
    response.send(Pistache::Http::Code::Ok, R"({ "msg": "Ready for tasks!"})");
}

/*
 * handleAuth checks any configured auth methods and handles any responses in
 * the case of failed auth. If it returns false, callers should stop handling
 * the response.
 */
bool Server::handleAuth(const Pistache::Rest::Request &request)
{
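    // Currently a stub that accepts every request; a real check would validate
    // the request's credentials and return false on failure.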
    return true;
}
} // namespace daggy::daggyr