Files
daggy/daggyr/libdaggyr/src/Server.cpp
Ian Roddis 53308c063d Fixing a number of scaling issues:
- Missed closing of file descriptor made ForkingTaskExecutor
  silently die after running out of FDs
- Tightened up scope for locks to prevent http timeout
- Simplified threadpool
2022-01-10 13:02:10 -04:00

259 lines
7.3 KiB
C++

#include <enum.h>
#include <daggy/Serialization.hpp>
#include <daggy/executors/task/DaggyRunnerTaskExecutor.hpp>
#include <daggyr/Server.hpp>
#include <iomanip>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <utility>
// REQ_RESPONSE(code, msg): reply to the current request and leave the handler.
//
// Expands inside a void handler that has a `response` object in scope. It
// builds the body {"message": <msg>} (msg is wrapped with std::quoted, which
// backslash-escapes embedded double quotes and backslashes), sends it with
// Pistache::Http::Code::<code>, and then returns from the enclosing function.
//
// The statements are wrapped in do { ... } while (0) so the macro behaves as a
// single statement: it can be used under an unbraced if/else, and the
// temporary stream `ss` does not leak into the caller's scope.
#define REQ_RESPONSE(code, msg)                            \
  do {                                                     \
    std::stringstream ss;                                  \
    ss << R"({"message": )" << std::quoted(msg) << "}";    \
    response.send(Pistache::Http::Code::code, ss.str());   \
    return;                                                \
  } while (0)
using namespace Pistache;
namespace daggy::daggyr {
/*
 * Configure the HTTP endpoint and build the REST API description.
 *
 * threads is forwarded to the endpoint options' threads() setting. The
 * endpoint is opened with the ReuseAddr and ReusePort TCP flags, and the same
 * limit is applied to both the request and the response size.
 */
void Server::init(size_t threads)
{
  // Single cap handed to both maxRequestSize() and maxResponseSize().
  constexpr size_t maxMessageSize = 102400;

  auto endpointOptions = Http::Endpoint::options();
  endpointOptions.threads(threads);
  endpointOptions.flags(Pistache::Tcp::Options::ReuseAddr |
                        Pistache::Tcp::Options::ReusePort);
  endpointOptions.maxRequestSize(maxMessageSize);
  endpointOptions.maxResponseSize(maxMessageSize);

  endpoint_.init(endpointOptions);
  createDescription();
}
/*
 * Construct a server for the given listen address.
 *
 * listenSpec  - address handed to the Pistache endpoint.
 * maxCores    - passed to the task executor, and used as the core count of
 *               both the maximum and the initial current capacity.
 * maxMemoryMB - memory figure of both the maximum and the initial current
 *               capacity.
 *
 * The constructor only initialises members; init() and start() are not
 * called here.
 */
Server::Server(const Pistache::Address &listenSpec, ssize_t maxCores,
ssize_t maxMemoryMB)
: endpoint_(listenSpec)
, desc_("Daggy Runner API", "0.1")
, executor_(maxCores)
// Current capacity starts out equal to the maximum.
, maxCapacity_{maxCores, maxMemoryMB}
, curCapacity_{maxCores, maxMemoryMB}
{
}
/*
 * Shuts the endpoint down (via shutdown()) when the server is destroyed.
 */
Server::~Server()
{
shutdown();
}
/*
 * Wire the router to the API description, install the router's handler on
 * the endpoint, and begin serving through serveThreaded().
 */
void Server::start()
{
  router_.initFromDescription(desc_);

  auto requestHandler = router_.handler();
  endpoint_.setHandler(requestHandler);

  endpoint_.serveThreaded();
}
/*
 * Enable SSL on the endpoint using the given certificate and key files.
 *
 * Returns *this so the call can be chained.
 */
Server &Server::setSSLCertificates(const fs::path &cert, const fs::path &key)
{
endpoint_.useSSL(cert, key);
return *this;
}
/*
 * Shut down the HTTP endpoint. Also invoked from the destructor.
 */
void Server::shutdown()
{
endpoint_.shutdown();
}
/*
 * Returns the port reported by the underlying endpoint.
 */
uint16_t Server::getPort() const
{
return endpoint_.getPort();
}
/*
 * Populate the REST API description (desc_) and bind every route to its
 * handler method on this Server.
 *
 * Routes registered here:
 *   GET    /ready                        -> handleReady (marked hidden)
 *   POST   /v1/validate                  -> handleValidateTask
 *   POST   /v1/task/:runID/:taskName     -> handleRunTask
 *   GET    /v1/poll                      -> handlePollTasks
 *   DELETE /v1/task/:runID/:taskName     -> handleStopTask
 *   GET    /v1/capacity                  -> handleGetCapacity
 */
void Server::createDescription()
{
// API-wide metadata: license, scheme, base path and JSON in/out.
desc_.info().license("MIT", "https://opensource.org/licenses/MIT");
desc_.schemes(Rest::Scheme::Http)
.basePath("/v1")
.produces(MIME(Application, Json))
.consumes(MIME(Application, Json));
// Readiness probe; registered directly on desc_ rather than under /v1.
desc_.route(desc_.get("/ready"))
.bind(&Server::handleReady, this)
.response(Http::Code::Ok, "Response to the /ready call")
.hide();
// All remaining routes are attached to the "/v1" path.
auto versionPath = desc_.path("/v1");
versionPath.route(desc_.post("/validate"))
.bind(&Server::handleValidateTask, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Validate a task");
versionPath.route(desc_.post("/task/:runID/:taskName"))
.bind(&Server::handleRunTask, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Run a task");
versionPath.route(desc_.get("/poll"))
.bind(&Server::handlePollTasks, this)
.produces(MIME(Application, Json))
.response(
Http::Code::Ok,
"Poll all running tasks, getting completed attempts and state");
// Same path as the run route, but with the DELETE verb.
versionPath.route(desc_.del("/task/:runID/:taskName"))
.bind(&Server::handleStopTask, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Stop a task");
versionPath.route(desc_.get("/capacity"))
.bind(&Server::handleGetCapacity, this)
.produces(MIME(Application, Json))
.response(Http::Code::Ok, "Get capacities of worker");
}
/*
 * POST /v1/validate
 *
 * Parses the request body as a task (under the placeholder name
 * "sample_task") and runs the daggy_runner parameter validation on its job.
 * Replies Not_Acceptable with the exception text if either step throws a
 * std::exception, otherwise replies Ok with "Task is valid".
 */
void Server::handleValidateTask(const Pistache::Rest::Request &request,
                                Pistache::Http::ResponseWriter response)
{
  try {
    auto candidate = taskFromJSON("sample_task", request.body());
    daggy::executors::task::daggy_runner::validateTaskParameters(
        candidate.job);
  }
  catch (const std::exception &err) {
    // REQ_RESPONSE returns from the handler after sending.
    REQ_RESPONSE(Not_Acceptable, err.what());
  }

  REQ_RESPONSE(Ok, "Task is valid");
}
/*
 * POST /v1/task/:runID/:taskName
 *
 * Parses the request body as a task, reserves the capacity the task asks
 * for, hands the task to the executor and records it in pending_ so that
 * handlePollTasks can report on it later.
 *
 * Replies:
 *   - nothing, if handleAuth() rejects the request;
 *   - Not_Acceptable with the exception text, if the body cannot be turned
 *     into a task or its capacity cannot be derived;
 *   - Ok with an empty body once the job has been queued.
 *
 * If launching or recording the job throws, the capacity reserved for it is
 * handed back before the exception is re-thrown; otherwise the worker would
 * report reduced capacity forever for a job that never ran.
 */
void Server::handleRunTask(const Pistache::Rest::Request &request,
                           Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  auto runID = request.param(":runID").as<DAGRunID>();
  auto taskName = request.param(":taskName").as<std::string>();

  Capacity resourcesUsed;
  Task task;
  try {
    task = taskFromJSON(taskName, request.body());
    resourcesUsed = capacityFromTask(task);
  }
  catch (const std::exception &e) {
    REQ_RESPONSE(Not_Acceptable, e.what());
  }

  // Reserve the resources up front; handlePollTasks returns them once the
  // job has completed.
  {
    std::lock_guard<std::mutex> lock(capacityGuard_);
    curCapacity_.cores -= resourcesUsed.cores;
    curCapacity_.memoryMB -= resourcesUsed.memoryMB;
  }

  try {
    std::lock_guard<std::mutex> lock(pendingGuard_);
    pending_.push_back(
        PendingJob{.runID = runID,
                   .taskName = taskName,
                   .fut = executor_.execute(runID, taskName, task),
                   .resourcesUsed = resourcesUsed});
  }
  catch (...) {
    // The job was never queued, so nothing will ever release its
    // reservation: undo it here, then let the error propagate as before.
    {
      std::lock_guard<std::mutex> lock(capacityGuard_);
      curCapacity_.cores += resourcesUsed.cores;
      curCapacity_.memoryMB += resourcesUsed.memoryMB;
    }
    throw;
  }

  response.send(Pistache::Http::Code::Ok, "");
}
/*
 * GET /v1/poll
 *
 * Replies Ok with a JSON array holding one object per entry in pending_.
 * Every object carries "runID" and "taskName"; an entry whose future is
 * valid and becomes ready within the 1ms wait is reported as
 *   "state": "COMPLETED", "attempt": <attempt record JSON>
 * has its reserved capacity returned to curCapacity_, and is removed from
 * pending_. Every other entry is reported as "state": "PENDING" and kept.
 *
 * Sends nothing if handleAuth() rejects the request.
 */
void Server::handlePollTasks(const Pistache::Rest::Request &request,
                             Pistache::Http::ResponseWriter response)
{
  if (!handleAuth(request))
    return;

  std::stringstream body;
  body << "[";

  // pendingGuard_ is held for the whole scan, including the send below.
  std::lock_guard<std::mutex> pendingLock(pendingGuard_);

  // Empty before the first element, ", " before every later one.
  const char *separator = "";

  for (auto job = pending_.begin(); job != pending_.end();) {
    body << separator << R"({ "runID": )" << job->runID
         << R"(, "taskName": )" << std::quoted(job->taskName) << ", ";
    separator = ", ";

    const bool finished =
        job->fut.valid() and
        job->fut.wait_for(1ms) == std::future_status::ready;

    if (not finished) {
      body << R"("state": "PENDING")"
           << "}";
      ++job;
      continue;
    }

    // Completed: emit the attempt, release its resources, drop the entry.
    auto attempt = job->fut.get();
    body << R"("state": "COMPLETED", "attempt":)"
         << attemptRecordToJSON(attempt);
    {
      std::lock_guard<std::mutex> capacityLock(capacityGuard_);
      curCapacity_.cores += job->resourcesUsed.cores;
      curCapacity_.memoryMB += job->resourcesUsed.memoryMB;
    }
    // erase() yields the iterator to the next entry.
    job = pending_.erase(job);
    body << "}";
  }

  body << "]";
  response.send(Pistache::Http::Code::Ok, body.str());
}
/*
 * DELETE /v1/task/:runID/:taskName
 *
 * Asks the executor to stop the task identified by the two path parameters
 * and replies Ok with an empty message. Sends nothing if handleAuth()
 * rejects the request.
 */
void Server::handleStopTask(const Pistache::Rest::Request &request,
                            Pistache::Http::ResponseWriter response)
{
  if (handleAuth(request)) {
    auto dagRun = request.param(":runID").as<DAGRunID>();
    auto name = request.param(":taskName").as<std::string>();

    executor_.stop(dagRun, name);

    REQ_RESPONSE(Ok, "");
  }
}
/*
 * GET /v1/capacity
 *
 * Replies Ok with { "current": <curCapacity_>, "total": <maxCapacity_>},
 * where both values are rendered by capacityToJSON. The body is assembled
 * while capacityGuard_ is held; the response is sent after it is released.
 */
void Server::handleGetCapacity(const Pistache::Rest::Request &request,
                               Pistache::Http::ResponseWriter response)
{
  std::string body;
  {
    std::lock_guard<std::mutex> capacityLock(capacityGuard_);
    body += R"({ "current": )";
    body += capacityToJSON(curCapacity_);
    body += R"(, "total": )";
    body += capacityToJSON(maxCapacity_);
    body += "}";
  }

  response.send(Pistache::Http::Code::Ok, body);
}
/*
 * GET /ready
 *
 * Always replies Ok with a fixed JSON message; the request is not inspected
 * and no auth check is performed.
 */
void Server::handleReady(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
response.send(Pistache::Http::Code::Ok, R"({ "msg": "Ready for tasks!"})");
}
/*
 * handleAuth is the hook where authentication checks for a request belong.
 * Handlers that call it stop processing the request when it returns false.
 *
 * At present no checks are implemented: every request is accepted.
 *
 * NOTE(review): the function only receives the request, not the
 * ResponseWriter, so it cannot itself send a failure response; callers
 * currently return without replying when it returns false. Confirm how
 * rejected requests are meant to be answered before adding real checks.
 */
bool Server::handleAuth(const Pistache::Rest::Request &request)
{
return true;
}
} // namespace daggy::daggyr