Move workers to a poll-based method, with capacities preserved by daggyd
This commit is contained in:
@@ -119,7 +119,7 @@ int main(int argc, char **argv)
|
||||
args.add_argument("-d", "--daemon").default_value(false).implicit_value(true);
|
||||
args.add_argument("--config").default_value(std::string{});
|
||||
args.add_argument("--ip").default_value(std::string{"127.0.0.1"});
|
||||
args.add_argument("--port").default_value(int{2504});
|
||||
args.add_argument("--port").default_value(2504u);
|
||||
|
||||
try {
|
||||
args.parse_args(argc, argv);
|
||||
@@ -138,7 +138,7 @@ int main(int argc, char **argv)
|
||||
bool asDaemon = args.get<bool>("--daemon");
|
||||
auto configFile = args.get<std::string>("--config");
|
||||
std::string listenIP = args.get<std::string>("--ip");
|
||||
int listenPort = args.get<int>("--port");
|
||||
int listenPort = args.get<uint32_t>("--port");
|
||||
size_t webThreads = 50;
|
||||
ssize_t maxCores = std::max(1U, std::thread::hardware_concurrency() - 2);
|
||||
ssize_t maxMemoryMB =
|
||||
@@ -164,7 +164,7 @@ int main(int argc, char **argv)
|
||||
if (co.HasMember("cores"))
|
||||
maxCores = co["cores"].GetInt64();
|
||||
if (co.HasMember("memoryMB"))
|
||||
maxCores = co["memoryMB"].GetInt64();
|
||||
maxMemoryMB = co["memoryMB"].GetInt64();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
|
||||
#include <filesystem>
|
||||
#include <list>
|
||||
|
||||
#define DAGGY_REST_HANDLER(func) \
|
||||
void func(const Pistache::Rest::Request &request, \
|
||||
@@ -47,7 +48,7 @@ namespace daggy::daggyr {
|
||||
DAGGY_REST_HANDLER(handleReady);
|
||||
DAGGY_REST_HANDLER(handleGetCapacity);
|
||||
DAGGY_REST_HANDLER(handleRunTask);
|
||||
DAGGY_REST_HANDLER(handleGetTask);
|
||||
DAGGY_REST_HANDLER(handlePollTasks);
|
||||
DAGGY_REST_HANDLER(handleStopTask);
|
||||
DAGGY_REST_HANDLER(handleValidateTask);
|
||||
|
||||
@@ -71,14 +72,13 @@ namespace daggy::daggyr {
|
||||
|
||||
// Bookkeeping record for one task handed to the executor.
// In this view a PendingJob is built with runID, taskName, fut (the future
// returned by executor_.execute(runID, taskName, task)) and resourcesUsed;
// `resolved` is not named in that initializer.
// When the poll handler finds `fut` ready, it adds resourcesUsed.cores and
// resourcesUsed.memoryMB back onto curCapacity_.
// NOTE(review): no visible line reads or writes `resolved` — confirm whether
// it is still needed.
struct PendingJob
|
||||
{
|
||||
DAGRunID runID;
|
||||
std::string taskName;
|
||||
std::future<AttemptRecord> fut;
|
||||
Capacity resourcesUsed;
|
||||
bool resolved;
|
||||
};
|
||||
|
||||
std::unordered_map<std::pair<DAGRunID, std::string>, PendingJob> pending_;
|
||||
|
||||
std::mutex resultsGuard_;
|
||||
std::unordered_map<std::pair<DAGRunID, std::string>, AttemptRecord>
|
||||
results_;
|
||||
std::list<PendingJob> pending_;
|
||||
};
|
||||
} // namespace daggy::daggyr
|
||||
|
||||
@@ -96,11 +96,12 @@ namespace daggy::daggyr {
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok, "Run a task");
|
||||
|
||||
versionPath.route(desc_.get("/task/:runID/:taskName"))
|
||||
.bind(&Server::handleGetTask, this)
|
||||
versionPath.route(desc_.get("/poll"))
|
||||
.bind(&Server::handlePollTasks, this)
|
||||
.produces(MIME(Application, Json))
|
||||
.response(Http::Code::Ok,
|
||||
"Get the state and potentially the AttemptRecord of a task");
|
||||
.response(
|
||||
Http::Code::Ok,
|
||||
"Poll all running tasks, getting completed attempts and state");
|
||||
|
||||
versionPath.route(desc_.del("/task/:runID/:taskName"))
|
||||
.bind(&Server::handleStopTask, this)
|
||||
@@ -153,70 +154,65 @@ namespace daggy::daggyr {
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(pendingGuard_);
|
||||
pending_.emplace(
|
||||
std::make_pair(runID, taskName),
|
||||
PendingJob{.fut = executor_.execute(runID, taskName, task),
|
||||
pending_.push_back(
|
||||
PendingJob{.runID = runID,
|
||||
.taskName = taskName,
|
||||
.fut = executor_.execute(runID, taskName, task),
|
||||
.resourcesUsed = resourcesUsed});
|
||||
}
|
||||
|
||||
std::cout << "Enqueuing " << runID << " / " << taskName << std::endl;
|
||||
|
||||
response.send(Pistache::Http::Code::Ok, "");
|
||||
}
|
||||
|
||||
void Server::handleGetTask(const Pistache::Rest::Request &request,
|
||||
Pistache::Http::ResponseWriter response)
|
||||
void Server::handlePollTasks(const Pistache::Rest::Request &request,
|
||||
Pistache::Http::ResponseWriter response)
|
||||
{
|
||||
if (!handleAuth(request))
|
||||
return;
|
||||
|
||||
auto runID = request.param(":runID").as<DAGRunID>();
|
||||
auto taskName = request.param(":taskName").as<std::string>();
|
||||
|
||||
auto taskID = std::make_pair(runID, taskName);
|
||||
|
||||
std::string payload;
|
||||
std::stringstream payload;
|
||||
payload << "[";
|
||||
bool first = true;
|
||||
|
||||
// Check to see if it's pending
|
||||
bool found = false;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(pendingGuard_);
|
||||
auto it = pending_.find(taskID);
|
||||
if (it != pending_.end()) {
|
||||
// poll it
|
||||
if (it->second.fut.valid() and
|
||||
it->second.fut.wait_for(1ms) == std::future_status::ready) {
|
||||
auto attempt = it->second.fut.get();
|
||||
{
|
||||
std::lock_guard<std::mutex> rlock(resultsGuard_);
|
||||
results_.emplace(taskID, attempt);
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> rlock(capacityGuard_);
|
||||
curCapacity_.cores += it->second.resourcesUsed.cores;
|
||||
curCapacity_.memoryMB += it->second.resourcesUsed.memoryMB;
|
||||
}
|
||||
std::cout << "Resolved " << it->first.first << " / "
|
||||
<< it->first.second << std::endl;
|
||||
pending_.extract(it);
|
||||
}
|
||||
else {
|
||||
payload = R"({ "state": "RUNNING" })";
|
||||
found = true;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(pendingGuard_);
|
||||
auto it = pending_.begin();
|
||||
while (it != pending_.end()) {
|
||||
if (first) {
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
std::lock_guard<std::mutex> lock(resultsGuard_);
|
||||
auto it = results_.find(taskID);
|
||||
if (it == results_.end()) {
|
||||
REQ_RESPONSE(Not_Found, "No such task");
|
||||
else {
|
||||
payload << ", ";
|
||||
}
|
||||
|
||||
payload = R"({ "state": "COMPLETED", "attempt": )" +
|
||||
attemptRecordToJSON(it->second) + "}";
|
||||
}
|
||||
payload << R"({ "runID": )" << it->runID << R"(, "taskName": )"
|
||||
<< std::quoted(it->taskName) << ", ";
|
||||
|
||||
response.send(Pistache::Http::Code::Ok, payload);
|
||||
// poll it
|
||||
if (it->fut.valid() and
|
||||
it->fut.wait_for(1ms) == std::future_status::ready) {
|
||||
auto attempt = it->fut.get();
|
||||
|
||||
payload << R"("state": "COMPLETED", "attempt":)"
|
||||
<< attemptRecordToJSON(attempt);
|
||||
{
|
||||
std::lock_guard<std::mutex> rlock(capacityGuard_);
|
||||
curCapacity_.cores += it->resourcesUsed.cores;
|
||||
curCapacity_.memoryMB += it->resourcesUsed.memoryMB;
|
||||
}
|
||||
std::cout << "Resolved " << it->runID << " / " << it->taskName
|
||||
<< std::endl;
|
||||
}
|
||||
else {
|
||||
payload << R"("state": "PENDING")";
|
||||
}
|
||||
payload << "}";
|
||||
}
|
||||
payload << "]";
|
||||
|
||||
response.send(Pistache::Http::Code::Ok, payload.str());
|
||||
}
|
||||
|
||||
void Server::handleStopTask(const Pistache::Rest::Request &request,
|
||||
|
||||
Reference in New Issue
Block a user