Fixing daggyr issues when reporting on tasks with very large outputs
(>10kb). Squashed commit of the following: commit b87fa418b4aca78928186a8fa992bef701e044a4 Author: Ian Roddis <tech@kinesin.ca> Date: Mon Feb 14 12:55:34 2022 -0400 removing memory leak commit 5e284ab92dbea991262a08c0cd50d6fc2f912e3b Author: Ian Roddis <tech@kinesin.ca> Date: Mon Feb 14 11:58:57 2022 -0400 Speeding up serialization, fixing payload sizing issue on daggyr commit e5e358820da4c2587741abdc3b6b103e5a4d4dd3 Author: Ian Roddis <tech@kinesin.ca> Date: Sun Feb 13 22:24:04 2022 -0400 changing newlines to std::endl for flush goodness commit 705ec86b75be947e64f4124ec8017cba2c8465e6 Author: Ian Roddis <tech@kinesin.ca> Date: Sun Feb 13 22:16:56 2022 -0400 adding more logging commit aa3db9c23e55da7a0523dc57e268b605ce8faac3 Author: Ian Roddis <tech@kinesin.ca> Date: Sun Feb 13 22:13:56 2022 -0400 Adding threadid commit 3b1a0f1333b2d43bc5ecad0746435504babbaa61 Author: Ian Roddis <tech@kinesin.ca> Date: Sun Feb 13 22:13:24 2022 -0400 Adding some debugging commit 804507e65251858fa597b7c27bcece8d8dfd589d Author: Ian Roddis <tech@kinesin.ca> Date: Sun Feb 13 21:52:53 2022 -0400 Removing curl global cleanup
This commit is contained in:
@@ -126,6 +126,7 @@ daggy::GeneralLogger getLogger(const std::string &logFile,
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
curl_global_init(CURL_GLOBAL_ALL);
|
||||
std::ios::sync_with_stdio(false);
|
||||
argparse::ArgumentParser args("Daggy");
|
||||
|
||||
@@ -216,4 +217,5 @@ int main(int argc, char **argv)
|
||||
}
|
||||
server.shutdown();
|
||||
logger.shutdown();
|
||||
curl_global_cleanup();
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ namespace daggy::daggyr {
|
||||
|
||||
private:
|
||||
void createDescription();
|
||||
void serializeResults();
|
||||
|
||||
bool handleAuth(const Pistache::Rest::Request &request);
|
||||
|
||||
@@ -59,11 +60,17 @@ namespace daggy::daggyr {
|
||||
|
||||
GeneralLogger &logger_;
|
||||
executors::task::ForkingTaskExecutor executor_;
|
||||
std::thread serializer_;
|
||||
std::atomic<bool> running_;
|
||||
|
||||
using TaskID = std::pair<DAGRunID, std::string>;
|
||||
Capacity maxCapacity_;
|
||||
std::mutex rtGuard_;
|
||||
std::unordered_map<TaskID, daggy::executors::task::TaskFuture>
|
||||
runningTasks_;
|
||||
|
||||
std::mutex resultsGuard_;
|
||||
std::unordered_map<TaskID, Future<std::string>>
|
||||
results_;
|
||||
};
|
||||
} // namespace daggy::daggyr
|
||||
|
||||
@@ -25,8 +25,8 @@ namespace daggy::daggyr {
|
||||
.threads(threads)
|
||||
.flags(Pistache::Tcp::Options::ReuseAddr |
|
||||
Pistache::Tcp::Options::ReusePort)
|
||||
.maxRequestSize(102400)
|
||||
.maxResponseSize(102400);
|
||||
.maxRequestSize(4294967296)
|
||||
.maxResponseSize(4294967296);
|
||||
endpoint_.init(opts);
|
||||
createDescription();
|
||||
}
|
||||
@@ -37,6 +37,8 @@ namespace daggy::daggyr {
|
||||
, desc_("Daggy Runner API", "0.1")
|
||||
, logger_(logger)
|
||||
, executor_(maxCores)
|
||||
, serializer_(&Server::serializeResults, this)
|
||||
, running_(true)
|
||||
, maxCapacity_{maxCores, maxMemoryMB}
|
||||
{
|
||||
}
|
||||
@@ -63,6 +65,40 @@ namespace daggy::daggyr {
|
||||
void Server::shutdown()
|
||||
{
|
||||
endpoint_.shutdown();
|
||||
running_ = false;
|
||||
serializer_.join();
|
||||
}
|
||||
|
||||
void Server::serializeResults() {
|
||||
using Node = std::unordered_map<TaskID, daggy::executors::task::TaskFuture>::node_type;
|
||||
|
||||
std::vector<TaskID> ready;
|
||||
std::vector<Node> nodes;
|
||||
while (running_) {
|
||||
ready.clear();
|
||||
nodes.clear();
|
||||
std::this_thread::sleep_for(1s);
|
||||
{
|
||||
std::lock_guard<std::mutex> rtLock(rtGuard_);
|
||||
for (const auto & [taskid, fut] : runningTasks_) {
|
||||
if (fut->ready())
|
||||
ready.push_back(taskid);
|
||||
}
|
||||
|
||||
for (const auto & tid : ready) {
|
||||
nodes.emplace_back(runningTasks_.extract(tid));
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the results
|
||||
{
|
||||
std::lock_guard<std::mutex> resultsLock(resultsGuard_);
|
||||
for (const auto & node : nodes) {
|
||||
auto json = attemptRecordToJSON(node.mapped()->get());
|
||||
results_[node.key()].set(json);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint16_t Server::getPort() const
|
||||
@@ -153,10 +189,15 @@ namespace daggy::daggyr {
|
||||
|
||||
auto tid = std::make_pair(runID, taskName);
|
||||
auto fut = executor_.execute(runID, taskName, task);
|
||||
Future<std::string> strFut;
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(rtGuard_);
|
||||
runningTasks_.emplace(std::move(tid), std::move(fut));
|
||||
runningTasks_.emplace(tid, std::move(fut));
|
||||
}
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(resultsGuard_);
|
||||
results_[tid];
|
||||
}
|
||||
|
||||
logger_.debug(requestID + ": Task successfully enqueued");
|
||||
@@ -175,42 +216,42 @@ namespace daggy::daggyr {
|
||||
auto requestID = std::to_string(runID) + "/" + taskName;
|
||||
|
||||
auto taskID = std::make_pair(runID, taskName);
|
||||
std::unordered_map<TaskID, daggy::executors::task::TaskFuture>::node_type
|
||||
std::unordered_map<TaskID, Future<std::string>>::node_type
|
||||
node;
|
||||
bool notFound = false;
|
||||
Pistache::Http::Code code = Pistache::Http::Code::Ok;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(rtGuard_);
|
||||
auto it = runningTasks_.find(taskID);
|
||||
if (it == runningTasks_.end()) {
|
||||
std::lock_guard<std::mutex> lock(resultsGuard_);
|
||||
auto it = results_.find(taskID);
|
||||
if (it == results_.end()) {
|
||||
logger_.warn(requestID + ": Polled about unknown task");
|
||||
notFound = true;
|
||||
code = Pistache::Http::Code::Not_Found;
|
||||
}
|
||||
else if (!it->second->ready()) {
|
||||
else if (!it->second.ready()) {
|
||||
logger_.debug(requestID + ": Polled but task not ready yet");
|
||||
notFound = true;
|
||||
code = Pistache::Http::Code::Precondition_Required;
|
||||
}
|
||||
else {
|
||||
logger_.debug(requestID + ": Polled and ready.");
|
||||
node = runningTasks_.extract(taskID);
|
||||
node = results_.extract(taskID);
|
||||
}
|
||||
}
|
||||
|
||||
if (notFound) {
|
||||
response.send(Pistache::Http::Code::Not_Found, "");
|
||||
if (code != Pistache::Http::Code::Ok) {
|
||||
response.send(code, "");
|
||||
return;
|
||||
}
|
||||
|
||||
auto prom = response.send(Pistache::Http::Code::Ok,
|
||||
attemptRecordToJSON(node.mapped()->get()));
|
||||
// If the promise fails, then reinsert the result for later polling
|
||||
std::string payload = node.mapped().get();
|
||||
auto prom = response.send(Pistache::Http::Code::Ok, payload);
|
||||
|
||||
if (prom.isRejected()) {
|
||||
logger_.warn(requestID +
|
||||
": Record sent to poller, but failed to complete transfer.");
|
||||
std::lock_guard<std::mutex> lock(rtGuard_);
|
||||
runningTasks_.insert(std::move(node));
|
||||
std::lock_guard<std::mutex> lock(resultsGuard_);
|
||||
results_.insert(std::move(node));
|
||||
}
|
||||
else {
|
||||
logger_.debug(requestID + ": Record send successfully");
|
||||
logger_.debug(requestID + ": Record send successfully. ");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user