Add fix for a race condition in task resource management
This commit is contained in:
@@ -268,34 +268,39 @@ void DaggyRunnerTaskExecutor::monitor()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(runnersGuard_);
|
||||
for (auto &[runnerURL, caps] : runners_) {
|
||||
rj::Document doc;
|
||||
try {
|
||||
const auto &[code, json] = JSON_HTTP_REQUEST(runnerURL + "/v1/poll");
|
||||
auto [code, json] = JSON_HTTP_REQUEST(runnerURL + "/v1/poll");
|
||||
if (code != HTTPCode::Ok)
|
||||
continue;
|
||||
doc.Swap(json);
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
std::cout << "Curl failed for runner " << runnerURL << ": "
|
||||
<< e.what() << std::endl;
|
||||
}
|
||||
|
||||
const auto tasks = json.GetArray();
|
||||
for (size_t idx = 0; idx < tasks.Size(); ++idx) {
|
||||
const auto &task = tasks[idx];
|
||||
if (task["state"] == "PENDING") {
|
||||
resolvedJobs.emplace(std::make_pair(task["runID"].GetInt64(),
|
||||
task["taskName"].GetString()),
|
||||
std::nullopt);
|
||||
}
|
||||
else {
|
||||
auto tid = std::make_pair(task["runID"].GetInt64(),
|
||||
task["taskName"].GetString());
|
||||
const auto tasks = doc.GetArray();
|
||||
for (size_t idx = 0; idx < tasks.Size(); ++idx) {
|
||||
const auto &task = tasks[idx];
|
||||
if (task["state"] == "PENDING") {
|
||||
resolvedJobs.emplace(std::make_pair(task["runID"].GetInt64(),
|
||||
task["taskName"].GetString()),
|
||||
std::nullopt);
|
||||
}
|
||||
else {
|
||||
auto tid = std::make_pair(task["runID"].GetInt64(),
|
||||
task["taskName"].GetString());
|
||||
auto it = taskResources.find(tid);
|
||||
if (it != taskResources.end()) {
|
||||
const auto &res = taskResources.at(tid);
|
||||
caps.current.cores += res.cores;
|
||||
caps.current.memoryMB += res.memoryMB;
|
||||
|
||||
resolvedJobs.emplace(tid, attemptRecordFromJSON(task["attempt"]));
|
||||
}
|
||||
|
||||
resolvedJobs.emplace(tid, attemptRecordFromJSON(task["attempt"]));
|
||||
}
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
std::cout << "Curl timeout failed for runner " << runnerURL << ": "
|
||||
<< e.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user