Checkpointing work
This commit is contained in:
@@ -87,8 +87,7 @@ namespace daggy::executors::task {
|
||||
// Resolve the remaining futures
|
||||
std::lock_guard<std::mutex> lock(promiseGuard_);
|
||||
for (auto &[jobID, job] : runningJobs_) {
|
||||
- job.prom.set_value(
|
||||
- AttemptRecord{.rc = -1, .executorLog = "executor killed"});
|
||||
+ job.fut->set(AttemptRecord{.rc = -1, .executorLog = "executor killed"});
|
||||
}
|
||||
runningJobs_.clear();
|
||||
}
|
||||
@@ -153,8 +152,9 @@ namespace daggy::executors::task {
|
||||
return newValues;
|
||||
}
|
||||
|
||||
- std::future<AttemptRecord> SlurmTaskExecutor::execute(
|
||||
- DAGRunID runID, const std::string &taskName, const Task &task)
|
||||
+ TaskFuture SlurmTaskExecutor::execute(DAGRunID runID,
|
||||
+ const std::string &taskName,
|
||||
+ const Task &task)
|
||||
{
|
||||
std::stringstream executorLog;
|
||||
|
||||
@@ -247,12 +247,12 @@ namespace daggy::executors::task {
|
||||
slurm_free_submit_response_response_msg(resp_msg);
|
||||
|
||||
std::lock_guard<std::mutex> lock(promiseGuard_);
|
||||
- Job newJob{.prom{},
|
||||
+ Job newJob{.fut = std::make_shared<Future<AttemptRecord>>(),
|
||||
.stdoutFile = stdoutFile,
|
||||
.stderrFile = stderrFile,
|
||||
.runID = runID,
|
||||
.taskName = taskName};
|
||||
- auto fut = newJob.prom.get_future();
|
||||
+ auto fut = newJob.fut;
|
||||
runningJobs_.emplace(jobID, std::move(newJob));
|
||||
|
||||
return fut;
|
||||
@@ -348,7 +348,7 @@ namespace daggy::executors::task {
|
||||
readAndClean(job.stdoutFile, record.outputLog);
|
||||
readAndClean(job.stderrFile, record.errorLog);
|
||||
|
||||
- job.prom.set_value(std::move(record));
|
||||
+ job.fut->set(std::move(record));
|
||||
resolvedJobs.insert(jobID);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user