Checkpointing work

This commit is contained in:
Ian Roddis
2022-01-12 12:50:46 -04:00
parent 04e95cfcf3
commit 9a5a247f15
21 changed files with 320 additions and 160 deletions

View File

@@ -9,8 +9,8 @@
#include <daggy/executors/task/DaggyRunnerTaskExecutor.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/loggers/dag_run/DAGRunLogger.hpp>
#include <deque>
#include <filesystem>
#include <list>
#define DAGGY_REST_HANDLER(func) \
void func(const Pistache::Rest::Request &request, \
@@ -58,6 +58,8 @@ namespace daggy::daggyr {
executors::task::ForkingTaskExecutor executor_;
using TaskID = std::pair<DAGRunID, std::string>;
struct TaskRecord
{
RunState state;
@@ -68,17 +70,21 @@ namespace daggy::daggyr {
Capacity maxCapacity_;
Capacity curCapacity_;
std::mutex pendingGuard_;
// Per-task bookkeeping record: the run ID and task name of a submitted task,
// a future for it, a Capacity value, and a resolved flag.
// NOTE(review): `fut` is declared twice below with different types. This text
// looks like a rendered diff whose +/- markers were stripped, so one of the
// two lines is presumably the removed version — confirm against the real
// header before relying on either type.
struct PendingJob
{
DAGRunID runID;
std::string taskName;
std::future<AttemptRecord> fut;
daggy::executors::task::TaskFuture fut;
// Presumably the capacity reserved for this task while it runs — TODO confirm.
Capacity resourcesUsed;
// Presumably set once the task's result has been collected — TODO confirm.
bool resolved;
};
// NOTE(review): `pending_` is declared twice in this span (as a std::list and
// as a std::unordered_map keyed by TaskID), and `pendingGuard_` also appears
// earlier in the class. This looks like old and new diff lines shown together
// with their +/- markers stripped — confirm which declarations are current.
std::list<PendingJob> pending_;
// Declared here; the definition is not visible in this chunk.
void monitor();
std::atomic<bool> running_;
std::thread monitorWorker_;
// Presumably guards pending_ — TODO confirm against the implementation.
std::mutex pendingGuard_;
// NOTE(review): TaskID is a std::pair, and the standard library provides no
// std::hash specialization for std::pair. This map needs a hash supplied
// elsewhere (none is visible in this chunk) — verify one exists.
std::unordered_map<TaskID, PendingJob> pending_;
// Presumably guards resolved_ — TODO confirm against the implementation.
std::mutex resolvedGuard_;
std::deque<std::string> resolved_;
};
} // namespace daggy::daggyr