Checkpointing work
This commit is contained in:
@@ -47,7 +47,8 @@ namespace daggy {
|
||||
|
||||
ssize_t nRunningTasks_;
|
||||
ssize_t nErroredTasks_;
|
||||
std::unordered_map<std::string, std::future<AttemptRecord>> runningTasks_;
|
||||
std::unordered_map<std::string, daggy::executors::task::TaskFuture>
|
||||
runningTasks_;
|
||||
std::unordered_map<std::string, size_t> taskAttemptCounts_;
|
||||
|
||||
std::mutex runGuard_;
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "Future.hpp"
|
||||
|
||||
namespace daggy {
|
||||
// Commands and parameters
|
||||
using ConfigValue = std::variant<std::string, std::vector<std::string>>;
|
||||
@@ -72,6 +74,7 @@ namespace daggy {
|
||||
std::string outputLog; // stdout from command
|
||||
std::string errorLog; // stderr from command
|
||||
};
|
||||
|
||||
} // namespace daggy
|
||||
|
||||
BETTER_ENUMS_DECLARE_STD_HASH(daggy::RunState)
|
||||
|
||||
113
libdaggy/include/daggy/Future.hpp
Normal file
113
libdaggy/include/daggy/Future.hpp
Normal file
@@ -0,0 +1,113 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
#include <chrono>
#include <cstdint>
#include <exception>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <thread>
#include <utility>
|
||||
|
||||
namespace daggy {

  /// Resolution state of a Future.
  enum class FutureState : std::uint8_t
  {
    NOT_READY, ///< Neither set() nor setException() has completed yet.
    OK,        ///< set() has completed.
    ERROR,     ///< setException() has completed.
  };

  /// A polled, single-value result slot.
  ///
  /// A producer resolves the slot with set() or setException(); a consumer
  /// checks ready()/state() or blocks in get(). The state is an atomic and is
  /// written only after the payload, so a consumer that observes a
  /// non-NOT_READY state reads a fully written payload.
  template <class T>
  class Future
  {
  public:
    /// Creates an unresolved future (state NOT_READY, no value).
    Future()
      : state_{FutureState::NOT_READY}
      , val_(std::nullopt)
    {
    }

    /// @return The current resolution state.
    FutureState state() const
    {
      return state_.load();
    }

    /// Stores the result and marks the future OK.
    ///
    /// @param val Result value; moved into internal storage.
    /// @throws std::runtime_error if a value has already been stored.
    void set(T val)
    {
      if (val_) {
        std::cout << "Future already has a value!" << std::endl;
        throw std::runtime_error("Future already has a value");
      }
      val_.emplace(std::move(val));
      // Publish the state last so readers never see OK without a value.
      state_ = FutureState::OK;
    }

    /// @return true once the future holds a value or an error.
    bool ready() const
    {
      return state_.load() != FutureState::NOT_READY;
    }

    /// Records a failure and marks the future ERROR.
    ///
    /// Only the message (e.what()) is retained. Copying `e` into a
    /// std::exception member would slice it and lose that message.
    ///
    /// @param e Exception describing the failure.
    void setException(const std::exception &e)
    {
      errorMessage_ = e.what();
      state_ = FutureState::ERROR;
    }

    /// Waits until the future is resolved, then returns a copy of the value.
    ///
    /// Waiting is done by polling ready() with a 100 microsecond sleep.
    ///
    /// @return A copy of the stored value.
    /// @throws std::runtime_error carrying the message passed to
    ///         setException() if the future resolved with an error.
    T get()
    {
      while (!ready()) {
        std::this_thread::sleep_for(std::chrono::microseconds(100));
      }
      if (state_ == FutureState::ERROR)
        throw std::runtime_error(errorMessage_);
      return *val_;
    }

  private:
    std::atomic<FutureState> state_;
    std::optional<T> val_;
    std::string errorMessage_; // what() text captured by setException()
  };

  /// Specialization for operations that produce no value: it only tracks
  /// completion or failure.
  template <>
  struct Future<void>
  {
  public:
    /// Creates an unresolved future (state NOT_READY).
    Future()
      : state_{FutureState::NOT_READY}
    {
    }

    /// @return The current resolution state.
    FutureState state() const
    {
      return state_.load();
    }

    /// @return true once set() or setException() has been called.
    bool ready() const
    {
      return state_.load() != FutureState::NOT_READY;
    }

    /// Marks the future OK.
    void set()
    {
      state_ = FutureState::OK;
    }

    /// Records a failure and marks the future ERROR.
    ///
    /// Only the message (e.what()) is retained. Copying `e` into a
    /// std::exception member would slice it and lose that message.
    ///
    /// @param e Exception describing the failure.
    void setException(const std::exception &e)
    {
      errorMessage_ = e.what();
      state_ = FutureState::ERROR;
    }

    /// Checks the outcome without waiting.
    ///
    /// NOTE(review): unlike Future<T>::get(), this does not block until the
    /// future is resolved; confirm that the difference is intended.
    ///
    /// @throws std::runtime_error("Value is not ready") if still NOT_READY.
    /// @throws std::runtime_error carrying the message passed to
    ///         setException() if the future resolved with an error.
    void get()
    {
      if (state_ == FutureState::NOT_READY)
        throw std::runtime_error("Value is not ready");
      if (state_ == FutureState::ERROR)
        throw std::runtime_error(errorMessage_);
    }

  private:
    std::atomic<FutureState> state_;
    std::string errorMessage_; // what() text captured by setException()
  };
} // namespace daggy
|
||||
@@ -3,17 +3,17 @@
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "Future.hpp"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace daggy {
|
||||
|
||||
class ThreadPool
|
||||
{
|
||||
public:
|
||||
@@ -65,7 +65,7 @@ namespace daggy {
|
||||
|
||||
for (size_t i = 0; i < nWorkers; ++i)
|
||||
workers_.emplace_back([&] {
|
||||
std::packaged_task<void()> task;
|
||||
std::function<void()> task;
|
||||
while (true) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
@@ -88,15 +88,30 @@ namespace daggy {
|
||||
{
|
||||
if (drain_)
|
||||
throw std::runtime_error("Unable to add task to draining pool");
|
||||
|
||||
using return_type = std::invoke_result_t<F, Args...>;
|
||||
|
||||
std::packaged_task<return_type()> task(
|
||||
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
|
||||
auto callable =
|
||||
std::bind(std::forward<F>(f), std::forward<Args>(args)...);
|
||||
auto res = std::make_shared<Future<return_type>>();
|
||||
|
||||
std::future<return_type> res = task.get_future();
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
tasks_.emplace(std::move(task));
|
||||
tasks_.emplace([res, task = std::move(callable)]() -> void {
|
||||
try {
|
||||
if constexpr ((std::is_same<return_type, void>::value)) {
|
||||
task();
|
||||
res->set();
|
||||
}
|
||||
else {
|
||||
return_type val = task();
|
||||
res->set(val);
|
||||
}
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
res->setException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
cv_.notify_one();
|
||||
return res;
|
||||
@@ -117,7 +132,7 @@ namespace daggy {
|
||||
// need to keep track of threads, so we can join them
|
||||
std::vector<std::thread> workers_;
|
||||
// the task queue
|
||||
std::queue<std::packaged_task<void()>> tasks_;
|
||||
std::queue<std::function<void()>> tasks_;
|
||||
|
||||
// synchronization
|
||||
std::mutex mtx_;
|
||||
|
||||
@@ -45,9 +45,8 @@ namespace daggy::executors::task {
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
TaskFuture execute(DAGRunID runID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
|
||||
@@ -60,7 +59,7 @@ namespace daggy::executors::task {
|
||||
|
||||
struct RunningTask
|
||||
{
|
||||
std::promise<AttemptRecord> prom;
|
||||
TaskFuture fut;
|
||||
DAGRunID runID;
|
||||
std::string taskName;
|
||||
std::string runnerURL;
|
||||
|
||||
@@ -25,9 +25,8 @@ namespace daggy::executors::task {
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
TaskFuture execute(DAGRunID runID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
|
||||
|
||||
@@ -16,12 +16,11 @@ namespace daggy::executors::task {
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
TaskFuture execute(DAGRunID runID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
|
||||
std::string description() const;
|
||||
std::string description() const override;
|
||||
};
|
||||
} // namespace daggy::executors::task
|
||||
|
||||
@@ -19,9 +19,8 @@ namespace daggy::executors::task {
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
TaskFuture execute(DAGRunID runID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
|
||||
@@ -30,7 +29,7 @@ namespace daggy::executors::task {
|
||||
private:
|
||||
struct Job
|
||||
{
|
||||
std::promise<AttemptRecord> prom;
|
||||
TaskFuture fut;
|
||||
std::string stdoutFile;
|
||||
std::string stderrFile;
|
||||
DAGRunID runID;
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
*/
|
||||
|
||||
namespace daggy::executors::task {
|
||||
using TaskFuture = std::shared_ptr<Future<AttemptRecord>>;
|
||||
|
||||
class TaskExecutor
|
||||
{
|
||||
public:
|
||||
@@ -27,9 +29,8 @@ namespace daggy::executors::task {
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) = 0;
|
||||
|
||||
// Blocking execution of a task
|
||||
virtual std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) = 0;
|
||||
virtual TaskFuture execute(DAGRunID runID, const std::string &taskName,
|
||||
const Task &task) = 0;
|
||||
|
||||
// Kill a currently executing task. This will resolve the future.
|
||||
virtual bool stop(DAGRunID runID, const std::string &taskName) = 0;
|
||||
|
||||
Reference in New Issue
Block a user