Large re-organization to split daggyd away from the core libdaggy.

This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
This commit is contained in:
Ian Roddis
2021-10-18 16:28:40 -03:00
parent 612bc8af8a
commit 470a6f2bb7
59 changed files with 586 additions and 52 deletions

213
libdaggy/src/DAGRunner.cpp Normal file
View File

@@ -0,0 +1,213 @@
#include <chrono>
#include <daggy/DAGRunner.hpp>
#include <mutex>
#include <stdexcept>
#include <utility>
namespace daggy {
// Construct a runner for one DAG run.
//
// runID       identifier passed to the executor and logger on every call.
// executor    used to execute and stop tasks.
// logger      receives run/task state updates and attempt records.
// dag         task graph to run; taken by value and moved into the runner so
//             the caller's argument is copied at most once.
// taskParams  job defaults and variables used when expanding the output of
//             generator tasks.
//
// The runner starts with running_ and kill_ both set; run() clears kill_ and
// re-asserts running_ before it begins scheduling.
DAGRunner::DAGRunner(DAGRunID runID, executors::task::TaskExecutor &executor,
                     loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
                     const TaskParameters &taskParams)
  : runID_(runID)
  , executor_(executor)
  , logger_(logger)
  , dag_(std::move(dag))
  , taskParams_(taskParams)
  , running_(true)
  , kill_(true)
  , nRunningTasks_(0)
  , nErroredTasks_(0)
{
}
// Destruction waits until runGuard_ can be acquired; the lock is taken and
// released immediately without doing any other work.
DAGRunner::~DAGRunner()
{
  const std::lock_guard<std::mutex> barrier(runGuard_);
}
// Drive the DAG until every vertex has been visited, the run has been
// stopped, or every outstanding task has errored.
//
// Each pass, performed while holding runGuard_:
//   1. stops executing tasks if stop() requested a kill,
//   2. collects finished tasks,
//   3. queues tasks that have become runnable,
//   4. decides whether the run must end early (KILLED / ERRORED).
// Between passes the thread sleeps for 250ms.
//
// Logs RUNNING on entry, KILLED or ERRORED on an early exit, and COMPLETED
// when the DAG reports all vertices visited after the loop.
//
// Returns a copy of the DAG as it stands when the loop ends.
TaskDAG DAGRunner::run()
{
  kill_ = false;
  running_ = true;
  logger_.updateDAGRunState(runID_, RunState::RUNNING);

  bool allVisited;
  {
    std::lock_guard<std::mutex> lock(runGuard_);
    allVisited = dag_.allVisited();
  }

  while (!allVisited) {
    {
      std::lock_guard<std::mutex> runLock(runGuard_);
      if (!running_ and kill_) {
        killRunning();
      }
      collectFinished();
      queuePending();

      // Stopped and nothing is still making progress. The counters are
      // compared directly instead of testing their difference: if they are
      // unsigned, the difference wraps whenever more tasks have errored than
      // are counted as running, and this exit would never trigger.
      if (!running_ and nRunningTasks_ <= nErroredTasks_) {
        logger_.updateDAGRunState(runID_, RunState::KILLED);
        break;
      }

      // Every task still counted as running has errored.
      if (nRunningTasks_ > 0 and nErroredTasks_ == nRunningTasks_) {
        logger_.updateDAGRunState(runID_, RunState::ERRORED);
        break;
      }
    }

    std::this_thread::sleep_for(250ms);

    {
      std::lock_guard<std::mutex> lock(runGuard_);
      allVisited = dag_.allVisited();
    }
  }

  // Re-query the DAG: the last pass may have completed the final vertex
  // after the cached flag was read.
  if (dag_.allVisited()) {
    logger_.updateDAGRunState(runID_, RunState::COMPLETED);
  }

  running_ = false;
  return dag_;
}
// Reset the runner's bookkeeping: both task counters, the running-task and
// attempt-count maps, and the DAG's own running state.
//
// Throws std::runtime_error if running_ is still set.
void DAGRunner::resetRunning()
{
  if (running_) {
    throw std::runtime_error("Unable to reset while DAG is running.");
  }

  const std::lock_guard<std::mutex> guard(runGuard_);

  // Counters first, then the per-task maps, then the DAG itself.
  nErroredTasks_ = 0;
  nRunningTasks_ = 0;
  taskAttemptCounts_.clear();
  runningTasks_.clear();
  dag_.resetRunning();
}
// Ask the executor to stop every task that has an entry in runningTasks_.
void DAGRunner::killRunning()
{
  for (const auto &entry : runningTasks_) {
    const auto &name = entry.first;
    executor_.stop(runID_, name);
  }
}
// Submit every task the DAG currently offers via visitNext() to the executor.
//
// Does nothing once running_ has been cleared. Each submitted task has its
// attempt count set to 1, is logged as RUNNING, has its future stored in
// runningTasks_, and is counted in nRunningTasks_.
void DAGRunner::queuePending()
{
  if (!running_) {
    return;
  }

  while (true) {
    // Pull the next vertex; stop as soon as the DAG has nothing to offer.
    auto candidate = dag_.visitNext();
    if (not candidate.has_value()) {
      break;
    }

    auto &name = candidate.value().first;
    auto &definition = candidate.value().second;

    // Hand the task to the executor and keep its future for later collection
    taskAttemptCounts_[name] = 1;
    logger_.updateTaskState(runID_, name, RunState::RUNNING);
    runningTasks_.emplace(name, executor_.execute(runID_, name, definition));
    ++nRunningTasks_;
  }
}
void DAGRunner::collectFinished()
{
for (auto &[taskName, fut] : runningTasks_) {
if (fut.valid() and fut.wait_for(1ms) == std::future_status::ready) {
auto attempt = fut.get();
logger_.logTaskAttempt(runID_, taskName, attempt);
// Not a reference, since adding tasks will invalidate references
auto vert = dag_.getVertex(taskName);
auto &task = vert.data;
if (attempt.rc == 0) {
logger_.updateTaskState(runID_, taskName, RunState::COMPLETED);
if (task.isGenerator) {
// Parse the output and update the DAGs
try {
auto parsedTasks =
tasksFromJSON(attempt.outputLog, taskParams_.jobDefaults);
auto newTasks =
expandTaskSet(parsedTasks, executor_, taskParams_.variables);
updateDAGFromTasks(dag_, newTasks);
// Add in dependencies from current task to new tasks
for (const auto &[ntName, ntTask] : newTasks) {
logger_.addTask(runID_, ntName, ntTask);
task.children.insert(ntName);
}
// Efficiently add new edges from generator task
// to children
std::unordered_set<std::string> baseNames;
for (const auto &[k, v] : parsedTasks) {
baseNames.insert(v.definedName);
}
dag_.addEdgeIf(taskName, [&](const auto &v) {
return baseNames.count(v.data.definedName) > 0;
});
logger_.updateTask(runID_, taskName, task);
}
catch (std::exception &e) {
logger_.logTaskAttempt(
runID_, taskName,
AttemptRecord{
.executorLog =
std::string{"Failed to parse JSON output: "} +
e.what()});
logger_.updateTaskState(runID_, taskName, RunState::ERRORED);
++nErroredTasks_;
}
}
dag_.completeVisit(taskName);
--nRunningTasks_;
}
else {
// RC isn't 0
if (taskAttemptCounts_[taskName] <= task.maxRetries) {
logger_.updateTaskState(runID_, taskName, RunState::RETRY);
runningTasks_[taskName] = executor_.execute(runID_, taskName, task);
++taskAttemptCounts_[taskName];
}
else {
if (logger_.getTaskState(runID_, taskName) == +RunState::RUNNING or
logger_.getTaskState(runID_, taskName) == +RunState::RETRY) {
logger_.updateTaskState(runID_, taskName, RunState::ERRORED);
++nErroredTasks_;
}
else {
// Task was killed
--nRunningTasks_;
}
}
}
}
}
}
// Request that run() wind down.
//
// kill      stored in kill_; when true, run() stops tasks that are still
//           executing via killRunning() on its next pass.
// blocking  when true, poll every 250ms (taking runGuard_ for each check)
//           until no task is still counted as running without having errored.
void DAGRunner::stop(bool kill, bool blocking)
{
  kill_ = kill;
  running_ = false;

  if (!blocking) {
    return;
  }

  while (true) {
    {
      std::lock_guard<std::mutex> lock(runGuard_);
      // Compare the counters directly rather than testing their difference
      // against zero: if the errored count ever exceeds the running count the
      // difference is never zero (and wraps for unsigned counters), which
      // would leave the caller blocked forever.
      if (nRunningTasks_ <= nErroredTasks_) {
        break;
      }
    }
    std::this_thread::sleep_for(250ms);
  }
}
} // namespace daggy