Large re-organization to split daggyd away from the core libdaggy.

This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
This commit is contained in:
Ian Roddis
2021-10-18 16:28:40 -03:00
parent 612bc8af8a
commit 470a6f2bb7
59 changed files with 586 additions and 52 deletions

View File

@@ -0,0 +1,82 @@
#pragma once
#include <deque>
#include <functional>
#include <iostream>
#include <iterator>
#include <optional>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <unordered_map>
#include <unordered_set>
#include "Defines.hpp"
/*
The DAG structure in daggy exists only to ensure that tasks are run
in dependency order: a task becomes runnable once all of its parents
have completed.
*/
namespace daggy {
// A single DAG node: the caller's payload plus the bookkeeping the DAG uses
// to schedule it.
template <typename T>
struct Vertex
{
// Scheduling state; a new vertex starts out QUEUED.
RunState state = RunState::QUEUED;
// Number of incoming edges still outstanding. DAG::visitNext() only hands
// out vertices whose depCount is zero; DAG::completeVisit() decrements it.
uint32_t depCount = 0;
// Caller-supplied payload.
T data;
// Positions, within the owning DAG's vertex vector, of the vertices that
// depend on this one.
std::unordered_set<size_t> children;
};
// Directed graph keyed by K with payloads of type V. Vertices live in a
// vector; keyMap_ translates a key into its position in that vector and
// vertexName_ translates a position back into its key.
template <typename K, typename V>
class DAG
{
public:
// Vertices
// Adds a vertex; throws std::runtime_error if `id` is already present.
void addVertex(K id, V data);
// Returns a copy of every key currently in the DAG.
std::unordered_set<K> getVertices() const;
// Edges
// Makes `to` depend on `from`. Both keys are looked up with
// unordered_map::at, so an unknown key throws std::out_of_range.
void addEdge(const K &from, const K &to);
// Makes every vertex other than `src` for which `predicate` returns true a
// child of `src`.
void addEdgeIf(const K &src,
std::function<bool(const Vertex<V> &v)> predicate);
// True when every vertex can be ordered topologically using the current
// dependency counts, i.e. no cycle was found.
[[nodiscard]] bool isValid() const;
// True if a vertex with this key exists.
bool hasVertex(const K &id);
// Attributes
// Number of vertices.
[[nodiscard]] size_t size() const;
// True when the DAG holds no vertices.
[[nodiscard]] bool empty() const;
// Reset the DAG to completely unvisited
void reset();
// Reset any vertex with RUNNING state to QUEUED
void resetRunning();
// Overwrites the state of one vertex; dependency counts are not touched.
void setVertexState(const K &id, RunState state);
// Calls `fun` once per vertex, in storage order.
void forEach(std::function<void(const Vertex<V> &)> fun) const;
// True when every vertex is in the COMPLETED state.
[[nodiscard]] bool allVisited() const;
// Returns the key and payload of the first QUEUED vertex with no outstanding
// dependencies and marks it RUNNING; returns an empty optional if there is
// none.
std::optional<std::pair<K, V>> visitNext();
// WARNING: reference potentially invalidated on insertions.
Vertex<V> &getVertex(const K &id);
// Marks the vertex COMPLETED and releases one dependency on each child.
void completeVisit(const K &id);
private:
// key -> position in vertices_ / vertexName_
std::unordered_map<K, size_t> keyMap_;
// position -> key (parallel to vertices_)
// NOTE(review): std::vector is used here but <vector> is not included by
// this header directly.
std::vector<K> vertexName_;
std::vector<Vertex<V>> vertices_;
};
} // namespace daggy
#include "DAG.impl.hxx"

View File

@@ -0,0 +1,176 @@
namespace daggy {
// Number of vertices currently stored in the DAG.
template <typename K, typename V>
size_t DAG<K, V>::size() const
{
  const size_t vertexCount = vertices_.size();
  return vertexCount;
}
// True when no vertex has been added yet.
template <typename K, typename V>
bool DAG<K, V>::empty() const
{
  return vertices_.size() == 0;
}
// Reports whether a vertex with the given key has been registered.
template <typename K, typename V>
bool DAG<K, V>::hasVertex(const K &id)
{
  const auto found = keyMap_.find(id);
  return found != keyMap_.end();
}
// Returns a mutable reference to the vertex stored under `id`.
// Throws std::out_of_range (from unordered_map::at) if the key is unknown.
template <typename K, typename V>
Vertex<V> &DAG<K, V>::getVertex(const K &id)
{
  const size_t position = keyMap_.at(id);
  return vertices_[position];
}
// Returns a copy of every vertex key in the DAG.
template <typename K, typename V>
std::unordered_set<K> DAG<K, V>::getVertices() const
{
  std::unordered_set<K> keys;
  // The final size is known up front, so avoid rehashing while inserting.
  keys.reserve(keyMap_.size());
  // Bind by reference: iterating by value would copy every (key, index) pair.
  for (const auto &entry : keyMap_) {
    keys.insert(entry.first);
  }
  return keys;
}
// Registers a new vertex under `id` carrying `data`.
// Throws std::runtime_error if a vertex with that key already exists.
template <typename K, typename V>
void DAG<K, V>::addVertex(K id, V data)
{
  // Reject duplicates before touching any container.
  if (keyMap_.find(id) != keyMap_.end()) {
    std::stringstream message;
    message << "A vertex with ID " << id << " already exists in the DAG";
    throw std::runtime_error(message.str());
  }

  // The new vertex lands at the current end of the parallel vectors.
  const size_t position = vertices_.size();
  vertexName_.push_back(id);
  vertices_.push_back(
      Vertex<V>{.state = RunState::QUEUED, .depCount = 0, .data = data});
  keyMap_.emplace(id, position);
}
// Adds the edge from -> to, making `to` wait for `from`.
// Both keys must already exist; an unknown key throws std::out_of_range
// (from unordered_map::at). Adding an edge that already exists is a no-op.
template <typename K, typename V>
void DAG<K, V>::addEdge(const K &from, const K &to)
{
  const size_t src = keyMap_.at(from);
  const size_t dst = keyMap_.at(to);
  // `children` is a set, so a repeated edge is stored only once. Count the
  // dependency only when the edge is genuinely new; otherwise depCount would
  // exceed the number of parents and completeVisit() could never bring the
  // child back to zero.
  if (vertices_[src].children.insert(dst).second) {
    ++vertices_[dst].depCount;
  }
}
// Adds an edge from `src` to every other vertex accepted by `predicate`.
// An unknown `src` throws std::out_of_range (from unordered_map::at).
// Vertices that are already children of `src` are left untouched.
template <typename K, typename V>
void DAG<K, V>::addEdgeIf(const K &src,
                          std::function<bool(const Vertex<V> &v)> predicate)
{
  const size_t parentIdx = keyMap_.at(src);
  auto &parent = vertices_[parentIdx];
  for (size_t i = 0; i < vertices_.size(); ++i) {
    if (!predicate(vertices_[i]))
      continue;
    // Never create a self-edge.
    if (i == parentIdx)
      continue;
    // Only count the dependency when the edge is new; `children` is a set,
    // so a repeated edge must not inflate the child's depCount.
    if (parent.children.insert(i).second) {
      ++vertices_[i].depCount;
    }
  }
}
// Checks that the graph can be fully ordered topologically, starting from
// the vertices' current dependency counts. Returns false when some vertex
// can never become ready (for example because it is part of a cycle).
template <typename K, typename V>
bool DAG<K, V>::isValid() const
{
  const size_t total = vertices_.size();

  // Work on a private copy of the counts so the DAG itself is not modified.
  std::vector<size_t> pending(total, 0);
  std::queue<size_t> frontier;
  for (size_t idx = 0; idx < total; ++idx) {
    pending[idx] = vertices_[idx].depCount;
    if (pending[idx] == 0)
      frontier.push(idx);
  }

  // Repeatedly retire a ready vertex and release its children.
  size_t retired = 0;
  for (; !frontier.empty(); frontier.pop(), ++retired) {
    const auto &current = frontier.front();
    for (const auto &childIdx : vertices_[current].children) {
      if (--pending[childIdx] == 0)
        frontier.push(childIdx);
    }
  }

  return retired == total;
}
// Returns every vertex to QUEUED and rebuilds the dependency counts from the
// stored edges.
template <typename K, typename V>
void DAG<K, V>::reset()
{
  // Pass 1: clear state and counts everywhere before recounting.
  for (size_t idx = 0; idx < vertices_.size(); ++idx) {
    vertices_[idx].state = RunState::QUEUED;
    vertices_[idx].depCount = 0;
  }

  // Pass 2: every edge parent -> child contributes one dependency to child.
  for (size_t idx = 0; idx < vertices_.size(); ++idx) {
    for (const auto childIdx : vertices_[idx].children) {
      ++vertices_[childIdx].depCount;
    }
  }
}
// Puts every vertex that is currently RUNNING back into the QUEUED state;
// vertices in any other state are left alone.
template <typename K, typename V>
void DAG<K, V>::resetRunning()
{
  for (auto &vertex : vertices_) {
    if (vertex.state == +RunState::RUNNING) {
      vertex.state = RunState::QUEUED;
    }
  }
}
// Overwrites the state of the vertex stored under `id`.
// Throws std::out_of_range (from unordered_map::at) if the key is unknown.
template <typename K, typename V>
void DAG<K, V>::setVertexState(const K &id, RunState state)
{
  const size_t position = keyMap_.at(id);
  auto &vertex = vertices_[position];
  vertex.state = state;
}
// True when every vertex is COMPLETED (trivially true for an empty DAG).
template <typename K, typename V>
bool DAG<K, V>::allVisited() const
{
  for (const auto &vertex : vertices_) {
    // A single unfinished vertex is enough to answer "no".
    if (vertex.state != +RunState::COMPLETED)
      return false;
  }
  return true;
}
// Hands out the next runnable vertex: the first one, in storage order, that
// is QUEUED and has no outstanding dependencies. The vertex is marked
// RUNNING and its key and a copy of its payload are returned. Returns an
// empty optional when nothing is runnable.
template <typename K, typename V>
std::optional<std::pair<K, V>> DAG<K, V>::visitNext()
{
  for (size_t idx = 0; idx != vertices_.size(); ++idx) {
    auto &candidate = vertices_[idx];
    if (candidate.state != +RunState::QUEUED || candidate.depCount != 0)
      continue;

    candidate.state = RunState::RUNNING;
    return std::make_pair(vertexName_[idx], candidate.data);
  }
  return std::nullopt;
}
// Marks the vertex stored under `id` as COMPLETED and releases one
// dependency on each of its children.
// Throws std::out_of_range (from unordered_map::at) if the key is unknown.
template <typename K, typename V>
void DAG<K, V>::completeVisit(const K &id)
{
  const size_t position = keyMap_.at(id);
  auto &finished = vertices_[position];
  finished.state = RunState::COMPLETED;

  for (const auto childIdx : finished.children) {
    auto &child = vertices_[childIdx];
    --child.depCount;
  }
}
// Invokes `fun` once for every vertex, in storage order.
template <typename K, typename V>
void DAG<K, V>::forEach(std::function<void(const Vertex<V> &)> fun) const
{
  for (const auto &vertex : vertices_) {
    fun(vertex);
  }
}
} // namespace daggy

View File

@@ -0,0 +1,55 @@
#pragma once
#include <rapidjson/document.h>
#include <future>
#include <iomanip>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "DAG.hpp"
#include "Defines.hpp"
#include "Serialization.hpp"
#include "Utilities.hpp"
#include "daggy/executors/task/TaskExecutor.hpp"
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
using namespace std::chrono_literals;
namespace daggy {
// Drives one run of a TaskDAG using the supplied executor and logger.
// NOTE(review): all member functions are defined outside this header; the
// per-method notes below are inferred from names and signatures only.
class DAGRunner
{
public:
// `executor`, `logger` and `taskParams` are stored as references (see the
// data members below), so all three must outlive the runner. `dag` is taken
// by value.
DAGRunner(DAGRunID runID, executors::task::TaskExecutor &executor,
loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
const TaskParameters &taskParams);
~DAGRunner();
// Presumably executes the DAG and returns its final form -- confirm
// against the implementation.
TaskDAG run();
void resetRunning();
// NOTE(review): semantics of `kill` and `blocking` are not visible here.
void stop(bool kill = false, bool blocking = false);
private:
void collectFinished();
void queuePending();
void killRunning();
DAGRunID runID_;
// Non-owning references; lifetime is managed by the caller.
executors::task::TaskExecutor &executor_;
loggers::dag_run::DAGRunLogger &logger_;
TaskDAG dag_;
const TaskParameters &taskParams_;
// NOTE(review): std::atomic and std::mutex are used below, but this header
// does not include <atomic> or <mutex> directly.
std::atomic<bool> running_;
std::atomic<bool> kill_;
// NOTE(review): ssize_t is a POSIX type, not standard C++.
ssize_t nRunningTasks_;
ssize_t nErroredTasks_;
// In-flight attempts, keyed by a string (presumably the task name).
std::unordered_map<std::string, std::future<AttemptRecord>> runningTasks_;
std::unordered_map<std::string, size_t> taskAttemptCounts_;
std::mutex runGuard_;
};
} // namespace daggy

View File

@@ -0,0 +1,77 @@
#pragma once
#include <enum.h>
#include <chrono>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <variant>
#include <vector>
namespace daggy {
// Commands and parameters
// A configuration value is either a single string or a list of strings.
using ConfigValue = std::variant<std::string, std::vector<std::string>>;
using ConfigValues = std::unordered_map<std::string, ConfigValue>;
using Command = std::vector<std::string>;

// Time
// TimePoints are serialized and shown to users, so they must come from the
// wall clock. std::chrono::high_resolution_clock is only an alias whose
// target differs between standard libraries (system_clock on libstdc++,
// steady_clock elsewhere); a steady_clock reading has no calendar meaning
// and no to_time_t(). On libstdc++ this is the same type as before.
using Clock = std::chrono::system_clock;
using TimePoint = std::chrono::time_point<Clock>;

// DAG Runs
using DAGRunID = size_t;
// Lifecycle states used for both DAG runs and individual tasks. Declared
// through the Better Enums BETTER_ENUM macro (from <enum.h>) with uint32_t
// as the underlying type; values count up from QUEUED = 1.
BETTER_ENUM(RunState, uint32_t, QUEUED = 1, RUNNING, RETRY, ERRORED, KILLED,
PAUSED, COMPLETED);
// Definition of a single task within a DAG.
//
// The scalar members carry default initializers so that a default-constructed
// Task (`Task t;`) is fully defined; without them operator== would read
// indeterminate values. Task remains an aggregate.
struct Task
{
  std::string definedName;
  // True if the output of this task is a JSON set of tasks to complete
  bool isGenerator = false;
  uint32_t maxRetries = 0;
  // Time to wait between retries
  uint32_t retryIntervalSeconds = 0;
  // It's up to the individual inspectors to convert values from strings /
  // arrays of strings
  ConfigValues job;
  // Names of the tasks this task is connected to in the DAG.
  std::unordered_set<std::string> children;
  std::unordered_set<std::string> parents;

  // Member-wise equality over every field.
  bool operator==(const Task &other) const
  {
    return (definedName == other.definedName) and
           (maxRetries == other.maxRetries) and
           (retryIntervalSeconds == other.retryIntervalSeconds) and
           (job == other.job) and (children == other.children) and
           (parents == other.parents) and (isGenerator == other.isGenerator);
  }
};

// Collection of tasks keyed by a string (presumably the task name).
using TaskSet = std::unordered_map<std::string, Task>;
// All the components required to define and run a DAG
// Run-wide configuration that accompanies a set of tasks.
struct TaskParameters
{
// NOTE(review): presumably the values substituted into task definitions
// (see interpolateValues in Utilities.hpp) -- confirm against the callers.
ConfigValues variables;
// Default job settings; the taskFromJSON / tasksFromJSON functions in
// Serialization.hpp accept a `jobDefaults` argument of the same type.
ConfigValues jobDefaults;
};
// Complete specification of a DAG: a tag, its tasks, and the parameters that
// apply to those tasks.
struct DAGSpec
{
// Free-form label; DAGRunLogger::queryDAGRuns() accepts a tag to query by.
std::string tag;
TaskSet tasks;
TaskParameters taskConfig;
};
// Outcome of one attempt at running a task.
struct AttemptRecord
{
TimePoint startTime;
TimePoint stopTime;
// NOTE(review): `rc` has no default initializer, so `AttemptRecord r;`
// leaves it indeterminate until it is assigned.
int rc; // RC from the task
std::string executorLog; // Logs from the dag_executor
std::string outputLog; // stdout from command
std::string errorLog; // stderr from command
};
} // namespace daggy
// Better Enums helper that declares a std::hash specialization for RunState,
// so the enum can be used as a key in unordered containers.
BETTER_ENUMS_DECLARE_STD_HASH(daggy::RunState)

View File

@@ -0,0 +1,68 @@
#pragma once
#include <rapidjson/document.h>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "Defines.hpp"
#include "Utilities.hpp"
#include "loggers/dag_run/Defines.hpp"
namespace rj = rapidjson;
// JSON (de)serialization entry points for daggy's core types.
// NOTE(review): only prototypes are visible in this header; error behavior
// (for example what happens on malformed JSON) must be checked against the
// implementation file.
namespace daggy {
// Validates a rapidjson parse result; `prefix` is presumably prepended to
// the error message -- confirm against the implementation.
void checkRJParse(const rj::ParseResult &result,
const std::string &prefix = "");
// Renders a rapidjson value as a string.
std::string dumpJSON(const rj::Value &doc);
// Parameters
// Each *FromJSON function comes in two overloads: one taking raw JSON text
// and one taking an already-parsed rapidjson value.
ConfigValues configFromJSON(const std::string &jsonSpec);
ConfigValues configFromJSON(const rj::Value &spec);
std::string configToJSON(const ConfigValues &config);
// Tasks
// `jobDefaults` is optional and defaults to an empty set of values.
Task taskFromJSON(const std::string &name, const rj::Value &spec,
const ConfigValues &jobDefaults = {});
Task taskFromJSON(const std::string &name, const std::string &spec,
const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const std::string &jsonSpec,
const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const rj::Value &spec,
const ConfigValues &jobDefaults = {});
std::string taskToJSON(const Task &task);
std::string tasksToJSON(const TaskSet &tasks);
// Full specs
DAGSpec dagFromJSON(const rj::Value &spec);
DAGSpec dagFromJSON(const std::string &jsonSpec);
// Attempt Records
std::string attemptRecordToJSON(const AttemptRecord &attemptRecord);
AttemptRecord attemptRecordFromJSON(const std::string &json);
AttemptRecord attemptRecordFromJSON(const rj::Value &spec);
// default serialization
std::ostream &operator<<(std::ostream &os, const Task &task);
// Conversions between TimePoint and its string form.
std::string timePointToString(const TimePoint &tp);
TimePoint stringToTimePoint(const std::string &timeStr);
/*
DAGRun Loggers
*/
// Shorthand for the logger record types declared in
// loggers/dag_run/Defines.hpp.
namespace logger = loggers::dag_run;
std::string stateUpdateRecordToJSON(const logger::StateUpdateRecord &rec);
logger::StateUpdateRecord stateUpdateRecordFromJSON(const rj::Value &json);
logger::StateUpdateRecord stateUpdateRecordFromJSON(const std::string &json);
} // namespace daggy

View File

@@ -0,0 +1,186 @@
#pragma once
#include <atomic>
#include <condition_variable>
#include <functional>
#include <future>
#include <list>
#include <memory>
#include <queue>
#include <thread>
#include <vector>
using namespace std::chrono_literals;
namespace daggy {
/*
  A Task Queue is a collection of async tasks to be executed by the
  thread pool. Using individual task queues allows for a rough QoS
  when a single thread may be submitting batches of requests --
  one producer won't starve out another, but all tasks will be run
  as quickly as possible.

  Every member function takes the queue's own mutex, so a queue may be
  filled by one thread while another thread pops from it.
*/
class TaskQueue
{
public:
  // Wraps f(args...) in a packaged task, appends it to the queue and returns
  // the future through which the call's result (or exception) is delivered.
  // The callable and its arguments are bound (copied or moved) immediately.
  template <class F, class... Args>
  decltype(auto) addTask(F &&f, Args &&...args)
  {
    using return_type = std::invoke_result_t<F, Args...>;

    std::packaged_task<return_type()> task(
        std::bind(std::forward<F>(f), std::forward<Args>(args)...));

    // Grab the future before the task is type-erased into the queue.
    std::future<return_type> res = task.get_future();
    {
      std::lock_guard<std::mutex> guard(mtx_);
      tasks_.emplace(std::move(task));
    }
    return res;
  }

  // Removes and returns the oldest task. If the queue is empty, an invalid
  // task (valid() == false) is returned instead of touching the empty
  // container, which would be undefined behavior.
  std::packaged_task<void()> pop()
  {
    std::lock_guard<std::mutex> guard(mtx_);
    if (tasks_.empty()) {
      return std::packaged_task<void()>{};
    }
    auto task = std::move(tasks_.front());
    tasks_.pop();
    return task;
  }

  // Number of tasks waiting in the queue.
  size_t size() const
  {
    std::lock_guard<std::mutex> guard(mtx_);
    return tasks_.size();
  }

  // True when no task is waiting.
  bool empty() const
  {
    std::lock_guard<std::mutex> guard(mtx_);
    return tasks_.empty();
  }

private:
  std::queue<std::packaged_task<void()>> tasks_;
  // mutable so that the const observers can still lock.
  mutable std::mutex mtx_;
};
/*
  Fixed-size pool of worker threads that services a list of TaskQueues in
  round-robin order: each time a worker is free it takes ONE task from the
  next queue in the list, so a producer with a long queue cannot starve the
  others. A queue is removed from the list as soon as its last task has been
  taken.
*/
class ThreadPool
{
public:
  // Starts `nWorkers` worker threads.
  explicit ThreadPool(size_t nWorkers)
    : tqit_(taskQueues_.begin())
    , stop_(false)
    , drain_(false)
  {
    resize(nWorkers);
  }

  // Stops and joins all workers; tasks still queued are run first.
  ~ThreadPool()
  {
    shutdown();
  }

  // Asks the workers to exit once the queue list is empty and joins them.
  void shutdown()
  {
    {
      // The flag is part of the condition-variable predicate, so it must be
      // changed while holding the mutex. Otherwise a worker that has just
      // evaluated the predicate but not yet blocked would miss the
      // notification and join() below would hang forever.
      std::lock_guard<std::mutex> guard(mtx_);
      stop_ = true;
    }
    cv_.notify_all();
    for (std::thread &worker : workers_) {
      if (worker.joinable())
        worker.join();
    }
  }

  // Refuses new work and blocks until the queue list is empty.
  // Note: a queue leaves the list when its last task is *taken*, so tasks
  // that are already executing may still be running when this returns.
  void drain()
  {
    drain_ = true;
    while (true) {
      {
        std::lock_guard<std::mutex> guard(mtx_);
        if (taskQueues_.empty())
          break;
      }
      std::this_thread::sleep_for(250ms);
    }
  }

  // Accept new work again after drain().
  void restart()
  {
    drain_ = false;
  }

  // Stops the current workers (after the pending queues have been emptied)
  // and starts `nWorkers` fresh ones.
  void resize(size_t nWorkers)
  {
    shutdown();
    workers_.clear();
    stop_ = false;
    for (size_t i = 0; i < nWorkers; ++i)
      workers_.emplace_back([this] {
        while (true) {
          std::packaged_task<void()> task;
          {
            std::unique_lock<std::mutex> lock(mtx_);
            cv_.wait(lock, [this] { return stop_ || !taskQueues_.empty(); });
            if (taskQueues_.empty()) {
              if (stop_)
                return;
              continue;
            }

            // Wrap the round-robin cursor around.
            if (tqit_ == taskQueues_.end())
              tqit_ = taskQueues_.begin();

            // A null or already-empty queue (e.g. one handed to addTasks()
            // before anything was added to it) has nothing to run; drop it
            // rather than popping from an empty container.
            if (!*tqit_ || (*tqit_)->empty()) {
              tqit_ = taskQueues_.erase(tqit_);
              continue;
            }

            task = (*tqit_)->pop();
            if ((*tqit_)->empty()) {
              tqit_ = taskQueues_.erase(tqit_);
            }
            else {
              ++tqit_;
            }
          }
          // Run outside the lock so other workers can pick up tasks.
          if (task.valid())
            task();
        }
      });
  }

  // Schedules f(args...) on the pool and returns the matching future.
  // Throws std::runtime_error while the pool is draining.
  template <class F, class... Args>
  decltype(auto) addTask(F &&f, Args &&...args)
  {
    if (drain_)
      throw std::runtime_error("Unable to add task to draining pool");
    auto tq = std::make_shared<TaskQueue>();

    // Forward so that rvalue arguments are moved instead of copied and
    // move-only callables are accepted.
    auto fut = tq->addTask(std::forward<F>(f), std::forward<Args>(args)...);

    {
      std::lock_guard<std::mutex> guard(mtx_);
      taskQueues_.push_back(tq);
    }
    cv_.notify_one();
    return fut;
  }

  // Schedules a whole, pre-filled queue of tasks.
  // Throws std::runtime_error while the pool is draining.
  // Note: the pool forgets a queue once it has been emptied, so tasks added
  // to `tq` after that point are not picked up unless the queue is submitted
  // again.
  void addTasks(std::shared_ptr<TaskQueue> &tq)
  {
    if (drain_)
      throw std::runtime_error("Unable to add task to draining pool");
    std::lock_guard<std::mutex> guard(mtx_);
    taskQueues_.push_back(tq);
    cv_.notify_one();
  }

private:
  // need to keep track of threads, so we can join them
  std::vector<std::thread> workers_;
  // the task queues, plus the round-robin cursor into them
  std::list<std::shared_ptr<TaskQueue>> taskQueues_;
  std::list<std::shared_ptr<TaskQueue>>::iterator tqit_;
  // synchronization: mtx_ guards taskQueues_, tqit_ and changes to stop_
  std::mutex mtx_;
  std::condition_variable cv_;
  std::atomic<bool> stop_;
  std::atomic<bool> drain_;
};
} // namespace daggy

View File

@@ -0,0 +1,37 @@
#pragma once
#include <rapidjson/document.h>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "DAG.hpp"
#include "Defines.hpp"
#include "daggy/executors/task/TaskExecutor.hpp"
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
// Helpers for turning task definitions into a runnable DAG.
// NOTE(review): only prototypes are visible in this header; the notes below
// are inferred from names and signatures and should be confirmed against the
// implementation.
namespace daggy {
// The DAG type used throughout daggy: vertices are keyed by a string and
// carry a Task.
using TaskDAG = DAG<std::string, Task>;
// Presumably replaces every occurrence of `pattern` in `string` with
// `replacement` and returns the result (`string` is taken by value).
std::string globalSub(std::string string, const std::string &pattern,
const std::string &replacement);
// Produces one or more Commands from a raw command and a set of values.
std::vector<Command> interpolateValues(const std::vector<std::string> &raw,
const ConfigValues &values);
// Returns a new TaskSet derived from `tasks`; the executor is passed in,
// presumably for its expandTaskParameters() hook.
TaskSet expandTaskSet(const TaskSet &tasks,
executors::task::TaskExecutor &executor,
const ConfigValues &interpolatedValues = {});
// Builds a TaskDAG from `tasks`. `updates` is optional and maps a string
// (presumably the task name) to previously recorded state changes.
TaskDAG buildDAGFromTasks(
const TaskSet &tasks,
const std::unordered_map<std::string,
std::vector<loggers::dag_run::StateUpdateRecord>>
&updates = {});
// Modifies an existing DAG in place using `tasks`.
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks);
// Stream output for time points.
std::ostream &operator<<(std::ostream &os, const TimePoint &tp);
} // namespace daggy

View File

@@ -0,0 +1,36 @@
#pragma once
#include <daggy/ThreadPool.hpp>
#include "TaskExecutor.hpp"
namespace daggy::executors::task {
// TaskExecutor implementation backed by a ThreadPool.
// NOTE(review): member functions are defined outside this header; judging by
// the name, each task is presumably run as a forked child process -- confirm
// against the implementation.
class ForkingTaskExecutor : public TaskExecutor
{
public:
using Command = std::vector<std::string>;
// `nThreads` is presumably the size of the internal thread pool.
explicit ForkingTaskExecutor(size_t nThreads);
~ForkingTaskExecutor() override;
// Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task
std::future<AttemptRecord> execute(DAGRunID runID,
const std::string &taskName,
const Task &task) override;
bool stop(DAGRunID runID, const std::string &taskName) override;
private:
// NOTE(review): members are destroyed in reverse declaration order, so
// taskControls_ is destroyed before tp_. Unless the destructor stops the
// pool first, a worker still inside runTask() could be left holding a
// reference to a destroyed flag -- verify in the implementation.
ThreadPool tp_;
// Presumably protects taskControls_.
std::mutex taskControlsGuard_;
// `running` is a flag owned by the caller; presumably cleared by stop() to
// interrupt the task.
AttemptRecord runTask(const Task &task, std::atomic<bool> &running);
// One flag per task, keyed by a string.
std::unordered_map<std::string, std::atomic<bool>> taskControls_;
};
} // namespace daggy::executors::task

View File

@@ -0,0 +1,25 @@
#pragma once
#include "TaskExecutor.hpp"
namespace daggy::executors::task {
// TaskExecutor implementation with no data members.
// NOTE(review): judging by the name it performs no real work (useful for
// tests); the definitions are not visible here, so confirm.
class NoopTaskExecutor : public TaskExecutor
{
public:
using Command = std::vector<std::string>;
// Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task
std::future<AttemptRecord> execute(DAGRunID runID,
const std::string &taskName,
const Task &task) override;
bool stop(DAGRunID runID, const std::string &taskName) override;
};
} // namespace daggy::executors::task

View File

@@ -0,0 +1,46 @@
#pragma once
#include "TaskExecutor.hpp"
namespace daggy::executors::task {
// TaskExecutor implementation that tracks submitted jobs and resolves their
// results from a background monitor thread.
// NOTE(review): member functions are defined outside this header; judging by
// the name, jobs are submitted to a Slurm cluster -- confirm against the
// implementation.
class SlurmTaskExecutor : public TaskExecutor
{
public:
using Command = std::vector<std::string>;
SlurmTaskExecutor();
~SlurmTaskExecutor() override;
// Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task
std::future<AttemptRecord> execute(DAGRunID runID,
const std::string &taskName,
const Task &task) override;
bool stop(DAGRunID runID, const std::string &taskName) override;
private:
// Bookkeeping for one submitted job.
struct Job
{
// Fulfilled when the job's outcome is known; pairs with the future
// returned by execute().
std::promise<AttemptRecord> prom;
// Paths of the files holding the job's stdout / stderr (per the names).
std::string stdoutFile;
std::string stderrFile;
DAGRunID runID;
std::string taskName;
};
// Presumably protects runningJobs_.
std::mutex promiseGuard_;
// Jobs in flight, keyed by a size_t (presumably the scheduler's job id).
std::unordered_map<size_t, Job> runningJobs_;
// Presumably the monitor thread's run flag.
std::atomic<bool> running_;
// Monitors jobs and resolves promises
std::thread monitorWorker_;
void monitor();
};
} // namespace daggy::executors::task

View File

@@ -0,0 +1,37 @@
#pragma once
#include <chrono>
#include <daggy/Defines.hpp>
#include <future>
#include <string>
#include <thread>
#include <vector>
/*
Executors run Tasks, returning a future with the results.
If there are many retries, logs are returned for each attempt.
*/
namespace daggy::executors::task {
// Abstract interface implemented by every task executor.
class TaskExecutor
{
public:
virtual ~TaskExecutor() = default;
// Validates the job to ensure that all required values are set and are of
// the right type,
virtual bool validateTaskParameters(const ConfigValues &job) = 0;
// Will use the expansion values to return the fully expanded tasks.
virtual std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) = 0;
// Starts execution of a task. The outcome is delivered through the returned
// future; whether the call itself blocks is up to the implementation.
virtual std::future<AttemptRecord> execute(DAGRunID runID,
const std::string &taskName,
const Task &task) = 0;
// Kill a currently executing task. This will resolve the future.
virtual bool stop(DAGRunID runID, const std::string &taskName) = 0;
};
} // namespace daggy::executors::task

View File

@@ -0,0 +1,50 @@
#pragma once
#include <string>
#include "../../Defines.hpp"
#include "Defines.hpp"
/*
DAGRunLogger represents the interface to store all the state information
for daggy to run. Abstracted in case other back-end solutions need to
be supported.
*/
namespace daggy::loggers::dag_run {
// Abstract storage interface for DAG run state: what was submitted, how the
// run and its tasks changed state, and what each attempt produced.
class DAGRunLogger
{
public:
virtual ~DAGRunLogger() = default;
// Insertion / Updates
// Registers a new run for `dagSpec` and returns the ID that identifies it
// in all other calls.
virtual DAGRunID startDAGRun(const DAGSpec &dagSpec) = 0;
// Adds a task to / replaces a task of an existing run.
virtual void addTask(DAGRunID dagRunID, const std::string &taskName,
const Task &task) = 0;
virtual void updateTask(DAGRunID dagRunID, const std::string &taskName,
const Task &task) = 0;
// Records a state change of the run as a whole.
virtual void updateDAGRunState(DAGRunID dagRunID, RunState state) = 0;
// Records the outcome of one attempt at a task.
virtual void logTaskAttempt(DAGRunID dagRunID, const std::string &taskName,
const AttemptRecord &attempt) = 0;
// Records a state change of a single task.
virtual void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) = 0;
// Querying
virtual DAGSpec getDAGSpec(DAGRunID dagRunID) = 0;
// Lists run summaries. NOTE(review): the exact meaning of an empty `tag`
// and of `all` is defined by the implementations -- confirm there.
virtual std::vector<DAGRunSummary> queryDAGRuns(const std::string &tag = "",
bool all = false) = 0;
virtual RunState getDAGRunState(DAGRunID dagRunID) = 0;
// Full record of a run (see DAGRunRecord).
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
virtual Task getTask(DAGRunID dagRunID, const std::string &taskName) = 0;
virtual RunState getTaskState(DAGRunID dagRunID,
const std::string &taskName) = 0;
};
} // namespace daggy::loggers::dag_run

View File

@@ -0,0 +1,39 @@
#pragma once
#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "../../Defines.hpp"
namespace daggy::loggers::dag_run {
// One state transition: the state and the time associated with it.
struct StateUpdateRecord
{
TimePoint time;
RunState state;
};
// Complete record of a DAG run: its spec plus, per task, the current state,
// every attempt and every state change, and the run-level state changes.
// Pretty heavy weight, since it carries the full history of the run.
struct DAGRunRecord
{
DAGSpec dagSpec;
// The maps below are keyed by a string (presumably the task name).
std::unordered_map<std::string, RunState> taskRunStates;
std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
std::unordered_map<std::string, std::vector<StateUpdateRecord>>
taskStateChanges;
// State changes of the run as a whole.
std::vector<StateUpdateRecord> dagStateChanges;
};
// Lightweight overview of a DAG run, as returned by
// DAGRunLogger::queryDAGRuns().
struct DAGRunSummary
{
DAGRunID runID;
std::string tag;
RunState runState;
TimePoint startTime;
TimePoint lastUpdate;
// A count per RunState (presumably the number of tasks in that state).
std::unordered_map<RunState, size_t> taskStateCounts;
};
} // namespace daggy::loggers::dag_run

View File

@@ -0,0 +1,60 @@
#pragma once
#include <iostream>
#include <mutex>
#include "DAGRunLogger.hpp"
#include "Defines.hpp"
namespace daggy::loggers::dag_run {
/*
* This logger should only be used for debug purposes. It is bound to the
* std::ostream passed to its constructor and keeps run records in a vector
* member rather than in any persistent store.
* NOTE(review): the member functions are defined outside this header;
* presumably events are printed to that stream -- confirm there.
*/
class OStreamLogger : public DAGRunLogger
{
public:
// `os` is stored by reference and must outlive the logger.
explicit OStreamLogger(std::ostream &os);
~OStreamLogger() override;
// Execution
DAGRunID startDAGRun(const DAGSpec &dagSpec) override;
void addTask(DAGRunID dagRunID, const std::string &taskName,
const Task &task) override;
void updateTask(DAGRunID dagRunID, const std::string &taskName,
const Task &task) override;
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
void logTaskAttempt(DAGRunID, const std::string &taskName,
const AttemptRecord &attempt) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) override;
// Querying
DAGSpec getDAGSpec(DAGRunID dagRunID) override;
std::vector<DAGRunSummary> queryDAGRuns(const std::string &tag = "",
bool all = false) override;
RunState getDAGRunState(DAGRunID dagRunID) override;
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
RunState getTaskState(DAGRunID dagRunID,
const std::string &taskName) override;
private:
// Presumably serializes access to os_ and dagRuns_.
std::mutex guard_;
std::ostream &os_;
// Run records; presumably indexed by DAGRunID.
std::vector<DAGRunRecord> dagRuns_;
// NOTE(review): the underscore-prefixed variants are presumably the
// lock-free internals of the public methods, for use while guard_ is
// already held -- confirm against the implementation.
void _updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state);
void _updateDAGRunState(DAGRunID dagRunID, RunState state);
};
} // namespace daggy::loggers::dag_run

View File

@@ -0,0 +1,129 @@
#pragma once
#include <iterator>
#ifdef DAGGY_ENABLE_REDIS
#include <hiredis.h>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
/*
Why a Redis Helper? This wraps hiredis structs in a class with a destructor
that will clean up after itself.
The query() method is a bit wonky with all the variants, but it works well
enough.
Important note: The hiredis context is not thread safe, so neither is this.
Create contexts as needed.
*/
namespace daggy::loggers::dag_run::redis {
// A single value returned by redis: a string, a floating point number or an
// unsigned integer.
using RedisDatum = std::variant<std::string, double, size_t>;

// Either a single Datum, or a vector of Datum
//
// The accessors throw std::bad_variant_access when the stored value does not
// have the requested shape or type.
struct RedisData
{
  // Copy and move assignment from another RedisData are the implicitly
  // generated ones (the type owns no raw resource).

  // Stores a single datum.
  RedisData &operator=(const RedisDatum &val)
  {
    data_ = val;
    return *this;
  }

  // Stores a list of data.
  RedisData &operator=(const std::vector<RedisDatum> &other)
  {
    data_ = other;
    return *this;
  }

  // Returns the stored single datum.
  RedisDatum asDatum() const
  {
    return std::get<RedisDatum>(data_);
  }

  // Returns the stored single datum as a T.
  template <typename T>
  T as() const
  {
    return std::get<T>(std::get<RedisDatum>(data_));
  }

  // Returns the stored list with every element converted to T.
  template <typename T>
  std::vector<T> asList() const
  {
    const auto &inp = std::get<std::vector<RedisDatum>>(data_);
    std::vector<T> data;
    data.reserve(inp.size());
    std::transform(inp.begin(), inp.end(), std::back_inserter(data),
                   [](const auto &i) { return std::get<T>(i); });
    return data;
  }

  // Interprets the stored list as alternating key / value entries and
  // returns them as a map. Throws std::runtime_error if the list has an odd
  // number of entries. If a key repeats, the first occurrence is kept.
  template <typename T, typename V>
  std::unordered_map<T, V> asHash() const
  {
    const auto &inp = std::get<std::vector<RedisDatum>>(data_);
    if (inp.size() % 2 != 0)
      throw std::runtime_error("Number of items is not even");

    std::unordered_map<T, V> data;
    data.reserve(inp.size() / 2);
    for (size_t i = 0; i < inp.size(); i += 2) {
      data.emplace(std::get<T>(inp[i]), std::get<V>(inp[i + 1]));
    }
    return data;
  }

  std::variant<RedisDatum, std::vector<RedisDatum>> data_;
};
// Owns one hiredis connection and releases it on destruction.
class RedisContext
{
public:
  RedisContext(const std::string &host, int port);

  // Sends a command (the arguments are handed to redisCommand as-is, i.e. a
  // printf-style format followed by its values) and returns the parsed
  // reply. Calls are serialized through contextGuard_.
  //
  // Throws std::runtime_error if no reply is received or redis answers with
  // an error reply.
  template <class... Args>
  RedisData query(Args &&...args)
  {
    std::lock_guard<std::mutex> lock(contextGuard_);
    redisReply *reply = static_cast<redisReply *>(
        redisCommand(ctx_, std::forward<Args>(args)...));

    if (!reply) {
      throw std::runtime_error("Cannot query redis.");
    }

    // Release the reply on every exit path, including the throws below and
    // any exception escaping parseReply_().
    struct ReplyGuard
    {
      redisReply *reply;
      ~ReplyGuard()
      {
        freeReplyObject(reply);
      }
    } guard{reply};

    if (reply->type == REDIS_REPLY_ERROR) {
      if (reply->str) {
        // The message is copied into the exception before the reply is freed.
        std::string error{reply->str};
        throw std::runtime_error("Error querying redis: " + error);
      }
      throw std::runtime_error("Unknown error querying redis");
    }

    return parseReply_(reply);
  }

  ~RedisContext()
  {
    redisFree(ctx_);
  }

private:
  // Converts a hiredis reply into RedisData (defined out of line).
  RedisData parseReply_(const redisReply *reply);

  redisContext *ctx_;
  // Serializes access to ctx_ within this object.
  std::mutex contextGuard_;
};
} // namespace daggy::loggers::dag_run::redis
#endif

View File

@@ -0,0 +1,102 @@
#pragma once
#ifdef DAGGY_ENABLE_REDIS
#include <iostream>
#include <mutex>
#include "DAGRunLogger.hpp"
#include "Defines.hpp"
#include "RedisHelper.hpp"
namespace daggy::loggers::dag_run {
/*
  RunIDS are obtained from the counter dagRunIDs;

  Keys are constructed from the dagRunID.

  - dagRunIDs is an INTEGER COUNTER that returns the next dagRunID

  - {runid}_spec is a HASH from taskName -> taskJSON

  {
    "tag": tag,
    "tasks": { ...tasks... },
  }

  Every per-run key is built as "<prefix>_<runID>_<suffix>"; see the key
  helpers at the bottom of the class.
*/
class RedisLogger : public DAGRunLogger
{
public:
  explicit RedisLogger(const std::string &prefix = "daggy",
                       const std::string &host = "127.0.0.1",
                       int port = 6379);

  // Execution
  DAGRunID startDAGRun(const DAGSpec &dagSpec) override;

  void addTask(DAGRunID dagRunID, const std::string &taskName,
               const Task &task) override;

  void updateTask(DAGRunID dagRunID, const std::string &taskName,
                  const Task &task) override;

  void updateDAGRunState(DAGRunID dagRunID, RunState state) override;

  void logTaskAttempt(DAGRunID, const std::string &taskName,
                      const AttemptRecord &attempt) override;

  void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
                       RunState state) override;

  // Querying
  DAGSpec getDAGSpec(DAGRunID dagRunID) override;

  std::vector<DAGRunSummary> queryDAGRuns(const std::string &tag = "",
                                          bool all = false) override;

  RunState getDAGRunState(DAGRunID dagRunID) override;

  DAGRunRecord getDAGRun(DAGRunID dagRunID) override;

  Task getTask(DAGRunID dagRunID, const std::string &taskName) override;

  RunState getTaskState(DAGRunID dagRunID,
                        const std::string &taskName) override;

private:
  const std::string prefix_;
  const std::string dagRunIDsKey_;
  redis::RedisContext ctx_;

  // Common prefix of every key belonging to one run: "<prefix>_<runID>_".
  // Returned as a plain (non-const) value so the result can be moved from.
  std::string getDAGPrefix_(DAGRunID runID) const
  {
    return prefix_ + "_" + std::to_string(runID) + "_";
  }

// Generates `std::string name(DAGRunID) const`, returning
// "<prefix>_<runID>_<extra>".
#define GET_DAG_KEY(name, extra)                \
  inline std::string name(DAGRunID runID) const \
  {                                             \
    return getDAGPrefix_(runID) + extra;        \
  }

  GET_DAG_KEY(getTagKey_, "tag")
  GET_DAG_KEY(getTasksKey_, "tasks")
  GET_DAG_KEY(getDAGStateKey_, "state")
  GET_DAG_KEY(getDAGStateUpdateKey_, "stateUpdate")
  GET_DAG_KEY(getTaskStatesKey_, "taskStates")
  GET_DAG_KEY(getTaskVariablesKey_, "taskVariables")
  GET_DAG_KEY(getTaskDefaultsKey_, "taskDefaults")
  GET_DAG_KEY(getStartTimeKey_, "startTime")
  GET_DAG_KEY(getLastUpdateKey_, "lastUpdate")

// Generates `std::string name(DAGRunID, const std::string &taskName) const`,
// returning "<prefix>_<runID>_<category>_<taskName>".
#define GET_TASK_KEY(name, category)                                         \
  inline std::string name(DAGRunID runID, const std::string &taskName) const \
  {                                                                          \
    return getDAGPrefix_(runID) + category + "_" + taskName;                 \
  }

  GET_TASK_KEY(getTaskStateUpdateKey_, "taskUpdateState")
  GET_TASK_KEY(getTaskAttemptKey_, "taskAttempt")

// The generator macros are an implementation detail of this class; do not
// let them leak into every translation unit that includes this header.
#undef GET_TASK_KEY
#undef GET_DAG_KEY
};
} // namespace daggy::loggers::dag_run
#endif