Large re-organization to split daggyd away from the core libdaggy.

This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
This commit is contained in:
Ian Roddis
2021-10-18 16:28:40 -03:00
parent 612bc8af8a
commit 470a6f2bb7
59 changed files with 586 additions and 52 deletions

View File

@@ -0,0 +1 @@
# Pull the dag_run subdirectory (and the CMakeLists.txt it contains) into the build.
add_subdirectory(dag_run)

View File

@@ -0,0 +1,5 @@
# Add the DAG-run logger implementation files to the ${PROJECT_NAME} target.
# PRIVATE: the files are compiled into this target only and are not propagated
# to targets that link against it.
# NOTE(review): the Redis sources appear to be wrapped in
# `#ifdef DAGGY_ENABLE_REDIS`, so they presumably compile to (almost) nothing
# when that macro is not defined -- confirm against the build options.
target_sources(${PROJECT_NAME} PRIVATE
OStreamLogger.cpp
RedisLogger.cpp
RedisHelper.cpp
)

View File

@@ -0,0 +1,180 @@
#include <enum.h>
#include <algorithm>
#include <daggy/Serialization.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <iterator>
namespace daggy::loggers::dag_run {
OStreamLogger::OStreamLogger(std::ostream &os)
: os_(os)
{
}
// Drops every stored run record while holding the logger's mutex.
OStreamLogger::~OStreamLogger()
{
  const std::lock_guard<std::mutex> hold{guard_};
  dagRuns_.clear();
}
// Execution
// Registers a new DAG run and announces it on the output stream.
//
// The new run's ID is the number of runs stored before this call, i.e. its
// index in dagRuns_. Every task in the spec, and then the run itself, is put
// into the QUEUED state (each of those transitions prints its own line),
// after which a header line and one line per task's job config are written.
//
// Returns the ID assigned to the new run.
DAGRunID OStreamLogger::startDAGRun(const DAGSpec &dagSpec)
{
  const std::lock_guard<std::mutex> hold{guard_};

  const size_t newRunID = dagRuns_.size();
  dagRuns_.push_back(DAGRunRecord{.dagSpec = dagSpec});

  const auto &tasks = dagSpec.tasks;
  for (const auto &entry : tasks) {
    _updateTaskState(newRunID, entry.first, RunState::QUEUED);
  }
  _updateDAGRunState(newRunID, RunState::QUEUED);

  os_ << "Starting new DAGRun tagged " << dagSpec.tag << " with ID "
      << newRunID << " and " << tasks.size() << " tasks" << std::endl;

  for (const auto &entry : tasks) {
    os_ << "TASK (" << entry.first << "): " << configToJSON(entry.second.job)
        << std::endl;
  }

  return newRunID;
}
// Adds (or replaces) a task definition in an existing run and marks it QUEUED.
//
// dagRunID: ID of the run to modify.
// taskName: key under which the task is stored in the run's DAGSpec.
// task:     definition to store.
//
// Throws std::out_of_range if dagRunID does not name a stored run. The lookup
// is bounds-checked, matching the query methods, rather than indexing past the
// end of dagRuns_.
void OStreamLogger::addTask(DAGRunID dagRunID, const std::string &taskName,
                            const Task &task)
{
  std::lock_guard<std::mutex> lock(guard_);
  dagRuns_.at(dagRunID).dagSpec.tasks[taskName] = task;
  _updateTaskState(dagRunID, taskName, RunState::QUEUED);
}
// Overwrites (or inserts) the stored definition of a task without touching
// its run state.
//
// Throws std::out_of_range if dagRunID does not name a stored run. The lookup
// is bounds-checked, matching the query methods, rather than indexing past the
// end of dagRuns_.
void OStreamLogger::updateTask(DAGRunID dagRunID, const std::string &taskName,
                               const Task &task)
{
  std::lock_guard<std::mutex> lock(guard_);
  dagRuns_.at(dagRunID).dagSpec.tasks[taskName] = task;
}
// Public entry point for a DAG-level state change: takes the logger's mutex
// and hands off to the non-locking _updateDAGRunState helper.
void OStreamLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
{
  const std::lock_guard<std::mutex> hold{guard_};
  _updateDAGRunState(dagRunID, state);
}
// Records a DAG-level state change and prints it. Does not lock; the callers
// in this file hold guard_ before calling.
//
// Throws std::out_of_range if dagRunID does not name a stored run. The run is
// looked up before anything is printed so that an invalid ID never produces a
// log line for a change that was not recorded.
void OStreamLogger::_updateDAGRunState(DAGRunID dagRunID, RunState state)
{
  auto &dagRun = dagRuns_.at(dagRunID);
  os_ << "DAG State Change(" << dagRunID << "): " << state._to_string()
      << std::endl;
  dagRun.dagStateChanges.push_back({Clock::now(), state});
}
// Prints and stores the outcome of one attempt at running a task.
//
// The printed message is the attempt's output log when its return code is 0
// and its error log otherwise. The attempt is appended to the list kept for
// taskName in the run record.
//
// Throws std::out_of_range if dagRunID does not name a stored run. The run is
// looked up (bounds-checked) before printing, so an invalid ID neither logs
// nor indexes past the end of dagRuns_.
void OStreamLogger::logTaskAttempt(DAGRunID dagRunID,
                                   const std::string &taskName,
                                   const AttemptRecord &attempt)
{
  std::lock_guard<std::mutex> lock(guard_);
  auto &dagRun = dagRuns_.at(dagRunID);
  const std::string &msg =
      attempt.rc == 0 ? attempt.outputLog : attempt.errorLog;
  os_ << "Task Attempt (" << dagRunID << '/' << taskName << "): Ran with RC "
      << attempt.rc << ": " << msg << std::endl;
  dagRun.taskAttempts[taskName].push_back(attempt);
}
// Public entry point for a task-level state change: takes the logger's mutex
// and hands off to the non-locking _updateTaskState helper.
void OStreamLogger::updateTaskState(DAGRunID dagRunID,
                                    const std::string &taskName,
                                    RunState state)
{
  const std::lock_guard<std::mutex> hold{guard_};
  _updateTaskState(dagRunID, taskName, state);
}
// Records a task-level state change and prints it. Does not lock; the callers
// in this file hold guard_ before calling.
//
// Appends a timestamped entry to the task's change history, then sets the
// task's current state (inserting the task if it has none yet), then writes
// one line to the output stream. The bounds-checked run lookup throws
// std::out_of_range for an unknown dagRunID.
void OStreamLogger::_updateTaskState(DAGRunID dagRunID,
                                     const std::string &taskName,
                                     RunState state)
{
  auto &record = dagRuns_.at(dagRunID);
  record.taskStateChanges[taskName].push_back({Clock::now(), state});

  // emplace leaves an existing entry untouched, so overwrite it in that case.
  const auto inserted = record.taskRunStates.emplace(taskName, state);
  if (!inserted.second) {
    inserted.first->second = state;
  }

  os_ << "Task State Change (" << dagRunID << '/' << taskName
      << "): " << state._to_string() << std::endl;
}
// Querying
// Returns a copy of the DAGSpec stored for the given run.
// Throws std::out_of_range (from the bounds-checked lookup) for an unknown ID.
DAGSpec OStreamLogger::getDAGSpec(DAGRunID dagRunID)
{
  const std::lock_guard<std::mutex> hold{guard_};
  const auto &record = dagRuns_.at(dagRunID);
  return record.dagSpec;
}
// Summarizes the DAG runs held by this logger.
//
// tag: when non-empty, only runs whose DAGSpec tag equals it are reported.
// all: when false, runs whose latest DAG state is COMPLETED are skipped.
//
// Each summary carries the run's ID, its tag, its latest DAG state, the time
// of its first DAG state change, the time of its newest DAG or task state
// change, and a per-state count of the tasks' current states.
std::vector<DAGRunSummary> OStreamLogger::queryDAGRuns(const std::string &tag,
                                                       bool all)
{
  std::vector<DAGRunSummary> summaries;
  std::lock_guard<std::mutex> lock(guard_);

  // A run's ID is its index in dagRuns_ (startDAGRun hands out
  // dagRuns_.size()), so iterate by index. That keeps the reported ID correct
  // even when earlier runs are filtered out by the `continue`s below.
  for (size_t runID = 0; runID < dagRuns_.size(); ++runID) {
    const auto &run = dagRuns_[runID];

    if ((!all) &&
        (run.dagStateChanges.back().state == +RunState::COMPLETED)) {
      continue;
    }
    if (!tag.empty() and tag != run.dagSpec.tag)
      continue;

    // Newest task-level change across all tasks of this run.
    TimePoint lastTaskUpdate;
    for (const auto &[_, updates] : run.taskStateChanges) {
      for (const auto &update : updates) {
        if (update.time > lastTaskUpdate)
          lastTaskUpdate = update.time;
      }
    }

    DAGRunSummary summary{
        .runID = runID,
        .tag = run.dagSpec.tag,
        .runState = run.dagStateChanges.back().state,
        .startTime = run.dagStateChanges.front().time,
        .lastUpdate = std::max<TimePoint>(lastTaskUpdate,
                                          run.dagStateChanges.back().time)};

    for (const auto &[_, taskState] : run.taskRunStates) {
      summary.taskStateCounts[taskState]++;
    }
    summaries.emplace_back(summary);
  }
  return summaries;
}
// Returns a copy of the full record kept for the given run.
// Throws std::out_of_range (from the bounds-checked lookup) for an unknown ID.
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID)
{
  const std::lock_guard<std::mutex> hold{guard_};
  const auto &record = dagRuns_.at(dagRunID);
  return record;
}
// Returns the state from the most recent DAG-level change of the given run.
// Throws std::out_of_range (from the bounds-checked lookup) for an unknown ID.
RunState OStreamLogger::getDAGRunState(DAGRunID dagRunID)
{
  const std::lock_guard<std::mutex> hold{guard_};
  const auto &history = dagRuns_.at(dagRunID).dagStateChanges;
  return history.back().state;
}
// Returns a copy of the named task's definition from the given run.
// Both lookups are bounds/key-checked via at(), which throws
// std::out_of_range for an unknown run ID or task name.
Task OStreamLogger::getTask(DAGRunID dagRunID, const std::string &taskName)
{
  const std::lock_guard<std::mutex> hold{guard_};
  const auto &tasks = dagRuns_.at(dagRunID).dagSpec.tasks;
  return tasks.at(taskName);
}
// Returns the current state recorded for the named task of the given run.
// Both lookups are bounds/key-checked via at(), which throws
// std::out_of_range for an unknown run ID or task name.
RunState OStreamLogger::getTaskState(DAGRunID dagRunID,
                                     const std::string &taskName)
{
  const std::lock_guard<std::mutex> hold{guard_};
  const auto &currentStates = dagRuns_.at(dagRunID).taskRunStates;
  return currentStates.at(taskName);
}
} // namespace daggy::loggers::dag_run

View File

@@ -0,0 +1,90 @@
#include <stdexcept>
#ifdef DAGGY_ENABLE_REDIS
#include <daggy/loggers/dag_run/RedisHelper.hpp>
namespace daggy::loggers::dag_run::redis {
// Connects to the redis server at host:port with a 250 ms connect timeout.
//
// Throws std::runtime_error if the connection cannot be established. hiredis
// signals that in two ways: a null context (allocation failure), or a
// non-null context whose `err` field is set (e.g. connection refused or timed
// out). Both are checked. In the second case the context is released first,
// because the destructor does not run when the constructor throws.
RedisContext::RedisContext(const std::string &host, int port)
{
  const struct timeval timeout = {0, 250000}; // .250 seconds
  ctx_ = redisConnectWithTimeout(host.c_str(), port, timeout);
  if (ctx_ == nullptr) {
    throw std::runtime_error("Unable to ping redis server at " + host + ":" +
                             std::to_string(port));
  }
  if (ctx_->err != 0) {
    // Copy the reason before freeing the context that owns it.
    const std::string reason(ctx_->errstr);
    redisFree(ctx_);
    ctx_ = nullptr;
    throw std::runtime_error("Unable to ping redis server at " + host + ":" +
                             std::to_string(port) + ": " + reason);
  }
}
// Converts a hiredis reply into a RedisData value.
//
// - ERROR / STRING / VERB: the reply text; when the text is wrapped in a pair
//   of double quotes, the quotes are stripped.
// - DOUBLE:  the reply's dval.
// - INTEGER: the reply's integer, converted to size_t.
// - ARRAY:   each element parsed recursively and collected as a list of
//   datums.
// - Any other reply type leaves the result default-constructed.
//
// NOTE(review): an ERROR reply is returned as ordinary string data rather
// than being reported to the caller -- confirm that this is intended.
RedisData RedisContext::parseReply_(const redisReply *reply)
{
  RedisData data;

  switch (reply->type) {
    case REDIS_REPLY_ERROR:
    case REDIS_REPLY_STRING:
    case REDIS_REPLY_VERB: {
      // Use the explicit length so payloads containing NUL bytes are not
      // truncated.
      std::string raw(reply->str, reply->len);
      // Only a string of at least two characters can be a quoted value.
      if (raw.size() >= 2 and raw.front() == '"' and raw.back() == '"') {
        data = raw.substr(1, raw.size() - 2);
      }
      else {
        data = RedisDatum{raw};
      }
      break;
    }
    case REDIS_REPLY_DOUBLE: {
      data = RedisDatum{reply->dval};
      break;
    }
    case REDIS_REPLY_INTEGER: {
      data = RedisDatum{static_cast<size_t>(reply->integer)};
      break;
    }
    case REDIS_REPLY_ARRAY: {
      std::vector<RedisDatum> parts;
      parts.reserve(reply->elements);
      for (size_t i = 0UL; i < reply->elements; ++i) {
        parts.push_back(parseReply_(reply->element[i]).asDatum());
      }
      data = parts;
      break;
    }
    default:
      // Other reply types carry nothing that is extracted here.
      break;
  }
  return data;
}
} // namespace daggy::loggers::dag_run::redis
#endif

View File

@@ -0,0 +1,265 @@
#include <stdexcept>
#ifdef DAGGY_ENABLE_REDIS
#include <enum.h>
#include <algorithm>
#include <daggy/Serialization.hpp>
#include <daggy/loggers/dag_run/RedisLogger.hpp>
#include <iomanip>
#include <iterator>
namespace daggy::loggers::dag_run {
// Connects to redis at host:port and makes sure the run-ID counter exists.
//
// The counter lives under "<prefix>_dagRunIDs"; when an EXISTS query reports
// zero for that key it is created with the value "0".
// NOTE(review): dagRunIDsKey_ is built from prefix_, so this relies on prefix_
// being declared before dagRunIDsKey_ in the class -- confirm in the header.
RedisLogger::RedisLogger(const std::string &prefix, const std::string &host,
                         int port)
  : prefix_(prefix)
  , dagRunIDsKey_(prefix_ + "_dagRunIDs")
  , ctx_(host, port)
{
  const auto keyCount =
      ctx_.query("exists %s", dagRunIDsKey_.c_str()).as<size_t>();
  if (keyCount != 0) {
    return;
  }
  ctx_.query("set %s %s", dagRunIDsKey_.c_str(), "0");
}
// Execution
// Creates a new DAG run in redis and returns its ID.
//
// The ID is obtained by incrementing the run-ID counter key. The run's tag,
// start time, task variables and job defaults are stored under their own
// keys. Each task is stored as JSON in the run's task hash and put into the
// QUEUED state. Finally the run itself is put into the QUEUED state.
//
// Each task is queued exactly once, so its state history begins with a single
// QUEUED record.
DAGRunID RedisLogger::startDAGRun(const DAGSpec &dagSpec)
{
  auto resp = ctx_.query("incr %s", dagRunIDsKey_.c_str());
  DAGRunID runID = resp.as<size_t>();

  // Run-level metadata
  ctx_.query("SET %s %s", getTagKey_(runID).c_str(), dagSpec.tag.c_str());
  ctx_.query("SET %s %s", getStartTimeKey_(runID).c_str(),
             timePointToString(Clock::now()).c_str());
  ctx_.query("SET %s %s", getTaskVariablesKey_(runID).c_str(),
             configToJSON(dagSpec.taskConfig.variables).c_str());
  ctx_.query("SET %s %s", getTaskDefaultsKey_(runID).c_str(),
             configToJSON(dagSpec.taskConfig.jobDefaults).c_str());

  // Store tasks, initial states
  for (const auto &[taskName, task] : dagSpec.tasks) {
    ctx_.query("HSET %s %s %s", getTasksKey_(runID).c_str(), taskName.c_str(),
               taskToJSON(task).c_str());
    updateTaskState(runID, taskName, RunState::QUEUED);
  }

  // Update the dag run state
  updateDAGRunState(runID, RunState::QUEUED);
  return runID;
}
// Adds a task to an existing run: stores its definition through updateTask,
// then marks it QUEUED through updateTaskState.
void RedisLogger::addTask(DAGRunID dagRunID, const std::string &taskName,
                          const Task &task)
{
  // Definition first, so the task is present before its state is recorded.
  updateTask(dagRunID, taskName, task);

  const RunState initialState = RunState::QUEUED;
  updateTaskState(dagRunID, taskName, initialState);
}
// Writes the task's JSON form into the run's task hash under taskName,
// replacing any definition already stored there (HSET).
void RedisLogger::updateTask(DAGRunID dagRunID, const std::string &taskName,
                             const Task &task)
{
  const auto tasksKey = getTasksKey_(dagRunID);
  const auto taskJSON = taskToJSON(task);
  ctx_.query("HSET %s %s %s", tasksKey.c_str(), taskName.c_str(),
             taskJSON.c_str());
}
// Records a DAG-level state change in redis.
//
// Three writes, in order: the run's current state key, the run's last-update
// timestamp key, and a JSON state-update record appended (RPUSH) to the run's
// state history list.
void RedisLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
{
  // Current state
  const auto stateKey = getDAGStateKey_(dagRunID);
  ctx_.query("SET %s %s", stateKey.c_str(), state._to_string());

  // Last-update timestamp
  const auto lastUpdateKey = getLastUpdateKey_(dagRunID);
  const auto stamp = timePointToString(Clock::now());
  ctx_.query("SET %s %s", lastUpdateKey.c_str(), stamp.c_str());

  // History entry
  const StateUpdateRecord entry{.time = Clock::now(), .state = state};
  const auto historyKey = getDAGStateUpdateKey_(dagRunID);
  const auto entryJSON = stateUpdateRecordToJSON(entry);
  ctx_.query("RPUSH %s %s", historyKey.c_str(), entryJSON.c_str());
}
// Appends (RPUSH) the JSON form of one task attempt to the attempt list kept
// for this run/task pair.
void RedisLogger::logTaskAttempt(DAGRunID dagRunID,
                                 const std::string &taskName,
                                 const AttemptRecord &attempt)
{
  const std::string payload = attemptRecordToJSON(attempt);
  const auto listKey = getTaskAttemptKey_(dagRunID, taskName);
  ctx_.query("RPUSH %s %s", listKey.c_str(), payload.c_str());
}
// Records a task-level state change in redis.
//
// Three writes, in order: the task's entry in the run's task-state hash, the
// run's last-update timestamp key, and a JSON state-update record appended
// (RPUSH) to the task's state history list.
void RedisLogger::updateTaskState(DAGRunID dagRunID,
                                  const std::string &taskName, RunState state)
{
  // Set the state
  ctx_.query(R"(HSET %s %s %s)", getTaskStatesKey_(dagRunID).c_str(),
             taskName.c_str(), state._to_string());
  // Every %s argument must be a C string; the key is converted with c_str()
  // like in every other query in this file.
  ctx_.query(R"(SET %s %s)", getLastUpdateKey_(dagRunID).c_str(),
             timePointToString(Clock::now()).c_str());

  // Add the update record
  StateUpdateRecord rec{.time = Clock::now(), .state = state};
  ctx_.query("RPUSH %s %s",
             getTaskStateUpdateKey_(dagRunID, taskName).c_str(),
             stateUpdateRecordToJSON(rec).c_str());
}
// Querying
// Rebuilds the DAGSpec of a run from redis: its tag, every task in the run's
// task hash (parsed from JSON), the task variables and the job defaults.
DAGSpec RedisLogger::getDAGSpec(DAGRunID dagRunID)
{
  DAGSpec result;

  // Tag
  const auto tagKey = getTagKey_(dagRunID);
  result.tag = ctx_.query("GET %s", tagKey.c_str()).as<std::string>();

  // Tasks
  const auto tasksKey = getTasksKey_(dagRunID);
  auto storedTasks = ctx_.query("HGETALL %s", tasksKey.c_str())
                         .asHash<std::string, std::string>();
  for (const auto &[name, json] : storedTasks) {
    result.tasks.emplace(name, taskFromJSON(name, json));
  }

  // Task variables
  const auto variablesKey = getTaskVariablesKey_(dagRunID);
  auto variablesJSON =
      ctx_.query("GET %s", variablesKey.c_str()).as<std::string>();
  result.taskConfig.variables = configFromJSON(variablesJSON);

  // Job defaults
  const auto defaultsKey = getTaskDefaultsKey_(dagRunID);
  auto defaultsJSON =
      ctx_.query("GET %s", defaultsKey.c_str()).as<std::string>();
  result.taskConfig.jobDefaults = configFromJSON(defaultsJSON);

  return result;
}
std::vector<DAGRunSummary> RedisLogger::queryDAGRuns(const std::string &tag,
bool all)
{
std::vector<DAGRunSummary> summaries;
auto reply = ctx_.query("GET %s", dagRunIDsKey_.c_str());
size_t maxRuns = std::stoull(reply.as<std::string>());
RunState state = RunState::QUEUED;
for (size_t runID = 1; runID <= maxRuns; ++runID) {
try {
state = getDAGRunState(runID);
}
catch (std::runtime_error &e) {
continue;
}
if (!all and state == +RunState::COMPLETED)
continue;
const auto dagTag =
ctx_.query("GET %s", getTagKey_(runID).c_str()).as<std::string>();
if (!tag.empty() and dagTag != tag)
continue;
const auto startTime =
ctx_.query("GET %s", getStartTimeKey_(runID).c_str())
.as<std::string>();
const auto lastTime =
ctx_.query("GET %s", getLastUpdateKey_(runID).c_str())
.as<std::string>();
DAGRunSummary summary{
.runID = runID,
.tag = dagTag,
.runState = state,
.startTime = stringToTimePoint(startTime),
.lastUpdate = stringToTimePoint(lastTime),
};
auto taskStates =
ctx_.query("HGETALL %s", getTaskStatesKey_(runID).c_str())
.asHash<std::string, std::string>();
for (const auto &[taskName, state] : taskStates) {
auto taskState = RunState::_from_string(state.c_str());
summary.taskStateCounts[taskState]++;
}
summaries.emplace_back(summary);
}
return summaries;
}
// Rebuilds the full record of a run from redis: its spec, its DAG-level state
// history, the current state of every task in the task-state hash and, for
// each task named in the spec, its attempt list and its state history.
DAGRunRecord RedisLogger::getDAGRun(DAGRunID dagRunID)
{
  DAGRunRecord record;
  record.dagSpec = getDAGSpec(dagRunID);

  // DAG-level state history
  const auto dagHistoryKey = getDAGStateUpdateKey_(dagRunID);
  auto dagHistoryJSON = ctx_.query("LRANGE %s 0 -1", dagHistoryKey.c_str())
                            .asList<std::string>();
  for (const auto &json : dagHistoryJSON) {
    record.dagStateChanges.push_back(stateUpdateRecordFromJSON(json));
  }

  // Current task states
  const auto statesKey = getTaskStatesKey_(dagRunID);
  auto storedStates = ctx_.query("HGETALL %s", statesKey.c_str())
                          .asHash<std::string, std::string>();
  for (const auto &[name, stateName] : storedStates) {
    record.taskRunStates.emplace(name,
                                 RunState::_from_string(stateName.c_str()));
  }

  for (const auto &[name, _] : record.dagSpec.tasks) {
    // Attempts for this task. The entry is created even when the list is
    // empty, so every task in the spec appears in taskAttempts.
    const auto attemptsKey = getTaskAttemptKey_(dagRunID, name);
    auto attemptsJSON = ctx_.query("LRANGE %s 0 -1", attemptsKey.c_str())
                            .asList<std::string>();
    auto &attempts = record.taskAttempts[name];
    for (const auto &json : attemptsJSON) {
      attempts.push_back(attemptRecordFromJSON(json));
    }

    // State history for this task
    const auto historyKey = getTaskStateUpdateKey_(dagRunID, name);
    auto historyJSON = ctx_.query("LRANGE %s 0 -1", historyKey.c_str())
                           .asList<std::string>();
    auto &history = record.taskStateChanges[name];
    for (const auto &json : historyJSON) {
      history.push_back(stateUpdateRecordFromJSON(json));
    }
  }

  return record;
}
// Reads the current state of a run from its state key.
// Throws std::runtime_error("No such dagrun") when the stored value comes
// back as an empty string.
RunState RedisLogger::getDAGRunState(DAGRunID dagRunID)
{
  const auto stateKey = getDAGStateKey_(dagRunID);
  const std::string stored =
      ctx_.query("GET %s", stateKey.c_str()).as<std::string>();

  if (!stored.empty()) {
    return RunState::_from_string(stored.c_str());
  }
  throw std::runtime_error("No such dagrun");
}
// Fetches the named task's JSON from the run's task hash (HGET) and parses it
// into a Task.
Task RedisLogger::getTask(DAGRunID dagRunID, const std::string &taskName)
{
  const auto tasksKey = getTasksKey_(dagRunID);
  const auto taskJSON =
      ctx_.query("HGET %s %s", tasksKey.c_str(), taskName.c_str())
          .as<std::string>();
  return taskFromJSON(taskName, taskJSON);
}
// Fetches the named task's state string from the run's task-state hash (HGET)
// and converts it to a RunState.
RunState RedisLogger::getTaskState(DAGRunID dagRunID,
                                   const std::string &taskName)
{
  const auto statesKey = getTaskStatesKey_(dagRunID);
  const auto stateName =
      ctx_.query("HGET %s %s", statesKey.c_str(), taskName.c_str())
          .as<std::string>();
  return RunState::_from_string(stateName.c_str());
}
} // namespace daggy::loggers::dag_run
#endif