Large re-organization to split daggyd away from the core libdaggy.
This paves the way for implementing daggys and other utilities.

Squashed commit of the following:

commit 1f77239ab3c9e44d190eef94531a39501c8c4dfe
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:25:02 2021 -0300

    Adding README, stdout support for daggyd logging

commit c2c237224e84a3be68aaa597ce98af1365e74a13
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:29 2021 -0300

    removing old daggyd

commit cfea2baf61ca10c535801c5a391d2d525a1a2d04
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 16:10:09 2021 -0300

    Moving tests into their sub-project folders

commit e41ca42069bea1db16dd76b6684a3f692fef6b15
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:57:40 2021 -0300

    Splitting out daggyd from libdaggy

commit be97b146c1d2446f5c03cb78707e921f18c60bd8
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:56:55 2021 -0300

    Splitting out daggyd from libdaggy

commit cb61e140e9d6d8832d61fb7037fd4c0ff6edad00
Author: Ian Roddis <gitlab@ie2r.com>
Date:   Mon Oct 18 15:49:47 2021 -0300

    moving daggy to libdaggy
17
libdaggy/CMakeLists.txt
Normal file
@@ -0,0 +1,17 @@
project(libdaggy)

add_library(${PROJECT_NAME} STATIC)

IF (DAGGY_ENABLE_SLURM)
    target_link_libraries(${PROJECT_NAME} slurm)
endif ()

IF (DAGGY_ENABLE_REDIS)
    target_link_libraries(${PROJECT_NAME} hiredis)
endif ()

target_include_directories(${PROJECT_NAME} PUBLIC include)
target_link_libraries(${PROJECT_NAME} pistache pthread rapidjson better-enums)

add_subdirectory(src)
add_subdirectory(tests)
82
libdaggy/include/daggy/DAG.hpp
Normal file
@@ -0,0 +1,82 @@
#pragma once

#include <deque>
#include <functional>
#include <iostream>
#include <iterator>
#include <optional>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <unordered_map>
#include <unordered_set>

#include "Defines.hpp"

/*
   The DAG structure in daggy is just to ensure that tasks are run
   in the correct dependent order.
 */

namespace daggy {

template <typename T>
struct Vertex
{
  RunState state = RunState::QUEUED;
  uint32_t depCount = 0;
  T data;
  std::unordered_set<size_t> children;
};

template <typename K, typename V>
class DAG
{
public:
  // Vertices
  void addVertex(K id, V data);

  std::unordered_set<K> getVertices() const;

  // Edges
  void addEdge(const K &from, const K &to);

  void addEdgeIf(const K &src,
                 std::function<bool(const Vertex<V> &v)> predicate);

  [[nodiscard]] bool isValid() const;

  bool hasVertex(const K &id);

  // Attributes
  [[nodiscard]] size_t size() const;

  [[nodiscard]] bool empty() const;

  // Reset the DAG to completely unvisited
  void reset();

  // Reset any vertex with RUNNING state to QUEUED
  void resetRunning();

  void setVertexState(const K &id, RunState state);

  void forEach(std::function<void(const Vertex<V> &)> fun) const;

  [[nodiscard]] bool allVisited() const;

  std::optional<std::pair<K, V>> visitNext();

  // WARNING: reference potentially invalidated on insertions.
  Vertex<V> &getVertex(const K &id);

  void completeVisit(const K &id);

private:
  std::unordered_map<K, size_t> keyMap_;
  std::vector<K> vertexName_;
  std::vector<Vertex<V>> vertices_;
};
} // namespace daggy

#include "DAG.impl.hxx"
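Usage sketch (illustrative only, not part of the commit; it relies solely on the interface declared above, with a plain int payload instead of daggy::Task):

#include <daggy/DAG.hpp>
#include <iostream>
#include <string>

int main()
{
  daggy::DAG<std::string, int> dag;
  dag.addVertex("fetch", 1);
  dag.addVertex("build", 2);
  dag.addVertex("test", 3);
  dag.addEdge("fetch", "build");  // build depends on fetch
  dag.addEdge("build", "test");   // test depends on build

  if (!dag.isValid())
    return 1;                     // a cycle would make isValid() return false

  // Visit vertices whose dependencies are satisfied, completing each in turn.
  while (!dag.allVisited()) {
    auto next = dag.visitNext();
    if (!next.has_value())
      continue;                   // nothing ready yet (e.g. still RUNNING)
    std::cout << next->first << '\n';   // fetch, build, test
    dag.completeVisit(next->first);
  }
}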
176
libdaggy/include/daggy/DAG.impl.hxx
Normal file
@@ -0,0 +1,176 @@
|
||||
namespace daggy {
|
||||
template <typename K, typename V>
|
||||
size_t DAG<K, V>::size() const
|
||||
{
|
||||
return vertices_.size();
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
bool DAG<K, V>::empty() const
|
||||
{
|
||||
return vertices_.empty();
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
bool DAG<K, V>::hasVertex(const K &id)
|
||||
{
|
||||
return keyMap_.count(id) != 0;
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
Vertex<V> &DAG<K, V>::getVertex(const K &id)
|
||||
{
|
||||
return vertices_[keyMap_.at(id)];
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
std::unordered_set<K> DAG<K, V>::getVertices() const
|
||||
{
|
||||
std::unordered_set<K> keys;
|
||||
for (const auto it : keyMap_) {
|
||||
keys.insert(it.first);
|
||||
}
|
||||
return keys;
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::addVertex(K id, V data)
|
||||
{
|
||||
if (keyMap_.count(id) != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "A vertex with ID " << id << " already exists in the DAG";
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
size_t idx = vertices_.size();
|
||||
vertexName_.emplace_back(id);
|
||||
vertices_.emplace_back(
|
||||
Vertex<V>{.state = RunState::QUEUED, .depCount = 0, .data = data});
|
||||
keyMap_.emplace(id, idx);
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::addEdge(const K &from, const K &to)
|
||||
{
|
||||
size_t src = keyMap_.at(from);
|
||||
size_t dst = keyMap_.at(to);
|
||||
vertices_[src].children.insert(dst);
|
||||
vertices_[dst].depCount++;
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::addEdgeIf(const K &src,
|
||||
std::function<bool(const Vertex<V> &v)> predicate)
|
||||
{
|
||||
size_t parentIdx = keyMap_.at(src);
|
||||
auto &parent = vertices_[parentIdx];
|
||||
for (size_t i = 0; i < vertices_.size(); ++i) {
|
||||
if (!predicate(vertices_[i]))
|
||||
continue;
|
||||
if (i == parentIdx)
|
||||
continue;
|
||||
parent.children.insert(i);
|
||||
vertices_[i].depCount++;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
bool DAG<K, V>::isValid() const
|
||||
{
|
||||
std::vector<size_t> depCounts(vertices_.size(), 0);
|
||||
std::queue<size_t> ready;
|
||||
size_t processed = 0;
|
||||
|
||||
for (size_t i = 0; i < vertices_.size(); ++i) {
|
||||
depCounts[i] = vertices_[i].depCount;
|
||||
if (depCounts[i] == 0)
|
||||
ready.push(i);
|
||||
}
|
||||
|
||||
while (!ready.empty()) {
|
||||
const auto &k = ready.front();
|
||||
for (const auto &child : vertices_[k].children) {
|
||||
auto dc = --depCounts[child];
|
||||
if (dc == 0)
|
||||
ready.push(child);
|
||||
}
|
||||
processed++;
|
||||
ready.pop();
|
||||
}
|
||||
|
||||
return processed == vertices_.size();
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::reset()
|
||||
{
|
||||
// Reset the state of all vertices
|
||||
for (auto &v : vertices_) {
|
||||
v.state = RunState::QUEUED;
|
||||
v.depCount = 0;
|
||||
}
|
||||
|
||||
// Calculate the upstream count
|
||||
for (auto &v : vertices_) {
|
||||
for (auto c : v.children) {
|
||||
vertices_[c].depCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::resetRunning()
|
||||
{
|
||||
for (auto &v : vertices_) {
|
||||
if (v.state != +RunState::RUNNING)
|
||||
continue;
|
||||
v.state = RunState::QUEUED;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::setVertexState(const K &id, RunState state)
|
||||
{
|
||||
vertices_[keyMap_.at(id)].state = state;
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
bool DAG<K, V>::allVisited() const
|
||||
{
|
||||
return not std::any_of(
|
||||
vertices_.begin(), vertices_.end(),
|
||||
[](const auto &v) { return v.state != +RunState::COMPLETED; });
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
std::optional<std::pair<K, V>> DAG<K, V>::visitNext()
|
||||
{
|
||||
for (size_t i = 0; i < vertices_.size(); ++i) {
|
||||
auto &v = vertices_[i];
|
||||
if (v.state != +RunState::QUEUED)
|
||||
continue;
|
||||
if (v.depCount != 0)
|
||||
continue;
|
||||
v.state = RunState::RUNNING;
|
||||
return std::make_pair(vertexName_[i], v.data);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::completeVisit(const K &id)
|
||||
{
|
||||
auto &v = vertices_[keyMap_.at(id)];
|
||||
v.state = RunState::COMPLETED;
|
||||
for (auto c : v.children) {
|
||||
--vertices_[c].depCount;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V>
|
||||
void DAG<K, V>::forEach(std::function<void(const Vertex<V> &)> fun) const
|
||||
{
|
||||
for (auto it = vertices_.begin(); it != vertices_.end(); ++it) {
|
||||
fun(*it);
|
||||
}
|
||||
}
|
||||
} // namespace daggy
|
||||
55
libdaggy/include/daggy/DAGRunner.hpp
Normal file
@@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
|
||||
#include <rapidjson/document.h>
|
||||
|
||||
#include <future>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "DAG.hpp"
|
||||
#include "Defines.hpp"
|
||||
#include "Serialization.hpp"
|
||||
#include "Utilities.hpp"
|
||||
#include "daggy/executors/task/TaskExecutor.hpp"
|
||||
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace daggy {
|
||||
class DAGRunner
|
||||
{
|
||||
public:
|
||||
DAGRunner(DAGRunID runID, executors::task::TaskExecutor &executor,
|
||||
loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
|
||||
const TaskParameters &taskParams);
|
||||
|
||||
~DAGRunner();
|
||||
|
||||
TaskDAG run();
|
||||
void resetRunning();
|
||||
void stop(bool kill = false, bool blocking = false);
|
||||
|
||||
private:
|
||||
void collectFinished();
|
||||
void queuePending();
|
||||
void killRunning();
|
||||
|
||||
DAGRunID runID_;
|
||||
executors::task::TaskExecutor &executor_;
|
||||
loggers::dag_run::DAGRunLogger &logger_;
|
||||
TaskDAG dag_;
|
||||
const TaskParameters &taskParams_;
|
||||
std::atomic<bool> running_;
|
||||
std::atomic<bool> kill_;
|
||||
|
||||
ssize_t nRunningTasks_;
|
||||
ssize_t nErroredTasks_;
|
||||
std::unordered_map<std::string, std::future<AttemptRecord>> runningTasks_;
|
||||
std::unordered_map<std::string, size_t> taskAttemptCounts_;
|
||||
|
||||
std::mutex runGuard_;
|
||||
};
|
||||
} // namespace daggy
|
||||
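A rough sketch of how these pieces compose into a run (illustrative; `spec` is assumed to be an already-parsed DAGSpec, and the executor, logger and DAG helpers are the ones declared elsewhere in this commit):

// Assumes: daggy::DAGSpec spec = daggy::dagFromJSON(json);  (see Serialization.hpp)
daggy::executors::task::ForkingTaskExecutor executor(4 /* worker threads */);
daggy::loggers::dag_run::OStreamLogger logger(std::cout);

daggy::DAGRunID runID = logger.startDAGRun(spec);

// Expand {{variables}} and build the dependency graph (see Utilities.hpp below).
auto tasks = daggy::expandTaskSet(spec.tasks, executor, spec.taskConfig.variables);
daggy::TaskDAG dag = daggy::buildDAGFromTasks(tasks);

daggy::DAGRunner runner(runID, executor, logger, dag, spec.taskConfig);
daggy::TaskDAG finished = runner.run();  // blocks until COMPLETED / ERRORED / KILLED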
77
libdaggy/include/daggy/Defines.hpp
Normal file
@@ -0,0 +1,77 @@
|
||||
#pragma once
|
||||
|
||||
#include <enum.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
namespace daggy {
|
||||
// Commands and parameters
|
||||
using ConfigValue = std::variant<std::string, std::vector<std::string>>;
|
||||
using ConfigValues = std::unordered_map<std::string, ConfigValue>;
|
||||
using Command = std::vector<std::string>;
|
||||
|
||||
// Time
|
||||
using Clock = std::chrono::high_resolution_clock;
|
||||
using TimePoint = std::chrono::time_point<Clock>;
|
||||
|
||||
// DAG Runs
|
||||
using DAGRunID = size_t;
|
||||
|
||||
BETTER_ENUM(RunState, uint32_t, QUEUED = 1, RUNNING, RETRY, ERRORED, KILLED,
|
||||
PAUSED, COMPLETED);
|
||||
|
||||
struct Task
|
||||
{
|
||||
std::string definedName;
|
||||
bool isGenerator; // True if the output of this task is a JSON set of tasks
|
||||
// to complete
|
||||
uint32_t maxRetries;
|
||||
uint32_t retryIntervalSeconds; // Time to wait between retries
|
||||
ConfigValues job; // It's up to the individual inspectors to convert values
|
||||
// from strings // array of strings
|
||||
std::unordered_set<std::string> children;
|
||||
std::unordered_set<std::string> parents;
|
||||
|
||||
bool operator==(const Task &other) const
|
||||
{
|
||||
return (definedName == other.definedName) and
|
||||
(maxRetries == other.maxRetries) and
|
||||
(retryIntervalSeconds == other.retryIntervalSeconds) and
|
||||
(job == other.job) and (children == other.children) and
|
||||
(parents == other.parents) and (isGenerator == other.isGenerator);
|
||||
}
|
||||
};
|
||||
|
||||
using TaskSet = std::unordered_map<std::string, Task>;
|
||||
|
||||
// All the components required to define and run a DAG
|
||||
struct TaskParameters
|
||||
{
|
||||
ConfigValues variables;
|
||||
ConfigValues jobDefaults;
|
||||
};
|
||||
|
||||
struct DAGSpec
|
||||
{
|
||||
std::string tag;
|
||||
TaskSet tasks;
|
||||
TaskParameters taskConfig;
|
||||
};
|
||||
|
||||
struct AttemptRecord
|
||||
{
|
||||
TimePoint startTime;
|
||||
TimePoint stopTime;
|
||||
int rc; // RC from the task
|
||||
std::string executorLog; // Logs from the dag_executor
|
||||
std::string outputLog; // stdout from command
|
||||
std::string errorLog; // stderr from command
|
||||
};
|
||||
} // namespace daggy
|
||||
|
||||
BETTER_ENUMS_DECLARE_STD_HASH(daggy::RunState)
|
||||
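Side note: RunState is a better-enums enum, which is why code elsewhere in the commit compares against `+RunState::X` (the unary plus converts the constant into a comparable value) and serializes with `_to_string()` / `_from_string()`. A small illustration:

daggy::RunState s = daggy::RunState::QUEUED;

if (s == +daggy::RunState::QUEUED)             // unary + makes the constant comparable
  std::cout << s._to_string() << '\n';         // prints "QUEUED"

s = daggy::RunState::_from_string("RUNNING");  // round-trips through strings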
68
libdaggy/include/daggy/Serialization.hpp
Normal file
@@ -0,0 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include <rapidjson/document.h>
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "Defines.hpp"
|
||||
#include "Utilities.hpp"
|
||||
#include "loggers/dag_run/Defines.hpp"
|
||||
|
||||
namespace rj = rapidjson;
|
||||
|
||||
namespace daggy {
|
||||
void checkRJParse(const rj::ParseResult &result,
|
||||
const std::string &prefix = "");
|
||||
std::string dumpJSON(const rj::Value &doc);
|
||||
|
||||
// Parameters
|
||||
ConfigValues configFromJSON(const std::string &jsonSpec);
|
||||
|
||||
ConfigValues configFromJSON(const rj::Value &spec);
|
||||
|
||||
std::string configToJSON(const ConfigValues &config);
|
||||
|
||||
// Tasks
|
||||
Task taskFromJSON(const std::string &name, const rj::Value &spec,
|
||||
const ConfigValues &jobDefaults = {});
|
||||
Task taskFromJSON(const std::string &name, const std::string &spec,
|
||||
const ConfigValues &jobDefaults = {});
|
||||
|
||||
TaskSet tasksFromJSON(const std::string &jsonSpec,
|
||||
const ConfigValues &jobDefaults = {});
|
||||
|
||||
TaskSet tasksFromJSON(const rj::Value &spec,
|
||||
const ConfigValues &jobDefaults = {});
|
||||
|
||||
std::string taskToJSON(const Task &task);
|
||||
|
||||
std::string tasksToJSON(const TaskSet &tasks);
|
||||
|
||||
// Full specs
|
||||
DAGSpec dagFromJSON(const rj::Value &spec);
|
||||
DAGSpec dagFromJSON(const std::string &jsonSpec);
|
||||
|
||||
// Attempt Records
|
||||
std::string attemptRecordToJSON(const AttemptRecord &attemptRecord);
|
||||
AttemptRecord attemptRecordFromJSON(const std::string &json);
|
||||
AttemptRecord attemptRecordFromJSON(const rj::Value &spec);
|
||||
|
||||
// default serialization
|
||||
std::ostream &operator<<(std::ostream &os, const Task &task);
|
||||
|
||||
std::string timePointToString(const TimePoint &tp);
|
||||
|
||||
TimePoint stringToTimePoint(const std::string &timeStr);
|
||||
|
||||
/*
|
||||
DAGRun Loggers
|
||||
*/
|
||||
namespace logger = loggers::dag_run;
|
||||
|
||||
std::string stateUpdateRecordToJSON(const logger::StateUpdateRecord &rec);
|
||||
logger::StateUpdateRecord stateUpdateRecordFromJSON(const rj::Value &json);
|
||||
logger::StateUpdateRecord stateUpdateRecordFromJSON(const std::string &json);
|
||||
} // namespace daggy
|
||||
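An illustrative round trip through this API (the JSON shape is inferred from taskFromJSON/tasksFromJSON in Serialization.cpp further down; field names other than those handled there are assumptions):

const std::string json = R"({
  "compile": { "job": { "command": ["make"] } },
  "test":    { "parents": ["compile"], "job": { "command": ["make", "test"] } }
})";

daggy::TaskSet tasks = daggy::tasksFromJSON(json);

// "parents" entries are normalized into the parent's "children" set,
// so tasks.at("compile").children now contains "test".
std::cout << daggy::tasksToJSON(tasks) << '\n';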
186
libdaggy/include/daggy/ThreadPool.hpp
Normal file
@@ -0,0 +1,186 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace daggy {
|
||||
|
||||
/*
|
||||
A Task Queue is a collection of async tasks to be executed by the
|
||||
thread pool. Using individual task queues allows for a rough QoS
|
||||
when a single thread may be submitting batches of requests --
|
||||
one producer won't starve out another, but all tasks will be run
|
||||
as quickly as possible.
|
||||
*/
|
||||
class TaskQueue
|
||||
{
|
||||
public:
|
||||
template <class F, class... Args>
|
||||
decltype(auto) addTask(F &&f, Args &&...args)
|
||||
{
|
||||
// using return_type = std::invoke_result<F, Args...>::type;
|
||||
using return_type = std::invoke_result_t<F, Args...>;
|
||||
|
||||
std::packaged_task<return_type()> task(
|
||||
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
|
||||
|
||||
std::future<return_type> res = task.get_future();
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
tasks_.emplace(std::move(task));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::packaged_task<void()> pop()
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
auto task = std::move(tasks_.front());
|
||||
tasks_.pop();
|
||||
return task;
|
||||
}
|
||||
|
||||
size_t size()
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
return tasks_.size();
|
||||
}
|
||||
|
||||
bool empty()
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
return tasks_.empty();
|
||||
}
|
||||
|
||||
private:
|
||||
std::queue<std::packaged_task<void()>> tasks_;
|
||||
std::mutex mtx_;
|
||||
};
|
||||
|
||||
class ThreadPool
|
||||
{
|
||||
public:
|
||||
explicit ThreadPool(size_t nWorkers)
|
||||
: tqit_(taskQueues_.begin())
|
||||
, stop_(false)
|
||||
, drain_(false)
|
||||
{
|
||||
resize(nWorkers);
|
||||
}
|
||||
|
||||
~ThreadPool()
|
||||
{
|
||||
shutdown();
|
||||
}
|
||||
|
||||
void shutdown()
|
||||
{
|
||||
stop_ = true;
|
||||
cv_.notify_all();
|
||||
for (std::thread &worker : workers_) {
|
||||
if (worker.joinable())
|
||||
worker.join();
|
||||
}
|
||||
}
|
||||
|
||||
void drain()
|
||||
{
|
||||
drain_ = true;
|
||||
while (true) {
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
if (taskQueues_.empty())
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(250ms);
|
||||
}
|
||||
}
|
||||
|
||||
void restart()
|
||||
{
|
||||
drain_ = false;
|
||||
}
|
||||
|
||||
void resize(size_t nWorkers)
|
||||
{
|
||||
shutdown();
|
||||
workers_.clear();
|
||||
stop_ = false;
|
||||
|
||||
for (size_t i = 0; i < nWorkers; ++i)
|
||||
workers_.emplace_back([&] {
|
||||
while (true) {
|
||||
std::packaged_task<void()> task;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
cv_.wait(lock, [&] { return stop_ || !taskQueues_.empty(); });
|
||||
if (taskQueues_.empty()) {
|
||||
if (stop_)
|
||||
return;
|
||||
continue;
|
||||
}
|
||||
if (tqit_ == taskQueues_.end())
|
||||
tqit_ = taskQueues_.begin();
|
||||
task = (*tqit_)->pop();
|
||||
if ((*tqit_)->empty()) {
|
||||
tqit_ = taskQueues_.erase(tqit_);
|
||||
}
|
||||
else {
|
||||
tqit_++;
|
||||
}
|
||||
}
|
||||
task();
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
template <class F, class... Args>
|
||||
decltype(auto) addTask(F &&f, Args &&...args)
|
||||
{
|
||||
if (drain_)
|
||||
throw std::runtime_error("Unable to add task to draining pool");
|
||||
auto tq = std::make_shared<TaskQueue>();
|
||||
|
||||
auto fut = tq->addTask(f, args...);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
taskQueues_.push_back(tq);
|
||||
}
|
||||
cv_.notify_one();
|
||||
return fut;
|
||||
}
|
||||
|
||||
void addTasks(std::shared_ptr<TaskQueue> &tq)
|
||||
{
|
||||
if (drain_)
|
||||
throw std::runtime_error("Unable to add task to draining pool");
|
||||
std::lock_guard<std::mutex> guard(mtx_);
|
||||
taskQueues_.push_back(tq);
|
||||
cv_.notify_one();
|
||||
}
|
||||
|
||||
private:
|
||||
// need to keep track of threads, so we can join them
|
||||
std::vector<std::thread> workers_;
|
||||
// the task queue
|
||||
std::list<std::shared_ptr<TaskQueue>> taskQueues_;
|
||||
std::list<std::shared_ptr<TaskQueue>>::iterator tqit_;
|
||||
|
||||
// synchronization
|
||||
std::mutex mtx_;
|
||||
std::condition_variable cv_;
|
||||
std::atomic<bool> stop_;
|
||||
std::atomic<bool> drain_;
|
||||
};
|
||||
|
||||
} // namespace daggy
|
||||
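Usage sketch (illustrative only). Each addTask() call here builds its own TaskQueue internally, which is what provides the rough per-producer fairness described in the header comment:

daggy::ThreadPool pool(4);

auto f1 = pool.addTask([](int a, int b) { return a + b; }, 2, 3);
auto f2 = pool.addTask([] { return std::string{"done"}; });

std::cout << f1.get() << ' ' << f2.get() << '\n';  // 5 done

pool.drain();     // wait for queued work to finish, rejecting new submissions
pool.shutdown();  // join the workers (also done by the destructor)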
37
libdaggy/include/daggy/Utilities.hpp
Normal file
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include <rapidjson/document.h>
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "DAG.hpp"
|
||||
#include "Defines.hpp"
|
||||
#include "daggy/executors/task/TaskExecutor.hpp"
|
||||
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
|
||||
|
||||
namespace daggy {
|
||||
using TaskDAG = DAG<std::string, Task>;
|
||||
|
||||
std::string globalSub(std::string string, const std::string &pattern,
|
||||
const std::string &replacement);
|
||||
|
||||
std::vector<Command> interpolateValues(const std::vector<std::string> &raw,
|
||||
const ConfigValues &values);
|
||||
|
||||
TaskSet expandTaskSet(const TaskSet &tasks,
|
||||
executors::task::TaskExecutor &executor,
|
||||
const ConfigValues &interpolatedValues = {});
|
||||
|
||||
TaskDAG buildDAGFromTasks(
|
||||
const TaskSet &tasks,
|
||||
const std::unordered_map<std::string,
|
||||
std::vector<loggers::dag_run::StateUpdateRecord>>
|
||||
&updates = {});
|
||||
|
||||
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks);
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const TimePoint &tp);
|
||||
} // namespace daggy
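A hedged illustration of the {{variable}} interpolation declared above (the exact behaviour lives in Utilities.cpp, which is truncated in this extract; the fan-out of list-valued variables into one Command per value is an assumption based on the std::vector<Command> return type):

daggy::ConfigValues values{
    {"env", std::string{"prod"}},
    {"region", std::vector<std::string>{"us-east", "eu-west"}}};

std::vector<std::string> raw{"deploy", "--env", "{{env}}", "--region", "{{region}}"};

// Expected (assumption): two commands, one per region value, with {{env}}
// substituted in both.
std::vector<daggy::Command> commands = daggy::interpolateValues(raw, values);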
|
||||
36
libdaggy/include/daggy/executors/task/ForkingTaskExecutor.hpp
Normal file
(file header missing from the extract; path inferred from the ForkingTaskExecutor class below and the sibling executor headers)
@@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include <daggy/ThreadPool.hpp>
|
||||
|
||||
#include "TaskExecutor.hpp"
|
||||
|
||||
namespace daggy::executors::task {
|
||||
class ForkingTaskExecutor : public TaskExecutor
|
||||
{
|
||||
public:
|
||||
using Command = std::vector<std::string>;
|
||||
|
||||
explicit ForkingTaskExecutor(size_t nThreads);
|
||||
~ForkingTaskExecutor() override;
|
||||
|
||||
// Validates the job to ensure that all required values are set and are of
|
||||
// the right type,
|
||||
bool validateTaskParameters(const ConfigValues &job) override;
|
||||
|
||||
std::vector<ConfigValues> expandTaskParameters(
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
|
||||
private:
|
||||
ThreadPool tp_;
|
||||
std::mutex taskControlsGuard_;
|
||||
AttemptRecord runTask(const Task &task, std::atomic<bool> &running);
|
||||
std::unordered_map<std::string, std::atomic<bool>> taskControls_;
|
||||
};
|
||||
} // namespace daggy::executors::task
|
||||
25
libdaggy/include/daggy/executors/task/NoopTaskExecutor.hpp
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include "TaskExecutor.hpp"
|
||||
|
||||
namespace daggy::executors::task {
|
||||
class NoopTaskExecutor : public TaskExecutor
|
||||
{
|
||||
public:
|
||||
using Command = std::vector<std::string>;
|
||||
|
||||
// Validates the job to ensure that all required values are set and are of
|
||||
// the right type,
|
||||
bool validateTaskParameters(const ConfigValues &job) override;
|
||||
|
||||
std::vector<ConfigValues> expandTaskParameters(
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
};
|
||||
} // namespace daggy::executors::task
|
||||
46
libdaggy/include/daggy/executors/task/SlurmTaskExecutor.hpp
Normal file
@@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include "TaskExecutor.hpp"
|
||||
|
||||
namespace daggy::executors::task {
|
||||
class SlurmTaskExecutor : public TaskExecutor
|
||||
{
|
||||
public:
|
||||
using Command = std::vector<std::string>;
|
||||
|
||||
SlurmTaskExecutor();
|
||||
~SlurmTaskExecutor() override;
|
||||
|
||||
// Validates the job to ensure that all required values are set and are of
|
||||
// the right type,
|
||||
bool validateTaskParameters(const ConfigValues &job) override;
|
||||
|
||||
std::vector<ConfigValues> expandTaskParameters(
|
||||
const ConfigValues &job, const ConfigValues &expansionValues) override;
|
||||
|
||||
// Runs the task
|
||||
std::future<AttemptRecord> execute(DAGRunID runID,
|
||||
const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
bool stop(DAGRunID runID, const std::string &taskName) override;
|
||||
|
||||
private:
|
||||
struct Job
|
||||
{
|
||||
std::promise<AttemptRecord> prom;
|
||||
std::string stdoutFile;
|
||||
std::string stderrFile;
|
||||
DAGRunID runID;
|
||||
std::string taskName;
|
||||
};
|
||||
|
||||
std::mutex promiseGuard_;
|
||||
std::unordered_map<size_t, Job> runningJobs_;
|
||||
std::atomic<bool> running_;
|
||||
|
||||
// Monitors jobs and resolves promises
|
||||
std::thread monitorWorker_;
|
||||
void monitor();
|
||||
};
|
||||
} // namespace daggy::executors::task
|
||||
37
libdaggy/include/daggy/executors/task/TaskExecutor.hpp
Normal file
@@ -0,0 +1,37 @@
#pragma once

#include <chrono>
#include <daggy/Defines.hpp>
#include <future>
#include <string>
#include <thread>
#include <vector>

/*
   Executors run Tasks, returning a future with the results.
   If there are many retries, logs are returned for each attempt.
 */

namespace daggy::executors::task {
class TaskExecutor
{
public:
  virtual ~TaskExecutor() = default;

  // Validates the job to ensure that all required values are set and are of
  // the right type,
  virtual bool validateTaskParameters(const ConfigValues &job) = 0;

  // Will use the expansion values to return the fully expanded tasks.
  virtual std::vector<ConfigValues> expandTaskParameters(
      const ConfigValues &job, const ConfigValues &expansionValues) = 0;

  // Blocking execution of a task
  virtual std::future<AttemptRecord> execute(DAGRunID runID,
                                             const std::string &taskName,
                                             const Task &task) = 0;

  // Kill a currently executing task. This will resolve the future.
  virtual bool stop(DAGRunID runID, const std::string &taskName) = 0;
};
} // namespace daggy::executors::task
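To make the contract concrete, a minimal sketch of a custom executor that resolves every task immediately (illustrative only, not part of the commit; the real implementations are ForkingTaskExecutor and SlurmTaskExecutor):

#include <daggy/executors/task/TaskExecutor.hpp>

class InlineExecutor : public daggy::executors::task::TaskExecutor
{
public:
  bool validateTaskParameters(const daggy::ConfigValues &) override { return true; }

  std::vector<daggy::ConfigValues> expandTaskParameters(
      const daggy::ConfigValues &job, const daggy::ConfigValues &) override
  {
    return {job};  // no expansion
  }

  std::future<daggy::AttemptRecord> execute(daggy::DAGRunID,
                                            const std::string &,
                                            const daggy::Task &) override
  {
    // Resolve the future right away with a successful attempt.
    std::promise<daggy::AttemptRecord> p;
    p.set_value(daggy::AttemptRecord{.startTime = daggy::Clock::now(),
                                     .stopTime = daggy::Clock::now(),
                                     .rc = 0});
    return p.get_future();
  }

  bool stop(daggy::DAGRunID, const std::string &) override { return true; }
};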
50
libdaggy/include/daggy/loggers/dag_run/DAGRunLogger.hpp
Normal file
@@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "../../Defines.hpp"
|
||||
#include "Defines.hpp"
|
||||
|
||||
/*
|
||||
DAGRunLogger represents the interface to store all the state information
|
||||
for daggy to run. Abstracted in case other back-end solutions need to
|
||||
be supported.
|
||||
*/
|
||||
|
||||
namespace daggy::loggers::dag_run {
|
||||
class DAGRunLogger
|
||||
{
|
||||
public:
|
||||
virtual ~DAGRunLogger() = default;
|
||||
|
||||
// Insertion / Updates
|
||||
virtual DAGRunID startDAGRun(const DAGSpec &dagSpec) = 0;
|
||||
|
||||
virtual void addTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task) = 0;
|
||||
|
||||
virtual void updateTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task) = 0;
|
||||
|
||||
virtual void updateDAGRunState(DAGRunID dagRunID, RunState state) = 0;
|
||||
|
||||
virtual void logTaskAttempt(DAGRunID dagRunID, const std::string &taskName,
|
||||
const AttemptRecord &attempt) = 0;
|
||||
|
||||
virtual void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
|
||||
RunState state) = 0;
|
||||
|
||||
// Querying
|
||||
virtual DAGSpec getDAGSpec(DAGRunID dagRunID) = 0;
|
||||
|
||||
virtual std::vector<DAGRunSummary> queryDAGRuns(const std::string &tag = "",
|
||||
bool all = false) = 0;
|
||||
|
||||
virtual RunState getDAGRunState(DAGRunID dagRunID) = 0;
|
||||
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
|
||||
|
||||
virtual Task getTask(DAGRunID dagRunID, const std::string &taskName) = 0;
|
||||
virtual RunState getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName) = 0;
|
||||
};
|
||||
} // namespace daggy::loggers::dag_run
|
||||
39
libdaggy/include/daggy/loggers/dag_run/Defines.hpp
Normal file
@@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "../../Defines.hpp"
|
||||
|
||||
namespace daggy::loggers::dag_run {
|
||||
struct StateUpdateRecord
|
||||
{
|
||||
TimePoint time;
|
||||
RunState state;
|
||||
};
|
||||
|
||||
// Pretty heavy weight, but
|
||||
struct DAGRunRecord
|
||||
{
|
||||
DAGSpec dagSpec;
|
||||
std::unordered_map<std::string, RunState> taskRunStates;
|
||||
std::unordered_map<std::string, std::vector<AttemptRecord>> taskAttempts;
|
||||
std::unordered_map<std::string, std::vector<StateUpdateRecord>>
|
||||
taskStateChanges;
|
||||
std::vector<StateUpdateRecord> dagStateChanges;
|
||||
};
|
||||
|
||||
struct DAGRunSummary
|
||||
{
|
||||
DAGRunID runID;
|
||||
std::string tag;
|
||||
RunState runState;
|
||||
TimePoint startTime;
|
||||
TimePoint lastUpdate;
|
||||
std::unordered_map<RunState, size_t> taskStateCounts;
|
||||
};
|
||||
} // namespace daggy::loggers::dag_run
|
||||
|
||||
60
libdaggy/include/daggy/loggers/dag_run/OStreamLogger.hpp
Normal file
@@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
#include "DAGRunLogger.hpp"
|
||||
#include "Defines.hpp"
|
||||
|
||||
namespace daggy::loggers::dag_run {
|
||||
/*
|
||||
* This logger should only be used for debug purposes. It doesn't actually log
|
||||
* anything, just prints stuff to stdout.
|
||||
*/
|
||||
class OStreamLogger : public DAGRunLogger
|
||||
{
|
||||
public:
|
||||
explicit OStreamLogger(std::ostream &os);
|
||||
~OStreamLogger() override;
|
||||
|
||||
// Execution
|
||||
DAGRunID startDAGRun(const DAGSpec &dagSpec) override;
|
||||
|
||||
void addTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
void updateTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
|
||||
|
||||
void logTaskAttempt(DAGRunID, const std::string &taskName,
|
||||
const AttemptRecord &attempt) override;
|
||||
|
||||
void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
|
||||
RunState state) override;
|
||||
|
||||
// Querying
|
||||
DAGSpec getDAGSpec(DAGRunID dagRunID) override;
|
||||
|
||||
std::vector<DAGRunSummary> queryDAGRuns(const std::string &tag = "",
|
||||
bool all = false) override;
|
||||
|
||||
RunState getDAGRunState(DAGRunID dagRunID) override;
|
||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||
|
||||
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
||||
RunState getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName) override;
|
||||
|
||||
private:
|
||||
std::mutex guard_;
|
||||
std::ostream &os_;
|
||||
std::vector<DAGRunRecord> dagRuns_;
|
||||
|
||||
void _updateTaskState(DAGRunID dagRunID, const std::string &taskName,
|
||||
RunState state);
|
||||
|
||||
void _updateDAGRunState(DAGRunID dagRunID, RunState state);
|
||||
};
|
||||
} // namespace daggy::loggers::dag_run
|
||||
129
libdaggy/include/daggy/loggers/dag_run/RedisHelper.hpp
Normal file
@@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
|
||||
#include <iterator>
|
||||
#ifdef DAGGY_ENABLE_REDIS
|
||||
|
||||
#include <hiredis.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
/*
|
||||
Why a Redis Helper? This wraps hiredis structs in a class with a destructor
|
||||
that will clean up after itself.
|
||||
|
||||
The query() method is a bit wonky with all the variants, but it works well
|
||||
enough.
|
||||
|
||||
Important note: The hiredis context is not thread safe, so neither is this.
|
||||
Create contexts as needed.
|
||||
*/
|
||||
|
||||
namespace daggy::loggers::dag_run::redis {
|
||||
using RedisDatum = std::variant<std::string, double, size_t>;
|
||||
|
||||
// Either a single Datum, or a vector of Datum
|
||||
struct RedisData
|
||||
{
|
||||
void operator=(const RedisDatum &val)
|
||||
{
|
||||
data_ = val;
|
||||
}
|
||||
|
||||
void operator=(const RedisData &other)
|
||||
{
|
||||
data_ = other.data_;
|
||||
}
|
||||
|
||||
void operator=(const std::vector<RedisDatum> &other)
|
||||
{
|
||||
data_ = other;
|
||||
}
|
||||
|
||||
RedisDatum asDatum()
|
||||
{
|
||||
return std::get<RedisDatum>(data_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T as()
|
||||
{
|
||||
return std::get<T>(std::get<RedisDatum>(data_));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<T> asList()
|
||||
{
|
||||
std::vector<T> data;
|
||||
const auto &inp = std::get<std::vector<RedisDatum>>(data_);
|
||||
std::transform(inp.begin(), inp.end(), std::back_inserter(data),
|
||||
[](const auto &i) { return std::get<T>(i); });
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename T, typename V>
|
||||
std::unordered_map<T, V> asHash()
|
||||
{
|
||||
std::unordered_map<T, V> data;
|
||||
const auto &inp = std::get<std::vector<RedisDatum>>(data_);
|
||||
if (inp.size() % 2 != 0)
|
||||
throw std::runtime_error("Number of items is not even");
|
||||
for (size_t i = 0; i < inp.size(); i += 2) {
|
||||
data.emplace(std::get<T>(inp[i]), std::get<V>(inp[i + 1]));
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
std::variant<RedisDatum, std::vector<RedisDatum>> data_;
|
||||
};
|
||||
|
||||
class RedisContext
|
||||
{
|
||||
public:
|
||||
RedisContext(const std::string &host, int port);
|
||||
|
||||
template <class... Args>
|
||||
RedisData query(Args &&...args)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(contextGuard_);
|
||||
redisReply *reply = static_cast<redisReply *>(
|
||||
redisCommand(ctx_, std::forward<Args>(args)...));
|
||||
|
||||
if (!reply) {
|
||||
throw std::runtime_error("Cannot query redis.");
|
||||
}
|
||||
|
||||
if (reply->type == REDIS_REPLY_ERROR) {
|
||||
if (reply->str) {
|
||||
std::string error{reply->str};
|
||||
throw std::runtime_error("Error querying redis: " + error);
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error("Unknown error querying redis");
|
||||
}
|
||||
}
|
||||
|
||||
auto data = parseReply_(reply);
|
||||
freeReplyObject(reply);
|
||||
return data;
|
||||
}
|
||||
|
||||
~RedisContext()
|
||||
{
|
||||
redisFree(ctx_);
|
||||
}
|
||||
|
||||
private:
|
||||
RedisData parseReply_(const redisReply *reply);
|
||||
redisContext *ctx_;
|
||||
std::mutex contextGuard_;
|
||||
};
|
||||
} // namespace daggy::loggers::dag_run::redis
|
||||
|
||||
#endif
|
||||
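Usage sketch for the helper above (illustrative; it assumes parseReply_ maps string and array replies onto the RedisDatum variants the accessors suggest):

using namespace daggy::loggers::dag_run::redis;

RedisContext ctx("127.0.0.1", 6379);

// query() forwards printf-style arguments to redisCommand().
ctx.query("SET %s %s", "greeting", "hello");
auto reply = ctx.query("GET %s", "greeting");
std::cout << reply.as<std::string>() << '\n';

ctx.query("RPUSH %s %s %s", "items", "a", "b");
auto items = ctx.query("LRANGE %s 0 -1", "items");
for (const auto &s : items.asList<std::string>())
  std::cout << s << '\n';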
102
libdaggy/include/daggy/loggers/dag_run/RedisLogger.hpp
Normal file
@@ -0,0 +1,102 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef DAGGY_ENABLE_REDIS
|
||||
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
#include "DAGRunLogger.hpp"
|
||||
#include "Defines.hpp"
|
||||
#include "RedisHelper.hpp"
|
||||
|
||||
namespace daggy::loggers::dag_run {
|
||||
|
||||
/*
|
||||
RunIDS are obtained from the counter dagRunIDs;
|
||||
|
||||
Keys are constructed from the dagRunID.
|
||||
|
||||
- dagRunIDs is an INTEGER COUNTER that returns the next dagRunID
|
||||
|
||||
- {runid}_spec is a HASH from taskName -> taskJSON
|
||||
|
||||
{
|
||||
"tag": tag,
|
||||
"tasks": { ...tasks... },
|
||||
*/
|
||||
|
||||
class RedisLogger : public DAGRunLogger
|
||||
{
|
||||
public:
|
||||
explicit RedisLogger(const std::string &prefix = "daggy",
|
||||
const std::string &host = "127.0.0.1",
|
||||
int port = 6379);
|
||||
|
||||
// Execution
|
||||
DAGRunID startDAGRun(const DAGSpec &dagSpec) override;
|
||||
|
||||
void addTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
void updateTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task) override;
|
||||
|
||||
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
|
||||
|
||||
void logTaskAttempt(DAGRunID, const std::string &taskName,
|
||||
const AttemptRecord &attempt) override;
|
||||
|
||||
void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
|
||||
RunState state) override;
|
||||
|
||||
// Querying
|
||||
DAGSpec getDAGSpec(DAGRunID dagRunID) override;
|
||||
|
||||
std::vector<DAGRunSummary> queryDAGRuns(const std::string &tag = "",
|
||||
bool all = false) override;
|
||||
|
||||
RunState getDAGRunState(DAGRunID dagRunID) override;
|
||||
DAGRunRecord getDAGRun(DAGRunID dagRunID) override;
|
||||
|
||||
Task getTask(DAGRunID dagRunID, const std::string &taskName) override;
|
||||
RunState getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName) override;
|
||||
|
||||
private:
|
||||
const std::string prefix_;
|
||||
const std::string dagRunIDsKey_;
|
||||
redis::RedisContext ctx_;
|
||||
|
||||
inline const std::string getDAGPrefix_(DAGRunID runID) const
|
||||
{
|
||||
return prefix_ + "_" + std::to_string(runID) + "_";
|
||||
}
|
||||
|
||||
#define GET_DAG_KEY(name, extra) \
|
||||
inline std::string name(DAGRunID runID) const \
|
||||
{ \
|
||||
return getDAGPrefix_(runID) + extra; \
|
||||
}
|
||||
|
||||
GET_DAG_KEY(getTagKey_, "tag");
|
||||
GET_DAG_KEY(getTasksKey_, "tasks");
|
||||
GET_DAG_KEY(getDAGStateKey_, "state");
|
||||
GET_DAG_KEY(getDAGStateUpdateKey_, "stateUpdate");
|
||||
GET_DAG_KEY(getTaskStatesKey_, "taskStates");
|
||||
GET_DAG_KEY(getTaskVariablesKey_, "taskVariables");
|
||||
GET_DAG_KEY(getTaskDefaultsKey_, "taskDefaults");
|
||||
GET_DAG_KEY(getStartTimeKey_, "startTime");
|
||||
GET_DAG_KEY(getLastUpdateKey_, "lastUpdate");
|
||||
|
||||
#define GET_TASK_KEY(name, category) \
|
||||
inline std::string name(DAGRunID runID, const std::string &taskName) const \
|
||||
{ \
|
||||
return getDAGPrefix_(runID) + category + "_" + taskName; \
|
||||
}
|
||||
|
||||
GET_TASK_KEY(getTaskStateUpdateKey_, "taskUpdateState");
|
||||
GET_TASK_KEY(getTaskAttemptKey_, "taskAttempt");
|
||||
};
|
||||
} // namespace daggy::loggers::dag_run
|
||||
|
||||
#endif
|
||||
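For reference, the key layout implied by getDAGPrefix_ and the GET_DAG_KEY / GET_TASK_KEY macros above, for prefix "daggy" and run ID 42:

daggy_42_tag, daggy_42_tasks, daggy_42_state, daggy_42_stateUpdate,
daggy_42_taskStates, daggy_42_taskVariables, daggy_42_taskDefaults,
daggy_42_startTime, daggy_42_lastUpdate
daggy_42_taskUpdateState_<taskName>, daggy_42_taskAttempt_<taskName>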
8
libdaggy/src/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
target_sources(${PROJECT_NAME} PRIVATE
    Serialization.cpp
    Utilities.cpp
    DAGRunner.cpp
)

add_subdirectory(executors)
add_subdirectory(loggers)
213
libdaggy/src/DAGRunner.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
#include <chrono>
|
||||
#include <daggy/DAGRunner.hpp>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace daggy {
|
||||
DAGRunner::DAGRunner(DAGRunID runID, executors::task::TaskExecutor &executor,
|
||||
loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
|
||||
const TaskParameters &taskParams)
|
||||
: runID_(runID)
|
||||
, executor_(executor)
|
||||
, logger_(logger)
|
||||
, dag_(dag)
|
||||
, taskParams_(taskParams)
|
||||
, running_(true)
|
||||
, kill_(true)
|
||||
, nRunningTasks_(0)
|
||||
, nErroredTasks_(0)
|
||||
{
|
||||
}
|
||||
|
||||
DAGRunner::~DAGRunner()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(runGuard_);
|
||||
}
|
||||
|
||||
TaskDAG DAGRunner::run()
|
||||
{
|
||||
kill_ = false;
|
||||
running_ = true;
|
||||
logger_.updateDAGRunState(runID_, RunState::RUNNING);
|
||||
|
||||
bool allVisited;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(runGuard_);
|
||||
allVisited = dag_.allVisited();
|
||||
}
|
||||
while (!allVisited) {
|
||||
{
|
||||
std::lock_guard<std::mutex> runLock(runGuard_);
|
||||
if (!running_ and kill_) {
|
||||
killRunning();
|
||||
}
|
||||
collectFinished();
|
||||
queuePending();
|
||||
|
||||
if (!running_ and (nRunningTasks_ - nErroredTasks_ <= 0)) {
|
||||
logger_.updateDAGRunState(runID_, RunState::KILLED);
|
||||
break;
|
||||
}
|
||||
|
||||
if (nRunningTasks_ > 0 and nErroredTasks_ == nRunningTasks_) {
|
||||
logger_.updateDAGRunState(runID_, RunState::ERRORED);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(250ms);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(runGuard_);
|
||||
allVisited = dag_.allVisited();
|
||||
}
|
||||
}
|
||||
|
||||
if (dag_.allVisited()) {
|
||||
logger_.updateDAGRunState(runID_, RunState::COMPLETED);
|
||||
}
|
||||
|
||||
running_ = false;
|
||||
return dag_;
|
||||
}
|
||||
|
||||
void DAGRunner::resetRunning()
|
||||
{
|
||||
if (running_)
|
||||
throw std::runtime_error("Unable to reset while DAG is running.");
|
||||
|
||||
std::lock_guard<std::mutex> lock(runGuard_);
|
||||
nRunningTasks_ = 0;
|
||||
nErroredTasks_ = 0;
|
||||
runningTasks_.clear();
|
||||
taskAttemptCounts_.clear();
|
||||
dag_.resetRunning();
|
||||
}
|
||||
|
||||
void DAGRunner::killRunning()
|
||||
{
|
||||
for (const auto &[taskName, _] : runningTasks_) {
|
||||
executor_.stop(runID_, taskName);
|
||||
}
|
||||
}
|
||||
|
||||
void DAGRunner::queuePending()
|
||||
{
|
||||
if (!running_)
|
||||
return;
|
||||
|
||||
// Check for any completed tasks
|
||||
// Add all remaining tasks in a task queue to avoid dominating the thread
|
||||
// pool
|
||||
auto t = dag_.visitNext();
|
||||
while (t.has_value()) {
|
||||
// Schedule the task to run
|
||||
auto &taskName = t.value().first;
|
||||
auto &task = t.value().second;
|
||||
taskAttemptCounts_[taskName] = 1;
|
||||
|
||||
logger_.updateTaskState(runID_, taskName, RunState::RUNNING);
|
||||
runningTasks_.emplace(taskName,
|
||||
executor_.execute(runID_, taskName, task));
|
||||
++nRunningTasks_;
|
||||
|
||||
auto nextTask = dag_.visitNext();
|
||||
if (not nextTask.has_value())
|
||||
break;
|
||||
t.emplace(nextTask.value());
|
||||
}
|
||||
}
|
||||
|
||||
void DAGRunner::collectFinished()
|
||||
{
|
||||
for (auto &[taskName, fut] : runningTasks_) {
|
||||
if (fut.valid() and fut.wait_for(1ms) == std::future_status::ready) {
|
||||
auto attempt = fut.get();
|
||||
logger_.logTaskAttempt(runID_, taskName, attempt);
|
||||
|
||||
// Not a reference, since adding tasks will invalidate references
|
||||
auto vert = dag_.getVertex(taskName);
|
||||
auto &task = vert.data;
|
||||
if (attempt.rc == 0) {
|
||||
logger_.updateTaskState(runID_, taskName, RunState::COMPLETED);
|
||||
if (task.isGenerator) {
|
||||
// Parse the output and update the DAGs
|
||||
try {
|
||||
auto parsedTasks =
|
||||
tasksFromJSON(attempt.outputLog, taskParams_.jobDefaults);
|
||||
auto newTasks =
|
||||
expandTaskSet(parsedTasks, executor_, taskParams_.variables);
|
||||
updateDAGFromTasks(dag_, newTasks);
|
||||
|
||||
// Add in dependencies from current task to new tasks
|
||||
for (const auto &[ntName, ntTask] : newTasks) {
|
||||
logger_.addTask(runID_, ntName, ntTask);
|
||||
task.children.insert(ntName);
|
||||
}
|
||||
|
||||
// Efficiently add new edges from generator task
|
||||
// to children
|
||||
std::unordered_set<std::string> baseNames;
|
||||
for (const auto &[k, v] : parsedTasks) {
|
||||
baseNames.insert(v.definedName);
|
||||
}
|
||||
dag_.addEdgeIf(taskName, [&](const auto &v) {
|
||||
return baseNames.count(v.data.definedName) > 0;
|
||||
});
|
||||
|
||||
logger_.updateTask(runID_, taskName, task);
|
||||
}
|
||||
catch (std::exception &e) {
|
||||
logger_.logTaskAttempt(
|
||||
runID_, taskName,
|
||||
AttemptRecord{
|
||||
.executorLog =
|
||||
std::string{"Failed to parse JSON output: "} +
|
||||
e.what()});
|
||||
logger_.updateTaskState(runID_, taskName, RunState::ERRORED);
|
||||
++nErroredTasks_;
|
||||
}
|
||||
}
|
||||
dag_.completeVisit(taskName);
|
||||
--nRunningTasks_;
|
||||
}
|
||||
else {
|
||||
// RC isn't 0
|
||||
if (taskAttemptCounts_[taskName] <= task.maxRetries) {
|
||||
logger_.updateTaskState(runID_, taskName, RunState::RETRY);
|
||||
runningTasks_[taskName] = executor_.execute(runID_, taskName, task);
|
||||
++taskAttemptCounts_[taskName];
|
||||
}
|
||||
else {
|
||||
if (logger_.getTaskState(runID_, taskName) == +RunState::RUNNING or
|
||||
logger_.getTaskState(runID_, taskName) == +RunState::RETRY) {
|
||||
logger_.updateTaskState(runID_, taskName, RunState::ERRORED);
|
||||
++nErroredTasks_;
|
||||
}
|
||||
else {
|
||||
// Task was killed
|
||||
--nRunningTasks_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DAGRunner::stop(bool kill, bool blocking)
|
||||
{
|
||||
kill_ = kill;
|
||||
running_ = false;
|
||||
|
||||
if (blocking) {
|
||||
while (true) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(runGuard_);
|
||||
if (nRunningTasks_ - nErroredTasks_ == 0)
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(250ms);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace daggy
|
||||
418
libdaggy/src/Serialization.cpp
Normal file
@@ -0,0 +1,418 @@
|
||||
#include <rapidjson/document.h>
|
||||
#include <rapidjson/error/en.h>
|
||||
|
||||
#include "rapidjson/stringbuffer.h"
|
||||
#include "rapidjson/writer.h"
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/Utilities.hpp>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
namespace daggy {
|
||||
void checkRJParse(const rj::ParseResult &result, const std::string &prefix)
|
||||
{
|
||||
if (!result) {
|
||||
std::stringstream ss;
|
||||
ss << (prefix.empty() ? "" : prefix + ':')
|
||||
<< "Error parsing JSON: " << rj::GetParseError_En(result.Code())
|
||||
<< " at byte offset " << result.Offset();
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
std::string dumpJSON(const rj::Value &doc)
|
||||
{
|
||||
rj::StringBuffer buffer;
|
||||
rj::Writer<rj::StringBuffer> writer(buffer);
|
||||
doc.Accept(writer);
|
||||
return buffer.GetString();
|
||||
}
|
||||
|
||||
ConfigValues configFromJSON(const std::string &jsonSpec)
|
||||
{
|
||||
rj::Document doc;
|
||||
checkRJParse(doc.Parse(jsonSpec.c_str()), "Parsing config");
|
||||
return configFromJSON(doc);
|
||||
}
|
||||
|
||||
ConfigValues configFromJSON(const rj::Value &spec)
|
||||
{
|
||||
std::unordered_map<std::string, ConfigValue> parameters;
|
||||
if (!spec.IsObject()) {
|
||||
throw std::runtime_error("Parameters in spec is not a JSON dictionary");
|
||||
}
|
||||
for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
|
||||
if (!it->name.IsString()) {
|
||||
throw std::runtime_error("All keys must be strings.");
|
||||
}
|
||||
std::string name = it->name.GetString();
|
||||
if (it->value.IsArray()) {
|
||||
std::vector<std::string> values;
|
||||
for (size_t i = 0; i < it->value.Size(); ++i) {
|
||||
if (!it->value[i].IsString()) {
|
||||
throw std::runtime_error(
|
||||
"Attribute for " + std::string{it->name.GetString()} +
|
||||
" item " + std::to_string(i) + " is not a string.");
|
||||
}
|
||||
values.emplace_back(it->value[i].GetString());
|
||||
}
|
||||
parameters[name] = values;
|
||||
}
|
||||
else if (it->value.IsString()) {
|
||||
parameters[name] = it->value.GetString();
|
||||
}
|
||||
else {
|
||||
throw std::runtime_error("Attribute for " +
|
||||
std::string{it->name.GetString()} +
|
||||
" is not a string or an array.");
|
||||
}
|
||||
}
|
||||
return parameters;
|
||||
}
|
||||
|
||||
std::string configToJSON(const ConfigValues &config)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << '{';
|
||||
bool first = true;
|
||||
for (const auto &[k, v] : config) {
|
||||
if (first) {
|
||||
first = false;
|
||||
}
|
||||
else {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << std::quoted(k) << ": ";
|
||||
if (std::holds_alternative<std::string>(v)) {
|
||||
ss << std::quoted(std::get<std::string>(v));
|
||||
}
|
||||
else {
|
||||
ss << '[';
|
||||
const auto &values = std::get<std::vector<std::string>>(v);
|
||||
bool firstVal = true;
|
||||
for (const auto &val : values) {
|
||||
if (firstVal) {
|
||||
firstVal = false;
|
||||
}
|
||||
else {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << std::quoted(val);
|
||||
}
|
||||
ss << ']';
|
||||
}
|
||||
}
|
||||
ss << '}';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
Task taskFromJSON(const std::string &name, const rj::Value &spec,
|
||||
const ConfigValues &jobDefaults)
|
||||
{
|
||||
Task task{.definedName = name,
|
||||
.isGenerator = false,
|
||||
.maxRetries = 0,
|
||||
.retryIntervalSeconds = 0,
|
||||
.job = jobDefaults};
|
||||
if (!spec.IsObject()) {
|
||||
throw std::runtime_error("Tasks is not an object");
|
||||
}
|
||||
|
||||
// Grab the standard fields with defaults;
|
||||
if (spec.HasMember("isGenerator")) {
|
||||
task.isGenerator = spec["isGenerator"].GetBool();
|
||||
}
|
||||
|
||||
if (spec.HasMember("maxRetries")) {
|
||||
task.maxRetries = spec["maxRetries"].GetInt();
|
||||
}
|
||||
|
||||
if (spec.HasMember("retryIntervalSeconds")) {
|
||||
task.retryIntervalSeconds = spec["retryIntervalSeconds"].GetInt();
|
||||
}
|
||||
|
||||
// Children / parents
|
||||
if (spec.HasMember("children")) {
|
||||
const auto &specChildren = spec["children"].GetArray();
|
||||
for (size_t c = 0; c < specChildren.Size(); ++c) {
|
||||
task.children.insert(specChildren[c].GetString());
|
||||
}
|
||||
}
|
||||
|
||||
if (spec.HasMember("parents")) {
|
||||
const auto &specParents = spec["parents"].GetArray();
|
||||
for (size_t c = 0; c < specParents.Size(); ++c) {
|
||||
task.parents.insert(specParents[c].GetString());
|
||||
}
|
||||
}
|
||||
|
||||
if (spec.HasMember("job")) {
|
||||
const auto ¶ms = spec["job"];
|
||||
if (!params.IsObject())
|
||||
throw std::runtime_error("job is not a dictionary.");
|
||||
for (auto it = params.MemberBegin(); it != params.MemberEnd(); ++it) {
|
||||
if (!it->name.IsString())
|
||||
throw std::runtime_error("job key must be a string.");
|
||||
if (it->value.IsArray()) {
|
||||
std::vector<std::string> values;
|
||||
for (size_t i = 0; i < it->value.Size(); ++i) {
|
||||
values.emplace_back(it->value[i].GetString());
|
||||
}
|
||||
task.job.insert_or_assign(it->name.GetString(), values);
|
||||
}
|
||||
else {
|
||||
task.job.insert_or_assign(it->name.GetString(),
|
||||
it->value.GetString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return task;
|
||||
}
|
||||
|
||||
Task taskFromJSON(const std::string &name, const std::string &spec,
|
||||
const ConfigValues &jobDefaults)
|
||||
{
|
||||
rj::Document doc;
|
||||
checkRJParse(doc.Parse(spec.c_str()));
|
||||
return taskFromJSON(name, doc, jobDefaults);
|
||||
}
|
||||
|
||||
TaskSet tasksFromJSON(const std::string &jsonSpec,
|
||||
const ConfigValues &jobDefaults)
|
||||
{
|
||||
rj::Document doc;
|
||||
checkRJParse(doc.Parse(jsonSpec.c_str()));
|
||||
return tasksFromJSON(doc, jobDefaults);
|
||||
}
|
||||
|
||||
TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults)
|
||||
{
|
||||
TaskSet tasks;
|
||||
if (!spec.IsObject()) {
|
||||
throw std::runtime_error("Tasks is not an object");
|
||||
}
|
||||
|
||||
// Tasks
|
||||
for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
|
||||
if (!it->name.IsString())
|
||||
throw std::runtime_error("Task names must be a string.");
|
||||
if (!it->value.IsObject())
|
||||
throw std::runtime_error("Task definitions must be an object.");
|
||||
const auto &taskName = it->name.GetString();
|
||||
tasks.emplace(taskName, taskFromJSON(taskName, it->value, jobDefaults));
|
||||
}
|
||||
|
||||
// Normalize tasks so all the children are populated
|
||||
for (auto &[k, v] : tasks) {
|
||||
for (const auto &p : v.parents) {
|
||||
tasks[p].children.insert(k);
|
||||
}
|
||||
v.parents.clear();
|
||||
}
|
||||
|
||||
return tasks;
|
||||
}
|
||||
|
||||
// I really want to do this with rapidjson, but damn they make it ugly and
|
||||
// difficult. So we'll shortcut and generate the JSON directly.
|
||||
std::string taskToJSON(const Task &task)
|
||||
{
|
||||
std::stringstream ss;
|
||||
bool first;
|
||||
|
||||
ss << "{"
|
||||
<< R"("maxRetries": )" << task.maxRetries << ','
|
||||
<< R"("retryIntervalSeconds": )" << task.retryIntervalSeconds << ',';
|
||||
|
||||
ss << R"("job": )" << configToJSON(task.job) << ',';
|
||||
|
||||
ss << R"("children": [)";
|
||||
first = true;
|
||||
for (const auto &child : task.children) {
|
||||
if (!first)
|
||||
ss << ',';
|
||||
ss << std::quoted(child);
|
||||
first = false;
|
||||
}
|
||||
ss << "],";
|
||||
|
||||
ss << R"("parents": [)";
|
||||
first = true;
|
||||
for (const auto &parent : task.parents) {
|
||||
if (!first)
|
||||
ss << ',';
|
||||
ss << std::quoted(parent);
|
||||
first = false;
|
||||
}
|
||||
ss << "],";
|
||||
|
||||
ss << R"("isGenerator": )" << (task.isGenerator ? "true" : "false");
|
||||
|
||||
ss << '}';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string tasksToJSON(const TaskSet &tasks)
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
||||
ss << "{";
|
||||
|
||||
bool first = true;
|
||||
for (const auto &[name, task] : tasks) {
|
||||
if (!first)
|
||||
ss << ',';
|
||||
ss << std::quoted(name) << ": " << taskToJSON(task);
|
||||
first = false;
|
||||
}
|
||||
ss << "}";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const Task &task)
|
||||
{
|
||||
os << taskToJSON(task);
|
||||
return os;
|
||||
}
|
||||
|
||||
std::string attemptRecordToJSON(const AttemptRecord &record)
|
||||
{
|
||||
rj::Document doc;
|
||||
doc.SetObject();
|
||||
auto &alloc = doc.GetAllocator();
|
||||
|
||||
auto startTime = timePointToString(record.startTime);
|
||||
doc.AddMember(
|
||||
"startTime",
|
||||
rj::Value().SetString(startTime.c_str(), startTime.size(), alloc),
|
||||
alloc);
|
||||
|
||||
auto stopTime = timePointToString(record.stopTime);
|
||||
doc.AddMember(
|
||||
"stopTime",
|
||||
rj::Value().SetString(stopTime.c_str(), stopTime.size(), alloc), alloc);
|
||||
|
||||
doc.AddMember("rc", rj::Value().SetInt(record.rc), alloc);
|
||||
|
||||
doc.AddMember("outputLog",
|
||||
rj::Value().SetString(record.outputLog.c_str(),
|
||||
record.outputLog.size(), alloc),
|
||||
alloc);
|
||||
|
||||
doc.AddMember("errorLog",
|
||||
rj::Value().SetString(record.errorLog.c_str(),
|
||||
record.errorLog.size(), alloc),
|
||||
alloc);
|
||||
|
||||
doc.AddMember("executorLog",
|
||||
rj::Value().SetString(record.executorLog.c_str(),
|
||||
record.executorLog.size(), alloc),
|
||||
alloc);
|
||||
|
||||
return dumpJSON(doc);
|
||||
}
|
||||
|
||||
AttemptRecord attemptRecordFromJSON(const std::string &json)
|
||||
{
|
||||
rj::Document doc;
|
||||
checkRJParse(doc.Parse(json.c_str()), "Parsing AttemptRecord");
|
||||
return attemptRecordFromJSON(doc);
|
||||
}
|
||||
|
||||
AttemptRecord attemptRecordFromJSON(const rj::Value &spec)
|
||||
{
|
||||
AttemptRecord rec;
|
||||
rec.startTime = stringToTimePoint(spec["startTime"].GetString());
|
||||
rec.stopTime = stringToTimePoint(spec["stopTime"].GetString());
|
||||
rec.rc = spec["rc"].GetInt();
|
||||
rec.executorLog = spec["executorLog"].GetString();
|
||||
rec.outputLog = spec["outputLog"].GetString();
|
||||
rec.errorLog = spec["errorLog"].GetString();
|
||||
|
||||
return rec;
|
||||
}
|
||||
|
||||
std::string timePointToString(const TimePoint &tp)
|
||||
{
|
||||
return std::to_string(tp.time_since_epoch().count());
|
||||
}
|
||||
|
||||
TimePoint stringToTimePoint(const std::string &timeString)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
size_t nanos = std::stoull(timeString);
|
||||
nanoseconds dur(nanos);
|
||||
|
||||
return TimePoint(dur);
|
||||
}
|
||||
|
||||
DAGSpec dagFromJSON(const rj::Value &spec)
|
||||
{
|
||||
DAGSpec info;
|
||||
|
||||
if (!spec.IsObject()) {
|
||||
throw std::runtime_error("Payload is not a dictionary.");
|
||||
}
|
||||
if (!spec.HasMember("tag")) {
|
||||
throw std::runtime_error("DAG Run is missing a name.");
|
||||
}
|
||||
if (!spec.HasMember("tasks")) {
|
||||
throw std::runtime_error("DAG Run has no tasks.");
|
||||
}
|
||||
|
||||
info.tag = spec["tag"].GetString();
|
||||
|
||||
// Get parameters if there are any
|
||||
if (spec.HasMember("parameters")) {
|
||||
info.taskConfig.variables = configFromJSON(spec["parameters"]);
|
||||
}
|
||||
|
||||
// Job Defaults
|
||||
if (spec.HasMember("jobDefaults")) {
|
||||
info.taskConfig.jobDefaults = configFromJSON(spec["jobDefaults"]);
|
||||
}
|
||||
|
||||
// Get the tasks
|
||||
info.tasks = tasksFromJSON(spec["tasks"], info.taskConfig.jobDefaults);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
DAGSpec dagFromJSON(const std::string &jsonSpec)
|
||||
{
|
||||
rj::Document doc;
|
||||
checkRJParse(doc.Parse(jsonSpec.c_str()), "Parsing config");
|
||||
return dagFromJSON(doc);
|
||||
}
|
||||
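For orientation, the smallest payload dagFromJSON accepts looks roughly like the sketch below; "tag" and "tasks" are required, "parameters" and "jobDefaults" are optional, and the task shape mirrors the unit tests later in this commit (all values here are illustrative):

// Illustrative DAG spec payload, not part of the library.
const std::string minimalSpec = R"({
  "tag": "nightly",
  "parameters": {"DATE": ["2021-05-06", "2021-05-07"]},
  "jobDefaults": {},
  "tasks": {
    "A": {"job": {"command": ["/bin/echo", "{{DATE}}"]}, "children": ["B"]},
    "B": {"job": {"command": ["/bin/echo", "done"]}}
  }
})";
// daggy::DAGSpec spec = daggy::dagFromJSON(minimalSpec);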
|
||||
std::string stateUpdateRecordToJSON(const logger::StateUpdateRecord &rec)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << R"({ "time": )" << std::quoted(timePointToString(rec.time))
|
||||
<< R"(, "state": )" << std::quoted(rec.state._to_string()) << "}";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
logger::StateUpdateRecord stateUpdateRecordFromJSON(const rj::Value &json)
|
||||
{
|
||||
logger::StateUpdateRecord rec{.state = RunState::QUEUED};
|
||||
if (!json.HasMember("time"))
|
||||
throw std::runtime_error("StateUpdateRecord missing required field time");
|
||||
if (!json.HasMember("state"))
|
||||
throw std::runtime_error(
|
||||
"StateUpdateRecord missing required field state");
|
||||
|
||||
rec.state = RunState::_from_string(json["state"].GetString());
|
||||
rec.time = stringToTimePoint(json["time"].GetString());
|
||||
return rec;
|
||||
}
|
||||
logger::StateUpdateRecord stateUpdateRecordFromJSON(const std::string &json)
|
||||
{
|
||||
rj::Document doc;
|
||||
checkRJParse(doc.Parse(json.c_str()), "Parsing StateUpdateRecord");
|
||||
return stateUpdateRecordFromJSON(doc);
|
||||
}
|
||||
} // namespace daggy
|
||||
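As a quick sanity sketch of the serialization helpers above (assuming the declarations live in daggy/Serialization.hpp, as the sources below include them):

#include <daggy/Serialization.hpp>

#include <cassert>
#include <iostream>

int main() {
  daggy::AttemptRecord rec{.rc = 0, .outputLog = "ok"};
  rec.startTime = daggy::Clock::now();
  rec.stopTime = rec.startTime;

  // Timestamps travel as nanosecond counts since the epoch (see
  // timePointToString / stringToTimePoint above); the rest is plain JSON.
  const std::string json = daggy::attemptRecordToJSON(rec);
  const auto back = daggy::attemptRecordFromJSON(json);

  assert(back.rc == rec.rc);
  assert(back.outputLog == rec.outputLog);
  std::cout << json << '\n';
  return 0;
}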
142
libdaggy/src/Utilities.cpp
Normal file
@@ -0,0 +1,142 @@
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/Utilities.hpp>
|
||||
#include <future>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace daggy {
|
||||
std::string globalSub(std::string string, const std::string &pattern,
|
||||
const std::string &replacement)
|
||||
{
|
||||
size_t pos = string.find(pattern);
|
||||
while (pos != std::string::npos) {
|
||||
string.replace(pos, pattern.size(), replacement);
|
||||
pos = string.find(pattern, pos + replacement.size());
|
||||
}
|
||||
return string;
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::string>> interpolateValues(
|
||||
const std::vector<std::string> &raw, const ConfigValues &values)
|
||||
{
|
||||
std::vector<std::vector<std::string>> cooked{{}};
|
||||
|
||||
for (const auto &part : raw) {
|
||||
std::vector<std::string> expandedPart{part};
|
||||
|
||||
// Find all values of parameters, and expand them
|
||||
for (const auto &[paramRaw, paramValue] : values) {
|
||||
std::string param = "{{" + paramRaw + "}}";
|
||||
auto pos = part.find(param);
|
||||
if (pos == std::string::npos)
|
||||
continue;
|
||||
std::vector<std::string> newExpandedPart;
|
||||
|
||||
if (std::holds_alternative<std::string>(paramValue)) {
|
||||
for (auto &cmd : expandedPart) {
|
||||
newExpandedPart.push_back(
|
||||
globalSub(cmd, param, std::get<std::string>(paramValue)));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (const auto &val :
|
||||
std::get<std::vector<std::string>>(paramValue)) {
|
||||
for (const auto &cmd : expandedPart) {
|
||||
newExpandedPart.push_back(globalSub(cmd, param, val));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expandedPart.swap(newExpandedPart);
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::string>> newCommands;
|
||||
for (const auto &newPart : expandedPart) {
|
||||
for (auto cmd : cooked) {
|
||||
cmd.push_back(newPart);
|
||||
newCommands.emplace_back(cmd);
|
||||
}
|
||||
}
|
||||
cooked.swap(newCommands);
|
||||
}
|
||||
return cooked;
|
||||
}
|
||||
|
||||
TaskSet expandTaskSet(const TaskSet &tasks,
|
||||
executors::task::TaskExecutor &executor,
|
||||
const ConfigValues &interpolatedValues)
|
||||
{
|
||||
// Expand the tasks first
|
||||
TaskSet newTaskSet;
|
||||
for (const auto &[baseName, task] : tasks) {
|
||||
executor.validateTaskParameters(task.job);
|
||||
const auto newJobs =
|
||||
executor.expandTaskParameters(task.job, interpolatedValues);
|
||||
size_t i = 0;
|
||||
for (const auto &newJob : newJobs) {
|
||||
Task newTask{task};
|
||||
newTask.job = newJob;
|
||||
newTaskSet.emplace(baseName + "_" + std::to_string(i), newTask);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
return newTaskSet;
|
||||
}
|
||||
|
||||
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks)
|
||||
{
|
||||
// Add the missing vertices
|
||||
for (const auto &[name, task] : tasks) {
|
||||
dag.addVertex(name, task);
|
||||
}
|
||||
|
||||
// Add edges
|
||||
for (const auto &[name, t] : tasks) {
|
||||
const auto &task = t;
|
||||
dag.addEdgeIf(name, [&task](const auto &v) {
|
||||
return task.children.count(v.data.definedName) > 0;
|
||||
});
|
||||
}
|
||||
|
||||
if (!dag.isValid()) {
|
||||
throw std::runtime_error("DAG contains a cycle");
|
||||
}
|
||||
}
|
||||
|
||||
TaskDAG buildDAGFromTasks(
|
||||
const TaskSet &tasks,
|
||||
const std::unordered_map<std::string,
|
||||
std::vector<loggers::dag_run::StateUpdateRecord>>
|
||||
&updates)
|
||||
{
|
||||
TaskDAG dag;
|
||||
updateDAGFromTasks(dag, tasks);
|
||||
|
||||
// Replay any updates
|
||||
for (const auto &[taskName, taskUpdates] : updates) {
|
||||
for (const auto &update : taskUpdates) {
|
||||
switch (update.state) {
|
||||
case RunState::RUNNING:
|
||||
case RunState::RETRY:
|
||||
case RunState::PAUSED:
|
||||
case RunState::ERRORED:
|
||||
case RunState::KILLED:
|
||||
dag.setVertexState(taskName, RunState::RUNNING);
|
||||
break;
|
||||
case RunState::COMPLETED:
|
||||
case RunState::QUEUED:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dag;
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const TimePoint &tp)
|
||||
{
|
||||
os << tp.time_since_epoch().count() << std::endl;
|
||||
return os;
|
||||
}
|
||||
} // namespace daggy
|
||||
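A small usage sketch for the parameter interpolation above: a list-valued parameter fans one raw argv out into one argv per value (the parameter name and values are made up):

#include <daggy/Utilities.hpp>

#include <iostream>
#include <string>
#include <vector>

int main() {
  // ConfigValues maps a parameter name to a string or a list of strings,
  // as used throughout this file.
  daggy::ConfigValues values{
      {"DATE", std::vector<std::string>{"2021-05-06", "2021-05-07"}}};

  auto expanded =
      daggy::interpolateValues({"/usr/bin/touch", "{{DATE}}"}, values);

  for (const auto &argv : expanded) {  // two commands, one per DATE value
    std::cout << argv[0] << ' ' << argv[1] << '\n';
  }
  return 0;
}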
1
libdaggy/src/executors/CMakeLists.txt
Normal file
@@ -0,0 +1 @@
add_subdirectory(task)
5
libdaggy/src/executors/task/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
target_sources(${PROJECT_NAME} PRIVATE
    SlurmTaskExecutor.cpp
    NoopTaskExecutor.cpp
    ForkingTaskExecutor.cpp
)
229
libdaggy/src/executors/task/ForkingTaskExecutor.cpp
Normal file
@@ -0,0 +1,229 @@
|
||||
#include <fcntl.h>
|
||||
#include <poll.h>
|
||||
#include <unistd.h>
|
||||
#include <wait.h>
|
||||
|
||||
#include <daggy/Utilities.hpp>
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <thread>
|
||||
|
||||
using namespace daggy::executors::task;
|
||||
|
||||
std::string slurp(int fd)
|
||||
{
|
||||
std::string result;
|
||||
|
||||
const ssize_t BUFFER_SIZE = 4096;
|
||||
char buffer[BUFFER_SIZE];
|
||||
|
||||
struct pollfd pfd
|
||||
{
|
||||
.fd = fd, .events = POLLIN, .revents = 0
|
||||
};
|
||||
poll(&pfd, 1, 1);
|
||||
|
||||
while (pfd.revents & POLLIN) {
|
||||
ssize_t bytes = read(fd, buffer, BUFFER_SIZE);
|
||||
if (bytes == 0) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
result.append(buffer, bytes);
|
||||
}
|
||||
pfd.revents = 0;
|
||||
poll(&pfd, 1, 1);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
ForkingTaskExecutor::ForkingTaskExecutor(size_t nThreads)
|
||||
: tp_(nThreads)
|
||||
{
|
||||
}
|
||||
|
||||
ForkingTaskExecutor::~ForkingTaskExecutor()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(taskControlsGuard_);
|
||||
taskControls_.clear();
|
||||
}
|
||||
|
||||
bool ForkingTaskExecutor::stop(DAGRunID runID, const std::string &taskName)
|
||||
{
|
||||
std::string key = std::to_string(runID) + "_" + taskName;
|
||||
std::lock_guard<std::mutex> lock(taskControlsGuard_);
|
||||
auto it = taskControls_.find(key);
|
||||
if (it == taskControls_.end())
|
||||
return true;
|
||||
it->second = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::future<daggy::AttemptRecord> ForkingTaskExecutor::execute(
|
||||
DAGRunID runID, const std::string &taskName, const Task &task)
|
||||
{
|
||||
std::string key = std::to_string(runID) + "_" + taskName;
|
||||
std::lock_guard<std::mutex> lock(taskControlsGuard_);
|
||||
auto [it, ins] = taskControls_.emplace(key, true);
|
||||
auto &running = it->second;
|
||||
return tp_.addTask([this, task, &running, key]() {
|
||||
auto ret = this->runTask(task, running);
|
||||
std::lock_guard<std::mutex> lock(this->taskControlsGuard_);
|
||||
this->taskControls_.extract(key);
|
||||
return ret;
|
||||
});
|
||||
}
|
||||
|
||||
daggy::AttemptRecord ForkingTaskExecutor::runTask(const Task &task,
|
||||
std::atomic<bool> &running)
|
||||
{
|
||||
AttemptRecord rec;
|
||||
|
||||
rec.startTime = Clock::now();
|
||||
|
||||
// Need to convert the strings
|
||||
std::vector<char *> argv;
|
||||
std::vector<char *> envp;
|
||||
|
||||
// Populate the command
|
||||
Command command;
|
||||
if (task.job.count("commandString")) {
|
||||
std::stringstream ss;
|
||||
ss << std::get<std::string>(task.job.at("commandString"));
|
||||
std::string tok;
|
||||
while (ss >> std::quoted(tok)) {
|
||||
command.push_back(tok);
|
||||
}
|
||||
}
|
||||
else {
|
||||
const auto cmd = std::get<Command>(task.job.at("command"));
|
||||
std::copy(cmd.begin(), cmd.end(), std::back_inserter(command));
|
||||
}
|
||||
std::transform(
|
||||
command.begin(), command.end(), std::back_inserter(argv),
|
||||
[](const std::string &s) { return const_cast<char *>(s.c_str()); });
|
||||
argv.push_back(nullptr);
|
||||
|
||||
// Populate the environment
|
||||
auto environment = (task.job.count("environment") == 0
|
||||
? std::vector<std::string>{}
|
||||
: std::get<Command>(task.job.at("environment")));
|
||||
std::transform(
|
||||
environment.begin(), environment.end(), std::back_inserter(envp),
|
||||
[](const std::string &s) { return const_cast<char *>(s.c_str()); });
|
||||
envp.push_back(nullptr);
|
||||
|
||||
// Create the pipe
|
||||
int stdoutPipe[2];
|
||||
int pipeRC = pipe2(stdoutPipe, O_DIRECT);
|
||||
if (pipeRC != 0)
|
||||
throw std::runtime_error("Unable to create pipe for stdout");
|
||||
int stderrPipe[2];
|
||||
pipeRC = pipe2(stderrPipe, O_DIRECT);
|
||||
if (pipeRC != 0)
|
||||
throw std::runtime_error("Unable to create pipe for stderr");
|
||||
|
||||
pid_t child = fork();
|
||||
if (child < 0) {
|
||||
throw std::runtime_error("Unable to fork child");
|
||||
}
|
||||
else if (child == 0) { // child
|
||||
while ((dup2(stdoutPipe[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {
|
||||
}
|
||||
while ((dup2(stderrPipe[1], STDERR_FILENO) == -1) && (errno == EINTR)) {
|
||||
}
|
||||
close(stdoutPipe[0]);
|
||||
close(stderrPipe[0]);
|
||||
char **env = (envp.empty() ? nullptr : envp.data());
|
||||
auto res = execvpe(argv[0], argv.data(), env);
|
||||
std::cout << res << std::endl;
|
||||
exit(errno);
|
||||
}

// Parent: close the write ends so the reader threads see EOF once the
// child exits (the child received its own copies via dup2 above).
close(stdoutPipe[1]);
close(stderrPipe[1]);
|
||||
|
||||
std::atomic<bool> reading = true;
|
||||
std::thread stdoutReader([&]() {
|
||||
while (reading)
|
||||
rec.outputLog.append(slurp(stdoutPipe[0]));
|
||||
});
|
||||
std::thread stderrReader([&]() {
|
||||
while (reading)
|
||||
rec.errorLog.append(slurp(stderrPipe[0]));
|
||||
});
|
||||
|
||||
siginfo_t childInfo;
|
||||
while (running) {
|
||||
childInfo.si_pid = 0;
|
||||
waitid(P_PID, child, &childInfo, WEXITED | WNOHANG);
|
||||
if (childInfo.si_pid > 0) {
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(250ms);
|
||||
}
|
||||
|
||||
if (!running) {
|
||||
rec.executorLog = "Killed";
|
||||
// Send the kills until pid is dead
|
||||
while (kill(child, SIGKILL) != -1) {
|
||||
// Need to collect the child to avoid a zombie process
|
||||
waitid(P_PID, child, &childInfo, WEXITED | WNOHANG);
|
||||
std::this_thread::sleep_for(50ms);
|
||||
}
|
||||
}
|
||||
|
||||
reading = false;
|
||||
|
||||
rec.stopTime = Clock::now();
|
||||
if (childInfo.si_pid > 0) {
|
||||
rec.rc = childInfo.si_status;
|
||||
}
|
||||
else {
|
||||
rec.rc = -1;
|
||||
}
|
||||
|
||||
stdoutReader.join();
|
||||
stderrReader.join();
|
||||
|
||||
close(stdoutPipe[0]);
|
||||
close(stderrPipe[0]);
|
||||
|
||||
return rec;
|
||||
}
|
||||
|
||||
bool ForkingTaskExecutor::validateTaskParameters(const ConfigValues &job)
|
||||
{
|
||||
// command or commandString is required
|
||||
if (job.count("command")) {
|
||||
if (!std::holds_alternative<Command>(job.at("command")))
|
||||
throw std::runtime_error(R"(command must be an array of strings)");
|
||||
}
|
||||
else {
|
||||
if (job.count("commandString") == 0) {
|
||||
throw std::runtime_error(R"(command or commandString must be defined.)");
|
||||
}
|
||||
if (!std::holds_alternative<std::string>(job.at("commandString")))
|
||||
throw std::runtime_error(R"(commandString must be a string)");
|
||||
}
|
||||
|
||||
if (job.count("environment")) {
|
||||
if (!std::holds_alternative<Command>(job.at("environment")))
|
||||
throw std::runtime_error(R"(environment must be an array of strings)");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<daggy::ConfigValues> ForkingTaskExecutor::expandTaskParameters(
|
||||
const ConfigValues &job, const ConfigValues &expansionValues)
|
||||
{
|
||||
std::vector<ConfigValues> newValues;
|
||||
|
||||
const auto command = std::get<Command>(job.at("command"));
|
||||
for (const auto &expandedCommand :
|
||||
interpolateValues(command, expansionValues)) {
|
||||
ConfigValues newCommand{job};
|
||||
newCommand.at("command") = expandedCommand;
|
||||
newValues.emplace_back(newCommand);
|
||||
}
|
||||
|
||||
return newValues;
|
||||
}
|
||||
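A rough end-to-end sketch of the forking executor (Linux-only, like the implementation above); the Task field names follow the unit tests later in this commit, and the command is illustrative:

#include <daggy/executors/task/ForkingTaskExecutor.hpp>

#include <iostream>

int main() {
  daggy::executors::task::ForkingTaskExecutor ex(2);  // 2 pool threads

  daggy::Task task{
      .definedName{"hello"},
      .job{{"command", std::vector<std::string>{"/bin/echo", "hello"}}}};

  ex.validateTaskParameters(task.job);

  // execute() forks the command on a pool thread and resolves the future
  // with the exit code and the captured stdout/stderr.
  auto attempt = ex.execute(/*runID=*/1, "hello", task).get();
  std::cout << "rc=" << attempt.rc << " stdout=" << attempt.outputLog;
  return 0;
}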
51
libdaggy/src/executors/task/NoopTaskExecutor.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#include <daggy/Utilities.hpp>
|
||||
#include <daggy/executors/task/NoopTaskExecutor.hpp>
|
||||
|
||||
namespace daggy::executors::task {
|
||||
std::future<daggy::AttemptRecord> NoopTaskExecutor::execute(
|
||||
DAGRunID runID, const std::string &taskName, const Task &task)
|
||||
{
|
||||
std::promise<daggy::AttemptRecord> promise;
|
||||
auto ts = Clock::now();
|
||||
promise.set_value(AttemptRecord{.startTime = ts,
|
||||
.stopTime = ts,
|
||||
.rc = 0,
|
||||
.executorLog = taskName,
|
||||
.outputLog = taskName,
|
||||
.errorLog = taskName});
|
||||
return promise.get_future();
|
||||
}
|
||||
|
||||
bool NoopTaskExecutor::validateTaskParameters(const ConfigValues &job)
|
||||
{
|
||||
auto it = job.find("command");
|
||||
if (it == job.end())
|
||||
throw std::runtime_error(R"(job does not have a "command" argument)");
|
||||
if (!std::holds_alternative<Command>(it->second))
|
||||
throw std::runtime_error(
|
||||
R"(taskParameter's "command" must be an array of strings)");
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<daggy::ConfigValues> NoopTaskExecutor::expandTaskParameters(
|
||||
const ConfigValues &job, const ConfigValues &expansionValues)
|
||||
{
|
||||
std::vector<ConfigValues> newValues;
|
||||
|
||||
const auto command = std::get<Command>(job.at("command"));
|
||||
for (const auto &expandedCommand :
|
||||
interpolateValues(command, expansionValues)) {
|
||||
ConfigValues newCommand{job};
|
||||
newCommand.at("command") = expandedCommand;
|
||||
newValues.emplace_back(newCommand);
|
||||
}
|
||||
|
||||
return newValues;
|
||||
}
|
||||
|
||||
bool NoopTaskExecutor::stop(DAGRunID runID, const std::string &taskName)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace daggy::executors::task
|
||||
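The no-op executor resolves immediately with rc 0 and echoes the task name into all three log fields, which makes it convenient for exercising DAG traversal without spawning processes; a minimal sketch:

#include <daggy/executors/task/NoopTaskExecutor.hpp>

#include <cassert>

int main() {
  daggy::executors::task::NoopTaskExecutor ex;

  daggy::Task task{
      .definedName{"noop"},
      .job{{"command", std::vector<std::string>{"/bin/true"}}}};

  // The returned future is already satisfied.
  auto attempt = ex.execute(/*runID=*/1, "noop", task).get();
  assert(attempt.rc == 0);
  assert(attempt.outputLog == "noop");
  return 0;
}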
347
libdaggy/src/executors/task/SlurmTaskExecutor.cpp
Normal file
@@ -0,0 +1,347 @@
|
||||
#include <iomanip>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#ifdef DAGGY_ENABLE_SLURM
|
||||
#include <slurm/slurm.h>
|
||||
#include <string.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <csignal>
|
||||
#include <cstdlib>
|
||||
#include <daggy/Utilities.hpp>
|
||||
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <random>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace daggy::executors::task {
|
||||
std::string getUniqueTag(size_t nChars = 6)
|
||||
{
|
||||
std::string result(nChars, '\0');
|
||||
static std::random_device dev;
|
||||
static std::mt19937 rng(dev());
|
||||
|
||||
std::uniform_int_distribution<int> dist(0, 61);
|
||||
|
||||
const char *v =
|
||||
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
|
||||
for (size_t i = 0; i < nChars; i++) {
|
||||
result[i] = v[dist(rng)];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void readAndClean(const fs::path &fn, std::string &dest)
|
||||
{
|
||||
if (!fs::exists(fn))
|
||||
return;
|
||||
|
||||
std::ifstream ifh;
|
||||
ifh.open(fn);
|
||||
std::string contents(std::istreambuf_iterator<char>{ifh}, {});
|
||||
ifh.close();
|
||||
fs::remove_all(fn);
|
||||
|
||||
dest.swap(contents);
|
||||
}
|
||||
|
||||
SlurmTaskExecutor::SlurmTaskExecutor()
|
||||
: running_(true)
|
||||
, monitorWorker_(&SlurmTaskExecutor::monitor, this)
|
||||
{
|
||||
std::string priority =
|
||||
"SLURM_PRIO_PROCESS=" + std::to_string(getpriority(PRIO_PROCESS, 0));
|
||||
std::string submitDir = "SLURM_SUBMIT_DIR=" + fs::current_path().string();
|
||||
|
||||
const size_t MAX_HOSTNAME_LENGTH = 50;
|
||||
std::string submitHost(MAX_HOSTNAME_LENGTH, '\0');
|
||||
gethostname(submitHost.data(), MAX_HOSTNAME_LENGTH);
|
||||
submitHost = "SLURM_SUBMIT_HOST=" + submitHost;
|
||||
submitHost.resize(submitHost.find('\0'));
|
||||
|
||||
uint32_t mask = umask(0);
|
||||
umask(mask); // Restore the old mask
|
||||
|
||||
std::stringstream ss;
|
||||
ss << "SLURM_UMASK=0" << uint32_t{((mask >> 6) & 07)}
|
||||
<< uint32_t{((mask >> 3) & 07)} << uint32_t{(mask & 07)};
|
||||
|
||||
// Set some environment variables. putenv() keeps the pointer it is given,
// so pass heap copies: the strings above are constructor locals and
// ss.str() is a temporary.
putenv(strdup(priority.c_str()));
putenv(strdup(submitDir.c_str()));
putenv(strdup(submitHost.c_str()));
putenv(strdup(ss.str().c_str()));
|
||||
}
|
||||
|
||||
SlurmTaskExecutor::~SlurmTaskExecutor()
|
||||
{
|
||||
running_ = false;
|
||||
monitorWorker_.join();
|
||||
|
||||
// Resolve the remaining futures
|
||||
std::lock_guard<std::mutex> lock(promiseGuard_);
|
||||
for (auto &[jobID, job] : runningJobs_) {
|
||||
job.prom.set_value(
|
||||
AttemptRecord{.rc = -1, .executorLog = "executor killed"});
|
||||
}
|
||||
runningJobs_.clear();
|
||||
}
|
||||
|
||||
// Validates the job to ensure that all required values are set and are of
|
||||
// the right type,
|
||||
bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job)
|
||||
{
|
||||
const std::unordered_set<std::string> requiredFields{
|
||||
"minCPUs", "minMemoryMB", "minTmpDiskMB", "priority",
|
||||
"timeLimitSeconds", "userID", "workDir", "tmpDir"};
|
||||
|
||||
for (const auto &requiredField : requiredFields) {
|
||||
if (job.count(requiredField) == 0) {
|
||||
throw std::runtime_error("Missing field " + requiredField);
|
||||
}
|
||||
}
|
||||
|
||||
// Require command or commandString
|
||||
if (job.count("command") + job.count("commandString") == 0)
|
||||
throw std::runtime_error(
|
||||
"Either command or commandString must be specified");
|
||||
|
||||
if (job.count("environment")) {
|
||||
if (!std::holds_alternative<Command>(job.at("environment")))
|
||||
throw std::runtime_error(R"(environment must be an array of strings)");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<ConfigValues> SlurmTaskExecutor::expandTaskParameters(
|
||||
const ConfigValues &job, const ConfigValues &expansionValues)
|
||||
{
|
||||
std::vector<ConfigValues> newValues;
|
||||
|
||||
const auto command = std::get<Command>(job.at("command"));
|
||||
for (const auto &expandedCommand :
|
||||
interpolateValues(command, expansionValues)) {
|
||||
ConfigValues newCommand{job};
|
||||
newCommand.at("command") = expandedCommand;
|
||||
newValues.emplace_back(newCommand);
|
||||
}
|
||||
|
||||
return newValues;
|
||||
}
|
||||
|
||||
std::future<AttemptRecord> SlurmTaskExecutor::execute(
|
||||
DAGRunID runID, const std::string &taskName, const Task &task)
|
||||
{
|
||||
std::stringstream executorLog;
|
||||
|
||||
const auto &job = task.job;
|
||||
const auto uniqueTaskName = taskName + "_" + getUniqueTag(6);
|
||||
|
||||
fs::path tmpDir = std::get<std::string>(job.at("tmpDir"));
|
||||
std::string stdoutFile = (tmpDir / (uniqueTaskName + ".stdout")).string();
|
||||
std::string stderrFile = (tmpDir / (uniqueTaskName + ".stderr")).string();
|
||||
std::string workDir = std::get<std::string>(job.at("workDir"));
|
||||
|
||||
// Convert command to argc / argv
|
||||
std::vector<char *> argv{nullptr};
|
||||
// Populate the command
|
||||
Command command;
|
||||
if (task.job.count("commandString")) {
|
||||
std::stringstream ss;
|
||||
ss << std::get<std::string>(task.job.at("commandString"));
|
||||
std::string tok;
|
||||
while (ss >> std::quoted(tok)) {
|
||||
command.push_back(tok);
|
||||
}
|
||||
}
|
||||
else {
|
||||
const auto cmd = std::get<Command>(task.job.at("command"));
|
||||
std::copy(cmd.begin(), cmd.end(), std::back_inserter(command));
|
||||
}
|
||||
std::transform(
|
||||
command.begin(), command.end(), std::back_inserter(argv),
|
||||
[](const std::string &s) { return const_cast<char *>(s.c_str()); });
|
||||
argv.push_back(nullptr);
|
||||
|
||||
std::vector<std::string> env{""};
|
||||
std::vector<char *> envp;
|
||||
auto it = task.job.find("environment");
|
||||
if (it != task.job.end()) {
|
||||
const auto environment = std::get<Command>(task.job.at("environment"));
|
||||
std::copy(environment.begin(), environment.end(),
|
||||
std::back_inserter(env));
|
||||
}
|
||||
std::transform(
|
||||
env.begin(), env.end(), std::back_inserter(envp),
|
||||
[](const std::string &s) { return const_cast<char *>(s.c_str()); });
|
||||
|
||||
char script[] = "#!/bin/bash\n$@\n";
|
||||
char stdinFile[] = "/dev/null";
|
||||
|
||||
// taken from slurm
|
||||
int error_code;
|
||||
job_desc_msg_t jd;
|
||||
submit_response_msg_t *resp_msg;
|
||||
|
||||
slurm_init_job_desc_msg(&jd);
|
||||
jd.contiguous = 1;
|
||||
jd.name = const_cast<char *>(taskName.c_str());
|
||||
jd.min_cpus = std::stoi(std::get<std::string>(job.at("minCPUs")));
|
||||
|
||||
jd.pn_min_memory = std::stoi(std::get<std::string>(job.at("minMemoryMB")));
|
||||
jd.pn_min_tmp_disk =
|
||||
std::stoi(std::get<std::string>(job.at("minTmpDiskMB")));
|
||||
jd.priority = std::stoi(std::get<std::string>(job.at("priority")));
|
||||
jd.shared = 0;
|
||||
jd.time_limit =
|
||||
std::stoi(std::get<std::string>(job.at("timeLimitSeconds")));
|
||||
jd.min_nodes = 1;
|
||||
jd.user_id = std::stoi(std::get<std::string>(job.at("userID")));
|
||||
jd.argv = argv.data();
|
||||
jd.argc = argv.size();
|
||||
// TODO figure out the script to run
|
||||
jd.script = script;
|
||||
jd.std_in = stdinFile;
|
||||
jd.std_err = const_cast<char *>(stderrFile.c_str());
|
||||
jd.std_out = const_cast<char *>(stdoutFile.c_str());
|
||||
jd.work_dir = const_cast<char *>(workDir.c_str());
|
||||
|
||||
// jd.env_size = 1;
|
||||
// jd.environment = env;
|
||||
jd.env_size = envp.size();
|
||||
jd.environment = envp.data();
|
||||
|
||||
error_code = slurm_submit_batch_job(&jd, &resp_msg);
|
||||
if (error_code) {
|
||||
std::stringstream ss;
|
||||
ss << "Unable to submit slurm job: " << slurm_strerror(error_code);
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
uint32_t jobID = resp_msg->job_id;
|
||||
executorLog << "Job " << resp_msg->job_submit_user_msg << '\n';
|
||||
slurm_free_submit_response_response_msg(resp_msg);
|
||||
|
||||
std::lock_guard<std::mutex> lock(promiseGuard_);
|
||||
Job newJob{.prom{},
|
||||
.stdoutFile = stdoutFile,
|
||||
.stderrFile = stderrFile,
|
||||
.runID = runID,
|
||||
.taskName = taskName};
|
||||
auto fut = newJob.prom.get_future();
|
||||
runningJobs_.emplace(jobID, std::move(newJob));
|
||||
|
||||
return fut;
|
||||
}
|
||||
|
||||
bool SlurmTaskExecutor::stop(DAGRunID runID, const std::string &taskName)
|
||||
{
|
||||
// Hopefully this isn't a common thing, so just scrap the current jobs and
|
||||
// kill them
|
||||
size_t jobID = 0;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(promiseGuard_);
|
||||
for (const auto &[k, v] : runningJobs_) {
|
||||
if (v.runID == runID and v.taskName == taskName) {
|
||||
jobID = k;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (jobID == 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Send the kill message to slurm
|
||||
slurm_kill_job(jobID, SIGKILL, KILL_HURRY);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SlurmTaskExecutor::monitor()
|
||||
{
|
||||
std::unordered_set<size_t> resolvedJobs;
|
||||
while (running_) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(promiseGuard_);
|
||||
for (auto &[jobID, job] : runningJobs_) {
|
||||
job_info_msg_t *jobStatus;
|
||||
int error_code =
|
||||
slurm_load_job(&jobStatus, jobID, SHOW_ALL | SHOW_DETAIL);
|
||||
if (error_code != SLURM_SUCCESS)
|
||||
continue;
|
||||
|
||||
uint32_t idx = jobStatus->record_count;
|
||||
if (idx == 0)
|
||||
continue;
|
||||
idx--;
|
||||
const slurm_job_info_t &jobInfo = jobStatus->job_array[idx];
|
||||
AttemptRecord record;
|
||||
switch (jobInfo.job_state) {
|
||||
case JOB_PENDING:
|
||||
case JOB_SUSPENDED:
|
||||
case JOB_RUNNING:
|
||||
continue;
|
||||
// Job has finished
|
||||
case JOB_COMPLETE: /* completed execution successfully */
|
||||
record.rc = jobInfo.exit_code;
|
||||
break;
|
||||
case JOB_FAILED: /* completed execution unsuccessfully */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog = "Script errored.\n";
|
||||
break;
|
||||
case JOB_CANCELLED: /* cancelled by user */
|
||||
record.rc = 9; // matches SIGKILL
|
||||
record.executorLog = "Job cancelled by user.\n";
|
||||
break;
|
||||
case JOB_TIMEOUT: /* terminated on reaching time limit */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog = "Job exceeded time limit.\n";
|
||||
break;
|
||||
case JOB_NODE_FAIL: /* terminated on node failure */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog = "Node failed during execution\n";
|
||||
break;
|
||||
case JOB_PREEMPTED: /* terminated due to preemption */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog = "Job terminated due to pre-emption.\n";
|
||||
break;
|
||||
case JOB_BOOT_FAIL: /* terminated due to node boot failure */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog =
|
||||
"Job failed to run due to failure of compute node to "
|
||||
"boot.\n";
|
||||
break;
|
||||
case JOB_DEADLINE: /* terminated on deadline */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog = "Job terminated due to deadline.\n";
|
||||
break;
|
||||
case JOB_OOM: /* experienced out of memory error */
|
||||
record.rc = jobInfo.exit_code;
|
||||
record.executorLog = "Job terminated due to out-of-memory.\n";
|
||||
break;
|
||||
}
|
||||
slurm_free_job_info_msg(jobStatus);
|
||||
|
||||
readAndClean(job.stdoutFile, record.outputLog);
|
||||
readAndClean(job.stderrFile, record.errorLog);
|
||||
|
||||
job.prom.set_value(std::move(record));
|
||||
resolvedJobs.insert(jobID);
|
||||
}
|
||||
|
||||
for (const auto &jobID : resolvedJobs) {
|
||||
runningJobs_.extract(jobID);
|
||||
}
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
}
|
||||
} // namespace daggy::executors::task
|
||||
#endif
|
||||
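For reference, validateTaskParameters above expects every numeric Slurm field as a string (they are fed through std::stoi); a job block along these lines would pass validation, with all values illustrative:

// Illustrative jobDefaults for the Slurm executor; the field names mirror
// the requiredFields set above, the values are made up.
const std::string slurmJobDefaults = R"({
  "minCPUs": "1",
  "minMemoryMB": "256",
  "minTmpDiskMB": "512",
  "priority": "100",
  "timeLimitSeconds": "3600",
  "userID": "1000",
  "workDir": "/tmp",
  "tmpDir": "/tmp",
  "command": ["/bin/echo", "hello"]
})";
// daggy::ConfigValues job = daggy::configFromJSON(slurmJobDefaults);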
1
libdaggy/src/loggers/CMakeLists.txt
Normal file
@@ -0,0 +1 @@
add_subdirectory(dag_run)
5
libdaggy/src/loggers/dag_run/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
target_sources(${PROJECT_NAME} PRIVATE
    OStreamLogger.cpp
    RedisLogger.cpp
    RedisHelper.cpp
)
180
libdaggy/src/loggers/dag_run/OStreamLogger.cpp
Normal file
@@ -0,0 +1,180 @@
|
||||
#include <enum.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
|
||||
#include <iterator>
|
||||
|
||||
namespace daggy::loggers::dag_run {
|
||||
OStreamLogger::OStreamLogger(std::ostream &os)
|
||||
: os_(os)
|
||||
{
|
||||
}
|
||||
|
||||
OStreamLogger::~OStreamLogger()
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
dagRuns_.clear();
|
||||
}
|
||||
|
||||
// Execution
|
||||
DAGRunID OStreamLogger::startDAGRun(const DAGSpec &dagSpec)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
size_t runID = dagRuns_.size();
|
||||
dagRuns_.emplace_back(DAGRunRecord{.dagSpec = dagSpec});
|
||||
for (const auto &[name, _] : dagSpec.tasks) {
|
||||
_updateTaskState(runID, name, RunState::QUEUED);
|
||||
}
|
||||
_updateDAGRunState(runID, RunState::QUEUED);
|
||||
|
||||
os_ << "Starting new DAGRun tagged " << dagSpec.tag << " with ID " << runID
|
||||
<< " and " << dagSpec.tasks.size() << " tasks" << std::endl;
|
||||
for (const auto &[name, task] : dagSpec.tasks) {
|
||||
os_ << "TASK (" << name << "): " << configToJSON(task.job);
|
||||
os_ << std::endl;
|
||||
}
|
||||
return runID;
|
||||
}
|
||||
|
||||
void OStreamLogger::addTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
auto &dagRun = dagRuns_[dagRunID];
|
||||
dagRun.dagSpec.tasks[taskName] = task;
|
||||
_updateTaskState(dagRunID, taskName, RunState::QUEUED);
|
||||
}
|
||||
|
||||
void OStreamLogger::updateTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
auto &dagRun = dagRuns_[dagRunID];
|
||||
dagRun.dagSpec.tasks[taskName] = task;
|
||||
}
|
||||
|
||||
void OStreamLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
_updateDAGRunState(dagRunID, state);
|
||||
}
|
||||
|
||||
void OStreamLogger::_updateDAGRunState(DAGRunID dagRunID, RunState state)
|
||||
{
|
||||
os_ << "DAG State Change(" << dagRunID << "): " << state._to_string()
|
||||
<< std::endl;
|
||||
dagRuns_[dagRunID].dagStateChanges.push_back({Clock::now(), state});
|
||||
}
|
||||
|
||||
void OStreamLogger::logTaskAttempt(DAGRunID dagRunID,
|
||||
const std::string &taskName,
|
||||
const AttemptRecord &attempt)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
const std::string &msg =
|
||||
attempt.rc == 0 ? attempt.outputLog : attempt.errorLog;
|
||||
os_ << "Task Attempt (" << dagRunID << '/' << taskName << "): Ran with RC "
|
||||
<< attempt.rc << ": " << msg << std::endl;
|
||||
|
||||
dagRuns_[dagRunID].taskAttempts[taskName].push_back(attempt);
|
||||
}
|
||||
|
||||
void OStreamLogger::updateTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName,
|
||||
RunState state)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
_updateTaskState(dagRunID, taskName, state);
|
||||
}
|
||||
|
||||
void OStreamLogger::_updateTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName,
|
||||
RunState state)
|
||||
{
|
||||
auto &dagRun = dagRuns_.at(dagRunID);
|
||||
dagRun.taskStateChanges[taskName].push_back({Clock::now(), state});
|
||||
auto it = dagRun.taskRunStates.find(taskName);
|
||||
if (it == dagRun.taskRunStates.end()) {
|
||||
dagRun.taskRunStates.emplace(taskName, state);
|
||||
}
|
||||
else {
|
||||
it->second = state;
|
||||
}
|
||||
|
||||
os_ << "Task State Change (" << dagRunID << '/' << taskName
|
||||
<< "): " << state._to_string() << std::endl;
|
||||
}
|
||||
|
||||
// Querying
|
||||
DAGSpec OStreamLogger::getDAGSpec(DAGRunID dagRunID)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
return dagRuns_.at(dagRunID).dagSpec;
|
||||
};
|
||||
|
||||
std::vector<DAGRunSummary> OStreamLogger::queryDAGRuns(const std::string &tag,
|
||||
bool all)
|
||||
{
|
||||
std::vector<DAGRunSummary> summaries;
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
// runID is the index into dagRuns_, so advance it even for skipped runs.
for (size_t i = 0; i < dagRuns_.size(); ++i) {
const auto &run = dagRuns_[i];
|
||||
if ((!all) &&
|
||||
(run.dagStateChanges.back().state == +RunState::COMPLETED)) {
|
||||
continue;
|
||||
}
|
||||
if (!tag.empty() and tag != run.dagSpec.tag)
|
||||
continue;
|
||||
|
||||
TimePoint lastTaskUpdate;
|
||||
for (const auto &[_, updates] : run.taskStateChanges) {
|
||||
for (const auto &update : updates) {
|
||||
if (update.time > lastTaskUpdate)
|
||||
lastTaskUpdate = update.time;
|
||||
}
|
||||
}
|
||||
|
||||
DAGRunSummary summary{
|
||||
.runID = i,
|
||||
.tag = run.dagSpec.tag,
|
||||
.runState = run.dagStateChanges.back().state,
|
||||
.startTime = run.dagStateChanges.front().time,
|
||||
.lastUpdate = std::max<TimePoint>(lastTaskUpdate,
|
||||
run.dagStateChanges.back().time)};
|
||||
|
||||
for (const auto &[_, taskState] : run.taskRunStates) {
|
||||
summary.taskStateCounts[taskState]++;
|
||||
}
|
||||
|
||||
summaries.emplace_back(summary);
|
||||
}
|
||||
return summaries;
|
||||
}
|
||||
|
||||
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
return dagRuns_.at(dagRunID);
|
||||
}
|
||||
|
||||
RunState OStreamLogger::getDAGRunState(DAGRunID dagRunID)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
return dagRuns_.at(dagRunID).dagStateChanges.back().state;
|
||||
}
|
||||
|
||||
Task OStreamLogger::getTask(DAGRunID dagRunID, const std::string &taskName)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
return dagRuns_.at(dagRunID).dagSpec.tasks.at(taskName);
|
||||
}
|
||||
|
||||
RunState OStreamLogger::getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(guard_);
|
||||
return dagRuns_.at(dagRunID).taskRunStates.at(taskName);
|
||||
}
|
||||
|
||||
} // namespace daggy::loggers::dag_run
|
||||
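A minimal driving sketch for the logger above; any std::ostream works (std::cout here, a stringstream in the tests below):

#include <daggy/loggers/dag_run/OStreamLogger.hpp>

#include <iostream>

int main() {
  daggy::loggers::dag_run::OStreamLogger logger(std::cout);

  daggy::DAGSpec spec;
  spec.tag = "example";
  auto runID = logger.startDAGRun(spec);

  // Every change is written to the stream and kept in memory, so the full
  // history can be replayed with getDAGRun() afterwards.
  logger.updateDAGRunState(runID, daggy::RunState::RUNNING);
  logger.updateDAGRunState(runID, daggy::RunState::COMPLETED);

  auto rec = logger.getDAGRun(runID);
  std::cout << rec.dagStateChanges.size() << " DAG state changes recorded\n";
  return 0;
}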
90
libdaggy/src/loggers/dag_run/RedisHelper.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
#include <stdexcept>
|
||||
#ifdef DAGGY_ENABLE_REDIS
|
||||
|
||||
#include <daggy/loggers/dag_run/RedisHelper.hpp>
|
||||
|
||||
namespace daggy::loggers::dag_run::redis {
|
||||
RedisContext::RedisContext(const std::string &host, int port)
|
||||
{
|
||||
const struct timeval timeout = {0, 250000}; // .250 seconds
|
||||
ctx_ = redisConnectWithTimeout(host.c_str(), port, timeout);
|
||||
if (ctx_ == nullptr) {
|
||||
throw std::runtime_error("Unable to ping redis server at " + host + ":" +
|
||||
std::to_string(port));
|
||||
}
|
||||
}
|
||||
|
||||
RedisData RedisContext::parseReply_(const redisReply *reply)
|
||||
{
|
||||
RedisData data;
|
||||
|
||||
/*
|
||||
switch (reply->type) {
|
||||
case REDIS_REPLY_ERROR: {
|
||||
std::cout << "\tERROR " << reply->str << std::endl;
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_STRING: {
|
||||
std::cout << "\tSTRING" << std::endl;
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_VERB: {
|
||||
std::cout << "\tVERB" << std::endl;
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_DOUBLE: {
|
||||
std::cout << "\tDOUBLE" << std::endl;
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_INTEGER: {
|
||||
std::cout << "\tINTEGER" << std::endl;
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_ARRAY: {
|
||||
std::cout << "\tARRAY" << std::endl;
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_NIL: {
|
||||
std::cout << "\tNIL" << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
switch (reply->type) {
|
||||
case REDIS_REPLY_ERROR:
|
||||
case REDIS_REPLY_STRING:
|
||||
case REDIS_REPLY_VERB: {
|
||||
std::string raw(reply->str);
|
||||
if (raw[0] == '"' and raw[raw.size() - 1] == '"') {
|
||||
data = raw.substr(1, raw.size() - 2);
|
||||
}
|
||||
else {
|
||||
data = RedisDatum{raw};
|
||||
}
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_DOUBLE: {
|
||||
data = RedisDatum{reply->dval};
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_INTEGER: {
|
||||
data = RedisDatum{(size_t)reply->integer};
|
||||
break;
|
||||
}
|
||||
case REDIS_REPLY_ARRAY: {
|
||||
std::vector<RedisDatum> parts;
|
||||
for (size_t i = 0UL; i < reply->elements; ++i) {
|
||||
parts.push_back(parseReply_(reply->element[i]).asDatum());
|
||||
}
|
||||
data = parts;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
} // namespace daggy::loggers::dag_run::redis
|
||||
|
||||
#endif
|
||||
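The query/as<> interface of this helper is exercised heavily by the Redis logger below; as a standalone sketch (host, port and key are placeholders, and the templated accessors are assumed to be declared in RedisHelper.hpp):

#ifdef DAGGY_ENABLE_REDIS
#include <daggy/loggers/dag_run/RedisHelper.hpp>

#include <iostream>

int main() {
  using daggy::loggers::dag_run::redis::RedisContext;

  // Connects with the 250 ms timeout set in the constructor above.
  RedisContext ctx("127.0.0.1", 6379);

  ctx.query("SET %s %s", "daggy_example", "41");
  auto value = ctx.query("INCR %s", "daggy_example").as<size_t>();
  std::cout << "counter is now " << value << '\n';  // 42
  return 0;
}
#endif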
265
libdaggy/src/loggers/dag_run/RedisLogger.cpp
Normal file
@@ -0,0 +1,265 @@
|
||||
#include <stdexcept>
|
||||
#ifdef DAGGY_ENABLE_REDIS
|
||||
|
||||
#include <enum.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/loggers/dag_run/RedisLogger.hpp>
|
||||
#include <iomanip>
|
||||
#include <iterator>
|
||||
|
||||
namespace daggy::loggers::dag_run {
|
||||
RedisLogger::RedisLogger(const std::string &prefix, const std::string &host,
|
||||
int port)
|
||||
: prefix_(prefix)
|
||||
, dagRunIDsKey_(prefix_ + "_dagRunIDs")
|
||||
, ctx_(host, port)
|
||||
{
|
||||
auto resp = ctx_.query("exists %s", dagRunIDsKey_.c_str());
|
||||
|
||||
if (resp.as<size_t>() == 0) {
|
||||
ctx_.query("set %s %s", dagRunIDsKey_.c_str(), "0");
|
||||
}
|
||||
}
|
||||
|
||||
// Execution
|
||||
DAGRunID RedisLogger::startDAGRun(const DAGSpec &dagSpec)
|
||||
{
|
||||
auto resp = ctx_.query("incr %s", dagRunIDsKey_.c_str());
|
||||
|
||||
DAGRunID runID = resp.as<size_t>();
|
||||
|
||||
ctx_.query("SET %s %s", getTagKey_(runID).c_str(), dagSpec.tag.c_str());
|
||||
ctx_.query("SET %s %s", getStartTimeKey_(runID).c_str(),
|
||||
timePointToString(Clock::now()).c_str());
|
||||
ctx_.query("SET %s %s", getTaskVariablesKey_(runID).c_str(),
|
||||
configToJSON(dagSpec.taskConfig.variables).c_str());
|
||||
ctx_.query("SET %s %s", getTaskDefaultsKey_(runID).c_str(),
|
||||
configToJSON(dagSpec.taskConfig.jobDefaults).c_str());
|
||||
|
||||
for (const auto &[taskName, task] : dagSpec.tasks) {
|
||||
ctx_.query("HSET %s %s %s", getTasksKey_(runID).c_str(), taskName.c_str(),
|
||||
taskToJSON(task).c_str());
|
||||
updateTaskState(runID, taskName, RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Update the dag run state
|
||||
updateDAGRunState(runID, RunState::QUEUED);
|
||||
|
||||
return runID;
|
||||
}
|
||||
|
||||
void RedisLogger::addTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task)
|
||||
{
|
||||
updateTask(dagRunID, taskName, task);
|
||||
updateTaskState(dagRunID, taskName, RunState::QUEUED);
|
||||
}
|
||||
|
||||
void RedisLogger::updateTask(DAGRunID dagRunID, const std::string &taskName,
|
||||
const Task &task)
|
||||
{
|
||||
ctx_.query("HSET %s %s %s", getTasksKey_(dagRunID).c_str(),
|
||||
taskName.c_str(), taskToJSON(task).c_str());
|
||||
}
|
||||
|
||||
void RedisLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
|
||||
{
|
||||
// Set the state
|
||||
ctx_.query("SET %s %s", getDAGStateKey_(dagRunID).c_str(),
|
||||
state._to_string());
|
||||
|
||||
ctx_.query("SET %s %s", getLastUpdateKey_(dagRunID).c_str(),
|
||||
timePointToString(Clock::now()).c_str());
|
||||
|
||||
// Add the update record
|
||||
StateUpdateRecord rec{.time = Clock::now(), .state = state};
|
||||
ctx_.query("RPUSH %s %s", getDAGStateUpdateKey_(dagRunID).c_str(),
|
||||
stateUpdateRecordToJSON(rec).c_str());
|
||||
}
|
||||
|
||||
void RedisLogger::logTaskAttempt(DAGRunID dagRunID,
|
||||
const std::string &taskName,
|
||||
const AttemptRecord &attempt)
|
||||
{
|
||||
std::string attemptJSON = attemptRecordToJSON(attempt);
|
||||
ctx_.query("RPUSH %s %s", getTaskAttemptKey_(dagRunID, taskName).c_str(),
|
||||
attemptJSON.c_str());
|
||||
}
|
||||
|
||||
void RedisLogger::updateTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName, RunState state)
|
||||
{
|
||||
// Set the state
|
||||
ctx_.query(R"(HSET %s %s %s)", getTaskStatesKey_(dagRunID).c_str(),
|
||||
taskName.c_str(), state._to_string());
|
||||
|
||||
ctx_.query(R"(SET %s %s)", getLastUpdateKey_(dagRunID),
|
||||
timePointToString(Clock::now()).c_str());
|
||||
|
||||
// Add the update record
|
||||
StateUpdateRecord rec{.time = Clock::now(), .state = state};
|
||||
ctx_.query("RPUSH %s %s",
|
||||
getTaskStateUpdateKey_(dagRunID, taskName).c_str(),
|
||||
stateUpdateRecordToJSON(rec).c_str());
|
||||
}
|
||||
|
||||
// Querying
|
||||
DAGSpec RedisLogger::getDAGSpec(DAGRunID dagRunID)
|
||||
{
|
||||
DAGSpec spec;
|
||||
|
||||
spec.tag =
|
||||
ctx_.query("GET %s", getTagKey_(dagRunID).c_str()).as<std::string>();
|
||||
|
||||
auto tasks = ctx_.query("HGETALL %s", getTasksKey_(dagRunID).c_str())
|
||||
.asHash<std::string, std::string>();
|
||||
for (const auto &[taskName, taskJSON] : tasks) {
|
||||
spec.tasks.emplace(taskName, taskFromJSON(taskName, taskJSON));
|
||||
}
|
||||
|
||||
auto taskVars = ctx_.query("GET %s", getTaskVariablesKey_(dagRunID).c_str())
|
||||
.as<std::string>();
|
||||
spec.taskConfig.variables = configFromJSON(taskVars);
|
||||
|
||||
auto jobDefaults =
|
||||
ctx_.query("GET %s", getTaskDefaultsKey_(dagRunID).c_str())
|
||||
.as<std::string>();
|
||||
spec.taskConfig.jobDefaults = configFromJSON(jobDefaults);
|
||||
|
||||
return spec;
|
||||
};
|
||||
|
||||
std::vector<DAGRunSummary> RedisLogger::queryDAGRuns(const std::string &tag,
|
||||
bool all)
|
||||
{
|
||||
std::vector<DAGRunSummary> summaries;
|
||||
|
||||
auto reply = ctx_.query("GET %s", dagRunIDsKey_.c_str());
|
||||
|
||||
size_t maxRuns = std::stoull(reply.as<std::string>());
|
||||
|
||||
RunState state = RunState::QUEUED;
|
||||
for (size_t runID = 1; runID <= maxRuns; ++runID) {
|
||||
try {
|
||||
state = getDAGRunState(runID);
|
||||
}
|
||||
catch (std::runtime_error &e) {
|
||||
continue;
|
||||
}
|
||||
if (!all and state == +RunState::COMPLETED)
|
||||
continue;
|
||||
const auto dagTag =
|
||||
ctx_.query("GET %s", getTagKey_(runID).c_str()).as<std::string>();
|
||||
if (!tag.empty() and dagTag != tag)
|
||||
continue;
|
||||
|
||||
const auto startTime =
|
||||
ctx_.query("GET %s", getStartTimeKey_(runID).c_str())
|
||||
.as<std::string>();
|
||||
const auto lastTime =
|
||||
ctx_.query("GET %s", getLastUpdateKey_(runID).c_str())
|
||||
.as<std::string>();
|
||||
|
||||
DAGRunSummary summary{
|
||||
.runID = runID,
|
||||
.tag = dagTag,
|
||||
.runState = state,
|
||||
.startTime = stringToTimePoint(startTime),
|
||||
.lastUpdate = stringToTimePoint(lastTime),
|
||||
};
|
||||
|
||||
auto taskStates =
|
||||
ctx_.query("HGETALL %s", getTaskStatesKey_(runID).c_str())
|
||||
.asHash<std::string, std::string>();
|
||||
for (const auto &[taskName, state] : taskStates) {
|
||||
auto taskState = RunState::_from_string(state.c_str());
|
||||
summary.taskStateCounts[taskState]++;
|
||||
}
|
||||
summaries.emplace_back(summary);
|
||||
}
|
||||
|
||||
return summaries;
|
||||
}
|
||||
|
||||
DAGRunRecord RedisLogger::getDAGRun(DAGRunID dagRunID)
|
||||
{
|
||||
DAGRunRecord rec;
|
||||
|
||||
rec.dagSpec = getDAGSpec(dagRunID);
|
||||
|
||||
// Populate DAG Updates
|
||||
auto dagStateUpdates =
|
||||
ctx_.query("LRANGE %s 0 -1", getDAGStateUpdateKey_(dagRunID).c_str())
|
||||
.asList<std::string>();
|
||||
std::transform(dagStateUpdates.begin(), dagStateUpdates.end(),
|
||||
std::back_inserter(rec.dagStateChanges),
|
||||
[](const auto &s) { return stateUpdateRecordFromJSON(s); });
|
||||
|
||||
// Populate taskRunStates
|
||||
auto taskStates =
|
||||
ctx_.query("HGETALL %s", getTaskStatesKey_(dagRunID).c_str())
|
||||
.asHash<std::string, std::string>();
|
||||
for (const auto &[taskName, state] : taskStates) {
|
||||
rec.taskRunStates.emplace(taskName,
|
||||
RunState::_from_string(state.c_str()));
|
||||
}
|
||||
|
||||
for (const auto &[taskName, _] : rec.dagSpec.tasks) {
|
||||
// Populate taskAttempts
|
||||
auto taskAttempts =
|
||||
ctx_.query("LRANGE %s 0 -1",
|
||||
getTaskAttemptKey_(dagRunID, taskName).c_str())
|
||||
.asList<std::string>();
|
||||
std::transform(taskAttempts.begin(), taskAttempts.end(),
|
||||
std::back_inserter(rec.taskAttempts[taskName]),
|
||||
[](const auto &s) { return attemptRecordFromJSON(s); });
|
||||
|
||||
// Populate stateUpdates
|
||||
auto taskStateUpdates =
|
||||
ctx_.query("LRANGE %s 0 -1",
|
||||
getTaskStateUpdateKey_(dagRunID, taskName).c_str())
|
||||
.asList<std::string>();
|
||||
auto &stateUpdates = rec.taskStateChanges[taskName];
|
||||
std::transform(taskStateUpdates.begin(), taskStateUpdates.end(),
|
||||
std::back_inserter(stateUpdates), [](const auto &s) {
|
||||
return stateUpdateRecordFromJSON(s);
|
||||
});
|
||||
}
|
||||
|
||||
return rec;
|
||||
}
|
||||
|
||||
RunState RedisLogger::getDAGRunState(DAGRunID dagRunID)
|
||||
{
|
||||
auto resp = ctx_.query("GET %s", getDAGStateKey_(dagRunID).c_str());
|
||||
std::string stateStr = resp.as<std::string>();
|
||||
if (stateStr.empty())
|
||||
throw std::runtime_error("No such dagrun");
|
||||
return RunState::_from_string(stateStr.c_str());
|
||||
}
|
||||
|
||||
Task RedisLogger::getTask(DAGRunID dagRunID, const std::string &taskName)
|
||||
{
|
||||
auto resp = ctx_.query("HGET %s %s", getTasksKey_(dagRunID).c_str(),
|
||||
taskName.c_str());
|
||||
return taskFromJSON(taskName, resp.as<std::string>());
|
||||
}
|
||||
|
||||
RunState RedisLogger::getTaskState(DAGRunID dagRunID,
|
||||
const std::string &taskName)
|
||||
{
|
||||
auto resp = ctx_.query("HGET %s %s", getTaskStatesKey_(dagRunID).c_str(),
|
||||
taskName.c_str());
|
||||
return RunState::_from_string(resp.as<std::string>().c_str());
|
||||
}
|
||||
|
||||
} // namespace daggy::loggers::dag_run
|
||||
|
||||
#endif
|
||||
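And a corresponding construction sketch for the Redis-backed logger (only built with DAGGY_ENABLE_REDIS; prefix, host and port are placeholders):

#ifdef DAGGY_ENABLE_REDIS
#include <daggy/loggers/dag_run/RedisLogger.hpp>

int main() {
  // Creates the "<prefix>_dagRunIDs" counter on first use (see the
  // constructor above); run IDs are then handed out by INCR on that key.
  daggy::loggers::dag_run::RedisLogger logger("daggy", "127.0.0.1", 6379);

  daggy::DAGSpec spec;
  spec.tag = "example";
  auto runID = logger.startDAGRun(spec);
  logger.updateDAGRunState(runID, daggy::RunState::RUNNING);
  return 0;
}
#endif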
20
libdaggy/tests/CMakeLists.txt
Normal file
@@ -0,0 +1,20 @@
project(libdaggy_tests)

add_executable(${PROJECT_NAME} main.cpp
    # unit tests
    unit_dag.cpp
    unit_dagrunner.cpp
    unit_dagrun_loggers.cpp
    unit_executor_forkingexecutor.cpp
    unit_executor_slurmexecutor.cpp
    unit_serialization.cpp
    unit_threadpool.cpp
    unit_utilities.cpp
    # integration tests
    int_basic.cpp
    # Performance checks
    perf_dag.cpp
)
target_link_libraries(${PROJECT_NAME} libdaggy stdc++fs Catch2::Catch2)

add_test(${PROJECT_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME})
7
libdaggy/tests/int_basic.cpp
Normal file
@@ -0,0 +1,7 @@
#include <catch2/catch.hpp>
#include <iostream>

TEST_CASE("General tests", "[general]")
{
  REQUIRE(1 == 1);
}
15
libdaggy/tests/main.cpp
Normal file
@@ -0,0 +1,15 @@
#include <iostream>

#include "daggy/DAG.hpp"

#define CATCH_CONFIG_MAIN

#include <catch2/catch.hpp>

TEST_CASE("Sanity tests", "[sanity]")
{
  REQUIRE(1 == 1);
}

// compile and run
// g++ -std=c++17 -o test test.cpp && ./test
64
libdaggy/tests/perf_dag.cpp
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifdef CATCH_CONFIG_ENABLE_BENCHMARKING
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
#include <iostream>
#include <random>
|
||||
|
||||
#include "daggy/DAG.hpp"
|
||||
|
||||
inline std::string taskName(size_t i)
|
||||
{
|
||||
return "action_node" + std::to_string(i);
|
||||
}
|
||||
|
||||
daggy::DAG<std::string, size_t> createDAG(size_t N_NODES, size_t MAX_CHILDREN)
|
||||
{
|
||||
daggy::DAG<std::string, size_t> dag;
|
||||
|
||||
for (size_t i = 0; i < N_NODES; ++i) {
|
||||
dag.addVertex(taskName(i), i);
|
||||
}
|
||||
|
||||
static std::random_device dev;
|
||||
static std::mt19937 rng(dev());
|
||||
std::uniform_int_distribution<size_t> nDepDist(1, MAX_CHILDREN);
|
||||
|
||||
for (size_t i = 0; i < N_NODES - 1; ++i) {
|
||||
std::string parent = taskName(i);
|
||||
std::uniform_int_distribution<size_t> depDist(i + 1, N_NODES - 1);
|
||||
size_t nChildren = std::min(nDepDist(rng), N_NODES - i);
|
||||
|
||||
std::unordered_set<size_t> found;
|
||||
size_t tries = 0;
|
||||
while (found.size() < nChildren) {
|
||||
++tries;
|
||||
if (tries > nChildren * 2)
|
||||
break;
|
||||
auto child = depDist(rng);
|
||||
if (found.count(child) > 0)
|
||||
continue;
|
||||
found.insert(child);
|
||||
dag.addEdge(parent, taskName(child));
|
||||
}
|
||||
}
|
||||
|
||||
return dag;
|
||||
}
|
||||
|
||||
const size_t N_NODES = 10'000;
|
||||
const size_t MAX_CHILDREN = 10;
|
||||
|
||||
static auto DAG = createDAG(N_NODES, MAX_CHILDREN);
|
||||
|
||||
TEST_CASE("massive DAGs", "[dag_performance]")
|
||||
{
|
||||
BENCHMARK_ADVANCED("dag.reset")(Catch::Benchmark::Chronometer meter)
|
||||
{
|
||||
meter.measure([&] { return DAG.reset(); });
|
||||
};
|
||||
|
||||
BENCHMARK_ADVANCED("dag.isValid")(Catch::Benchmark::Chronometer meter)
|
||||
{
|
||||
meter.measure([&] { return DAG.isValid(); });
|
||||
};
|
||||
}
|
||||
#endif
|
||||
89
libdaggy/tests/unit_dag.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include <catch2/catch.hpp>
|
||||
#include <iostream>
|
||||
|
||||
#include "daggy/DAG.hpp"
|
||||
|
||||
TEST_CASE("dag_construction", "[dag]")
|
||||
{
|
||||
daggy::DAG<size_t, size_t> dag;
|
||||
|
||||
REQUIRE(dag.size() == 0);
|
||||
REQUIRE(dag.empty());
|
||||
|
||||
REQUIRE_NOTHROW(dag.addVertex(0, 0));
|
||||
for (size_t i = 1; i < 10; ++i) {
|
||||
dag.addVertex(i, i);
|
||||
REQUIRE(dag.hasVertex(i));
|
||||
REQUIRE(dag.getVertex(i).data == i);
|
||||
dag.addEdge(i - 1, i);
|
||||
}
|
||||
|
||||
REQUIRE(dag.size() == 10);
|
||||
REQUIRE(!dag.empty());
|
||||
|
||||
// An edge that would create a cycle can be added, but it makes the DAG invalid
|
||||
dag.addEdge(9, 5);
|
||||
REQUIRE(!dag.isValid());
|
||||
|
||||
// Bounds checking
|
||||
SECTION("addEdge Bounds Checking")
|
||||
{
|
||||
REQUIRE_THROWS(dag.addEdge(20, 0));
|
||||
REQUIRE_THROWS(dag.addEdge(0, 20));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("dag_traversal", "[dag]")
|
||||
{
|
||||
daggy::DAG<size_t, size_t> dag;
|
||||
|
||||
const int N_VERTICES = 10;
|
||||
|
||||
for (int i = 0; i < N_VERTICES; ++i) {
|
||||
dag.addVertex(i, i);
|
||||
}
|
||||
|
||||
/*
|
||||
0 ---------------------\
|
||||
1 ---------- \ \ /-----> 8
|
||||
2 ---- 3 ---- > 5 -------> 6 -----> 7
|
||||
4 -------------------------------/ \-----> 9
|
||||
*/
|
||||
|
||||
std::vector<std::pair<int, int>> edges{{0, 6}, {1, 5}, {5, 6}, {6, 7}, {2, 3},
|
||||
{3, 5}, {4, 7}, {7, 8}, {7, 9}};
|
||||
|
||||
for (const auto &[from, to] : edges) {
|
||||
dag.addEdge(from, to);
|
||||
}
|
||||
|
||||
SECTION("Basic Traversal")
|
||||
{
|
||||
dag.reset();
|
||||
std::vector<size_t> visitOrder(N_VERTICES);
|
||||
size_t i = 0;
|
||||
while (!dag.allVisited()) {
|
||||
auto o = dag.visitNext();
|
||||
REQUIRE(o.has_value());
|
||||
const auto v = o.value();
|
||||
dag.completeVisit(v.first);
|
||||
visitOrder[v.first] = i;
|
||||
++i;
|
||||
}
|
||||
|
||||
// Ensure visit order is preserved
|
||||
for (const auto &[from, to] : edges) {
|
||||
REQUIRE(visitOrder[from] <= visitOrder[to]);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Iteration")
|
||||
{
|
||||
size_t nVisited = 0;
|
||||
dag.forEach([&](auto &k) {
|
||||
(void)k;
|
||||
++nVisited;
|
||||
});
|
||||
REQUIRE(nVisited == dag.size());
|
||||
}
|
||||
}
|
||||
129
libdaggy/tests/unit_dagrun_loggers.cpp
Normal file
@@ -0,0 +1,129 @@
|
||||
#include <catch2/catch.hpp>
|
||||
#include <algorithm>
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "daggy/Serialization.hpp"
|
||||
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
|
||||
#include "daggy/loggers/dag_run/RedisLogger.hpp"
|
||||
|
||||
using namespace daggy;
|
||||
using namespace daggy::loggers::dag_run;
|
||||
|
||||
const TaskSet SAMPLE_TASKS{
|
||||
{"work_a",
|
||||
Task{.definedName{"work_a"},
|
||||
.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}},
|
||||
.children{"c"}}},
|
||||
{"work_b",
|
||||
Task{.definedName{"work_b"},
|
||||
.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}},
|
||||
.children{"c"}}},
|
||||
{"work_c",
|
||||
Task{.definedName{"work_c"},
|
||||
.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};
|
||||
|
||||
namespace {
|
||||
void testDAGRunInit(DAGRunLogger &logger, const std::string &tag,
|
||||
const TaskSet &tasks)
|
||||
{
|
||||
auto runID = logger.startDAGRun(DAGSpec{.tag = tag, .tasks = tasks});
|
||||
|
||||
// Verify run shows up in the list
|
||||
SECTION("New run shows up in list of runs")
|
||||
{
|
||||
auto runs = logger.queryDAGRuns();
|
||||
REQUIRE(!runs.empty());
|
||||
auto it = std::find_if(runs.begin(), runs.end(), [runID](const auto &r) {
|
||||
return r.runID == runID;
|
||||
});
|
||||
REQUIRE(it != runs.end());
|
||||
REQUIRE(it->tag == tag);
|
||||
REQUIRE(it->runState == +RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Verify dagSpec matches
|
||||
SECTION("Can retrieve DAG Spec")
|
||||
{
|
||||
auto spec = logger.getDAGSpec(runID);
|
||||
REQUIRE(spec.tag == tag);
|
||||
REQUIRE(spec.tasks == tasks);
|
||||
}
|
||||
|
||||
// Verify states
|
||||
SECTION("DAG State matches expectations")
|
||||
{
|
||||
REQUIRE(logger.getDAGRunState(runID) == +RunState::QUEUED);
|
||||
for (const auto &[k, _] : tasks) {
|
||||
REQUIRE(logger.getTaskState(runID, k) == +RunState::QUEUED);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify integrity of run
|
||||
SECTION("Can retrieve the full run")
|
||||
{
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
|
||||
REQUIRE(dagRun.dagSpec.tag == tag);
|
||||
REQUIRE(dagRun.dagSpec.tasks == tasks);
|
||||
|
||||
REQUIRE(dagRun.taskRunStates.size() == tasks.size());
|
||||
auto nonQueuedTask = std::find_if(
|
||||
dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
|
||||
[](const auto &a) { return a.second != +RunState::QUEUED; });
|
||||
|
||||
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
|
||||
REQUIRE(dagRun.dagStateChanges.size() == 1);
|
||||
REQUIRE(dagRun.dagStateChanges.back().state == +RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Update DAG state and ensure that it's updated;
|
||||
SECTION("Can update DAG state and retrieve new state")
|
||||
{
|
||||
logger.updateDAGRunState(runID, RunState::RUNNING);
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
REQUIRE(dagRun.dagStateChanges.back().state == +RunState::RUNNING);
|
||||
}
|
||||
|
||||
// Update a task state
|
||||
SECTION("Can update task state and retrieve new state")
|
||||
{
|
||||
for (const auto &[k, v] : tasks)
|
||||
logger.updateTaskState(runID, k, RunState::RUNNING);
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
for (const auto &[k, v] : tasks) {
|
||||
REQUIRE(dagRun.taskRunStates.at(k) == +RunState::RUNNING);
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Log task attempt and retrieve it")
|
||||
{
|
||||
std::string error = "long error string\nwith new\n lines";
|
||||
logger.logTaskAttempt(runID, "work_a",
|
||||
AttemptRecord{.rc = 2, .errorLog = error});
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
|
||||
REQUIRE(dagRun.taskAttempts["work_a"].size() == 1);
|
||||
REQUIRE(dagRun.taskAttempts["work_a"][0].errorLog == error);
|
||||
REQUIRE(dagRun.taskAttempts["work_a"][0].rc == 2);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_CASE("ostream_logger", "[ostream_logger]")
|
||||
{
|
||||
std::stringstream ss;
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
|
||||
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
|
||||
}
|
||||
|
||||
#ifdef DAGGY_ENABLE_REDIS
|
||||
TEST_CASE("redis_logger", "[redis_logger]")
|
||||
{
|
||||
daggy::loggers::dag_run::RedisLogger logger;
|
||||
|
||||
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
|
||||
}
|
||||
#endif
|
||||
256
libdaggy/tests/unit_dagrunner.cpp
Normal file
@@ -0,0 +1,256 @@
|
||||
#include <catch2/catch.hpp>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "daggy/DAGRunner.hpp"
|
||||
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
|
||||
#include "daggy/executors/task/NoopTaskExecutor.hpp"
|
||||
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
TEST_CASE("dagrunner", "[dagrunner][dagrunner_order_preservation]")
|
||||
{
|
||||
daggy::executors::task::NoopTaskExecutor ex;
|
||||
std::stringstream ss;
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
|
||||
daggy::TimePoint globalStartTime = daggy::Clock::now();
|
||||
|
||||
daggy::DAGSpec dagSpec;
|
||||
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
|
||||
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
|
||||
|
||||
std::string taskJSON = R"({
|
||||
"A": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "B","D" ]},
|
||||
"B": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "C","D","E" ]},
|
||||
"C": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "D"]},
|
||||
"D": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}, "children": [ "E"]},
|
||||
"E": {"job": {"command": ["/usr/bin/touch", "{{DATE}}"]}}
|
||||
})";
|
||||
|
||||
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex,
|
||||
dagSpec.taskConfig.variables);
|
||||
|
||||
REQUIRE(dagSpec.tasks.size() == 20);
|
||||
|
||||
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
||||
auto runID = logger.startDAGRun(dagSpec);
|
||||
|
||||
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
||||
|
||||
auto endDAG = runner.run();
|
||||
|
||||
REQUIRE(endDAG.allVisited());
|
||||
|
||||
// Ensure the run order
|
||||
auto rec = logger.getDAGRun(runID);
|
||||
|
||||
daggy::TimePoint globalStopTime = daggy::Clock::now();
|
||||
std::array<daggy::TimePoint, 5> minTimes;
|
||||
minTimes.fill(globalStartTime);
|
||||
std::array<daggy::TimePoint, 5> maxTimes;
|
||||
maxTimes.fill(globalStopTime);
|
||||
|
||||
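  // For each task letter, minTimes[idx] ends up holding the latest attempt start
  // and maxTimes[idx] the earliest attempt stop across that letter's expanded
  // instances; the nested loops below compare letters pairwise to check run order.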
for (const auto &[k, v] : rec.taskAttempts) {
|
||||
    size_t idx = k[0] - 'A';
|
||||
auto &startTime = minTimes[idx];
|
||||
auto &stopTime = maxTimes[idx];
|
||||
startTime = std::max(startTime, v.front().startTime);
|
||||
stopTime = std::min(stopTime, v.back().stopTime);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
    for (size_t j = i + 1; j < 5; ++j) {
|
||||
REQUIRE(maxTimes[i] < minTimes[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("DAGRunner simple execution", "[dagrunner][dagrunner_simple]")
|
||||
{
|
||||
daggy::executors::task::ForkingTaskExecutor ex(10);
|
||||
std::stringstream ss;
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
|
||||
daggy::DAGSpec dagSpec;
|
||||
|
||||
SECTION("Simple execution")
|
||||
{
|
||||
std::string prefix = (fs::current_path() / "asdlk").string();
|
||||
std::unordered_map<std::string, std::string> files{
|
||||
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
|
||||
std::string taskJSON =
|
||||
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
|
||||
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
|
||||
files.at("B") +
|
||||
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
|
||||
files.at("C") + R"("]}}})";
|
||||
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
|
||||
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
||||
auto runID = logger.startDAGRun(dagSpec);
|
||||
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
||||
auto endDAG = runner.run();
|
||||
REQUIRE(endDAG.allVisited());
|
||||
|
||||
for (const auto &[_, file] : files) {
|
||||
REQUIRE(fs::exists(file));
|
||||
fs::remove(file);
|
||||
}
|
||||
|
||||
// Get the DAG Run Attempts
|
||||
auto record = logger.getDAGRun(runID);
|
||||
for (const auto &[_, attempts] : record.taskAttempts) {
|
||||
REQUIRE(attempts.size() == 1);
|
||||
REQUIRE(attempts.front().rc == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("DAG Runner Restart old DAG", "[dagrunner][dagrunner_restart]")
|
||||
{
|
||||
daggy::executors::task::ForkingTaskExecutor ex(10);
|
||||
std::stringstream ss;
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
daggy::DAGSpec dagSpec;
|
||||
|
||||
SECTION("Recovery from Error")
|
||||
{
|
||||
auto cleanup = []() {
|
||||
// Cleanup
|
||||
std::vector<fs::path> paths{"rec_error_A", "noexist"};
|
||||
for (const auto &pth : paths) {
|
||||
if (fs::exists(pth))
|
||||
fs::remove_all(pth);
|
||||
}
|
||||
};
|
||||
|
||||
cleanup();
|
||||
|
||||
std::string goodPrefix = "rec_error_";
|
||||
std::string badPrefix = "noexist/rec_error_";
|
||||
std::string taskJSON =
|
||||
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
|
||||
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
|
||||
badPrefix +
|
||||
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
|
||||
badPrefix + R"(C"]}}})";
|
||||
dagSpec.tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
|
||||
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
||||
|
||||
auto runID = logger.startDAGRun(dagSpec);
|
||||
|
||||
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
||||
auto tryDAG = runner.run();
|
||||
|
||||
REQUIRE(!tryDAG.allVisited());
|
||||
|
||||
// Create the missing dir, then continue to run the DAG
|
||||
fs::create_directory("noexist");
|
||||
runner.resetRunning();
|
||||
auto endDAG = runner.run();
|
||||
|
||||
REQUIRE(endDAG.allVisited());
|
||||
|
||||
// Get the DAG Run Attempts
|
||||
auto record = logger.getDAGRun(runID);
|
||||
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
|
||||
REQUIRE(record.taskAttempts["B_0"].size() ==
|
||||
2); // B errored and had to be retried
|
||||
REQUIRE(record.taskAttempts["C_0"].size() ==
|
||||
            1); // C ran only once: it was skipped on the first pass because B errored
|
||||
|
||||
cleanup();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("DAG Runner Generator Tasks", "[dagrunner][dagrunner_generator]")
|
||||
{
|
||||
daggy::executors::task::ForkingTaskExecutor ex(10);
|
||||
std::stringstream ss;
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
daggy::DAGSpec dagSpec;
|
||||
|
||||
SECTION("Generator tasks")
|
||||
{
|
||||
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
|
||||
dagSpec.taskConfig.variables = daggy::configFromJSON(testParams);
|
||||
|
||||
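    // Task A is declared with "isGenerator": true below; its stdout (the JSON
    // written to generator_test_output.json here) is parsed when A completes and
    // the generated B tasks are spliced into the DAG between A and C.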
std::string generatorOutput =
|
||||
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
|
||||
fs::path ofn = fs::current_path() / "generator_test_output.json";
|
||||
std::ofstream ofh{ofn};
|
||||
ofh << generatorOutput << std::endl;
|
||||
ofh.close();
|
||||
|
||||
daggy::TimePoint globalStartTime = daggy::Clock::now();
|
||||
std::stringstream jsonTasks;
|
||||
jsonTasks
|
||||
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
|
||||
<< std::quoted(ofn.string())
|
||||
<< R"(]}, "children": ["C"], "isGenerator": true},)"
|
||||
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
|
||||
|
||||
dagSpec.tasks = daggy::tasksFromJSON(jsonTasks.str());
|
||||
REQUIRE(dagSpec.tasks.size() == 2);
|
||||
REQUIRE(dagSpec.tasks["A"].children ==
|
||||
std::unordered_set<std::string>{"C"});
|
||||
dagSpec.tasks =
|
||||
daggy::expandTaskSet(dagSpec.tasks, ex, dagSpec.taskConfig.variables);
|
||||
REQUIRE(dagSpec.tasks.size() == 2);
|
||||
REQUIRE(dagSpec.tasks["A_0"].children ==
|
||||
std::unordered_set<std::string>{"C"});
|
||||
auto dag = daggy::buildDAGFromTasks(dagSpec.tasks);
|
||||
REQUIRE(dag.size() == 2);
|
||||
|
||||
auto runID = logger.startDAGRun(dagSpec);
|
||||
daggy::DAGRunner runner(runID, ex, logger, dag, dagSpec.taskConfig);
|
||||
auto finalDAG = runner.run();
|
||||
|
||||
REQUIRE(finalDAG.allVisited());
|
||||
REQUIRE(finalDAG.size() == 4);
|
||||
|
||||
// Check the logger
|
||||
auto record = logger.getDAGRun(runID);
|
||||
|
||||
REQUIRE(record.dagSpec.tasks.size() == 4);
|
||||
REQUIRE(record.taskRunStates.size() == 4);
|
||||
for (const auto &[taskName, attempts] : record.taskAttempts) {
|
||||
REQUIRE(attempts.size() == 1);
|
||||
REQUIRE(attempts.back().rc == 0);
|
||||
}
|
||||
|
||||
// Ensure that children were updated properly
|
||||
REQUIRE(record.dagSpec.tasks["A_0"].children ==
|
||||
std::unordered_set<std::string>{"B_0", "B_1", "C"});
|
||||
REQUIRE(record.dagSpec.tasks["B_0"].children ==
|
||||
std::unordered_set<std::string>{"C"});
|
||||
REQUIRE(record.dagSpec.tasks["B_1"].children ==
|
||||
std::unordered_set<std::string>{"C"});
|
||||
REQUIRE(record.dagSpec.tasks["C_0"].children.empty());
|
||||
|
||||
// Ensure they were run in the right order
|
||||
// All A's get run before B's, which run before C's
|
||||
daggy::TimePoint globalStopTime = daggy::Clock::now();
|
||||
std::array<daggy::TimePoint, 3> minTimes;
|
||||
minTimes.fill(globalStartTime);
|
||||
std::array<daggy::TimePoint, 3> maxTimes;
|
||||
maxTimes.fill(globalStopTime);
|
||||
|
||||
for (const auto &[k, v] : record.taskAttempts) {
|
||||
      size_t idx = k[0] - 'A';
|
||||
auto &startTime = minTimes[idx];
|
||||
auto &stopTime = maxTimes[idx];
|
||||
startTime = std::max(startTime, v.front().startTime);
|
||||
stopTime = std::min(stopTime, v.back().stopTime);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
      for (size_t j = i + 1; j < 3; ++j) {
|
||||
REQUIRE(maxTimes[i] < minTimes[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
181
libdaggy/tests/unit_executor_forkingexecutor.cpp
Normal file
@@ -0,0 +1,181 @@
|
||||
#include <catch2/catch.hpp>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "daggy/Serialization.hpp"
|
||||
#include "daggy/Utilities.hpp"
|
||||
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
TEST_CASE("forking_executor", "[forking_executor]")
|
||||
{
|
||||
daggy::executors::task::ForkingTaskExecutor ex(10);
|
||||
|
||||
SECTION("Simple Run")
|
||||
{
|
||||
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
|
||||
"/usr/bin/echo", "abc", "123"}}}};
|
||||
|
||||
REQUIRE(ex.validateTaskParameters(task.job));
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() >= 6);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
}
|
||||
|
||||
SECTION("Simple Run using commandString")
|
||||
{
|
||||
daggy::Task task{.job{{"commandString", R"(/usr/bin/echo "abc 123")"}}};
|
||||
|
||||
REQUIRE(ex.validateTaskParameters(task.job));
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() >= 6);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
}
|
||||
|
||||
SECTION("Simple run with environment")
|
||||
{
|
||||
// Create the shell script
|
||||
auto scriptFile = fs::current_path() / "fork_simple.sh";
|
||||
|
||||
if (fs::exists(scriptFile))
|
||||
fs::remove_all(scriptFile);
|
||||
|
||||
std::ofstream ofh(scriptFile);
|
||||
ofh << "#!/bin/bash\necho \"${DAGGY_TEST_VAR}\"\necho "
|
||||
"\"${DAGGY_TEST_VAR2}\"\n";
|
||||
ofh.close();
|
||||
fs::permissions(scriptFile, fs::perms::owner_all,
|
||||
fs::perm_options::replace);
|
||||
|
||||
std::string valOne = "funky_times";
|
||||
std::string valTwo = "bleep_bloop";
|
||||
daggy::Task task{.job{{"command",
|
||||
daggy::executors::task::ForkingTaskExecutor::Command{
|
||||
scriptFile.string()}},
|
||||
{"environment", std::vector<std::string>{
|
||||
"DAGGY_TEST_VAR=" + valOne,
|
||||
"DAGGY_TEST_VAR2=" + valTwo}}}};
|
||||
|
||||
REQUIRE(ex.validateTaskParameters(task.job));
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() >= 6);
|
||||
REQUIRE(rec.outputLog.find(valOne) != std::string::npos);
|
||||
REQUIRE(rec.outputLog.find(valTwo) != std::string::npos);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
|
||||
// if (fs::exists(scriptFile)) fs::remove_all(scriptFile);
|
||||
}
|
||||
|
||||
SECTION("Error Run")
|
||||
{
|
||||
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
|
||||
"/usr/bin/expr", "1", "+", "+"}}}};
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 2);
|
||||
REQUIRE(rec.errorLog.size() >= 20);
|
||||
REQUIRE(rec.outputLog.empty());
|
||||
}
|
||||
|
||||
SECTION("Killing a long task")
|
||||
{
|
||||
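    // Launch a 30-second sleep and stop it after ~1s: the executor should report
    // rc 9 (SIGKILL) with "Killed" in its log, well before the sleep would finish.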
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
|
||||
"/usr/bin/sleep", "30"}}}};
|
||||
|
||||
auto start = daggy::Clock::now();
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
std::this_thread::sleep_for(1s);
|
||||
ex.stop(0, "command");
|
||||
auto rec = recFuture.get();
|
||||
auto stop = daggy::Clock::now();
|
||||
|
||||
REQUIRE(rec.rc == 9);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
REQUIRE(rec.outputLog.empty());
|
||||
REQUIRE(rec.executorLog == "Killed");
|
||||
REQUIRE(
|
||||
std::chrono::duration_cast<std::chrono::seconds>(stop - start).count() <
|
||||
20);
|
||||
}
|
||||
|
||||
SECTION("Large Output")
|
||||
{
|
||||
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
|
||||
"/usr/share/dict/cracklib-small",
|
||||
"/etc/ssh/moduli"};
|
||||
|
||||
for (const auto &bigFile : BIG_FILES) {
|
||||
if (!std::filesystem::exists(bigFile))
|
||||
continue;
|
||||
|
||||
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
|
||||
"/usr/bin/cat", bigFile}}}};
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Parameter Expansion")
|
||||
{
|
||||
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
|
||||
std::string taskJSON =
|
||||
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(taskJSON);
|
||||
|
||||
auto result = daggy::expandTaskSet(tasks, ex, params);
|
||||
REQUIRE(result.size() == 2);
|
||||
}
|
||||
|
||||
SECTION("Build with expansion")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::string testTasks =
|
||||
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
|
||||
auto tasks =
|
||||
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
|
||||
REQUIRE(tasks.size() == 4);
|
||||
}
|
||||
|
||||
SECTION("Build with expansion using parents instead of children")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::string testTasks =
|
||||
R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
|
||||
auto tasks =
|
||||
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
|
||||
|
||||
REQUIRE(tasks.size() == 4);
|
||||
}
|
||||
}
|
||||
211
libdaggy/tests/unit_executor_slurmexecutor.cpp
Normal file
@@ -0,0 +1,211 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "daggy/Serialization.hpp"
|
||||
#include "daggy/Utilities.hpp"
|
||||
#include "daggy/executors/task/SlurmTaskExecutor.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
#ifdef DAGGY_ENABLE_SLURM
|
||||
|
||||
TEST_CASE("slurm environment", "[slurm_env]")
|
||||
{
|
||||
daggy::executors::task::SlurmTaskExecutor ex;
|
||||
|
||||
daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
|
||||
{"minMemoryMB", "100"},
|
||||
{"minTmpDiskMB", "0"},
|
||||
{"priority", "1"},
|
||||
{"timeLimitSeconds", "200"},
|
||||
{"userID", std::to_string(getuid())},
|
||||
{"workDir", fs::current_path().string()},
|
||||
{"tmpDir", fs::current_path().string()}};
|
||||
}
|
||||
|
||||
TEST_CASE("slurm_execution", "[slurm_executor]")
|
||||
{
|
||||
daggy::executors::task::SlurmTaskExecutor ex;
|
||||
|
||||
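  // Baseline Slurm job parameters; each section below merges them into the
  // task's job before calling validateTaskParameters.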
daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
|
||||
{"minMemoryMB", "100"},
|
||||
{"minTmpDiskMB", "0"},
|
||||
{"priority", "1"},
|
||||
{"timeLimitSeconds", "200"},
|
||||
{"userID", std::to_string(getuid())},
|
||||
{"workDir", fs::current_path().string()},
|
||||
{"tmpDir", fs::current_path().string()}};
|
||||
|
||||
SECTION("Simple Run")
|
||||
{
|
||||
daggy::Task task{.job{
|
||||
{"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}}}};
|
||||
|
||||
task.job.merge(defaultJobValues);
|
||||
|
||||
REQUIRE(ex.validateTaskParameters(task.job));
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() >= 6);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
}
|
||||
|
||||
SECTION("Simple run with environment")
|
||||
{
|
||||
// Create the shell script
|
||||
auto scriptFile = fs::current_path() / "slurm_simple_env.sh";
|
||||
|
||||
if (fs::exists(scriptFile))
|
||||
fs::remove_all(scriptFile);
|
||||
|
||||
std::ofstream ofh(scriptFile);
|
||||
ofh << "#!/bin/bash\necho \"${DAGGY_TEST_VAR}\"\necho "
|
||||
"\"${DAGGY_TEST_VAR2}\"\n";
|
||||
ofh.close();
|
||||
fs::permissions(scriptFile, fs::perms::owner_all,
|
||||
fs::perm_options::replace);
|
||||
|
||||
std::string valOne = "funky_times";
|
||||
std::string valTwo = "bleep_bloop";
|
||||
|
||||
daggy::Task task{.job{{"command",
|
||||
daggy::executors::task::SlurmTaskExecutor::Command{
|
||||
scriptFile.string()}},
|
||||
{"environment", std::vector<std::string>{
|
||||
"DAGGY_TEST_VAR=" + valOne,
|
||||
"DAGGY_TEST_VAR2=" + valTwo}}}};
|
||||
task.job.merge(defaultJobValues);
|
||||
|
||||
REQUIRE(ex.validateTaskParameters(task.job));
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() >= 6);
|
||||
REQUIRE(rec.outputLog.find(valOne) != std::string::npos);
|
||||
REQUIRE(rec.outputLog.find(valTwo) != std::string::npos);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
|
||||
if (fs::exists(scriptFile))
|
||||
fs::remove_all(scriptFile);
|
||||
}
|
||||
|
||||
SECTION("Simple Run using commandString")
|
||||
{
|
||||
daggy::Task task{.job{{"commandString", R"(/usr/bin/echo "abc 123")"}}};
|
||||
task.job.merge(defaultJobValues);
|
||||
|
||||
REQUIRE(ex.validateTaskParameters(task.job));
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() >= 6);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
}
|
||||
|
||||
SECTION("Error Run")
|
||||
{
|
||||
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
|
||||
"/usr/bin/expr", "1", "+", "+"}}}};
|
||||
task.job.merge(defaultJobValues);
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc != 0);
|
||||
REQUIRE(rec.errorLog.size() >= 20);
|
||||
REQUIRE(rec.outputLog.empty());
|
||||
}
|
||||
|
||||
SECTION("Killing a long task")
|
||||
{
|
||||
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
|
||||
"/usr/bin/sleep", "30"}}}};
|
||||
task.job.merge(defaultJobValues);
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
ex.stop(0, "command");
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 9);
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
REQUIRE(rec.outputLog.empty());
|
||||
REQUIRE(rec.executorLog == "Job cancelled by user.\n");
|
||||
}
|
||||
|
||||
SECTION("Large Output")
|
||||
{
|
||||
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
|
||||
"/usr/share/dict/cracklib-small",
|
||||
"/etc/ssh/moduli"};
|
||||
|
||||
for (const auto &bigFile : BIG_FILES) {
|
||||
if (!std::filesystem::exists(bigFile))
|
||||
continue;
|
||||
|
||||
daggy::Task task{
|
||||
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
|
||||
"/usr/bin/cat", bigFile}}}};
|
||||
task.job.merge(defaultJobValues);
|
||||
|
||||
auto recFuture = ex.execute(0, "command", task);
|
||||
auto rec = recFuture.get();
|
||||
|
||||
REQUIRE(rec.rc == 0);
|
||||
REQUIRE(rec.outputLog.size() == std::filesystem::file_size(bigFile));
|
||||
REQUIRE(rec.errorLog.empty());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
SECTION("Parameter Expansion")
|
||||
{
|
||||
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
|
||||
std::string taskJSON =
|
||||
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
|
||||
auto tasks = daggy::tasksFromJSON(taskJSON, defaultJobValues);
|
||||
|
||||
auto result = daggy::expandTaskSet(tasks, ex, params);
|
||||
REQUIRE(result.size() == 2);
|
||||
}
|
||||
|
||||
SECTION("Build with expansion")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::string testTasks =
|
||||
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
|
||||
auto tasks = daggy::expandTaskSet(
|
||||
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
|
||||
REQUIRE(tasks.size() == 4);
|
||||
}
|
||||
|
||||
SECTION("Build with expansion using parents instead of children")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::string testTasks =
|
||||
R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
|
||||
auto tasks = daggy::expandTaskSet(
|
||||
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
|
||||
|
||||
REQUIRE(tasks.size() == 4);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
104
libdaggy/tests/unit_serialization.cpp
Normal file
@@ -0,0 +1,104 @@
|
||||
#include <catch2/catch.hpp>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "daggy/Serialization.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
TEST_CASE("parameter_deserialization", "[deserialize_parameters]")
|
||||
{
|
||||
SECTION("Basic Parse")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
REQUIRE(params.size() == 2);
|
||||
REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
|
||||
REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
|
||||
}
|
||||
SECTION("Invalid JSON")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
|
||||
REQUIRE_THROWS(daggy::configFromJSON(testParams));
|
||||
}
|
||||
SECTION("Non-string Keys")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
|
||||
REQUIRE_THROWS(daggy::configFromJSON(testParams));
|
||||
}
|
||||
SECTION("Non-array/Non-string values")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
|
||||
REQUIRE_THROWS(daggy::configFromJSON(testParams));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("task_deserialization", "[deserialize_task]")
|
||||
{
|
||||
SECTION("Build with no expansion")
|
||||
{
|
||||
std::string testTasks = R"({
|
||||
"A": {
|
||||
"job": { "command": ["/bin/echo", "A"] },
|
||||
"children": ["C"]
|
||||
},
|
||||
"B": {
|
||||
"job": {"command": ["/bin/echo", "B"]},
|
||||
"children": ["C"]
|
||||
},
|
||||
"C": {
|
||||
"job": {"command": ["/bin/echo", "C"]}
|
||||
}
|
||||
})";
|
||||
auto tasks = daggy::tasksFromJSON(testTasks);
|
||||
REQUIRE(tasks.size() == 3);
|
||||
}
|
||||
|
||||
SECTION("Build with job defaults")
|
||||
{
|
||||
std::string testTasks = R"({
|
||||
"A": {
|
||||
"job": { "command": ["/bin/echo", "A"] },
|
||||
"children": ["B"]
|
||||
},
|
||||
"B": {
|
||||
"job": {
|
||||
"command": ["/bin/echo", "C"],
|
||||
"memory": "1G"
|
||||
}
|
||||
}
|
||||
})";
|
||||
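    // Defaults are merged into each task's job; a value the task sets itself
    // (B's "memory") takes precedence over the default.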
daggy::ConfigValues jobDefaults{{"runtime", "60"}, {"memory", "300M"}};
|
||||
auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults);
|
||||
REQUIRE(tasks.size() == 2);
|
||||
REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60");
|
||||
REQUIRE(std::get<std::string>(tasks["A"].job["memory"]) == "300M");
|
||||
REQUIRE(std::get<std::string>(tasks["B"].job["runtime"]) == "60");
|
||||
REQUIRE(std::get<std::string>(tasks["B"].job["memory"]) == "1G");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("task_serialization", "[serialize_tasks]")
|
||||
{
|
||||
SECTION("Build with no expansion")
|
||||
{
|
||||
std::string testTasks =
|
||||
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
|
||||
auto tasks = daggy::tasksFromJSON(testTasks);
|
||||
|
||||
auto genJSON = daggy::tasksToJSON(tasks);
|
||||
auto regenTasks = daggy::tasksFromJSON(genJSON);
|
||||
|
||||
REQUIRE(regenTasks.size() == tasks.size());
|
||||
|
||||
for (const auto &[name, task] : regenTasks) {
|
||||
const auto &other = tasks[name];
|
||||
REQUIRE(task == other);
|
||||
}
|
||||
}
|
||||
}
|
||||
382
libdaggy/tests/unit_server.cpp
Normal file
@@ -0,0 +1,382 @@
|
||||
#include <curl/curl.h>
|
||||
#include <pistache/client.h>
|
||||
#include <rapidjson/document.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
#include <daggy/Serialization.hpp>
|
||||
#include <daggy/Server.hpp>
|
||||
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
|
||||
#include <daggy/executors/task/NoopTaskExecutor.hpp>
|
||||
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
namespace rj = rapidjson;
|
||||
|
||||
using namespace daggy;
|
||||
|
||||
#ifdef DEBUG_HTTP
|
||||
static int my_trace(CURL *handle, curl_infotype type, char *data, size_t size,
|
||||
void *userp)
|
||||
{
|
||||
const char *text;
|
||||
(void)handle; /* prevent compiler warning */
|
||||
(void)userp;
|
||||
|
||||
switch (type) {
|
||||
case CURLINFO_TEXT:
|
||||
fprintf(stderr, "== Info: %s", data);
|
||||
default: /* in case a new one is introduced to shock us */
|
||||
return 0;
|
||||
|
||||
case CURLINFO_HEADER_OUT:
|
||||
text = "=> Send header";
|
||||
break;
|
||||
case CURLINFO_DATA_OUT:
|
||||
text = "=> Send data";
|
||||
break;
|
||||
case CURLINFO_SSL_DATA_OUT:
|
||||
text = "=> Send SSL data";
|
||||
break;
|
||||
case CURLINFO_HEADER_IN:
|
||||
text = "<= Recv header";
|
||||
break;
|
||||
case CURLINFO_DATA_IN:
|
||||
text = "<= Recv data";
|
||||
break;
|
||||
case CURLINFO_SSL_DATA_IN:
|
||||
text = "<= Recv SSL data";
|
||||
break;
|
||||
}
|
||||
|
||||
std::cerr << "\n================== " << text
|
||||
<< " ==================" << std::endl
|
||||
<< data << std::endl;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
enum HTTPCode : long
|
||||
{
|
||||
Ok = 200,
|
||||
Not_Found = 404
|
||||
};
|
||||
|
||||
struct HTTPResponse
|
||||
{
|
||||
HTTPCode code;
|
||||
std::string body;
|
||||
};
|
||||
|
||||
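// libcurl write callback: append each received chunk to the std::stringstream
// passed via CURLOPT_WRITEDATA and return the number of bytes handled.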
size_t curlWriter(char *in, size_t size, size_t nmemb, std::stringstream *out)
{
  size_t r = size * nmemb;
  out->write(in, r);
  return r;
}
|
||||
|
||||
HTTPResponse REQUEST(const std::string &url, const std::string &payload = "",
|
||||
const std::string &method = "GET")
|
||||
{
|
||||
HTTPResponse response;
|
||||
|
||||
CURL *curl;
|
||||
CURLcode res;
|
||||
struct curl_slist *headers = NULL;
|
||||
|
||||
curl_global_init(CURL_GLOBAL_ALL);
|
||||
|
||||
curl = curl_easy_init();
|
||||
if (curl) {
|
||||
std::stringstream buffer;
|
||||
|
||||
#ifdef DEBUG_HTTP
|
||||
curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, my_trace);
|
||||
curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
|
||||
#endif
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, curlWriter);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
|
||||
|
||||
if (!payload.empty()) {
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, payload.size());
|
||||
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, payload.c_str());
|
||||
headers = curl_slist_append(headers, "Content-Type: Application/Json");
|
||||
}
|
||||
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, method.c_str());
|
||||
headers = curl_slist_append(headers, "Expect:");
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
|
||||
|
||||
res = curl_easy_perform(curl);
|
||||
|
||||
if (res != CURLE_OK) {
|
||||
curl_easy_cleanup(curl);
|
||||
throw std::runtime_error(std::string{"CURL Failed: "} +
|
||||
curl_easy_strerror(res));
|
||||
}
|
||||
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response.code);
    response.body = buffer.str();
    curl_easy_cleanup(curl);
|
||||
}
|
||||
|
||||
curl_global_cleanup();
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
TEST_CASE("rest_endpoint", "[server_basic]")
|
||||
{
|
||||
std::stringstream ss;
|
||||
daggy::executors::task::ForkingTaskExecutor executor(10);
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
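  // Port 0 lets the OS pick a free port; the actual port is read back through
  // server.getPort() when building baseURL below.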
Pistache::Address listenSpec("localhost", Pistache::Port(0));
|
||||
|
||||
const size_t nDAGRunners = 10, nWebThreads = 10;
|
||||
|
||||
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
server.init(nWebThreads);
|
||||
server.start();
|
||||
|
||||
const std::string host = "localhost:";
|
||||
const std::string baseURL = host + std::to_string(server.getPort());
|
||||
|
||||
SECTION("Ready Endpoint")
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/ready");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
}
|
||||
|
||||
SECTION("Querying a non-existent dagrunid should fail ")
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/100");
|
||||
REQUIRE(response.code != HTTPCode::Ok);
|
||||
}
|
||||
|
||||
SECTION("Simple DAGRun Submission")
|
||||
{
|
||||
std::string dagRun = R"({
|
||||
"tag": "unit_server",
|
||||
"parameters": { "FILE": [ "A", "B" ] },
|
||||
"tasks": {
|
||||
"touch": { "job": { "command": [ "/usr/bin/touch", "dagrun_{{FILE}}" ]} },
|
||||
"cat": { "job": { "command": [ "/usr/bin/cat", "dagrun_A", "dagrun_B" ]},
|
||||
"parents": [ "touch" ]
|
||||
}
|
||||
}
|
||||
})";
|
||||
|
||||
auto dagSpec = daggy::dagFromJSON(dagRun);
|
||||
|
||||
// Submit, and get the runID
|
||||
daggy::DAGRunID runID = 0;
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRun, "POST");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
REQUIRE(doc.IsObject());
|
||||
REQUIRE(doc.HasMember("runID"));
|
||||
|
||||
runID = doc["runID"].GetUint64();
|
||||
}
|
||||
|
||||
// Ensure our runID shows up in the list of running DAGs
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagruns?all=1");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
REQUIRE(doc.IsArray());
|
||||
REQUIRE(doc.Size() >= 1);
|
||||
|
||||
// Ensure that our DAG is in the list and matches our given DAGRunID
|
||||
bool found = false;
|
||||
const auto &runs = doc.GetArray();
|
||||
for (size_t i = 0; i < runs.Size(); ++i) {
|
||||
const auto &run = runs[i];
|
||||
REQUIRE(run.IsObject());
|
||||
REQUIRE(run.HasMember("tag"));
|
||||
REQUIRE(run.HasMember("runID"));
|
||||
|
||||
std::string runName = run["tag"].GetString();
|
||||
if (runName == "unit_server") {
|
||||
REQUIRE(run["runID"].GetUint64() == runID);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
REQUIRE(found);
|
||||
}
|
||||
|
||||
// Ensure we can get one of our tasks
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID) +
|
||||
"/task/cat_0");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
|
||||
REQUIRE_NOTHROW(daggy::taskFromJSON("cat", doc));
|
||||
auto task = daggy::taskFromJSON("cat", doc);
|
||||
|
||||
REQUIRE(task == dagSpec.tasks.at("cat"));
|
||||
}
|
||||
|
||||
// Wait until our DAG is complete
|
||||
bool complete = true;
|
||||
for (auto i = 0; i < 10; ++i) {
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/" + std::to_string(runID));
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
REQUIRE(doc.IsObject());
|
||||
|
||||
REQUIRE(doc.HasMember("taskStates"));
|
||||
const auto &taskStates = doc["taskStates"].GetObject();
|
||||
|
||||
size_t nStates = 0;
|
||||
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
|
||||
++it) {
|
||||
nStates++;
|
||||
}
|
||||
REQUIRE(nStates == 3);
|
||||
|
||||
complete = true;
|
||||
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
|
||||
++it) {
|
||||
std::string state = it->value.GetString();
|
||||
if (state != "COMPLETED") {
|
||||
complete = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (complete)
|
||||
break;
|
||||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
}
|
||||
REQUIRE(complete);
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::seconds(2));
|
||||
for (const auto &pth : std::vector<fs::path>{"dagrun_A", "dagrun_B"}) {
|
||||
REQUIRE(fs::exists(pth));
|
||||
fs::remove(pth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Server cancels and resumes execution", "[server_resume]")
|
||||
{
|
||||
std::stringstream ss;
|
||||
daggy::executors::task::ForkingTaskExecutor executor(10);
|
||||
daggy::loggers::dag_run::OStreamLogger logger(ss);
|
||||
Pistache::Address listenSpec("localhost", Pistache::Port(0));
|
||||
|
||||
const size_t nDAGRunners = 10, nWebThreads = 10;
|
||||
|
||||
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
|
||||
server.init(nWebThreads);
|
||||
server.start();
|
||||
|
||||
const std::string host = "localhost:";
|
||||
const std::string baseURL = host + std::to_string(server.getPort());
|
||||
|
||||
SECTION("Cancel / Resume DAGRun")
|
||||
{
|
||||
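    // Scenario: submit the run, KILL it while sleep_B is still executing,
    // re-queue the errored task, resume the run, and verify that touch_A was
    // not executed a second time (its mtime is unchanged).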
std::string dagRunJSON = R"({
|
||||
"tag": "unit_server",
|
||||
"tasks": {
|
||||
"touch_A": { "job": { "command": [ "/usr/bin/touch", "resume_touch_a" ]}, "children": ["touch_C"] },
|
||||
"sleep_B": { "job": { "command": [ "/usr/bin/sleep", "3" ]}, "children": ["touch_C"] },
|
||||
"touch_C": { "job": { "command": [ "/usr/bin/touch", "resume_touch_c" ]} }
|
||||
}
|
||||
})";
|
||||
|
||||
auto dagSpec = daggy::dagFromJSON(dagRunJSON);
|
||||
|
||||
// Submit, and get the runID
|
||||
daggy::DAGRunID runID;
|
||||
{
|
||||
auto response = REQUEST(baseURL + "/v1/dagrun/", dagRunJSON, "POST");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
|
||||
rj::Document doc;
|
||||
daggy::checkRJParse(doc.Parse(response.body.c_str()));
|
||||
REQUIRE(doc.IsObject());
|
||||
REQUIRE(doc.HasMember("runID"));
|
||||
|
||||
runID = doc["runID"].GetUint64();
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(1s);
|
||||
|
||||
// Stop the current run
|
||||
{
|
||||
auto response = REQUEST(
|
||||
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/KILLED", "",
|
||||
"PATCH");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::KILLED);
|
||||
}
|
||||
|
||||
// Verify that the run still exists
|
||||
{
|
||||
auto dagRun = logger.getDAGRun(runID);
|
||||
REQUIRE(dagRun.taskRunStates.at("touch_A_0") ==
|
||||
+daggy::RunState::COMPLETED);
|
||||
REQUIRE(fs::exists("resume_touch_a"));
|
||||
|
||||
REQUIRE(dagRun.taskRunStates.at("sleep_B_0") ==
|
||||
+daggy::RunState::ERRORED);
|
||||
REQUIRE(dagRun.taskRunStates.at("touch_C_0") == +daggy::RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Set the errored task state
|
||||
{
|
||||
auto url = baseURL + "/v1/dagrun/" + std::to_string(runID) +
|
||||
"/task/sleep_B_0/state/QUEUED";
|
||||
auto response = REQUEST(url, "", "PATCH");
|
||||
REQUIRE(response.code == HTTPCode::Ok);
|
||||
REQUIRE(logger.getTaskState(runID, "sleep_B_0") ==
|
||||
+daggy::RunState::QUEUED);
|
||||
}
|
||||
|
||||
// Resume
|
||||
{
|
||||
struct stat s;
|
||||
|
||||
lstat("resume_touch_A", &s);
|
||||
auto preMTime = s.st_mtim.tv_sec;
|
||||
|
||||
auto response = REQUEST(
|
||||
baseURL + "/v1/dagrun/" + std::to_string(runID) + "/state/QUEUED", "",
|
||||
"PATCH");
|
||||
|
||||
// Wait for run to complete
|
||||
std::this_thread::sleep_for(5s);
|
||||
REQUIRE(logger.getDAGRunState(runID) == +daggy::RunState::COMPLETED);
|
||||
|
||||
REQUIRE(fs::exists("resume_touch_c"));
|
||||
REQUIRE(fs::exists("resume_touch_a"));
|
||||
|
||||
for (const auto &[taskName, task] : dagSpec.tasks) {
|
||||
REQUIRE(logger.getTaskState(runID, taskName + "_0") ==
|
||||
+daggy::RunState::COMPLETED);
|
||||
}
|
||||
|
||||
// Ensure "touch_A" wasn't run again
|
||||
lstat("resume_touch_A", &s);
|
||||
auto postMTime = s.st_mtim.tv_sec;
|
||||
REQUIRE(preMTime == postMTime);
|
||||
}
|
||||
}
|
||||
|
||||
server.shutdown();
|
||||
}
|
||||
45
libdaggy/tests/unit_threadpool.cpp
Normal file
@@ -0,0 +1,45 @@
|
||||
#include <catch2/catch.hpp>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
|
||||
#include "daggy/ThreadPool.hpp"
|
||||
|
||||
using namespace daggy;
|
||||
|
||||
TEST_CASE("threadpool", "[threadpool]")
|
||||
{
|
||||
std::atomic<uint32_t> cnt(0);
|
||||
ThreadPool tp(10);
|
||||
|
||||
std::vector<std::future<uint32_t>> rets;
|
||||
|
||||
SECTION("Adding large tasks queues with return values")
|
||||
{
|
||||
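    // Queue 100 jobs on a shared TaskQueue first, then hand the whole queue to
    // the pool in a single addTasks() call and wait on every returned future.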
auto tq = std::make_shared<daggy::TaskQueue>();
|
||||
std::vector<std::future<uint32_t>> res;
|
||||
for (size_t i = 0; i < 100; ++i)
|
||||
res.emplace_back(tq->addTask([&cnt]() {
|
||||
cnt++;
|
||||
return cnt.load();
|
||||
}));
|
||||
tp.addTasks(tq);
|
||||
for (auto &r : res)
|
||||
r.get();
|
||||
REQUIRE(cnt == 100);
|
||||
}
|
||||
|
||||
SECTION("Slow runs")
|
||||
{
|
||||
std::vector<std::future<void>> res;
|
||||
using namespace std::chrono_literals;
|
||||
for (size_t i = 0; i < 100; ++i)
|
||||
res.push_back(tp.addTask([&cnt]() {
|
||||
std::this_thread::sleep_for(20ms);
|
||||
cnt++;
|
||||
return;
|
||||
}));
|
||||
for (auto &r : res)
|
||||
r.get();
|
||||
REQUIRE(cnt == 100);
|
||||
}
|
||||
}
|
||||
56
libdaggy/tests/unit_utilities.cpp
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <algorithm>
|
||||
#include <catch2/catch.hpp>
|
||||
#include <chrono>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
|
||||
#include "daggy/Serialization.hpp"
|
||||
#include "daggy/Utilities.hpp"
|
||||
|
||||
TEST_CASE("string_utilities", "[utilities_string]")
|
||||
{
|
||||
std::string test = "/this/is/{{A}}/test/{{A}}";
|
||||
auto res = daggy::globalSub(test, "{{A}}", "hello");
|
||||
REQUIRE(res == "/this/is/hello/test/hello");
|
||||
}
|
||||
|
||||
TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
|
||||
{
|
||||
SECTION("Basic expansion")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}",
|
||||
"{{TYPE}}"};
|
||||
auto allCommands = daggy::interpolateValues(cmd, params);
|
||||
|
||||
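    // 2 DATE values x 1 SOURCE value x 3 TYPE values = 6 interpolated commands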
REQUIRE(allCommands.size() == 6);
|
||||
}
|
||||
|
||||
SECTION("Skip over unused parameters")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
|
||||
auto allCommands = daggy::interpolateValues(cmd, params);
|
||||
|
||||
// TYPE isn't used, so it's just |DATE| * |SOURCE|
|
||||
REQUIRE(allCommands.size() == 2);
|
||||
}
|
||||
|
||||
SECTION("Expand within a command part")
|
||||
{
|
||||
std::string testParams{
|
||||
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
|
||||
auto params = daggy::configFromJSON(testParams);
|
||||
std::vector<std::string> cmd{"/usr/bin/touch", "{{DATE}}_{{SOURCE}}"};
|
||||
auto result = daggy::interpolateValues(cmd, params);
|
||||
|
||||
// TYPE isn't used, so it's just |DATE| * |SOURCE|
|
||||
REQUIRE(result.size() == 4);
|
||||
}
|
||||
}
|
||||