Adding clang-format, and reformatting all source code

This commit is contained in:
Ian Roddis
2021-09-21 09:41:11 -03:00
parent 39d5ae08be
commit 288ce28d29
36 changed files with 3355 additions and 2802 deletions

198
.clang-format Normal file
View File

@@ -0,0 +1,198 @@
---
# clang-format configuration (LLVM-style option names).
# Defect fixed: the extracted copy had lost all YAML nesting indentation,
# which makes nested mappings (BraceWrapping) and sequences
# (IncludeCategories, ForEachMacros, StatementMacros, RawStringFormats)
# invalid; canonical two-space nesting is restored below. All option
# keys and values are unchanged.
DisableFormat: false
Language: Cpp
Standard: Auto
# Indentation rules
IndentWidth: 2
AccessModifierOffset: -2
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 4
IndentCaseLabels: true
IndentGotoLabels: true
IndentPPDirectives: None
IndentWrappedFunctionNames: false
NamespaceIndentation: All
UseTab: Never
TabWidth: 8
# Brace wrapping rules
BreakBeforeBraces: Custom
BraceWrapping:
  AfterEnum: true
  AfterClass: true
  AfterStruct: true
  AfterUnion: true
  AfterNamespace: false
  AfterExternBlock: false
  AfterCaseLabel: false
  AfterControlStatement: false
  AfterFunction: true
  BeforeCatch: true
  BeforeElse: true
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
# Line break rules
DeriveLineEnding: true
UseCRLF: false
KeepEmptyLinesAtTheStartOfBlocks: false
MaxEmptyLinesToKeep: 1
BinPackArguments: true
BinPackParameters: true
ExperimentalAutoDetectBinPacking: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BreakInheritanceList: BeforeComma
BreakConstructorInitializers: BeforeComma
BreakBeforeInheritanceComma: true
BreakConstructorInitializersBeforeComma: true
BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: true
BreakStringLiterals: true
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowAllConstructorInitializersOnNextLine: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: false
# Line length rules
ColumnLimit: 80
ReflowComments: true
## line length penalties
## these determine where line breaks are inserted when over ColumnLimit
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
# Alignment rules
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: true
AlignConsecutiveDeclarations: false
AlignConsecutiveMacros: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
DerivePointerAlignment: true
PointerAlignment: Left
# Include ordering rules
IncludeBlocks: Regroup
SortIncludes: true
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IncludeCategories:
  - Regex: '^".*\.h"'
    Priority: 2
    SortPriority: 0
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
# Namespace rules
CompactNamespaces: true
FixNamespaceComments: true
# Language extension macros
CommentPragmas: '^ IWYU pragma:'
MacroBlockBegin: ''
MacroBlockEnd: ''
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
StatementMacros:
  - Q_UNUSED
  - QT_REQUIRE_VERSION
# Spacing rules
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: true
SpacesInParentheses: false
SpacesInSquareBrackets: false
# Rules for detecting embedded code blocks
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
    BasedOnStyle: google
  - Language: TextProto
    Delimiters:
      - pb
      - PB
      - proto
      - PROTO
    EnclosingFunctions:
      - EqualsProto
      - EquivToProto
      - PARSE_PARTIAL_TEXT_PROTO
      - PARSE_TEST_PROTO
      - PARSE_TEXT_PROTO
      - ParseTextOrDie
      - ParseTextProtoOrDie
    CanonicalDelimiter: ''
    BasedOnStyle: google
# C++ specific rules
Cpp11BracedListStyle: true
SortUsingDeclarations: true
...

View File

@@ -1,15 +1,15 @@
#pragma once
#include <iostream>
#include <deque>
#include <functional>
#include <iostream>
#include <iterator>
#include <optional>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <unordered_map>
#include <unordered_set>
#include <iterator>
#include <functional>
#include <optional>
#include <sstream>
#include <queue>
#include "Defines.hpp"
@@ -21,17 +21,19 @@
namespace daggy {
template <typename K, typename V>
struct Vertex {
struct Vertex
{
RunState state;
uint32_t depCount;
V data;
std::unordered_set<K> children;
};
template <typename K, typename V>
class DAG {
class DAG
{
using Edge = std::pair<K, K>;
public:
// Vertices
void addVertex(K id, V data);
@@ -41,7 +43,8 @@ namespace daggy {
// Edges
void addEdge(const K &src, const K &dst);
void addEdgeIf(const K &src, std::function<bool(const Vertex<K, V> &v)> predicate);
void addEdgeIf(const K &src,
std::function<bool(const Vertex<K, V> &v)> predicate);
bool isValid() const;
@@ -64,7 +67,8 @@ namespace daggy {
void setVertexState(const K &id, RunState state);
void forEach(std::function<void(const std::pair<K, Vertex<K, V>> &)> fun) const;
void forEach(
std::function<void(const std::pair<K, Vertex<K, V>> &)> fun) const;
bool allVisited() const;
@@ -77,6 +81,6 @@ namespace daggy {
private:
std::unordered_map<K, Vertex<K, V>> vertices_;
};
}
} // namespace daggy
#include "DAG.impl.hxx"

View File

@@ -1,18 +1,31 @@
namespace daggy {
template <typename K, typename V>
size_t DAG<K, V>::size() const { return vertices_.size(); }
size_t DAG<K, V>::size() const
{
return vertices_.size();
}
template <typename K, typename V>
bool DAG<K, V>::empty() const { return vertices_.empty(); }
bool DAG<K, V>::empty() const
{
return vertices_.empty();
}
template <typename K, typename V>
bool DAG<K, V>::hasVertex(const K &id) { return vertices_.count(id) != 0; }
bool DAG<K, V>::hasVertex(const K &id)
{
return vertices_.count(id) != 0;
}
template <typename K, typename V>
Vertex <K, V> &DAG<K, V>::getVertex(const K &id) { return vertices_.at(id); }
Vertex<K, V> &DAG<K, V>::getVertex(const K &id)
{
return vertices_.at(id);
}
template <typename K, typename V>
std::unordered_set<K> DAG<K, V>::getVertices() const {
std::unordered_set<K> DAG<K, V>::getVertices() const
{
std::unordered_set<K> keys;
for (const auto it : vertices_) {
keys.insert(it.first);
@@ -21,50 +34,63 @@ namespace daggy {
}
template <typename K, typename V>
void DAG<K, V>::addVertex(K id, V data) {
void DAG<K, V>::addVertex(K id, V data)
{
if (vertices_.count(id) != 0) {
std::stringstream ss;
ss << "A vertex with ID " << id << " already exists in the DAG";
throw std::runtime_error(ss.str());
}
vertices_.emplace(id, Vertex<K, V>{.state = RunState::QUEUED, .depCount = 0, .data = data});
vertices_.emplace(
id,
Vertex<K, V>{.state = RunState::QUEUED, .depCount = 0, .data = data});
}
template <typename K, typename V>
void DAG<K, V>::addEdge(const K &from, const K &to) {
if (vertices_.find(from) == vertices_.end()) throw std::runtime_error("No such vertex");
if (vertices_.find(to) == vertices_.end()) throw std::runtime_error("No such vertex");
void DAG<K, V>::addEdge(const K &from, const K &to)
{
if (vertices_.find(from) == vertices_.end())
throw std::runtime_error("No such vertex");
if (vertices_.find(to) == vertices_.end())
throw std::runtime_error("No such vertex");
vertices_.at(from).children.insert(to);
vertices_.at(to).depCount++;
}
template <typename K, typename V>
void DAG<K, V>::addEdgeIf(const K &src, std::function<bool(const Vertex <K, V> &v)> predicate) {
void DAG<K, V>::addEdgeIf(
const K &src, std::function<bool(const Vertex<K, V> &v)> predicate)
{
auto &parent = vertices_.at(src);
for (auto &[name, vertex] : vertices_) {
if (! predicate(vertex)) continue;
if (name == src) continue;
if (!predicate(vertex))
continue;
if (name == src)
continue;
parent.children.insert(name);
vertex.depCount++;
}
}
template <typename K, typename V>
bool DAG<K, V>::isValid() const {
bool DAG<K, V>::isValid() const
{
std::unordered_map<K, size_t> depCounts;
std::queue<K> ready;
size_t processed = 0;
for (const auto &[k, v] : vertices_) {
depCounts[k] = v.depCount;
if (v.depCount == 0) ready.push(k);
if (v.depCount == 0)
ready.push(k);
}
while (!ready.empty()) {
const auto &k = ready.front();
for (const auto &child : vertices_.at(k).children) {
auto dc = --depCounts[child];
if (dc == 0) ready.push(child);
if (dc == 0)
ready.push(child);
}
processed++;
ready.pop();
@@ -74,7 +100,8 @@ namespace daggy {
}
template <typename K, typename V>
void DAG<K, V>::reset() {
void DAG<K, V>::reset()
{
// Reset the state of all vertices
for (auto &[_, v] : vertices_) {
v.state = RunState::QUEUED;
@@ -90,32 +117,39 @@ namespace daggy {
}
template <typename K, typename V>
void DAG<K, V>::resetRunning() {
void DAG<K, V>::resetRunning()
{
for (auto &[k, v] : vertices_) {
if (v.state != +RunState::RUNNING) continue;
if (v.state != +RunState::RUNNING)
continue;
v.state = RunState::QUEUED;
}
}
template <typename K, typename V>
void DAG<K, V>::setVertexState(const K &id, RunState state) {
void DAG<K, V>::setVertexState(const K &id, RunState state)
{
vertices_.at(id).state = state;
}
template <typename K, typename V>
bool DAG<K, V>::allVisited() const {
bool DAG<K, V>::allVisited() const
{
for (const auto &[_, v] : vertices_) {
if (v.state != +RunState::COMPLETED) return false;
if (v.state != +RunState::COMPLETED)
return false;
}
return true;
}
template <typename K, typename V>
std::optional<std::pair<K, V>>
DAG<K, V>::visitNext() {
std::optional<std::pair<K, V>> DAG<K, V>::visitNext()
{
for (auto &[k, v] : vertices_) {
if (v.state != +RunState::QUEUED) continue;
if (v.depCount != 0) continue;
if (v.state != +RunState::QUEUED)
continue;
if (v.depCount != 0)
continue;
v.state = RunState::RUNNING;
return std::make_pair(k, v.data);
}
@@ -123,7 +157,8 @@ namespace daggy {
}
template <typename K, typename V>
void DAG<K, V>::completeVisit(const K &id) {
void DAG<K, V>::completeVisit(const K &id)
{
auto &v = vertices_.at(id);
v.state = RunState::COMPLETED;
for (auto c : v.children) {
@@ -134,10 +169,10 @@ namespace daggy {
template <typename K, typename V>
void DAG<K, V>::forEach(std::function<void(const std::pair<K, Vertex<K, V>> &)
> fun) const {
for (
auto it = vertices_.begin();
it != vertices_.
>
fun) const
{
for (auto it = vertices_.begin(); it != vertices_.
end();
@@ -145,4 +180,4 @@ namespace daggy {
fun(*it);
}
}
}
} // namespace daggy

View File

@@ -1,5 +1,7 @@
#pragma once
#include <enum.h>
#include <chrono>
#include <string>
#include <unordered_map>
@@ -7,8 +9,6 @@
#include <variant>
#include <vector>
#include <enum.h>
namespace daggy {
// Commands and parameters
using ConfigValue = std::variant<std::string, std::vector<std::string>>;
@@ -22,38 +22,36 @@ namespace daggy {
// DAG Runs
using DAGRunID = size_t;
BETTER_ENUM(RunState, uint32_t,
QUEUED = 1 << 0,
RUNNING = 1 << 1,
RETRY = 1 << 2,
ERRORED = 1 << 3,
KILLED = 1 << 4,
COMPLETED = 1 << 5
);
BETTER_ENUM(RunState, uint32_t, QUEUED = 1 << 0, RUNNING = 1 << 1,
RETRY = 1 << 2, ERRORED = 1 << 3, KILLED = 1 << 4,
COMPLETED = 1 << 5);
struct Task {
struct Task
{
std::string definedName;
bool isGenerator; // True if the output of this task is a JSON set of tasks to complete
bool isGenerator; // True if the output of this task is a JSON set of tasks
// to complete
uint32_t maxRetries;
uint32_t retryIntervalSeconds; // Time to wait between retries
ConfigValues job; // It's up to the individual inspectors to convert values from strings // array of strings
ConfigValues job; // It's up to the individual inspectors to convert values
// from strings // array of strings
std::unordered_set<std::string> children;
std::unordered_set<std::string> parents;
bool operator==(const Task &other) const {
return (definedName == other.definedName)
and (maxRetries == other.maxRetries)
and (retryIntervalSeconds == other.retryIntervalSeconds)
and (job == other.job)
and (children == other.children)
and (parents == other.parents)
and (isGenerator == other.isGenerator);
bool operator==(const Task &other) const
{
return (definedName == other.definedName) and
(maxRetries == other.maxRetries) and
(retryIntervalSeconds == other.retryIntervalSeconds) and
(job == other.job) and (children == other.children) and
(parents == other.parents) and (isGenerator == other.isGenerator);
}
};
using TaskSet = std::unordered_map<std::string, Task>;
struct AttemptRecord {
struct AttemptRecord
{
TimePoint startTime;
TimePoint stopTime;
int rc; // RC from the task
@@ -61,6 +59,6 @@ namespace daggy {
std::string outputLog; // stdout from command
std::string errorLog; // stderr from command
};
}
} // namespace daggy
BETTER_ENUMS_DECLARE_STD_HASH(daggy::RunState)

View File

@@ -1,18 +1,19 @@
#pragma once
#include <vector>
#include <string>
#include <variant>
#include <unordered_map>
#include <rapidjson/document.h>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "Defines.hpp"
namespace rj = rapidjson;
namespace daggy {
void checkRJParse(const rj::ParseResult &result, const std::string &prefix = "");
void checkRJParse(const rj::ParseResult &result,
const std::string &prefix = "");
// Parameters
ConfigValues configFromJSON(const std::string &jsonSpec);
@@ -22,12 +23,14 @@ namespace daggy {
std::string configToJSON(const ConfigValues &config);
// Tasks
Task
taskFromJSON(const std::string &name, const rj::Value &spec, const ConfigValues &jobDefaults = {});
Task taskFromJSON(const std::string &name, const rj::Value &spec,
const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const std::string &jsonSpec, const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const std::string &jsonSpec,
const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults = {});
TaskSet tasksFromJSON(const rj::Value &spec,
const ConfigValues &jobDefaults = {});
std::string taskToJSON(const Task &task);
@@ -42,4 +45,4 @@ namespace daggy {
std::string timePointToString(const TimePoint &tp);
TimePoint stringToTimePoint(const std::string &timeStr);
}
} // namespace daggy

View File

@@ -1,26 +1,31 @@
#pragma once
#include <filesystem>
#include <pistache/description.h>
#include <pistache/endpoint.h>
#include <pistache/http.h>
#include <filesystem>
#include "ThreadPool.hpp"
#include "loggers/dag_run/DAGRunLogger.hpp"
#include "executors/task/TaskExecutor.hpp"
#include "loggers/dag_run/DAGRunLogger.hpp"
namespace fs = std::filesystem;
namespace daggy {
class Server {
class Server
{
public:
Server(const Pistache::Address &listenSpec, loggers::dag_run::DAGRunLogger &logger,
executors::task::TaskExecutor &executor,
size_t nDAGRunners
)
: endpoint_(listenSpec), desc_("Daggy API", "0.1"), logger_(logger), executor_(executor),
runnerPool_(nDAGRunners) {}
Server(const Pistache::Address &listenSpec,
loggers::dag_run::DAGRunLogger &logger,
executors::task::TaskExecutor &executor, size_t nDAGRunners)
: endpoint_(listenSpec)
, desc_("Daggy API", "0.1")
, logger_(logger)
, executor_(executor)
, runnerPool_(nDAGRunners)
{
}
Server &setWebHandlerThreads(size_t nThreads);
@@ -37,15 +42,20 @@ namespace daggy {
private:
void createDescription();
void handleRunDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void handleRunDAG(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
void handleGetDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void handleGetDAGRuns(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
void handleGetDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void handleGetDAGRun(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
void handleReady(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response);
void handleReady(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response);
bool handleAuth(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter &response);
bool handleAuth(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter &response);
Pistache::Http::Endpoint endpoint_;
Pistache::Rest::Description desc_;
@@ -55,4 +65,4 @@ namespace daggy {
executors::task::TaskExecutor &executor_;
ThreadPool runnerPool_;
};
}
} // namespace daggy

View File

@@ -1,14 +1,14 @@
#pragma once
#include <atomic>
#include <condition_variable>
#include <functional>
#include <future>
#include <list>
#include <memory>
#include <queue>
#include <thread>
#include <vector>
#include <memory>
#include <condition_variable>
#include <future>
#include <queue>
#include <functional>
#include <list>
using namespace std::chrono_literals;
@@ -21,16 +21,17 @@ namespace daggy {
one producer won't starve out another, but all tasks will be run
as quickly as possible.
*/
class TaskQueue {
class TaskQueue
{
public:
template <class F, class... Args>
decltype(auto) addTask(F &&f, Args &&... args) {
decltype(auto) addTask(F &&f, Args &&...args)
{
// using return_type = std::invoke_result<F, Args...>::type;
using return_type = std::invoke_result_t<F, Args...>;
std::packaged_task<return_type()> task(
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
);
std::bind(std::forward<F>(f), std::forward<Args>(args)...));
std::future<return_type> res = task.get_future();
{
@@ -40,19 +41,22 @@ namespace daggy {
return res;
}
std::packaged_task<void()> pop() {
std::packaged_task<void()> pop()
{
std::lock_guard<std::mutex> guard(mtx_);
auto task = std::move(tasks_.front());
tasks_.pop();
return task;
}
size_t size() {
size_t size()
{
std::lock_guard<std::mutex> guard(mtx_);
return tasks_.size();
}
bool empty() {
bool empty()
{
std::lock_guard<std::mutex> guard(mtx_);
return tasks_.empty();
}
@@ -62,17 +66,24 @@ namespace daggy {
std::mutex mtx_;
};
class ThreadPool {
class ThreadPool
{
public:
explicit ThreadPool(size_t nWorkers)
:
tqit_(taskQueues_.begin()), stop_(false), drain_(false) {
: tqit_(taskQueues_.begin())
, stop_(false)
, drain_(false)
{
resize(nWorkers);
}
~ThreadPool() { shutdown(); }
~ThreadPool()
{
shutdown();
}
void shutdown() {
void shutdown()
{
stop_ = true;
cv_.notify_all();
for (std::thread &worker : workers_) {
@@ -81,22 +92,26 @@ namespace daggy {
}
}
void drain() {
void drain()
{
drain_ = true;
while (true) {
{
std::lock_guard<std::mutex> guard(mtx_);
if (taskQueues_.empty()) break;
if (taskQueues_.empty())
break;
}
std::this_thread::sleep_for(250ms);
}
}
void restart() {
void restart()
{
drain_ = false;
}
void resize(size_t nWorkers) {
void resize(size_t nWorkers)
{
shutdown();
workers_.clear();
stop_ = false;
@@ -109,26 +124,30 @@ namespace daggy {
std::unique_lock<std::mutex> lock(mtx_);
cv_.wait(lock, [&] { return stop_ || !taskQueues_.empty(); });
if (taskQueues_.empty()) {
if (stop_) return;
if (stop_)
return;
continue;
}
if (tqit_ == taskQueues_.end()) tqit_ = taskQueues_.begin();
if (tqit_ == taskQueues_.end())
tqit_ = taskQueues_.begin();
task = std::move((*tqit_)->pop());
if ((*tqit_)->empty()) {
tqit_ = taskQueues_.erase(tqit_);
} else {
}
else {
tqit_++;
}
}
task();
}
}
);
});
};
template <class F, class... Args>
decltype(auto) addTask(F &&f, Args &&... args) {
if (drain_) throw std::runtime_error("Unable to add task to draining pool");
decltype(auto) addTask(F &&f, Args &&...args)
{
if (drain_)
throw std::runtime_error("Unable to add task to draining pool");
auto tq = std::make_shared<TaskQueue>();
auto fut = tq->addTask(f, args...);
@@ -141,8 +160,10 @@ namespace daggy {
return fut;
}
void addTasks(std::shared_ptr<TaskQueue> tq) {
if (drain_) throw std::runtime_error("Unable to add task to draining pool");
void addTasks(std::shared_ptr<TaskQueue> tq)
{
if (drain_)
throw std::runtime_error("Unable to add task to draining pool");
std::lock_guard<std::mutex> guard(mtx_);
taskQueues_.push_back(tq);
cv_.notify_one();
@@ -162,4 +183,4 @@ namespace daggy {
std::atomic<bool> drain_;
};
}
} // namespace daggy

View File

@@ -1,41 +1,39 @@
#pragma once
#include <vector>
#include <string>
#include <variant>
#include <unordered_map>
#include <rapidjson/document.h>
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
#include "daggy/executors/task/TaskExecutor.hpp"
#include "Defines.hpp"
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "DAG.hpp"
#include "Defines.hpp"
#include "daggy/executors/task/TaskExecutor.hpp"
#include "daggy/loggers/dag_run/DAGRunLogger.hpp"
namespace daggy {
using TaskDAG = DAG<std::string, Task>;
std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement);
std::string globalSub(std::string string, const std::string &pattern,
const std::string &replacement);
std::vector<Command> interpolateValues(const std::vector<std::string> &raw, const ConfigValues &values);
std::vector<Command> interpolateValues(const std::vector<std::string> &raw,
const ConfigValues &values);
TaskSet
expandTaskSet(const TaskSet &tasks,
TaskSet expandTaskSet(const TaskSet &tasks,
executors::task::TaskExecutor &executor,
const ConfigValues &interpolatedValues = {});
TaskDAG
buildDAGFromTasks(TaskSet &tasks,
TaskDAG buildDAGFromTasks(
TaskSet &tasks,
const std::vector<loggers::dag_run::TaskUpdateRecord> &updates = {});
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks);
TaskDAG runDAG(DAGRunID runID,
executors::task::TaskExecutor &executor,
loggers::dag_run::DAGRunLogger &logger,
TaskDAG dag,
TaskDAG runDAG(DAGRunID runID, executors::task::TaskExecutor &executor,
loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
const ConfigValues job = {});
std::ostream &operator<<(std::ostream &os, const TimePoint &tp);
}
} // namespace daggy

View File

@@ -1,27 +1,33 @@
#pragma once
#include "TaskExecutor.hpp"
#include <daggy/ThreadPool.hpp>
#include "TaskExecutor.hpp"
namespace daggy::executors::task {
class ForkingTaskExecutor : public TaskExecutor {
class ForkingTaskExecutor : public TaskExecutor
{
public:
using Command = std::vector<std::string>;
ForkingTaskExecutor(size_t nThreads)
: tp_(nThreads) {}
: tp_(nThreads)
{
}
// Validates the job to ensure that all required values are set and are of the right type,
// Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues>
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override;
std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task
std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) override;
std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) override;
private:
ThreadPool tp_;
AttemptRecord runTask(const Task &task);
};
}
} // namespace daggy::executors::task

View File

@@ -3,18 +3,20 @@
#include "TaskExecutor.hpp"
namespace daggy::executors::task {
class NoopTaskExecutor : public TaskExecutor {
class NoopTaskExecutor : public TaskExecutor
{
public:
using Command = std::vector<std::string>;
// Validates the job to ensure that all required values are set and are of the right type,
// Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues>
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override;
std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task
std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) override;
std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) override;
};
}
} // namespace daggy::executors::task

View File

@@ -3,24 +3,28 @@
#include "TaskExecutor.hpp"
namespace daggy::executors::task {
class SlurmTaskExecutor : public TaskExecutor {
class SlurmTaskExecutor : public TaskExecutor
{
public:
using Command = std::vector<std::string>;
SlurmTaskExecutor();
~SlurmTaskExecutor();
// Validates the job to ensure that all required values are set and are of the right type,
// Validates the job to ensure that all required values are set and are of
// the right type,
bool validateTaskParameters(const ConfigValues &job) override;
std::vector<ConfigValues>
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) override;
std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) override;
// Runs the task
std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) override;
std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) override;
private:
struct Job {
struct Job
{
std::promise<AttemptRecord> prom;
std::string stdoutFile;
std::string stderrFile;
@@ -34,4 +38,4 @@ namespace daggy::executors::task {
std::thread monitorWorker_;
void monitor();
};
}
} // namespace daggy::executors::task

View File

@@ -1,31 +1,33 @@
#pragma once
#include <chrono>
#include <daggy/Defines.hpp>
#include <future>
#include <string>
#include <thread>
#include <vector>
#include <daggy/Defines.hpp>
/*
Executors run Tasks, returning a future with the results.
If there are many retries, logs are returned for each attempt.
*/
namespace daggy::executors::task {
class TaskExecutor {
class TaskExecutor
{
public:
virtual ~TaskExecutor() = default;
// Validates the job to ensure that all required values are set and are of the right type,
// Validates the job to ensure that all required values are set and are of
// the right type,
virtual bool validateTaskParameters(const ConfigValues &job) = 0;
// Will use the expansion values to return the fully expanded tasks.
virtual std::vector<ConfigValues>
expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) = 0;
virtual std::vector<ConfigValues> expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues) = 0;
// Blocking execution of a task
virtual std::future<AttemptRecord> execute(const std::string &taskName, const Task &task) = 0;
virtual std::future<AttemptRecord> execute(const std::string &taskName,
const Task &task) = 0;
};
}
} // namespace daggy::executors::task

View File

@@ -11,32 +11,32 @@
be supported.
*/
namespace daggy {
namespace loggers {
namespace dag_run {
class DAGRunLogger {
namespace daggy { namespace loggers { namespace dag_run {
class DAGRunLogger
{
public:
virtual ~DAGRunLogger() = default;
// Execution
virtual DAGRunID startDAGRun(std::string name, const TaskSet &tasks) = 0;
virtual void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) = 0;
virtual void addTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) = 0;
virtual void updateTask(DAGRunID dagRunID, const std::string taskName, const Task &task) = 0;
virtual void updateTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) = 0;
virtual void updateDAGRunState(DAGRunID dagRunID, RunState state) = 0;
virtual void
logTaskAttempt(DAGRunID dagRunID, const std::string &taskName, const AttemptRecord &attempt) = 0;
virtual void logTaskAttempt(DAGRunID dagRunID, const std::string &taskName,
const AttemptRecord &attempt) = 0;
virtual void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) = 0;
virtual void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) = 0;
// Querying
virtual std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) = 0;
virtual DAGRunRecord getDAGRun(DAGRunID dagRunID) = 0;
};
}
}
}
}}} // namespace daggy::loggers::dag_run

View File

@@ -2,26 +2,29 @@
#include <cstdint>
#include <string>
#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "../../Defines.hpp"
namespace daggy::loggers::dag_run {
struct TaskUpdateRecord {
struct TaskUpdateRecord
{
TimePoint time;
std::string taskName;
RunState newState;
};
struct DAGUpdateRecord {
struct DAGUpdateRecord
{
TimePoint time;
RunState newState;
};
// Pretty heavy weight, but
struct DAGRunRecord {
struct DAGRunRecord
{
std::string name;
TaskSet tasks;
std::unordered_map<std::string, RunState> taskRunStates;
@@ -30,7 +33,8 @@ namespace daggy::loggers::dag_run {
std::vector<DAGUpdateRecord> dagStateChanges;
};
struct DAGRunSummary {
struct DAGRunSummary
{
DAGRunID runID;
std::string name;
RunState runState;
@@ -38,4 +42,4 @@ namespace daggy::loggers::dag_run {
TimePoint lastUpdate;
std::unordered_map<RunState, size_t> taskStateCounts;
};
}
} // namespace daggy::loggers::dag_run

View File

@@ -1,10 +1,11 @@
#pragma once
#include <filesystem>
#include <rapidjson/document.h>
#include <atomic>
#include <filesystem>
#include <mutex>
#include <rapidjson/document.h>
#include "DAGRunLogger.hpp"
#include "Defines.hpp"
@@ -13,8 +14,8 @@ namespace rj = rapidjson;
namespace daggy::loggers::dag_run {
/*
* This logger should only be used for debug purposes. It's not really optimized for querying, and will
* use a ton of inodes to track state.
* This logger should only be used for debug purposes. It's not really
* optimized for querying, and will use a ton of inodes to track state.
*
* On the plus side, it's trivial to look at without using the API.
*
@@ -32,7 +33,8 @@ namespace daggy::loggers::dag_run {
* error.log
* executor.log
*/
class FileSystemLogger : public DAGRunLogger {
class FileSystemLogger : public DAGRunLogger
{
public:
FileSystemLogger(fs::path root);
@@ -41,10 +43,11 @@ namespace daggy::loggers::dag_run {
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
void
logTaskAttempt(DAGRunID, const std::string &taskName, const AttemptRecord &attempt) override;
void logTaskAttempt(DAGRunID, const std::string &taskName,
const AttemptRecord &attempt) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) override;
// Querying
std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
@@ -64,4 +67,4 @@ namespace daggy::loggers::dag_run {
inline const fs::path getRunRoot(DAGRunID runID) const;
};
}
} // namespace daggy::loggers::dag_run

View File

@@ -6,30 +6,32 @@
#include "DAGRunLogger.hpp"
#include "Defines.hpp"
namespace daggy {
namespace loggers {
namespace dag_run {
namespace daggy { namespace loggers { namespace dag_run {
/*
* This logger should only be used for debug purposes. It doesn't actually log anything, just prints stuff
* to stdout.
* This logger should only be used for debug purposes. It doesn't actually log
* anything, just prints stuff to stdout.
*/
class OStreamLogger : public DAGRunLogger {
class OStreamLogger : public DAGRunLogger
{
public:
OStreamLogger(std::ostream &os);
// Execution
DAGRunID startDAGRun(std::string name, const TaskSet &tasks) override;
void addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) override;
void addTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) override;
void updateTask(DAGRunID dagRunID, const std::string taskName, const Task &task) override;
void updateTask(DAGRunID dagRunID, const std::string taskName,
const Task &task) override;
void updateDAGRunState(DAGRunID dagRunID, RunState state) override;
void
logTaskAttempt(DAGRunID, const std::string &taskName, const AttemptRecord &attempt) override;
void logTaskAttempt(DAGRunID, const std::string &taskName,
const AttemptRecord &attempt) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) override;
void updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state) override;
// Querying
std::vector<DAGRunSummary> getDAGs(uint32_t stateMask) override;
@@ -41,10 +43,9 @@ namespace daggy {
std::ostream &os_;
std::vector<DAGRunRecord> dagRuns_;
void _updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state);
void _updateTaskState(DAGRunID dagRunID, const std::string &taskName,
RunState state);
void _updateDAGRunState(DAGRunID dagRunID, RunState state);
};
}
}
}
}}} // namespace daggy::loggers::dag_run

View File

@@ -1,13 +1,13 @@
#include <sstream>
#include <iomanip>
#include <rapidjson/error/en.h>
#include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp>
#include <iomanip>
#include <sstream>
namespace daggy {
void checkRJParse(const rj::ParseResult &result, const std::string &prefix) {
void checkRJParse(const rj::ParseResult &result, const std::string &prefix)
{
if (!result) {
std::stringstream ss;
ss << (prefix.empty() ? "" : prefix + ':')
@@ -17,15 +17,19 @@ namespace daggy {
}
}
ConfigValues configFromJSON(const std::string &jsonSpec) {
ConfigValues configFromJSON(const std::string &jsonSpec)
{
rj::Document doc;
checkRJParse(doc.Parse(jsonSpec.c_str()), "Parsing config");
return configFromJSON(doc);
}
ConfigValues configFromJSON(const rj::Value &spec) {
ConfigValues configFromJSON(const rj::Value &spec)
{
std::unordered_map<std::string, ConfigValue> parameters;
if (!spec.IsObject()) { throw std::runtime_error("Parameters in spec is not a JSON dictionary"); }
if (!spec.IsObject()) {
throw std::runtime_error("Parameters in spec is not a JSON dictionary");
}
for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
if (!it->name.IsString()) {
throw std::runtime_error("All keys must be strings.");
@@ -36,37 +40,52 @@ namespace daggy {
for (size_t i = 0; i < it->value.Size(); ++i) {
if (!it->value[i].IsString()) {
throw std::runtime_error(
"Attribute for " + std::string{it->name.GetString()} + " item " + std::to_string(i) +
" is not a string.");
"Attribute for " + std::string{it->name.GetString()} +
" item " + std::to_string(i) + " is not a string.");
}
values.emplace_back(it->value[i].GetString());
}
parameters[name] = values;
} else if (it->value.IsString()) {
}
else if (it->value.IsString()) {
parameters[name] = it->value.GetString();
} else {
throw std::runtime_error(
"Attribute for " + std::string{it->name.GetString()} + " is not a string or an array.");
}
else {
throw std::runtime_error("Attribute for " +
std::string{it->name.GetString()} +
" is not a string or an array.");
}
}
return parameters;
}
std::string configToJSON(const ConfigValues &config) {
std::string configToJSON(const ConfigValues &config)
{
std::stringstream ss;
ss << '{';
bool first = true;
for (const auto &[k, v] : config) {
if (first) { first = false; } else { ss << ", "; }
if (first) {
first = false;
}
else {
ss << ", ";
}
ss << std::quoted(k) << ": ";
if (std::holds_alternative<std::string>(v)) {
ss << std::quoted(std::get<std::string>(v));
} else {
}
else {
ss << '[';
const auto &vals = std::get<std::vector<std::string>>(v);
bool firstVal = true;
for (const auto &val : vals) {
if (firstVal) { firstVal = false; } else { ss << ", "; }
if (firstVal) {
firstVal = false;
}
else {
ss << ", ";
}
ss << std::quoted(val);
}
ss << ']';
@@ -76,16 +95,17 @@ namespace daggy {
return ss.str();
}
Task
taskFromJSON(const std::string &name, const rj::Value &spec, const ConfigValues &jobDefaults) {
Task task{
.definedName = name,
Task taskFromJSON(const std::string &name, const rj::Value &spec,
const ConfigValues &jobDefaults)
{
Task task{.definedName = name,
.isGenerator = false,
.maxRetries = 0,
.retryIntervalSeconds = 0,
.job = jobDefaults
};
if (!spec.IsObject()) { throw std::runtime_error("Tasks is not an object"); }
.job = jobDefaults};
if (!spec.IsObject()) {
throw std::runtime_error("Tasks is not an object");
}
// Grab the standard fields with defaults;
if (spec.HasMember("isGenerator")) {
@@ -117,17 +137,21 @@ namespace daggy {
if (spec.HasMember("job")) {
const auto &params = spec["job"];
if (!params.IsObject()) throw std::runtime_error("job is not a dictionary.");
if (!params.IsObject())
throw std::runtime_error("job is not a dictionary.");
for (auto it = params.MemberBegin(); it != params.MemberEnd(); ++it) {
if (!it->name.IsString()) throw std::runtime_error("job key must be a string.");
if (!it->name.IsString())
throw std::runtime_error("job key must be a string.");
if (it->value.IsArray()) {
std::vector<std::string> values;
for (size_t i = 0; i < it->value.Size(); ++i) {
values.emplace_back(it->value[i].GetString());
}
task.job.insert_or_assign(it->name.GetString(), values);
} else {
task.job.insert_or_assign(it->name.GetString(), it->value.GetString());
}
else {
task.job.insert_or_assign(it->name.GetString(),
it->value.GetString());
}
}
}
@@ -135,20 +159,27 @@ namespace daggy {
return task;
}
TaskSet tasksFromJSON(const std::string &jsonSpec, const ConfigValues &jobDefaults) {
TaskSet tasksFromJSON(const std::string &jsonSpec,
const ConfigValues &jobDefaults)
{
rj::Document doc;
checkRJParse(doc.Parse(jsonSpec.c_str()));
return tasksFromJSON(doc, jobDefaults);
}
TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults) {
TaskSet tasksFromJSON(const rj::Value &spec, const ConfigValues &jobDefaults)
{
TaskSet tasks;
if (!spec.IsObject()) { throw std::runtime_error("Tasks is not an object"); }
if (!spec.IsObject()) {
throw std::runtime_error("Tasks is not an object");
}
// Tasks
for (auto it = spec.MemberBegin(); it != spec.MemberEnd(); ++it) {
if (!it->name.IsString()) throw std::runtime_error("Task names must be a string.");
if (!it->value.IsObject()) throw std::runtime_error("Task definitions must be an object.");
if (!it->name.IsString())
throw std::runtime_error("Task names must be a string.");
if (!it->value.IsObject())
throw std::runtime_error("Task definitions must be an object.");
const auto &taskName = it->name.GetString();
tasks.emplace(taskName, taskFromJSON(taskName, it->value, jobDefaults));
}
@@ -164,9 +195,10 @@ namespace daggy {
return tasks;
}
// I really want to do this with rapidjson, but damn they make it ugly and difficult.
// So we'll shortcut and generate the JSON directly.
std::string taskToJSON(const Task &task) {
// I really want to do this with rapidjson, but damn they make it ugly and
// difficult. So we'll shortcut and generate the JSON directly.
std::string taskToJSON(const Task &task)
{
std::stringstream ss;
bool first = false;
@@ -179,7 +211,8 @@ namespace daggy {
ss << R"("children": [)";
first = true;
for (const auto &child : task.children) {
if (!first) ss << ',';
if (!first)
ss << ',';
ss << std::quoted(child);
first = false;
}
@@ -188,7 +221,8 @@ namespace daggy {
ss << R"("parents": [)";
first = true;
for (const auto &parent : task.parents) {
if (!first) ss << ',';
if (!first)
ss << ',';
ss << std::quoted(parent);
first = false;
}
@@ -200,14 +234,16 @@ namespace daggy {
return ss.str();
}
std::string tasksToJSON(const TaskSet &tasks) {
std::string tasksToJSON(const TaskSet &tasks)
{
std::stringstream ss;
ss << "{";
bool first = true;
for (const auto &[name, task] : tasks) {
if (!first) ss << ',';
if (!first)
ss << ',';
ss << std::quoted(name) << ": " << taskToJSON(task);
first = false;
}
@@ -216,37 +252,41 @@ namespace daggy {
return ss.str();
}
std::ostream &operator<<(std::ostream &os, const Task &task) {
std::ostream &operator<<(std::ostream &os, const Task &task)
{
os << taskToJSON(task);
return os;
}
std::string attemptRecordToJSON(const AttemptRecord &record) {
std::string attemptRecordToJSON(const AttemptRecord &record)
{
std::stringstream ss;
ss << "{"
<< R"("startTime": )" << std::quoted(timePointToString(record.startTime)) << ','
<< R"("stopTime": )" << std::quoted(timePointToString(record.stopTime)) << ','
<< R"("rc": )" << std::to_string(record.rc) << ','
<< R"("executorLog": )" << std::quoted(record.executorLog) << ','
<< R"("outputLog": )" << std::quoted(record.outputLog) << ','
<< R"("errorLog": )" << std::quoted(record.errorLog)
<< '}';
<< R"("startTime": )" << std::quoted(timePointToString(record.startTime))
<< ',' << R"("stopTime": )"
<< std::quoted(timePointToString(record.stopTime)) << ',' << R"("rc": )"
<< std::to_string(record.rc) << ',' << R"("executorLog": )"
<< std::quoted(record.executorLog) << ',' << R"("outputLog": )"
<< std::quoted(record.outputLog) << ',' << R"("errorLog": )"
<< std::quoted(record.errorLog) << '}';
return ss.str();
}
std::string timePointToString(const TimePoint &tp) {
std::string timePointToString(const TimePoint &tp)
{
std::stringstream ss;
ss << tp;
return ss.str();
}
TimePoint stringToTimePoint(const std::string &timeString) {
TimePoint stringToTimePoint(const std::string &timeString)
{
std::tm dt;
std::stringstream ss{timeString};
ss >> std::get_time(&dt, "%Y-%m-%d %H:%M:%S %Z");
return Clock::from_time_t(mktime(&dt));
}
}
} // namespace daggy

View File

@@ -1,84 +1,82 @@
#include <iomanip>
#include <enum.h>
#include <daggy/Server.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/Utilities.hpp>
#include <iomanip>
#define REQ_ERROR(code, msg) response.send(Pistache::Http::Code::code, msg); return;
#define REQ_ERROR(code, msg) \
response.send(Pistache::Http::Code::code, msg); \
return;
namespace rj = rapidjson;
using namespace Pistache;
namespace daggy {
void Server::init(int threads) {
void Server::init(int threads)
{
auto opts = Http::Endpoint::options()
.threads(threads)
.flags(Pistache::Tcp::Options::ReuseAddr | Pistache::Tcp::Options::ReusePort)
.flags(Pistache::Tcp::Options::ReuseAddr |
Pistache::Tcp::Options::ReusePort)
.maxRequestSize(4294967296)
.maxResponseSize(4294967296);
endpoint_.init(opts);
createDescription();
}
void Server::start() {
void Server::start()
{
router_.initFromDescription(desc_);
endpoint_.setHandler(router_.handler());
endpoint_.serveThreaded();
}
void Server::shutdown() {
void Server::shutdown()
{
endpoint_.shutdown();
}
uint16_t Server::getPort() const {
uint16_t Server::getPort() const
{
return endpoint_.getPort();
}
void Server::createDescription() {
desc_
.info()
.license("MIT", "https://opensource.org/licenses/MIT");
void Server::createDescription()
{
desc_.info().license("MIT", "https://opensource.org/licenses/MIT");
auto backendErrorResponse = desc_.response(
Http::Code::Internal_Server_Error, "An error occured with the backend");
auto backendErrorResponse = desc_.response(Http::Code::Internal_Server_Error,
"An error occured with the backend");
desc_
.schemes(Rest::Scheme::Http)
desc_.schemes(Rest::Scheme::Http)
.basePath("/v1")
.produces(MIME(Application, Json))
.consumes(MIME(Application, Json));
desc_
.route(desc_.get("/ready"))
desc_.route(desc_.get("/ready"))
.bind(&Server::handleReady, this)
.response(Http::Code::Ok, "Response to the /ready call")
.hide();
auto versionPath = desc_.path("/v1");
auto dagPath = versionPath.path("/dagrun");
// Run a DAG
dagPath
.route(desc_.post("/"))
dagPath.route(desc_.post("/"))
.bind(&Server::handleRunDAG, this)
.produces(MIME(Application, Json), MIME(Application, Xml))
.response(Http::Code::Ok, "Run a DAG");
// List detailed DAG run
dagPath
.route(desc_.get("/:runID"))
dagPath.route(desc_.get("/:runID"))
.bind(&Server::handleGetDAGRun, this)
.produces(MIME(Application, Json), MIME(Application, Xml))
.response(Http::Code::Ok, "Details of a specific DAG run");
// List all DAG runs
dagPath
.route(desc_.get("/"))
dagPath.route(desc_.get("/"))
.bind(&Server::handleGetDAGRuns, this)
.produces(MIME(Application, Json), MIME(Application, Xml))
.response(Http::Code::Ok, "The list of all known DAG Runs");
@@ -90,19 +88,29 @@ namespace daggy {
* "job": {...}
* "tasks": {...}
*/
void Server::handleRunDAG(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
if (!handleAuth(request, response)) return;
void Server::handleRunDAG(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request, response))
return;
rj::Document doc;
try {
doc.Parse(request.body().c_str());
} catch (std::exception &e) {
}
catch (std::exception &e) {
REQ_ERROR(Bad_Request, std::string{"Invalid JSON payload: "} + e.what());
}
if (!doc.IsObject()) { REQ_ERROR(Bad_Request, "Payload is not a dictionary."); }
if (!doc.HasMember("name")) { REQ_ERROR(Bad_Request, "DAG Run is missing a name."); }
if (!doc.HasMember("tasks")) { REQ_ERROR(Bad_Request, "DAG Run has no tasks."); }
if (!doc.IsObject()) {
REQ_ERROR(Bad_Request, "Payload is not a dictionary.");
}
if (!doc.HasMember("name")) {
REQ_ERROR(Bad_Request, "DAG Run is missing a name.");
}
if (!doc.HasMember("tasks")) {
REQ_ERROR(Bad_Request, "DAG Run has no tasks.");
}
std::string runName = doc["name"].GetString();
@@ -112,7 +120,8 @@ namespace daggy {
try {
auto parsedParams = configFromJSON(doc["parameters"].GetObject());
parameters.swap(parsedParams);
} catch (std::exception &e) {
}
catch (std::exception &e) {
REQ_ERROR(Bad_Request, e.what());
}
}
@@ -123,7 +132,8 @@ namespace daggy {
try {
auto parsedJobDefaults = configFromJSON(doc["jobDefaults"].GetObject());
jobDefaults.swap(parsedJobDefaults);
} catch (std::exception &e) {
}
catch (std::exception &e) {
REQ_ERROR(Bad_Request, e.what());
}
}
@@ -134,7 +144,8 @@ namespace daggy {
auto taskTemplates = tasksFromJSON(doc["tasks"], jobDefaults);
auto expandedTasks = expandTaskSet(taskTemplates, executor_, parameters);
tasks.swap(expandedTasks);
} catch (std::exception &e) {
}
catch (std::exception &e) {
REQ_ERROR(Bad_Request, e.what());
}
@@ -142,14 +153,19 @@ namespace daggy {
auto runID = logger_.startDAGRun(runName, tasks);
auto dag = buildDAGFromTasks(tasks);
runnerPool_.addTask(
[this, parameters, runID, dag]() { runDAG(runID, this->executor_, this->logger_, dag, parameters); });
runnerPool_.addTask([this, parameters, runID, dag]() {
runDAG(runID, this->executor_, this->logger_, dag, parameters);
});
response.send(Pistache::Http::Code::Ok, R"({"runID": )" + std::to_string(runID) + "}");
response.send(Pistache::Http::Code::Ok,
R"({"runID": )" + std::to_string(runID) + "}");
}
void Server::handleGetDAGRuns(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
if (!handleAuth(request, response)) return;
void Server::handleGetDAGRuns(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request, response))
return;
auto dagRuns = logger_.getDAGs(0);
std::stringstream ss;
ss << '[';
@@ -158,21 +174,24 @@ namespace daggy {
for (const auto &run : dagRuns) {
if (first) {
first = false;
} else {
}
else {
ss << ", ";
}
ss << " {"
<< R"("runID": )" << run.runID << ','
<< R"("name": )" << std::quoted(run.name) << ","
<< R"("startTime": )" << std::quoted(timePointToString(run.startTime)) << ','
<< R"("lastUpdate": )" << std::quoted(timePointToString(run.lastUpdate)) << ','
<< R"("runID": )" << run.runID << ',' << R"("name": )"
<< std::quoted(run.name) << ","
<< R"("startTime": )" << std::quoted(timePointToString(run.startTime))
<< ',' << R"("lastUpdate": )"
<< std::quoted(timePointToString(run.lastUpdate)) << ','
<< R"("taskCounts": {)";
bool firstState = true;
for (const auto &[state, count] : run.taskStateCounts) {
if (firstState) {
firstState = false;
} else {
}
else {
ss << ", ";
}
ss << std::quoted(state._to_string()) << ':' << count;
@@ -185,24 +204,34 @@ namespace daggy {
response.send(Pistache::Http::Code::Ok, ss.str());
}
void Server::handleGetDAGRun(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
if (!handleAuth(request, response)) return;
if (!request.hasParam(":runID")) { REQ_ERROR(Not_Found, "No runID provided in URL"); }
void Server::handleGetDAGRun(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
if (!handleAuth(request, response))
return;
if (!request.hasParam(":runID")) {
REQ_ERROR(Not_Found, "No runID provided in URL");
}
DAGRunID runID = request.param(":runID").as<size_t>();
auto run = logger_.getDAGRun(runID);
bool first = true;
std::stringstream ss;
ss << "{"
<< R"("runID": )" << runID << ','
<< R"("name": )" << std::quoted(run.name) << ','
<< R"("tasks": )" << tasksToJSON(run.tasks) << ',';
<< R"("runID": )" << runID << ',' << R"("name": )"
<< std::quoted(run.name) << ',' << R"("tasks": )"
<< tasksToJSON(run.tasks) << ',';
// task run states
ss << R"("taskStates": { )";
first = true;
for (const auto &[name, state] : run.taskRunStates) {
if (first) { first = false; } else { ss << ','; }
if (first) {
first = false;
}
else {
ss << ',';
}
ss << std::quoted(name) << ": " << std::quoted(state._to_string());
}
ss << "},";
@@ -211,19 +240,29 @@ namespace daggy {
first = true;
ss << R"("taskAttempts": { )";
for (const auto &[taskName, attempts] : run.taskAttempts) {
if (first) { first = false; } else { ss << ','; }
if (first) {
first = false;
}
else {
ss << ',';
}
ss << std::quoted(taskName) << ": [";
bool firstAttempt = true;
for (const auto &attempt : attempts) {
if (firstAttempt) { firstAttempt = false; } else { ss << ','; }
ss << '{'
<< R"("startTime":)" << std::quoted(timePointToString(attempt.startTime)) << ','
<< R"("stopTime":)" << std::quoted(timePointToString(attempt.stopTime)) << ','
<< R"("rc":)" << attempt.rc << ','
<< R"("outputLog":)" << std::quoted(attempt.outputLog) << ','
<< R"("errorLog":)" << std::quoted(attempt.errorLog) << ','
<< R"("executorLog":)" << std::quoted(attempt.executorLog)
<< '}';
if (firstAttempt) {
firstAttempt = false;
}
else {
ss << ',';
}
ss << '{' << R"("startTime":)"
<< std::quoted(timePointToString(attempt.startTime)) << ','
<< R"("stopTime":)"
<< std::quoted(timePointToString(attempt.stopTime)) << ','
<< R"("rc":)" << attempt.rc << ',' << R"("outputLog":)"
<< std::quoted(attempt.outputLog) << ',' << R"("errorLog":)"
<< std::quoted(attempt.errorLog) << ',' << R"("executorLog":)"
<< std::quoted(attempt.executorLog) << '}';
}
ss << ']';
}
@@ -233,11 +272,15 @@ namespace daggy {
first = true;
ss << R"("dagStateChanges": [ )";
for (const auto &change : run.dagStateChanges) {
if (first) { first = false; } else { ss << ','; }
ss << '{'
<< R"("newState": )" << std::quoted(change.newState._to_string()) << ','
<< R"("time": )" << std::quoted(timePointToString(change.time))
<< '}';
if (first) {
first = false;
}
else {
ss << ',';
}
ss << '{' << R"("newState": )"
<< std::quoted(change.newState._to_string()) << ',' << R"("time": )"
<< std::quoted(timePointToString(change.time)) << '}';
}
ss << "]";
ss << '}';
@@ -245,16 +288,21 @@ namespace daggy {
response.send(Pistache::Http::Code::Ok, ss.str());
}
void Server::handleReady(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter response) {
void Server::handleReady(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter response)
{
response.send(Pistache::Http::Code::Ok, "Ya like DAGs?");
}
/*
* handleAuth will check any auth methods and handle any responses in the case of failed auth. If it returns
* false, callers should cease handling the response
* handleAuth will check any auth methods and handle any responses in the case
* of failed auth. If it returns false, callers should cease handling the
* response
*/
bool Server::handleAuth(const Pistache::Rest::Request &request, Pistache::Http::ResponseWriter &response) {
bool Server::handleAuth(const Pistache::Rest::Request &request,
Pistache::Http::ResponseWriter &response)
{
(void)response;
return true;
}
}
} // namespace daggy

View File

@@ -1,13 +1,14 @@
#include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp>
#include <future>
#include <iomanip>
#include <daggy/Utilities.hpp>
#include <daggy/Serialization.hpp>
using namespace std::chrono_literals;
namespace daggy {
std::string globalSub(std::string string, const std::string &pattern, const std::string &replacement) {
std::string globalSub(std::string string, const std::string &pattern,
const std::string &replacement)
{
size_t pos = string.find(pattern);
while (pos != std::string::npos) {
string.replace(pos, pattern.size(), replacement);
@@ -16,8 +17,9 @@ namespace daggy {
return string;
}
std::vector<std::vector<std::string>>
interpolateValues(const std::vector<std::string> &raw, const ConfigValues &values) {
std::vector<std::vector<std::string>> interpolateValues(
const std::vector<std::string> &raw, const ConfigValues &values)
{
std::vector<std::vector<std::string>> cooked{{}};
for (const auto &part : raw) {
@@ -27,15 +29,19 @@ namespace daggy {
for (const auto &[paramRaw, paramValue] : values) {
std::string param = "{{" + paramRaw + "}}";
auto pos = part.find(param);
if (pos == std::string::npos) continue;
if (pos == std::string::npos)
continue;
std::vector<std::string> newExpandedPart;
if (std::holds_alternative<std::string>(paramValue)) {
for (auto &cmd : expandedPart) {
newExpandedPart.push_back(globalSub(cmd, param, std::get<std::string>(paramValue)));
newExpandedPart.push_back(
globalSub(cmd, param, std::get<std::string>(paramValue)));
}
} else {
for (const auto &val: std::get<std::vector<std::string>>(paramValue)) {
}
else {
for (const auto &val :
std::get<std::vector<std::string>>(paramValue)) {
for (auto cmd : expandedPart) {
newExpandedPart.push_back(globalSub(cmd, param, val));
}
@@ -57,15 +63,16 @@ namespace daggy {
return cooked;
}
TaskSet
expandTaskSet(const TaskSet &tasks,
TaskSet expandTaskSet(const TaskSet &tasks,
executors::task::TaskExecutor &executor,
const ConfigValues &interpolatedValues) {
const ConfigValues &interpolatedValues)
{
// Expand the tasks first
TaskSet newTaskSet;
for (const auto &[baseName, task] : tasks) {
executor.validateTaskParameters(task.job);
const auto newJobs = executor.expandTaskParameters(task.job, interpolatedValues);
const auto newJobs =
executor.expandTaskParameters(task.job, interpolatedValues);
size_t i = 0;
for (const auto &newJob : newJobs) {
Task newTask{task};
@@ -77,8 +84,8 @@ namespace daggy {
return newTaskSet;
}
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks) {
void updateDAGFromTasks(TaskDAG &dag, const TaskSet &tasks)
{
// Add the missing vertices
for (const auto &[name, task] : tasks) {
dag.addVertex(name, task);
@@ -86,7 +93,9 @@ namespace daggy {
// Add edges
for (const auto &[name, task] : tasks) {
dag.addEdgeIf(name, [&](const auto &v) { return task.children.count(v.data.definedName) > 0; });
dag.addEdgeIf(name, [&](const auto &v) {
return task.children.count(v.data.definedName) > 0;
});
}
if (!dag.isValid()) {
@@ -94,8 +103,10 @@ namespace daggy {
}
}
TaskDAG buildDAGFromTasks(TaskSet &tasks,
const std::vector<loggers::dag_run::TaskUpdateRecord> &updates) {
TaskDAG buildDAGFromTasks(
TaskSet &tasks,
const std::vector<loggers::dag_run::TaskUpdateRecord> &updates)
{
TaskDAG dag;
updateDAGFromTasks(dag, tasks);
@@ -118,12 +129,10 @@ namespace daggy {
return dag;
}
TaskDAG runDAG(DAGRunID runID,
executors::task::TaskExecutor &executor,
loggers::dag_run::DAGRunLogger &logger,
TaskDAG dag,
const ConfigValues parameters
) {
TaskDAG runDAG(DAGRunID runID, executors::task::TaskExecutor &executor,
loggers::dag_run::DAGRunLogger &logger, TaskDAG dag,
const ConfigValues parameters)
{
logger.updateDAGRunState(runID, RunState::RUNNING);
std::unordered_map<std::string, std::future<AttemptRecord>> runningTasks;
@@ -145,9 +154,7 @@ namespace daggy {
// Parse the output and update the DAGs
try {
auto newTasks = expandTaskSet(tasksFromJSON(attempt.outputLog),
executor,
parameters
);
executor, parameters);
updateDAGFromTasks(dag, newTasks);
for (const auto &[ntName, ntTask] : newTasks) {
@@ -156,9 +163,12 @@ namespace daggy {
task.children.insert(ntName);
}
logger.updateTask(runID, taskName, task);
} catch (std::exception &e) {
logger.logTaskAttempt(runID, taskName,
AttemptRecord{.executorLog =
}
catch (std::exception &e) {
logger.logTaskAttempt(
runID, taskName,
AttemptRecord{
.executorLog =
std::string{"Failed to parse JSON output: "} +
e.what()});
logger.updateTaskState(runID, taskName, RunState::ERRORED);
@@ -167,13 +177,15 @@ namespace daggy {
}
dag.completeVisit(taskName);
--running;
} else {
}
else {
// RC isn't 0
if (taskAttemptCounts[taskName] <= task.maxRetries) {
logger.updateTaskState(runID, taskName, RunState::RETRY);
runningTasks[taskName] = executor.execute(taskName, task);
++taskAttemptCounts[taskName];
} else {
}
else {
logger.updateTaskState(runID, taskName, RunState::ERRORED);
++errored;
}
@@ -181,7 +193,8 @@ namespace daggy {
}
}
// Add all remaining tasks in a task queue to avoid dominating the thread pool
// Add all remaining tasks in a task queue to avoid dominating the thread
// pool
auto t = dag.visitNext();
while (t.has_value()) {
// Schedule the task to run
@@ -194,7 +207,8 @@ namespace daggy {
++running;
auto nextTask = dag.visitNext();
if (not nextTask.has_value()) break;
if (not nextTask.has_value())
break;
t.emplace(nextTask.value());
}
if (running > 0 and errored == running) {
@@ -211,9 +225,10 @@ namespace daggy {
return dag;
}
std::ostream &operator<<(std::ostream &os, const TimePoint &tp) {
std::ostream &operator<<(std::ostream &os, const TimePoint &tp)
{
auto t_c = Clock::to_time_t(tp);
os << std::put_time(std::localtime(&t_c), "%Y-%m-%d %H:%M:%S %Z");
return os;
}
}
} // namespace daggy

View File

@@ -1,27 +1,32 @@
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/Utilities.hpp>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>
#include <wait.h>
#include <poll.h>
#include <daggy/Utilities.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
using namespace daggy::executors::task;
std::string slurp(int fd) {
std::string slurp(int fd)
{
std::string result;
const ssize_t BUFFER_SIZE = 4096;
char buffer[BUFFER_SIZE];
struct pollfd pfd{.fd = fd, .events = POLLIN, .revents = 0};
struct pollfd pfd
{
.fd = fd, .events = POLLIN, .revents = 0
};
poll(&pfd, 1, 1);
while (pfd.revents & POLLIN) {
ssize_t bytes = read(fd, buffer, BUFFER_SIZE);
if (bytes == 0) {
break;
} else {
}
else {
result.append(buffer, bytes);
}
pfd.revents = 0;
@@ -31,14 +36,14 @@ std::string slurp(int fd) {
return result;
}
std::future<daggy::AttemptRecord>
ForkingTaskExecutor::execute(const std::string &taskName, const Task &task) {
std::future<daggy::AttemptRecord> ForkingTaskExecutor::execute(
const std::string &taskName, const Task &task)
{
return tp_.addTask([this, task]() { return this->runTask(task); });
}
daggy::AttemptRecord
ForkingTaskExecutor::runTask(const Task & task) {
daggy::AttemptRecord ForkingTaskExecutor::runTask(const Task &task)
{
AttemptRecord rec;
rec.startTime = Clock::now();
@@ -46,28 +51,30 @@ ForkingTaskExecutor::runTask(const Task & task) {
// Need to convert the strings
std::vector<char *> argv;
const auto command = std::get<Command>(task.job.at("command"));
std::transform(command.begin(),
command.end(),
std::back_inserter(argv),
[](const std::string & s) {
return const_cast<char *>(s.c_str());
});
std::transform(
command.begin(), command.end(), std::back_inserter(argv),
[](const std::string &s) { return const_cast<char *>(s.c_str()); });
argv.push_back(nullptr);
// Create the pipe
int stdoutPipe[2];
int pipeRC = pipe2(stdoutPipe, O_DIRECT);
if (pipeRC != 0) throw std::runtime_error("Unable to create pipe for stdout");
if (pipeRC != 0)
throw std::runtime_error("Unable to create pipe for stdout");
int stderrPipe[2];
pipeRC = pipe2(stderrPipe, O_DIRECT);
if (pipeRC != 0) throw std::runtime_error("Unable to create pipe for stderr");
if (pipeRC != 0)
throw std::runtime_error("Unable to create pipe for stderr");
pid_t child = fork();
if (child < 0) {
throw std::runtime_error("Unable to fork child");
} else if (child == 0) { // child
while ((dup2(stdoutPipe[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {}
while ((dup2(stderrPipe[1], STDERR_FILENO) == -1) && (errno == EINTR)) {}
}
else if (child == 0) { // child
while ((dup2(stdoutPipe[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {
}
while ((dup2(stderrPipe[1], STDERR_FILENO) == -1) && (errno == EINTR)) {
}
close(stdoutPipe[0]);
close(stderrPipe[0]);
execvp(argv[0], argv.data());
@@ -75,8 +82,14 @@ ForkingTaskExecutor::runTask(const Task & task) {
}
std::atomic<bool> running = true;
std::thread stdoutReader([&]() { while (running) rec.outputLog.append(slurp(stdoutPipe[0])); });
std::thread stderrReader([&]() { while (running) rec.errorLog.append(slurp(stderrPipe[0])); });
std::thread stdoutReader([&]() {
while (running)
rec.outputLog.append(slurp(stdoutPipe[0]));
});
std::thread stderrReader([&]() {
while (running)
rec.errorLog.append(slurp(stderrPipe[0]));
});
int rc = 0;
waitpid(child, &rc, 0);
@@ -85,7 +98,8 @@ ForkingTaskExecutor::runTask(const Task & task) {
rec.stopTime = Clock::now();
if (WIFEXITED(rc)) {
rec.rc = WEXITSTATUS(rc);
} else {
}
else {
rec.rc = -1;
}
@@ -98,21 +112,25 @@ ForkingTaskExecutor::runTask(const Task & task) {
return rec;
}
bool ForkingTaskExecutor::validateTaskParameters(const ConfigValues &job) {
bool ForkingTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
auto it = job.find("command");
if (it == job.end())
throw std::runtime_error(R"(job does not have a "command" argument)");
if (!std::holds_alternative<Command>(it->second))
throw std::runtime_error(R"(taskParameter's "command" must be an array of strings)");
throw std::runtime_error(
R"(taskParameter's "command" must be an array of strings)");
return true;
}
std::vector<daggy::ConfigValues>
ForkingTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) {
std::vector<daggy::ConfigValues> ForkingTaskExecutor::expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues;
const auto command = std::get<Command>(job.at("command"));
for (const auto &expandedCommand: interpolateValues(command, expansionValues)) {
for (const auto &expandedCommand :
interpolateValues(command, expansionValues)) {
ConfigValues newCommand{job};
newCommand.at("command") = expandedCommand;
newValues.emplace_back(newCommand);

View File

@@ -1,37 +1,40 @@
#include <daggy/executors/task/NoopTaskExecutor.hpp>
#include <daggy/Utilities.hpp>
#include <daggy/executors/task/NoopTaskExecutor.hpp>
namespace daggy::executors::task {
std::future<daggy::AttemptRecord>
NoopTaskExecutor::execute(const std::string &taskName, const Task &task) {
std::future<daggy::AttemptRecord> NoopTaskExecutor::execute(
const std::string &taskName, const Task &task)
{
std::promise<daggy::AttemptRecord> promise;
auto ts = Clock::now();
promise.set_value(AttemptRecord{
.startTime = ts,
promise.set_value(AttemptRecord{.startTime = ts,
.stopTime = ts,
.rc = 0,
.executorLog = taskName,
.outputLog = taskName,
.errorLog = taskName
});
.errorLog = taskName});
return promise.get_future();
}
bool NoopTaskExecutor::validateTaskParameters(const ConfigValues &job) {
bool NoopTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
auto it = job.find("command");
if (it == job.end())
throw std::runtime_error(R"(job does not have a "command" argument)");
if (!std::holds_alternative<Command>(it->second))
throw std::runtime_error(R"(taskParameter's "command" must be an array of strings)");
throw std::runtime_error(
R"(taskParameter's "command" must be an array of strings)");
return true;
}
std::vector<daggy::ConfigValues>
NoopTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) {
std::vector<daggy::ConfigValues> NoopTaskExecutor::expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues;
const auto command = std::get<Command>(job.at("command"));
for (const auto &expandedCommand: interpolateValues(command, expansionValues)) {
for (const auto &expandedCommand :
interpolateValues(command, expansionValues)) {
ConfigValues newCommand{job};
newCommand.at("command") = expandedCommand;
newValues.emplace_back(newCommand);
@@ -39,4 +42,4 @@ namespace daggy::executors::task {
return newValues;
}
}
} // namespace daggy::executors::task

View File

@@ -1,32 +1,32 @@
#include <iterator>
#include <stdexcept>
#ifdef DAGGY_ENABLE_SLURM
#include <random>
#include <slurm/slurm.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <daggy/Utilities.hpp>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
#include <filesystem>
#include <fstream>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <slurm/slurm.h>
#include <daggy/executors/task/SlurmTaskExecutor.hpp>
#include <daggy/Utilities.hpp>
#include <random>
namespace fs = std::filesystem;
namespace daggy::executors::task {
std::string getUniqueTag(size_t nChars = 6) {
std::string getUniqueTag(size_t nChars = 6)
{
std::string result(nChars, '\0');
static std::random_device dev;
static std::mt19937 rng(dev());
std::uniform_int_distribution<int> dist(0, 61);
const char *v = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char *v =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (size_t i = 0; i < nChars; i++) {
result[i] = v[dist(rng)];
@@ -34,8 +34,10 @@ namespace daggy::executors::task {
return result;
}
void readAndClean(const fs::path & fn, std::string & dest) {
if (! fs::exists(fn)) return;
void readAndClean(const fs::path &fn, std::string &dest)
{
if (!fs::exists(fn))
return;
std::ifstream ifh;
ifh.open(fn);
@@ -50,7 +52,8 @@ namespace daggy::executors::task {
: running_(true)
, monitorWorker_(&SlurmTaskExecutor::monitor, this)
{
std::string priority = "SLURM_PRIO_PROCESS=" + std::to_string(getpriority(PRIO_PROCESS, 0));
std::string priority =
"SLURM_PRIO_PROCESS=" + std::to_string(getpriority(PRIO_PROCESS, 0));
std::string submitDir = "SLURM_SUBMIT_DIR=" + fs::current_path().string();
const size_t MAX_HOSTNAME_LENGTH = 50;
@@ -63,10 +66,8 @@ namespace daggy::executors::task {
umask(mask); // Restore the old mask
std::stringstream ss;
ss << "SLURM_UMASK=0"
<< uint32_t{((mask >> 6) & 07)}
<< uint32_t{((mask >> 3) & 07)}
<< uint32_t{(mask & 07)};
ss << "SLURM_UMASK=0" << uint32_t{((mask >> 6) & 07)}
<< uint32_t{((mask >> 3) & 07)} << uint32_t{(mask & 07)};
// Set some environment variables
putenv(const_cast<char *>(priority.c_str()));
@@ -75,24 +76,20 @@ namespace daggy::executors::task {
putenv(const_cast<char *>(ss.str().c_str()));
}
SlurmTaskExecutor::~SlurmTaskExecutor() {
SlurmTaskExecutor::~SlurmTaskExecutor()
{
running_ = false;
monitorWorker_.join();
}
// Validates the job to ensure that all required values are set and are of the right type,
bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job) {
// Validates the job to ensure that all required values are set and are of the
// right type,
bool SlurmTaskExecutor::validateTaskParameters(const ConfigValues &job)
{
const std::unordered_set<std::string> requiredFields{
"minCPUs",
"minMemoryMB",
"minTmpDiskMB",
"priority",
"timeLimitSeconds",
"userID",
"workDir",
"tmpDir",
"command"
};
"minCPUs", "minMemoryMB", "minTmpDiskMB",
"priority", "timeLimitSeconds", "userID",
"workDir", "tmpDir", "command"};
for (const auto &requiredField : requiredFields) {
if (job.count(requiredField) == 0) {
@@ -102,12 +99,14 @@ namespace daggy::executors::task {
return true;
}
std::vector<ConfigValues>
SlurmTaskExecutor::expandTaskParameters(const ConfigValues &job, const ConfigValues &expansionValues) {
std::vector<ConfigValues> SlurmTaskExecutor::expandTaskParameters(
const ConfigValues &job, const ConfigValues &expansionValues)
{
std::vector<ConfigValues> newValues;
const auto command = std::get<Command>(job.at("command"));
for (const auto &expandedCommand: interpolateValues(command, expansionValues)) {
for (const auto &expandedCommand :
interpolateValues(command, expansionValues)) {
ConfigValues newCommand{job};
newCommand.at("command") = expandedCommand;
newValues.emplace_back(newCommand);
@@ -116,8 +115,9 @@ namespace daggy::executors::task {
return newValues;
}
std::future<AttemptRecord>
SlurmTaskExecutor::execute(const std::string &taskName, const Task &task) {
std::future<AttemptRecord> SlurmTaskExecutor::execute(
const std::string &taskName, const Task &task)
{
std::stringstream executorLog;
const auto &job = task.job;
@@ -130,13 +130,11 @@ namespace daggy::executors::task {
// Convert command to argc / argv
std::vector<char *> argv{nullptr};
const auto command = std::get<std::vector<std::string>>(task.job.at("command"));
std::transform(command.begin(),
command.end(),
std::back_inserter(argv),
[](const std::string & s) {
return const_cast<char *>(s.c_str());
});
const auto command =
std::get<std::vector<std::string>>(task.job.at("command"));
std::transform(
command.begin(), command.end(), std::back_inserter(argv),
[](const std::string &s) { return const_cast<char *>(s.c_str()); });
char empty[] = "";
char *env[1];
@@ -156,10 +154,12 @@ namespace daggy::executors::task {
jd.min_cpus = std::stoi(std::get<std::string>(job.at("minCPUs")));
jd.pn_min_memory = std::stoi(std::get<std::string>(job.at("minMemoryMB")));
jd.pn_min_tmp_disk = std::stoi(std::get<std::string>(job.at("minTmpDiskMB")));
jd.pn_min_tmp_disk =
std::stoi(std::get<std::string>(job.at("minTmpDiskMB")));
jd.priority = std::stoi(std::get<std::string>(job.at("priority")));
jd.shared = 0;
jd.time_limit = std::stoi(std::get<std::string>(job.at("timeLimitSeconds")));
jd.time_limit =
std::stoi(std::get<std::string>(job.at("timeLimitSeconds")));
jd.min_nodes = 1;
jd.user_id = std::stoi(std::get<std::string>(job.at("userID")));
jd.argv = argv.data();
@@ -183,8 +183,7 @@ namespace daggy::executors::task {
error_code = slurm_submit_batch_job(&jd, &resp_msg);
if (error_code) {
std::stringstream ss;
ss << "Unable to submit slurm job: "
<< slurm_strerror(error_code);
ss << "Unable to submit slurm job: " << slurm_strerror(error_code);
throw std::runtime_error(ss.str());
}
@@ -193,29 +192,29 @@ namespace daggy::executors::task {
slurm_free_submit_response_response_msg(resp_msg);
std::lock_guard<std::mutex> lock(promiseGuard_);
Job newJob{
.prom{},
.stdoutFile = stdoutFile,
.stderrFile = stderrFile
};
Job newJob{.prom{}, .stdoutFile = stdoutFile, .stderrFile = stderrFile};
auto fut = newJob.prom.get_future();
runningJobs_.emplace(jobID, std::move(newJob));
return fut;
}
void SlurmTaskExecutor::monitor() {
void SlurmTaskExecutor::monitor()
{
std::unordered_set<size_t> resolvedJobs;
while (running_) {
{
std::lock_guard<std::mutex> lock(promiseGuard_);
for (auto &[jobID, job] : runningJobs_) {
job_info_msg_t *jobStatus;
int error_code = slurm_load_job(&jobStatus, jobID, SHOW_ALL | SHOW_DETAIL);
if (error_code != SLURM_SUCCESS) continue;
int error_code =
slurm_load_job(&jobStatus, jobID, SHOW_ALL | SHOW_DETAIL);
if (error_code != SLURM_SUCCESS)
continue;
uint32_t idx = jobStatus->record_count;
if (idx == 0) continue;
if (idx == 0)
continue;
idx--;
const slurm_job_info_t &jobInfo = jobStatus->job_array[idx];
AttemptRecord record;
@@ -243,7 +242,8 @@ namespace daggy::executors::task {
record.executorLog = "Job terminated due to pre-emption.\n";
break;
case JOB_BOOT_FAIL: /* terminated due to node boot failure */
record.executorLog = "Job failed to run due to failure of compute node to boot.\n";
record.executorLog =
"Job failed to run due to failure of compute node to boot.\n";
break;
case JOB_DEADLINE: /* terminated on deadline */
record.executorLog = "Job terminated due to deadline.\n";
@@ -270,5 +270,5 @@ namespace daggy::executors::task {
std::this_thread::sleep_for(std::chrono::milliseconds(250));
}
}
}
} // namespace daggy::executors::task
#endif

View File

@@ -1,45 +1,59 @@
#include <fstream>
#include <iomanip>
#include <enum.h>
#include <daggy/loggers/dag_run/FileSystemLogger.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/Utilities.hpp>
#include <daggy/loggers/dag_run/FileSystemLogger.hpp>
#include <fstream>
#include <iomanip>
namespace fs = std::filesystem;
using namespace daggy::loggers::dag_run;
namespace daggy {
inline const fs::path FileSystemLogger::getCurrentPath() const { return root_ / "current"; }
inline const fs::path FileSystemLogger::getCurrentPath() const
{
return root_ / "current";
}
inline const fs::path FileSystemLogger::getRunsRoot() const { return root_ / "runs"; }
inline const fs::path FileSystemLogger::getRunsRoot() const
{
return root_ / "runs";
}
inline const fs::path FileSystemLogger::getRunRoot(DAGRunID runID) const {
inline const fs::path FileSystemLogger::getRunRoot(DAGRunID runID) const
{
return getRunsRoot() / std::to_string(runID);
}
FileSystemLogger::FileSystemLogger(fs::path root)
: root_(root), nextRunID_(0) {
const std::vector<fs::path> reqPaths{root_, getCurrentPath(), getRunsRoot()};
: root_(root)
, nextRunID_(0)
{
const std::vector<fs::path> reqPaths{root_, getCurrentPath(),
getRunsRoot()};
for (const auto &path : reqPaths) {
if (!fs::exists(path)) { fs::create_directories(path); }
if (!fs::exists(path)) {
fs::create_directories(path);
}
}
// Get the next run ID
for (auto &dir : fs::directory_iterator(getRunsRoot())) {
try {
size_t runID = std::stoull(dir.path().stem());
if (runID > nextRunID_) nextRunID_ = runID + 1;
} catch (std::exception &e) {
if (runID > nextRunID_)
nextRunID_ = runID + 1;
}
catch (std::exception &e) {
continue;
}
}
}
// Execution
DAGRunID FileSystemLogger::startDAGRun(std::string name, const TaskSet &tasks) {
DAGRunID FileSystemLogger::startDAGRun(std::string name, const TaskSet &tasks)
{
DAGRunID runID = nextRunID_++;
// TODO make this threadsafe
@@ -51,8 +65,10 @@ namespace daggy {
fs::create_directories(runRoot);
// Create meta.json with DAGRun Name and task definitions
std::ofstream ofh(runRoot / "metadata.json", std::ios::trunc | std::ios::binary);
ofh << R"({ "name": )" << std::quoted(name) << R"(, "tasks": )" << tasksToJSON(tasks) << "}\n";
std::ofstream ofh(runRoot / "metadata.json",
std::ios::trunc | std::ios::binary);
ofh << R"({ "name": )" << std::quoted(name) << R"(, "tasks": )"
<< tasksToJSON(tasks) << "}\n";
ofh.close();
// Task directories
@@ -65,19 +81,25 @@ namespace daggy {
return runID;
}
void FileSystemLogger::updateDAGRunState(DAGRunID dagRunID, RunState state) {
std::ofstream ofh(getRunRoot(dagRunID) / "states.csv", std::ios::binary | std::ios::app);
ofh << std::quoted(timePointToString(Clock::now())) << ',' << state._to_string() << '\n';
void FileSystemLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
{
std::ofstream ofh(getRunRoot(dagRunID) / "states.csv",
std::ios::binary | std::ios::app);
ofh << std::quoted(timePointToString(Clock::now())) << ','
<< state._to_string() << '\n';
ofh.flush();
ofh.close();
}
void
FileSystemLogger::logTaskAttempt(DAGRunID dagRunID, const std::string &taskName,
const AttemptRecord &attempt) {
void FileSystemLogger::logTaskAttempt(DAGRunID dagRunID,
const std::string &taskName,
const AttemptRecord &attempt)
{
auto taskRoot = getRunRoot(dagRunID) / taskName;
size_t i = 1;
while (fs::exists(taskRoot / std::to_string(i))) { ++i; }
while (fs::exists(taskRoot / std::to_string(i))) {
++i;
}
auto attemptDir = taskRoot / std::to_string(i);
fs::create_directories(attemptDir);
@@ -87,8 +109,10 @@ namespace daggy {
// Metadata
ofh.open(attemptDir / "metadata.json");
ofh << "{\n"
<< R"("startTime": )" << std::quoted(timePointToString(attempt.startTime)) << ",\n"
<< R"("stopTime": )" << std::quoted(timePointToString(attempt.stopTime)) << ",\n"
<< R"("startTime": )"
<< std::quoted(timePointToString(attempt.startTime)) << ",\n"
<< R"("stopTime": )" << std::quoted(timePointToString(attempt.stopTime))
<< ",\n"
<< R"("rc": )" << attempt.rc << '\n'
<< '}';
@@ -108,19 +132,26 @@ namespace daggy {
ofh.close();
}
void FileSystemLogger::updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) {
std::ofstream ofh(getRunRoot(dagRunID) / taskName / "states.csv", std::ios::binary | std::ios::app);
ofh << std::quoted(timePointToString(Clock::now())) << ',' << state._to_string() << '\n';
void FileSystemLogger::updateTaskState(DAGRunID dagRunID,
const std::string &taskName,
RunState state)
{
std::ofstream ofh(getRunRoot(dagRunID) / taskName / "states.csv",
std::ios::binary | std::ios::app);
ofh << std::quoted(timePointToString(Clock::now())) << ','
<< state._to_string() << '\n';
ofh.flush();
ofh.close();
}
// Querying
std::vector<DAGRunSummary> FileSystemLogger::getDAGs(uint32_t stateMask) {
std::vector<DAGRunSummary> FileSystemLogger::getDAGs(uint32_t stateMask)
{
return {};
}
DAGRunRecord FileSystemLogger::getDAGRun(DAGRunID dagRunID) {
DAGRunRecord FileSystemLogger::getDAGRun(DAGRunID dagRunID)
{
DAGRunRecord record;
auto runRoot = getRunRoot(dagRunID);
if (!fs::exists(runRoot)) {
@@ -150,10 +181,9 @@ namespace daggy {
std::getline(ss, time, ',');
std::getline(ss, state);
record.dagStateChanges.emplace_back(DAGUpdateRecord{
.time = stringToTimePoint(time),
.newState = RunState::_from_string(state.c_str())
});
record.dagStateChanges.emplace_back(
DAGUpdateRecord{.time = stringToTimePoint(time),
.newState = RunState::_from_string(state.c_str())});
}
ifh.close();
@@ -166,13 +196,17 @@ namespace daggy {
}
ifh.open(taskStateFile);
while (std::getline(ifh, line)) { continue; }
while (std::getline(ifh, line)) {
continue;
}
std::stringstream ss{line};
while (std::getline(ss, token, ',')) { continue; }
while (std::getline(ss, token, ',')) {
continue;
}
RunState taskState = RunState::_from_string(token.c_str());
record.taskRunStates.emplace(taskName, taskState);
ifh.close();
}
return record;
}
}
} // namespace daggy

View File

@@ -1,31 +1,29 @@
#include <iterator>
#include <algorithm>
#include <enum.h>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <algorithm>
#include <daggy/Serialization.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <iterator>
namespace daggy {
namespace loggers {
namespace dag_run {
OStreamLogger::OStreamLogger(std::ostream &os) : os_(os) {}
namespace daggy { namespace loggers { namespace dag_run {
OStreamLogger::OStreamLogger(std::ostream &os)
: os_(os)
{
}
// Execution
DAGRunID OStreamLogger::startDAGRun(std::string name, const TaskSet &tasks) {
DAGRunID OStreamLogger::startDAGRun(std::string name, const TaskSet &tasks)
{
std::lock_guard<std::mutex> lock(guard_);
size_t runID = dagRuns_.size();
dagRuns_.push_back({
.name = name,
.tasks = tasks
});
dagRuns_.push_back({.name = name, .tasks = tasks});
for (const auto &[name, _] : tasks) {
_updateTaskState(runID, name, RunState::QUEUED);
}
_updateDAGRunState(runID, RunState::QUEUED);
os_ << "Starting new DAGRun named " << name << " with ID " << runID << " and " << tasks.size()
<< " tasks" << std::endl;
os_ << "Starting new DAGRun named " << name << " with ID " << runID
<< " and " << tasks.size() << " tasks" << std::endl;
for (const auto &[name, task] : tasks) {
os_ << "TASK (" << name << "): " << configToJSON(task.job);
os_ << std::endl;
@@ -33,61 +31,78 @@ namespace daggy {
return runID;
}
void OStreamLogger::addTask(DAGRunID dagRunID, const std::string taskName, const Task &task) {
void OStreamLogger::addTask(DAGRunID dagRunID, const std::string taskName,
const Task &task)
{
std::lock_guard<std::mutex> lock(guard_);
auto &dagRun = dagRuns_[dagRunID];
dagRun.tasks[taskName] = task;
_updateTaskState(dagRunID, taskName, RunState::QUEUED);
}
void OStreamLogger::updateTask(DAGRunID dagRunID, const std::string taskName, const Task &task) {
void OStreamLogger::updateTask(DAGRunID dagRunID, const std::string taskName,
const Task &task)
{
std::lock_guard<std::mutex> lock(guard_);
auto &dagRun = dagRuns_[dagRunID];
dagRun.tasks[taskName] = task;
}
void OStreamLogger::updateDAGRunState(DAGRunID dagRunID, RunState state) {
void OStreamLogger::updateDAGRunState(DAGRunID dagRunID, RunState state)
{
std::lock_guard<std::mutex> lock(guard_);
_updateDAGRunState(dagRunID, state);
}
void OStreamLogger::_updateDAGRunState(DAGRunID dagRunID, RunState state) {
os_ << "DAG State Change(" << dagRunID << "): " << state._to_string() << std::endl;
void OStreamLogger::_updateDAGRunState(DAGRunID dagRunID, RunState state)
{
os_ << "DAG State Change(" << dagRunID << "): " << state._to_string()
<< std::endl;
dagRuns_[dagRunID].dagStateChanges.push_back({Clock::now(), state});
}
void OStreamLogger::logTaskAttempt(DAGRunID dagRunID, const std::string &taskName,
const AttemptRecord &attempt) {
void OStreamLogger::logTaskAttempt(DAGRunID dagRunID,
const std::string &taskName,
const AttemptRecord &attempt)
{
std::lock_guard<std::mutex> lock(guard_);
const std::string &msg = attempt.rc == 0 ? attempt.outputLog : attempt.errorLog;
os_ << "Task Attempt (" << dagRunID << '/' << taskName << "): Ran with RC " << attempt.rc << ": "
<< msg << std::endl;
const std::string &msg =
attempt.rc == 0 ? attempt.outputLog : attempt.errorLog;
os_ << "Task Attempt (" << dagRunID << '/' << taskName << "): Ran with RC "
<< attempt.rc << ": " << msg << std::endl;
dagRuns_[dagRunID].taskAttempts[taskName].push_back(attempt);
}
void OStreamLogger::updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) {
void OStreamLogger::updateTaskState(DAGRunID dagRunID,
const std::string &taskName,
RunState state)
{
std::lock_guard<std::mutex> lock(guard_);
_updateTaskState(dagRunID, taskName, state);
}
void OStreamLogger::_updateTaskState(DAGRunID dagRunID, const std::string &taskName, RunState state) {
void OStreamLogger::_updateTaskState(DAGRunID dagRunID,
const std::string &taskName,
RunState state)
{
auto &dagRun = dagRuns_.at(dagRunID);
dagRun.taskStateChanges.push_back({Clock::now(), taskName, state});
auto it = dagRun.taskRunStates.find(taskName);
if (it == dagRun.taskRunStates.end()) {
dagRun.taskRunStates.emplace(taskName, state);
} else {
}
else {
it->second = state;
}
os_ << "Task State Change (" << dagRunID << '/' << taskName << "): "
<< state._to_string()
<< std::endl;
os_ << "Task State Change (" << dagRunID << '/' << taskName
<< "): " << state._to_string() << std::endl;
}
// Querying
std::vector<DAGRunSummary> OStreamLogger::getDAGs(uint32_t stateMask) {
std::vector<DAGRunSummary> OStreamLogger::getDAGs(uint32_t stateMask)
{
std::vector<DAGRunSummary> summaries;
std::lock_guard<std::mutex> lock(guard_);
size_t i = 0;
@@ -98,8 +113,7 @@ namespace daggy {
.runState = run.dagStateChanges.back().newState,
.startTime = run.dagStateChanges.front().time,
.lastUpdate = std::max<TimePoint>(run.taskStateChanges.back().time,
run.dagStateChanges.back().time)
};
run.dagStateChanges.back().time)};
for (const auto &[_, taskState] : run.taskRunStates) {
summary.taskStateCounts[taskState]++;
@@ -110,13 +124,12 @@ namespace daggy {
return summaries;
}
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID) {
DAGRunRecord OStreamLogger::getDAGRun(DAGRunID dagRunID)
{
if (dagRunID >= dagRuns_.size()) {
throw std::runtime_error("No such DAGRun ID");
}
std::lock_guard<std::mutex> lock(guard_);
return dagRuns_[dagRunID];
}
}
}
}
}}} // namespace daggy::loggers::dag_run

View File

@@ -1,9 +1,9 @@
#include <catch2/catch.hpp>
#include <iostream>
#include "daggy/DAG.hpp"
#include <catch2/catch.hpp>
TEST_CASE("General tests", "[general]") {
TEST_CASE("General tests", "[general]")
{
REQUIRE(1 == 1);
}

View File

@@ -6,7 +6,8 @@
#include <catch2/catch.hpp>
TEST_CASE("Sanity tests", "[sanity]") {
TEST_CASE("Sanity tests", "[sanity]")
{
REQUIRE(1 == 1);
}

View File

@@ -1,10 +1,10 @@
#include <catch2/catch.hpp>
#include <iostream>
#include "daggy/DAG.hpp"
#include <catch2/catch.hpp>
TEST_CASE("dag_construction", "[dag]") {
TEST_CASE("dag_construction", "[dag]")
{
daggy::DAG<size_t, size_t> dag;
REQUIRE(dag.size() == 0);
@@ -26,18 +26,22 @@ TEST_CASE("dag_construction", "[dag]") {
REQUIRE(!dag.isValid());
// Bounds checking
SECTION("addEdge Bounds Checking") {
SECTION("addEdge Bounds Checking")
{
REQUIRE_THROWS(dag.addEdge(20, 0));
REQUIRE_THROWS(dag.addEdge(0, 20));
}
}
TEST_CASE("dag_traversal", "[dag]") {
TEST_CASE("dag_traversal", "[dag]")
{
daggy::DAG<size_t, size_t> dag;
const int N_VERTICES = 10;
for (int i = 0; i < N_VERTICES; ++i) { dag.addVertex(i, i); }
for (int i = 0; i < N_VERTICES; ++i) {
dag.addVertex(i, i);
}
/*
0 ---------------------\
@@ -46,23 +50,15 @@ TEST_CASE("dag_traversal", "[dag]") {
4 -------------------------------/ \-----> 9
*/
std::vector<std::pair<int, int>> edges{
{0, 6},
{1, 5},
{5, 6},
{6, 7},
{2, 3},
{3, 5},
{4, 7},
{7, 8},
{7, 9}
};
std::vector<std::pair<int, int>> edges{{0, 6}, {1, 5}, {5, 6}, {6, 7}, {2, 3},
{3, 5}, {4, 7}, {7, 8}, {7, 9}};
for (auto const [from, to] : edges) {
dag.addEdge(from, to);
}
SECTION("Basic Traversal") {
SECTION("Basic Traversal")
{
dag.reset();
std::vector<int> visitOrder(N_VERTICES);
size_t i = 0;
@@ -79,7 +75,8 @@ TEST_CASE("dag_traversal", "[dag]") {
}
}
SECTION("Iteration") {
SECTION("Iteration")
{
size_t nVisited = 0;
dag.forEach([&](auto &k) {
(void)k;

View File

@@ -1,8 +1,7 @@
#include <iostream>
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <catch2/catch.hpp>
#include <iostream>
#include "daggy/loggers/dag_run/FileSystemLogger.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
@@ -13,19 +12,26 @@ using namespace daggy;
using namespace daggy::loggers::dag_run;
const TaskSet SAMPLE_TASKS{
{"work_a", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}}, .children{"c"}}},
{"work_b", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}}, .children{"c"}}},
{"work_c", Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}
};
{"work_a",
Task{.job{{"command", std::vector<std::string>{"/bin/echo", "a"}}},
.children{"c"}}},
{"work_b",
Task{.job{{"command", std::vector<std::string>{"/bin/echo", "b"}}},
.children{"c"}}},
{"work_c",
Task{.job{{"command", std::vector<std::string>{"/bin/echo", "c"}}}}}};
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name, const TaskSet &tasks) {
inline DAGRunID testDAGRunInit(DAGRunLogger &logger, const std::string &name,
const TaskSet &tasks)
{
auto runID = logger.startDAGRun(name, tasks);
auto dagRun = logger.getDAGRun(runID);
REQUIRE(dagRun.tasks == tasks);
REQUIRE(dagRun.taskRunStates.size() == tasks.size());
auto nonQueuedTask = std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
auto nonQueuedTask =
std::find_if(dagRun.taskRunStates.begin(), dagRun.taskRunStates.end(),
[](const auto &a) { return a.second != +RunState::QUEUED; });
REQUIRE(nonQueuedTask == dagRun.taskRunStates.end());
@@ -54,12 +60,14 @@ TEST_CASE("Filesystem Logger", "[filesystem_logger]") {
}
*/
TEST_CASE("ostream_logger", "[ostream_logger]") {
TEST_CASE("ostream_logger", "[ostream_logger]")
{
// cleanup();
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("DAGRun Starts") {
SECTION("DAGRun Starts")
{
testDAGRunInit(logger, "init_test", SAMPLE_TASKS);
}

View File

@@ -1,18 +1,20 @@
#include <iostream>
#include <catch2/catch.hpp>
#include <filesystem>
#include <iostream>
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include <catch2/catch.hpp>
TEST_CASE("forking_executor", "[forking_executor]") {
TEST_CASE("forking_executor", "[forking_executor]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
SECTION("Simple Run") {
daggy::Task task{.job{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/echo", "abc", "123"}}}};
SECTION("Simple Run")
{
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/echo", "abc", "123"}}}};
REQUIRE(ex.validateTaskParameters(task.job));
@@ -24,9 +26,11 @@ TEST_CASE("forking_executor", "[forking_executor]") {
REQUIRE(rec.errorLog.empty());
}
SECTION("Error Run") {
daggy::Task task{.job{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/expr", "1", "+", "+"}}}};
SECTION("Error Run")
{
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
auto recFuture = ex.execute("command", task);
auto rec = recFuture.get();
@@ -36,16 +40,19 @@ TEST_CASE("forking_executor", "[forking_executor]") {
REQUIRE(rec.outputLog.empty());
}
SECTION("Large Output") {
const std::vector<std::string> BIG_FILES{
"/usr/share/dict/linux.words", "/usr/share/dict/cracklib-small", "/etc/ssh/moduli"
};
SECTION("Large Output")
{
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
"/usr/share/dict/cracklib-small",
"/etc/ssh/moduli"};
for (const auto &bigFile : BIG_FILES) {
if (!std::filesystem::exists(bigFile)) continue;
if (!std::filesystem::exists(bigFile))
continue;
daggy::Task task{.job{
{"command", daggy::executors::task::ForkingTaskExecutor::Command{"/usr/bin/cat", bigFile}}}};
daggy::Task task{
.job{{"command", daggy::executors::task::ForkingTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
auto recFuture = ex.execute("command", task);
auto rec = recFuture.get();
@@ -56,30 +63,40 @@ TEST_CASE("forking_executor", "[forking_executor]") {
}
}
SECTION("Parameter Expansion") {
SECTION("Parameter Expansion")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
std::string taskJSON =
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON);
auto result = daggy::expandTaskSet(tasks, ex, params);
REQUIRE(result.size() == 2);
}
SECTION("Build with expansion") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
SECTION("Build with expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks =
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
REQUIRE(tasks.size() == 4);
}
SECTION("Build with expansion using parents instead of children") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
SECTION("Build with expansion using parents instead of children")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks =
daggy::expandTaskSet(daggy::tasksFromJSON(testTasks), ex, params);
REQUIRE(tasks.size() == 4);
}

View File

@@ -1,37 +1,35 @@
#include <iostream>
#include <filesystem>
#include <unistd.h>
#include <sys/types.h>
#include "daggy/executors/task/SlurmTaskExecutor.hpp"
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include <unistd.h>
#include <catch2/catch.hpp>
#include <filesystem>
#include <iostream>
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/SlurmTaskExecutor.hpp"
namespace fs = std::filesystem;
#ifdef DAGGY_ENABLE_SLURM
TEST_CASE("slurm_execution", "[slurm_executor]") {
TEST_CASE("slurm_execution", "[slurm_executor]")
{
daggy::executors::task::SlurmTaskExecutor ex;
daggy::ConfigValues defaultJobValues{
{"minCPUs", "1"},
daggy::ConfigValues defaultJobValues{{"minCPUs", "1"},
{"minMemoryMB", "100"},
{"minTmpDiskMB", "0"},
{"priority", "1"},
{"timeLimitSeconds", "200"},
{"userID", std::to_string(getuid())},
{"workDir", fs::current_path().string()},
{"tmpDir", fs::current_path().string()}
};
{"tmpDir", fs::current_path().string()}};
SECTION("Simple Run") {
SECTION("Simple Run")
{
daggy::Task task{.job{
{"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}}
}};
{"command", std::vector<std::string>{"/usr/bin/echo", "abc", "123"}}}};
task.job.merge(defaultJobValues);
@@ -45,9 +43,11 @@ TEST_CASE("slurm_execution", "[slurm_executor]") {
REQUIRE(rec.errorLog.empty());
}
SECTION("Error Run") {
daggy::Task task{.job{
{"command", daggy::executors::task::SlurmTaskExecutor::Command{"/usr/bin/expr", "1", "+", "+"}}}};
SECTION("Error Run")
{
daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/expr", "1", "+", "+"}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute("command", task);
@@ -58,16 +58,19 @@ TEST_CASE("slurm_execution", "[slurm_executor]") {
REQUIRE(rec.outputLog.empty());
}
SECTION("Large Output") {
const std::vector<std::string> BIG_FILES{
"/usr/share/dict/linux.words", "/usr/share/dict/cracklib-small", "/etc/ssh/moduli"
};
SECTION("Large Output")
{
const std::vector<std::string> BIG_FILES{"/usr/share/dict/linux.words",
"/usr/share/dict/cracklib-small",
"/etc/ssh/moduli"};
for (const auto &bigFile : BIG_FILES) {
if (!std::filesystem::exists(bigFile)) continue;
if (!std::filesystem::exists(bigFile))
continue;
daggy::Task task{.job{
{"command", daggy::executors::task::SlurmTaskExecutor::Command{"/usr/bin/cat", bigFile}}}};
daggy::Task task{
.job{{"command", daggy::executors::task::SlurmTaskExecutor::Command{
"/usr/bin/cat", bigFile}}}};
task.job.merge(defaultJobValues);
auto recFuture = ex.execute("command", task);
@@ -80,30 +83,40 @@ TEST_CASE("slurm_execution", "[slurm_executor]") {
}
}
SECTION("Parameter Expansion") {
SECTION("Parameter Expansion")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
std::string taskJSON =
R"({"B": {"job": {"command": ["/usr/bin/echo", "{{DATE}}"]}, "children": ["C"]}})";
auto tasks = daggy::tasksFromJSON(taskJSON, defaultJobValues);
auto result = daggy::expandTaskSet(tasks, ex, params);
REQUIRE(result.size() == 2);
}
SECTION("Build with expansion") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
SECTION("Build with expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["B"]}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::expandTaskSet(
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
REQUIRE(tasks.size() == 4);
}
SECTION("Build with expansion using parents instead of children") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
SECTION("Build with expansion using parents instead of children")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks = daggy::expandTaskSet(daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}}, "B": {"job": {"command": ["/bin/echo", "B", "{{SOURCE}}", "{{DATE}}"]}, "parents": ["A"]}, "C": {"job": {"command": ["/bin/echo", "C"]}, "parents": ["A"]}})";
auto tasks = daggy::expandTaskSet(
daggy::tasksFromJSON(testTasks, defaultJobValues), ex, params);
REQUIRE(tasks.size() == 4);
}

View File

@@ -1,34 +1,47 @@
#include <iostream>
#include <catch2/catch.hpp>
#include <filesystem>
#include <fstream>
#include <catch2/catch.hpp>
#include <iostream>
#include "daggy/Serialization.hpp"
namespace fs = std::filesystem;
TEST_CASE("parameter_deserialization", "[deserialize_parameters]") {
SECTION("Basic Parse") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
TEST_CASE("parameter_deserialization", "[deserialize_parameters]")
{
SECTION("Basic Parse")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name"})"};
auto params = daggy::configFromJSON(testParams);
REQUIRE(params.size() == 2);
REQUIRE(std::holds_alternative<std::vector<std::string>>(params["DATE"]));
REQUIRE(std::holds_alternative<std::string>(params["SOURCE"]));
}SECTION("Invalid JSON") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
}
SECTION("Invalid JSON")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name")"};
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}SECTION("Non-string Keys") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
}
SECTION("Non-string Keys")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], 6: "name"})"};
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}SECTION("Non-array/Non-string values") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
}
SECTION("Non-array/Non-string values")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": {"name": "kevin"}})"};
REQUIRE_THROWS(daggy::configFromJSON(testParams));
}
}
TEST_CASE("task_deserialization", "[deserialize_task]") {
SECTION("Build with no expansion") {
TEST_CASE("task_deserialization", "[deserialize_task]")
{
SECTION("Build with no expansion")
{
std::string testTasks = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
@@ -46,7 +59,8 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
REQUIRE(tasks.size() == 3);
}
SECTION("Build with job defaults") {
SECTION("Build with job defaults")
{
std::string testTasks = R"({
"A": {
"job": { "command": ["/bin/echo", "A"] },
@@ -59,8 +73,7 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
}
}
})";
daggy::ConfigValues jobDefaults{{"runtime", "60"},
{"memory", "300M"}};
daggy::ConfigValues jobDefaults{{"runtime", "60"}, {"memory", "300M"}};
auto tasks = daggy::tasksFromJSON(testTasks, jobDefaults);
REQUIRE(tasks.size() == 2);
REQUIRE(std::get<std::string>(tasks["A"].job["runtime"]) == "60");
@@ -70,9 +83,12 @@ TEST_CASE("task_deserialization", "[deserialize_task]") {
}
}
TEST_CASE("task_serialization", "[serialize_tasks]") {
SECTION("Build with no expansion") {
std::string testTasks = R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
TEST_CASE("task_serialization", "[serialize_tasks]")
{
SECTION("Build with no expansion")
{
std::string testTasks =
R"({"A": {"job": {"command": ["/bin/echo", "A"]}, "children": ["C"]}, "B": {"job": {"command": ["/bin/echo", "B"]}, "children": ["C"]}, "C": {"job": {"command": ["/bin/echo", "C"]}}})";
auto tasks = daggy::tasksFromJSON(testTasks);
auto genJSON = daggy::tasksToJSON(tasks);

View File

@@ -1,20 +1,19 @@
#include <iostream>
#include <filesystem>
#include <fstream>
#include <catch2/catch.hpp>
#include <pistache/client.h>
#include <rapidjson/document.h>
#include <daggy/Server.hpp>
#include <catch2/catch.hpp>
#include <daggy/Serialization.hpp>
#include <daggy/Server.hpp>
#include <daggy/executors/task/ForkingTaskExecutor.hpp>
#include <daggy/loggers/dag_run/OStreamLogger.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
namespace rj = rapidjson;
Pistache::Http::Response
REQUEST(std::string url, std::string payload = "") {
Pistache::Http::Response REQUEST(std::string url, std::string payload = "")
{
Pistache::Http::Experimental::Client client;
client.init();
Pistache::Http::Response response;
@@ -35,11 +34,11 @@ REQUEST(std::string url, std::string payload = "") {
error = true;
try {
std::rethrow_exception(ptr);
} catch (std::exception &e) {
}
catch (std::exception &e) {
msg = e.what();
}
}
);
});
Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
barrier.wait_for(std::chrono::seconds(2));
@@ -50,14 +49,14 @@ REQUEST(std::string url, std::string payload = "") {
return response;
}
TEST_CASE("rest_endpoint", "[server_basic]") {
TEST_CASE("rest_endpoint", "[server_basic]")
{
std::stringstream ss;
daggy::executors::task::ForkingTaskExecutor executor(10);
daggy::loggers::dag_run::OStreamLogger logger(ss);
Pistache::Address listenSpec("localhost", Pistache::Port(0));
const size_t nDAGRunners = 10,
nWebThreads = 10;
const size_t nDAGRunners = 10, nWebThreads = 10;
daggy::Server server(listenSpec, logger, executor, nDAGRunners);
server.init(nWebThreads);
@@ -66,17 +65,20 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
const std::string host = "localhost:";
const std::string baseURL = host + std::to_string(server.getPort());
SECTION ("Ready Endpoint") {
SECTION("Ready Endpoint")
{
auto response = REQUEST(baseURL + "/ready");
REQUIRE(response.code() == Pistache::Http::Code::Ok);
}
SECTION ("Querying a non-existent dagrunid should fail ") {
SECTION("Querying a non-existent dagrunid should fail ")
{
auto response = REQUEST(baseURL + "/v1/dagrun/100");
REQUIRE(response.code() != Pistache::Http::Code::Ok);
}
SECTION("Simple DAGRun Submission") {
SECTION("Simple DAGRun Submission")
{
std::string dagRun = R"({
"name": "unit_server",
"parameters": { "FILE": [ "A", "B" ] },
@@ -88,7 +90,6 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
}
})";
// Submit, and get the runID
daggy::DAGRunID runID = 0;
{
@@ -145,20 +146,23 @@ TEST_CASE("rest_endpoint", "[server_basic]") {
const auto &taskStates = doc["taskStates"].GetObject();
size_t nStates = 0;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd(); ++it) {
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
nStates++;
}
REQUIRE(nStates == 3);
complete = true;
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd(); ++it) {
for (auto it = taskStates.MemberBegin(); it != taskStates.MemberEnd();
++it) {
std::string state = it->value.GetString();
if (state != "COMPLETED") {
complete = false;
break;
}
}
if (complete) break;
if (complete)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
}
REQUIRE(complete);

View File

@@ -1,19 +1,20 @@
#include <iostream>
#include <catch2/catch.hpp>
#include <future>
#include <iostream>
#include "daggy/ThreadPool.hpp"
#include <catch2/catch.hpp>
using namespace daggy;
TEST_CASE("threadpool", "[threadpool]") {
TEST_CASE("threadpool", "[threadpool]")
{
std::atomic<uint32_t> cnt(0);
ThreadPool tp(10);
std::vector<std::future<uint32_t>> rets;
SECTION("Adding large tasks queues with return values") {
SECTION("Adding large tasks queues with return values")
{
auto tq = std::make_shared<daggy::TaskQueue>();
std::vector<std::future<uint32_t>> res;
for (size_t i = 0; i < 100; ++i)
@@ -22,11 +23,13 @@ TEST_CASE("threadpool", "[threadpool]") {
return cnt.load();
})));
tp.addTasks(tq);
for (auto &r: res) r.get();
for (auto &r : res)
r.get();
REQUIRE(cnt == 100);
}
SECTION("Slow runs") {
SECTION("Slow runs")
{
std::vector<std::future<void>> res;
using namespace std::chrono_literals;
for (size_t i = 0; i < 100; ++i)
@@ -35,7 +38,8 @@ TEST_CASE("threadpool", "[threadpool]") {
cnt++;
return;
}));
for (auto &r: res) r.get();
for (auto &r : res)
r.get();
REQUIRE(cnt == 100);
}
}

View File

@@ -1,41 +1,45 @@
#include <iostream>
#include <algorithm>
#include <catch2/catch.hpp>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <iostream>
#include <random>
#include <catch2/catch.hpp>
#include "daggy/Utilities.hpp"
#include "daggy/Serialization.hpp"
#include "daggy/Utilities.hpp"
#include "daggy/executors/task/ForkingTaskExecutor.hpp"
#include "daggy/executors/task/NoopTaskExecutor.hpp"
#include "daggy/loggers/dag_run/OStreamLogger.hpp"
namespace fs = std::filesystem;
TEST_CASE("string_utilities", "[utilities_string]") {
TEST_CASE("string_utilities", "[utilities_string]")
{
std::string test = "/this/is/{{A}}/test/{{A}}";
auto res = daggy::globalSub(test, "{{A}}", "hello");
REQUIRE(res == "/this/is/hello/test/hello");
}
TEST_CASE("string_expansion", "[utilities_parameter_expansion]") {
SECTION("Basic expansion") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
TEST_CASE("string_expansion", "[utilities_parameter_expansion]")
{
SECTION("Basic expansion")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}", "{{TYPE}}"};
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}",
"{{TYPE}}"};
auto allCommands = daggy::interpolateValues(cmd, params);
REQUIRE(allCommands.size() == 6);
}
SECTION("Skip over unused parameters") {
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
SECTION("Skip over unused parameters")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": "name", "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams);
std::vector<std::string> cmd{"/usr/bin/echo", "{{DATE}}", "{{SOURCE}}"};
auto allCommands = daggy::interpolateValues(cmd, params);
@@ -44,7 +48,8 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") {
REQUIRE(allCommands.size() == 2);
}
SECTION("Expand within a command part") {
SECTION("Expand within a command part")
{
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07" ], "SOURCE": ["A", "B"], "TYPE": ["a", "b", "c"]})"};
auto params = daggy::configFromJSON(testParams);
@@ -53,19 +58,19 @@ TEST_CASE("string_expansion", "[utilities_parameter_expansion]") {
// TYPE isn't used, so it's just |DATE| * |SOURCE|
REQUIRE(result.size() == 4);
}
}
TEST_CASE("dag_runner_order", "[dagrun_order]") {
TEST_CASE("dag_runner_order", "[dagrun_order]")
{
daggy::executors::task::NoopTaskExecutor ex;
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
daggy::TimePoint startTime = daggy::Clock::now();
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
std::string testParams{
R"({"DATE": ["2021-05-06", "2021-05-07", "2021-05-08", "2021-05-09" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string taskJSON = R"({
@@ -90,8 +95,10 @@ TEST_CASE("dag_runner_order", "[dagrun_order]") {
auto rec = logger.getDAGRun(runID);
daggy::TimePoint stopTime = daggy::Clock::now();
std::array<daggy::TimePoint, 5> minTimes; minTimes.fill(startTime);
std::array<daggy::TimePoint, 5> maxTimes; maxTimes.fill(stopTime);
std::array<daggy::TimePoint, 5> minTimes;
minTimes.fill(startTime);
std::array<daggy::TimePoint, 5> maxTimes;
maxTimes.fill(stopTime);
for (const auto &[k, v] : rec.taskAttempts) {
size_t idx = k[0] - 65;
@@ -108,21 +115,23 @@ TEST_CASE("dag_runner_order", "[dagrun_order]") {
}
}
TEST_CASE("dag_runner", "[utilities_dag_runner]") {
TEST_CASE("dag_runner", "[utilities_dag_runner]")
{
daggy::executors::task::ForkingTaskExecutor ex(10);
std::stringstream ss;
daggy::loggers::dag_run::OStreamLogger logger(ss);
SECTION("Simple execution") {
SECTION("Simple execution")
{
std::string prefix = (fs::current_path() / "asdlk").string();
std::unordered_map<std::string, std::string> files{
{"A", prefix + "_A"},
{"B", prefix + "_B"},
{"C", prefix + "_C"}};
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+ files.at("A") + R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+ files.at("B") + R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+ files.at("C") + R"("]}}})";
{"A", prefix + "_A"}, {"B", prefix + "_B"}, {"C", prefix + "_C"}};
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + files.at("A") +
R"("]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("B") +
R"("]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
files.at("C") + R"("]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
auto runID = logger.startDAGRun("test_run", tasks);
@@ -143,12 +152,14 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
}
}
SECTION("Recovery from Error") {
SECTION("Recovery from Error")
{
auto cleanup = []() {
// Cleanup
std::vector<fs::path> paths{"rec_error_A", "noexist"};
for (const auto &pth : paths) {
if (fs::exists(pth)) fs::remove_all(pth);
if (fs::exists(pth))
fs::remove_all(pth);
}
};
@@ -156,12 +167,12 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
std::string goodPrefix = "rec_error_";
std::string badPrefix = "noexist/rec_error_";
std::string taskJSON = R"({"A": {"job": {"command": ["/usr/bin/touch", ")"
+ goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")"
+ badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")"
+ badPrefix + R"(C"]}}})";
std::string taskJSON =
R"({"A": {"job": {"command": ["/usr/bin/touch", ")" + goodPrefix +
R"(A"]}, "children": ["C"]}, "B": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix +
R"(B"]}, "children": ["C"]}, "C": {"job": {"command": ["/usr/bin/touch", ")" +
badPrefix + R"(C"]}}})";
auto tasks = expandTaskSet(daggy::tasksFromJSON(taskJSON), ex);
auto dag = daggy::buildDAGFromTasks(tasks);
@@ -181,17 +192,21 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
// Get the DAG Run Attempts
auto record = logger.getDAGRun(runID);
REQUIRE(record.taskAttempts["A_0"].size() == 1); // A ran fine
REQUIRE(record.taskAttempts["B_0"].size() == 2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() == 1); // C wasn't run because B errored
REQUIRE(record.taskAttempts["B_0"].size() ==
2); // B errored and had to be retried
REQUIRE(record.taskAttempts["C_0"].size() ==
1); // C wasn't run because B errored
cleanup();
}
SECTION("Generator tasks") {
SECTION("Generator tasks")
{
std::string testParams{R"({"DATE": ["2021-05-06", "2021-05-07" ]})"};
auto params = daggy::configFromJSON(testParams);
std::string generatorOutput = R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
std::string generatorOutput =
R"({"B": {"job": {"command": ["/usr/bin/echo", "-e", "{{DATE}}"]}, "children": ["C"]}})";
fs::path ofn = fs::current_path() / "generator_test_output.json";
std::ofstream ofh{ofn};
ofh << generatorOutput << std::endl;
@@ -199,7 +214,9 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
std::stringstream jsonTasks;
jsonTasks << R"({ "A": { "job": {"command": [ "/usr/bin/cat", )" << std::quoted(ofn.string())
jsonTasks
<< R"({ "A": { "job": {"command": [ "/usr/bin/cat", )"
<< std::quoted(ofn.string())
<< R"(]}, "children": ["C"], "isGenerator": true},)"
<< R"("C": { "job": {"command": [ "/usr/bin/echo", "hello!"]} } })";
@@ -227,9 +244,12 @@ TEST_CASE("dag_runner", "[utilities_dag_runner]") {
}
// Ensure that children were updated properly
REQUIRE(record.tasks["A_0"].children == std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children == std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["A_0"].children ==
std::unordered_set<std::string>{"B_0", "B_1", "C"});
REQUIRE(record.tasks["B_0"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["B_1"].children ==
std::unordered_set<std::string>{"C"});
REQUIRE(record.tasks["C_0"].children.empty());
}
}

View File

@@ -1,16 +1,15 @@
#include <iostream>
#include <fstream>
#include <filesystem>
#include <argparse.hpp>
#include <pistache/client.h>
#include <rapidjson/document.h>
#include <argparse.hpp>
#include <filesystem>
#include <fstream>
#include <iostream>
namespace rj = rapidjson;
Pistache::Http::Response
REQUEST(std::string url, std::string payload = "") {
Pistache::Http::Response REQUEST(std::string url, std::string payload = "")
{
Pistache::Http::Experimental::Client client;
client.init();
Pistache::Http::Response response;
@@ -31,11 +30,11 @@ REQUEST(std::string url, std::string payload = "") {
error = true;
try {
std::rethrow_exception(ptr);
} catch (std::exception &e) {
}
catch (std::exception &e) {
msg = e.what();
}
}
);
});
Pistache::Async::Barrier<Pistache::Http::Response> barrier(request);
barrier.wait_for(std::chrono::seconds(2));
@@ -46,7 +45,8 @@ REQUEST(std::string url, std::string payload = "") {
return response;
}
int main(int argc, char **argv) {
int main(int argc, char **argv)
{
argparse::ArgumentParser args("Daggy Client");
args.add_argument("-v", "--verbose")
@@ -55,10 +55,8 @@ int main(int argc, char **argv) {
args.add_argument("--url")
.help("base URL of server")
.default_value("http://localhost:2503");
args.add_argument("--sync")
.default_value(false)
.implicit_value(true)
.help("Poll for job to complete");
args.add_argument("--sync").default_value(false).implicit_value(true).help(
"Poll for job to complete");
args.add_argument("--action")
.help("Number of tasks to run concurrently")
.default_value(30)
@@ -66,7 +64,8 @@ int main(int argc, char **argv) {
try {
args.parse_args(argc, argv);
} catch (std::exception &e) {
}
catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl;
std::cout << args;
exit(1);

View File

@@ -1,13 +1,11 @@
#include <iostream>
#include <fstream>
#include <atomic>
#include <sys/stat.h>
#include <signal.h>
#include <sys/stat.h>
#include <argparse.hpp>
#include <atomic>
#include <daggy/Server.hpp>
#include <fstream>
#include <iostream>
// Add executors here
#ifdef DAGGY_ENABLE_SLURM
@@ -24,14 +22,15 @@
/*
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <syslog.h>
#include <unistd.h>
*/
static std::atomic<bool> running{true};
void signalHandler(int signal) {
void signalHandler(int signal)
{
switch (signal) {
case SIGHUP:
break;
@@ -42,22 +41,27 @@ void signalHandler(int signal) {
}
}
void daemonize() {
void daemonize()
{
pid_t pid;
struct sigaction newSigAction;
sigset_t newSigSet;
/* Check if parent process id is set */
if (getppid() == 1) { return; }
if (getppid() == 1) {
return;
}
/* Set signal mask - signals we want to block */
sigemptyset(&newSigSet);
sigaddset(&newSigSet, SIGCHLD); /* ignore child - i.e. we don't need to wait for it */
sigaddset(&newSigSet,
SIGCHLD); /* ignore child - i.e. we don't need to wait for it */
sigaddset(&newSigSet, SIGTSTP); /* ignore Tty stop signals */
sigaddset(&newSigSet, SIGTTOU); /* ignore Tty background writes */
sigaddset(&newSigSet, SIGTTIN); /* ignore Tty background reads */
sigprocmask(SIG_BLOCK, &newSigSet, NULL); /* Block the above specified signals */
sigprocmask(SIG_BLOCK, &newSigSet,
NULL); /* Block the above specified signals */
/* Set up a signal handler */
newSigAction.sa_handler = signalHandler;
@@ -71,8 +75,12 @@ void daemonize() {
// Fork once
pid = fork();
if (pid < 0) { exit(EXIT_FAILURE); }
if (pid > 0) { exit(EXIT_SUCCESS); }
if (pid < 0) {
exit(EXIT_FAILURE);
}
if (pid > 0) {
exit(EXIT_SUCCESS);
}
/* On success: The child process becomes session leader */
if (setsid() < 0) {
@@ -99,18 +107,19 @@ void daemonize() {
(void)rc;
/* Close all open file descriptors */
for (auto x = sysconf(_SC_OPEN_MAX); x >= 0; x--) { close(x); }
for (auto x = sysconf(_SC_OPEN_MAX); x >= 0; x--) {
close(x);
}
}
int main(int argc, char **argv) {
int main(int argc, char **argv)
{
argparse::ArgumentParser args("Daggy");
args.add_argument("-v", "--verbose")
.default_value(false)
.implicit_value(true);
args.add_argument("-d", "--daemon")
.default_value(false)
.implicit_value(true);
args.add_argument("-d", "--daemon").default_value(false).implicit_value(true);
args.add_argument("--ip")
.help("IP address to listen to")
.default_value(std::string{"127.0.0.1"});
@@ -136,7 +145,8 @@ int main(int argc, char **argv) {
try {
args.parse_args(argc, argv);
} catch (std::exception &e) {
}
catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl;
std::cout << args;
exit(1);
@@ -156,7 +166,8 @@ int main(int argc, char **argv) {
std::cout << "Unable to daemonize if logging to stdout" << std::endl;
exit(1);
}
} else {
}
else {
fs::path logFn{logFileName};
if (!logFn.is_absolute()) {
logFileName = (fs::current_path() / logFileName).string();
@@ -164,12 +175,14 @@ int main(int argc, char **argv) {
}
if (verbose) {
std::cout << "Server running at http://" << listenIP << ':' << listenPort << std::endl
std::cout << "Server running at http://" << listenIP << ':' << listenPort
<< std::endl
<< "Max DAG Processing: " << dagThreads << std::endl
<< "Max Task Execution: " << executorThreads << std::endl
<< "Max Web Clients: " << webThreads << std::endl
<< "Logging to: " << logFileName << std::endl
<< std::endl << "Ctrl-C to exit" << std::endl;
<< std::endl
<< "Ctrl-C to exit" << std::endl;
}
if (asDaemon) {
@@ -179,8 +192,10 @@ int main(int argc, char **argv) {
std::ofstream logFH;
std::unique_ptr<daggy::loggers::dag_run::DAGRunLogger> logger;
if (logFileName == "-") {
logger = std::make_unique<daggy::loggers::dag_run::OStreamLogger>(std::cout);
} else {
logger =
std::make_unique<daggy::loggers::dag_run::OStreamLogger>(std::cout);
}
else {
logFH.open(logFileName, std::ios::app);
logger = std::make_unique<daggy::loggers::dag_run::OStreamLogger>(logFH);
}
@@ -196,7 +211,6 @@ int main(int argc, char **argv) {
server.init(webThreads);
server.start();
running = true;
while (running) {
std::this_thread::sleep_for(std::chrono::seconds(30));